Upload 36 files
Browse files- .gitattributes +4 -0
- kld_data/01_kld_vs_filesize.png +3 -0
- kld_data/01_kld_vs_filesize_pareto.png +3 -0
- kld_data/02_ppl_vs_filesize.png +3 -0
- kld_data/02_ppl_vs_filesize_pareto.png +3 -0
- kld_data/aes_sedai/MiniMax-M2.5-IQ3_S.md +588 -0
- kld_data/aes_sedai/MiniMax-M2.5-IQ4_XS.md +588 -0
- kld_data/aes_sedai/MiniMax-M2.5-Q4_K_M.md +588 -0
- kld_data/aes_sedai/MiniMax-M2.5-Q5_K_M.md +588 -0
- kld_data/llm_quantization_data.csv +32 -0
- kld_data/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL.md +372 -0
- kld_data/unsloth/IQ4_XS/MiniMax-M2.5-IQ4_XS.md +372 -0
- kld_data/unsloth/MXFP4_MOE/MiniMax-M2.5-MXFP4_MOE.md +366 -0
- kld_data/unsloth/MiniMax-M2.5-UD-TQ1_0.md +370 -0
- kld_data/unsloth/Q2_K/MiniMax-M2.5-Q2_K.md +371 -0
- kld_data/unsloth/Q2_K_L/MiniMax-M2.5-Q2_K_L.md +371 -0
- kld_data/unsloth/Q3_K_M/MiniMax-M2.5-Q3_K_M.md +372 -0
- kld_data/unsloth/Q3_K_S/MiniMax-M2.5-Q3_K_S.md +369 -0
- kld_data/unsloth/Q4_0/MiniMax-M2.5-Q4_0.md +371 -0
- kld_data/unsloth/Q4_1/MiniMax-M2.5-Q4_1.md +370 -0
- kld_data/unsloth/Q4_K_M/MiniMax-M2.5-Q4_K_M.md +370 -0
- kld_data/unsloth/Q4_K_S/MiniMax-M2.5-Q4_K_S.md +371 -0
- kld_data/unsloth/Q5_K_M/MiniMax-M2.5-Q5_K_M.md +371 -0
- kld_data/unsloth/Q5_K_S/MiniMax-M2.5-Q5_K_S.md +371 -0
- kld_data/unsloth/Q6_K/MiniMax-M2.5-Q6_K.md +370 -0
- kld_data/unsloth/Q8_0/MiniMax-M2.5-Q8_0.md +371 -0
- kld_data/unsloth/UD-IQ1_M/MiniMax-M2.5-UD-IQ1_M.md +375 -0
- kld_data/unsloth/UD-IQ1_S/MiniMax-M2.5-UD-IQ1_S.md +377 -0
- kld_data/unsloth/UD-IQ2_M/MiniMax-M2.5-UD-IQ2_M.md +375 -0
- kld_data/unsloth/UD-IQ2_XXS/MiniMax-M2.5-UD-IQ2_XXS.md +376 -0
- kld_data/unsloth/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS.md +373 -0
- kld_data/unsloth/UD-Q2_K_XL/MiniMax-M2.5-UD-Q2_K_XL.md +372 -0
- kld_data/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL.md +372 -0
- kld_data/unsloth/UD-Q4_K_XL/MiniMax-M2.5-UD-Q4_K_XL.md +371 -0
- kld_data/unsloth/UD-Q5_K_XL/MiniMax-M2.5-UD-Q5_K_XL.md +372 -0
- kld_data/unsloth/UD-Q6_K_XL/MiniMax-M2.5-UD-Q6_K_XL.md +371 -0
- kld_data/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL.md +373 -0
.gitattributes
CHANGED
|
@@ -50,3 +50,7 @@ Q5_K_M/MiniMax-M2.5-Q5_K_M-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -te
|
|
| 50 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00003-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
| 51 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00004-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
| 52 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00005-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00003-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
| 51 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00004-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
| 52 |
Q5_K_M/MiniMax-M2.5-Q5_K_M-00005-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
kld_data/01_kld_vs_filesize_pareto.png filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
kld_data/01_kld_vs_filesize.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
kld_data/02_ppl_vs_filesize_pareto.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
kld_data/02_ppl_vs_filesize.png filter=lfs diff=lfs merge=lfs -text
|
kld_data/01_kld_vs_filesize.png
ADDED
|
Git LFS Details
|
kld_data/01_kld_vs_filesize_pareto.png
ADDED
|
Git LFS Details
|
kld_data/02_ppl_vs_filesize.png
ADDED
|
Git LFS Details
|
kld_data/02_ppl_vs_filesize_pareto.png
ADDED
|
Git LFS Details
|
kld_data/aes_sedai/MiniMax-M2.5-IQ3_S.md
ADDED
|
@@ -0,0 +1,588 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### MiniMax-M2.5-IQ3_S (aes_sedai)
|
| 2 |
+
|
| 3 |
+
78.76 GiB (2.96 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
./build/bin/llama-perplexity --n-gpu-layers 999 --threads 48 --override-tensor "blk\.(0|1|2|3)\.ffn_.*=CUDA0" --override-tensor "blk\.(4|5|6)\.ffn_.*=CUDA1" --override-tensor "blk\..*_exps\.=CPU" --flash-attn on --file "/mnt/srv/host/resources/KLD/calibration_datav3.txt" --kl-divergence-base "/mnt/srv/snowdrift/ref-logits-MiniMax-M2.5-BF16-calibration-datav3.bin" --kl-divergence --model "/mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-IQ3_S.gguf"
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 11 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 12 |
+
build: 8038 (05a6f0e89) with GNU 14.2.1 for Linux x86_64
|
| 13 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 14 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 15 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 7383 used, 16488 free vs. target of 1024
|
| 16 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 6217 used, 17654 free vs. target of 1024
|
| 17 |
+
llama_params_fit_impl: projected to use 13600 MiB of device memory vs. 47743 MiB of free device memory
|
| 18 |
+
llama_params_fit_impl: targets for free memory can be met on all devices, no changes needed
|
| 19 |
+
llama_params_fit: successfully fit params to free device memory
|
| 20 |
+
llama_params_fit: fitting params to free memory took 0.43 seconds
|
| 21 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 22 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 23 |
+
llama_model_loader: loaded meta data with 41 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-IQ3_S.gguf (version GGUF V3 (latest))
|
| 24 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 25 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 26 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 27 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 28 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 29 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 30 |
+
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
|
| 31 |
+
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
|
| 32 |
+
llama_model_loader: - kv 7: general.license str = other
|
| 33 |
+
llama_model_loader: - kv 8: general.license.name str = modified-mit
|
| 34 |
+
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 35 |
+
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
|
| 36 |
+
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
|
| 37 |
+
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
|
| 38 |
+
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
|
| 39 |
+
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
|
| 40 |
+
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
|
| 41 |
+
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
|
| 42 |
+
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 43 |
+
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 44 |
+
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
|
| 45 |
+
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
|
| 46 |
+
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
|
| 47 |
+
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
|
| 48 |
+
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
|
| 49 |
+
llama_model_loader: - kv 24: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 50 |
+
llama_model_loader: - kv 25: minimax-m2.rope.dimension_count u32 = 64
|
| 51 |
+
llama_model_loader: - kv 26: tokenizer.ggml.model str = gpt2
|
| 52 |
+
llama_model_loader: - kv 27: tokenizer.ggml.pre str = minimax-m2
|
| 53 |
+
llama_model_loader: - kv 28: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 54 |
+
llama_model_loader: - kv 29: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 55 |
+
llama_model_loader: - kv 30: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 56 |
+
llama_model_loader: - kv 31: tokenizer.ggml.bos_token_id u32 = 200034
|
| 57 |
+
llama_model_loader: - kv 32: tokenizer.ggml.eos_token_id u32 = 200020
|
| 58 |
+
llama_model_loader: - kv 33: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 59 |
+
llama_model_loader: - kv 34: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
|
| 60 |
+
llama_model_loader: - kv 35: general.quantization_version u32 = 2
|
| 61 |
+
llama_model_loader: - kv 36: general.file_type u32 = 7
|
| 62 |
+
llama_model_loader: - kv 37: quantize.imatrix.file str = /mnt/srv/snowdrift/fp16/MiniMax-M2.5/...
|
| 63 |
+
llama_model_loader: - kv 38: quantize.imatrix.dataset str = /mnt/srv/host/resources/KLD/calibrati...
|
| 64 |
+
llama_model_loader: - kv 39: quantize.imatrix.entries_count u32 = 496
|
| 65 |
+
llama_model_loader: - kv 40: quantize.imatrix.chunks_count u32 = 49
|
| 66 |
+
llama_model_loader: - type f32: 373 tensors
|
| 67 |
+
llama_model_loader: - type q8_0: 250 tensors
|
| 68 |
+
llama_model_loader: - type iq3_s: 62 tensors
|
| 69 |
+
llama_model_loader: - type iq2_s: 124 tensors
|
| 70 |
+
print_info: file format = GGUF V3 (latest)
|
| 71 |
+
print_info: file type = Q8_0
|
| 72 |
+
print_info: file size = 78.76 GiB (2.96 BPW)
|
| 73 |
+
load: 0 unused tokens
|
| 74 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 75 |
+
load: printing all EOG tokens:
|
| 76 |
+
load: - 200004 ('<fim_pad>')
|
| 77 |
+
load: - 200005 ('<reponame>')
|
| 78 |
+
load: - 200020 ('[e~[')
|
| 79 |
+
load: special tokens cache size = 54
|
| 80 |
+
load: token to piece cache size = 1.3355 MB
|
| 81 |
+
print_info: arch = minimax-m2
|
| 82 |
+
print_info: vocab_only = 0
|
| 83 |
+
print_info: no_alloc = 0
|
| 84 |
+
print_info: n_ctx_train = 196608
|
| 85 |
+
print_info: n_embd = 3072
|
| 86 |
+
print_info: n_embd_inp = 3072
|
| 87 |
+
print_info: n_layer = 62
|
| 88 |
+
print_info: n_head = 48
|
| 89 |
+
print_info: n_head_kv = 8
|
| 90 |
+
print_info: n_rot = 64
|
| 91 |
+
print_info: n_swa = 0
|
| 92 |
+
print_info: is_swa_any = 0
|
| 93 |
+
print_info: n_embd_head_k = 128
|
| 94 |
+
print_info: n_embd_head_v = 128
|
| 95 |
+
print_info: n_gqa = 6
|
| 96 |
+
print_info: n_embd_k_gqa = 1024
|
| 97 |
+
print_info: n_embd_v_gqa = 1024
|
| 98 |
+
print_info: f_norm_eps = 0.0e+00
|
| 99 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 100 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 101 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 102 |
+
print_info: f_logit_scale = 0.0e+00
|
| 103 |
+
print_info: f_attn_scale = 0.0e+00
|
| 104 |
+
print_info: n_ff = 1536
|
| 105 |
+
print_info: n_expert = 256
|
| 106 |
+
print_info: n_expert_used = 8
|
| 107 |
+
print_info: n_expert_groups = 0
|
| 108 |
+
print_info: n_group_used = 0
|
| 109 |
+
print_info: causal attn = 1
|
| 110 |
+
print_info: pooling type = 0
|
| 111 |
+
print_info: rope type = 2
|
| 112 |
+
print_info: rope scaling = linear
|
| 113 |
+
print_info: freq_base_train = 5000000.0
|
| 114 |
+
print_info: freq_scale_train = 1
|
| 115 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 116 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 117 |
+
print_info: rope_finetuned = unknown
|
| 118 |
+
print_info: model type = 230B.A10B
|
| 119 |
+
print_info: model params = 228.69 B
|
| 120 |
+
print_info: general.name = MiniMax M2.5
|
| 121 |
+
print_info: vocab type = BPE
|
| 122 |
+
print_info: n_vocab = 200064
|
| 123 |
+
print_info: n_merges = 199744
|
| 124 |
+
print_info: BOS token = 200034 ']~!b['
|
| 125 |
+
print_info: EOS token = 200020 '[e~['
|
| 126 |
+
print_info: UNK token = 200021 ']!d~['
|
| 127 |
+
print_info: LF token = 10 'Ċ'
|
| 128 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 129 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 130 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 131 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 132 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 133 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 134 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 135 |
+
print_info: EOG token = 200020 '[e~['
|
| 136 |
+
print_info: max token length = 256
|
| 137 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 138 |
+
load_tensors: offloading output layer to GPU
|
| 139 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 140 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 141 |
+
load_tensors: CPU_Mapped model buffer size = 80024.72 MiB
|
| 142 |
+
load_tensors: CUDA0 model buffer size = 6448.62 MiB
|
| 143 |
+
load_tensors: CUDA1 model buffer size = 5761.11 MiB
|
| 144 |
+
....................................................................................................
|
| 145 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 146 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 147 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 148 |
+
llama_context: constructing llama_context
|
| 149 |
+
llama_context: n_seq_max = 1
|
| 150 |
+
llama_context: n_ctx = 512
|
| 151 |
+
llama_context: n_ctx_seq = 512
|
| 152 |
+
llama_context: n_batch = 512
|
| 153 |
+
llama_context: n_ubatch = 512
|
| 154 |
+
llama_context: causal_attn = 1
|
| 155 |
+
llama_context: flash_attn = enabled
|
| 156 |
+
llama_context: kv_unified = false
|
| 157 |
+
llama_context: freq_base = 5000000.0
|
| 158 |
+
llama_context: freq_scale = 1
|
| 159 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 160 |
+
llama_context: CUDA_Host output buffer size = 0.76 MiB
|
| 161 |
+
llama_kv_cache: CUDA0 KV buffer size = 64.00 MiB
|
| 162 |
+
llama_kv_cache: CUDA1 KV buffer size = 60.00 MiB
|
| 163 |
+
llama_kv_cache: size = 124.00 MiB ( 512 cells, 62 layers, 1/1 seqs), K (f16): 62.00 MiB, V (f16): 62.00 MiB
|
| 164 |
+
sched_reserve: reserving ...
|
| 165 |
+
sched_reserve: CUDA0 compute buffer size = 870.50 MiB
|
| 166 |
+
sched_reserve: CUDA1 compute buffer size = 396.75 MiB
|
| 167 |
+
sched_reserve: CUDA_Host compute buffer size = 13.01 MiB
|
| 168 |
+
sched_reserve: graph nodes = 3975
|
| 169 |
+
sched_reserve: graph splits = 210 (with bs=512), 124 (with bs=1)
|
| 170 |
+
sched_reserve: reserve took 6.80 ms, sched copies = 1
|
| 171 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 172 |
+
|
| 173 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 174 |
+
kl_divergence: 3.50 seconds per pass - ETA 7.05 minutes
|
| 175 |
+
|
| 176 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 177 |
+
1 7.1499 ± 1.4422 0.05757 ± 0.04423 0.10399 ± 0.02614 7.051 ± 0.715 % 88.627 ± 1.992 %
|
| 178 |
+
|
| 179 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 180 |
+
2 4.9123 ± 0.6103 0.02122 ± 0.02541 0.07214 ± 0.01395 7.380 ± 0.989 % 90.980 ± 1.270 %
|
| 181 |
+
|
| 182 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 183 |
+
3 4.6554 ± 0.4714 0.04611 ± 0.02155 0.08203 ± 0.01116 9.938 ± 1.009 % 90.588 ± 1.056 %
|
| 184 |
+
|
| 185 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 186 |
+
4 5.2074 ± 0.4625 0.03542 ± 0.01903 0.08611 ± 0.00877 9.455 ± 0.813 % 89.510 ± 0.960 %
|
| 187 |
+
|
| 188 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 189 |
+
5 5.0443 ± 0.4011 0.04590 ± 0.01864 0.09664 ± 0.00929 10.101 ± 0.772 % 89.412 ± 0.862 %
|
| 190 |
+
|
| 191 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 192 |
+
6 6.1524 ± 0.4769 0.05567 ± 0.01739 0.10923 ± 0.00825 10.074 ± 0.674 % 88.366 ± 0.820 %
|
| 193 |
+
|
| 194 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 195 |
+
7 5.7327 ± 0.3977 0.04837 ± 0.01676 0.13324 ± 0.00903 11.863 ± 0.672 % 87.059 ± 0.795 %
|
| 196 |
+
|
| 197 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 198 |
+
8 6.4382 ± 0.4224 0.04499 ± 0.01548 0.13075 ± 0.00799 11.511 ± 0.612 % 86.520 ± 0.756 %
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
9 6.3409 ± 0.3894 0.04444 ± 0.01433 0.12487 ± 0.00720 11.189 ± 0.567 % 86.710 ± 0.709 %
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
10 5.7970 ± 0.3318 0.04175 ± 0.01336 0.12157 ± 0.00698 11.156 ± 0.543 % 87.176 ± 0.662 %
|
| 205 |
+
|
| 206 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 207 |
+
11 6.3559 ± 0.3534 0.04212 ± 0.01262 0.12052 ± 0.00640 10.889 ± 0.509 % 86.809 ± 0.639 %
|
| 208 |
+
|
| 209 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 210 |
+
12 7.0069 ± 0.3770 0.03623 ± 0.01211 0.12284 ± 0.00614 10.644 ± 0.481 % 86.471 ± 0.618 %
|
| 211 |
+
|
| 212 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 213 |
+
13 7.2785 ± 0.3732 0.03625 ± 0.01140 0.11817 ± 0.00570 10.364 ± 0.457 % 86.456 ± 0.594 %
|
| 214 |
+
|
| 215 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 216 |
+
14 7.8410 ± 0.3901 0.03355 ± 0.01135 0.12081 ± 0.00587 10.339 ± 0.444 % 86.106 ± 0.579 %
|
| 217 |
+
|
| 218 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 219 |
+
15 8.2070 ± 0.3948 0.03266 ± 0.01081 0.11868 ± 0.00550 10.178 ± 0.423 % 85.935 ± 0.562 %
|
| 220 |
+
|
| 221 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 222 |
+
16 8.5150 ± 0.3966 0.03269 ± 0.01047 0.12223 ± 0.00617 10.198 ± 0.414 % 85.833 ± 0.546 %
|
| 223 |
+
|
| 224 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 225 |
+
17 8.7927 ± 0.4009 0.03832 ± 0.01016 0.12720 ± 0.00604 10.243 ± 0.409 % 85.767 ± 0.531 %
|
| 226 |
+
|
| 227 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 228 |
+
18 8.3038 ± 0.3659 0.03961 ± 0.01010 0.12776 ± 0.00586 10.195 ± 0.396 % 85.991 ± 0.512 %
|
| 229 |
+
|
| 230 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 231 |
+
19 8.3937 ± 0.3588 0.03389 ± 0.00977 0.12520 ± 0.00557 10.131 ± 0.379 % 86.109 ± 0.497 %
|
| 232 |
+
|
| 233 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 234 |
+
20 8.4696 ± 0.3533 0.03623 ± 0.00970 0.12906 ± 0.00542 10.184 ± 0.365 % 85.922 ± 0.487 %
|
| 235 |
+
|
| 236 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 237 |
+
21 8.4788 ± 0.3457 0.03885 ± 0.00948 0.12950 ± 0.00532 10.187 ± 0.354 % 86.106 ± 0.473 %
|
| 238 |
+
|
| 239 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 240 |
+
22 8.8788 ± 0.3578 0.04041 ± 0.00939 0.13394 ± 0.00524 10.220 ± 0.342 % 85.704 ± 0.467 %
|
| 241 |
+
|
| 242 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 243 |
+
23 8.8755 ± 0.3509 0.03724 ± 0.00956 0.14324 ± 0.00615 10.690 ± 0.357 % 85.610 ± 0.458 %
|
| 244 |
+
|
| 245 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 246 |
+
24 9.2798 ± 0.3603 0.03707 ± 0.00928 0.14229 ± 0.00591 10.626 ± 0.347 % 85.474 ± 0.450 %
|
| 247 |
+
|
| 248 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 249 |
+
25 9.3044 ± 0.3547 0.03910 ± 0.00916 0.14537 ± 0.00578 10.820 ± 0.340 % 85.412 ± 0.442 %
|
| 250 |
+
|
| 251 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 252 |
+
26 8.9995 ± 0.3330 0.07094 ± 0.00992 0.17438 ± 0.00651 13.282 ± 0.384 % 84.736 ± 0.442 %
|
| 253 |
+
|
| 254 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 255 |
+
27 8.8048 ± 0.3177 0.10193 ± 0.01075 0.20903 ± 0.00754 15.577 ± 0.409 % 83.907 ± 0.443 %
|
| 256 |
+
|
| 257 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 258 |
+
28 8.9153 ± 0.3165 0.10031 ± 0.01048 0.20692 ± 0.00729 15.459 ± 0.399 % 83.908 ± 0.435 %
|
| 259 |
+
|
| 260 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 261 |
+
29 8.8224 ± 0.3077 0.09771 ± 0.01024 0.20420 ± 0.00706 15.367 ± 0.390 % 83.989 ± 0.426 %
|
| 262 |
+
|
| 263 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 264 |
+
30 8.2702 ± 0.2816 0.09973 ± 0.01007 0.20087 ± 0.00691 15.335 ± 0.386 % 84.405 ± 0.415 %
|
| 265 |
+
|
| 266 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 267 |
+
31 7.7733 ± 0.2583 0.09829 ± 0.00978 0.19636 ± 0.00671 15.247 ± 0.379 % 84.794 ± 0.404 %
|
| 268 |
+
|
| 269 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 270 |
+
32 7.5499 ± 0.2447 0.09420 ± 0.00955 0.19273 ± 0.00651 15.139 ± 0.371 % 84.951 ± 0.396 %
|
| 271 |
+
|
| 272 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 273 |
+
33 7.3707 ± 0.2334 0.09094 ± 0.00932 0.18925 ± 0.00632 15.021 ± 0.363 % 84.991 ± 0.389 %
|
| 274 |
+
|
| 275 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 276 |
+
34 7.5728 ± 0.2378 0.09102 ± 0.00917 0.19023 ± 0.00616 14.957 ± 0.355 % 84.890 ± 0.385 %
|
| 277 |
+
|
| 278 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 279 |
+
35 7.6673 ± 0.2391 0.09023 ± 0.00909 0.19283 ± 0.00605 14.976 ± 0.348 % 84.672 ± 0.381 %
|
| 280 |
+
|
| 281 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 282 |
+
36 7.7082 ± 0.2377 0.08761 ± 0.00894 0.19149 ± 0.00590 14.885 ± 0.341 % 84.662 ± 0.376 %
|
| 283 |
+
|
| 284 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 285 |
+
37 7.7490 ± 0.2359 0.09084 ± 0.00888 0.19479 ± 0.00590 15.160 ± 0.338 % 84.441 ± 0.373 %
|
| 286 |
+
|
| 287 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 288 |
+
38 7.9571 ± 0.2399 0.08851 ± 0.00871 0.19279 ± 0.00576 15.028 ± 0.333 % 84.458 ± 0.368 %
|
| 289 |
+
|
| 290 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 291 |
+
39 7.9857 ± 0.2376 0.09869 ± 0.00883 0.20215 ± 0.00594 15.535 ± 0.333 % 84.294 ± 0.365 %
|
| 292 |
+
|
| 293 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 294 |
+
40 7.9897 ± 0.2339 0.13124 ± 0.00941 0.23174 ± 0.00660 16.856 ± 0.336 % 83.627 ± 0.366 %
|
| 295 |
+
|
| 296 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 297 |
+
41 7.9773 ± 0.2298 0.16129 ± 0.00998 0.26265 ± 0.00734 18.262 ± 0.338 % 82.975 ± 0.368 %
|
| 298 |
+
|
| 299 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 300 |
+
42 7.9274 ± 0.2250 0.18562 ± 0.01044 0.28571 ± 0.00775 19.316 ± 0.338 % 82.512 ± 0.367 %
|
| 301 |
+
|
| 302 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 303 |
+
43 7.8259 ± 0.2189 0.20379 ± 0.01059 0.29920 ± 0.00784 19.972 ± 0.336 % 82.262 ± 0.365 %
|
| 304 |
+
|
| 305 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 306 |
+
44 7.7184 ± 0.2123 0.19742 ± 0.01039 0.29406 ± 0.00767 19.777 ± 0.332 % 82.371 ± 0.360 %
|
| 307 |
+
|
| 308 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 309 |
+
45 7.8606 ± 0.2149 0.19459 ± 0.01021 0.29127 ± 0.00751 19.597 ± 0.328 % 82.257 ± 0.357 %
|
| 310 |
+
|
| 311 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 312 |
+
46 7.9905 ± 0.2163 0.19031 ± 0.01003 0.28770 ± 0.00735 19.412 ± 0.324 % 82.234 ± 0.353 %
|
| 313 |
+
|
| 314 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 315 |
+
47 8.1436 ± 0.2186 0.18677 ± 0.00984 0.28352 ± 0.00720 19.230 ± 0.320 % 82.320 ± 0.348 %
|
| 316 |
+
|
| 317 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 318 |
+
48 7.9770 ± 0.2107 0.18328 ± 0.00966 0.27894 ± 0.00706 19.060 ± 0.317 % 82.451 ± 0.344 %
|
| 319 |
+
|
| 320 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 321 |
+
49 8.1435 ± 0.2137 0.18526 ± 0.00965 0.28316 ± 0.00720 18.950 ± 0.313 % 82.345 ± 0.341 %
|
| 322 |
+
|
| 323 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 324 |
+
50 8.2504 ± 0.2153 0.18214 ± 0.00955 0.28241 ± 0.00713 18.852 ± 0.310 % 82.290 ± 0.338 %
|
| 325 |
+
|
| 326 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 327 |
+
51 8.3558 ± 0.2161 0.17914 ± 0.00939 0.27850 ± 0.00700 18.686 ± 0.307 % 82.361 ± 0.334 %
|
| 328 |
+
|
| 329 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 330 |
+
52 8.4145 ± 0.2152 0.17767 ± 0.00929 0.27821 ± 0.00689 18.613 ± 0.303 % 82.293 ± 0.332 %
|
| 331 |
+
|
| 332 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 333 |
+
53 8.5291 ± 0.2160 0.17482 ± 0.00914 0.27509 ± 0.00676 18.469 ± 0.300 % 82.257 ± 0.329 %
|
| 334 |
+
|
| 335 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 336 |
+
54 8.5785 ± 0.2148 0.17257 ± 0.00900 0.27189 ± 0.00665 18.322 ± 0.297 % 82.367 ± 0.325 %
|
| 337 |
+
|
| 338 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 339 |
+
55 8.6183 ± 0.2134 0.17042 ± 0.00885 0.26850 ± 0.00653 18.186 ± 0.294 % 82.460 ± 0.321 %
|
| 340 |
+
|
| 341 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 342 |
+
56 8.6509 ± 0.2126 0.16862 ± 0.00874 0.26645 ± 0.00645 18.075 ± 0.291 % 82.521 ± 0.318 %
|
| 343 |
+
|
| 344 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 345 |
+
57 8.6574 ± 0.2108 0.16914 ± 0.00865 0.26670 ± 0.00637 18.072 ± 0.288 % 82.456 ± 0.315 %
|
| 346 |
+
|
| 347 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 348 |
+
58 8.6691 ± 0.2093 0.16742 ± 0.00856 0.26694 ± 0.00636 18.021 ± 0.285 % 82.495 ± 0.312 %
|
| 349 |
+
|
| 350 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 351 |
+
59 8.5966 ± 0.2054 0.16443 ± 0.00844 0.26364 ± 0.00626 17.887 ± 0.283 % 82.612 ± 0.309 %
|
| 352 |
+
|
| 353 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 354 |
+
60 8.5875 ± 0.2034 0.16260 ± 0.00832 0.26138 ± 0.00616 17.804 ± 0.280 % 82.627 ± 0.306 %
|
| 355 |
+
|
| 356 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 357 |
+
61 8.6188 ± 0.2025 0.16046 ± 0.00821 0.25886 ± 0.00606 17.686 ± 0.277 % 82.674 ± 0.303 %
|
| 358 |
+
|
| 359 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 360 |
+
62 8.5523 ± 0.1994 0.15723 ± 0.00812 0.25725 ± 0.00598 17.632 ± 0.274 % 82.751 ± 0.300 %
|
| 361 |
+
|
| 362 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 363 |
+
63 8.5922 ± 0.1994 0.15631 ± 0.00804 0.25574 ± 0.00590 17.542 ± 0.272 % 82.764 ± 0.298 %
|
| 364 |
+
|
| 365 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 366 |
+
64 8.5506 ± 0.1964 0.15450 ± 0.00794 0.25381 ± 0.00582 17.443 ± 0.270 % 82.800 ± 0.295 %
|
| 367 |
+
|
| 368 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 369 |
+
65 8.5116 ± 0.1938 0.15166 ± 0.00784 0.25242 ± 0.00574 17.354 ± 0.267 % 82.890 ± 0.293 %
|
| 370 |
+
|
| 371 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 372 |
+
66 8.5348 ± 0.1929 0.14909 ± 0.00775 0.25105 ± 0.00567 17.276 ± 0.265 % 82.852 ± 0.291 %
|
| 373 |
+
|
| 374 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 375 |
+
67 8.5341 ± 0.1917 0.14726 ± 0.00766 0.24910 ± 0.00559 17.175 ± 0.263 % 82.938 ± 0.288 %
|
| 376 |
+
|
| 377 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 378 |
+
68 8.4694 ± 0.1886 0.14609 ± 0.00758 0.24674 ± 0.00552 17.074 ± 0.260 % 83.057 ± 0.285 %
|
| 379 |
+
|
| 380 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 381 |
+
69 8.5017 ± 0.1880 0.14538 ± 0.00750 0.24499 ± 0.00544 16.976 ± 0.258 % 83.120 ± 0.282 %
|
| 382 |
+
|
| 383 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 384 |
+
70 8.4489 ± 0.1851 0.14337 ± 0.00742 0.24322 ± 0.00537 16.903 ± 0.256 % 83.193 ± 0.280 %
|
| 385 |
+
|
| 386 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 387 |
+
71 8.4248 ± 0.1834 0.14301 ± 0.00733 0.24123 ± 0.00531 16.830 ± 0.254 % 83.297 ± 0.277 %
|
| 388 |
+
|
| 389 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 390 |
+
72 8.4391 ± 0.1827 0.14201 ± 0.00727 0.23967 ± 0.00524 16.751 ± 0.252 % 83.306 ± 0.275 %
|
| 391 |
+
|
| 392 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 393 |
+
73 8.4302 ± 0.1812 0.14029 ± 0.00718 0.23790 ± 0.00517 16.675 ± 0.250 % 83.352 ± 0.273 %
|
| 394 |
+
|
| 395 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 396 |
+
74 8.4021 ± 0.1791 0.13861 ± 0.00711 0.23631 ± 0.00511 16.610 ± 0.248 % 83.392 ± 0.271 %
|
| 397 |
+
|
| 398 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 399 |
+
75 8.3833 ± 0.1774 0.13684 ± 0.00704 0.23523 ± 0.00504 16.545 ± 0.246 % 83.461 ± 0.269 %
|
| 400 |
+
|
| 401 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 402 |
+
76 8.4480 ± 0.1779 0.13570 ± 0.00697 0.23409 ± 0.00499 16.466 ± 0.244 % 83.540 ± 0.266 %
|
| 403 |
+
|
| 404 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 405 |
+
77 8.4310 ± 0.1763 0.13416 ± 0.00690 0.23238 ± 0.00493 16.389 ± 0.242 % 83.606 ± 0.264 %
|
| 406 |
+
|
| 407 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 408 |
+
78 8.4395 ± 0.1755 0.13322 ± 0.00684 0.23131 ± 0.00487 16.323 ± 0.241 % 83.620 ± 0.262 %
|
| 409 |
+
|
| 410 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 411 |
+
79 8.4375 ± 0.1744 0.13152 ± 0.00677 0.22986 ± 0.00481 16.248 ± 0.239 % 83.619 ± 0.261 %
|
| 412 |
+
|
| 413 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 414 |
+
80 8.4238 ± 0.1735 0.13068 ± 0.00673 0.22920 ± 0.00476 16.199 ± 0.237 % 83.642 ± 0.259 %
|
| 415 |
+
|
| 416 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 417 |
+
81 8.3844 ± 0.1716 0.12975 ± 0.00667 0.22784 ± 0.00471 16.139 ± 0.236 % 83.718 ± 0.257 %
|
| 418 |
+
|
| 419 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 420 |
+
82 8.3577 ± 0.1698 0.12901 ± 0.00661 0.22628 ± 0.00466 16.068 ± 0.234 % 83.797 ± 0.255 %
|
| 421 |
+
|
| 422 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 423 |
+
83 8.3795 ± 0.1689 0.12754 ± 0.00654 0.22468 ± 0.00460 15.999 ± 0.232 % 83.780 ± 0.253 %
|
| 424 |
+
|
| 425 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 426 |
+
84 8.3903 ± 0.1678 0.12629 ± 0.00648 0.22346 ± 0.00455 15.944 ± 0.231 % 83.758 ± 0.252 %
|
| 427 |
+
|
| 428 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 429 |
+
85 8.3761 ± 0.1663 0.12562 ± 0.00641 0.22183 ± 0.00450 15.873 ± 0.229 % 83.829 ± 0.250 %
|
| 430 |
+
|
| 431 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 432 |
+
86 8.2883 ± 0.1630 0.12429 ± 0.00635 0.22014 ± 0.00445 15.810 ± 0.227 % 83.894 ± 0.248 %
|
| 433 |
+
|
| 434 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 435 |
+
87 8.2106 ± 0.1600 0.12306 ± 0.00628 0.21822 ± 0.00440 15.741 ± 0.226 % 83.958 ± 0.246 %
|
| 436 |
+
|
| 437 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 438 |
+
88 8.1296 ± 0.1570 0.12209 ± 0.00622 0.21656 ± 0.00436 15.686 ± 0.224 % 84.015 ± 0.245 %
|
| 439 |
+
|
| 440 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 441 |
+
89 8.0343 ± 0.1538 0.12070 ± 0.00615 0.21471 ± 0.00431 15.622 ± 0.223 % 84.089 ± 0.243 %
|
| 442 |
+
|
| 443 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 444 |
+
90 7.9600 ± 0.1510 0.11944 ± 0.00609 0.21282 ± 0.00426 15.550 ± 0.221 % 84.157 ± 0.241 %
|
| 445 |
+
|
| 446 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 447 |
+
91 7.8935 ± 0.1486 0.11822 ± 0.00603 0.21108 ± 0.00422 15.483 ± 0.220 % 84.240 ± 0.239 %
|
| 448 |
+
|
| 449 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 450 |
+
92 7.8172 ± 0.1458 0.11658 ± 0.00598 0.20958 ± 0.00418 15.420 ± 0.219 % 84.241 ± 0.238 %
|
| 451 |
+
|
| 452 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 453 |
+
93 7.8185 ± 0.1452 0.11570 ± 0.00595 0.20945 ± 0.00415 15.388 ± 0.217 % 84.238 ± 0.237 %
|
| 454 |
+
|
| 455 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 456 |
+
94 7.8436 ± 0.1449 0.11464 ± 0.00590 0.20823 ± 0.00411 15.328 ± 0.216 % 84.259 ± 0.235 %
|
| 457 |
+
|
| 458 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 459 |
+
95 7.9542 ± 0.1465 0.11403 ± 0.00586 0.20747 ± 0.00407 15.280 ± 0.214 % 84.256 ± 0.234 %
|
| 460 |
+
|
| 461 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 462 |
+
96 8.0501 ± 0.1476 0.11330 ± 0.00581 0.20663 ± 0.00403 15.219 ± 0.213 % 84.228 ± 0.233 %
|
| 463 |
+
|
| 464 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 465 |
+
97 8.1347 ± 0.1485 0.11275 ± 0.00576 0.20554 ± 0.00399 15.154 ± 0.212 % 84.172 ± 0.232 %
|
| 466 |
+
|
| 467 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 468 |
+
98 8.2790 ± 0.1510 0.11182 ± 0.00572 0.20430 ± 0.00395 15.086 ± 0.211 % 84.134 ± 0.231 %
|
| 469 |
+
|
| 470 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 471 |
+
99 8.4031 ± 0.1528 0.11146 ± 0.00567 0.20316 ± 0.00391 15.022 ± 0.209 % 84.124 ± 0.230 %
|
| 472 |
+
|
| 473 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 474 |
+
100 8.4300 ± 0.1525 0.11038 ± 0.00563 0.20304 ± 0.00391 14.981 ± 0.208 % 84.141 ± 0.229 %
|
| 475 |
+
|
| 476 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 477 |
+
101 8.4644 ± 0.1525 0.10918 ± 0.00559 0.20257 ± 0.00389 14.942 ± 0.207 % 84.151 ± 0.228 %
|
| 478 |
+
|
| 479 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 480 |
+
102 8.5669 ± 0.1544 0.11161 ± 0.00562 0.20552 ± 0.00393 14.994 ± 0.207 % 84.087 ± 0.227 %
|
| 481 |
+
|
| 482 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 483 |
+
103 8.5368 ± 0.1533 0.11175 ± 0.00558 0.20523 ± 0.00391 15.022 ± 0.206 % 84.081 ± 0.226 %
|
| 484 |
+
|
| 485 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 486 |
+
104 8.4745 ± 0.1512 0.11193 ± 0.00557 0.20586 ± 0.00390 15.104 ± 0.205 % 84.091 ± 0.225 %
|
| 487 |
+
|
| 488 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 489 |
+
105 8.3561 ± 0.1481 0.11252 ± 0.00555 0.20640 ± 0.00389 15.201 ± 0.205 % 84.116 ± 0.223 %
|
| 490 |
+
|
| 491 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 492 |
+
106 8.2233 ± 0.1447 0.11359 ± 0.00553 0.20688 ± 0.00388 15.326 ± 0.205 % 84.188 ± 0.222 %
|
| 493 |
+
|
| 494 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 495 |
+
107 8.2782 ± 0.1450 0.11269 ± 0.00548 0.20553 ± 0.00385 15.268 ± 0.204 % 84.171 ± 0.221 %
|
| 496 |
+
|
| 497 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 498 |
+
108 8.2828 ± 0.1443 0.11175 ± 0.00544 0.20462 ± 0.00382 15.218 ± 0.203 % 84.176 ± 0.220 %
|
| 499 |
+
|
| 500 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 501 |
+
109 8.3118 ± 0.1443 0.11189 ± 0.00541 0.20433 ± 0.00379 15.212 ± 0.202 % 84.159 ± 0.219 %
|
| 502 |
+
|
| 503 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 504 |
+
110 8.3403 ± 0.1441 0.11076 ± 0.00538 0.20359 ± 0.00376 15.178 ± 0.201 % 84.139 ± 0.218 %
|
| 505 |
+
|
| 506 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 507 |
+
111 8.3843 ± 0.1441 0.10992 ± 0.00534 0.20278 ± 0.00373 15.121 ± 0.200 % 84.116 ± 0.217 %
|
| 508 |
+
|
| 509 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 510 |
+
112 8.3867 ± 0.1434 0.10882 ± 0.00530 0.20170 ± 0.00370 15.073 ± 0.199 % 84.142 ± 0.216 %
|
| 511 |
+
|
| 512 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 513 |
+
113 8.3978 ± 0.1428 0.10848 ± 0.00526 0.20055 ± 0.00367 15.018 ± 0.198 % 84.137 ± 0.215 %
|
| 514 |
+
|
| 515 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 516 |
+
114 8.4165 ± 0.1426 0.10839 ± 0.00523 0.19974 ± 0.00364 14.973 ± 0.197 % 84.152 ± 0.214 %
|
| 517 |
+
|
| 518 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 519 |
+
115 8.4054 ± 0.1417 0.10946 ± 0.00522 0.20032 ± 0.00363 15.032 ± 0.196 % 84.143 ± 0.213 %
|
| 520 |
+
|
| 521 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 522 |
+
116 8.4659 ± 0.1425 0.11645 ± 0.00537 0.20911 ± 0.00382 15.332 ± 0.197 % 83.938 ± 0.213 %
|
| 523 |
+
|
| 524 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 525 |
+
117 8.4258 ± 0.1410 0.12445 ± 0.00545 0.21664 ± 0.00390 15.793 ± 0.198 % 83.787 ± 0.213 %
|
| 526 |
+
|
| 527 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 528 |
+
118 8.3828 ± 0.1395 0.13186 ± 0.00552 0.22317 ± 0.00396 16.165 ± 0.198 % 83.662 ± 0.213 %
|
| 529 |
+
|
| 530 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 531 |
+
119 8.3386 ± 0.1380 0.13955 ± 0.00558 0.22985 ± 0.00402 16.557 ± 0.198 % 83.543 ± 0.213 %
|
| 532 |
+
|
| 533 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 534 |
+
120 8.3145 ± 0.1368 0.14799 ± 0.00565 0.23779 ± 0.00409 17.007 ± 0.198 % 83.333 ± 0.213 %
|
| 535 |
+
|
| 536 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 537 |
+
121 8.2849 ± 0.1357 0.15651 ± 0.00573 0.24410 ± 0.00415 17.343 ± 0.198 % 83.244 ± 0.213 %
|
| 538 |
+
|
| 539 |
+
====== Perplexity statistics ======
|
| 540 |
+
Mean PPL(Q) : 8.284882 ± 0.135705
|
| 541 |
+
Mean PPL(base) : 7.084621 ± 0.114399
|
| 542 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 93.80%
|
| 543 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.156506 ± 0.005730
|
| 544 |
+
Mean PPL(Q)/PPL(base) : 1.169418 ± 0.006700
|
| 545 |
+
Mean PPL(Q)-PPL(base) : 1.200261 ± 0.048762
|
| 546 |
+
|
| 547 |
+
====== KL divergence statistics ======
|
| 548 |
+
Mean KLD: 0.244096 ± 0.004148
|
| 549 |
+
Maximum KLD: 16.568100
|
| 550 |
+
99.9% KLD: 7.662232
|
| 551 |
+
99.0% KLD: 3.809325
|
| 552 |
+
95.0% KLD: 1.171860
|
| 553 |
+
90.0% KLD: 0.451059
|
| 554 |
+
Median KLD: 0.051302
|
| 555 |
+
10.0% KLD: 0.000251
|
| 556 |
+
5.0% KLD: 0.000037
|
| 557 |
+
1.0% KLD: 0.000002
|
| 558 |
+
0.1% KLD: -0.000001
|
| 559 |
+
Minimum KLD: -0.000004
|
| 560 |
+
|
| 561 |
+
====== Token probability statistics ======
|
| 562 |
+
Mean Δp: -3.782 ± 0.096 %
|
| 563 |
+
Maximum Δp: 99.859%
|
| 564 |
+
99.9% Δp: 67.832%
|
| 565 |
+
99.0% Δp: 28.277%
|
| 566 |
+
95.0% Δp: 10.539%
|
| 567 |
+
90.0% Δp: 4.938%
|
| 568 |
+
75.0% Δp: 0.357%
|
| 569 |
+
Median Δp: -0.027%
|
| 570 |
+
25.0% Δp: -3.034%
|
| 571 |
+
10.0% Δp: -14.168%
|
| 572 |
+
5.0% Δp: -32.051%
|
| 573 |
+
1.0% Δp: -87.072%
|
| 574 |
+
0.1% Δp: -98.263%
|
| 575 |
+
Minimum Δp: -99.944%
|
| 576 |
+
RMS Δp : 17.343 ± 0.198 %
|
| 577 |
+
Same top p: 83.244 ± 0.213 %
|
| 578 |
+
|
| 579 |
+
llama_perf_context_print: load time = 30656.10 ms
|
| 580 |
+
llama_perf_context_print: prompt eval time = 396129.95 ms / 61952 tokens ( 6.39 ms per token, 156.39 tokens per second)
|
| 581 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 582 |
+
llama_perf_context_print: total time = 407916.05 ms / 61953 tokens
|
| 583 |
+
llama_perf_context_print: graphs reused = 120
|
| 584 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 585 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 16425 + ( 7383 = 6448 + 64 + 870) + 326 |
|
| 586 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 17559 + ( 6217 = 5761 + 60 + 396) + 357 |
|
| 587 |
+
llama_memory_breakdown_print: | - Host | 80037 = 80024 + 0 + 13 |
|
| 588 |
+
```
|
kld_data/aes_sedai/MiniMax-M2.5-IQ4_XS.md
ADDED
|
@@ -0,0 +1,588 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
### MiniMax-M2.5-IQ4_XS (aes_sedai)
|
| 2 |
+
|
| 3 |
+
101.10 GiB (3.80 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
./build/bin/llama-perplexity --n-gpu-layers 999 --threads 48 --override-tensor "blk\.(0|1|2|3)\.ffn_.*=CUDA0" --override-tensor "blk\.(4|5|6)\.ffn_.*=CUDA1" --override-tensor "blk\..*_exps\.=CPU" --flash-attn on --file "/mnt/srv/host/resources/KLD/calibration_datav3.txt" --kl-divergence-base "/mnt/srv/snowdrift/ref-logits-MiniMax-M2.5-BF16-calibration-datav3.bin" --kl-divergence --model "/mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-IQ4_XS.gguf"
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 11 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 12 |
+
build: 8038 (05a6f0e89) with GNU 14.2.1 for Linux x86_64
|
| 13 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 14 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 15 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 9102 used, 14769 free vs. target of 1024
|
| 16 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 7324 used, 16547 free vs. target of 1024
|
| 17 |
+
llama_params_fit_impl: projected to use 16426 MiB of device memory vs. 47743 MiB of free device memory
|
| 18 |
+
llama_params_fit_impl: targets for free memory can be met on all devices, no changes needed
|
| 19 |
+
llama_params_fit: successfully fit params to free device memory
|
| 20 |
+
llama_params_fit: fitting params to free memory took 0.41 seconds
|
| 21 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 22 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 23 |
+
llama_model_loader: loaded meta data with 41 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-IQ4_XS.gguf (version GGUF V3 (latest))
|
| 24 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 25 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 26 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 27 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 28 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 29 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 30 |
+
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
|
| 31 |
+
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
|
| 32 |
+
llama_model_loader: - kv 7: general.license str = other
|
| 33 |
+
llama_model_loader: - kv 8: general.license.name str = modified-mit
|
| 34 |
+
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 35 |
+
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
|
| 36 |
+
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
|
| 37 |
+
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
|
| 38 |
+
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
|
| 39 |
+
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
|
| 40 |
+
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
|
| 41 |
+
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
|
| 42 |
+
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 43 |
+
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 44 |
+
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
|
| 45 |
+
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
|
| 46 |
+
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
|
| 47 |
+
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
|
| 48 |
+
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
|
| 49 |
+
llama_model_loader: - kv 24: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 50 |
+
llama_model_loader: - kv 25: minimax-m2.rope.dimension_count u32 = 64
|
| 51 |
+
llama_model_loader: - kv 26: tokenizer.ggml.model str = gpt2
|
| 52 |
+
llama_model_loader: - kv 27: tokenizer.ggml.pre str = minimax-m2
|
| 53 |
+
llama_model_loader: - kv 28: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 54 |
+
llama_model_loader: - kv 29: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 55 |
+
llama_model_loader: - kv 30: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 56 |
+
llama_model_loader: - kv 31: tokenizer.ggml.bos_token_id u32 = 200034
|
| 57 |
+
llama_model_loader: - kv 32: tokenizer.ggml.eos_token_id u32 = 200020
|
| 58 |
+
llama_model_loader: - kv 33: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 59 |
+
llama_model_loader: - kv 34: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
|
| 60 |
+
llama_model_loader: - kv 35: general.quantization_version u32 = 2
|
| 61 |
+
llama_model_loader: - kv 36: general.file_type u32 = 7
|
| 62 |
+
llama_model_loader: - kv 37: quantize.imatrix.file str = /mnt/srv/snowdrift/fp16/MiniMax-M2.5/...
|
| 63 |
+
llama_model_loader: - kv 38: quantize.imatrix.dataset str = /mnt/srv/host/resources/KLD/calibrati...
|
| 64 |
+
llama_model_loader: - kv 39: quantize.imatrix.entries_count u32 = 496
|
| 65 |
+
llama_model_loader: - kv 40: quantize.imatrix.chunks_count u32 = 49
|
| 66 |
+
llama_model_loader: - type f32: 373 tensors
|
| 67 |
+
llama_model_loader: - type q8_0: 250 tensors
|
| 68 |
+
llama_model_loader: - type iq3_s: 124 tensors
|
| 69 |
+
llama_model_loader: - type iq4_xs: 62 tensors
|
| 70 |
+
print_info: file format = GGUF V3 (latest)
|
| 71 |
+
print_info: file type = Q8_0
|
| 72 |
+
print_info: file size = 101.10 GiB (3.80 BPW)
|
| 73 |
+
load: 0 unused tokens
|
| 74 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 75 |
+
load: printing all EOG tokens:
|
| 76 |
+
load: - 200004 ('<fim_pad>')
|
| 77 |
+
load: - 200005 ('<reponame>')
|
| 78 |
+
load: - 200020 ('[e~[')
|
| 79 |
+
load: special tokens cache size = 54
|
| 80 |
+
load: token to piece cache size = 1.3355 MB
|
| 81 |
+
print_info: arch = minimax-m2
|
| 82 |
+
print_info: vocab_only = 0
|
| 83 |
+
print_info: no_alloc = 0
|
| 84 |
+
print_info: n_ctx_train = 196608
|
| 85 |
+
print_info: n_embd = 3072
|
| 86 |
+
print_info: n_embd_inp = 3072
|
| 87 |
+
print_info: n_layer = 62
|
| 88 |
+
print_info: n_head = 48
|
| 89 |
+
print_info: n_head_kv = 8
|
| 90 |
+
print_info: n_rot = 64
|
| 91 |
+
print_info: n_swa = 0
|
| 92 |
+
print_info: is_swa_any = 0
|
| 93 |
+
print_info: n_embd_head_k = 128
|
| 94 |
+
print_info: n_embd_head_v = 128
|
| 95 |
+
print_info: n_gqa = 6
|
| 96 |
+
print_info: n_embd_k_gqa = 1024
|
| 97 |
+
print_info: n_embd_v_gqa = 1024
|
| 98 |
+
print_info: f_norm_eps = 0.0e+00
|
| 99 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 100 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 101 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 102 |
+
print_info: f_logit_scale = 0.0e+00
|
| 103 |
+
print_info: f_attn_scale = 0.0e+00
|
| 104 |
+
print_info: n_ff = 1536
|
| 105 |
+
print_info: n_expert = 256
|
| 106 |
+
print_info: n_expert_used = 8
|
| 107 |
+
print_info: n_expert_groups = 0
|
| 108 |
+
print_info: n_group_used = 0
|
| 109 |
+
print_info: causal attn = 1
|
| 110 |
+
print_info: pooling type = 0
|
| 111 |
+
print_info: rope type = 2
|
| 112 |
+
print_info: rope scaling = linear
|
| 113 |
+
print_info: freq_base_train = 5000000.0
|
| 114 |
+
print_info: freq_scale_train = 1
|
| 115 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 116 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 117 |
+
print_info: rope_finetuned = unknown
|
| 118 |
+
print_info: model type = 230B.A10B
|
| 119 |
+
print_info: model params = 228.69 B
|
| 120 |
+
print_info: general.name = MiniMax M2.5
|
| 121 |
+
print_info: vocab type = BPE
|
| 122 |
+
print_info: n_vocab = 200064
|
| 123 |
+
print_info: n_merges = 199744
|
| 124 |
+
print_info: BOS token = 200034 ']~!b['
|
| 125 |
+
print_info: EOS token = 200020 '[e~['
|
| 126 |
+
print_info: UNK token = 200021 ']!d~['
|
| 127 |
+
print_info: LF token = 10 'Ċ'
|
| 128 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 129 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 130 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 131 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 132 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 133 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 134 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 135 |
+
print_info: EOG token = 200020 '[e~['
|
| 136 |
+
print_info: max token length = 256
|
| 137 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 138 |
+
load_tensors: offloading output layer to GPU
|
| 139 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 140 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 141 |
+
load_tensors: CPU_Mapped model buffer size = 102902.72 MiB
|
| 142 |
+
load_tensors: CUDA0 model buffer size = 7924.62 MiB
|
| 143 |
+
load_tensors: CUDA1 model buffer size = 6868.11 MiB
|
| 144 |
+
....................................................................................................
|
| 145 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 146 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 147 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 148 |
+
llama_context: constructing llama_context
|
| 149 |
+
llama_context: n_seq_max = 1
|
| 150 |
+
llama_context: n_ctx = 512
|
| 151 |
+
llama_context: n_ctx_seq = 512
|
| 152 |
+
llama_context: n_batch = 512
|
| 153 |
+
llama_context: n_ubatch = 512
|
| 154 |
+
llama_context: causal_attn = 1
|
| 155 |
+
llama_context: flash_attn = enabled
|
| 156 |
+
llama_context: kv_unified = false
|
| 157 |
+
llama_context: freq_base = 5000000.0
|
| 158 |
+
llama_context: freq_scale = 1
|
| 159 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 160 |
+
llama_context: CUDA_Host output buffer size = 0.76 MiB
|
| 161 |
+
llama_kv_cache: CUDA0 KV buffer size = 64.00 MiB
|
| 162 |
+
llama_kv_cache: CUDA1 KV buffer size = 60.00 MiB
|
| 163 |
+
llama_kv_cache: size = 124.00 MiB ( 512 cells, 62 layers, 1/1 seqs), K (f16): 62.00 MiB, V (f16): 62.00 MiB
|
| 164 |
+
sched_reserve: reserving ...
|
| 165 |
+
sched_reserve: CUDA0 compute buffer size = 1113.50 MiB
|
| 166 |
+
sched_reserve: CUDA1 compute buffer size = 396.75 MiB
|
| 167 |
+
sched_reserve: CUDA_Host compute buffer size = 13.01 MiB
|
| 168 |
+
sched_reserve: graph nodes = 3975
|
| 169 |
+
sched_reserve: graph splits = 210 (with bs=512), 124 (with bs=1)
|
| 170 |
+
sched_reserve: reserve took 7.99 ms, sched copies = 1
|
| 171 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 172 |
+
|
| 173 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 174 |
+
kl_divergence: 4.25 seconds per pass - ETA 8.57 minutes
|
| 175 |
+
|
| 176 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 177 |
+
1 6.7372 ± 1.3181 -0.00189 ± 0.02157 0.02458 ± 0.00240 4.711 ± 0.511 % 94.510 ± 1.429 %
|
| 178 |
+
|
| 179 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 180 |
+
2 4.7833 ± 0.5792 -0.00539 ± 0.01533 0.01987 ± 0.00155 3.930 ± 0.342 % 95.098 ± 0.957 %
|
| 181 |
+
|
| 182 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 183 |
+
3 4.4822 ± 0.4439 0.00819 ± 0.01383 0.02610 ± 0.00268 5.982 ± 0.756 % 94.641 ± 0.815 %
|
| 184 |
+
|
| 185 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 186 |
+
4 5.0368 ± 0.4433 0.00212 ± 0.01170 0.02920 ± 0.00230 5.691 ± 0.606 % 93.824 ± 0.754 %
|
| 187 |
+
|
| 188 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 189 |
+
5 4.7770 ± 0.3727 -0.00855 ± 0.01143 0.03466 ± 0.00297 6.737 ± 0.670 % 93.333 ± 0.699 %
|
| 190 |
+
|
| 191 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 192 |
+
6 5.8295 ± 0.4459 0.00175 ± 0.01085 0.04033 ± 0.00276 6.660 ± 0.586 % 92.288 ± 0.682 %
|
| 193 |
+
|
| 194 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 195 |
+
7 5.4983 ± 0.3777 0.00663 ± 0.01070 0.04915 ± 0.00315 7.690 ± 0.543 % 91.373 ± 0.665 %
|
| 196 |
+
|
| 197 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 198 |
+
8 6.2224 ± 0.4062 0.01090 ± 0.00973 0.04858 ± 0.00282 7.350 ± 0.499 % 91.225 ± 0.627 %
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
9 6.1246 ± 0.3743 0.00973 ± 0.00902 0.04620 ± 0.00255 7.142 ± 0.466 % 91.198 ± 0.592 %
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
10 5.6342 ± 0.3219 0.01326 ± 0.00845 0.04447 ± 0.00233 7.002 ± 0.429 % 91.373 ± 0.556 %
|
| 205 |
+
|
| 206 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 207 |
+
11 6.1796 ± 0.3428 0.01400 ± 0.00797 0.04399 ± 0.00213 6.807 ± 0.402 % 91.087 ± 0.538 %
|
| 208 |
+
|
| 209 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 210 |
+
12 6.8432 ± 0.3686 0.01259 ± 0.00761 0.04425 ± 0.00199 6.619 ± 0.380 % 90.980 ± 0.518 %
|
| 211 |
+
|
| 212 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 213 |
+
13 7.1110 ± 0.3649 0.01297 ± 0.00716 0.04341 ± 0.00187 6.456 ± 0.360 % 91.011 ± 0.497 %
|
| 214 |
+
|
| 215 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 216 |
+
14 7.6915 ± 0.3854 0.01430 ± 0.00686 0.04293 ± 0.00175 6.342 ± 0.342 % 91.036 ± 0.478 %
|
| 217 |
+
|
| 218 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 219 |
+
15 8.0652 ± 0.3910 0.01523 ± 0.00650 0.04227 ± 0.00165 6.231 ± 0.325 % 90.797 ± 0.467 %
|
| 220 |
+
|
| 221 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 222 |
+
16 8.3308 ± 0.3905 0.01082 ± 0.00642 0.04677 ± 0.00359 6.387 ± 0.332 % 90.760 ± 0.453 %
|
| 223 |
+
|
| 224 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 225 |
+
17 8.5500 ± 0.3910 0.01033 ± 0.00626 0.04822 ± 0.00344 6.312 ± 0.321 % 90.727 ± 0.441 %
|
| 226 |
+
|
| 227 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 228 |
+
18 8.0708 ± 0.3567 0.01114 ± 0.00616 0.04951 ± 0.00333 6.458 ± 0.322 % 90.763 ± 0.427 %
|
| 229 |
+
|
| 230 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 231 |
+
19 8.1897 ± 0.3520 0.00928 ± 0.00596 0.04829 ± 0.00316 6.405 ± 0.308 % 90.795 ± 0.415 %
|
| 232 |
+
|
| 233 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 234 |
+
20 8.2268 ± 0.3448 0.00713 ± 0.00592 0.04987 ± 0.00304 6.425 ± 0.295 % 90.765 ± 0.405 %
|
| 235 |
+
|
| 236 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 237 |
+
21 8.2105 ± 0.3354 0.00670 ± 0.00574 0.05052 ± 0.00296 6.412 ± 0.283 % 90.719 ± 0.397 %
|
| 238 |
+
|
| 239 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 240 |
+
22 8.5841 ± 0.3467 0.00665 ± 0.00568 0.05221 ± 0.00286 6.427 ± 0.271 % 90.517 ± 0.391 %
|
| 241 |
+
|
| 242 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 243 |
+
23 8.6195 ± 0.3418 0.00798 ± 0.00595 0.05605 ± 0.00301 6.839 ± 0.291 % 90.384 ± 0.385 %
|
| 244 |
+
|
| 245 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 246 |
+
24 9.0360 ± 0.3528 0.01044 ± 0.00576 0.05548 ± 0.00289 6.762 ± 0.282 % 90.261 ± 0.379 %
|
| 247 |
+
|
| 248 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 249 |
+
25 9.0632 ± 0.3478 0.01284 ± 0.00574 0.05699 ± 0.00285 6.923 ± 0.280 % 90.196 ± 0.372 %
|
| 250 |
+
|
| 251 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 252 |
+
26 8.6137 ± 0.3211 0.02713 ± 0.00618 0.06904 ± 0.00330 8.401 ± 0.316 % 89.940 ± 0.369 %
|
| 253 |
+
|
| 254 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 255 |
+
27 8.2454 ± 0.2995 0.03629 ± 0.00649 0.07829 ± 0.00353 9.272 ± 0.315 % 89.789 ± 0.365 %
|
| 256 |
+
|
| 257 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 258 |
+
28 8.3463 ± 0.2982 0.03435 ± 0.00635 0.07838 ± 0.00342 9.256 ± 0.308 % 89.762 ± 0.359 %
|
| 259 |
+
|
| 260 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 261 |
+
29 8.2718 ± 0.2907 0.03327 ± 0.00622 0.07737 ± 0.00331 9.162 ± 0.301 % 89.899 ± 0.350 %
|
| 262 |
+
|
| 263 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 264 |
+
30 7.7457 ± 0.2654 0.03422 ± 0.00610 0.07573 ± 0.00323 9.106 ± 0.296 % 90.183 ± 0.340 %
|
| 265 |
+
|
| 266 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 267 |
+
31 7.2897 ± 0.2435 0.03405 ± 0.00593 0.07377 ± 0.00313 8.977 ± 0.291 % 90.474 ± 0.330 %
|
| 268 |
+
|
| 269 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 270 |
+
32 7.0992 ± 0.2315 0.03264 ± 0.00579 0.07225 ± 0.00304 8.899 ± 0.284 % 90.564 ± 0.324 %
|
| 271 |
+
|
| 272 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 273 |
+
33 6.9447 ± 0.2213 0.03140 ± 0.00565 0.07102 ± 0.00295 8.848 ± 0.278 % 90.624 ± 0.318 %
|
| 274 |
+
|
| 275 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 276 |
+
34 7.1483 ± 0.2259 0.03334 ± 0.00563 0.07229 ± 0.00291 8.820 ± 0.273 % 90.450 ± 0.316 %
|
| 277 |
+
|
| 278 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 279 |
+
35 7.2490 ± 0.2276 0.03412 ± 0.00558 0.07359 ± 0.00284 8.835 ± 0.267 % 90.218 ± 0.314 %
|
| 280 |
+
|
| 281 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 282 |
+
36 7.3006 ± 0.2269 0.03330 ± 0.00548 0.07284 ± 0.00277 8.751 ± 0.262 % 90.283 ± 0.309 %
|
| 283 |
+
|
| 284 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 285 |
+
37 7.3424 ± 0.2254 0.03694 ± 0.00553 0.07574 ± 0.00287 9.017 ± 0.266 % 90.196 ± 0.306 %
|
| 286 |
+
|
| 287 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 288 |
+
38 7.5511 ± 0.2298 0.03614 ± 0.00541 0.07491 ± 0.00280 8.931 ± 0.262 % 90.165 ± 0.303 %
|
| 289 |
+
|
| 290 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 291 |
+
39 7.5266 ± 0.2257 0.03949 ± 0.00542 0.07817 ± 0.00288 9.263 ± 0.269 % 90.035 ± 0.300 %
|
| 292 |
+
|
| 293 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 294 |
+
40 7.3538 ± 0.2162 0.04830 ± 0.00562 0.08758 ± 0.00307 10.137 ± 0.273 % 89.735 ± 0.301 %
|
| 295 |
+
|
| 296 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 297 |
+
41 7.1858 ± 0.2073 0.05680 ± 0.00582 0.09715 ± 0.00331 11.068 ± 0.284 % 89.479 ± 0.300 %
|
| 298 |
+
|
| 299 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 300 |
+
42 7.0267 ± 0.1992 0.06501 ± 0.00599 0.10471 ± 0.00345 11.736 ± 0.285 % 89.290 ± 0.299 %
|
| 301 |
+
|
| 302 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 303 |
+
43 6.8699 ± 0.1915 0.07350 ± 0.00606 0.10978 ± 0.00352 12.110 ± 0.281 % 89.129 ± 0.297 %
|
| 304 |
+
|
| 305 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 306 |
+
44 6.8076 ± 0.1867 0.07186 ± 0.00595 0.10795 ± 0.00344 12.000 ± 0.277 % 89.198 ± 0.293 %
|
| 307 |
+
|
| 308 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 309 |
+
45 6.9516 ± 0.1897 0.07170 ± 0.00587 0.10732 ± 0.00337 11.899 ± 0.274 % 89.072 ± 0.291 %
|
| 310 |
+
|
| 311 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 312 |
+
46 7.0903 ± 0.1916 0.07078 ± 0.00576 0.10631 ± 0.00330 11.797 ± 0.270 % 89.020 ± 0.289 %
|
| 313 |
+
|
| 314 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 315 |
+
47 7.2413 ± 0.1941 0.06935 ± 0.00565 0.10479 ± 0.00323 11.679 ± 0.267 % 89.045 ± 0.285 %
|
| 316 |
+
|
| 317 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 318 |
+
48 7.1144 ± 0.1876 0.06883 ± 0.00555 0.10319 ± 0.00317 11.588 ± 0.264 % 89.101 ± 0.282 %
|
| 319 |
+
|
| 320 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 321 |
+
49 7.2645 ± 0.1901 0.07104 ± 0.00565 0.10803 ± 0.00352 11.549 ± 0.260 % 88.916 ± 0.281 %
|
| 322 |
+
|
| 323 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 324 |
+
50 7.3846 ± 0.1926 0.07128 ± 0.00559 0.10852 ± 0.00352 11.522 ± 0.257 % 88.910 ± 0.278 %
|
| 325 |
+
|
| 326 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 327 |
+
51 7.4884 ± 0.1935 0.06955 ± 0.00550 0.10706 ± 0.00345 11.426 ± 0.255 % 88.935 ± 0.275 %
|
| 328 |
+
|
| 329 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 330 |
+
52 7.5559 ± 0.1932 0.07004 ± 0.00547 0.10750 ± 0.00339 11.426 ± 0.251 % 88.914 ± 0.273 %
|
| 331 |
+
|
| 332 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 333 |
+
53 7.6726 ± 0.1943 0.06899 ± 0.00538 0.10642 ± 0.00333 11.355 ± 0.248 % 88.916 ± 0.270 %
|
| 334 |
+
|
| 335 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 336 |
+
54 7.7223 ± 0.1934 0.06742 ± 0.00530 0.10504 ± 0.00327 11.263 ± 0.245 % 88.983 ± 0.267 %
|
| 337 |
+
|
| 338 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 339 |
+
55 7.7682 ± 0.1925 0.06656 ± 0.00521 0.10391 ± 0.00321 11.182 ± 0.243 % 88.984 ± 0.264 %
|
| 340 |
+
|
| 341 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 342 |
+
56 7.8110 ± 0.1920 0.06649 ± 0.00514 0.10326 ± 0.00316 11.111 ± 0.240 % 88.985 ± 0.262 %
|
| 343 |
+
|
| 344 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 345 |
+
57 7.8077 ± 0.1902 0.06584 ± 0.00509 0.10352 ± 0.00312 11.115 ± 0.238 % 88.958 ± 0.260 %
|
| 346 |
+
|
| 347 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 348 |
+
58 7.8344 ± 0.1894 0.06618 ± 0.00506 0.10503 ± 0.00335 11.094 ± 0.236 % 88.972 ± 0.258 %
|
| 349 |
+
|
| 350 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 351 |
+
59 7.7855 ± 0.1862 0.06533 ± 0.00498 0.10372 ± 0.00330 11.027 ± 0.234 % 89.046 ± 0.255 %
|
| 352 |
+
|
| 353 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 354 |
+
60 7.7866 ± 0.1847 0.06470 ± 0.00491 0.10282 ± 0.00325 10.977 ± 0.231 % 89.098 ± 0.252 %
|
| 355 |
+
|
| 356 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 357 |
+
61 7.8286 ± 0.1842 0.06431 ± 0.00484 0.10184 ± 0.00319 10.911 ± 0.229 % 89.110 ± 0.250 %
|
| 358 |
+
|
| 359 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 360 |
+
62 7.7891 ± 0.1820 0.06374 ± 0.00481 0.10094 ± 0.00314 10.857 ± 0.227 % 89.171 ± 0.247 %
|
| 361 |
+
|
| 362 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 363 |
+
63 7.8313 ± 0.1821 0.06359 ± 0.00478 0.10039 ± 0.00310 10.812 ± 0.225 % 89.175 ± 0.245 %
|
| 364 |
+
|
| 365 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 366 |
+
64 7.8064 ± 0.1797 0.06344 ± 0.00472 0.09967 ± 0.00305 10.772 ± 0.223 % 89.191 ± 0.243 %
|
| 367 |
+
|
| 368 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 369 |
+
65 7.7931 ± 0.1780 0.06347 ± 0.00467 0.09911 ± 0.00301 10.731 ± 0.221 % 89.189 ± 0.241 %
|
| 370 |
+
|
| 371 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 372 |
+
66 7.8316 ± 0.1777 0.06310 ± 0.00462 0.09854 ± 0.00297 10.677 ± 0.219 % 89.180 ± 0.239 %
|
| 373 |
+
|
| 374 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 375 |
+
67 7.8427 ± 0.1768 0.06277 ± 0.00457 0.09808 ± 0.00293 10.646 ± 0.217 % 89.213 ± 0.237 %
|
| 376 |
+
|
| 377 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 378 |
+
68 7.7907 ± 0.1742 0.06256 ± 0.00453 0.09715 ± 0.00289 10.598 ± 0.215 % 89.279 ± 0.235 %
|
| 379 |
+
|
| 380 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 381 |
+
69 7.8219 ± 0.1737 0.06204 ± 0.00449 0.09659 ± 0.00285 10.552 ± 0.213 % 89.287 ± 0.233 %
|
| 382 |
+
|
| 383 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 384 |
+
70 7.7833 ± 0.1713 0.06132 ± 0.00445 0.09612 ± 0.00281 10.508 ± 0.211 % 89.350 ± 0.231 %
|
| 385 |
+
|
| 386 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 387 |
+
71 7.7622 ± 0.1696 0.06110 ± 0.00440 0.09531 ± 0.00277 10.464 ± 0.210 % 89.439 ± 0.228 %
|
| 388 |
+
|
| 389 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 390 |
+
72 7.7734 ± 0.1688 0.05984 ± 0.00436 0.09469 ± 0.00273 10.412 ± 0.208 % 89.472 ± 0.227 %
|
| 391 |
+
|
| 392 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 393 |
+
73 7.7732 ± 0.1676 0.05915 ± 0.00431 0.09389 ± 0.00270 10.357 ± 0.206 % 89.476 ± 0.225 %
|
| 394 |
+
|
| 395 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 396 |
+
74 7.7569 ± 0.1659 0.05871 ± 0.00427 0.09334 ± 0.00266 10.308 ± 0.204 % 89.518 ± 0.223 %
|
| 397 |
+
|
| 398 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 399 |
+
75 7.7428 ± 0.1644 0.05737 ± 0.00424 0.09288 ± 0.00263 10.283 ± 0.202 % 89.574 ± 0.221 %
|
| 400 |
+
|
| 401 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 402 |
+
76 7.8064 ± 0.1649 0.05672 ± 0.00420 0.09229 ± 0.00259 10.237 ± 0.201 % 89.628 ± 0.219 %
|
| 403 |
+
|
| 404 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 405 |
+
77 7.7993 ± 0.1636 0.05627 ± 0.00416 0.09160 ± 0.00256 10.187 ± 0.199 % 89.677 ± 0.217 %
|
| 406 |
+
|
| 407 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 408 |
+
78 7.8104 ± 0.1629 0.05575 ± 0.00413 0.09113 ± 0.00253 10.138 ± 0.198 % 89.698 ± 0.216 %
|
| 409 |
+
|
| 410 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 411 |
+
79 7.8110 ± 0.1619 0.05436 ± 0.00409 0.09072 ± 0.00250 10.090 ± 0.196 % 89.705 ± 0.214 %
|
| 412 |
+
|
| 413 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 414 |
+
80 7.8080 ± 0.1612 0.05476 ± 0.00408 0.09035 ± 0.00247 10.057 ± 0.194 % 89.696 ± 0.213 %
|
| 415 |
+
|
| 416 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 417 |
+
81 7.7750 ± 0.1595 0.05428 ± 0.00404 0.08996 ± 0.00245 10.015 ± 0.193 % 89.717 ± 0.211 %
|
| 418 |
+
|
| 419 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 420 |
+
82 7.7514 ± 0.1578 0.05369 ± 0.00401 0.08931 ± 0.00242 9.970 ± 0.192 % 89.742 ± 0.210 %
|
| 421 |
+
|
| 422 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 423 |
+
83 7.7789 ± 0.1572 0.05316 ± 0.00397 0.08863 ± 0.00239 9.923 ± 0.190 % 89.771 ± 0.208 %
|
| 424 |
+
|
| 425 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 426 |
+
84 7.7968 ± 0.1563 0.05292 ± 0.00393 0.08797 ± 0.00236 9.880 ± 0.189 % 89.767 ± 0.207 %
|
| 427 |
+
|
| 428 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 429 |
+
85 7.7857 ± 0.1549 0.05253 ± 0.00389 0.08731 ± 0.00234 9.834 ± 0.187 % 89.832 ± 0.205 %
|
| 430 |
+
|
| 431 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 432 |
+
86 7.7097 ± 0.1519 0.05193 ± 0.00386 0.08676 ± 0.00231 9.793 ± 0.186 % 89.881 ± 0.204 %
|
| 433 |
+
|
| 434 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 435 |
+
87 7.6432 ± 0.1493 0.05144 ± 0.00382 0.08600 ± 0.00229 9.743 ± 0.185 % 89.912 ± 0.202 %
|
| 436 |
+
|
| 437 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 438 |
+
88 7.5714 ± 0.1466 0.05095 ± 0.00378 0.08533 ± 0.00226 9.703 ± 0.184 % 89.929 ± 0.201 %
|
| 439 |
+
|
| 440 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 441 |
+
89 7.4866 ± 0.1436 0.05009 ± 0.00374 0.08467 ± 0.00224 9.666 ± 0.182 % 89.967 ± 0.199 %
|
| 442 |
+
|
| 443 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 444 |
+
90 7.4219 ± 0.1411 0.04945 ± 0.00370 0.08404 ± 0.00221 9.621 ± 0.181 % 90.004 ± 0.198 %
|
| 445 |
+
|
| 446 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 447 |
+
91 7.3657 ± 0.1389 0.04902 ± 0.00367 0.08338 ± 0.00219 9.584 ± 0.180 % 90.071 ± 0.196 %
|
| 448 |
+
|
| 449 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 450 |
+
92 7.3002 ± 0.1365 0.04815 ± 0.00364 0.08281 ± 0.00217 9.547 ± 0.179 % 90.072 ± 0.195 %
|
| 451 |
+
|
| 452 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 453 |
+
93 7.3086 ± 0.1361 0.04825 ± 0.00362 0.08297 ± 0.00216 9.545 ± 0.178 % 90.070 ± 0.194 %
|
| 454 |
+
|
| 455 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 456 |
+
94 7.3383 ± 0.1358 0.04805 ± 0.00359 0.08242 ± 0.00214 9.504 ± 0.177 % 90.117 ± 0.193 %
|
| 457 |
+
|
| 458 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 459 |
+
95 7.4432 ± 0.1374 0.04763 ± 0.00356 0.08229 ± 0.00211 9.476 ± 0.176 % 90.072 ± 0.192 %
|
| 460 |
+
|
| 461 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 462 |
+
96 7.5357 ± 0.1386 0.04726 ± 0.00354 0.08218 ± 0.00210 9.445 ± 0.175 % 90.025 ± 0.192 %
|
| 463 |
+
|
| 464 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 465 |
+
97 7.6161 ± 0.1394 0.04688 ± 0.00351 0.08180 ± 0.00208 9.405 ± 0.174 % 90.030 ± 0.190 %
|
| 466 |
+
|
| 467 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 468 |
+
98 7.7578 ± 0.1419 0.04680 ± 0.00350 0.08129 ± 0.00205 9.362 ± 0.173 % 90.036 ± 0.189 %
|
| 469 |
+
|
| 470 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 471 |
+
99 7.8742 ± 0.1435 0.04644 ± 0.00347 0.08089 ± 0.00203 9.325 ± 0.172 % 90.006 ± 0.189 %
|
| 472 |
+
|
| 473 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 474 |
+
100 7.9026 ± 0.1433 0.04578 ± 0.00345 0.08131 ± 0.00209 9.315 ± 0.170 % 89.961 ± 0.188 %
|
| 475 |
+
|
| 476 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 477 |
+
101 7.9432 ± 0.1436 0.04563 ± 0.00344 0.08122 ± 0.00208 9.294 ± 0.169 % 89.951 ± 0.187 %
|
| 478 |
+
|
| 479 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 480 |
+
102 8.0216 ± 0.1449 0.04585 ± 0.00343 0.08181 ± 0.00207 9.292 ± 0.168 % 89.927 ± 0.187 %
|
| 481 |
+
|
| 482 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 483 |
+
103 7.9914 ± 0.1438 0.04573 ± 0.00341 0.08140 ± 0.00205 9.269 ± 0.167 % 89.949 ± 0.186 %
|
| 484 |
+
|
| 485 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 486 |
+
104 7.9335 ± 0.1419 0.04596 ± 0.00340 0.08176 ± 0.00205 9.350 ± 0.168 % 89.962 ± 0.185 %
|
| 487 |
+
|
| 488 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 489 |
+
105 7.8204 ± 0.1390 0.04626 ± 0.00341 0.08188 ± 0.00204 9.388 ± 0.167 % 90.017 ± 0.183 %
|
| 490 |
+
|
| 491 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 492 |
+
106 7.6844 ± 0.1356 0.04582 ± 0.00339 0.08158 ± 0.00203 9.394 ± 0.167 % 90.100 ± 0.182 %
|
| 493 |
+
|
| 494 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 495 |
+
107 7.7402 ± 0.1359 0.04549 ± 0.00336 0.08106 ± 0.00201 9.362 ± 0.166 % 90.090 ± 0.181 %
|
| 496 |
+
|
| 497 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 498 |
+
108 7.7495 ± 0.1354 0.04519 ± 0.00333 0.08063 ± 0.00199 9.330 ± 0.165 % 90.113 ± 0.180 %
|
| 499 |
+
|
| 500 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 501 |
+
109 7.7732 ± 0.1353 0.04490 ± 0.00331 0.08035 ± 0.00198 9.304 ± 0.164 % 90.121 ± 0.179 %
|
| 502 |
+
|
| 503 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 504 |
+
110 7.8073 ± 0.1353 0.04472 ± 0.00329 0.07999 ± 0.00196 9.275 ± 0.163 % 90.143 ± 0.178 %
|
| 505 |
+
|
| 506 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 507 |
+
111 7.8522 ± 0.1354 0.04434 ± 0.00326 0.07969 ± 0.00194 9.242 ± 0.162 % 90.154 ± 0.177 %
|
| 508 |
+
|
| 509 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 510 |
+
112 7.8595 ± 0.1348 0.04390 ± 0.00324 0.07928 ± 0.00193 9.215 ± 0.161 % 90.168 ± 0.176 %
|
| 511 |
+
|
| 512 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 513 |
+
113 7.8706 ± 0.1342 0.04364 ± 0.00322 0.07887 ± 0.00191 9.188 ± 0.161 % 90.172 ± 0.175 %
|
| 514 |
+
|
| 515 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 516 |
+
114 7.8878 ± 0.1340 0.04352 ± 0.00320 0.07879 ± 0.00191 9.171 ± 0.160 % 90.179 ± 0.175 %
|
| 517 |
+
|
| 518 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 519 |
+
115 7.8710 ± 0.1331 0.04378 ± 0.00320 0.07931 ± 0.00191 9.220 ± 0.160 % 90.169 ± 0.174 %
|
| 520 |
+
|
| 521 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 522 |
+
116 7.8854 ± 0.1329 0.04542 ± 0.00328 0.08307 ± 0.00204 9.458 ± 0.161 % 90.051 ± 0.174 %
|
| 523 |
+
|
| 524 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 525 |
+
117 7.8009 ± 0.1306 0.04739 ± 0.00331 0.08528 ± 0.00206 9.665 ± 0.162 % 90.002 ± 0.174 %
|
| 526 |
+
|
| 527 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 528 |
+
118 7.7242 ± 0.1284 0.05003 ± 0.00333 0.08794 ± 0.00209 9.922 ± 0.163 % 89.950 ± 0.173 %
|
| 529 |
+
|
| 530 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 531 |
+
119 7.6506 ± 0.1265 0.05343 ± 0.00339 0.09068 ± 0.00214 10.155 ± 0.164 % 89.893 ± 0.173 %
|
| 532 |
+
|
| 533 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 534 |
+
120 7.5833 ± 0.1246 0.05593 ± 0.00341 0.09291 ± 0.00215 10.370 ± 0.165 % 89.824 ± 0.173 %
|
| 535 |
+
|
| 536 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 537 |
+
121 7.5136 ± 0.1227 0.05879 ± 0.00343 0.09508 ± 0.00217 10.585 ± 0.165 % 89.781 ± 0.172 %
|
| 538 |
+
|
| 539 |
+
====== Perplexity statistics ======
|
| 540 |
+
Mean PPL(Q) : 7.513587 ± 0.122746
|
| 541 |
+
Mean PPL(base) : 7.084621 ± 0.114399
|
| 542 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.77%
|
| 543 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.058787 ± 0.003433
|
| 544 |
+
Mean PPL(Q)/PPL(base) : 1.060549 ± 0.003641
|
| 545 |
+
Mean PPL(Q)-PPL(base) : 0.428967 ± 0.026367
|
| 546 |
+
|
| 547 |
+
====== KL divergence statistics ======
|
| 548 |
+
Mean KLD: 0.095077 ± 0.002168
|
| 549 |
+
Maximum KLD: 16.468470
|
| 550 |
+
99.9% KLD: 4.400382
|
| 551 |
+
99.0% KLD: 1.549927
|
| 552 |
+
95.0% KLD: 0.361787
|
| 553 |
+
90.0% KLD: 0.160903
|
| 554 |
+
Median KLD: 0.018892
|
| 555 |
+
10.0% KLD: 0.000074
|
| 556 |
+
5.0% KLD: 0.000011
|
| 557 |
+
1.0% KLD: 0.000000
|
| 558 |
+
0.1% KLD: -0.000003
|
| 559 |
+
Minimum KLD: -0.000007
|
| 560 |
+
|
| 561 |
+
====== Token probability statistics ======
|
| 562 |
+
Mean Δp: -1.323 ± 0.060 %
|
| 563 |
+
Maximum Δp: 99.378%
|
| 564 |
+
99.9% Δp: 61.536%
|
| 565 |
+
99.0% Δp: 21.758%
|
| 566 |
+
95.0% Δp: 7.648%
|
| 567 |
+
90.0% Δp: 3.780%
|
| 568 |
+
75.0% Δp: 0.362%
|
| 569 |
+
Median Δp: -0.002%
|
| 570 |
+
25.0% Δp: -1.204%
|
| 571 |
+
10.0% Δp: -6.515%
|
| 572 |
+
5.0% Δp: -13.250%
|
| 573 |
+
1.0% Δp: -50.844%
|
| 574 |
+
0.1% Δp: -90.948%
|
| 575 |
+
Minimum Δp: -98.497%
|
| 576 |
+
RMS Δp : 10.585 ± 0.165 %
|
| 577 |
+
Same top p: 89.781 ± 0.172 %
|
| 578 |
+
|
| 579 |
+
llama_perf_context_print: load time = 47033.62 ms
|
| 580 |
+
llama_perf_context_print: prompt eval time = 474777.07 ms / 61952 tokens ( 7.66 ms per token, 130.49 tokens per second)
|
| 581 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 582 |
+
llama_perf_context_print: total time = 486633.57 ms / 61953 tokens
|
| 583 |
+
llama_perf_context_print: graphs reused = 120
|
| 584 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 585 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 14707 + ( 9102 = 7924 + 64 + 1113) + 325 |
|
| 586 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 16451 + ( 7324 = 6868 + 60 + 396) + 358 |
|
| 587 |
+
llama_memory_breakdown_print: | - Host | 102915 = 102902 + 0 + 13 |
|
| 588 |
+
```
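
For reference, the summary metrics reported in these logs (Mean KLD, RMS Δp, "Same top p") follow the standard definitions: KL(P‖Q) between the base-model and quantized-model next-token distributions, the change in probability assigned to the reference token, and the rate at which both models agree on the top-1 token. The NumPy sketch below shows one way to compute them from two saved logit arrays; it is an illustration of the definitions only, not llama.cpp's implementation, and the names (`base_logits`, `quant_logits`, `target_ids`) are hypothetical.

```python
import numpy as np

def kld_report(base_logits, quant_logits, target_ids):
    """Compute Mean KLD, RMS delta-p (%), and top-1 agreement (%) between a
    base model P and a quantized model Q, given logits of shape
    [n_tokens, n_vocab] and the reference token ids of shape [n_tokens]."""
    def log_softmax(x):
        x = x - x.max(axis=-1, keepdims=True)
        return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))

    logp = log_softmax(np.asarray(base_logits, dtype=np.float64))   # P: base (e.g. BF16)
    logq = log_softmax(np.asarray(quant_logits, dtype=np.float64))  # Q: quantized model

    # KL(P || Q) per position, averaged over all evaluated tokens -> "Mean KLD"
    kld = (np.exp(logp) * (logp - logq)).sum(axis=-1)

    # delta-p: change in probability assigned to the correct next token (Q minus P)
    idx = np.arange(len(target_ids))
    delta_p = np.exp(logq[idx, target_ids]) - np.exp(logp[idx, target_ids])

    # "Same top p": fraction of positions where both models pick the same argmax token
    same_top = (logp.argmax(axis=-1) == logq.argmax(axis=-1)).mean()

    return kld.mean(), 100.0 * np.sqrt((delta_p ** 2).mean()), 100.0 * same_top
```
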
|
kld_data/aes_sedai/MiniMax-M2.5-Q4_K_M.md
ADDED
|
@@ -0,0 +1,588 @@
|
| 1 |
+
### MiniMax-M2.5-Q4_K_M (aes_sedai)
|
| 2 |
+
|
| 3 |
+
130.52 GiB (4.90 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
./build/bin/llama-perplexity --n-gpu-layers 999 --threads 48 --override-tensor "blk\.(0|1|2|3)\.ffn_.*=CUDA0" --override-tensor "blk\.(4|5|6)\.ffn_.*=CUDA1" --override-tensor "blk\..*_exps\.=CPU" --flash-attn on --file "/mnt/srv/host/resources/KLD/calibration_datav3.txt" --kl-divergence-base "/mnt/srv/snowdrift/ref-logits-MiniMax-M2.5-BF16-calibration-datav3.bin" --kl-divergence --model "/mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-Q4_K_M.gguf"
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 11 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 12 |
+
build: 8038 (05a6f0e89) with GNU 14.2.1 for Linux x86_64
|
| 13 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 14 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 15 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 11379 used, 12492 free vs. target of 1024
|
| 16 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 8782 used, 15089 free vs. target of 1024
|
| 17 |
+
llama_params_fit_impl: projected to use 20161 MiB of device memory vs. 47743 MiB of free device memory
|
| 18 |
+
llama_params_fit_impl: targets for free memory can be met on all devices, no changes needed
|
| 19 |
+
llama_params_fit: successfully fit params to free device memory
|
| 20 |
+
llama_params_fit: fitting params to free memory took 0.44 seconds
|
| 21 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 22 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 23 |
+
llama_model_loader: loaded meta data with 41 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-Q4_K_M.gguf (version GGUF V3 (latest))
|
| 24 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 25 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 26 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 27 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 28 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 29 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 30 |
+
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
|
| 31 |
+
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
|
| 32 |
+
llama_model_loader: - kv 7: general.license str = other
|
| 33 |
+
llama_model_loader: - kv 8: general.license.name str = modified-mit
|
| 34 |
+
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 35 |
+
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
|
| 36 |
+
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
|
| 37 |
+
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
|
| 38 |
+
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
|
| 39 |
+
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
|
| 40 |
+
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
|
| 41 |
+
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
|
| 42 |
+
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 43 |
+
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 44 |
+
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
|
| 45 |
+
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
|
| 46 |
+
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
|
| 47 |
+
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
|
| 48 |
+
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
|
| 49 |
+
llama_model_loader: - kv 24: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 50 |
+
llama_model_loader: - kv 25: minimax-m2.rope.dimension_count u32 = 64
|
| 51 |
+
llama_model_loader: - kv 26: tokenizer.ggml.model str = gpt2
|
| 52 |
+
llama_model_loader: - kv 27: tokenizer.ggml.pre str = minimax-m2
|
| 53 |
+
llama_model_loader: - kv 28: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 54 |
+
llama_model_loader: - kv 29: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 55 |
+
llama_model_loader: - kv 30: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 56 |
+
llama_model_loader: - kv 31: tokenizer.ggml.bos_token_id u32 = 200034
|
| 57 |
+
llama_model_loader: - kv 32: tokenizer.ggml.eos_token_id u32 = 200020
|
| 58 |
+
llama_model_loader: - kv 33: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 59 |
+
llama_model_loader: - kv 34: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
|
| 60 |
+
llama_model_loader: - kv 35: general.quantization_version u32 = 2
|
| 61 |
+
llama_model_loader: - kv 36: general.file_type u32 = 7
|
| 62 |
+
llama_model_loader: - kv 37: quantize.imatrix.file str = /mnt/srv/snowdrift/fp16/MiniMax-M2.5/...
|
| 63 |
+
llama_model_loader: - kv 38: quantize.imatrix.dataset str = /mnt/srv/host/resources/KLD/calibrati...
|
| 64 |
+
llama_model_loader: - kv 39: quantize.imatrix.entries_count u32 = 496
|
| 65 |
+
llama_model_loader: - kv 40: quantize.imatrix.chunks_count u32 = 49
|
| 66 |
+
llama_model_loader: - type f32: 373 tensors
|
| 67 |
+
llama_model_loader: - type q8_0: 250 tensors
|
| 68 |
+
llama_model_loader: - type q4_K: 124 tensors
|
| 69 |
+
llama_model_loader: - type q5_K: 62 tensors
|
| 70 |
+
print_info: file format = GGUF V3 (latest)
|
| 71 |
+
print_info: file type = Q8_0
|
| 72 |
+
print_info: file size = 130.52 GiB (4.90 BPW)
|
| 73 |
+
load: 0 unused tokens
|
| 74 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 75 |
+
load: printing all EOG tokens:
|
| 76 |
+
load: - 200004 ('<fim_pad>')
|
| 77 |
+
load: - 200005 ('<reponame>')
|
| 78 |
+
load: - 200020 ('[e~[')
|
| 79 |
+
load: special tokens cache size = 54
|
| 80 |
+
load: token to piece cache size = 1.3355 MB
|
| 81 |
+
print_info: arch = minimax-m2
|
| 82 |
+
print_info: vocab_only = 0
|
| 83 |
+
print_info: no_alloc = 0
|
| 84 |
+
print_info: n_ctx_train = 196608
|
| 85 |
+
print_info: n_embd = 3072
|
| 86 |
+
print_info: n_embd_inp = 3072
|
| 87 |
+
print_info: n_layer = 62
|
| 88 |
+
print_info: n_head = 48
|
| 89 |
+
print_info: n_head_kv = 8
|
| 90 |
+
print_info: n_rot = 64
|
| 91 |
+
print_info: n_swa = 0
|
| 92 |
+
print_info: is_swa_any = 0
|
| 93 |
+
print_info: n_embd_head_k = 128
|
| 94 |
+
print_info: n_embd_head_v = 128
|
| 95 |
+
print_info: n_gqa = 6
|
| 96 |
+
print_info: n_embd_k_gqa = 1024
|
| 97 |
+
print_info: n_embd_v_gqa = 1024
|
| 98 |
+
print_info: f_norm_eps = 0.0e+00
|
| 99 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 100 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 101 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 102 |
+
print_info: f_logit_scale = 0.0e+00
|
| 103 |
+
print_info: f_attn_scale = 0.0e+00
|
| 104 |
+
print_info: n_ff = 1536
|
| 105 |
+
print_info: n_expert = 256
|
| 106 |
+
print_info: n_expert_used = 8
|
| 107 |
+
print_info: n_expert_groups = 0
|
| 108 |
+
print_info: n_group_used = 0
|
| 109 |
+
print_info: causal attn = 1
|
| 110 |
+
print_info: pooling type = 0
|
| 111 |
+
print_info: rope type = 2
|
| 112 |
+
print_info: rope scaling = linear
|
| 113 |
+
print_info: freq_base_train = 5000000.0
|
| 114 |
+
print_info: freq_scale_train = 1
|
| 115 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 116 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 117 |
+
print_info: rope_finetuned = unknown
|
| 118 |
+
print_info: model type = 230B.A10B
|
| 119 |
+
print_info: model params = 228.69 B
|
| 120 |
+
print_info: general.name = MiniMax M2.5
|
| 121 |
+
print_info: vocab type = BPE
|
| 122 |
+
print_info: n_vocab = 200064
|
| 123 |
+
print_info: n_merges = 199744
|
| 124 |
+
print_info: BOS token = 200034 ']~!b['
|
| 125 |
+
print_info: EOS token = 200020 '[e~['
|
| 126 |
+
print_info: UNK token = 200021 ']!d~['
|
| 127 |
+
print_info: LF token = 10 'Ċ'
|
| 128 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 129 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 130 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 131 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 132 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 133 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 134 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 135 |
+
print_info: EOG token = 200020 '[e~['
|
| 136 |
+
print_info: max token length = 256
|
| 137 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 138 |
+
load_tensors: offloading output layer to GPU
|
| 139 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 140 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 141 |
+
load_tensors: CPU_Mapped model buffer size = 133034.72 MiB
|
| 142 |
+
load_tensors: CUDA0 model buffer size = 9868.62 MiB
|
| 143 |
+
load_tensors: CUDA1 model buffer size = 8326.11 MiB
|
| 144 |
+
....................................................................................................
|
| 145 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 146 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 147 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 148 |
+
llama_context: constructing llama_context
|
| 149 |
+
llama_context: n_seq_max = 1
|
| 150 |
+
llama_context: n_ctx = 512
|
| 151 |
+
llama_context: n_ctx_seq = 512
|
| 152 |
+
llama_context: n_batch = 512
|
| 153 |
+
llama_context: n_ubatch = 512
|
| 154 |
+
llama_context: causal_attn = 1
|
| 155 |
+
llama_context: flash_attn = enabled
|
| 156 |
+
llama_context: kv_unified = false
|
| 157 |
+
llama_context: freq_base = 5000000.0
|
| 158 |
+
llama_context: freq_scale = 1
|
| 159 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 160 |
+
llama_context: CUDA_Host output buffer size = 0.76 MiB
|
| 161 |
+
llama_kv_cache: CUDA0 KV buffer size = 64.00 MiB
|
| 162 |
+
llama_kv_cache: CUDA1 KV buffer size = 60.00 MiB
|
| 163 |
+
llama_kv_cache: size = 124.00 MiB ( 512 cells, 62 layers, 1/1 seqs), K (f16): 62.00 MiB, V (f16): 62.00 MiB
|
| 164 |
+
sched_reserve: reserving ...
|
| 165 |
+
sched_reserve: CUDA0 compute buffer size = 1446.50 MiB
|
| 166 |
+
sched_reserve: CUDA1 compute buffer size = 396.75 MiB
|
| 167 |
+
sched_reserve: CUDA_Host compute buffer size = 13.01 MiB
|
| 168 |
+
sched_reserve: graph nodes = 3975
|
| 169 |
+
sched_reserve: graph splits = 210 (with bs=512), 124 (with bs=1)
|
| 170 |
+
sched_reserve: reserve took 6.59 ms, sched copies = 1
|
| 171 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 172 |
+
|
| 173 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 174 |
+
kl_divergence: 5.11 seconds per pass - ETA 10.28 minutes
|
| 175 |
+
|
| 176 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 177 |
+
1 6.7331 ± 1.3236 -0.00249 ± 0.01382 0.01105 ± 0.00134 2.985 ± 0.395 % 92.549 ± 1.648 %
|
| 178 |
+
|
| 179 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 180 |
+
2 4.8478 ± 0.5978 0.00800 ± 0.00968 0.00977 ± 0.00108 2.822 ± 0.357 % 94.314 ± 1.026 %
|
| 181 |
+
|
| 182 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 183 |
+
3 4.4982 ± 0.4508 0.01175 ± 0.00931 0.01127 ± 0.00091 3.337 ± 0.254 % 94.641 ± 0.815 %
|
| 184 |
+
|
| 185 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 186 |
+
4 5.0575 ± 0.4491 0.00623 ± 0.00824 0.01255 ± 0.00084 3.437 ± 0.214 % 94.314 ± 0.725 %
|
| 187 |
+
|
| 188 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 189 |
+
5 4.8352 ± 0.3807 0.00354 ± 0.00760 0.01458 ± 0.00163 3.972 ± 0.521 % 94.745 ± 0.625 %
|
| 190 |
+
|
| 191 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 192 |
+
6 5.8632 ± 0.4508 0.00752 ± 0.00733 0.01749 ± 0.00153 3.990 ± 0.464 % 94.510 ± 0.583 %
|
| 193 |
+
|
| 194 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 195 |
+
7 5.4948 ± 0.3789 0.00600 ± 0.00732 0.02199 ± 0.00229 4.933 ± 0.470 % 93.950 ± 0.564 %
|
| 196 |
+
|
| 197 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 198 |
+
8 6.1849 ± 0.4038 0.00485 ± 0.00670 0.02161 ± 0.00202 4.803 ± 0.425 % 93.922 ± 0.529 %
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
9 6.0793 ± 0.3710 0.00230 ± 0.00632 0.02079 ± 0.00181 4.622 ± 0.393 % 93.987 ± 0.496 %
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
10 5.5801 ± 0.3178 0.00361 ± 0.00602 0.02013 ± 0.00164 4.569 ± 0.359 % 94.078 ± 0.467 %
|
| 205 |
+
|
| 206 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 207 |
+
11 6.1177 ± 0.3379 0.00392 ± 0.00570 0.02004 ± 0.00151 4.490 ± 0.335 % 94.046 ± 0.447 %
|
| 208 |
+
|
| 209 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 210 |
+
12 6.7971 ± 0.3655 0.00583 ± 0.00542 0.02068 ± 0.00152 4.371 ± 0.316 % 93.922 ± 0.432 %
|
| 211 |
+
|
| 212 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 213 |
+
13 7.0632 ± 0.3619 0.00622 ± 0.00508 0.02014 ± 0.00143 4.250 ± 0.300 % 93.997 ± 0.413 %
|
| 214 |
+
|
| 215 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 216 |
+
14 7.6333 ± 0.3815 0.00670 ± 0.00487 0.02037 ± 0.00136 4.252 ± 0.286 % 93.782 ± 0.404 %
|
| 217 |
+
|
| 218 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 219 |
+
15 7.9991 ± 0.3871 0.00700 ± 0.00462 0.02022 ± 0.00128 4.213 ± 0.272 % 93.804 ± 0.390 %
|
| 220 |
+
|
| 221 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 222 |
+
16 8.2742 ± 0.3878 0.00400 ± 0.00472 0.02654 ± 0.00358 4.822 ± 0.379 % 93.701 ± 0.380 %
|
| 223 |
+
|
| 224 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 225 |
+
17 8.5047 ± 0.3895 0.00502 ± 0.00465 0.02695 ± 0.00339 4.723 ± 0.365 % 93.610 ± 0.372 %
|
| 226 |
+
|
| 227 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 228 |
+
18 8.0191 ± 0.3546 0.00471 ± 0.00451 0.02673 ± 0.00321 4.773 ± 0.349 % 93.725 ± 0.358 %
|
| 229 |
+
|
| 230 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 231 |
+
19 8.1397 ± 0.3502 0.00316 ± 0.00439 0.02605 ± 0.00304 4.719 ± 0.335 % 93.684 ± 0.349 %
|
| 232 |
+
|
| 233 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 234 |
+
20 8.1759 ± 0.3427 0.00093 ± 0.00432 0.02658 ± 0.00292 4.717 ± 0.320 % 93.784 ± 0.338 %
|
| 235 |
+
|
| 236 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 237 |
+
21 8.1651 ± 0.3338 0.00115 ± 0.00421 0.02636 ± 0.00278 4.682 ± 0.308 % 93.782 ± 0.330 %
|
| 238 |
+
|
| 239 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 240 |
+
22 8.5568 ± 0.3466 0.00347 ± 0.00423 0.02751 ± 0.00268 4.692 ± 0.295 % 93.583 ± 0.327 %
|
| 241 |
+
|
| 242 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 243 |
+
23 8.5713 ± 0.3405 0.00238 ± 0.00458 0.03078 ± 0.00323 4.932 ± 0.322 % 93.504 ± 0.322 %
|
| 244 |
+
|
| 245 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 246 |
+
24 8.9707 ± 0.3508 0.00319 ± 0.00446 0.03050 ± 0.00310 4.859 ± 0.313 % 93.513 ± 0.315 %
|
| 247 |
+
|
| 248 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 249 |
+
25 8.9678 ± 0.3445 0.00226 ± 0.00436 0.03055 ± 0.00298 4.868 ± 0.301 % 93.506 ± 0.309 %
|
| 250 |
+
|
| 251 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 252 |
+
26 8.4191 ± 0.3139 0.00427 ± 0.00445 0.03414 ± 0.00297 5.644 ± 0.294 % 93.303 ± 0.307 %
|
| 253 |
+
|
| 254 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 255 |
+
27 7.9916 ± 0.2899 0.00502 ± 0.00455 0.03737 ± 0.00297 6.244 ± 0.303 % 93.217 ± 0.303 %
|
| 256 |
+
|
| 257 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 258 |
+
28 8.1077 ± 0.2894 0.00535 ± 0.00444 0.03705 ± 0.00287 6.191 ± 0.295 % 93.165 ± 0.299 %
|
| 259 |
+
|
| 260 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 261 |
+
29 8.0410 ± 0.2822 0.00497 ± 0.00434 0.03664 ± 0.00277 6.137 ± 0.287 % 93.252 ± 0.292 %
|
| 262 |
+
|
| 263 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 264 |
+
30 7.5204 ± 0.2572 0.00470 ± 0.00425 0.03585 ± 0.00269 6.143 ± 0.286 % 93.464 ± 0.283 %
|
| 265 |
+
|
| 266 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 267 |
+
31 7.0768 ± 0.2359 0.00441 ± 0.00414 0.03531 ± 0.00261 6.170 ± 0.282 % 93.612 ± 0.275 %
|
| 268 |
+
|
| 269 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 270 |
+
32 6.8971 ± 0.2244 0.00376 ± 0.00403 0.03462 ± 0.00253 6.118 ± 0.275 % 93.652 ± 0.270 %
|
| 271 |
+
|
| 272 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 273 |
+
33 6.7528 ± 0.2147 0.00339 ± 0.00393 0.03393 ± 0.00245 6.067 ± 0.269 % 93.737 ± 0.264 %
|
| 274 |
+
|
| 275 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 276 |
+
34 6.9506 ± 0.2191 0.00528 ± 0.00388 0.03422 ± 0.00239 6.009 ± 0.264 % 93.679 ± 0.261 %
|
| 277 |
+
|
| 278 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 279 |
+
35 7.0436 ± 0.2206 0.00538 ± 0.00386 0.03475 ± 0.00233 6.046 ± 0.258 % 93.546 ± 0.260 %
|
| 280 |
+
|
| 281 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 282 |
+
36 7.0944 ± 0.2199 0.00463 ± 0.00379 0.03437 ± 0.00227 5.999 ± 0.253 % 93.573 ± 0.256 %
|
| 283 |
+
|
| 284 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 285 |
+
37 7.1225 ± 0.2180 0.00653 ± 0.00381 0.03504 ± 0.00225 6.219 ± 0.263 % 93.514 ± 0.254 %
|
| 286 |
+
|
| 287 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 288 |
+
38 7.3326 ± 0.2226 0.00677 ± 0.00374 0.03490 ± 0.00220 6.170 ± 0.258 % 93.437 ± 0.252 %
|
| 289 |
+
|
| 290 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 291 |
+
39 7.2889 ± 0.2181 0.00740 ± 0.00369 0.03544 ± 0.00216 6.259 ± 0.252 % 93.374 ± 0.249 %
|
| 292 |
+
|
| 293 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 294 |
+
40 7.0573 ± 0.2069 0.00715 ± 0.00382 0.03883 ± 0.00217 6.742 ± 0.244 % 93.206 ± 0.249 %
|
| 295 |
+
|
| 296 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 297 |
+
41 6.8483 ± 0.1971 0.00869 ± 0.00391 0.04166 ± 0.00220 7.092 ± 0.245 % 93.085 ± 0.248 %
|
| 298 |
+
|
| 299 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 300 |
+
42 6.6456 ± 0.1877 0.00925 ± 0.00394 0.04360 ± 0.00219 7.348 ± 0.243 % 92.988 ± 0.247 %
|
| 301 |
+
|
| 302 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 303 |
+
43 6.4598 ± 0.1793 0.01195 ± 0.00406 0.04583 ± 0.00226 7.587 ± 0.244 % 92.923 ± 0.245 %
|
| 304 |
+
|
| 305 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 306 |
+
44 6.4078 ± 0.1750 0.01133 ± 0.00398 0.04510 ± 0.00221 7.524 ± 0.240 % 92.923 ± 0.242 %
|
| 307 |
+
|
| 308 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 309 |
+
45 6.5466 ± 0.1778 0.01168 ± 0.00392 0.04492 ± 0.00217 7.476 ± 0.237 % 92.837 ± 0.241 %
|
| 310 |
+
|
| 311 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 312 |
+
46 6.6826 ± 0.1798 0.01156 ± 0.00385 0.04440 ± 0.00212 7.411 ± 0.234 % 92.805 ± 0.239 %
|
| 313 |
+
|
| 314 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 315 |
+
47 6.8358 ± 0.1825 0.01172 ± 0.00378 0.04376 ± 0.00208 7.337 ± 0.231 % 92.808 ± 0.236 %
|
| 316 |
+
|
| 317 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 318 |
+
48 6.7160 ± 0.1764 0.01121 ± 0.00371 0.04313 ± 0.00203 7.284 ± 0.228 % 92.851 ± 0.233 %
|
| 319 |
+
|
| 320 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 321 |
+
49 6.8492 ± 0.1784 0.01217 ± 0.00380 0.04712 ± 0.00242 7.316 ± 0.226 % 92.661 ± 0.233 %
|
| 322 |
+
|
| 323 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 324 |
+
50 6.9566 ± 0.1804 0.01157 ± 0.00376 0.04741 ± 0.00243 7.303 ± 0.223 % 92.604 ± 0.232 %
|
| 325 |
+
|
| 326 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 327 |
+
51 7.0649 ± 0.1816 0.01133 ± 0.00369 0.04682 ± 0.00238 7.242 ± 0.220 % 92.649 ± 0.229 %
|
| 328 |
+
|
| 329 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 330 |
+
52 7.1258 ± 0.1813 0.01142 ± 0.00366 0.04687 ± 0.00233 7.215 ± 0.217 % 92.624 ± 0.227 %
|
| 331 |
+
|
| 332 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 333 |
+
53 7.2427 ± 0.1826 0.01133 ± 0.00360 0.04637 ± 0.00229 7.167 ± 0.214 % 92.623 ± 0.225 %
|
| 334 |
+
|
| 335 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 336 |
+
54 7.2996 ± 0.1820 0.01113 ± 0.00354 0.04582 ± 0.00225 7.113 ± 0.212 % 92.643 ± 0.222 %
|
| 337 |
+
|
| 338 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 339 |
+
55 7.3516 ± 0.1814 0.01145 ± 0.00349 0.04534 ± 0.00221 7.068 ± 0.210 % 92.649 ± 0.220 %
|
| 340 |
+
|
| 341 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 342 |
+
56 7.3950 ± 0.1810 0.01176 ± 0.00344 0.04523 ± 0.00218 7.028 ± 0.207 % 92.626 ± 0.219 %
|
| 343 |
+
|
| 344 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 345 |
+
57 7.3968 ± 0.1794 0.01177 ± 0.00341 0.04542 ± 0.00214 7.019 ± 0.204 % 92.570 ± 0.218 %
|
| 346 |
+
|
| 347 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 348 |
+
58 7.4156 ± 0.1784 0.01125 ± 0.00338 0.04684 ± 0.00247 7.024 ± 0.205 % 92.556 ± 0.216 %
|
| 349 |
+
|
| 350 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 351 |
+
59 7.3751 ± 0.1756 0.01118 ± 0.00333 0.04626 ± 0.00243 6.975 ± 0.203 % 92.589 ± 0.214 %
|
| 352 |
+
|
| 353 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 354 |
+
60 7.3795 ± 0.1742 0.01101 ± 0.00329 0.04580 ± 0.00239 6.934 ± 0.201 % 92.588 ± 0.212 %
|
| 355 |
+
|
| 356 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 357 |
+
61 7.4237 ± 0.1738 0.01120 ± 0.00324 0.04536 ± 0.00235 6.894 ± 0.199 % 92.626 ± 0.210 %
|
| 358 |
+
|
| 359 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 360 |
+
62 7.3877 ± 0.1717 0.01085 ± 0.00322 0.04497 ± 0.00232 6.864 ± 0.197 % 92.694 ± 0.207 %
|
| 361 |
+
|
| 362 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 363 |
+
63 7.4310 ± 0.1718 0.01112 ± 0.00320 0.04463 ± 0.00228 6.824 ± 0.195 % 92.686 ± 0.205 %
|
| 364 |
+
|
| 365 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 366 |
+
64 7.4037 ± 0.1694 0.01048 ± 0.00317 0.04435 ± 0.00225 6.787 ± 0.193 % 92.678 ± 0.204 %
|
| 367 |
+
|
| 368 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 369 |
+
65 7.3936 ± 0.1678 0.01085 ± 0.00313 0.04408 ± 0.00221 6.759 ± 0.191 % 92.718 ± 0.202 %
|
| 370 |
+
|
| 371 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 372 |
+
66 7.4348 ± 0.1677 0.01110 ± 0.00310 0.04391 ± 0.00218 6.732 ± 0.189 % 92.692 ± 0.201 %
|
| 373 |
+
|
| 374 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 375 |
+
67 7.4496 ± 0.1670 0.01135 ± 0.00308 0.04370 ± 0.00215 6.701 ± 0.187 % 92.707 ± 0.199 %
|
| 376 |
+
|
| 377 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 378 |
+
68 7.4020 ± 0.1645 0.01138 ± 0.00305 0.04327 ± 0.00212 6.664 ± 0.185 % 92.739 ± 0.197 %
|
| 379 |
+
|
| 380 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 381 |
+
69 7.4355 ± 0.1641 0.01138 ± 0.00302 0.04304 ± 0.00209 6.642 ± 0.183 % 92.725 ± 0.196 %
|
| 382 |
+
|
| 383 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 384 |
+
70 7.4023 ± 0.1619 0.01112 ± 0.00299 0.04284 ± 0.00206 6.615 ± 0.181 % 92.739 ± 0.194 %
|
| 385 |
+
|
| 386 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 387 |
+
71 7.3875 ± 0.1605 0.01163 ± 0.00296 0.04261 ± 0.00203 6.601 ± 0.180 % 92.770 ± 0.192 %
|
| 388 |
+
|
| 389 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 390 |
+
72 7.4094 ± 0.1601 0.01189 ± 0.00295 0.04240 ± 0.00200 6.572 ± 0.178 % 92.783 ± 0.191 %
|
| 391 |
+
|
| 392 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 393 |
+
73 7.4155 ± 0.1591 0.01204 ± 0.00292 0.04218 ± 0.00198 6.547 ± 0.176 % 92.780 ± 0.190 %
|
| 394 |
+
|
| 395 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 396 |
+
74 7.4053 ± 0.1577 0.01233 ± 0.00289 0.04195 ± 0.00195 6.515 ± 0.175 % 92.793 ± 0.188 %
|
| 397 |
+
|
| 398 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 399 |
+
75 7.4024 ± 0.1565 0.01240 ± 0.00286 0.04180 ± 0.00192 6.496 ± 0.173 % 92.784 ± 0.187 %
|
| 400 |
+
|
| 401 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 402 |
+
76 7.4677 ± 0.1571 0.01236 ± 0.00284 0.04157 ± 0.00190 6.467 ± 0.172 % 92.802 ± 0.186 %
|
| 403 |
+
|
| 404 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 405 |
+
77 7.4615 ± 0.1559 0.01199 ± 0.00282 0.04126 ± 0.00187 6.432 ± 0.170 % 92.834 ± 0.184 %
|
| 406 |
+
|
| 407 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 408 |
+
78 7.4764 ± 0.1553 0.01205 ± 0.00279 0.04102 ± 0.00185 6.405 ± 0.169 % 92.846 ± 0.183 %
|
| 409 |
+
|
| 410 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 411 |
+
79 7.4852 ± 0.1546 0.01176 ± 0.00277 0.04085 ± 0.00183 6.384 ± 0.168 % 92.847 ± 0.182 %
|
| 412 |
+
|
| 413 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 414 |
+
80 7.4805 ± 0.1539 0.01191 ± 0.00278 0.04074 ± 0.00181 6.363 ± 0.166 % 92.868 ± 0.180 %
|
| 415 |
+
|
| 416 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 417 |
+
81 7.4513 ± 0.1523 0.01176 ± 0.00276 0.04069 ± 0.00180 6.366 ± 0.166 % 92.878 ± 0.179 %
|
| 418 |
+
|
| 419 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 420 |
+
82 7.4312 ± 0.1507 0.01151 ± 0.00273 0.04040 ± 0.00177 6.341 ± 0.165 % 92.879 ± 0.178 %
|
| 421 |
+
|
| 422 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 423 |
+
83 7.4598 ± 0.1502 0.01127 ± 0.00270 0.04010 ± 0.00175 6.313 ± 0.163 % 92.884 ± 0.177 %
|
| 424 |
+
|
| 425 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 426 |
+
84 7.4791 ± 0.1494 0.01132 ± 0.00267 0.03981 ± 0.00173 6.285 ± 0.162 % 92.862 ± 0.176 %
|
| 427 |
+
|
| 428 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 429 |
+
85 7.4718 ± 0.1481 0.01138 ± 0.00264 0.03953 ± 0.00171 6.266 ± 0.161 % 92.895 ± 0.175 %
|
| 430 |
+
|
| 431 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 432 |
+
86 7.4028 ± 0.1454 0.01130 ± 0.00262 0.03920 ± 0.00169 6.237 ± 0.160 % 92.932 ± 0.173 %
|
| 433 |
+
|
| 434 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 435 |
+
87 7.3409 ± 0.1428 0.01110 ± 0.00259 0.03883 ± 0.00167 6.205 ± 0.159 % 92.973 ± 0.172 %
|
| 436 |
+
|
| 437 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 438 |
+
88 7.2746 ± 0.1403 0.01096 ± 0.00256 0.03849 ± 0.00165 6.177 ± 0.158 % 92.999 ± 0.170 %
|
| 439 |
+
|
| 440 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 441 |
+
89 7.1987 ± 0.1375 0.01088 ± 0.00253 0.03814 ± 0.00164 6.150 ± 0.157 % 93.025 ± 0.169 %
|
| 442 |
+
|
| 443 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 444 |
+
90 7.1397 ± 0.1352 0.01068 ± 0.00251 0.03780 ± 0.00162 6.123 ± 0.156 % 93.050 ± 0.168 %
|
| 445 |
+
|
| 446 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 447 |
+
91 7.0868 ± 0.1331 0.01042 ± 0.00248 0.03746 ± 0.00160 6.095 ± 0.155 % 93.083 ± 0.167 %
|
| 448 |
+
|
| 449 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 450 |
+
92 7.0297 ± 0.1309 0.01039 ± 0.00246 0.03717 ± 0.00158 6.075 ± 0.154 % 93.082 ± 0.166 %
|
| 451 |
+
|
| 452 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 453 |
+
93 7.0380 ± 0.1305 0.01053 ± 0.00244 0.03711 ± 0.00157 6.075 ± 0.152 % 93.076 ± 0.165 %
|
| 454 |
+
|
| 455 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 456 |
+
94 7.0685 ± 0.1303 0.01059 ± 0.00242 0.03691 ± 0.00155 6.050 ± 0.151 % 93.100 ± 0.164 %
|
| 457 |
+
|
| 458 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 459 |
+
95 7.1731 ± 0.1319 0.01066 ± 0.00241 0.03679 ± 0.00153 6.027 ± 0.150 % 93.098 ± 0.163 %
|
| 460 |
+
|
| 461 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 462 |
+
96 7.2663 ± 0.1331 0.01086 ± 0.00239 0.03672 ± 0.00152 6.007 ± 0.149 % 93.100 ± 0.162 %
|
| 463 |
+
|
| 464 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 465 |
+
97 7.3445 ± 0.1339 0.01056 ± 0.00237 0.03658 ± 0.00150 5.985 ± 0.148 % 93.087 ± 0.161 %
|
| 466 |
+
|
| 467 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 468 |
+
98 7.4817 ± 0.1363 0.01056 ± 0.00237 0.03638 ± 0.00149 5.964 ± 0.148 % 93.069 ± 0.161 %
|
| 469 |
+
|
| 470 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 471 |
+
99 7.5953 ± 0.1380 0.01038 ± 0.00235 0.03621 ± 0.00147 5.940 ± 0.147 % 93.040 ± 0.160 %
|
| 472 |
+
|
| 473 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 474 |
+
100 7.6248 ± 0.1378 0.00999 ± 0.00233 0.03635 ± 0.00147 5.936 ± 0.146 % 93.016 ± 0.160 %
|
| 475 |
+
|
| 476 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 477 |
+
101 7.6649 ± 0.1380 0.00996 ± 0.00234 0.03650 ± 0.00146 5.939 ± 0.145 % 92.996 ± 0.159 %
|
| 478 |
+
|
| 479 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 480 |
+
102 7.7374 ± 0.1393 0.00977 ± 0.00235 0.03721 ± 0.00149 5.937 ± 0.144 % 92.972 ± 0.159 %
|
| 481 |
+
|
| 482 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 483 |
+
103 7.7091 ± 0.1382 0.00976 ± 0.00233 0.03705 ± 0.00148 5.940 ± 0.143 % 92.983 ± 0.158 %
|
| 484 |
+
|
| 485 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 486 |
+
104 7.6516 ± 0.1364 0.00979 ± 0.00232 0.03715 ± 0.00147 5.985 ± 0.145 % 93.002 ± 0.157 %
|
| 487 |
+
|
| 488 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 489 |
+
105 7.5427 ± 0.1335 0.01011 ± 0.00231 0.03707 ± 0.00146 6.005 ± 0.143 % 93.038 ± 0.156 %
|
| 490 |
+
|
| 491 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 492 |
+
106 7.4150 ± 0.1304 0.01014 ± 0.00230 0.03692 ± 0.00145 6.022 ± 0.143 % 93.074 ± 0.154 %
|
| 493 |
+
|
| 494 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 495 |
+
107 7.4712 ± 0.1307 0.01012 ± 0.00228 0.03672 ± 0.00144 6.008 ± 0.142 % 93.062 ± 0.154 %
|
| 496 |
+
|
| 497 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 498 |
+
108 7.4801 ± 0.1302 0.00981 ± 0.00226 0.03650 ± 0.00142 5.989 ± 0.142 % 93.083 ± 0.153 %
|
| 499 |
+
|
| 500 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 501 |
+
109 7.5070 ± 0.1302 0.01004 ± 0.00225 0.03638 ± 0.00141 5.979 ± 0.141 % 93.096 ± 0.152 %
|
| 502 |
+
|
| 503 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 504 |
+
110 7.5385 ± 0.1302 0.00968 ± 0.00223 0.03621 ± 0.00140 5.966 ± 0.140 % 93.112 ± 0.151 %
|
| 505 |
+
|
| 506 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 507 |
+
111 7.5849 ± 0.1303 0.00972 ± 0.00222 0.03606 ± 0.00139 5.945 ± 0.139 % 93.121 ± 0.150 %
|
| 508 |
+
|
| 509 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 510 |
+
112 7.5939 ± 0.1298 0.00951 ± 0.00220 0.03586 ± 0.00137 5.929 ± 0.138 % 93.137 ± 0.150 %
|
| 511 |
+
|
| 512 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 513 |
+
113 7.6058 ± 0.1293 0.00942 ± 0.00219 0.03570 ± 0.00136 5.912 ± 0.137 % 93.136 ± 0.149 %
|
| 514 |
+
|
| 515 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 516 |
+
114 7.6241 ± 0.1292 0.00951 ± 0.00218 0.03553 ± 0.00135 5.893 ± 0.137 % 93.154 ± 0.148 %
|
| 517 |
+
|
| 518 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 519 |
+
115 7.6074 ± 0.1282 0.00972 ± 0.00218 0.03594 ± 0.00136 5.942 ± 0.137 % 93.149 ± 0.148 %
|
| 520 |
+
|
| 521 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 522 |
+
116 7.6050 ± 0.1277 0.00920 ± 0.00222 0.03742 ± 0.00144 6.045 ± 0.135 % 93.100 ± 0.147 %
|
| 523 |
+
|
| 524 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 525 |
+
117 7.5142 ± 0.1253 0.00995 ± 0.00223 0.03818 ± 0.00144 6.192 ± 0.135 % 93.089 ± 0.147 %
|
| 526 |
+
|
| 527 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 528 |
+
118 7.4221 ± 0.1230 0.01014 ± 0.00223 0.03874 ± 0.00143 6.270 ± 0.134 % 93.081 ± 0.146 %
|
| 529 |
+
|
| 530 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 531 |
+
119 7.3325 ± 0.1207 0.01098 ± 0.00224 0.03944 ± 0.00142 6.371 ± 0.133 % 93.076 ± 0.146 %
|
| 532 |
+
|
| 533 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 534 |
+
120 7.2534 ± 0.1186 0.01146 ± 0.00226 0.04046 ± 0.00143 6.566 ± 0.134 % 92.997 ± 0.146 %
|
| 535 |
+
|
| 536 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 537 |
+
121 7.1735 ± 0.1167 0.01246 ± 0.00226 0.04127 ± 0.00143 6.703 ± 0.135 % 92.974 ± 0.146 %
|
| 538 |
+
|
| 539 |
+
====== Perplexity statistics ======
|
| 540 |
+
Mean PPL(Q) : 7.173459 ± 0.116673
|
| 541 |
+
Mean PPL(base) : 7.084621 ± 0.114399
|
| 542 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.03%
|
| 543 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.012462 ± 0.002264
|
| 544 |
+
Mean PPL(Q)/PPL(base) : 1.012540 ± 0.002292
|
| 545 |
+
Mean PPL(Q)-PPL(base) : 0.088838 ± 0.016278
|
| 546 |
+
|
| 547 |
+
====== KL divergence statistics ======
|
| 548 |
+
Mean KLD: 0.041269 ± 0.001426
|
| 549 |
+
Maximum KLD: 17.297672
|
| 550 |
+
99.9% KLD: 2.695222
|
| 551 |
+
99.0% KLD: 0.573439
|
| 552 |
+
95.0% KLD: 0.139787
|
| 553 |
+
90.0% KLD: 0.066285
|
| 554 |
+
Median KLD: 0.007790
|
| 555 |
+
10.0% KLD: 0.000026
|
| 556 |
+
5.0% KLD: 0.000004
|
| 557 |
+
1.0% KLD: -0.000000
|
| 558 |
+
0.1% KLD: -0.000003
|
| 559 |
+
Minimum KLD: -0.000024
|
| 560 |
+
|
| 561 |
+
====== Token probability statistics ======
|
| 562 |
+
Mean Δp: -0.257 ± 0.038 %
|
| 563 |
+
Maximum Δp: 99.867%
|
| 564 |
+
99.9% Δp: 54.264%
|
| 565 |
+
99.0% Δp: 16.855%
|
| 566 |
+
95.0% Δp: 5.889%
|
| 567 |
+
90.0% Δp: 2.992%
|
| 568 |
+
75.0% Δp: 0.395%
|
| 569 |
+
Median Δp: 0.000%
|
| 570 |
+
25.0% Δp: -0.488%
|
| 571 |
+
10.0% Δp: -3.292%
|
| 572 |
+
5.0% Δp: -6.613%
|
| 573 |
+
1.0% Δp: -24.156%
|
| 574 |
+
0.1% Δp: -64.883%
|
| 575 |
+
Minimum Δp: -97.161%
|
| 576 |
+
RMS Δp : 6.703 ± 0.135 %
|
| 577 |
+
Same top p: 92.974 ± 0.146 %
|
| 578 |
+
|
| 579 |
+
llama_perf_context_print: load time = 49682.57 ms
|
| 580 |
+
llama_perf_context_print: prompt eval time = 591438.68 ms / 61952 tokens ( 9.55 ms per token, 104.75 tokens per second)
|
| 581 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 582 |
+
llama_perf_context_print: total time = 604106.81 ms / 61953 tokens
|
| 583 |
+
llama_perf_context_print: graphs reused = 120
|
| 584 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 585 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 12429 + ( 11379 = 9868 + 64 + 1446) + 326 |
|
| 586 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 14993 + ( 8782 = 8326 + 60 + 396) + 358 |
|
| 587 |
+
llama_memory_breakdown_print: | - Host | 133047 = 133034 + 0 + 13 |
|
| 588 |
+
```
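Each per-quant log above ends with the same `====== Perplexity statistics ======`, `====== KL divergence statistics ======`, and `====== Token probability statistics ======` summary blocks, and the aggregated CSV further down in this upload was built from those lines. Below is a minimal parsing sketch, not the script actually used for this upload; the helper name and regex are illustrative, and it assumes the per-quant `.md` files are checked out locally.

```python
# Minimal sketch (illustrative only): pull headline numbers such as "Mean KLD"
# and "Same top p" out of one of the per-quant llama-perplexity logs above.
import re
from pathlib import Path

def parse_kld_summary(md_path: str) -> dict:
    """Extract a few summary stats from a MiniMax-M2.5 KLD log (hypothetical helper)."""
    text = Path(md_path).read_text(encoding="utf-8")
    out = {}
    for key in ("Mean KLD", "Median KLD", "RMS Δp", "Same top p"):
        # Lines look like "Mean KLD: 0.041269 ± 0.001426" or "RMS Δp : 6.703 ± 0.135 %"
        m = re.search(rf"^{re.escape(key)}\s*:\s*([-\d.]+)", text, flags=re.MULTILINE)
        if m:
            out[key] = float(m.group(1))
    return out

# Example: the Q4_K_M (aes_sedai) log shown above
print(parse_kld_summary("kld_data/aes_sedai/MiniMax-M2.5-Q4_K_M.md"))
```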
|
kld_data/aes_sedai/MiniMax-M2.5-Q5_K_M.md
ADDED
|
@@ -0,0 +1,588 @@
| 1 |
+
### MiniMax-M2.5-Q5_K_M (aes_sedai)
|
| 2 |
+
|
| 3 |
+
157.23 GiB (5.91 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
./build/bin/llama-perplexity --n-gpu-layers 999 --threads 48 --override-tensor "blk\.(0|1|2|3)\.ffn_.*=CUDA0" --override-tensor "blk\.(4|5|6)\.ffn_.*=CUDA1" --override-tensor "blk\..*_exps\.=CPU" --flash-attn on --file "/mnt/srv/host/resources/KLD/calibration_datav3.txt" --kl-divergence-base "/mnt/srv/snowdrift/ref-logits-MiniMax-M2.5-BF16-calibration-datav3.bin" --kl-divergence --model "/mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-Q5_K_M.gguf"
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 11 |
+
DEPRECATED: argument '--override-tensor' specified multiple times, use comma-separated values instead (only last value will be used)
|
| 12 |
+
build: 8038 (05a6f0e89) with GNU 14.2.1 for Linux x86_64
|
| 13 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 14 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 15 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 13440 used, 10431 free vs. target of 1024
|
| 16 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 10105 used, 13766 free vs. target of 1024
|
| 17 |
+
llama_params_fit_impl: projected to use 23545 MiB of device memory vs. 47743 MiB of free device memory
|
| 18 |
+
llama_params_fit_impl: targets for free memory can be met on all devices, no changes needed
|
| 19 |
+
llama_params_fit: successfully fit params to free device memory
|
| 20 |
+
llama_params_fit: fitting params to free memory took 0.41 seconds
|
| 21 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 22 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 23 |
+
llama_model_loader: loaded meta data with 41 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/aes_sedai/MiniMax-M2.5-Q5_K_M.gguf (version GGUF V3 (latest))
|
| 24 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 25 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 26 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 27 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 28 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 29 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 30 |
+
llama_model_loader: - kv 5: general.name str = MiniMax M2.5
|
| 31 |
+
llama_model_loader: - kv 6: general.size_label str = 256x4.9B
|
| 32 |
+
llama_model_loader: - kv 7: general.license str = other
|
| 33 |
+
llama_model_loader: - kv 8: general.license.name str = modified-mit
|
| 34 |
+
llama_model_loader: - kv 9: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 35 |
+
llama_model_loader: - kv 10: general.tags arr[str,1] = ["text-generation"]
|
| 36 |
+
llama_model_loader: - kv 11: minimax-m2.block_count u32 = 62
|
| 37 |
+
llama_model_loader: - kv 12: minimax-m2.context_length u32 = 196608
|
| 38 |
+
llama_model_loader: - kv 13: minimax-m2.embedding_length u32 = 3072
|
| 39 |
+
llama_model_loader: - kv 14: minimax-m2.feed_forward_length u32 = 1536
|
| 40 |
+
llama_model_loader: - kv 15: minimax-m2.attention.head_count u32 = 48
|
| 41 |
+
llama_model_loader: - kv 16: minimax-m2.attention.head_count_kv u32 = 8
|
| 42 |
+
llama_model_loader: - kv 17: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 43 |
+
llama_model_loader: - kv 18: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 44 |
+
llama_model_loader: - kv 19: minimax-m2.expert_count u32 = 256
|
| 45 |
+
llama_model_loader: - kv 20: minimax-m2.expert_used_count u32 = 8
|
| 46 |
+
llama_model_loader: - kv 21: minimax-m2.expert_gating_func u32 = 2
|
| 47 |
+
llama_model_loader: - kv 22: minimax-m2.attention.key_length u32 = 128
|
| 48 |
+
llama_model_loader: - kv 23: minimax-m2.attention.value_length u32 = 128
|
| 49 |
+
llama_model_loader: - kv 24: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 50 |
+
llama_model_loader: - kv 25: minimax-m2.rope.dimension_count u32 = 64
|
| 51 |
+
llama_model_loader: - kv 26: tokenizer.ggml.model str = gpt2
|
| 52 |
+
llama_model_loader: - kv 27: tokenizer.ggml.pre str = minimax-m2
|
| 53 |
+
llama_model_loader: - kv 28: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 54 |
+
llama_model_loader: - kv 29: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 55 |
+
llama_model_loader: - kv 30: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 56 |
+
llama_model_loader: - kv 31: tokenizer.ggml.bos_token_id u32 = 200034
|
| 57 |
+
llama_model_loader: - kv 32: tokenizer.ggml.eos_token_id u32 = 200020
|
| 58 |
+
llama_model_loader: - kv 33: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 59 |
+
llama_model_loader: - kv 34: tokenizer.chat_template str = {# ----------‑‑‑ special token ...
|
| 60 |
+
llama_model_loader: - kv 35: general.quantization_version u32 = 2
|
| 61 |
+
llama_model_loader: - kv 36: general.file_type u32 = 7
|
| 62 |
+
llama_model_loader: - kv 37: quantize.imatrix.file str = /mnt/srv/snowdrift/fp16/MiniMax-M2.5/...
|
| 63 |
+
llama_model_loader: - kv 38: quantize.imatrix.dataset str = /mnt/srv/host/resources/KLD/calibrati...
|
| 64 |
+
llama_model_loader: - kv 39: quantize.imatrix.entries_count u32 = 496
|
| 65 |
+
llama_model_loader: - kv 40: quantize.imatrix.chunks_count u32 = 49
|
| 66 |
+
llama_model_loader: - type f32: 373 tensors
|
| 67 |
+
llama_model_loader: - type q8_0: 250 tensors
|
| 68 |
+
llama_model_loader: - type q5_K: 124 tensors
|
| 69 |
+
llama_model_loader: - type q6_K: 62 tensors
|
| 70 |
+
print_info: file format = GGUF V3 (latest)
|
| 71 |
+
print_info: file type = Q8_0
|
| 72 |
+
print_info: file size = 157.23 GiB (5.91 BPW)
|
| 73 |
+
load: 0 unused tokens
|
| 74 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 75 |
+
load: printing all EOG tokens:
|
| 76 |
+
load: - 200004 ('<fim_pad>')
|
| 77 |
+
load: - 200005 ('<reponame>')
|
| 78 |
+
load: - 200020 ('[e~[')
|
| 79 |
+
load: special tokens cache size = 54
|
| 80 |
+
load: token to piece cache size = 1.3355 MB
|
| 81 |
+
print_info: arch = minimax-m2
|
| 82 |
+
print_info: vocab_only = 0
|
| 83 |
+
print_info: no_alloc = 0
|
| 84 |
+
print_info: n_ctx_train = 196608
|
| 85 |
+
print_info: n_embd = 3072
|
| 86 |
+
print_info: n_embd_inp = 3072
|
| 87 |
+
print_info: n_layer = 62
|
| 88 |
+
print_info: n_head = 48
|
| 89 |
+
print_info: n_head_kv = 8
|
| 90 |
+
print_info: n_rot = 64
|
| 91 |
+
print_info: n_swa = 0
|
| 92 |
+
print_info: is_swa_any = 0
|
| 93 |
+
print_info: n_embd_head_k = 128
|
| 94 |
+
print_info: n_embd_head_v = 128
|
| 95 |
+
print_info: n_gqa = 6
|
| 96 |
+
print_info: n_embd_k_gqa = 1024
|
| 97 |
+
print_info: n_embd_v_gqa = 1024
|
| 98 |
+
print_info: f_norm_eps = 0.0e+00
|
| 99 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 100 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 101 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 102 |
+
print_info: f_logit_scale = 0.0e+00
|
| 103 |
+
print_info: f_attn_scale = 0.0e+00
|
| 104 |
+
print_info: n_ff = 1536
|
| 105 |
+
print_info: n_expert = 256
|
| 106 |
+
print_info: n_expert_used = 8
|
| 107 |
+
print_info: n_expert_groups = 0
|
| 108 |
+
print_info: n_group_used = 0
|
| 109 |
+
print_info: causal attn = 1
|
| 110 |
+
print_info: pooling type = 0
|
| 111 |
+
print_info: rope type = 2
|
| 112 |
+
print_info: rope scaling = linear
|
| 113 |
+
print_info: freq_base_train = 5000000.0
|
| 114 |
+
print_info: freq_scale_train = 1
|
| 115 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 116 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 117 |
+
print_info: rope_finetuned = unknown
|
| 118 |
+
print_info: model type = 230B.A10B
|
| 119 |
+
print_info: model params = 228.69 B
|
| 120 |
+
print_info: general.name = MiniMax M2.5
|
| 121 |
+
print_info: vocab type = BPE
|
| 122 |
+
print_info: n_vocab = 200064
|
| 123 |
+
print_info: n_merges = 199744
|
| 124 |
+
print_info: BOS token = 200034 ']~!b['
|
| 125 |
+
print_info: EOS token = 200020 '[e~['
|
| 126 |
+
print_info: UNK token = 200021 ']!d~['
|
| 127 |
+
print_info: LF token = 10 'Ċ'
|
| 128 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 129 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 130 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 131 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 132 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 133 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 134 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 135 |
+
print_info: EOG token = 200020 '[e~['
|
| 136 |
+
print_info: max token length = 256
|
| 137 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 138 |
+
load_tensors: offloading output layer to GPU
|
| 139 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 140 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 141 |
+
load_tensors: CPU_Mapped model buffer size = 160376.72 MiB
|
| 142 |
+
load_tensors: CUDA0 model buffer size = 11632.62 MiB
|
| 143 |
+
load_tensors: CUDA1 model buffer size = 9649.11 MiB
|
| 144 |
+
....................................................................................................
|
| 145 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 146 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 147 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 148 |
+
llama_context: constructing llama_context
|
| 149 |
+
llama_context: n_seq_max = 1
|
| 150 |
+
llama_context: n_ctx = 512
|
| 151 |
+
llama_context: n_ctx_seq = 512
|
| 152 |
+
llama_context: n_batch = 512
|
| 153 |
+
llama_context: n_ubatch = 512
|
| 154 |
+
llama_context: causal_attn = 1
|
| 155 |
+
llama_context: flash_attn = enabled
|
| 156 |
+
llama_context: kv_unified = false
|
| 157 |
+
llama_context: freq_base = 5000000.0
|
| 158 |
+
llama_context: freq_scale = 1
|
| 159 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 160 |
+
llama_context: CUDA_Host output buffer size = 0.76 MiB
|
| 161 |
+
llama_kv_cache: CUDA0 KV buffer size = 64.00 MiB
|
| 162 |
+
llama_kv_cache: CUDA1 KV buffer size = 60.00 MiB
|
| 163 |
+
llama_kv_cache: size = 124.00 MiB ( 512 cells, 62 layers, 1/1 seqs), K (f16): 62.00 MiB, V (f16): 62.00 MiB
|
| 164 |
+
sched_reserve: reserving ...
|
| 165 |
+
sched_reserve: CUDA0 compute buffer size = 1743.50 MiB
|
| 166 |
+
sched_reserve: CUDA1 compute buffer size = 396.75 MiB
|
| 167 |
+
sched_reserve: CUDA_Host compute buffer size = 13.01 MiB
|
| 168 |
+
sched_reserve: graph nodes = 3975
|
| 169 |
+
sched_reserve: graph splits = 210 (with bs=512), 124 (with bs=1)
|
| 170 |
+
sched_reserve: reserve took 6.53 ms, sched copies = 1
|
| 171 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 172 |
+
|
| 173 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 174 |
+
kl_divergence: 6.05 seconds per pass - ETA 12.20 minutes
|
| 175 |
+
|
| 176 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 177 |
+
1 6.6921 ± 1.3171 -0.00860 ± 0.01202 0.00733 ± 0.00103 2.556 ± 0.394 % 96.078 ± 1.218 %
|
| 178 |
+
|
| 179 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 180 |
+
2 4.7821 ± 0.5855 -0.00564 ± 0.00740 0.00680 ± 0.00099 2.363 ± 0.353 % 96.667 ± 0.796 %
|
| 181 |
+
|
| 182 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 183 |
+
3 4.4424 ± 0.4423 -0.00073 ± 0.00677 0.00695 ± 0.00072 2.655 ± 0.241 % 96.601 ± 0.656 %
|
| 184 |
+
|
| 185 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 186 |
+
4 5.0361 ± 0.4474 0.00199 ± 0.00597 0.00745 ± 0.00060 2.631 ± 0.201 % 96.471 ± 0.578 %
|
| 187 |
+
|
| 188 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 189 |
+
5 4.8199 ± 0.3809 0.00038 ± 0.00622 0.00774 ± 0.00056 2.709 ± 0.189 % 96.471 ± 0.517 %
|
| 190 |
+
|
| 191 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 192 |
+
6 5.8451 ± 0.4507 0.00443 ± 0.00584 0.01033 ± 0.00069 2.813 ± 0.202 % 95.948 ± 0.504 %
|
| 193 |
+
|
| 194 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 195 |
+
7 5.4826 ± 0.3795 0.00377 ± 0.00525 0.01094 ± 0.00064 3.052 ± 0.189 % 95.910 ± 0.469 %
|
| 196 |
+
|
| 197 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 198 |
+
8 6.1986 ± 0.4074 0.00707 ± 0.00486 0.01145 ± 0.00062 2.988 ± 0.170 % 95.490 ± 0.460 %
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
9 6.0994 ± 0.3751 0.00561 ± 0.00461 0.01133 ± 0.00061 2.975 ± 0.169 % 95.468 ± 0.434 %
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
10 5.5965 ± 0.3205 0.00654 ± 0.00435 0.01115 ± 0.00057 3.060 ± 0.154 % 95.294 ± 0.419 %
|
| 205 |
+
|
| 206 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 207 |
+
11 6.1361 ± 0.3407 0.00692 ± 0.00424 0.01133 ± 0.00057 3.031 ± 0.144 % 95.080 ± 0.408 %
|
| 208 |
+
|
| 209 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 210 |
+
12 6.8078 ± 0.3676 0.00741 ± 0.00427 0.01254 ± 0.00121 2.988 ± 0.135 % 95.065 ± 0.392 %
|
| 211 |
+
|
| 212 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 213 |
+
13 7.0709 ± 0.3638 0.00732 ± 0.00405 0.01265 ± 0.00117 2.948 ± 0.130 % 94.932 ± 0.381 %
|
| 214 |
+
|
| 215 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 216 |
+
14 7.6401 ± 0.3830 0.00759 ± 0.00386 0.01287 ± 0.00112 2.976 ± 0.145 % 94.958 ± 0.366 %
|
| 217 |
+
|
| 218 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 219 |
+
15 8.0058 ± 0.3885 0.00784 ± 0.00369 0.01296 ± 0.00106 2.976 ± 0.139 % 94.902 ± 0.356 %
|
| 220 |
+
|
| 221 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 222 |
+
16 8.2763 ± 0.3887 0.00426 ± 0.00376 0.01628 ± 0.00192 3.540 ± 0.327 % 94.853 ± 0.346 %
|
| 223 |
+
|
| 224 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 225 |
+
17 8.5058 ± 0.3903 0.00514 ± 0.00362 0.01637 ± 0.00181 3.478 ± 0.314 % 94.833 ± 0.336 %
|
| 226 |
+
|
| 227 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 228 |
+
18 8.0295 ± 0.3561 0.00602 ± 0.00353 0.01617 ± 0.00172 3.499 ± 0.299 % 94.967 ± 0.323 %
|
| 229 |
+
|
| 230 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 231 |
+
19 8.1624 ± 0.3523 0.00594 ± 0.00348 0.01575 ± 0.00163 3.482 ± 0.286 % 95.005 ± 0.313 %
|
| 232 |
+
|
| 233 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 234 |
+
20 8.2197 ± 0.3459 0.00627 ± 0.00341 0.01595 ± 0.00155 3.512 ± 0.271 % 94.961 ± 0.306 %
|
| 235 |
+
|
| 236 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 237 |
+
21 8.2041 ± 0.3365 0.00591 ± 0.00337 0.01652 ± 0.00155 3.486 ± 0.261 % 94.939 ± 0.300 %
|
| 238 |
+
|
| 239 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 240 |
+
22 8.5789 ± 0.3484 0.00605 ± 0.00344 0.01710 ± 0.00149 3.463 ± 0.251 % 94.848 ± 0.295 %
|
| 241 |
+
|
| 242 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 243 |
+
23 8.5840 ± 0.3419 0.00385 ± 0.00376 0.01939 ± 0.00179 3.779 ± 0.313 % 94.766 ± 0.291 %
|
| 244 |
+
|
| 245 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 246 |
+
24 8.9718 ± 0.3513 0.00331 ± 0.00363 0.01908 ± 0.00172 3.731 ± 0.304 % 94.690 ± 0.287 %
|
| 247 |
+
|
| 248 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 249 |
+
25 8.9749 ± 0.3453 0.00305 ± 0.00354 0.01930 ± 0.00165 3.747 ± 0.291 % 94.635 ± 0.282 %
|
| 250 |
+
|
| 251 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 252 |
+
26 8.4023 ± 0.3139 0.00229 ± 0.00358 0.02087 ± 0.00167 4.341 ± 0.295 % 94.555 ± 0.279 %
|
| 253 |
+
|
| 254 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 255 |
+
27 7.9681 ± 0.2897 0.00208 ± 0.00366 0.02189 ± 0.00164 4.553 ± 0.279 % 94.611 ± 0.272 %
|
| 256 |
+
|
| 257 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 258 |
+
28 8.0798 ± 0.2890 0.00190 ± 0.00356 0.02160 ± 0.00158 4.503 ± 0.272 % 94.636 ± 0.267 %
|
| 259 |
+
|
| 260 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 261 |
+
29 8.0137 ± 0.2819 0.00157 ± 0.00350 0.02142 ± 0.00153 4.450 ± 0.266 % 94.699 ± 0.261 %
|
| 262 |
+
|
| 263 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 264 |
+
30 7.4945 ± 0.2568 0.00124 ± 0.00341 0.02085 ± 0.00148 4.407 ± 0.261 % 94.863 ± 0.252 %
|
| 265 |
+
|
| 266 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 267 |
+
31 7.0537 ± 0.2356 0.00115 ± 0.00331 0.02033 ± 0.00144 4.363 ± 0.255 % 94.991 ± 0.245 %
|
| 268 |
+
|
| 269 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 270 |
+
32 6.8731 ± 0.2240 0.00027 ± 0.00323 0.01999 ± 0.00139 4.338 ± 0.249 % 94.951 ± 0.242 %
|
| 271 |
+
|
| 272 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 273 |
+
33 6.7300 ± 0.2142 -0.00000 ± 0.00315 0.01964 ± 0.00135 4.312 ± 0.243 % 94.985 ± 0.238 %
|
| 274 |
+
|
| 275 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 276 |
+
34 6.9217 ± 0.2184 0.00112 ± 0.00313 0.01980 ± 0.00131 4.281 ± 0.238 % 94.937 ± 0.235 %
|
| 277 |
+
|
| 278 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 279 |
+
35 7.0111 ± 0.2198 0.00076 ± 0.00311 0.02006 ± 0.00128 4.323 ± 0.231 % 94.846 ± 0.234 %
|
| 280 |
+
|
| 281 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 282 |
+
36 7.0632 ± 0.2192 0.00024 ± 0.00304 0.01978 ± 0.00125 4.278 ± 0.227 % 94.956 ± 0.228 %
|
| 283 |
+
|
| 284 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 285 |
+
37 7.0816 ± 0.2169 0.00078 ± 0.00299 0.01976 ± 0.00121 4.299 ± 0.221 % 94.891 ± 0.227 %
|
| 286 |
+
|
| 287 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 288 |
+
38 7.2939 ± 0.2215 0.00148 ± 0.00295 0.01981 ± 0.00119 4.283 ± 0.216 % 94.809 ± 0.225 %
|
| 289 |
+
|
| 290 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 291 |
+
39 7.2490 ± 0.2170 0.00191 ± 0.00289 0.02007 ± 0.00117 4.347 ± 0.211 % 94.791 ± 0.223 %
|
| 292 |
+
|
| 293 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 294 |
+
40 7.0205 ± 0.2060 0.00192 ± 0.00291 0.02150 ± 0.00118 4.698 ± 0.219 % 94.686 ± 0.222 %
|
| 295 |
+
|
| 296 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 297 |
+
41 6.8083 ± 0.1960 0.00283 ± 0.00294 0.02294 ± 0.00123 5.115 ± 0.233 % 94.634 ± 0.220 %
|
| 298 |
+
|
| 299 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 300 |
+
42 6.6027 ± 0.1866 0.00277 ± 0.00292 0.02363 ± 0.00121 5.275 ± 0.229 % 94.678 ± 0.217 %
|
| 301 |
+
|
| 302 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 303 |
+
43 6.4097 ± 0.1778 0.00416 ± 0.00293 0.02437 ± 0.00121 5.391 ± 0.224 % 94.665 ± 0.215 %
|
| 304 |
+
|
| 305 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 306 |
+
44 6.3619 ± 0.1737 0.00415 ± 0.00287 0.02399 ± 0.00118 5.346 ± 0.220 % 94.661 ± 0.212 %
|
| 307 |
+
|
| 308 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 309 |
+
45 6.5034 ± 0.1767 0.00506 ± 0.00284 0.02406 ± 0.00115 5.323 ± 0.217 % 94.580 ± 0.211 %
|
| 310 |
+
|
| 311 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 312 |
+
46 6.6371 ± 0.1786 0.00473 ± 0.00279 0.02389 ± 0.00113 5.280 ± 0.214 % 94.493 ± 0.211 %
|
| 313 |
+
|
| 314 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 315 |
+
47 6.7865 ± 0.1812 0.00448 ± 0.00274 0.02356 ± 0.00111 5.231 ± 0.211 % 94.510 ± 0.208 %
|
| 316 |
+
|
| 317 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 318 |
+
48 6.6696 ± 0.1752 0.00427 ± 0.00269 0.02325 ± 0.00108 5.197 ± 0.208 % 94.526 ± 0.206 %
|
| 319 |
+
|
| 320 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 321 |
+
49 6.7905 ± 0.1768 0.00356 ± 0.00285 0.02612 ± 0.00136 5.312 ± 0.211 % 94.398 ± 0.206 %
|
| 322 |
+
|
| 323 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 324 |
+
50 6.8990 ± 0.1789 0.00327 ± 0.00282 0.02650 ± 0.00140 5.335 ± 0.209 % 94.384 ± 0.204 %
|
| 325 |
+
|
| 326 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 327 |
+
51 7.0089 ± 0.1802 0.00337 ± 0.00277 0.02617 ± 0.00137 5.291 ± 0.206 % 94.410 ± 0.201 %
|
| 328 |
+
|
| 329 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 330 |
+
52 7.0655 ± 0.1797 0.00292 ± 0.00277 0.02638 ± 0.00135 5.285 ± 0.203 % 94.374 ± 0.200 %
|
| 331 |
+
|
| 332 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 333 |
+
53 7.1831 ± 0.1810 0.00306 ± 0.00273 0.02614 ± 0.00133 5.250 ± 0.201 % 94.362 ± 0.198 %
|
| 334 |
+
|
| 335 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 336 |
+
54 7.2411 ± 0.1805 0.00309 ± 0.00269 0.02588 ± 0.00130 5.214 ± 0.198 % 94.394 ± 0.196 %
|
| 337 |
+
|
| 338 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 339 |
+
55 7.2901 ± 0.1798 0.00305 ± 0.00265 0.02569 ± 0.00128 5.181 ± 0.196 % 94.360 ± 0.195 %
|
| 340 |
+
|
| 341 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 342 |
+
56 7.3285 ± 0.1792 0.00273 ± 0.00266 0.02598 ± 0.00137 5.146 ± 0.194 % 94.370 ± 0.193 %
|
| 343 |
+
|
| 344 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 345 |
+
57 7.3302 ± 0.1777 0.00273 ± 0.00263 0.02603 ± 0.00134 5.148 ± 0.191 % 94.386 ± 0.191 %
|
| 346 |
+
|
| 347 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 348 |
+
58 7.3499 ± 0.1767 0.00235 ± 0.00264 0.02793 ± 0.00204 5.201 ± 0.197 % 94.375 ± 0.189 %
|
| 349 |
+
|
| 350 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 351 |
+
59 7.3093 ± 0.1739 0.00221 ± 0.00260 0.02759 ± 0.00201 5.168 ± 0.195 % 94.397 ± 0.188 %
|
| 352 |
+
|
| 353 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 354 |
+
60 7.3140 ± 0.1725 0.00209 ± 0.00257 0.02732 ± 0.00197 5.138 ± 0.193 % 94.392 ± 0.186 %
|
| 355 |
+
|
| 356 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 357 |
+
61 7.3568 ± 0.1721 0.00215 ± 0.00254 0.02707 ± 0.00194 5.111 ± 0.191 % 94.426 ± 0.184 %
|
| 358 |
+
|
| 359 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 360 |
+
62 7.3247 ± 0.1701 0.00228 ± 0.00251 0.02684 ± 0.00191 5.093 ± 0.189 % 94.459 ± 0.182 %
|
| 361 |
+
|
| 362 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 363 |
+
63 7.3708 ± 0.1703 0.00299 ± 0.00251 0.02666 ± 0.00188 5.066 ± 0.187 % 94.435 ± 0.181 %
|
| 364 |
+
|
| 365 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 366 |
+
64 7.3460 ± 0.1680 0.00265 ± 0.00247 0.02640 ± 0.00185 5.035 ± 0.185 % 94.485 ± 0.179 %
|
| 367 |
+
|
| 368 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 369 |
+
65 7.3342 ± 0.1664 0.00278 ± 0.00244 0.02633 ± 0.00183 5.012 ± 0.183 % 94.504 ± 0.177 %
|
| 370 |
+
|
| 371 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 372 |
+
66 7.3727 ± 0.1662 0.00271 ± 0.00242 0.02623 ± 0.00180 5.002 ± 0.181 % 94.486 ± 0.176 %
|
| 373 |
+
|
| 374 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 375 |
+
67 7.3889 ± 0.1655 0.00317 ± 0.00239 0.02605 ± 0.00177 4.981 ± 0.179 % 94.481 ± 0.175 %
|
| 376 |
+
|
| 377 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 378 |
+
68 7.3436 ± 0.1631 0.00346 ± 0.00239 0.02580 ± 0.00175 4.953 ± 0.177 % 94.510 ± 0.173 %
|
| 379 |
+
|
| 380 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 381 |
+
69 7.3788 ± 0.1628 0.00373 ± 0.00236 0.02565 ± 0.00172 4.926 ± 0.176 % 94.510 ± 0.172 %
|
| 382 |
+
|
| 383 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 384 |
+
70 7.3462 ± 0.1606 0.00352 ± 0.00234 0.02572 ± 0.00171 4.940 ± 0.175 % 94.532 ± 0.170 %
|
| 385 |
+
|
| 386 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 387 |
+
71 7.3279 ± 0.1591 0.00352 ± 0.00231 0.02554 ± 0.00169 4.922 ± 0.174 % 94.582 ± 0.168 %
|
| 388 |
+
|
| 389 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 390 |
+
72 7.3486 ± 0.1586 0.00363 ± 0.00230 0.02546 ± 0.00166 4.909 ± 0.172 % 94.564 ± 0.167 %
|
| 391 |
+
|
| 392 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 393 |
+
73 7.3534 ± 0.1576 0.00363 ± 0.00228 0.02536 ± 0.00164 4.900 ± 0.170 % 94.574 ± 0.166 %
|
| 394 |
+
|
| 395 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 396 |
+
74 7.3428 ± 0.1562 0.00385 ± 0.00226 0.02523 ± 0.00162 4.881 ± 0.169 % 94.557 ± 0.165 %
|
| 397 |
+
|
| 398 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 399 |
+
75 7.3390 ± 0.1550 0.00380 ± 0.00224 0.02517 ± 0.00160 4.871 ± 0.167 % 94.573 ± 0.164 %
|
| 400 |
+
|
| 401 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 402 |
+
76 7.4029 ± 0.1555 0.00364 ± 0.00222 0.02499 ± 0.00158 4.848 ± 0.165 % 94.556 ± 0.163 %
|
| 403 |
+
|
| 404 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 405 |
+
77 7.3980 ± 0.1544 0.00345 ± 0.00220 0.02482 ± 0.00156 4.824 ± 0.164 % 94.561 ± 0.162 %
|
| 406 |
+
|
| 407 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 408 |
+
78 7.4152 ± 0.1539 0.00383 ± 0.00218 0.02472 ± 0.00154 4.810 ± 0.163 % 94.565 ± 0.161 %
|
| 409 |
+
|
| 410 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 411 |
+
79 7.4258 ± 0.1532 0.00379 ± 0.00217 0.02464 ± 0.00152 4.794 ± 0.161 % 94.550 ± 0.160 %
|
| 412 |
+
|
| 413 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 414 |
+
80 7.4231 ± 0.1526 0.00421 ± 0.00220 0.02451 ± 0.00150 4.773 ± 0.160 % 94.554 ± 0.159 %
|
| 415 |
+
|
| 416 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 417 |
+
81 7.3958 ± 0.1511 0.00429 ± 0.00219 0.02440 ± 0.00148 4.760 ± 0.158 % 94.568 ± 0.158 %
|
| 418 |
+
|
| 419 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 420 |
+
82 7.3786 ± 0.1496 0.00440 ± 0.00216 0.02422 ± 0.00146 4.744 ± 0.157 % 94.591 ± 0.156 %
|
| 421 |
+
|
| 422 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 423 |
+
83 7.4088 ± 0.1491 0.00441 ± 0.00214 0.02406 ± 0.00144 4.724 ± 0.156 % 94.595 ± 0.155 %
|
| 424 |
+
|
| 425 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 426 |
+
84 7.4283 ± 0.1483 0.00450 ± 0.00212 0.02389 ± 0.00143 4.706 ± 0.154 % 94.556 ± 0.155 %
|
| 427 |
+
|
| 428 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 429 |
+
85 7.4205 ± 0.1470 0.00449 ± 0.00210 0.02371 ± 0.00141 4.687 ± 0.153 % 94.579 ± 0.154 %
|
| 430 |
+
|
| 431 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 432 |
+
86 7.3525 ± 0.1443 0.00449 ± 0.00208 0.02350 ± 0.00139 4.666 ± 0.152 % 94.619 ± 0.152 %
|
| 433 |
+
|
| 434 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 435 |
+
87 7.2925 ± 0.1418 0.00448 ± 0.00205 0.02329 ± 0.00138 4.644 ± 0.151 % 94.632 ± 0.151 %
|
| 436 |
+
|
| 437 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 438 |
+
88 7.2280 ± 0.1393 0.00453 ± 0.00203 0.02309 ± 0.00136 4.623 ± 0.150 % 94.648 ± 0.150 %
|
| 439 |
+
|
| 440 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 441 |
+
89 7.1526 ± 0.1366 0.00445 ± 0.00201 0.02291 ± 0.00135 4.606 ± 0.149 % 94.677 ± 0.149 %
|
| 442 |
+
|
| 443 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 444 |
+
90 7.0965 ± 0.1344 0.00461 ± 0.00199 0.02271 ± 0.00133 4.587 ± 0.148 % 94.693 ± 0.148 %
|
| 445 |
+
|
| 446 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 447 |
+
91 7.0446 ± 0.1323 0.00444 ± 0.00197 0.02251 ± 0.00132 4.566 ± 0.147 % 94.717 ± 0.147 %
|
| 448 |
+
|
| 449 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 450 |
+
92 6.9886 ± 0.1301 0.00453 ± 0.00195 0.02233 ± 0.00130 4.551 ± 0.146 % 94.731 ± 0.146 %
|
| 451 |
+
|
| 452 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 453 |
+
93 6.9966 ± 0.1297 0.00463 ± 0.00194 0.02232 ± 0.00129 4.550 ± 0.145 % 94.721 ± 0.145 %
|
| 454 |
+
|
| 455 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 456 |
+
94 7.0251 ± 0.1294 0.00444 ± 0.00192 0.02218 ± 0.00128 4.534 ± 0.144 % 94.723 ± 0.144 %
|
| 457 |
+
|
| 458 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 459 |
+
95 7.1292 ± 0.1310 0.00452 ± 0.00191 0.02220 ± 0.00126 4.520 ± 0.143 % 94.704 ± 0.144 %
|
| 460 |
+
|
| 461 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 462 |
+
96 7.2202 ± 0.1322 0.00450 ± 0.00190 0.02215 ± 0.00125 4.503 ± 0.142 % 94.702 ± 0.143 %
|
| 463 |
+
|
| 464 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 465 |
+
97 7.2992 ± 0.1330 0.00437 ± 0.00188 0.02206 ± 0.00124 4.486 ± 0.141 % 94.704 ± 0.142 %
|
| 466 |
+
|
| 467 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 468 |
+
98 7.4368 ± 0.1355 0.00453 ± 0.00189 0.02195 ± 0.00123 4.474 ± 0.140 % 94.662 ± 0.142 %
|
| 469 |
+
|
| 470 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 471 |
+
99 7.5502 ± 0.1371 0.00442 ± 0.00187 0.02185 ± 0.00121 4.455 ± 0.139 % 94.641 ± 0.142 %
|
| 472 |
+
|
| 473 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 474 |
+
100 7.5831 ± 0.1371 0.00451 ± 0.00186 0.02208 ± 0.00123 4.461 ± 0.138 % 94.624 ± 0.141 %
|
| 475 |
+
|
| 476 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 477 |
+
101 7.6211 ± 0.1372 0.00422 ± 0.00190 0.02227 ± 0.00123 4.461 ± 0.137 % 94.603 ± 0.141 %
|
| 478 |
+
|
| 479 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 480 |
+
102 7.6960 ± 0.1385 0.00441 ± 0.00190 0.02241 ± 0.00122 4.459 ± 0.136 % 94.583 ± 0.140 %
|
| 481 |
+
|
| 482 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 483 |
+
103 7.6687 ± 0.1375 0.00451 ± 0.00188 0.02230 ± 0.00120 4.449 ± 0.135 % 94.590 ± 0.140 %
|
| 484 |
+
|
| 485 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 486 |
+
104 7.6116 ± 0.1356 0.00454 ± 0.00187 0.02225 ± 0.00119 4.450 ± 0.134 % 94.615 ± 0.139 %
|
| 487 |
+
|
| 488 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 489 |
+
105 7.5010 ± 0.1328 0.00456 ± 0.00186 0.02217 ± 0.00118 4.453 ± 0.133 % 94.648 ± 0.138 %
|
| 490 |
+
|
| 491 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 492 |
+
106 7.3736 ± 0.1296 0.00453 ± 0.00184 0.02203 ± 0.00117 4.454 ± 0.133 % 94.684 ± 0.136 %
|
| 493 |
+
|
| 494 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 495 |
+
107 7.4292 ± 0.1300 0.00449 ± 0.00183 0.02190 ± 0.00116 4.437 ± 0.132 % 94.671 ± 0.136 %
|
| 496 |
+
|
| 497 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 498 |
+
108 7.4401 ± 0.1295 0.00445 ± 0.00181 0.02175 ± 0.00115 4.423 ± 0.131 % 94.673 ± 0.135 %
|
| 499 |
+
|
| 500 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 501 |
+
109 7.4658 ± 0.1295 0.00455 ± 0.00180 0.02166 ± 0.00114 4.412 ± 0.130 % 94.675 ± 0.135 %
|
| 502 |
+
|
| 503 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 504 |
+
110 7.4995 ± 0.1295 0.00450 ± 0.00179 0.02154 ± 0.00113 4.403 ± 0.129 % 94.684 ± 0.134 %
|
| 505 |
+
|
| 506 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 507 |
+
111 7.5443 ± 0.1296 0.00435 ± 0.00177 0.02144 ± 0.00112 4.390 ± 0.129 % 94.662 ± 0.134 %
|
| 508 |
+
|
| 509 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 510 |
+
112 7.5540 ± 0.1291 0.00425 ± 0.00176 0.02131 ± 0.00111 4.375 ± 0.128 % 94.685 ± 0.133 %
|
| 511 |
+
|
| 512 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 513 |
+
113 7.5672 ± 0.1286 0.00433 ± 0.00175 0.02121 ± 0.00110 4.361 ± 0.127 % 94.690 ± 0.132 %
|
| 514 |
+
|
| 515 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 516 |
+
114 7.5850 ± 0.1285 0.00437 ± 0.00174 0.02113 ± 0.00109 4.348 ± 0.126 % 94.692 ± 0.131 %
|
| 517 |
+
|
| 518 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 519 |
+
115 7.5679 ± 0.1275 0.00451 ± 0.00174 0.02132 ± 0.00108 4.408 ± 0.127 % 94.694 ± 0.131 %
|
| 520 |
+
|
| 521 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 522 |
+
116 7.5683 ± 0.1271 0.00437 ± 0.00181 0.02219 ± 0.00111 4.492 ± 0.126 % 94.625 ± 0.131 %
|
| 523 |
+
|
| 524 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 525 |
+
117 7.4748 ± 0.1247 0.00469 ± 0.00181 0.02239 ± 0.00111 4.552 ± 0.124 % 94.627 ± 0.131 %
|
| 526 |
+
|
| 527 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 528 |
+
118 7.3819 ± 0.1223 0.00471 ± 0.00180 0.02264 ± 0.00110 4.605 ± 0.122 % 94.606 ± 0.130 %
|
| 529 |
+
|
| 530 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 531 |
+
119 7.2890 ± 0.1200 0.00503 ± 0.00181 0.02290 ± 0.00109 4.674 ± 0.121 % 94.612 ± 0.130 %
|
| 532 |
+
|
| 533 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 534 |
+
120 7.2094 ± 0.1179 0.00537 ± 0.00181 0.02321 ± 0.00108 4.751 ± 0.120 % 94.595 ± 0.129 %
|
| 535 |
+
|
| 536 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 537 |
+
121 7.1263 ± 0.1158 0.00586 ± 0.00181 0.02347 ± 0.00108 4.839 ± 0.120 % 94.594 ± 0.129 %
|
| 538 |
+
|
| 539 |
+
====== Perplexity statistics ======
|
| 540 |
+
Mean PPL(Q) : 7.126261 ± 0.115850
|
| 541 |
+
Mean PPL(base) : 7.084621 ± 0.114399
|
| 542 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.38%
|
| 543 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.005860 ± 0.001811
|
| 544 |
+
Mean PPL(Q)/PPL(base) : 1.005877 ± 0.001821
|
| 545 |
+
Mean PPL(Q)-PPL(base) : 0.041640 ± 0.012925
|
| 546 |
+
|
| 547 |
+
====== KL divergence statistics ======
|
| 548 |
+
Mean KLD: 0.023465 ± 0.001079
|
| 549 |
+
Maximum KLD: 21.223093
|
| 550 |
+
99.9% KLD: 1.623019
|
| 551 |
+
99.0% KLD: 0.278114
|
| 552 |
+
95.0% KLD: 0.076726
|
| 553 |
+
90.0% KLD: 0.039192
|
| 554 |
+
Median KLD: 0.004631
|
| 555 |
+
10.0% KLD: 0.000015
|
| 556 |
+
5.0% KLD: 0.000002
|
| 557 |
+
1.0% KLD: -0.000000
|
| 558 |
+
0.1% KLD: -0.000004
|
| 559 |
+
Minimum KLD: -0.000043
|
| 560 |
+
|
| 561 |
+
====== Token probability statistics ======
|
| 562 |
+
Mean Δp: -0.059 ± 0.028 %
|
| 563 |
+
Maximum Δp: 99.094%
|
| 564 |
+
99.9% Δp: 40.583%
|
| 565 |
+
99.0% Δp: 13.248%
|
| 566 |
+
95.0% Δp: 4.621%
|
| 567 |
+
90.0% Δp: 2.344%
|
| 568 |
+
75.0% Δp: 0.334%
|
| 569 |
+
Median Δp: 0.000%
|
| 570 |
+
25.0% Δp: -0.357%
|
| 571 |
+
10.0% Δp: -2.518%
|
| 572 |
+
5.0% Δp: -4.759%
|
| 573 |
+
1.0% Δp: -14.909%
|
| 574 |
+
0.1% Δp: -41.509%
|
| 575 |
+
Minimum Δp: -91.317%
|
| 576 |
+
RMS Δp : 4.839 ± 0.120 %
|
| 577 |
+
Same top p: 94.594 ± 0.129 %
|
| 578 |
+
|
| 579 |
+
llama_perf_context_print: load time = 58865.61 ms
|
| 580 |
+
llama_perf_context_print: prompt eval time = 699964.51 ms / 61952 tokens ( 11.30 ms per token, 88.51 tokens per second)
|
| 581 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 582 |
+
llama_perf_context_print: total time = 714308.56 ms / 61953 tokens
|
| 583 |
+
llama_perf_context_print: graphs reused = 120
|
| 584 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 585 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 10369 + ( 13440 = 11632 + 64 + 1743) + 325 |
|
| 586 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 13671 + ( 10105 = 9649 + 60 + 396) + 357 |
|
| 587 |
+
llama_memory_breakdown_print: | - Host | 160389 = 160376 + 0 + 13 |
|
| 588 |
+
```
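`kld_data/llm_quantization_data.csv` below aggregates one row per quant from these summary blocks, and the bundled `01_kld_vs_filesize*.png` plots were drawn from that data. Here is a minimal sketch of re-plotting KLD against file size from the CSV, assuming pandas and matplotlib are available; this is not the original plotting script, the output filename is arbitrary, and only column names taken from the CSV header are used.

```python
# Minimal sketch (illustrative only): scatter Mean KLD vs. file size for every
# MiniMax-M2.5 quant listed in kld_data/llm_quantization_data.csv.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("kld_data/llm_quantization_data.csv")

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df["file_size_gib"], df["Mean KLD_mean"])
for _, row in df.iterrows():
    ax.annotate(row["model_name"], (row["file_size_gib"], row["Mean KLD_mean"]),
                fontsize=6, xytext=(3, 3), textcoords="offset points")
ax.set_xlabel("file size (GiB)")
ax.set_ylabel("Mean KLD vs. BF16 baseline")
ax.set_yscale("log")  # KLD spans roughly 0.02 (Q5_K_M) to 0.67 (UD-TQ1_0) across quants
ax.set_title("MiniMax-M2.5 quantizations: KLD vs. file size")
fig.tight_layout()
fig.savefig("kld_vs_filesize_sketch.png", dpi=150)
```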
|
kld_data/llm_quantization_data.csv
ADDED
|
@@ -0,0 +1,32 @@
| 1 |
+
model_name,file_size_gb,bpw,Mean KLD_mean,0.1% KLD,0.1% Δp,1.0% KLD,1.0% Δp,10.0% KLD,10.0% Δp,25.0% Δp,5.0% KLD,5.0% Δp,75.0% Δp,90.0% KLD,90.0% Δp,95.0% KLD,95.0% Δp,99.0% KLD,99.0% Δp,99.9% KLD,99.9% Δp,"Cor(ln(PPL(Q)), ln(PPL(base)))",Device 0,Device 1,Maximum KLD,Maximum Δp,Mean KLD_std,Mean PPL(Q)-PPL(base)_mean,Mean PPL(Q)-PPL(base)_std,Mean PPL(Q)/PPL(base)_mean,Mean PPL(Q)/PPL(base)_std,Mean PPL(Q)_mean,Mean PPL(Q)_std,Mean PPL(base)_mean,Mean PPL(base)_std,Mean ln(PPL(Q)/PPL(base))_mean,Mean ln(PPL(Q)/PPL(base))_std,Mean Δp_mean,Mean Δp_std,Median KLD,Median Δp,Minimum KLD,Minimum Δp,RMS Δp_mean,RMS Δp_std,Same top p_mean,Same top p_std,file_path,file_size_gib,ggml_cuda_init,kl_divergence,llama_context,llama_kv_cache,llama_memory_breakdown_print,llama_model_loader,llama_params_fit,llama_params_fit_impl,llama_perf_context_print,load,load_tensors,print_info,sched_reserve,system_info
|
| 2 |
+
MiniMax-M2.5-IQ3_S (aes_sedai),84.56790605824001,2.96,0.244096,-1e-06,-98.263,2e-06,-87.072,0.000251,-14.168,-3.034,3.7e-05,-32.051,0.357,0.451059,4.938,1.17186,10.539,3.809325,28.277,7.662232,67.832,93.8,30908.6,30908.6,16.5681,99.859,0.004148,1.200261,0.048762,1.169418,0.0067,8.284882,0.135705,7.084621,0.114399,0.156506,0.00573,-3.782,0.096,0.051302,-0.027,-4e-06,-99.944,17.343,0.198,83.244,0.213,kld/MiniMax-M2.5/calibration_datav3/aes_sedai_prev/MiniMax-M2.5-IQ3_S.md,78.76,2.0,,0.76,160.0,-8003780024013.0,-2124.0,0.43,13600.47743,120.0,1.3355,15761.11,256.0,6.801,4.848568601128313e+47
|
| 3 |
+
MiniMax-M2.5-IQ4_XS (aes_sedai),108.5552984064,3.8,0.095077,-3e-06,-90.948,0.0,-50.844,7.4e-05,-6.515,-1.204,1.1e-05,-13.25,0.362,0.160903,3.78,0.361787,7.648,1.549927,21.758,4.400382,61.536,97.77,30908.6,30908.6,16.46847,99.378,0.002168,0.428967,0.026367,1.060549,0.003641,7.513587,0.122746,7.084621,0.114399,0.058787,0.003433,-1.323,0.06,0.018892,-0.002,-7e-06,-98.497,10.585,0.165,89.781,0.172,kld/MiniMax-M2.5/calibration_datav3/aes_sedai_prev/MiniMax-M2.5-IQ4_XS.md,101.1,2.0,,0.76,160.0,-102915102902013.0,-462.0,0.41,16426.47743,120.0,1.3355,16868.11,256.0,7.991,4.848568601128313e+47
|
| 4 |
+
MiniMax-M2.5-Q4_K_M (aes_sedai),140.14478286848,4.9,0.041269,-3e-06,-64.883,-0.0,-24.156,2.6e-05,-3.292,-0.488,4e-06,-6.613,0.395,0.066285,2.992,0.139787,5.889,0.573439,16.855,2.695222,54.264,99.03,30908.6,30908.6,17.297672,99.867,0.001426,0.088838,0.016278,1.01254,0.002292,7.173459,0.116673,7.084621,0.114399,0.012462,0.002264,-0.257,0.038,0.00779,0.0,-2.4e-05,-97.161,6.703,0.135,92.974,0.146,kld/MiniMax-M2.5/calibration_datav3/aes_sedai_prev/MiniMax-M2.5-Q4_K_M.md,130.52,2.0,,0.76,160.0,-133047133034013.0,-562.0,0.44,20161.47743,120.0,1.3355,18326.11,256.0,6.591,4.848568601128313e+47
|
| 5 |
+
MiniMax-M2.5-Q5_K_M (aes_sedai),168.82442698752,5.91,0.023465,-4e-06,-41.509,-0.0,-14.909,1.5e-05,-2.518,-0.357,2e-06,-4.759,0.334,0.039192,2.344,0.076726,4.621,0.278114,13.248,1.623019,40.583,99.38,30908.6,30908.6,21.223093,99.094,0.001079,0.04164,0.012925,1.005877,0.001821,7.126261,0.11585,7.084621,0.114399,0.00586,0.001811,-0.059,0.028,0.004631,0.0,-4.3e-05,-91.317,4.839,0.12,94.594,0.129,kld/MiniMax-M2.5/calibration_datav3/aes_sedai_prev/MiniMax-M2.5-Q5_K_M.md,157.23,2.0,,0.76,160.0,-160389160376013.0,-662.0,0.41,23545.47743,120.0,1.3355,19649.11,256.0,6.531,4.848568601128313e+47
|
| 6 |
+
MiniMax-M2.5-IQ4_NL (unsloth),128.98860531712,4.51,0.064735,-2e-06,-78.434,0.0,-34.326,7.1e-05,-4.897,-0.789,1.1e-05,-9.714,0.499,0.117601,3.781,0.238753,7.62,0.976666,21.321,2.917501,64.64,98.56,30908.6,30908.6,14.542189,97.725,0.001377,0.194069,0.020173,1.027422,0.002829,7.271309,0.118527,7.07724,0.114279,0.027052,0.002753,-0.558,0.049,0.015033,-0.0,-5e-06,-99.608,8.646,0.149,90.971,0.163,kld/MiniMax-M2.5/calibration_datav3/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL.md,120.13,2.0,12151240968.0,6.11,1816.0,-1226321225280104.0,-4372.0,5.15,-1.3090524422695117e+17,0.0,1.3355,118705.46,256.0,22.901,4.848568601128313e+47
|
| 7 |
+
MiniMax-M2.5-IQ4_XS (unsloth),121.89117186048,4.26,0.064262,-2e-06,-81.043,0.0,-35.226,7.1e-05,-4.983,-0.828,1.2e-05,-10.011,0.494,0.117462,3.712,0.236258,7.679,0.970652,20.813,2.938713,62.267,98.6,30908.6,30908.6,11.49209,97.103,0.001341,0.196931,0.019861,1.027826,0.002787,7.27417,0.118372,7.07724,0.114279,0.027446,0.002712,-0.587,0.049,0.014982,-0.001,-4e-06,-96.341,8.631,0.148,90.964,0.163,kld/MiniMax-M2.5/calibration_datav3/unsloth/IQ4_XS/MiniMax-M2.5-IQ4_XS.md,113.52,2.0,12151240968.0,6.11,1800.0,-1158441157400104.0,-4372.0,5.03,-1.309051432227016e+17,0.0,1.3355,118296.47,256.0,22.701,4.848568601128313e+47
|
| 8 |
+
MiniMax-M2.5-MXFP4_MOE (unsloth),123.77022005248,4.33,0.090198,-1e-06,-89.542,1e-06,-45.752,0.00011,-5.444,-0.799,1.8e-05,-11.92,0.75,0.158955,4.746,0.339766,8.912,1.398163,24.093,4.447414,70.726,98.01,30908.6,30908.6,11.834617,97.807,0.001826,0.250086,0.023933,1.035337,0.003359,7.327326,0.119622,7.07724,0.114279,0.034727,0.003244,-0.666,0.059,0.019966,-0.001,-4e-06,-99.668,10.303,0.165,89.934,0.171,kld/MiniMax-M2.5/calibration_datav3/unsloth/MXFP4_MOE/MiniMax-M2.5-MXFP4_MOE.md,115.27,2.0,12151240968.0,6.11,1800.0,-1175141174100104.0,-4186.0,5.2,-1.3090514422283158e+17,0.0,1.3355,118309.61,256.0,22.541,4.848568601128313e+47
|
| 9 |
+
MiniMax-M2.5-UD-TQ1_0 (unsloth),55.75941292032,1.95,0.668673,1e-06,-99.884,2e-05,-98.185,0.002351,-40.787,-9.676,0.000373,-75.113,0.3,1.688242,7.379,3.32587,15.509,6.945683,40.406,10.942347,75.65,83.35,30908.6,30908.6,19.120047,99.93,0.007572,4.096171,0.115254,1.578781,0.015248,11.173411,0.191659,7.07724,0.114279,0.456653,0.009658,-8.9,0.145,0.218502,-0.356,-3e-06,-99.992,26.926,0.207,70.676,0.259,kld/MiniMax-M2.5/calibration_datav3/unsloth/MiniMax-M2.5-UD-TQ1_0.md,51.93,2.0,12151240968.0,6.11,1560.0,-52797526930104.0,-435.0,4.05,-1.3090361722622125e+17,0.0,1.3355,118888.73,256.0,23.181,4.848568601128313e+47
|
| 10 |
+
MiniMax-M2.5-Q2_K (unsloth),83.29015328768,2.91,0.313928,-0.0,-99.236,5e-06,-91.474,0.000522,-18.212,-3.628,8.6e-05,-38.882,0.558,0.644059,6.356,1.466614,13.412,4.444966,34.047,7.899748,78.428,92.53,30908.6,30908.6,17.506372,99.777,0.004614,1.52182,0.056629,1.21503,0.007688,8.599059,0.142194,7.07724,0.114279,0.194769,0.006328,-4.223,0.107,0.082348,-0.039,-4e-06,-99.964,19.287,0.2,80.512,0.226,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q2_K/MiniMax-M2.5-Q2_K.md,77.57,2.0,12151240968.0,6.11,1720.0,-79042789380104.0,-61.0,4.68,-1.309046332276011e+17,0.0,1.3355,118866.21,256.0,22.761,4.848568601128313e+47
|
| 11 |
+
MiniMax-M2.5-Q2_K_L (unsloth),83.44047714304,2.92,0.311413,0.0,-99.161,5e-06,-91.576,0.000529,-17.933,-3.596,8.4e-05,-38.879,0.556,0.644883,6.471,1.484256,13.223,4.343384,34.084,7.724108,76.443,92.64,30908.6,30908.6,19.883234,99.923,0.004532,1.513547,0.05623,1.213861,0.007625,8.590786,0.142066,7.07724,0.114279,0.193806,0.006281,-4.196,0.107,0.082115,-0.039,-3e-06,-99.964,19.217,0.2,80.301,0.226,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q2_K_L/MiniMax-M2.5-Q2_K_L.md,77.71,2.0,12151240968.0,6.11,1720.0,-79193790890104.0,-61.0,4.79,-1.309046332276011e+17,0.0,1.3355,118866.21,256.0,23.131,4.848568601128313e+47
|
| 12 |
+
MiniMax-M2.5-Q3_K_M (unsloth),109.26396801024,3.82,0.122623,-1e-06,-94.293,1e-06,-58.682,0.000141,-7.503,-1.384,2.3e-05,-15.646,0.55,0.218426,4.655,0.478294,9.206,2.02842,26.457,4.831515,69.05,97.25,30908.6,30908.6,14.374887,99.035,0.0023,0.469656,0.029228,1.066361,0.004064,7.546896,0.123231,7.07724,0.114279,0.064252,0.003811,-1.434,0.068,0.027433,-0.003,-6e-06,-99.551,12.005,0.175,87.85,0.186,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q3_K_M/MiniMax-M2.5-Q3_K_M.md,101.76,2.0,12151240968.0,6.11,1784.0,-1038011036970104.0,-61.0,5.03,-1.309050412248714e+17,0.0,1.3355,118529.31,256.0,22.991,4.848568601128313e+47
|
| 13 |
+
MiniMax-M2.5-Q3_K_S (unsloth),98.68761104384001,3.45,0.202119,-1e-06,-98.089,2e-06,-77.397,0.000254,-11.397,-2.3,4.4e-05,-23.594,0.561,0.390511,5.604,0.855274,11.539,3.069579,31.663,6.40655,77.255,95.25,30908.6,30908.6,15.605315,99.015,0.00332,0.840381,0.040702,1.118744,0.005621,7.917621,0.129973,7.07724,0.114279,0.112207,0.005024,-2.448,0.085,0.051354,-0.013,-4e-06,-99.954,15.149,0.187,84.152,0.208,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q3_K_S/MiniMax-M2.5-Q3_K_S.md,91.91,2.0,12151240968.0,6.11,1752.0,-93715936110104.0,-61.0,4.84,-1.3090483822726115e+17,0.0,1.3355,118800.45,256.0,23.191,4.848568601128313e+47
|
| 14 |
+
MiniMax-M2.5-Q4_0 (unsloth),129.4932639744,4.53,0.081161,-1e-06,-85.917,1e-06,-42.549,9.7e-05,-6.194,-1.128,1.6e-05,-11.813,0.43,0.150507,4.032,0.301211,7.951,1.201161,22.243,3.576555,63.169,98.22,30908.6,30908.6,9.65519,90.107,0.001589,0.272225,0.022708,1.038465,0.003182,7.349464,0.119566,7.07724,0.114279,0.037743,0.003065,-0.955,0.055,0.020212,-0.003,-4e-06,-98.687,9.622,0.156,89.882,0.172,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q4_0/MiniMax-M2.5-Q4_0.md,120.6,2.0,12151240968.0,6.11,1816.0,-1231191230150104.0,-61.0,5.23,-1.309052442267912e+17,0.0,1.3355,118689.33,256.0,24.481,4.848568601128313e+47
|
| 15 |
+
MiniMax-M2.5-Q4_1 (unsloth),143.21568448512,5.01,0.066154,-2e-06,-78.081,0.0,-35.755,7.1e-05,-5.108,-0.911,1.2e-05,-9.877,0.412,0.117684,3.644,0.242768,7.395,1.036349,22.12,3.2009,65.042,98.58,30908.6,30908.6,10.726225,97.286,0.0014,0.187881,0.019941,1.026547,0.002803,7.265121,0.118153,7.07724,0.114279,0.026201,0.00273,-0.668,0.05,0.015232,-0.002,-1e-05,-98.534,8.817,0.151,91.006,0.163,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q4_1/MiniMax-M2.5-Q4_1.md,133.38,2.0,12151240968.0,6.11,1832.0,-1361801360760104.0,-61.0,4.08,-1.309053462256713e+17,0.0,1.3355,118561.51,256.0,22.971,4.848568601128313e+47
|
| 16 |
+
MiniMax-M2.5-Q4_K_M (unsloth),138.33015918592002,4.84,0.059396,-2e-06,-76.276,0.0,-30.774,6.3e-05,-4.696,-0.802,1.1e-05,-9.33,0.448,0.106275,3.568,0.209868,7.08,0.916992,21.613,2.728867,62.163,98.73,30908.6,30908.6,13.867476,98.983,0.001379,0.147136,0.018739,1.02079,0.002639,7.224376,0.11742,7.07724,0.114279,0.020577,0.002586,-0.495,0.047,0.013731,-0.001,-8e-06,-99.429,8.247,0.148,91.369,0.16,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q4_K_M/MiniMax-M2.5-Q4_K_M.md,128.83,2.0,12151240968.0,6.11,1832.0,-1315421314380104.0,-661.0,4.09,-1.3090534622334154e+17,0.0,1.3355,118328.02,256.0,22.771,4.848568601128313e+47
|
| 17 |
+
MiniMax-M2.5-Q4_K_S (unsloth),130.0301348864,4.55,0.066373,-2e-06,-83.438,0.0,-33.451,7.2e-05,-4.932,-0.861,1.2e-05,-9.856,0.461,0.118923,3.765,0.242942,7.421,1.020737,22.124,3.265465,65.027,98.54,30908.6,30908.6,10.885548,98.147,0.00141,0.184504,0.020245,1.02607,0.002847,7.261744,0.118146,7.07724,0.114279,0.025736,0.002775,-0.573,0.05,0.015188,-0.001,-1.2e-05,-99.56,8.835,0.156,90.848,0.164,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q4_K_S/MiniMax-M2.5-Q4_K_S.md,121.1,2.0,12151240968.0,6.11,1816.0,-1235771234730104.0,-61.0,4.08,-1.3090524522682118e+17,0.0,1.3355,118692.33,256.0,22.721,4.848568601128313e+47
|
| 18 |
+
MiniMax-M2.5-Q5_K_M (unsloth),162.29607669760003,5.68,0.031922,-2e-06,-58.74,0.0,-19.839,3.4e-05,-3.379,-0.563,5e-06,-6.405,0.344,0.058731,2.719,0.112822,5.434,0.434627,15.872,1.532143,50.362,99.29,30908.6,30908.6,15.861979,88.707,0.000913,0.065414,0.013786,1.009243,0.001945,7.142653,0.115912,7.07724,0.114279,0.0092,0.001927,-0.248,0.034,0.007488,-0.0,-9e-06,-83.023,5.998,0.121,93.508,0.14,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q5_K_M/MiniMax-M2.5-Q5_K_M.md,151.15,2.0,12151240968.0,6.11,1848.0,-1543391542350104.0,-661.0,4.13,-1.3090544922353152e+17,0.0,1.3355,118331.1,256.0,24.421,4.848568601128313e+47
|
| 19 |
+
MiniMax-M2.5-Q5_K_S (unsloth),157.47497590784002,5.51,0.03376,-2e-06,-61.381,0.0,-21.343,3.7e-05,-3.529,-0.54,6e-06,-6.455,0.373,0.062966,2.87,0.122497,5.556,0.452087,16.258,1.588454,48.648,99.24,30908.6,30908.6,11.39131,98.972,0.000879,0.065874,0.014247,1.009308,0.002011,7.143114,0.115919,7.07724,0.114279,0.009265,0.001992,-0.273,0.035,0.008101,-0.0,-1.1e-05,-97.513,6.213,0.128,93.285,0.142,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q5_K_S/MiniMax-M2.5-Q5_K_S.md,146.66,2.0,12151240968.0,6.11,1848.0,-1497661496620104.0,-61.0,4.08,-1.3090544822826104e+17,0.0,1.3355,118804.94,256.0,23.271,4.848568601128313e+47
|
| 20 |
+
MiniMax-M2.5-Q6_K (unsloth),187.75449534464002,6.57,0.020127,-3e-06,-41.212,-0.0,-13.809,2e-05,-2.513,-0.379,4e-06,-4.751,0.338,0.038561,2.435,0.071629,4.77,0.261472,13.488,1.02455,40.507,99.49,30908.6,30908.6,6.100366,78.814,0.000519,0.018383,0.011593,1.002597,0.001637,7.095623,0.115273,7.07724,0.114279,0.002594,0.001633,-0.024,0.026,0.004884,0.0,-3e-05,-99.775,4.61,0.099,94.711,0.127,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q6_K/MiniMax-M2.5-Q6_K.md,174.86,2.0,12151240968.0,6.11,1864.0,-1786421785380104.0,-6436.0,4.1,-1.3090555122612126e+17,0.0,1.3355,118574.08,256.0,22.431,4.848568601128313e+47
|
| 21 |
+
MiniMax-M2.5-Q8_0 (unsloth),243.12736120832002,8.51,0.016814,-3e-06,-34.414,-0.0,-11.482,1.5e-05,-2.211,-0.326,3e-06,-4.178,0.293,0.030438,2.076,0.05575,4.001,0.195046,12.005,0.988008,37.836,99.58,30908.6,30908.6,13.118722,96.224,0.000772,0.031335,0.01058,1.004428,0.001491,7.108575,0.115604,7.07724,0.114279,0.004418,0.001484,-0.016,0.024,0.003656,0.0,-9e-06,-68.203,4.136,0.105,95.281,0.121,kld/MiniMax-M2.5/calibration_datav3/unsloth/Q8_0/MiniMax-M2.5-Q8_0.md,226.43,2.0,12151240968.0,6.11,1896.0,-2312992311950104.0,-80436.0,4.18,-1.3090575422050182e+17,0.0,1.3355,117980.67,256.0,22.091,4.848568601128313e+47
|
| 22 |
+
MiniMax-M2.5-UD-IQ1_M (unsloth),68.42956644352,2.39,0.47889,0.0,-99.748,6e-06,-97.106,0.000874,-26.713,-5.199,0.000141,-60.999,0.513,1.090507,7.115,2.622951,14.629,6.009495,37.119,9.95442,82.469,87.75,30908.6,30908.6,21.323242,99.309,0.006451,2.933232,0.089724,1.41446,0.01172,10.010472,0.171308,7.07724,0.114279,0.346748,0.008286,-6.219,0.129,0.120992,-0.071,-3e-06,-99.988,23.529,0.211,76.364,0.242,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-IQ1_M/MiniMax-M2.5-UD-IQ1_M.md,63.73,2.0,12151240968.0,6.11,1640.0,-64861647570104.0,-192.0,4.19,-1.3090412522814106e+17,0.0,1.3355,119000.01,256.0,23.601,4.848568601128313e+47
|
| 23 |
+
MiniMax-M2.5-UD-IQ1_S (unsloth),63.18970634240001,2.21,0.563569,0.0,-99.845,1.1e-05,-97.535,0.00129,-30.853,-6.781,0.000202,-67.276,0.46,1.339546,7.47,2.957068,15.406,6.580419,39.711,10.335377,79.946,85.61,30908.6,30908.6,19.248781,98.664,0.007049,3.468577,0.102926,1.490103,0.013455,10.545816,0.182117,7.07724,0.114279,0.398845,0.00903,-7.067,0.136,0.157771,-0.139,-3e-06,-99.988,24.837,0.21,73.713,0.251,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-IQ1_S/MiniMax-M2.5-UD-IQ1_S.md,58.85,2.0,12151240968.0,6.11,1608.0,-59865597610104.0,-114.0,4.17,-1.3090392222619125e+17,0.0,1.3355,118837.02,256.0,23.591,4.848568601128313e+47
|
| 24 |
+
MiniMax-M2.5-UD-IQ2_M (unsloth),78.18987962368,2.74,0.298683,-1e-06,-99.389,3e-06,-91.5,0.000416,-16.104,-3.293,6.2e-05,-36.994,0.475,0.588759,5.691,1.470186,11.678,4.460805,30.773,8.051671,74.673,92.57,30908.6,30908.6,16.626404,97.156,0.004697,1.623099,0.058276,1.229341,0.007796,8.700339,0.144862,7.07724,0.114279,0.206478,0.006342,-4.179,0.104,0.065393,-0.027,-4e-06,-99.982,18.785,0.204,81.491,0.221,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-IQ2_M/MiniMax-M2.5-UD-IQ2_M.md,72.82,2.0,12151240968.0,6.11,1688.0,-74189740850104.0,-4234.0,4.2,-1.3090443022553131e+17,0.0,1.3355,118691.54,256.0,23.691,4.848568601128313e+47
|
| 25 |
+
MiniMax-M2.5-UD-IQ2_XXS (unsloth),74.10966069248,2.59,0.355179,-0.0,-99.583,4e-06,-93.948,0.000532,-18.567,-4.003,8.4e-05,-45.571,0.474,0.736885,6.058,1.803043,12.563,5.058808,32.687,8.835881,76.421,90.99,30908.6,30908.6,19.298819,99.455,0.005328,1.9566,0.066985,1.276464,0.008944,9.03384,0.151314,7.07724,0.114279,0.244094,0.007007,-4.85,0.113,0.079771,-0.041,-3e-06,-99.957,20.422,0.207,79.767,0.229,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-IQ2_XXS/MiniMax-M2.5-UD-IQ2_XXS.md,69.02,2.0,12151240968.0,6.11,1672.0,-70301701970104.0,-4234.0,4.15,-1.3090432822839104e+17,0.0,1.3355,118993.18,256.0,22.981,4.848568601128313e+47
|
| 26 |
+
MiniMax-M2.5-UD-IQ3_XXS (unsloth),93.30816450560002,3.26,0.163725,-1e-06,-97.304,2e-06,-74.846,0.000195,-9.9,-2.025,3e-05,-20.65,0.422,0.299838,4.425,0.687143,9.443,2.694207,27.025,5.711734,74.181,96.11,30908.6,30908.6,9.271864,94.447,0.002878,0.727652,0.036143,1.102816,0.004999,7.804892,0.127436,7.07724,0.114279,0.097867,0.004533,-2.365,0.08,0.034524,-0.011,-5e-06,-99.748,14.174,0.188,86.271,0.196,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS.md,86.9,2.0,12151240968.0,6.11,1736.0,-88605885010104.0,-4231.0,4.25,-1.3090473622822106e+17,0.0,1.3355,118912.2,256.0,23.391,4.848568601128313e+47
|
| 27 |
+
MiniMax-M2.5-UD-Q2_K_XL (unsloth),85.91008333824001,3.01,0.248731,-0.0,-98.99,3e-06,-88.453,0.000318,-12.819,-2.45,5.2e-05,-30.17,0.68,0.465921,6.502,1.125538,12.625,3.810123,32.363,7.450212,77.019,94.07,30908.6,30908.6,15.101192,99.792,0.004073,1.168212,0.048381,1.165066,0.006575,8.245452,0.136454,7.07724,0.114279,0.152778,0.005643,-3.037,0.097,0.056457,-0.011,-3e-06,-99.911,17.363,0.199,82.991,0.214,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q2_K_XL/MiniMax-M2.5-UD-Q2_K_XL.md,80.01,2.0,12151240968.0,6.11,1720.0,-81529814250104.0,-611.0,3.92,-1.3090463422561131e+17,0.0,1.3355,118667.83,256.0,22.741,4.848568601128313e+47
|
| 28 |
+
MiniMax-M2.5-UD-Q3_K_XL (unsloth),101.28606625792,3.54,0.130427,-1e-06,-93.782,1e-06,-60.177,0.000148,-7.498,-1.251,2.4e-05,-16.279,0.628,0.237711,5.009,0.508409,9.813,2.102021,27.309,5.405243,71.001,97.06,30908.6,30908.6,15.723516,99.654,0.002466,0.450305,0.029935,1.063627,0.004188,7.527545,0.122738,7.07724,0.114279,0.061685,0.003937,-1.373,0.069,0.028882,-0.002,-4e-06,-99.858,12.257,0.175,87.704,0.187,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL.md,94.33,2.0,12151240968.0,6.11,1768.0,-96190960860104.0,-611.0,4.01,-1.309049392258113e+17,0.0,1.3355,118639.11,256.0,23.011,4.848568601128313e+47
|
| 29 |
+
MiniMax-M2.5-UD-Q4_K_XL (unsloth),131.3186250752,4.59,0.063959,-2e-06,-80.314,0.0,-33.469,6.5e-05,-4.746,-0.822,1.1e-05,-9.491,0.444,0.111804,3.683,0.230491,7.26,0.990056,22.195,3.390165,66.834,98.6,30908.6,30908.6,16.526793,97.605,0.00151,0.148901,0.019641,1.021039,0.002772,7.226141,0.117342,7.07724,0.114279,0.020821,0.002714,-0.525,0.049,0.014112,-0.001,-4e-06,-99.345,8.666,0.154,91.233,0.161,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q4_K_XL/MiniMax-M2.5-UD-Q4_K_XL.md,122.3,2.0,12151240968.0,6.11,1816.0,-1248501247460104.0,-611.0,4.04,-1.3090524422729114e+17,0.0,1.3355,118739.39,256.0,22.711,4.848568601128313e+47
|
| 30 |
+
MiniMax-M2.5-UD-Q5_K_XL (unsloth),161.75920578560002,5.66,0.030918,-3e-06,-54.258,0.0,-18.928,3.1e-05,-3.236,-0.522,5e-06,-6.139,0.347,0.056246,2.777,0.108696,5.429,0.421479,15.958,1.663864,52.836,99.31,30908.6,30908.6,10.278928,96.566,0.000823,0.059542,0.013596,1.008413,0.001918,7.136782,0.115879,7.07724,0.114279,0.008378,0.001902,-0.186,0.034,0.007063,-0.0,-5e-06,-95.246,5.907,0.124,93.716,0.138,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q5_K_XL/MiniMax-M2.5-UD-Q5_K_XL.md,150.65,2.0,12151240968.0,6.11,1832.0,-1538601537560104.0,-6104.0,4.4,-1.3090534822330154e+17,0.0,1.3355,118324.67,256.0,22.571,4.848568601128313e+47
|
| 31 |
+
MiniMax-M2.5-UD-Q6_K_XL (unsloth),194.28284563456,6.8,0.018391,-3e-06,-37.281,-0.0,-12.721,1.8e-05,-2.347,-0.343,3e-06,-4.557,0.303,0.034229,2.184,0.064967,4.283,0.220442,12.226,1.094286,37.605,99.55,30908.6,30908.6,9.594548,77.764,0.000591,0.035718,0.011009,1.005047,0.001552,7.112958,0.115626,7.07724,0.114279,0.005034,0.001544,-0.049,0.024,0.004147,-0.0,-3.9e-05,-72.369,4.249,0.093,95.015,0.124,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q6_K_XL/MiniMax-M2.5-UD-Q6_K_XL.md,180.94,2.0,12151240968.0,6.11,1880.0,-1847591846550104.0,-6228.0,4.13,-1.3090565222461141e+17,0.0,1.3355,118407.23,256.0,21.731,4.848568601128313e+47
|
| 32 |
+
MiniMax-M2.5-UD-Q8_K_XL (unsloth),261.37023479808,9.14,0.017066,-2e-06,-34.207,-0.0,-11.905,1.7e-05,-2.288,-0.341,3e-06,-4.339,0.323,0.03165,2.248,0.057932,4.292,0.206448,12.02,0.835423,36.779,99.58,30908.6,30908.6,14.732626,97.571,0.000686,0.033229,0.010599,1.004695,0.001493,7.110469,0.115634,7.07724,0.114279,0.004684,0.001486,0.002,0.024,0.004152,0.0,-1.2e-05,-63.506,4.13,0.099,95.197,0.122,kld/MiniMax-M2.5/calibration_datav3/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL.md,243.42,2.0,12151240968.0,6.11,1912.0,-2480822479780104.0,-80409.0,4.17,-1.309058552276211e+17,0.0,1.3355,118676.27,256.0,22.291,4.848568601128313e+47
|
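The rows above are the per-quant summary table (`kld_data/llm_quantization_data.csv`) that the `01_kld_vs_filesize*.png` and `02_ppl_vs_filesize*.png` plots are built from. The CSV header row is not visible in this excerpt, so the sketch below infers the column positions from the values themselves (column 1 appears to be the quant name, column 2 the file size in GB, column 3 the BPW, column 4 the mean KL divergence, matching the per-quant logs further down); treat those indices as assumptions, not a documented schema.

```python
# Minimal sketch: plot mean KLD vs file size from the summary CSV.
# Assumptions (not confirmed by a header in this view): col 0 = quant name,
# col 1 = file size in GB, col 2 = BPW, col 3 = mean KLD vs the BF16 reference.
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv("kld_data/llm_quantization_data.csv")
name_col, size_col, kld_col = df.columns[0], df.columns[1], df.columns[3]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df[size_col], df[kld_col])
for _, row in df.iterrows():
    ax.annotate(str(row[name_col]), (row[size_col], row[kld_col]), fontsize=6)
ax.set_xlabel("file size (GB)")
ax.set_ylabel("mean KL divergence vs BF16")
ax.set_yscale("log")  # KLD spans roughly 0.017 to 0.67 across these quants
ax.set_title("MiniMax-M2.5 quants: KLD vs file size")
fig.savefig("kld_vs_filesize_sketch.png", dpi=150)
```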
kld_data/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL.md
ADDED
|
@@ -0,0 +1,372 @@
| 1 |
+
### MiniMax-M2.5-IQ4_NL (unsloth)
|
| 2 |
+
|
| 3 |
+
120.13 GiB (4.51 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 64581 used, -40710 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 63666 used, -39794 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 128248 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 82552 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37943 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8268 MiB used, 15603 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1347 MiB used, 22523 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 11 layers ( 1 overflowing), 22557 MiB used, 1314 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 52 layers (44 overflowing), 22695 MiB used, 1176 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 5.15 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 25
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 62 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 89 |
+
llama_model_loader: - type iq4_nl: 372 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = IQ4_NL - 4.5 bpw
|
| 92 |
+
print_info: file size = 120.13 GiB (4.51 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46983.93 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47302.23 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 28242.72 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 21033.57 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18705.46 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 176.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 816.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1348.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 177 (with bs=4096), 93 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 22.90 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 6.28 seconds per pass - ETA 1.57 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.3873 ± 1.2294 0.00224 ± 0.02170 0.03151 ± 0.00424 6.727 ± 0.897 % 92.157 ± 1.687 %
|
| 202 |
+
2 4.7090 ± 0.5670 0.00727 ± 0.01261 0.02231 ± 0.00230 5.088 ± 0.603 % 95.098 ± 0.957 %
|
| 203 |
+
3 4.5118 ± 0.4479 0.00521 ± 0.01176 0.02496 ± 0.00216 6.011 ± 0.581 % 95.033 ± 0.786 %
|
| 204 |
+
4 5.1453 ± 0.4584 0.00330 ± 0.01041 0.02644 ± 0.00207 5.930 ± 0.528 % 94.804 ± 0.695 %
|
| 205 |
+
5 4.9183 ± 0.3929 0.00474 ± 0.01030 0.02951 ± 0.00274 6.578 ± 0.612 % 94.588 ± 0.634 %
|
| 206 |
+
6 6.0040 ± 0.4695 0.00124 ± 0.00976 0.03483 ± 0.00291 6.731 ± 0.641 % 93.725 ± 0.620 %
|
| 207 |
+
7 5.6368 ± 0.3966 0.00997 ± 0.00932 0.04004 ± 0.00299 6.751 ± 0.561 % 93.389 ± 0.588 %
|
| 208 |
+
8 6.3836 ± 0.4271 0.01088 ± 0.00853 0.03871 ± 0.00264 6.434 ± 0.516 % 92.941 ± 0.567 %
|
| 209 |
+
9 6.2601 ± 0.3904 0.01063 ± 0.00793 0.03686 ± 0.00236 6.214 ± 0.477 % 92.854 ± 0.538 %
|
| 210 |
+
10 5.7323 ± 0.3329 0.01160 ± 0.00740 0.03559 ± 0.00215 6.134 ± 0.438 % 92.902 ± 0.509 %
|
| 211 |
+
11 6.2902 ± 0.3531 0.01266 ± 0.00703 0.03612 ± 0.00200 6.016 ± 0.408 % 92.799 ± 0.488 %
|
| 212 |
+
12 6.9760 ± 0.3804 0.01464 ± 0.00670 0.03658 ± 0.00188 5.856 ± 0.385 % 92.778 ± 0.468 %
|
| 213 |
+
13 7.2279 ± 0.3752 0.01285 ± 0.00629 0.03556 ± 0.00175 5.708 ± 0.365 % 92.881 ± 0.447 %
|
| 214 |
+
14 7.7993 ± 0.3948 0.01368 ± 0.00619 0.03618 ± 0.00169 5.654 ± 0.345 % 92.605 ± 0.438 %
|
| 215 |
+
15 8.1766 ± 0.4007 0.01480 ± 0.00591 0.03611 ± 0.00160 5.617 ± 0.328 % 92.497 ± 0.426 %
|
| 216 |
+
16 8.4449 ± 0.4011 0.01489 ± 0.00560 0.03510 ± 0.00150 5.509 ± 0.313 % 92.500 ± 0.412 %
|
| 217 |
+
17 8.6687 ± 0.4018 0.01392 ± 0.00551 0.03608 ± 0.00146 5.455 ± 0.301 % 92.388 ± 0.403 %
|
| 218 |
+
18 8.1881 ± 0.3666 0.01612 ± 0.00533 0.03629 ± 0.00142 5.493 ± 0.296 % 92.462 ± 0.390 %
|
| 219 |
+
19 8.3016 ± 0.3614 0.01409 ± 0.00514 0.03536 ± 0.00135 5.437 ± 0.284 % 92.466 ± 0.379 %
|
| 220 |
+
20 8.3627 ± 0.3551 0.01355 ± 0.00522 0.03702 ± 0.00137 5.489 ± 0.271 % 92.431 ± 0.370 %
|
| 221 |
+
21 8.3453 ± 0.3457 0.01403 ± 0.00505 0.03687 ± 0.00132 5.464 ± 0.261 % 92.474 ± 0.361 %
|
| 222 |
+
22 8.6630 ± 0.3539 0.01372 ± 0.00493 0.03729 ± 0.00128 5.509 ± 0.251 % 92.282 ± 0.356 %
|
| 223 |
+
23 8.6733 ± 0.3477 0.01370 ± 0.00509 0.03953 ± 0.00142 5.852 ± 0.265 % 92.225 ± 0.350 %
|
| 224 |
+
24 9.0834 ± 0.3581 0.01382 ± 0.00497 0.03932 ± 0.00137 5.777 ± 0.258 % 92.124 ± 0.344 %
|
| 225 |
+
25 9.0692 ± 0.3509 0.01365 ± 0.00494 0.04076 ± 0.00150 6.027 ± 0.278 % 92.000 ± 0.340 %
|
| 226 |
+
26 8.5033 ± 0.3195 0.01784 ± 0.00506 0.04527 ± 0.00167 6.897 ± 0.290 % 91.885 ± 0.335 %
|
| 227 |
+
27 8.1140 ± 0.2970 0.02490 ± 0.00529 0.05134 ± 0.00190 7.765 ± 0.302 % 91.663 ± 0.333 %
|
| 228 |
+
28 8.2151 ± 0.2958 0.02361 ± 0.00519 0.05126 ± 0.00185 7.741 ± 0.295 % 91.667 ± 0.327 %
|
| 229 |
+
29 8.1432 ± 0.2882 0.02429 ± 0.00509 0.05101 ± 0.00179 7.685 ± 0.288 % 91.765 ± 0.320 %
|
| 230 |
+
30 7.6104 ± 0.2624 0.02382 ± 0.00494 0.04952 ± 0.00174 7.593 ± 0.282 % 92.026 ± 0.310 %
|
| 231 |
+
31 7.1718 ± 0.2411 0.02383 ± 0.00489 0.04979 ± 0.00188 7.633 ± 0.284 % 92.207 ± 0.302 %
|
| 232 |
+
32 6.9889 ± 0.2294 0.02323 ± 0.00477 0.04898 ± 0.00183 7.589 ± 0.277 % 92.230 ± 0.296 %
|
| 233 |
+
33 6.8477 ± 0.2197 0.02273 ± 0.00468 0.04857 ± 0.00177 7.563 ± 0.270 % 92.204 ± 0.292 %
|
| 234 |
+
34 7.0348 ± 0.2236 0.02265 ± 0.00466 0.04965 ± 0.00174 7.540 ± 0.264 % 92.122 ± 0.289 %
|
| 235 |
+
35 7.1469 ± 0.2259 0.02382 ± 0.00462 0.05149 ± 0.00174 7.659 ± 0.258 % 91.933 ± 0.288 %
|
| 236 |
+
36 7.2118 ± 0.2256 0.02398 ± 0.00454 0.05104 ± 0.00170 7.612 ± 0.253 % 91.928 ± 0.284 %
|
| 237 |
+
37 7.2372 ± 0.2235 0.02522 ± 0.00457 0.05254 ± 0.00177 7.732 ± 0.249 % 91.849 ± 0.282 %
|
| 238 |
+
38 7.4503 ± 0.2281 0.02449 ± 0.00449 0.05229 ± 0.00172 7.674 ± 0.245 % 91.785 ± 0.279 %
|
| 239 |
+
39 7.4143 ± 0.2236 0.02660 ± 0.00446 0.05351 ± 0.00173 7.858 ± 0.244 % 91.704 ± 0.277 %
|
| 240 |
+
40 7.1857 ± 0.2124 0.03020 ± 0.00453 0.05836 ± 0.00185 8.339 ± 0.246 % 91.500 ± 0.276 %
|
| 241 |
+
41 6.9777 ± 0.2023 0.03165 ± 0.00463 0.06249 ± 0.00198 8.779 ± 0.248 % 91.392 ± 0.274 %
|
| 242 |
+
42 6.7928 ± 0.1936 0.03611 ± 0.00474 0.06708 ± 0.00214 9.228 ± 0.250 % 91.289 ± 0.273 %
|
| 243 |
+
43 6.5946 ± 0.1845 0.03765 ± 0.00473 0.07002 ± 0.00222 9.520 ± 0.254 % 91.218 ± 0.270 %
|
| 244 |
+
44 6.5422 ± 0.1801 0.03600 ± 0.00464 0.06905 ± 0.00217 9.436 ± 0.251 % 91.248 ± 0.267 %
|
| 245 |
+
45 6.6875 ± 0.1832 0.03607 ± 0.00459 0.06895 ± 0.00213 9.363 ± 0.247 % 91.146 ± 0.265 %
|
| 246 |
+
46 6.8255 ± 0.1851 0.03442 ± 0.00451 0.06833 ± 0.00209 9.278 ± 0.244 % 91.134 ± 0.262 %
|
| 247 |
+
47 6.9777 ± 0.1878 0.03427 ± 0.00443 0.06740 ± 0.00204 9.189 ± 0.241 % 91.106 ± 0.260 %
|
| 248 |
+
48 6.8564 ± 0.1816 0.03339 ± 0.00435 0.06644 ± 0.00200 9.117 ± 0.238 % 91.144 ± 0.257 %
|
| 249 |
+
49 6.9402 ± 0.1817 0.03052 ± 0.00440 0.06992 ± 0.00244 9.189 ± 0.241 % 91.020 ± 0.256 %
|
| 250 |
+
50 7.0402 ± 0.1834 0.03067 ± 0.00433 0.06954 ± 0.00240 9.125 ± 0.237 % 90.957 ± 0.254 %
|
| 251 |
+
51 7.1514 ± 0.1848 0.03094 ± 0.00426 0.06887 ± 0.00235 9.065 ± 0.234 % 90.980 ± 0.251 %
|
| 252 |
+
52 7.2183 ± 0.1845 0.03094 ± 0.00422 0.06893 ± 0.00231 9.020 ± 0.231 % 90.860 ± 0.250 %
|
| 253 |
+
53 7.3332 ± 0.1857 0.03094 ± 0.00416 0.06831 ± 0.00227 8.964 ± 0.228 % 90.884 ± 0.248 %
|
| 254 |
+
54 7.3865 ± 0.1849 0.03041 ± 0.00410 0.06764 ± 0.00223 8.902 ± 0.226 % 90.915 ± 0.245 %
|
| 255 |
+
55 7.4374 ± 0.1842 0.03024 ± 0.00404 0.06702 ± 0.00219 8.841 ± 0.223 % 90.895 ± 0.243 %
|
| 256 |
+
56 7.4775 ± 0.1835 0.02990 ± 0.00398 0.06635 ± 0.00215 8.786 ± 0.221 % 90.910 ± 0.241 %
|
| 257 |
+
57 7.4746 ± 0.1818 0.02956 ± 0.00395 0.06637 ± 0.00212 8.787 ± 0.218 % 90.939 ± 0.238 %
|
| 258 |
+
58 7.4825 ± 0.1804 0.02926 ± 0.00390 0.06575 ± 0.00208 8.737 ± 0.216 % 90.947 ± 0.236 %
|
| 259 |
+
59 7.4410 ± 0.1775 0.02882 ± 0.00384 0.06486 ± 0.00205 8.676 ± 0.214 % 91.007 ± 0.233 %
|
| 260 |
+
60 7.4490 ± 0.1762 0.02842 ± 0.00380 0.06437 ± 0.00202 8.629 ± 0.212 % 90.974 ± 0.232 %
|
| 261 |
+
61 7.4936 ± 0.1758 0.02774 ± 0.00376 0.06403 ± 0.00199 8.595 ± 0.209 % 90.987 ± 0.230 %
|
| 262 |
+
62 7.4675 ± 0.1740 0.02752 ± 0.00374 0.06387 ± 0.00197 8.566 ± 0.207 % 91.006 ± 0.228 %
|
| 263 |
+
63 7.5168 ± 0.1743 0.02778 ± 0.00372 0.06352 ± 0.00194 8.520 ± 0.205 % 91.012 ± 0.226 %
|
| 264 |
+
64 7.4959 ± 0.1720 0.02757 ± 0.00368 0.06321 ± 0.00191 8.481 ± 0.203 % 91.011 ± 0.224 %
|
| 265 |
+
65 7.4868 ± 0.1705 0.02752 ± 0.00365 0.06308 ± 0.00189 8.471 ± 0.201 % 91.029 ± 0.222 %
|
| 266 |
+
66 7.5210 ± 0.1701 0.02708 ± 0.00363 0.06298 ± 0.00187 8.445 ± 0.199 % 91.016 ± 0.220 %
|
| 267 |
+
67 7.5287 ± 0.1691 0.02716 ± 0.00359 0.06297 ± 0.00184 8.405 ± 0.197 % 91.016 ± 0.219 %
|
| 268 |
+
68 7.4823 ± 0.1666 0.02694 ± 0.00356 0.06254 ± 0.00182 8.361 ± 0.195 % 91.107 ± 0.216 %
|
| 269 |
+
69 7.5134 ± 0.1662 0.02664 ± 0.00353 0.06225 ± 0.00179 8.329 ± 0.193 % 91.122 ± 0.214 %
|
| 270 |
+
70 7.4868 ± 0.1641 0.02688 ± 0.00350 0.06191 ± 0.00177 8.293 ± 0.191 % 91.143 ± 0.213 %
|
| 271 |
+
71 7.4710 ± 0.1627 0.02676 ± 0.00347 0.06158 ± 0.00174 8.258 ± 0.190 % 91.157 ± 0.211 %
|
| 272 |
+
72 7.4924 ± 0.1623 0.02744 ± 0.00345 0.06131 ± 0.00172 8.225 ± 0.188 % 91.155 ± 0.210 %
|
| 273 |
+
73 7.4971 ± 0.1611 0.02734 ± 0.00342 0.06109 ± 0.00170 8.192 ± 0.186 % 91.093 ± 0.209 %
|
| 274 |
+
74 7.4847 ± 0.1597 0.02651 ± 0.00339 0.06095 ± 0.00168 8.159 ± 0.184 % 91.076 ± 0.208 %
|
| 275 |
+
75 7.4843 ± 0.1586 0.02555 ± 0.00338 0.06104 ± 0.00166 8.161 ± 0.182 % 91.043 ± 0.206 %
|
| 276 |
+
76 7.5454 ± 0.1589 0.02518 ± 0.00336 0.06107 ± 0.00165 8.161 ± 0.182 % 91.027 ± 0.205 %
|
| 277 |
+
77 7.5398 ± 0.1578 0.02471 ± 0.00333 0.06071 ± 0.00163 8.124 ± 0.180 % 91.062 ± 0.204 %
|
| 278 |
+
78 7.5475 ± 0.1569 0.02391 ± 0.00331 0.06050 ± 0.00161 8.081 ± 0.179 % 91.081 ± 0.202 %
|
| 279 |
+
79 7.5503 ± 0.1560 0.02282 ± 0.00330 0.06048 ± 0.00160 8.065 ± 0.177 % 91.070 ± 0.201 %
|
| 280 |
+
80 7.5555 ± 0.1557 0.02326 ± 0.00331 0.06053 ± 0.00158 8.046 ± 0.176 % 91.059 ± 0.200 %
|
| 281 |
+
81 7.5272 ± 0.1542 0.02261 ± 0.00328 0.06030 ± 0.00156 8.015 ± 0.174 % 91.082 ± 0.198 %
|
| 282 |
+
82 7.5072 ± 0.1526 0.02288 ± 0.00325 0.06005 ± 0.00155 7.988 ± 0.173 % 91.100 ± 0.197 %
|
| 283 |
+
83 7.5370 ± 0.1520 0.02252 ± 0.00322 0.05972 ± 0.00153 7.951 ± 0.172 % 91.127 ± 0.195 %
|
| 284 |
+
84 7.5509 ± 0.1511 0.02218 ± 0.00319 0.05935 ± 0.00151 7.918 ± 0.170 % 91.130 ± 0.194 %
|
| 285 |
+
85 7.5448 ± 0.1498 0.02192 ± 0.00315 0.05895 ± 0.00150 7.885 ± 0.169 % 91.123 ± 0.193 %
|
| 286 |
+
86 7.4757 ± 0.1471 0.02177 ± 0.00313 0.05869 ± 0.00148 7.859 ± 0.168 % 91.149 ± 0.192 %
|
| 287 |
+
87 7.4166 ± 0.1446 0.02163 ± 0.00310 0.05831 ± 0.00146 7.831 ± 0.166 % 91.161 ± 0.191 %
|
| 288 |
+
88 7.3537 ± 0.1422 0.02144 ± 0.00307 0.05805 ± 0.00145 7.809 ± 0.165 % 91.190 ± 0.189 %
|
| 289 |
+
89 7.2806 ± 0.1395 0.02143 ± 0.00304 0.05775 ± 0.00144 7.785 ± 0.164 % 91.240 ± 0.188 %
|
| 290 |
+
90 7.2255 ± 0.1372 0.02154 ± 0.00301 0.05746 ± 0.00143 7.772 ± 0.163 % 91.264 ± 0.186 %
|
| 291 |
+
91 7.1752 ± 0.1352 0.02166 ± 0.00299 0.05714 ± 0.00141 7.757 ± 0.162 % 91.295 ± 0.185 %
|
| 292 |
+
92 7.1176 ± 0.1330 0.02165 ± 0.00297 0.05688 ± 0.00140 7.736 ± 0.160 % 91.300 ± 0.184 %
|
| 293 |
+
93 7.1315 ± 0.1328 0.02111 ± 0.00297 0.05769 ± 0.00146 7.762 ± 0.161 % 91.280 ± 0.183 %
|
| 294 |
+
94 7.1611 ± 0.1325 0.02093 ± 0.00294 0.05734 ± 0.00145 7.733 ± 0.160 % 91.281 ± 0.182 %
|
| 295 |
+
95 7.2699 ± 0.1342 0.02080 ± 0.00292 0.05730 ± 0.00144 7.720 ± 0.160 % 91.236 ± 0.182 %
|
| 296 |
+
96 7.3611 ± 0.1353 0.02031 ± 0.00290 0.05717 ± 0.00142 7.694 ± 0.159 % 91.205 ± 0.181 %
|
| 297 |
+
97 7.4391 ± 0.1361 0.02019 ± 0.00288 0.05691 ± 0.00141 7.663 ± 0.158 % 91.191 ± 0.180 %
|
| 298 |
+
98 7.5770 ± 0.1385 0.02041 ± 0.00287 0.05672 ± 0.00139 7.633 ± 0.157 % 91.168 ± 0.180 %
|
| 299 |
+
99 7.6938 ± 0.1402 0.02015 ± 0.00285 0.05659 ± 0.00138 7.603 ± 0.156 % 91.119 ± 0.179 %
|
| 300 |
+
100 7.7251 ± 0.1401 0.01933 ± 0.00283 0.05672 ± 0.00137 7.581 ± 0.155 % 91.086 ± 0.178 %
|
| 301 |
+
101 7.7606 ± 0.1402 0.01907 ± 0.00281 0.05661 ± 0.00136 7.559 ± 0.154 % 91.089 ± 0.178 %
|
| 302 |
+
102 7.8269 ± 0.1413 0.01969 ± 0.00281 0.05668 ± 0.00136 7.536 ± 0.153 % 91.077 ± 0.177 %
|
| 303 |
+
103 7.8003 ± 0.1404 0.01965 ± 0.00279 0.05643 ± 0.00135 7.520 ± 0.152 % 91.091 ± 0.176 %
|
| 304 |
+
104 7.7454 ± 0.1385 0.02010 ± 0.00279 0.05661 ± 0.00135 7.571 ± 0.151 % 91.112 ± 0.175 %
|
| 305 |
+
105 7.6362 ± 0.1357 0.02041 ± 0.00280 0.05679 ± 0.00134 7.612 ± 0.151 % 91.160 ± 0.173 %
|
| 306 |
+
106 7.5013 ± 0.1323 0.01989 ± 0.00278 0.05658 ± 0.00134 7.637 ± 0.152 % 91.217 ± 0.172 %
|
| 307 |
+
107 7.5587 ± 0.1326 0.01978 ± 0.00276 0.05626 ± 0.00133 7.607 ± 0.151 % 91.230 ± 0.171 %
|
| 308 |
+
108 7.5727 ± 0.1323 0.01984 ± 0.00274 0.05598 ± 0.00131 7.586 ± 0.150 % 91.249 ± 0.170 %
|
| 309 |
+
109 7.5947 ± 0.1321 0.02002 ± 0.00272 0.05581 ± 0.00130 7.566 ± 0.149 % 91.257 ± 0.169 %
|
| 310 |
+
110 7.6298 ± 0.1322 0.01994 ± 0.00270 0.05559 ± 0.00129 7.549 ± 0.148 % 91.251 ± 0.169 %
|
| 311 |
+
111 7.6755 ± 0.1323 0.01961 ± 0.00268 0.05542 ± 0.00128 7.526 ± 0.147 % 91.245 ± 0.168 %
|
| 312 |
+
112 7.6850 ± 0.1318 0.01950 ± 0.00266 0.05513 ± 0.00127 7.503 ± 0.147 % 91.246 ± 0.167 %
|
| 313 |
+
113 7.6963 ± 0.1312 0.01960 ± 0.00264 0.05488 ± 0.00126 7.482 ± 0.146 % 91.227 ± 0.167 %
|
| 314 |
+
114 7.7129 ± 0.1310 0.01941 ± 0.00263 0.05470 ± 0.00125 7.459 ± 0.145 % 91.232 ± 0.166 %
|
| 315 |
+
115 7.6945 ± 0.1301 0.01937 ± 0.00263 0.05491 ± 0.00124 7.496 ± 0.144 % 91.253 ± 0.165 %
|
| 316 |
+
116 7.6942 ± 0.1295 0.02072 ± 0.00265 0.05656 ± 0.00126 7.664 ± 0.144 % 91.146 ± 0.165 %
|
| 317 |
+
117 7.5984 ± 0.1271 0.02176 ± 0.00267 0.05797 ± 0.00127 7.832 ± 0.145 % 91.128 ± 0.165 %
|
| 318 |
+
118 7.5110 ± 0.1248 0.02305 ± 0.00269 0.05930 ± 0.00129 7.958 ± 0.144 % 91.100 ± 0.164 %
|
| 319 |
+
119 7.4225 ± 0.1225 0.02465 ± 0.00270 0.06110 ± 0.00132 8.241 ± 0.148 % 91.069 ± 0.164 %
|
| 320 |
+
120 7.3468 ± 0.1205 0.02575 ± 0.00273 0.06311 ± 0.00135 8.468 ± 0.149 % 91.007 ± 0.164 %
|
| 321 |
+
121 7.2713 ± 0.1185 0.02705 ± 0.00275 0.06474 ± 0.00138 8.646 ± 0.149 % 90.971 ± 0.163 %
|
| 322 |
+
|
| 323 |
+
====== Perplexity statistics ======
|
| 324 |
+
Mean PPL(Q) : 7.271309 ± 0.118527
|
| 325 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 326 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.56%
|
| 327 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.027052 ± 0.002753
|
| 328 |
+
Mean PPL(Q)/PPL(base) : 1.027422 ± 0.002829
|
| 329 |
+
Mean PPL(Q)-PPL(base) : 0.194069 ± 0.020173
|
| 330 |
+
|
| 331 |
+
====== KL divergence statistics ======
|
| 332 |
+
Mean KLD: 0.064735 ± 0.001377
|
| 333 |
+
Maximum KLD: 14.542189
|
| 334 |
+
99.9% KLD: 2.917501
|
| 335 |
+
99.0% KLD: 0.976666
|
| 336 |
+
95.0% KLD: 0.238753
|
| 337 |
+
90.0% KLD: 0.117601
|
| 338 |
+
Median KLD: 0.015033
|
| 339 |
+
10.0% KLD: 0.000071
|
| 340 |
+
5.0% KLD: 0.000011
|
| 341 |
+
1.0% KLD: 0.000000
|
| 342 |
+
0.1% KLD: -0.000002
|
| 343 |
+
Minimum KLD: -0.000005
|
| 344 |
+
|
| 345 |
+
====== Token probability statistics ======
|
| 346 |
+
Mean Δp: -0.558 ± 0.049 %
|
| 347 |
+
Maximum Δp: 97.725%
|
| 348 |
+
99.9% Δp: 64.640%
|
| 349 |
+
99.0% Δp: 21.321%
|
| 350 |
+
95.0% Δp: 7.620%
|
| 351 |
+
90.0% Δp: 3.781%
|
| 352 |
+
75.0% Δp: 0.499%
|
| 353 |
+
Median Δp: -0.000%
|
| 354 |
+
25.0% Δp: -0.789%
|
| 355 |
+
10.0% Δp: -4.897%
|
| 356 |
+
5.0% Δp: -9.714%
|
| 357 |
+
1.0% Δp: -34.326%
|
| 358 |
+
0.1% Δp: -78.434%
|
| 359 |
+
Minimum Δp: -99.608%
|
| 360 |
+
RMS Δp : 8.646 ± 0.149 %
|
| 361 |
+
Same top p: 90.971 ± 0.163 %
|
| 362 |
+
|
| 363 |
+
llama_perf_context_print: load time = 51178.35 ms
|
| 364 |
+
llama_perf_context_print: prompt eval time = 86483.99 ms / 61952 tokens ( 1.40 ms per token, 716.34 tokens per second)
|
| 365 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 366 |
+
llama_perf_context_print: total time = 100459.89 ms / 61953 tokens
|
| 367 |
+
llama_perf_context_print: graphs reused = 0
|
| 368 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1125 + ( 22557 = 21033 + 176 + 1347) + 451 |
|
| 370 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1019 + ( 22695 = 18705 + 816 + 3174) + 419 |
|
| 371 |
+
llama_memory_breakdown_print: | - Host | 122632 = 122528 + 0 + 104 |
|
| 372 |
+
```
|
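The block above is the raw `llama-perplexity --kl-divergence` log for the IQ4_NL quant; every other file under `kld_data/` follows the same layout, and the headline figures (Mean PPL(Q), Mean KLD, RMS Δp, Same top p) are the ones that reappear in the summary CSV earlier in this commit. The `--kl-divergence-base` file on the command line is presumably a logits dump produced by an earlier run against the BF16 reference model, which the quantized run is then scored against. Below is a minimal Python sketch for pulling the summary statistics out of one of these logs, assuming the exact output layout shown above.

```python
# Minimal sketch: extract the headline statistics from one of the
# kld_data/*.md llama-perplexity logs. Assumes the output layout shown above
# (e.g. "Mean KLD: 0.064735 ± 0.001377"); it is not a general log parser.
import re
from pathlib import Path

PATTERNS = {
    "mean_ppl":    r"Mean PPL\(Q\)\s*:\s*([0-9.]+)",
    "mean_kld":    r"Mean KLD:\s*([0-9.]+)",
    "rms_delta_p": r"RMS Δp\s*:\s*([0-9.]+)",
    "same_top_p":  r"Same top p:\s*([0-9.]+)",
}

def summarize(md_path: str) -> dict:
    """Return the summary metrics found in a single benchmark log."""
    text = Path(md_path).read_text(encoding="utf-8")
    return {
        key: (float(m.group(1)) if (m := re.search(pat, text)) else None)
        for key, pat in PATTERNS.items()
    }

if __name__ == "__main__":
    # For the log above this should give mean_kld ≈ 0.064735,
    # mean_ppl ≈ 7.271309, rms_delta_p ≈ 8.646, same_top_p ≈ 90.971.
    print(summarize("kld_data/unsloth/IQ4_NL/MiniMax-M2.5-IQ4_NL.md"))
```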
kld_data/unsloth/IQ4_XS/MiniMax-M2.5-IQ4_XS.md
ADDED
|
@@ -0,0 +1,372 @@
| 1 |
+
### MiniMax-M2.5-IQ4_XS (unsloth)
|
| 2 |
+
|
| 3 |
+
113.52 GiB (4.26 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/IQ4_XS/MiniMax-M2.5-IQ4_XS-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 61086 used, -37215 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 60390 used, -36518 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 121477 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 75781 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38055 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8084 MiB used, 15787 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1275 MiB used, 22595 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 12 layers ( 1 overflowing), 22582 MiB used, 1288 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 51 layers (43 overflowing), 22270 MiB used, 1601 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 5.03 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/IQ4_XS/MiniMax-M2.5-IQ4_XS-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 30
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 62 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 89 |
+
llama_model_loader: - type iq4_xs: 372 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = IQ4_XS - 4.25 bpw
|
| 92 |
+
print_info: file size = 113.52 GiB (4.26 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46875.52 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47157.83 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 21707.17 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 21115.00 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18296.47 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 192.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 800.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1288.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 177 (with bs=4096), 95 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 22.70 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 6.41 seconds per pass - ETA 1.60 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.4107 ± 1.2288 0.00591 ± 0.01975 0.02652 ± 0.00289 5.892 ± 0.761 % 92.157 ± 1.687 %
|
| 202 |
+
2 4.6907 ± 0.5628 0.00338 ± 0.01194 0.01995 ± 0.00173 4.498 ± 0.507 % 94.510 ± 1.010 %
|
| 203 |
+
3 4.4983 ± 0.4445 0.00221 ± 0.01157 0.02552 ± 0.00219 6.186 ± 0.606 % 94.510 ± 0.824 %
|
| 204 |
+
4 5.1812 ± 0.4623 0.01025 ± 0.01059 0.02824 ± 0.00206 6.233 ± 0.537 % 94.118 ± 0.737 %
|
| 205 |
+
5 4.9487 ± 0.3954 0.01090 ± 0.01070 0.02983 ± 0.00211 6.535 ± 0.488 % 94.039 ± 0.663 %
|
| 206 |
+
6 6.0530 ± 0.4753 0.00936 ± 0.01023 0.03416 ± 0.00213 6.289 ± 0.429 % 92.810 ± 0.661 %
|
| 207 |
+
7 5.6545 ± 0.3989 0.01311 ± 0.00936 0.03940 ± 0.00239 6.381 ± 0.380 % 92.717 ± 0.615 %
|
| 208 |
+
8 6.4004 ± 0.4287 0.01352 ± 0.00854 0.03830 ± 0.00211 6.119 ± 0.348 % 92.255 ± 0.592 %
|
| 209 |
+
9 6.2859 ± 0.3922 0.01474 ± 0.00780 0.03655 ± 0.00190 5.956 ± 0.323 % 92.244 ± 0.558 %
|
| 210 |
+
10 5.7596 ± 0.3348 0.01634 ± 0.00732 0.03514 ± 0.00174 5.828 ± 0.298 % 92.392 ± 0.525 %
|
| 211 |
+
11 6.3124 ± 0.3545 0.01618 ± 0.00707 0.03588 ± 0.00165 5.717 ± 0.281 % 92.121 ± 0.509 %
|
| 212 |
+
12 7.0060 ± 0.3822 0.01894 ± 0.00675 0.03684 ± 0.00169 5.595 ± 0.264 % 91.928 ± 0.493 %
|
| 213 |
+
13 7.2589 ± 0.3767 0.01712 ± 0.00634 0.03590 ± 0.00158 5.473 ± 0.250 % 92.097 ± 0.469 %
|
| 214 |
+
14 7.8176 ± 0.3958 0.01603 ± 0.00622 0.03627 ± 0.00158 5.412 ± 0.238 % 91.849 ± 0.458 %
|
| 215 |
+
15 8.1865 ± 0.4010 0.01601 ± 0.00592 0.03637 ± 0.00150 5.410 ± 0.227 % 91.791 ± 0.444 %
|
| 216 |
+
16 8.4451 ± 0.4007 0.01491 ± 0.00563 0.03540 ± 0.00141 5.336 ± 0.217 % 91.863 ± 0.428 %
|
| 217 |
+
17 8.6709 ± 0.4017 0.01418 ± 0.00548 0.03630 ± 0.00138 5.280 ± 0.209 % 91.857 ± 0.415 %
|
| 218 |
+
18 8.1855 ± 0.3661 0.01580 ± 0.00531 0.03647 ± 0.00135 5.304 ± 0.215 % 92.070 ± 0.399 %
|
| 219 |
+
19 8.3003 ± 0.3610 0.01392 ± 0.00512 0.03566 ± 0.00129 5.275 ± 0.207 % 92.012 ± 0.390 %
|
| 220 |
+
20 8.3653 ± 0.3547 0.01386 ± 0.00514 0.03685 ± 0.00127 5.315 ± 0.198 % 91.980 ± 0.380 %
|
| 221 |
+
21 8.3396 ± 0.3449 0.01335 ± 0.00499 0.03688 ± 0.00125 5.404 ± 0.216 % 92.101 ± 0.369 %
|
| 222 |
+
22 8.6596 ± 0.3531 0.01332 ± 0.00490 0.03766 ± 0.00124 5.489 ± 0.213 % 91.836 ± 0.366 %
|
| 223 |
+
23 8.6650 ± 0.3463 0.01274 ± 0.00494 0.03922 ± 0.00131 5.742 ± 0.222 % 91.765 ± 0.359 %
|
| 224 |
+
24 9.0703 ± 0.3566 0.01238 ± 0.00483 0.03903 ± 0.00126 5.674 ± 0.216 % 91.732 ± 0.352 %
|
| 225 |
+
25 9.0572 ± 0.3494 0.01233 ± 0.00480 0.04050 ± 0.00137 5.922 ± 0.245 % 91.639 ± 0.347 %
|
| 226 |
+
26 8.5059 ± 0.3186 0.01814 ± 0.00497 0.04643 ± 0.00170 7.183 ± 0.307 % 91.569 ± 0.341 %
|
| 227 |
+
27 8.1153 ± 0.2962 0.02506 ± 0.00533 0.05283 ± 0.00200 8.118 ± 0.318 % 91.445 ± 0.337 %
|
| 228 |
+
28 8.2247 ± 0.2954 0.02477 ± 0.00523 0.05245 ± 0.00194 8.037 ± 0.311 % 91.401 ± 0.332 %
|
| 229 |
+
29 8.1444 ± 0.2874 0.02444 ± 0.00513 0.05200 ± 0.00187 7.977 ± 0.303 % 91.454 ± 0.325 %
|
| 230 |
+
30 7.6138 ± 0.2618 0.02426 ± 0.00498 0.05056 ± 0.00182 7.894 ± 0.298 % 91.712 ± 0.315 %
|
| 231 |
+
31 7.1719 ± 0.2404 0.02384 ± 0.00486 0.05005 ± 0.00179 7.872 ± 0.294 % 91.929 ± 0.306 %
|
| 232 |
+
32 6.9868 ± 0.2286 0.02293 ± 0.00474 0.04925 ± 0.00174 7.827 ± 0.287 % 91.936 ± 0.301 %
|
| 233 |
+
33 6.8440 ± 0.2189 0.02218 ± 0.00465 0.04884 ± 0.00169 7.791 ± 0.280 % 91.931 ± 0.297 %
|
| 234 |
+
34 7.0312 ± 0.2227 0.02214 ± 0.00462 0.04970 ± 0.00166 7.768 ± 0.274 % 91.880 ± 0.293 %
|
| 235 |
+
35 7.1445 ± 0.2252 0.02348 ± 0.00459 0.05174 ± 0.00167 7.877 ± 0.266 % 91.675 ± 0.292 %
|
| 236 |
+
36 7.2119 ± 0.2251 0.02400 ± 0.00450 0.05111 ± 0.00163 7.810 ± 0.261 % 91.678 ± 0.288 %
|
| 237 |
+
37 7.2361 ± 0.2230 0.02505 ± 0.00453 0.05253 ± 0.00171 7.902 ± 0.259 % 91.659 ± 0.285 %
|
| 238 |
+
38 7.4517 ± 0.2276 0.02469 ± 0.00445 0.05229 ± 0.00167 7.831 ± 0.254 % 91.538 ± 0.283 %
|
| 239 |
+
39 7.4077 ± 0.2228 0.02571 ± 0.00441 0.05376 ± 0.00169 7.984 ± 0.254 % 91.503 ± 0.280 %
|
| 240 |
+
40 7.1777 ± 0.2117 0.02909 ± 0.00446 0.05818 ± 0.00181 8.415 ± 0.255 % 91.343 ± 0.278 %
|
| 241 |
+
41 6.9760 ± 0.2018 0.03142 ± 0.00459 0.06283 ± 0.00197 8.910 ± 0.260 % 91.229 ± 0.277 %
|
| 242 |
+
42 6.7901 ± 0.1930 0.03572 ± 0.00468 0.06690 ± 0.00208 9.322 ± 0.257 % 91.158 ± 0.274 %
|
| 243 |
+
43 6.5969 ± 0.1842 0.03800 ± 0.00471 0.06997 ± 0.00219 9.564 ± 0.257 % 91.145 ± 0.271 %
|
| 244 |
+
44 6.5459 ± 0.1798 0.03657 ± 0.00462 0.06902 ± 0.00214 9.481 ± 0.254 % 91.168 ± 0.268 %
|
| 245 |
+
45 6.6922 ± 0.1829 0.03677 ± 0.00456 0.06898 ± 0.00210 9.415 ± 0.250 % 91.129 ± 0.265 %
|
| 246 |
+
46 6.8288 ± 0.1848 0.03490 ± 0.00449 0.06835 ± 0.00205 9.331 ± 0.247 % 91.168 ± 0.262 %
|
| 247 |
+
47 6.9814 ± 0.1875 0.03479 ± 0.00441 0.06744 ± 0.00201 9.245 ± 0.244 % 91.156 ± 0.259 %
|
| 248 |
+
48 6.8574 ± 0.1812 0.03353 ± 0.00432 0.06644 ± 0.00197 9.168 ± 0.241 % 91.217 ± 0.256 %
|
| 249 |
+
49 6.9464 ± 0.1816 0.03141 ± 0.00434 0.06991 ± 0.00236 9.256 ± 0.244 % 91.100 ± 0.255 %
|
| 250 |
+
50 7.0425 ± 0.1831 0.03099 ± 0.00429 0.06940 ± 0.00232 9.197 ± 0.240 % 91.012 ± 0.253 %
|
| 251 |
+
51 7.1503 ± 0.1844 0.03078 ± 0.00422 0.06866 ± 0.00227 9.130 ± 0.237 % 91.034 ± 0.251 %
|
| 252 |
+
52 7.2192 ± 0.1842 0.03107 ± 0.00418 0.06870 ± 0.00223 9.092 ± 0.234 % 90.958 ± 0.249 %
|
| 253 |
+
53 7.3313 ± 0.1853 0.03069 ± 0.00413 0.06810 ± 0.00219 9.036 ± 0.231 % 90.988 ± 0.246 %
|
| 254 |
+
54 7.3853 ± 0.1845 0.03025 ± 0.00407 0.06745 ± 0.00215 8.978 ± 0.229 % 91.038 ± 0.243 %
|
| 255 |
+
55 7.4371 ± 0.1838 0.03020 ± 0.00401 0.06677 ± 0.00211 8.918 ± 0.226 % 90.966 ± 0.242 %
|
| 256 |
+
56 7.4750 ± 0.1831 0.02956 ± 0.00395 0.06613 ± 0.00208 8.858 ± 0.224 % 91.008 ± 0.239 %
|
| 257 |
+
57 7.4723 ± 0.1813 0.02925 ± 0.00392 0.06621 ± 0.00205 8.854 ± 0.221 % 91.008 ± 0.237 %
|
| 258 |
+
58 7.4787 ± 0.1799 0.02874 ± 0.00388 0.06567 ± 0.00202 8.810 ± 0.219 % 90.994 ± 0.235 %
|
| 259 |
+
59 7.4371 ± 0.1770 0.02830 ± 0.00382 0.06479 ± 0.00199 8.750 ± 0.217 % 91.073 ± 0.232 %
|
| 260 |
+
60 7.4497 ± 0.1759 0.02850 ± 0.00378 0.06432 ± 0.00195 8.709 ± 0.214 % 91.026 ± 0.231 %
|
| 261 |
+
61 7.4975 ± 0.1755 0.02825 ± 0.00374 0.06396 ± 0.00193 8.673 ± 0.212 % 91.013 ± 0.229 %
|
| 262 |
+
62 7.4676 ± 0.1736 0.02753 ± 0.00371 0.06364 ± 0.00190 8.643 ± 0.210 % 91.031 ± 0.227 %
|
| 263 |
+
63 7.5128 ± 0.1737 0.02724 ± 0.00369 0.06348 ± 0.00187 8.600 ± 0.208 % 91.018 ± 0.226 %
|
| 264 |
+
64 7.4917 ± 0.1715 0.02701 ± 0.00365 0.06320 ± 0.00184 8.555 ± 0.206 % 91.011 ± 0.224 %
|
| 265 |
+
65 7.4807 ± 0.1699 0.02671 ± 0.00362 0.06298 ± 0.00182 8.529 ± 0.203 % 91.059 ± 0.222 %
|
| 266 |
+
66 7.5117 ± 0.1694 0.02583 ± 0.00360 0.06281 ± 0.00180 8.503 ± 0.201 % 91.046 ± 0.220 %
|
| 267 |
+
67 7.5189 ± 0.1684 0.02586 ± 0.00356 0.06272 ± 0.00177 8.460 ± 0.199 % 91.027 ± 0.219 %
|
| 268 |
+
68 7.4715 ± 0.1659 0.02550 ± 0.00353 0.06221 ± 0.00175 8.410 ± 0.198 % 91.113 ± 0.216 %
|
| 269 |
+
69 7.5069 ± 0.1656 0.02577 ± 0.00350 0.06192 ± 0.00172 8.372 ± 0.196 % 91.111 ± 0.215 %
|
| 270 |
+
70 7.4795 ± 0.1636 0.02591 ± 0.00348 0.06166 ± 0.00170 8.350 ± 0.194 % 91.115 ± 0.213 %
|
| 271 |
+
71 7.4599 ± 0.1620 0.02527 ± 0.00344 0.06135 ± 0.00168 8.316 ± 0.192 % 91.130 ± 0.211 %
|
| 272 |
+
72 7.4810 ± 0.1615 0.02592 ± 0.00342 0.06116 ± 0.00166 8.287 ± 0.190 % 91.122 ± 0.210 %
|
| 273 |
+
73 7.4863 ± 0.1604 0.02589 ± 0.00339 0.06092 ± 0.00164 8.250 ± 0.189 % 91.109 ± 0.209 %
|
| 274 |
+
74 7.4728 ± 0.1588 0.02491 ± 0.00336 0.06079 ± 0.00162 8.216 ± 0.187 % 91.097 ± 0.207 %
|
| 275 |
+
75 7.4750 ± 0.1578 0.02430 ± 0.00334 0.06088 ± 0.00160 8.198 ± 0.185 % 91.080 ± 0.206 %
|
| 276 |
+
76 7.5370 ± 0.1583 0.02407 ± 0.00331 0.06088 ± 0.00159 8.188 ± 0.184 % 91.073 ± 0.205 %
|
| 277 |
+
77 7.5334 ± 0.1572 0.02386 ± 0.00330 0.06058 ± 0.00157 8.150 ± 0.182 % 91.087 ± 0.203 %
|
| 278 |
+
78 7.5417 ± 0.1564 0.02314 ± 0.00327 0.06035 ± 0.00156 8.108 ± 0.181 % 91.106 ± 0.202 %
|
| 279 |
+
79 7.5468 ± 0.1555 0.02236 ± 0.00327 0.06033 ± 0.00154 8.090 ± 0.179 % 91.090 ± 0.201 %
|
| 280 |
+
80 7.5503 ± 0.1551 0.02258 ± 0.00328 0.06038 ± 0.00153 8.068 ± 0.178 % 91.088 ± 0.199 %
|
| 281 |
+
81 7.5240 ± 0.1537 0.02219 ± 0.00325 0.06019 ± 0.00151 8.036 ± 0.176 % 91.116 ± 0.198 %
|
| 282 |
+
82 7.5033 ± 0.1521 0.02235 ± 0.00323 0.05993 ± 0.00149 8.006 ± 0.175 % 91.153 ± 0.196 %
|
| 283 |
+
83 7.5343 ± 0.1516 0.02216 ± 0.00319 0.05959 ± 0.00148 7.972 ± 0.173 % 91.165 ± 0.195 %
|
| 284 |
+
84 7.5493 ± 0.1507 0.02197 ± 0.00316 0.05926 ± 0.00146 7.940 ± 0.172 % 91.120 ± 0.194 %
|
| 285 |
+
85 7.5420 ± 0.1494 0.02156 ± 0.00313 0.05887 ± 0.00144 7.907 ± 0.171 % 91.105 ± 0.193 %
|
| 286 |
+
86 7.4738 ± 0.1467 0.02152 ± 0.00310 0.05854 ± 0.00143 7.880 ± 0.169 % 91.104 ± 0.192 %
|
| 287 |
+
87 7.4140 ± 0.1442 0.02129 ± 0.00308 0.05824 ± 0.00141 7.854 ± 0.168 % 91.116 ± 0.191 %
|
| 288 |
+
88 7.3519 ± 0.1418 0.02120 ± 0.00305 0.05798 ± 0.00140 7.826 ± 0.167 % 91.141 ± 0.190 %
|
| 289 |
+
89 7.2800 ± 0.1391 0.02135 ± 0.00302 0.05768 ± 0.00139 7.802 ± 0.166 % 91.170 ± 0.188 %
|
| 290 |
+
90 7.2246 ± 0.1369 0.02141 ± 0.00299 0.05740 ± 0.00137 7.789 ± 0.165 % 91.181 ± 0.187 %
|
| 291 |
+
91 7.1748 ± 0.1349 0.02160 ± 0.00297 0.05705 ± 0.00136 7.765 ± 0.163 % 91.196 ± 0.186 %
|
| 292 |
+
92 7.1184 ± 0.1327 0.02177 ± 0.00294 0.05674 ± 0.00135 7.744 ± 0.162 % 91.215 ± 0.185 %
|
| 293 |
+
93 7.1328 ± 0.1325 0.02130 ± 0.00293 0.05737 ± 0.00141 7.759 ± 0.162 % 91.191 ± 0.184 %
|
| 294 |
+
94 7.1630 ± 0.1323 0.02119 ± 0.00290 0.05700 ± 0.00139 7.728 ± 0.161 % 91.214 ± 0.183 %
|
| 295 |
+
95 7.2722 ± 0.1340 0.02113 ± 0.00288 0.05697 ± 0.00138 7.704 ± 0.160 % 91.154 ± 0.182 %
|
| 296 |
+
96 7.3644 ± 0.1351 0.02076 ± 0.00287 0.05694 ± 0.00137 7.677 ± 0.159 % 91.115 ± 0.182 %
|
| 297 |
+
97 7.4423 ± 0.1359 0.02062 ± 0.00284 0.05666 ± 0.00135 7.644 ± 0.158 % 91.086 ± 0.181 %
|
| 298 |
+
98 7.5805 ± 0.1383 0.02087 ± 0.00283 0.05640 ± 0.00134 7.614 ± 0.157 % 91.076 ± 0.180 %
|
| 299 |
+
99 7.6987 ± 0.1401 0.02079 ± 0.00281 0.05623 ± 0.00133 7.582 ± 0.156 % 91.040 ± 0.180 %
|
| 300 |
+
100 7.7354 ± 0.1401 0.02066 ± 0.00279 0.05615 ± 0.00131 7.572 ± 0.155 % 91.012 ± 0.179 %
|
| 301 |
+
101 7.7718 ± 0.1403 0.02052 ± 0.00278 0.05600 ± 0.00130 7.551 ± 0.154 % 91.011 ± 0.178 %
|
| 302 |
+
102 7.8360 ± 0.1413 0.02084 ± 0.00277 0.05618 ± 0.00130 7.534 ± 0.153 % 91.000 ± 0.177 %
|
| 303 |
+
103 7.8096 ± 0.1403 0.02085 ± 0.00276 0.05592 ± 0.00129 7.516 ± 0.152 % 91.018 ± 0.176 %
|
| 304 |
+
104 7.7543 ± 0.1385 0.02125 ± 0.00275 0.05603 ± 0.00129 7.552 ± 0.151 % 91.044 ± 0.175 %
|
| 305 |
+
105 7.6451 ± 0.1356 0.02158 ± 0.00276 0.05640 ± 0.00130 7.597 ± 0.151 % 91.081 ± 0.174 %
|
| 306 |
+
106 7.5111 ± 0.1323 0.02119 ± 0.00274 0.05621 ± 0.00129 7.635 ± 0.151 % 91.132 ± 0.173 %
|
| 307 |
+
107 7.5683 ± 0.1326 0.02104 ± 0.00272 0.05587 ± 0.00128 7.604 ± 0.150 % 91.160 ± 0.172 %
|
| 308 |
+
108 7.5809 ± 0.1322 0.02093 ± 0.00270 0.05559 ± 0.00127 7.582 ± 0.149 % 91.191 ± 0.171 %
|
| 309 |
+
109 7.6016 ± 0.1321 0.02093 ± 0.00268 0.05540 ± 0.00126 7.566 ± 0.148 % 91.207 ± 0.170 %
|
| 310 |
+
110 7.6374 ± 0.1321 0.02094 ± 0.00266 0.05520 ± 0.00125 7.552 ± 0.147 % 91.201 ± 0.169 %
|
| 311 |
+
111 7.6844 ± 0.1323 0.02076 ± 0.00264 0.05503 ± 0.00124 7.528 ± 0.147 % 91.203 ± 0.168 %
|
| 312 |
+
112 7.6933 ± 0.1317 0.02057 ± 0.00262 0.05479 ± 0.00123 7.510 ± 0.146 % 91.218 ± 0.167 %
|
| 313 |
+
113 7.7030 ± 0.1311 0.02047 ± 0.00260 0.05454 ± 0.00122 7.489 ± 0.145 % 91.220 ± 0.167 %
|
| 314 |
+
114 7.7198 ± 0.1310 0.02031 ± 0.00259 0.05434 ± 0.00121 7.465 ± 0.144 % 91.218 ± 0.166 %
|
| 315 |
+
115 7.6990 ± 0.1299 0.01995 ± 0.00259 0.05447 ± 0.00120 7.473 ± 0.143 % 91.229 ± 0.165 %
|
| 316 |
+
116 7.6985 ± 0.1294 0.02128 ± 0.00261 0.05619 ± 0.00122 7.657 ± 0.144 % 91.129 ± 0.165 %
|
| 317 |
+
117 7.6039 ± 0.1269 0.02249 ± 0.00264 0.05776 ± 0.00124 7.869 ± 0.145 % 91.111 ± 0.165 %
|
| 318 |
+
118 7.5185 ± 0.1247 0.02405 ± 0.00266 0.05927 ± 0.00126 8.025 ± 0.146 % 91.063 ± 0.164 %
|
| 319 |
+
119 7.4308 ± 0.1224 0.02576 ± 0.00267 0.06078 ± 0.00129 8.246 ± 0.147 % 91.050 ± 0.164 %
|
| 320 |
+
120 7.3513 ± 0.1203 0.02635 ± 0.00269 0.06266 ± 0.00132 8.464 ± 0.148 % 90.990 ± 0.164 %
|
| 321 |
+
121 7.2742 ± 0.1184 0.02745 ± 0.00271 0.06426 ± 0.00134 8.631 ± 0.148 % 90.964 ± 0.163 %
|
| 322 |
+
|
| 323 |
+
====== Perplexity statistics ======
Mean PPL(Q)                   : 7.274170 ± 0.118372
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 98.60%
Mean ln(PPL(Q)/PPL(base))     : 0.027446 ± 0.002712
Mean PPL(Q)/PPL(base)         : 1.027826 ± 0.002787
Mean PPL(Q)-PPL(base)         : 0.196931 ± 0.019861

====== KL divergence statistics ======
Mean    KLD:  0.064262 ± 0.001341
Maximum KLD: 11.492090
99.9%   KLD:  2.938713
99.0%   KLD:  0.970652
95.0%   KLD:  0.236258
90.0%   KLD:  0.117462
Median  KLD:  0.014982
10.0%   KLD:  0.000071
5.0%    KLD:  0.000012
1.0%    KLD:  0.000000
0.1%    KLD: -0.000002
Minimum KLD: -0.000004

====== Token probability statistics ======
Mean    Δp: -0.587 ± 0.049 %
Maximum Δp: 97.103%
99.9%   Δp: 62.267%
99.0%   Δp: 20.813%
95.0%   Δp:  7.679%
90.0%   Δp:  3.712%
75.0%   Δp:  0.494%
Median  Δp: -0.001%
25.0%   Δp: -0.828%
10.0%   Δp: -4.983%
5.0%    Δp: -10.011%
1.0%    Δp: -35.226%
0.1%    Δp: -81.043%
Minimum Δp: -96.341%
RMS Δp    :  8.631 ± 0.148 %
Same top p: 90.964 ± 0.163 %

llama_perf_context_print: load time = 45586.90 ms
llama_perf_context_print: prompt eval time = 82797.05 ms / 61952 tokens ( 1.34 ms per token, 748.24 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 97889.31 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1087 + ( 22594 = 21114 + 192 + 1287) + 452 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1411 + ( 22270 = 18296 + 800 + 3174) + 452 |
llama_memory_breakdown_print: | - Host | 115844 = 115740 + 0 + 104 |
```
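The summary blocks above are internally consistent: the reported mean PPL ratio is the exponential of the mean log ratio, and the per-token columns in the chunk table (KL Divergence, Δp, Same top p) compare the quantized model's next-token distribution against the BF16 reference at each position. The sketch below is not part of the benchmark log; it is a minimal Python illustration of how metrics of that kind can be computed from two probability vectors, plus a consistency check using only the IQ4_XS numbers quoted above. The exact per-token definitions and KL direction used by llama-perplexity are assumptions here and may differ in detail.

```python
import math

def per_token_metrics(p_base, p_quant, target):
    """Compare two next-token distributions at a single position.

    p_base, p_quant : probabilities over the same vocabulary (each sums to 1),
                      e.g. softmax of the BF16 reference logits and of the
                      quantized model's logits.
    target          : index of the token of interest (e.g. the actual next token).
    """
    # KL(base || quant); the direction is an assumption for illustration.
    kld = sum(pb * math.log(pb / pq) for pb, pq in zip(p_base, p_quant) if pb > 0)
    # Signed change in the probability assigned to the target token.
    delta_p = p_quant[target] - p_base[target]
    # Do both models pick the same most-likely token?
    top_base = max(range(len(p_base)), key=p_base.__getitem__)
    top_quant = max(range(len(p_quant)), key=p_quant.__getitem__)
    return kld, delta_p, top_base == top_quant

# Tiny usage example with made-up 4-token distributions.
print(per_token_metrics([0.7, 0.2, 0.05, 0.05], [0.6, 0.3, 0.05, 0.05], target=0))

# Consistency check of the IQ4_XS summary printed above.
print(math.exp(0.027446))    # exp(Mean ln(PPL(Q)/PPL(base))) ≈ 1.027826 = Mean PPL(Q)/PPL(base)
print(7.274170 - 7.077240)   # Mean PPL(Q) - Mean PPL(base)   ≈ 0.196931 = Mean PPL(Q)-PPL(base)
```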
kld_data/unsloth/MXFP4_MOE/MiniMax-M2.5-MXFP4_MOE.md
ADDED
|
@@ -0,0 +1,366 @@
### MiniMax-M2.5-MXFP4_MOE (unsloth)

115.27 GiB (4.33 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/MXFP4_MOE/MiniMax-M2.5-MXFP4_MOE-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 61785 used, -37914 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 61187 used, -37315 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 122973 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 77277 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 36558 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 9580 MiB used, 14291 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1275 MiB used, 22595 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 12 layers ( 1 overflowing), 22845 MiB used, 1026 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 51 layers (44 overflowing), 22283 MiB used, 1588 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 5.20 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 49 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/MXFP4_MOE/MiniMax-M2.5-MXFP4_MOE-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 38
|
| 78 |
+
llama_model_loader: - kv 46: split.no u16 = 0
|
| 79 |
+
llama_model_loader: - kv 47: split.tensors.count i32 = 809
|
| 80 |
+
llama_model_loader: - kv 48: split.count u16 = 4
|
| 81 |
+
llama_model_loader: - type f32: 373 tensors
|
| 82 |
+
llama_model_loader: - type q8_0: 250 tensors
|
| 83 |
+
llama_model_loader: - type mxfp4: 186 tensors
|
| 84 |
+
print_info: file format = GGUF V3 (latest)
|
| 85 |
+
print_info: file type = MXFP4 MoE
|
| 86 |
+
print_info: file size = 115.27 GiB (4.33 BPW)
|
| 87 |
+
load: 0 unused tokens
|
| 88 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 89 |
+
load: printing all EOG tokens:
|
| 90 |
+
load: - 200004 ('<fim_pad>')
|
| 91 |
+
load: - 200005 ('<reponame>')
|
| 92 |
+
load: - 200020 ('[e~[')
|
| 93 |
+
load: special tokens cache size = 54
|
| 94 |
+
load: token to piece cache size = 1.3355 MB
|
| 95 |
+
print_info: arch = minimax-m2
|
| 96 |
+
print_info: vocab_only = 0
|
| 97 |
+
print_info: no_alloc = 0
|
| 98 |
+
print_info: n_ctx_train = 196608
|
| 99 |
+
print_info: n_embd = 3072
|
| 100 |
+
print_info: n_embd_inp = 3072
|
| 101 |
+
print_info: n_layer = 62
|
| 102 |
+
print_info: n_head = 48
|
| 103 |
+
print_info: n_head_kv = 8
|
| 104 |
+
print_info: n_rot = 64
|
| 105 |
+
print_info: n_swa = 0
|
| 106 |
+
print_info: is_swa_any = 0
|
| 107 |
+
print_info: n_embd_head_k = 128
|
| 108 |
+
print_info: n_embd_head_v = 128
|
| 109 |
+
print_info: n_gqa = 6
|
| 110 |
+
print_info: n_embd_k_gqa = 1024
|
| 111 |
+
print_info: n_embd_v_gqa = 1024
|
| 112 |
+
print_info: f_norm_eps = 0.0e+00
|
| 113 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 114 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 115 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 116 |
+
print_info: f_logit_scale = 0.0e+00
|
| 117 |
+
print_info: f_attn_scale = 0.0e+00
|
| 118 |
+
print_info: n_ff = 1536
|
| 119 |
+
print_info: n_expert = 256
|
| 120 |
+
print_info: n_expert_used = 8
|
| 121 |
+
print_info: n_expert_groups = 0
|
| 122 |
+
print_info: n_group_used = 0
|
| 123 |
+
print_info: causal attn = 1
|
| 124 |
+
print_info: pooling type = 0
|
| 125 |
+
print_info: rope type = 2
|
| 126 |
+
print_info: rope scaling = linear
|
| 127 |
+
print_info: freq_base_train = 5000000.0
|
| 128 |
+
print_info: freq_scale_train = 1
|
| 129 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 130 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 131 |
+
print_info: rope_finetuned = unknown
|
| 132 |
+
print_info: model type = 230B.A10B
|
| 133 |
+
print_info: model params = 228.69 B
|
| 134 |
+
print_info: general.name = Minimax-M2.5
|
| 135 |
+
print_info: vocab type = BPE
|
| 136 |
+
print_info: n_vocab = 200064
|
| 137 |
+
print_info: n_merges = 199744
|
| 138 |
+
print_info: BOS token = 200034 ']~!b['
|
| 139 |
+
print_info: EOS token = 200020 '[e~['
|
| 140 |
+
print_info: UNK token = 200021 ']!d~['
|
| 141 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 142 |
+
print_info: LF token = 10 'Ċ'
|
| 143 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 144 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 145 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 146 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 147 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 148 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 149 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 150 |
+
print_info: EOG token = 200020 '[e~['
|
| 151 |
+
print_info: max token length = 256
|
| 152 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 153 |
+
load_tensors: offloading output layer to GPU
|
| 154 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 155 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 156 |
+
load_tensors: CPU_Mapped model buffer size = 46487.67 MiB
|
| 157 |
+
load_tensors: CPU_Mapped model buffer size = 47091.92 MiB
|
| 158 |
+
load_tensors: CPU_Mapped model buffer size = 23831.13 MiB
|
| 159 |
+
load_tensors: CUDA0 model buffer size = 21377.12 MiB
|
| 160 |
+
load_tensors: CUDA1 model buffer size = 18309.61 MiB
|
| 161 |
+
....................................................................................................
|
| 162 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 163 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 164 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 165 |
+
llama_context: constructing llama_context
|
| 166 |
+
llama_context: n_seq_max = 8
|
| 167 |
+
llama_context: n_ctx = 4096
|
| 168 |
+
llama_context: n_ctx_seq = 512
|
| 169 |
+
llama_context: n_batch = 4096
|
| 170 |
+
llama_context: n_ubatch = 4096
|
| 171 |
+
llama_context: causal_attn = 1
|
| 172 |
+
llama_context: flash_attn = enabled
|
| 173 |
+
llama_context: kv_unified = false
|
| 174 |
+
llama_context: freq_base = 5000000.0
|
| 175 |
+
llama_context: freq_scale = 1
|
| 176 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 177 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 178 |
+
llama_kv_cache: CUDA0 KV buffer size = 192.00 MiB
|
| 179 |
+
llama_kv_cache: CUDA1 KV buffer size = 800.00 MiB
|
| 180 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 181 |
+
sched_reserve: reserving ...
|
| 182 |
+
sched_reserve: CUDA0 compute buffer size = 1276.00 MiB
|
| 183 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 184 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 185 |
+
sched_reserve: graph nodes = 4099
|
| 186 |
+
sched_reserve: graph splits = 177 (with bs=4096), 93 (with bs=1)
|
| 187 |
+
sched_reserve: reserve took 22.54 ms, sched copies = 1
|
| 188 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 189 |
+
|
| 190 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 191 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 192 |
+
kl_divergence: 6.37 seconds per pass - ETA 1.60 minutes
|
| 193 |
+
|
| 194 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 195 |
+
1 6.6975 ± 1.3081 0.04967 ± 0.02792 0.05206 ± 0.01256 9.038 ± 2.103 % 91.373 ± 1.762 %
|
| 196 |
+
2 4.8704 ± 0.5968 0.04096 ± 0.01618 0.03654 ± 0.00650 6.975 ± 1.384 % 92.745 ± 1.150 %
|
| 197 |
+
3 4.5994 ± 0.4600 0.02444 ± 0.01379 0.03988 ± 0.00484 7.337 ± 0.937 % 93.203 ± 0.911 %
|
| 198 |
+
4 5.2175 ± 0.4671 0.01724 ± 0.01206 0.04036 ± 0.00381 6.898 ± 0.754 % 92.941 ± 0.802 %
|
| 199 |
+
5 5.0150 ± 0.4005 0.02421 ± 0.01463 0.05225 ± 0.00697 7.455 ± 0.764 % 92.863 ± 0.721 %
|
| 200 |
+
6 6.0874 ± 0.4767 0.01503 ± 0.01370 0.05528 ± 0.00592 7.101 ± 0.672 % 92.484 ± 0.674 %
|
| 201 |
+
7 5.6676 ± 0.3976 0.01542 ± 0.01280 0.06156 ± 0.00543 7.756 ± 0.569 % 91.709 ± 0.653 %
|
| 202 |
+
8 6.3734 ± 0.4241 0.00928 ± 0.01175 0.05914 ± 0.00478 7.422 ± 0.521 % 91.569 ± 0.615 %
|
| 203 |
+
9 6.2709 ± 0.3894 0.01235 ± 0.01065 0.05606 ± 0.00427 7.132 ± 0.485 % 91.547 ± 0.581 %
|
| 204 |
+
10 5.7268 ± 0.3306 0.01063 ± 0.00972 0.05299 ± 0.00386 6.987 ± 0.447 % 91.725 ± 0.546 %
|
| 205 |
+
11 6.2667 ± 0.3503 0.00892 ± 0.00909 0.05254 ± 0.00354 6.860 ± 0.417 % 91.373 ± 0.530 %
|
| 206 |
+
12 6.9499 ± 0.3777 0.01090 ± 0.00857 0.05291 ± 0.00347 6.674 ± 0.393 % 91.176 ± 0.513 %
|
| 207 |
+
13 7.2110 ± 0.3732 0.01051 ± 0.00806 0.05118 ± 0.00322 6.576 ± 0.377 % 91.192 ± 0.492 %
|
| 208 |
+
14 7.7654 ± 0.3918 0.00933 ± 0.00773 0.05097 ± 0.00301 6.489 ± 0.356 % 91.148 ± 0.475 %
|
| 209 |
+
15 8.1511 ± 0.3984 0.01168 ± 0.00734 0.05012 ± 0.00282 6.386 ± 0.339 % 91.190 ± 0.458 %
|
| 210 |
+
16 8.3937 ± 0.3967 0.00881 ± 0.00699 0.04860 ± 0.00265 6.293 ± 0.323 % 91.446 ± 0.438 %
|
| 211 |
+
17 8.6442 ± 0.3992 0.01110 ± 0.00691 0.05017 ± 0.00257 6.200 ± 0.310 % 91.396 ± 0.426 %
|
| 212 |
+
18 8.1229 ± 0.3615 0.00812 ± 0.00675 0.05082 ± 0.00249 6.161 ± 0.297 % 91.569 ± 0.410 %
|
| 213 |
+
19 8.2694 ± 0.3584 0.01019 ± 0.00649 0.04987 ± 0.00237 6.171 ± 0.286 % 91.620 ± 0.398 %
|
| 214 |
+
20 8.3377 ± 0.3524 0.01055 ± 0.00641 0.05236 ± 0.00237 6.371 ± 0.287 % 91.471 ± 0.391 %
|
| 215 |
+
21 8.3044 ± 0.3420 0.00912 ± 0.00620 0.05187 ± 0.00227 6.352 ± 0.276 % 91.503 ± 0.381 %
|
| 216 |
+
22 8.6162 ± 0.3499 0.00831 ± 0.00603 0.05217 ± 0.00220 6.326 ± 0.268 % 91.266 ± 0.377 %
|
| 217 |
+
23 8.6219 ± 0.3432 0.00775 ± 0.00604 0.05382 ± 0.00217 6.443 ± 0.262 % 91.185 ± 0.370 %
|
| 218 |
+
24 9.0267 ± 0.3538 0.00757 ± 0.00585 0.05341 ± 0.00208 6.361 ± 0.255 % 91.176 ± 0.363 %
|
| 219 |
+
25 9.0037 ± 0.3464 0.00640 ± 0.00576 0.05451 ± 0.00207 6.503 ± 0.254 % 91.012 ± 0.358 %
|
| 220 |
+
26 8.4459 ± 0.3152 0.01105 ± 0.00588 0.06162 ± 0.00237 7.765 ± 0.293 % 90.905 ± 0.353 %
|
| 221 |
+
27 8.0863 ± 0.2941 0.02148 ± 0.00625 0.07197 ± 0.00291 8.893 ± 0.317 % 90.777 ± 0.349 %
|
| 222 |
+
28 8.2051 ± 0.2940 0.02239 ± 0.00611 0.07138 ± 0.00281 8.808 ± 0.310 % 90.742 ± 0.343 %
|
| 223 |
+
29 8.1337 ± 0.2867 0.02312 ± 0.00596 0.07071 ± 0.00272 8.714 ± 0.302 % 90.832 ± 0.336 %
|
| 224 |
+
30 7.6134 ± 0.2615 0.02422 ± 0.00585 0.06968 ± 0.00273 8.718 ± 0.300 % 91.059 ± 0.326 %
|
| 225 |
+
31 7.1749 ± 0.2403 0.02426 ± 0.00570 0.06842 ± 0.00266 8.663 ± 0.294 % 91.271 ± 0.317 %
|
| 226 |
+
32 6.9883 ± 0.2285 0.02315 ± 0.00556 0.06726 ± 0.00258 8.606 ± 0.287 % 91.397 ± 0.310 %
|
| 227 |
+
33 6.8524 ± 0.2190 0.02340 ± 0.00542 0.06615 ± 0.00250 8.545 ± 0.281 % 91.468 ± 0.305 %
|
| 228 |
+
34 7.0372 ± 0.2226 0.02300 ± 0.00540 0.06786 ± 0.00247 8.566 ± 0.274 % 91.292 ± 0.303 %
|
| 229 |
+
35 7.1526 ± 0.2253 0.02462 ± 0.00541 0.06994 ± 0.00244 8.666 ± 0.268 % 91.025 ± 0.303 %
|
| 230 |
+
36 7.2116 ± 0.2249 0.02395 ± 0.00533 0.06945 ± 0.00238 8.626 ± 0.263 % 91.035 ± 0.298 %
|
| 231 |
+
37 7.2321 ± 0.2226 0.02451 ± 0.00536 0.07285 ± 0.00257 8.962 ± 0.270 % 90.959 ± 0.295 %
|
| 232 |
+
38 7.4552 ± 0.2276 0.02515 ± 0.00525 0.07240 ± 0.00251 8.929 ± 0.265 % 90.939 ± 0.292 %
|
| 233 |
+
39 7.4121 ± 0.2230 0.02630 ± 0.00521 0.07419 ± 0.00252 9.166 ± 0.266 % 90.840 ± 0.289 %
|
| 234 |
+
40 7.2004 ± 0.2123 0.03225 ± 0.00530 0.08207 ± 0.00274 9.835 ± 0.270 % 90.598 ± 0.289 %
|
| 235 |
+
41 7.0321 ± 0.2035 0.03943 ± 0.00545 0.09043 ± 0.00294 10.507 ± 0.273 % 90.368 ± 0.289 %
|
| 236 |
+
42 6.8405 ± 0.1943 0.04312 ± 0.00561 0.09804 ± 0.00314 11.164 ± 0.279 % 90.215 ± 0.287 %
|
| 237 |
+
43 6.6622 ± 0.1859 0.04785 ± 0.00567 0.10348 ± 0.00326 11.584 ± 0.279 % 90.105 ± 0.285 %
|
| 238 |
+
44 6.6077 ± 0.1815 0.04596 ± 0.00556 0.10189 ± 0.00319 11.489 ± 0.275 % 90.125 ± 0.282 %
|
| 239 |
+
45 6.7523 ± 0.1844 0.04571 ± 0.00549 0.10133 ± 0.00313 11.398 ± 0.272 % 90.083 ± 0.279 %
|
| 240 |
+
46 6.8944 ± 0.1866 0.04447 ± 0.00539 0.10013 ± 0.00306 11.292 ± 0.268 % 90.068 ± 0.276 %
|
| 241 |
+
47 7.0442 ± 0.1891 0.04375 ± 0.00529 0.09871 ± 0.00300 11.183 ± 0.265 % 90.054 ± 0.273 %
|
| 242 |
+
48 6.9233 ± 0.1829 0.04309 ± 0.00520 0.09726 ± 0.00294 11.092 ± 0.262 % 90.090 ± 0.270 %
|
| 243 |
+
49 7.0236 ± 0.1838 0.04246 ± 0.00526 0.10136 ± 0.00327 11.148 ± 0.261 % 90.012 ± 0.268 %
|
| 244 |
+
50 7.1193 ± 0.1854 0.04184 ± 0.00520 0.10060 ± 0.00321 11.089 ± 0.259 % 89.961 ± 0.266 %
|
| 245 |
+
51 7.2258 ± 0.1865 0.04129 ± 0.00511 0.09939 ± 0.00315 11.007 ± 0.256 % 89.988 ± 0.263 %
|
| 246 |
+
52 7.2907 ± 0.1862 0.04093 ± 0.00505 0.09904 ± 0.00309 10.932 ± 0.253 % 89.932 ± 0.261 %
|
| 247 |
+
53 7.3899 ± 0.1868 0.03865 ± 0.00499 0.09809 ± 0.00303 10.860 ± 0.250 % 89.885 ± 0.259 %
|
| 248 |
+
54 7.4424 ± 0.1860 0.03794 ± 0.00491 0.09697 ± 0.00298 10.777 ± 0.247 % 89.913 ± 0.257 %
|
| 249 |
+
55 7.4928 ± 0.1853 0.03766 ± 0.00483 0.09581 ± 0.00293 10.699 ± 0.244 % 89.882 ± 0.255 %
|
| 250 |
+
56 7.5299 ± 0.1846 0.03688 ± 0.00476 0.09478 ± 0.00288 10.623 ± 0.242 % 89.853 ± 0.253 %
|
| 251 |
+
57 7.5400 ± 0.1832 0.03827 ± 0.00478 0.09519 ± 0.00285 10.597 ± 0.239 % 89.866 ± 0.250 %
|
| 252 |
+
58 7.5511 ± 0.1819 0.03838 ± 0.00473 0.09443 ± 0.00281 10.553 ± 0.236 % 89.872 ± 0.248 %
|
| 253 |
+
59 7.5070 ± 0.1790 0.03765 ± 0.00466 0.09312 ± 0.00276 10.476 ± 0.234 % 89.937 ± 0.245 %
|
| 254 |
+
60 7.5208 ± 0.1779 0.03801 ± 0.00461 0.09271 ± 0.00274 10.422 ± 0.231 % 89.941 ± 0.243 %
|
| 255 |
+
61 7.5636 ± 0.1774 0.03704 ± 0.00455 0.09209 ± 0.00270 10.380 ± 0.229 % 89.926 ± 0.241 %
|
| 256 |
+
62 7.5275 ± 0.1753 0.03553 ± 0.00451 0.09132 ± 0.00265 10.345 ± 0.227 % 89.987 ± 0.239 %
|
| 257 |
+
63 7.5759 ± 0.1755 0.03561 ± 0.00446 0.09097 ± 0.00262 10.302 ± 0.225 % 89.953 ± 0.237 %
|
| 258 |
+
64 7.5499 ± 0.1731 0.03475 ± 0.00441 0.09032 ± 0.00258 10.253 ± 0.222 % 89.969 ± 0.235 %
|
| 259 |
+
65 7.5337 ± 0.1714 0.03377 ± 0.00437 0.09005 ± 0.00254 10.227 ± 0.220 % 90.003 ± 0.233 %
|
| 260 |
+
66 7.5670 ± 0.1711 0.03318 ± 0.00432 0.08961 ± 0.00251 10.183 ± 0.218 % 90.006 ± 0.231 %
|
| 261 |
+
67 7.5775 ± 0.1702 0.03363 ± 0.00427 0.08908 ± 0.00247 10.127 ± 0.216 % 90.020 ± 0.229 %
|
| 262 |
+
68 7.5279 ± 0.1676 0.03302 ± 0.00423 0.08821 ± 0.00244 10.064 ± 0.214 % 90.087 ± 0.227 %
|
| 263 |
+
69 7.5648 ± 0.1674 0.03347 ± 0.00419 0.08776 ± 0.00240 10.016 ± 0.212 % 90.111 ± 0.225 %
|
| 264 |
+
70 7.5313 ± 0.1652 0.03280 ± 0.00416 0.08755 ± 0.00237 10.014 ± 0.210 % 90.146 ± 0.223 %
|
| 265 |
+
71 7.5095 ± 0.1635 0.03191 ± 0.00412 0.08700 ± 0.00234 9.960 ± 0.209 % 90.163 ± 0.221 %
|
| 266 |
+
72 7.5361 ± 0.1632 0.03326 ± 0.00409 0.08677 ± 0.00233 9.922 ± 0.207 % 90.191 ± 0.220 %
|
| 267 |
+
73 7.5400 ± 0.1621 0.03305 ± 0.00405 0.08627 ± 0.00230 9.873 ± 0.205 % 90.137 ± 0.219 %
|
| 268 |
+
74 7.5309 ± 0.1607 0.03267 ± 0.00401 0.08587 ± 0.00227 9.830 ± 0.203 % 90.185 ± 0.217 %
|
| 269 |
+
75 7.5302 ± 0.1596 0.03166 ± 0.00399 0.08605 ± 0.00225 9.834 ± 0.201 % 90.133 ± 0.216 %
|
| 270 |
+
76 7.5895 ± 0.1599 0.03102 ± 0.00396 0.08582 ± 0.00222 9.811 ± 0.200 % 90.098 ± 0.215 %
|
| 271 |
+
77 7.5856 ± 0.1588 0.03077 ± 0.00393 0.08534 ± 0.00220 9.763 ± 0.198 % 90.094 ± 0.213 %
|
| 272 |
+
78 7.5977 ± 0.1582 0.03054 ± 0.00390 0.08499 ± 0.00217 9.720 ± 0.197 % 90.080 ± 0.212 %
|
| 273 |
+
79 7.6058 ± 0.1574 0.03015 ± 0.00386 0.08458 ± 0.00214 9.679 ± 0.195 % 90.137 ± 0.210 %
|
| 274 |
+
80 7.6068 ± 0.1569 0.03003 ± 0.00386 0.08485 ± 0.00213 9.666 ± 0.193 % 90.088 ± 0.209 %
|
| 275 |
+
81 7.5812 ± 0.1554 0.02975 ± 0.00383 0.08437 ± 0.00210 9.627 ± 0.192 % 90.123 ± 0.208 %
|
| 276 |
+
82 7.5627 ± 0.1539 0.03024 ± 0.00379 0.08406 ± 0.00208 9.605 ± 0.190 % 90.143 ± 0.206 %
|
| 277 |
+
83 7.5946 ± 0.1534 0.03014 ± 0.00375 0.08352 ± 0.00205 9.564 ± 0.189 % 90.154 ± 0.205 %
|
| 278 |
+
84 7.6067 ± 0.1524 0.02954 ± 0.00372 0.08294 ± 0.00203 9.522 ± 0.187 % 90.098 ± 0.204 %
|
| 279 |
+
85 7.5968 ± 0.1510 0.02879 ± 0.00368 0.08244 ± 0.00201 9.485 ± 0.186 % 90.085 ± 0.203 %
|
| 280 |
+
86 7.5287 ± 0.1483 0.02883 ± 0.00365 0.08212 ± 0.00199 9.464 ± 0.184 % 90.087 ± 0.202 %
|
| 281 |
+
87 7.4666 ± 0.1457 0.02835 ± 0.00362 0.08180 ± 0.00196 9.437 ± 0.183 % 90.106 ± 0.200 %
|
| 282 |
+
88 7.4036 ± 0.1432 0.02821 ± 0.00360 0.08160 ± 0.00195 9.425 ± 0.181 % 90.120 ± 0.199 %
|
| 283 |
+
89 7.3300 ± 0.1405 0.02819 ± 0.00357 0.08119 ± 0.00193 9.396 ± 0.180 % 90.139 ± 0.198 %
|
| 284 |
+
90 7.2735 ± 0.1383 0.02816 ± 0.00354 0.08082 ± 0.00191 9.360 ± 0.179 % 90.179 ± 0.196 %
|
| 285 |
+
91 7.2231 ± 0.1362 0.02831 ± 0.00351 0.08042 ± 0.00189 9.351 ± 0.177 % 90.209 ± 0.195 %
|
| 286 |
+
92 7.1647 ± 0.1340 0.02825 ± 0.00348 0.08028 ± 0.00188 9.342 ± 0.176 % 90.200 ± 0.194 %
|
| 287 |
+
93 7.1725 ± 0.1336 0.02685 ± 0.00348 0.08064 ± 0.00191 9.339 ± 0.176 % 90.226 ± 0.193 %
|
| 288 |
+
94 7.2037 ± 0.1334 0.02685 ± 0.00345 0.08013 ± 0.00189 9.301 ± 0.174 % 90.250 ± 0.192 %
|
| 289 |
+
95 7.3109 ± 0.1351 0.02642 ± 0.00342 0.07993 ± 0.00187 9.270 ± 0.173 % 90.241 ± 0.191 %
|
| 290 |
+
96 7.4071 ± 0.1363 0.02655 ± 0.00340 0.07967 ± 0.00185 9.234 ± 0.172 % 90.192 ± 0.190 %
|
| 291 |
+
97 7.4816 ± 0.1370 0.02590 ± 0.00337 0.07924 ± 0.00184 9.193 ± 0.171 % 90.168 ± 0.189 %
|
| 292 |
+
98 7.6221 ± 0.1395 0.02634 ± 0.00335 0.07879 ± 0.00182 9.151 ± 0.170 % 90.152 ± 0.188 %
|
| 293 |
+
99 7.7417 ± 0.1413 0.02635 ± 0.00332 0.07845 ± 0.00180 9.110 ± 0.169 % 90.129 ± 0.188 %
|
| 294 |
+
100 7.7776 ± 0.1413 0.02609 ± 0.00330 0.07826 ± 0.00178 9.092 ± 0.168 % 90.125 ± 0.187 %
|
| 295 |
+
101 7.8113 ± 0.1414 0.02558 ± 0.00328 0.07803 ± 0.00177 9.068 ± 0.167 % 90.111 ± 0.186 %
|
| 296 |
+
102 7.8698 ± 0.1423 0.02515 ± 0.00326 0.07805 ± 0.00175 9.054 ± 0.166 % 90.100 ± 0.185 %
|
| 297 |
+
103 7.8411 ± 0.1412 0.02487 ± 0.00324 0.07767 ± 0.00174 9.035 ± 0.165 % 90.116 ± 0.184 %
|
| 298 |
+
104 7.7873 ± 0.1394 0.02549 ± 0.00324 0.07840 ± 0.00175 9.144 ± 0.167 % 90.128 ± 0.183 %
|
| 299 |
+
105 7.6823 ± 0.1366 0.02643 ± 0.00327 0.07888 ± 0.00176 9.221 ± 0.168 % 90.155 ± 0.182 %
|
| 300 |
+
106 7.5550 ± 0.1334 0.02702 ± 0.00326 0.07859 ± 0.00175 9.229 ± 0.167 % 90.233 ± 0.181 %
|
| 301 |
+
107 7.6131 ± 0.1337 0.02695 ± 0.00324 0.07816 ± 0.00173 9.199 ± 0.166 % 90.229 ± 0.180 %
|
| 302 |
+
108 7.6210 ± 0.1332 0.02620 ± 0.00321 0.07781 ± 0.00172 9.171 ± 0.165 % 90.251 ± 0.179 %
|
| 303 |
+
109 7.6443 ± 0.1331 0.02653 ± 0.00319 0.07756 ± 0.00170 9.148 ± 0.164 % 90.254 ± 0.178 %
|
| 304 |
+
110 7.6758 ± 0.1331 0.02596 ± 0.00317 0.07722 ± 0.00169 9.118 ± 0.163 % 90.296 ± 0.177 %
|
| 305 |
+
111 7.7227 ± 0.1332 0.02574 ± 0.00315 0.07692 ± 0.00168 9.084 ± 0.162 % 90.302 ± 0.176 %
|
| 306 |
+
112 7.7307 ± 0.1327 0.02543 ± 0.00313 0.07651 ± 0.00166 9.056 ± 0.161 % 90.319 ± 0.175 %
|
| 307 |
+
113 7.7408 ± 0.1321 0.02536 ± 0.00310 0.07612 ± 0.00165 9.027 ± 0.161 % 90.321 ± 0.174 %
|
| 308 |
+
114 7.7566 ± 0.1319 0.02506 ± 0.00308 0.07578 ± 0.00163 8.996 ± 0.160 % 90.323 ± 0.173 %
|
| 309 |
+
115 7.7396 ± 0.1310 0.02521 ± 0.00308 0.07592 ± 0.00162 9.002 ± 0.159 % 90.309 ± 0.173 %
|
| 310 |
+
116 7.7327 ± 0.1303 0.02571 ± 0.00310 0.07812 ± 0.00165 9.177 ± 0.159 % 90.210 ± 0.173 %
|
| 311 |
+
117 7.6453 ± 0.1279 0.02792 ± 0.00315 0.08036 ± 0.00168 9.340 ± 0.159 % 90.186 ± 0.172 %
|
| 312 |
+
118 7.5633 ± 0.1258 0.02998 ± 0.00319 0.08314 ± 0.00173 9.577 ± 0.160 % 90.126 ± 0.172 %
|
| 313 |
+
119 7.4767 ± 0.1235 0.03193 ± 0.00321 0.08532 ± 0.00176 9.835 ± 0.162 % 90.061 ± 0.172 %
|
| 314 |
+
120 7.4040 ± 0.1216 0.03349 ± 0.00323 0.08800 ± 0.00180 10.103 ± 0.164 % 89.974 ± 0.172 %
|
| 315 |
+
121 7.3273 ± 0.1196 0.03473 ± 0.00324 0.09020 ± 0.00183 10.303 ± 0.165 % 89.934 ± 0.171 %
|
| 316 |
+
|
| 317 |
+
====== Perplexity statistics ======
Mean PPL(Q)                   : 7.327326 ± 0.119622
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 98.01%
Mean ln(PPL(Q)/PPL(base))     : 0.034727 ± 0.003244
Mean PPL(Q)/PPL(base)         : 1.035337 ± 0.003359
Mean PPL(Q)-PPL(base)         : 0.250086 ± 0.023933

====== KL divergence statistics ======
Mean    KLD:  0.090198 ± 0.001826
Maximum KLD: 11.834617
99.9%   KLD:  4.447414
99.0%   KLD:  1.398163
95.0%   KLD:  0.339766
90.0%   KLD:  0.158955
Median  KLD:  0.019966
10.0%   KLD:  0.000110
5.0%    KLD:  0.000018
1.0%    KLD:  0.000001
0.1%    KLD: -0.000001
Minimum KLD: -0.000004

====== Token probability statistics ======
Mean    Δp: -0.666 ± 0.059 %
Maximum Δp: 97.807%
99.9%   Δp: 70.726%
99.0%   Δp: 24.093%
95.0%   Δp:  8.912%
90.0%   Δp:  4.746%
75.0%   Δp:  0.750%
Median  Δp: -0.001%
25.0%   Δp: -0.799%
10.0%   Δp: -5.444%
5.0%    Δp: -11.920%
1.0%    Δp: -45.752%
0.1%    Δp: -89.542%
Minimum Δp: -99.668%
RMS Δp    : 10.303 ± 0.165 %
Same top p: 89.934 ± 0.171 %

llama_perf_context_print: load time = 45893.88 ms
llama_perf_context_print: prompt eval time = 83989.05 ms / 61952 tokens ( 1.36 ms per token, 737.62 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 99169.13 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 869 + ( 22845 = 21377 + 192 + 1275) + 420 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1399 + ( 22283 = 18309 + 800 + 3174) + 451 |
llama_memory_breakdown_print: | - Host | 117514 = 117410 + 0 + 104 |
```
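Both logs above were produced against the same BF16 reference logits (`ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin`) and the same calibration text, so their headline numbers can be set side by side. The snippet below is not part of the uploaded reports; it only collects figures already quoted above (file size, mean KLD, mean PPL ratio, top-1 agreement) to show that in these runs the IQ4_XS quant reports lower divergence than MXFP4_MOE at a slightly smaller file size.

```python
# Headline figures copied verbatim from the two logs above (same reference logits, same text).
runs = {
    "IQ4_XS":    {"size_gib": 113.52, "bpw": 4.26, "mean_kld": 0.064262, "ppl_ratio": 1.027826, "same_top": 90.964},
    "MXFP4_MOE": {"size_gib": 115.27, "bpw": 4.33, "mean_kld": 0.090198, "ppl_ratio": 1.035337, "same_top": 89.934},
}

for name, r in runs.items():
    print(f"{name:10s}  {r['size_gib']:7.2f} GiB  {r['bpw']:.2f} BPW  "
          f"mean KLD {r['mean_kld']:.6f}  PPL ratio {r['ppl_ratio']:.6f}  "
          f"same top-1 {r['same_top']:.3f}%")
```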
kld_data/unsloth/MiniMax-M2.5-UD-TQ1_0.md
ADDED
|
@@ -0,0 +1,370 @@
### MiniMax-M2.5-UD-TQ1_0 (unsloth)

51.93 GiB (1.95 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/MiniMax-M2.5-UD-TQ1_0.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 26904 used, -3032 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 31503 used, -7631 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 58407 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 12711 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38241 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 6736 MiB used, 17134 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1344 MiB used, 22526 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 27 layers ( 1 overflowing), 22815 MiB used, 1056 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 36 layers (17 overflowing), 22622 MiB used, 1249 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.05 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: loaded meta data with 50 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/MiniMax-M2.5-UD-TQ1_0.gguf (version GGUF V3 (latest))
|
| 30 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 31 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 32 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 33 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 34 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 35 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 36 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 37 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 39 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 40 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 41 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 42 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 43 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 44 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 45 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 46 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 47 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 48 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 49 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 50 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 51 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 52 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 53 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 54 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 55 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 56 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 57 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 58 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 59 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 60 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 61 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 63 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 64 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 65 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 66 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 67 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 68 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 69 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 70 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 71 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 72 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 73 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 74 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 75 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 76 |
+
llama_model_loader: - kv 45: general.file_type u32 = 24
|
| 77 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 78 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 79 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 80 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 81 |
+
llama_model_loader: - type f32: 373 tensors
|
| 82 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 83 |
+
llama_model_loader: - type q5_K: 18 tensors
|
| 84 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 85 |
+
llama_model_loader: - type iq2_xxs: 29 tensors
|
| 86 |
+
llama_model_loader: - type iq3_xxs: 104 tensors
|
| 87 |
+
llama_model_loader: - type iq1_s: 135 tensors
|
| 88 |
+
llama_model_loader: - type iq3_s: 103 tensors
|
| 89 |
+
llama_model_loader: - type iq4_xs: 35 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = IQ1_S - 1.5625 bpw
|
| 92 |
+
print_info: file size = 51.93 GiB (1.95 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 52693.83 MiB
|
| 163 |
+
load_tensors: CUDA0 model buffer size = 21038.22 MiB
|
| 164 |
+
load_tensors: CUDA1 model buffer size = 18888.73 MiB
|
| 165 |
+
.................................................................................................
|
| 166 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 167 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 168 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 169 |
+
llama_context: constructing llama_context
|
| 170 |
+
llama_context: n_seq_max = 8
|
| 171 |
+
llama_context: n_ctx = 4096
|
| 172 |
+
llama_context: n_ctx_seq = 512
|
| 173 |
+
llama_context: n_batch = 4096
|
| 174 |
+
llama_context: n_ubatch = 4096
|
| 175 |
+
llama_context: causal_attn = 1
|
| 176 |
+
llama_context: flash_attn = enabled
|
| 177 |
+
llama_context: kv_unified = false
|
| 178 |
+
llama_context: freq_base = 5000000.0
|
| 179 |
+
llama_context: freq_scale = 1
|
| 180 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 181 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 182 |
+
llama_kv_cache: CUDA0 KV buffer size = 432.00 MiB
|
| 183 |
+
llama_kv_cache: CUDA1 KV buffer size = 560.00 MiB
|
| 184 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 185 |
+
sched_reserve: reserving ...
|
| 186 |
+
sched_reserve: CUDA0 compute buffer size = 1345.00 MiB
|
| 187 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 188 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 189 |
+
sched_reserve: graph nodes = 4099
|
| 190 |
+
sched_reserve: graph splits = 69 (with bs=4096), 39 (with bs=1)
|
| 191 |
+
sched_reserve: reserve took 23.18 ms, sched copies = 1
|
| 192 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 193 |
+
|
| 194 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 195 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 196 |
+
kl_divergence: 3.62 seconds per pass - ETA 0.90 minutes
|
| 197 |
+
|
| 198 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 199 |
+
1 6.5687 ± 1.2285 0.03025 ± 0.06493 0.30776 ± 0.05115 15.087 ± 1.681 % 80.784 ± 2.472 %
|
| 200 |
+
2 4.9186 ± 0.5828 0.05082 ± 0.04197 0.24707 ± 0.02886 14.050 ± 1.267 % 83.333 ± 1.652 %
|
| 201 |
+
3 5.1409 ± 0.5104 0.13575 ± 0.04107 0.32601 ± 0.02620 18.349 ± 1.138 % 81.830 ± 1.395 %
|
| 202 |
+
4 5.7461 ± 0.5057 0.11374 ± 0.03598 0.34642 ± 0.02203 18.893 ± 0.960 % 79.608 ± 1.262 %
|
| 203 |
+
5 5.8929 ± 0.4719 0.18553 ± 0.03533 0.39689 ± 0.02274 20.648 ± 0.923 % 79.059 ± 1.140 %
|
| 204 |
+
6 7.2247 ± 0.5698 0.18631 ± 0.03308 0.42816 ± 0.02137 20.069 ± 0.835 % 77.190 ± 1.073 %
|
| 205 |
+
7 7.2041 ± 0.5199 0.25531 ± 0.03363 0.49804 ± 0.02402 21.990 ± 0.794 % 76.303 ± 1.007 %
|
| 206 |
+
8 7.9326 ± 0.5357 0.22812 ± 0.03111 0.48562 ± 0.02132 21.288 ± 0.729 % 75.735 ± 0.949 %
|
| 207 |
+
9 7.7451 ± 0.4904 0.22349 ± 0.02844 0.46433 ± 0.01952 20.610 ± 0.679 % 76.166 ± 0.890 %
|
| 208 |
+
10 7.0186 ± 0.4138 0.21404 ± 0.02628 0.44570 ± 0.01790 20.519 ± 0.635 % 76.627 ± 0.838 %
|
| 209 |
+
11 7.6754 ± 0.4367 0.21168 ± 0.02481 0.44693 ± 0.01686 20.352 ± 0.603 % 75.936 ± 0.807 %
|
| 210 |
+
12 8.4308 ± 0.4640 0.20407 ± 0.02336 0.43751 ± 0.01564 19.754 ± 0.574 % 75.719 ± 0.775 %
|
| 211 |
+
13 8.6523 ± 0.4524 0.19272 ± 0.02182 0.41900 ± 0.01452 19.202 ± 0.548 % 76.018 ± 0.742 %
|
| 212 |
+
14 9.2500 ± 0.4689 0.18428 ± 0.02143 0.42386 ± 0.01406 18.932 ± 0.524 % 75.490 ± 0.720 %
|
| 213 |
+
15 9.6381 ± 0.4726 0.17925 ± 0.02045 0.41798 ± 0.01321 18.712 ± 0.501 % 75.399 ± 0.696 %
|
| 214 |
+
16 9.8949 ± 0.4688 0.17334 ± 0.01945 0.40479 ± 0.01245 18.354 ± 0.482 % 75.564 ± 0.673 %
|
| 215 |
+
17 10.2318 ± 0.4740 0.17971 ± 0.01900 0.41951 ± 0.01254 18.276 ± 0.469 % 75.386 ± 0.654 %
|
| 216 |
+
18 9.6321 ± 0.4313 0.17853 ± 0.01850 0.42643 ± 0.01255 18.314 ± 0.460 % 75.599 ± 0.634 %
|
| 217 |
+
19 9.7141 ± 0.4206 0.17121 ± 0.01784 0.41660 ± 0.01195 18.187 ± 0.442 % 75.645 ± 0.617 %
|
| 218 |
+
20 9.9077 ± 0.4192 0.18307 ± 0.01772 0.43244 ± 0.01185 18.466 ± 0.432 % 75.196 ± 0.605 %
|
| 219 |
+
21 9.9058 ± 0.4099 0.18545 ± 0.01730 0.42874 ± 0.01145 18.413 ± 0.420 % 75.331 ± 0.589 %
|
| 220 |
+
22 10.3633 ± 0.4238 0.19293 ± 0.01690 0.43436 ± 0.01116 18.481 ± 0.413 % 74.973 ± 0.578 %
|
| 221 |
+
23 10.4896 ± 0.4215 0.20383 ± 0.01689 0.44795 ± 0.01146 18.820 ± 0.410 % 74.851 ± 0.567 %
|
| 222 |
+
24 10.9061 ± 0.4297 0.19670 ± 0.01641 0.44524 ± 0.01104 18.614 ± 0.400 % 74.608 ± 0.556 %
|
| 223 |
+
25 10.9540 ± 0.4225 0.20248 ± 0.01621 0.45119 ± 0.01082 18.903 ± 0.392 % 74.353 ± 0.547 %
|
| 224 |
+
26 10.9535 ± 0.4126 0.27104 ± 0.01743 0.51047 ± 0.01228 21.907 ± 0.419 % 73.454 ± 0.542 %
|
| 225 |
+
27 11.1679 ± 0.4125 0.34436 ± 0.01847 0.58038 ± 0.01369 24.575 ± 0.429 % 72.317 ± 0.539 %
|
| 226 |
+
28 11.3088 ± 0.4106 0.34322 ± 0.01809 0.58166 ± 0.01330 24.562 ± 0.420 % 72.115 ± 0.531 %
|
| 227 |
+
29 11.1874 ± 0.3994 0.34189 ± 0.01775 0.58090 ± 0.01301 24.529 ± 0.412 % 72.211 ± 0.521 %
|
| 228 |
+
30 10.4448 ± 0.3646 0.34041 ± 0.01741 0.57026 ± 0.01276 24.484 ± 0.405 % 72.889 ± 0.508 %
|
| 229 |
+
31 9.8191 ± 0.3353 0.33801 ± 0.01701 0.56221 ± 0.01251 24.490 ± 0.399 % 73.409 ± 0.497 %
|
| 230 |
+
32 9.5454 ± 0.3187 0.33497 ± 0.01664 0.55699 ± 0.01219 24.462 ± 0.390 % 73.566 ± 0.488 %
|
| 231 |
+
33 9.2733 ± 0.3030 0.32595 ± 0.01626 0.54982 ± 0.01186 24.322 ± 0.382 % 73.654 ± 0.480 %
|
| 232 |
+
34 9.5450 ± 0.3085 0.32780 ± 0.01600 0.55862 ± 0.01168 24.236 ± 0.376 % 73.230 ± 0.476 %
|
| 233 |
+
35 9.7218 ± 0.3116 0.33151 ± 0.01587 0.56839 ± 0.01154 24.375 ± 0.369 % 73.053 ± 0.470 %
|
| 234 |
+
36 9.7478 ± 0.3091 0.32530 ± 0.01559 0.56605 ± 0.01130 24.276 ± 0.363 % 73.028 ± 0.463 %
|
| 235 |
+
37 9.9644 ± 0.3121 0.34500 ± 0.01572 0.58528 ± 0.01158 25.081 ± 0.363 % 72.623 ± 0.459 %
|
| 236 |
+
38 10.2063 ± 0.3164 0.33925 ± 0.01540 0.58114 ± 0.01132 24.897 ± 0.358 % 72.632 ± 0.453 %
|
| 237 |
+
39 10.3484 ± 0.3177 0.36002 ± 0.01554 0.59736 ± 0.01153 25.359 ± 0.356 % 72.428 ± 0.448 %
|
| 238 |
+
40 10.6455 ± 0.3234 0.42325 ± 0.01627 0.65366 ± 0.01242 26.943 ± 0.356 % 71.539 ± 0.447 %
|
| 239 |
+
41 10.9618 ± 0.3296 0.48335 ± 0.01695 0.71191 ± 0.01326 28.440 ± 0.356 % 70.617 ± 0.446 %
|
| 240 |
+
42 11.2093 ± 0.3339 0.53700 ± 0.01743 0.76122 ± 0.01383 29.811 ± 0.355 % 69.879 ± 0.443 %
|
| 241 |
+
43 11.3019 ± 0.3327 0.57637 ± 0.01767 0.79598 ± 0.01413 30.762 ± 0.351 % 69.366 ± 0.440 %
|
| 242 |
+
44 11.0748 ± 0.3208 0.56240 ± 0.01734 0.78574 ± 0.01384 30.528 ± 0.347 % 69.528 ± 0.435 %
|
| 243 |
+
45 11.2498 ± 0.3238 0.55618 ± 0.01708 0.78157 ± 0.01359 30.289 ± 0.343 % 69.473 ± 0.430 %
|
| 244 |
+
46 11.3797 ± 0.3238 0.54559 ± 0.01680 0.77339 ± 0.01333 30.031 ± 0.339 % 69.531 ± 0.425 %
|
| 245 |
+
47 11.5295 ± 0.3247 0.53646 ± 0.01649 0.76226 ± 0.01307 29.746 ± 0.336 % 69.762 ± 0.420 %
|
| 246 |
+
48 11.2495 ± 0.3121 0.52852 ± 0.01619 0.75217 ± 0.01283 29.545 ± 0.332 % 69.902 ± 0.415 %
|
| 247 |
+
49 11.3198 ± 0.3109 0.51973 ± 0.01600 0.75051 ± 0.01276 29.349 ± 0.328 % 69.924 ± 0.410 %
|
| 248 |
+
50 11.4660 ± 0.3134 0.51842 ± 0.01583 0.74867 ± 0.01258 29.231 ± 0.325 % 69.859 ± 0.406 %
|
| 249 |
+
51 11.5769 ± 0.3131 0.51264 ± 0.01557 0.74088 ± 0.01235 29.033 ± 0.321 % 69.873 ± 0.402 %
|
| 250 |
+
52 11.6496 ± 0.3115 0.50959 ± 0.01538 0.73850 ± 0.01217 28.882 ± 0.318 % 69.811 ± 0.399 %
|
| 251 |
+
53 11.7470 ± 0.3106 0.50213 ± 0.01515 0.73230 ± 0.01196 28.707 ± 0.315 % 69.693 ± 0.395 %
|
| 252 |
+
54 11.7747 ± 0.3078 0.49670 ± 0.01492 0.72470 ± 0.01176 28.502 ± 0.311 % 69.622 ± 0.392 %
|
| 253 |
+
55 11.7816 ± 0.3045 0.49027 ± 0.01468 0.71645 ± 0.01156 28.298 ± 0.308 % 69.668 ± 0.388 %
|
| 254 |
+
56 11.7881 ± 0.3017 0.48509 ± 0.01447 0.70958 ± 0.01138 28.091 ± 0.305 % 69.734 ± 0.384 %
|
| 255 |
+
57 11.8480 ± 0.3010 0.49021 ± 0.01439 0.71410 ± 0.01131 28.085 ± 0.303 % 69.653 ± 0.381 %
|
| 256 |
+
58 11.8256 ± 0.2977 0.48695 ± 0.01423 0.71023 ± 0.01116 27.975 ± 0.300 % 69.797 ± 0.378 %
|
| 257 |
+
59 11.6751 ± 0.2908 0.47927 ± 0.01403 0.70194 ± 0.01099 27.798 ± 0.297 % 69.917 ± 0.374 %
|
| 258 |
+
60 11.6572 ± 0.2879 0.47626 ± 0.01386 0.69756 ± 0.01084 27.679 ± 0.294 % 69.922 ± 0.371 %
|
| 259 |
+
61 11.7125 ± 0.2871 0.47434 ± 0.01373 0.69389 ± 0.01069 27.524 ± 0.291 % 69.932 ± 0.368 %
|
| 260 |
+
62 11.6465 ± 0.2836 0.47196 ± 0.01363 0.69240 ± 0.01062 27.501 ± 0.289 % 70.025 ± 0.364 %
|
| 261 |
+
63 11.6801 ± 0.2829 0.46852 ± 0.01347 0.68862 ± 0.01048 27.363 ± 0.286 % 70.109 ± 0.361 %
|
| 262 |
+
64 11.5955 ± 0.2778 0.46383 ± 0.01333 0.68457 ± 0.01034 27.249 ± 0.284 % 70.214 ± 0.358 %
|
| 263 |
+
65 11.5334 ± 0.2740 0.45963 ± 0.01322 0.68280 ± 0.01024 27.167 ± 0.282 % 70.299 ± 0.355 %
|
| 264 |
+
66 11.5333 ± 0.2721 0.45462 ± 0.01307 0.67902 ± 0.01010 27.040 ± 0.279 % 70.392 ± 0.352 %
|
| 265 |
+
67 11.5173 ± 0.2697 0.45229 ± 0.01293 0.67590 ± 0.00998 26.954 ± 0.277 % 70.495 ± 0.349 %
|
| 266 |
+
68 11.3834 ± 0.2641 0.44656 ± 0.01276 0.66973 ± 0.00985 26.794 ± 0.275 % 70.629 ± 0.346 %
|
| 267 |
+
69 11.3954 ± 0.2626 0.44317 ± 0.01263 0.66592 ± 0.00972 26.674 ± 0.272 % 70.685 ± 0.343 %
|
| 268 |
+
70 11.3095 ± 0.2584 0.43938 ± 0.01252 0.66374 ± 0.00964 26.582 ± 0.270 % 70.756 ± 0.340 %
|
| 269 |
+
71 11.2434 ± 0.2551 0.43551 ± 0.01239 0.65922 ± 0.00953 26.460 ± 0.268 % 70.920 ± 0.338 %
|
| 270 |
+
72 11.2307 ± 0.2534 0.43221 ± 0.01226 0.65558 ± 0.00943 26.345 ± 0.266 % 71.024 ± 0.335 %
|
| 271 |
+
73 11.2213 ± 0.2514 0.43063 ± 0.01215 0.65369 ± 0.00935 26.257 ± 0.264 % 70.986 ± 0.333 %
|
| 272 |
+
74 11.1586 ± 0.2480 0.42586 ± 0.01203 0.65000 ± 0.00925 26.147 ± 0.262 % 71.086 ± 0.330 %
|
| 273 |
+
75 11.1373 ± 0.2459 0.42303 ± 0.01192 0.64761 ± 0.00915 26.070 ± 0.260 % 71.090 ± 0.328 %
|
| 274 |
+
76 11.1961 ± 0.2455 0.41981 ± 0.01183 0.64547 ± 0.00906 26.001 ± 0.259 % 71.104 ± 0.326 %
|
| 275 |
+
77 11.1536 ± 0.2431 0.41628 ± 0.01172 0.64108 ± 0.00895 25.894 ± 0.257 % 71.199 ± 0.323 %
|
| 276 |
+
78 11.1434 ± 0.2413 0.41354 ± 0.01162 0.63991 ± 0.00888 25.826 ± 0.255 % 71.222 ± 0.321 %
|
| 277 |
+
79 11.1260 ± 0.2393 0.41052 ± 0.01151 0.63725 ± 0.00879 25.736 ± 0.253 % 71.214 ± 0.319 %
|
| 278 |
+
80 11.1214 ± 0.2383 0.40986 ± 0.01146 0.64029 ± 0.00877 25.711 ± 0.252 % 71.225 ± 0.317 %
|
| 279 |
+
81 11.0463 ± 0.2353 0.40618 ± 0.01134 0.63669 ± 0.00868 25.614 ± 0.250 % 71.310 ± 0.315 %
|
| 280 |
+
82 10.9951 ± 0.2325 0.40446 ± 0.01123 0.63439 ± 0.00859 25.536 ± 0.248 % 71.296 ± 0.313 %
|
| 281 |
+
83 11.0240 ± 0.2313 0.40277 ± 0.01113 0.63131 ± 0.00849 25.438 ± 0.246 % 71.316 ± 0.311 %
|
| 282 |
+
84 11.0244 ± 0.2295 0.40062 ± 0.01103 0.62800 ± 0.00840 25.327 ± 0.244 % 71.335 ± 0.309 %
|
| 283 |
+
85 10.9863 ± 0.2270 0.39772 ± 0.01093 0.62453 ± 0.00831 25.232 ± 0.243 % 71.331 ± 0.307 %
|
| 284 |
+
86 10.8524 ± 0.2223 0.39450 ± 0.01082 0.62000 ± 0.00822 25.131 ± 0.241 % 71.455 ± 0.305 %
|
| 285 |
+
87 10.7325 ± 0.2180 0.39119 ± 0.01071 0.61560 ± 0.00814 25.032 ± 0.239 % 71.566 ± 0.303 %
|
| 286 |
+
88 10.6137 ± 0.2138 0.38839 ± 0.01061 0.61168 ± 0.00806 24.939 ± 0.238 % 71.671 ± 0.301 %
|
| 287 |
+
89 10.4789 ± 0.2093 0.38559 ± 0.01051 0.60742 ± 0.00798 24.843 ± 0.236 % 71.774 ± 0.299 %
|
| 288 |
+
90 10.3689 ± 0.2054 0.38273 ± 0.01041 0.60342 ± 0.00790 24.745 ± 0.235 % 71.900 ± 0.297 %
|
| 289 |
+
91 10.2546 ± 0.2016 0.37876 ± 0.01032 0.59980 ± 0.00782 24.646 ± 0.233 % 72.019 ± 0.295 %
|
| 290 |
+
92 10.1416 ± 0.1977 0.37573 ± 0.01022 0.59607 ± 0.00775 24.557 ± 0.232 % 72.093 ± 0.293 %
|
| 291 |
+
93 10.1347 ± 0.1967 0.37256 ± 0.01018 0.59685 ± 0.00772 24.553 ± 0.230 % 72.026 ± 0.291 %
|
| 292 |
+
94 10.1452 ± 0.1956 0.36926 ± 0.01009 0.59348 ± 0.00765 24.451 ± 0.229 % 72.090 ± 0.290 %
|
| 293 |
+
95 10.2805 ± 0.1975 0.36731 ± 0.01001 0.59140 ± 0.00758 24.365 ± 0.228 % 72.074 ± 0.288 %
|
| 294 |
+
96 10.3880 ± 0.1985 0.36475 ± 0.00994 0.58999 ± 0.00750 24.282 ± 0.226 % 71.993 ± 0.287 %
|
| 295 |
+
97 10.4857 ± 0.1994 0.36346 ± 0.00986 0.58746 ± 0.00743 24.189 ± 0.225 % 71.951 ± 0.286 %
|
| 296 |
+
98 10.6567 ± 0.2021 0.36148 ± 0.00978 0.58468 ± 0.00736 24.081 ± 0.224 % 71.929 ± 0.284 %
|
| 297 |
+
99 10.8063 ± 0.2041 0.35987 ± 0.00970 0.58271 ± 0.00729 24.006 ± 0.222 % 71.864 ± 0.283 %
|
| 298 |
+
100 10.8396 ± 0.2037 0.35805 ± 0.00964 0.58160 ± 0.00724 23.945 ± 0.221 % 71.843 ± 0.282 %
|
| 299 |
+
101 10.8766 ± 0.2035 0.35663 ± 0.00958 0.58097 ± 0.00720 23.911 ± 0.220 % 71.873 ± 0.280 %
|
| 300 |
+
102 10.9816 ± 0.2051 0.35833 ± 0.00957 0.58356 ± 0.00721 23.919 ± 0.219 % 71.819 ± 0.279 %
|
| 301 |
+
103 10.9584 ± 0.2039 0.35959 ± 0.00951 0.58289 ± 0.00717 23.971 ± 0.218 % 71.860 ± 0.277 %
|
| 302 |
+
104 10.9343 ± 0.2025 0.36490 ± 0.00952 0.58693 ± 0.00720 24.125 ± 0.218 % 71.817 ± 0.276 %
|
| 303 |
+
105 10.8048 ± 0.1987 0.36750 ± 0.00948 0.58910 ± 0.00718 24.365 ± 0.217 % 71.858 ± 0.275 %
|
| 304 |
+
106 10.6898 ± 0.1954 0.37410 ± 0.00949 0.59389 ± 0.00722 24.670 ± 0.217 % 71.898 ± 0.273 %
|
| 305 |
+
107 10.7394 ± 0.1952 0.37100 ± 0.00941 0.59019 ± 0.00716 24.571 ± 0.216 % 71.904 ± 0.272 %
|
| 306 |
+
108 10.7412 ± 0.1943 0.36939 ± 0.00935 0.58902 ± 0.00710 24.523 ± 0.215 % 71.921 ± 0.271 %
|
| 307 |
+
109 10.7702 ± 0.1940 0.36935 ± 0.00930 0.58815 ± 0.00706 24.501 ± 0.214 % 71.919 ± 0.270 %
|
| 308 |
+
110 10.7972 ± 0.1936 0.36717 ± 0.00924 0.58681 ± 0.00701 24.453 ± 0.213 % 71.882 ± 0.268 %
|
| 309 |
+
111 10.8359 ± 0.1932 0.36444 ± 0.00918 0.58475 ± 0.00695 24.368 ± 0.212 % 71.871 ± 0.267 %
|
| 310 |
+
112 10.8199 ± 0.1918 0.36162 ± 0.00911 0.58213 ± 0.00690 24.306 ± 0.210 % 71.884 ± 0.266 %
|
| 311 |
+
113 10.8055 ± 0.1904 0.35891 ± 0.00905 0.57913 ± 0.00684 24.227 ± 0.209 % 71.910 ± 0.265 %
|
| 312 |
+
114 10.8035 ± 0.1896 0.35639 ± 0.00899 0.57680 ± 0.00679 24.157 ± 0.208 % 71.978 ± 0.263 %
|
| 313 |
+
115 10.7802 ± 0.1883 0.35658 ± 0.00894 0.57709 ± 0.00675 24.187 ± 0.207 % 71.959 ± 0.262 %
|
| 314 |
+
116 10.8555 ± 0.1890 0.36493 ± 0.00897 0.58702 ± 0.00682 24.426 ± 0.207 % 71.775 ± 0.262 %
|
| 315 |
+
117 10.9631 ± 0.1906 0.38836 ± 0.00921 0.60714 ± 0.00707 25.009 ± 0.207 % 71.520 ± 0.261 %
|
| 316 |
+
118 11.0014 ± 0.1906 0.40470 ± 0.00932 0.62126 ± 0.00719 25.455 ± 0.207 % 71.336 ± 0.261 %
|
| 317 |
+
119 11.0679 ± 0.1911 0.42418 ± 0.00947 0.63863 ± 0.00736 25.973 ± 0.207 % 71.135 ± 0.260 %
|
| 318 |
+
120 11.1134 ± 0.1911 0.43963 ± 0.00956 0.65345 ± 0.00746 26.432 ± 0.207 % 70.886 ± 0.260 %
|
| 319 |
+
121 11.1734 ± 0.1917 0.45665 ± 0.00966 0.66867 ± 0.00757 26.926 ± 0.207 % 70.676 ± 0.259 %

====== Perplexity statistics ======
Mean PPL(Q)                   : 11.173411 ± 0.191659
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 83.35%
Mean ln(PPL(Q)/PPL(base))     : 0.456653 ± 0.009658
Mean PPL(Q)/PPL(base)         : 1.578781 ± 0.015248
Mean PPL(Q)-PPL(base)         : 4.096171 ± 0.115254

====== KL divergence statistics ======
Mean    KLD: 0.668673 ± 0.007572
Maximum KLD: 19.120047
99.9%   KLD: 10.942347
99.0%   KLD: 6.945683
95.0%   KLD: 3.325870
90.0%   KLD: 1.688242
Median  KLD: 0.218502
10.0%   KLD: 0.002351
5.0%    KLD: 0.000373
1.0%    KLD: 0.000020
0.1%    KLD: 0.000001
Minimum KLD: -0.000003

====== Token probability statistics ======
Mean    Δp: -8.900 ± 0.145 %
Maximum Δp: 99.930%
99.9%   Δp: 75.650%
99.0%   Δp: 40.406%
95.0%   Δp: 15.509%
90.0%   Δp: 7.379%
75.0%   Δp: 0.300%
Median  Δp: -0.356%
25.0%   Δp: -9.676%
10.0%   Δp: -40.787%
5.0%    Δp: -75.113%
1.0%    Δp: -98.185%
0.1%    Δp: -99.884%
Minimum Δp: -99.992%
RMS Δp    : 26.926 ± 0.207 %
Same top p: 70.676 ± 0.259 %

llama_perf_context_print: load time = 22335.72 ms
llama_perf_context_print: prompt eval time = 42992.87 ms / 61952 tokens ( 0.69 ms per token, 1440.98 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 54219.30 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 863 + (22815 = 21038 + 432 + 1344) + 456 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1089 + (22622 = 18888 + 560 + 3174) + 422 |
llama_memory_breakdown_print: | - Host | 52797 = 52693 + 0 + 104 |
```
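
As a reading aid for these logs: the per-token KL divergence is taken between the BF16 reference distribution and the quantized model's distribution, and "Same top p" appears to be the share of positions where both models agree on the top-1 token. The sketch below is a minimal, illustrative NumPy reimplementation of those two summary statistics under that interpretation; it is not taken from llama.cpp, and the function and variable names are assumptions for illustration only.

```python
# Minimal sketch (assumed interpretation, not llama.cpp internals):
# compute mean KL divergence and top-1 agreement from two sets of logits.
import numpy as np

def softmax(logits):
    z = logits - logits.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def kld_stats(base_logits, quant_logits, eps=1e-12):
    """base_logits, quant_logits: [n_tokens, n_vocab] arrays of raw logits."""
    p = softmax(base_logits)   # reference (e.g. BF16) distribution
    q = softmax(quant_logits)  # quantized-model distribution
    kld = (p * (np.log(p + eps) - np.log(q + eps))).sum(axis=-1)
    same_top = (p.argmax(axis=-1) == q.argmax(axis=-1)).mean()
    return kld.mean(), same_top

# Call-shape example with random logits (hypothetical data):
rng = np.random.default_rng(0)
base = rng.normal(size=(8, 32))
quant = base + 0.1 * rng.normal(size=(8, 32))
mean_kld, same_top_p = kld_stats(base, quant)
print(f"Mean KLD: {mean_kld:.6f}  Same top p: {100 * same_top_p:.3f} %")
```
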
kld_data/unsloth/Q2_K/MiniMax-M2.5-Q2_K.md
ADDED
@@ -0,0 +1,371 @@
### MiniMax-M2.5-Q2_K (unsloth)

77.57 GiB (2.91 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q2_K/MiniMax-M2.5-Q2_K-00001-of-00003.gguf
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 42159 used, -18288 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 42646 used, -18774 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 84806 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 39110 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38539 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7098 MiB used, 16773 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1308 MiB used, 22562 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 17 layers ( 1 overflowing), 22682 MiB used, 1189 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 46 layers (33 overflowing), 22760 MiB used, 1111 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.68 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q2_K/MiniMax-M2.5-Q2_K-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 10
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q2_K: 249 tensors
|
| 87 |
+
llama_model_loader: - type q3_K: 124 tensors
|
| 88 |
+
llama_model_loader: - type q4_K: 62 tensors
|
| 89 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = Q2_K - Medium
|
| 92 |
+
print_info: file size = 77.57 GiB (2.91 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47195.80 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 31742.75 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 21101.19 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18866.21 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 272.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 720.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1309.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 129 (with bs=4096), 67 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 22.76 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 4.90 seconds per pass - ETA 1.23 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.8148 ± 1.3448 0.06702 ± 0.04158 0.13944 ± 0.02320 10.445 ± 1.566 % 85.882 ± 2.185 %
|
| 201 |
+
2 5.0400 ± 0.6331 0.07519 ± 0.02779 0.09990 ± 0.01224 8.468 ± 0.996 % 89.608 ± 1.353 %
|
| 202 |
+
3 4.7499 ± 0.4824 0.05663 ± 0.02686 0.12898 ± 0.01170 11.321 ± 0.933 % 88.758 ± 1.143 %
|
| 203 |
+
4 5.3901 ± 0.4892 0.04979 ± 0.02278 0.13508 ± 0.00976 11.752 ± 0.814 % 87.157 ± 1.048 %
|
| 204 |
+
5 5.1235 ± 0.4120 0.04562 ± 0.02240 0.14051 ± 0.00975 11.737 ± 0.767 % 87.686 ± 0.921 %
|
| 205 |
+
6 6.2834 ± 0.4935 0.04672 ± 0.02123 0.15297 ± 0.00877 11.483 ± 0.687 % 87.059 ± 0.858 %
|
| 206 |
+
7 5.9307 ± 0.4186 0.06080 ± 0.02064 0.17799 ± 0.00962 12.905 ± 0.678 % 86.275 ± 0.815 %
|
| 207 |
+
8 6.7039 ± 0.4486 0.05984 ± 0.01874 0.17308 ± 0.00854 12.443 ± 0.620 % 85.686 ± 0.776 %
|
| 208 |
+
9 6.5859 ± 0.4122 0.06136 ± 0.01701 0.16605 ± 0.00779 11.978 ± 0.575 % 85.969 ± 0.725 %
|
| 209 |
+
10 6.0148 ± 0.3503 0.05969 ± 0.01561 0.15915 ± 0.00710 11.959 ± 0.527 % 86.235 ± 0.682 %
|
| 210 |
+
11 6.5626 ± 0.3702 0.05506 ± 0.01479 0.16089 ± 0.00663 12.002 ± 0.494 % 85.526 ± 0.664 %
|
| 211 |
+
12 7.2570 ± 0.3972 0.05414 ± 0.01398 0.16006 ± 0.00621 11.670 ± 0.467 % 85.196 ± 0.642 %
|
| 212 |
+
13 7.4963 ± 0.3902 0.04931 ± 0.01317 0.15573 ± 0.00577 11.438 ± 0.442 % 85.098 ± 0.619 %
|
| 213 |
+
14 8.0994 ± 0.4098 0.05144 ± 0.01297 0.15851 ± 0.00564 11.345 ± 0.429 % 84.482 ± 0.606 %
|
| 214 |
+
15 8.4769 ± 0.4157 0.05087 ± 0.01239 0.15737 ± 0.00530 11.254 ± 0.408 % 84.288 ± 0.588 %
|
| 215 |
+
16 8.7118 ± 0.4125 0.04601 ± 0.01184 0.15281 ± 0.00500 11.063 ± 0.391 % 84.387 ± 0.568 %
|
| 216 |
+
17 8.9711 ± 0.4158 0.04821 ± 0.01207 0.16135 ± 0.00519 11.158 ± 0.384 % 84.152 ± 0.555 %
|
| 217 |
+
18 8.4711 ± 0.3791 0.05010 ± 0.01190 0.16582 ± 0.00534 11.295 ± 0.378 % 84.205 ± 0.538 %
|
| 218 |
+
19 8.5884 ± 0.3728 0.04805 ± 0.01162 0.16237 ± 0.00509 11.229 ± 0.363 % 84.314 ± 0.523 %
|
| 219 |
+
20 8.6628 ± 0.3668 0.04880 ± 0.01147 0.17008 ± 0.00516 11.386 ± 0.349 % 83.961 ± 0.514 %
|
| 220 |
+
21 8.6158 ± 0.3552 0.04593 ± 0.01109 0.16940 ± 0.00504 11.418 ± 0.343 % 84.127 ± 0.499 %
|
| 221 |
+
22 8.9395 ± 0.3632 0.04514 ± 0.01082 0.17008 ± 0.00485 11.381 ± 0.332 % 83.672 ± 0.494 %
|
| 222 |
+
23 8.9826 ± 0.3589 0.04873 ± 0.01080 0.17806 ± 0.00529 11.644 ± 0.332 % 83.546 ± 0.484 %
|
| 223 |
+
24 9.3987 ± 0.3697 0.04795 ± 0.01051 0.17778 ± 0.00511 11.552 ± 0.323 % 83.399 ± 0.476 %
|
| 224 |
+
25 9.4097 ± 0.3631 0.05051 ± 0.01045 0.18413 ± 0.00527 11.914 ± 0.326 % 83.231 ± 0.468 %
|
| 225 |
+
26 9.1820 ± 0.3445 0.09462 ± 0.01148 0.22520 ± 0.00707 14.917 ± 0.392 % 82.443 ± 0.467 %
|
| 226 |
+
27 9.0581 ± 0.3314 0.13497 ± 0.01228 0.26422 ± 0.00824 17.164 ± 0.410 % 81.786 ± 0.465 %
|
| 227 |
+
28 9.1834 ± 0.3311 0.13503 ± 0.01200 0.26236 ± 0.00798 17.057 ± 0.400 % 81.765 ± 0.457 %
|
| 228 |
+
29 9.0878 ± 0.3224 0.13403 ± 0.01175 0.26169 ± 0.00776 16.966 ± 0.391 % 81.907 ± 0.448 %
|
| 229 |
+
30 8.5084 ± 0.2946 0.13536 ± 0.01152 0.25665 ± 0.00760 16.909 ± 0.385 % 82.379 ± 0.436 %
|
| 230 |
+
31 7.9970 ± 0.2701 0.13275 ± 0.01120 0.25200 ± 0.00741 16.818 ± 0.379 % 82.821 ± 0.424 %
|
| 231 |
+
32 7.7835 ± 0.2566 0.13092 ± 0.01093 0.24849 ± 0.00720 16.727 ± 0.371 % 82.868 ± 0.417 %
|
| 232 |
+
33 7.6187 ± 0.2459 0.12941 ± 0.01072 0.24623 ± 0.00701 16.633 ± 0.362 % 82.900 ± 0.410 %
|
| 233 |
+
34 7.8389 ± 0.2507 0.13088 ± 0.01056 0.24970 ± 0.00688 16.577 ± 0.356 % 82.710 ± 0.406 %
|
| 234 |
+
35 7.9346 ± 0.2518 0.12838 ± 0.01047 0.25332 ± 0.00680 16.618 ± 0.348 % 82.644 ± 0.401 %
|
| 235 |
+
36 7.9933 ± 0.2510 0.12687 ± 0.01027 0.25178 ± 0.00664 16.550 ± 0.342 % 82.658 ± 0.395 %
|
| 236 |
+
37 8.1035 ± 0.2512 0.13827 ± 0.01038 0.26254 ± 0.00687 17.241 ± 0.347 % 82.438 ± 0.392 %
|
| 237 |
+
38 8.3284 ± 0.2562 0.13591 ± 0.01016 0.26102 ± 0.00676 17.091 ± 0.342 % 82.353 ± 0.387 %
|
| 238 |
+
39 8.3852 ± 0.2545 0.14965 ± 0.01029 0.27262 ± 0.00702 17.635 ± 0.342 % 82.192 ± 0.384 %
|
| 239 |
+
40 8.3630 ± 0.2497 0.18193 ± 0.01078 0.30365 ± 0.00772 18.846 ± 0.344 % 81.647 ± 0.383 %
|
| 240 |
+
41 8.4101 ± 0.2474 0.21836 ± 0.01137 0.34066 ± 0.00845 20.322 ± 0.349 % 80.966 ± 0.384 %
|
| 241 |
+
42 8.3791 ± 0.2428 0.24600 ± 0.01173 0.36968 ± 0.00891 21.513 ± 0.349 % 80.448 ± 0.383 %
|
| 242 |
+
43 8.2956 ± 0.2369 0.26713 ± 0.01199 0.39001 ± 0.00919 22.306 ± 0.347 % 80.100 ± 0.381 %
|
| 243 |
+
44 8.1762 ± 0.2297 0.25895 ± 0.01177 0.38435 ± 0.00899 22.126 ± 0.342 % 80.223 ± 0.376 %
|
| 244 |
+
45 8.3260 ± 0.2324 0.25522 ± 0.01156 0.38125 ± 0.00882 21.927 ± 0.338 % 80.200 ± 0.372 %
|
| 245 |
+
46 8.4701 ± 0.2341 0.25030 ± 0.01135 0.37697 ± 0.00864 21.747 ± 0.334 % 80.205 ± 0.368 %
|
| 246 |
+
47 8.6332 ± 0.2366 0.24717 ± 0.01114 0.37171 ± 0.00847 21.551 ± 0.330 % 80.259 ± 0.364 %
|
| 247 |
+
48 8.4501 ± 0.2278 0.24238 ± 0.01094 0.36657 ± 0.00830 21.393 ± 0.326 % 80.302 ± 0.359 %
|
| 248 |
+
49 8.5337 ± 0.2275 0.23721 ± 0.01084 0.36821 ± 0.00835 21.283 ± 0.323 % 80.312 ± 0.356 %
|
| 249 |
+
50 8.6338 ± 0.2289 0.23471 ± 0.01069 0.36553 ± 0.00821 21.153 ± 0.319 % 80.275 ± 0.352 %
|
| 250 |
+
51 8.7389 ± 0.2295 0.23142 ± 0.01051 0.36135 ± 0.00805 20.994 ± 0.316 % 80.315 ± 0.349 %
|
| 251 |
+
52 8.7857 ± 0.2281 0.22744 ± 0.01039 0.35930 ± 0.00791 20.841 ± 0.312 % 80.241 ± 0.346 %
|
| 252 |
+
53 8.8842 ± 0.2281 0.22280 ± 0.01024 0.35592 ± 0.00777 20.692 ± 0.309 % 80.163 ± 0.343 %
|
| 253 |
+
54 8.9299 ± 0.2267 0.22016 ± 0.01007 0.35208 ± 0.00763 20.533 ± 0.306 % 80.145 ± 0.340 %
|
| 254 |
+
55 8.9619 ± 0.2250 0.21670 ± 0.00991 0.34815 ± 0.00750 20.383 ± 0.302 % 80.185 ± 0.337 %
|
| 255 |
+
56 8.9865 ± 0.2236 0.21373 ± 0.00976 0.34454 ± 0.00737 20.244 ± 0.299 % 80.182 ± 0.334 %
|
| 256 |
+
57 8.9872 ± 0.2217 0.21385 ± 0.00969 0.34451 ± 0.00728 20.187 ± 0.296 % 80.172 ± 0.331 %
|
| 257 |
+
58 8.9910 ± 0.2198 0.21291 ± 0.00957 0.34217 ± 0.00717 20.099 ± 0.293 % 80.210 ± 0.328 %
|
| 258 |
+
59 8.9164 ± 0.2157 0.20970 ± 0.00943 0.33770 ± 0.00705 19.963 ± 0.290 % 80.326 ± 0.324 %
|
| 259 |
+
60 8.9124 ± 0.2139 0.20778 ± 0.00930 0.33492 ± 0.00695 19.850 ± 0.287 % 80.399 ± 0.321 %
|
| 260 |
+
61 8.9468 ± 0.2129 0.20499 ± 0.00918 0.33236 ± 0.00685 19.734 ± 0.284 % 80.450 ± 0.318 %
|
| 261 |
+
62 8.8887 ± 0.2100 0.20173 ± 0.00909 0.32965 ± 0.00676 19.638 ± 0.281 % 80.557 ± 0.315 %
|
| 262 |
+
63 8.9208 ± 0.2097 0.19902 ± 0.00899 0.32765 ± 0.00667 19.507 ± 0.279 % 80.560 ± 0.312 %
|
| 263 |
+
64 8.8741 ± 0.2064 0.19635 ± 0.00888 0.32555 ± 0.00658 19.409 ± 0.277 % 80.570 ± 0.310 %
|
| 264 |
+
65 8.8309 ± 0.2036 0.19264 ± 0.00879 0.32390 ± 0.00649 19.319 ± 0.274 % 80.609 ± 0.307 %
|
| 265 |
+
66 8.8508 ± 0.2027 0.18989 ± 0.00869 0.32179 ± 0.00640 19.222 ± 0.272 % 80.588 ± 0.305 %
|
| 266 |
+
67 8.8485 ± 0.2013 0.18869 ± 0.00860 0.31949 ± 0.00631 19.108 ± 0.269 % 80.667 ± 0.302 %
|
| 267 |
+
68 8.7772 ± 0.1979 0.18656 ± 0.00849 0.31654 ± 0.00623 18.989 ± 0.267 % 80.779 ± 0.299 %
|
| 268 |
+
69 8.7986 ± 0.1970 0.18454 ± 0.00839 0.31432 ± 0.00614 18.890 ± 0.265 % 80.824 ± 0.297 %
|
| 269 |
+
70 8.7463 ± 0.1940 0.18237 ± 0.00831 0.31263 ± 0.00607 18.823 ± 0.263 % 80.902 ± 0.294 %
|
| 270 |
+
71 8.7111 ± 0.1919 0.18033 ± 0.00821 0.31017 ± 0.00599 18.733 ± 0.261 % 80.967 ± 0.292 %
|
| 271 |
+
72 8.7260 ± 0.1912 0.17986 ± 0.00815 0.30881 ± 0.00596 18.662 ± 0.258 % 81.008 ± 0.289 %
|
| 272 |
+
73 8.7103 ± 0.1894 0.17732 ± 0.00806 0.30695 ± 0.00588 18.586 ± 0.256 % 81.015 ± 0.287 %
|
| 273 |
+
74 8.6851 ± 0.1873 0.17526 ± 0.00798 0.30534 ± 0.00581 18.501 ± 0.254 % 81.033 ± 0.285 %
|
| 274 |
+
75 8.6786 ± 0.1860 0.17359 ± 0.00790 0.30459 ± 0.00574 18.463 ± 0.252 % 80.993 ± 0.284 %
|
| 275 |
+
76 8.7407 ± 0.1862 0.17224 ± 0.00782 0.30352 ± 0.00567 18.394 ± 0.250 % 80.996 ± 0.282 %
|
| 276 |
+
77 8.7183 ± 0.1845 0.16994 ± 0.00777 0.30199 ± 0.00561 18.316 ± 0.248 % 81.024 ± 0.280 %
|
| 277 |
+
78 8.7225 ± 0.1835 0.16860 ± 0.00771 0.30085 ± 0.00555 18.269 ± 0.246 % 81.036 ± 0.278 %
|
| 278 |
+
79 8.7194 ± 0.1823 0.16678 ± 0.00764 0.29948 ± 0.00549 18.202 ± 0.245 % 81.033 ± 0.276 %
|
| 279 |
+
80 8.7079 ± 0.1812 0.16522 ± 0.00759 0.30011 ± 0.00545 18.175 ± 0.243 % 81.000 ± 0.275 %
|
| 280 |
+
81 8.6703 ± 0.1793 0.16399 ± 0.00752 0.29860 ± 0.00539 18.109 ± 0.241 % 81.070 ± 0.273 %
|
| 281 |
+
82 8.6422 ± 0.1774 0.16367 ± 0.00745 0.29717 ± 0.00533 18.045 ± 0.239 % 81.076 ± 0.271 %
|
| 282 |
+
83 8.6699 ± 0.1767 0.16255 ± 0.00738 0.29572 ± 0.00527 17.982 ± 0.237 % 81.035 ± 0.269 %
|
| 283 |
+
84 8.6723 ± 0.1753 0.16064 ± 0.00731 0.29395 ± 0.00521 17.903 ± 0.235 % 80.994 ± 0.268 %
|
| 284 |
+
85 8.6585 ± 0.1737 0.15961 ± 0.00723 0.29208 ± 0.00515 17.831 ± 0.234 % 81.024 ± 0.266 %
|
| 285 |
+
86 8.5749 ± 0.1705 0.15895 ± 0.00717 0.29019 ± 0.00510 17.776 ± 0.232 % 81.094 ± 0.264 %
|
| 286 |
+
87 8.5092 ± 0.1677 0.15907 ± 0.00710 0.28897 ± 0.00504 17.754 ± 0.230 % 81.100 ± 0.263 %
|
| 287 |
+
88 8.4290 ± 0.1647 0.15792 ± 0.00704 0.28720 ± 0.00499 17.688 ± 0.229 % 81.172 ± 0.261 %
|
| 288 |
+
89 8.3409 ± 0.1615 0.15739 ± 0.00697 0.28546 ± 0.00494 17.633 ± 0.227 % 81.247 ± 0.259 %
|
| 289 |
+
90 8.2736 ± 0.1589 0.15700 ± 0.00691 0.28384 ± 0.00489 17.564 ± 0.225 % 81.312 ± 0.257 %
|
| 290 |
+
91 8.2088 ± 0.1564 0.15624 ± 0.00685 0.28215 ± 0.00484 17.503 ± 0.224 % 81.370 ± 0.256 %
|
| 291 |
+
92 8.1362 ± 0.1537 0.15541 ± 0.00679 0.28063 ± 0.00479 17.451 ± 0.222 % 81.385 ± 0.254 %
|
| 292 |
+
93 8.1411 ± 0.1532 0.15352 ± 0.00677 0.28080 ± 0.00479 17.424 ± 0.221 % 81.396 ± 0.253 %
|
| 293 |
+
94 8.1691 ± 0.1528 0.15262 ± 0.00671 0.27915 ± 0.00474 17.353 ± 0.220 % 81.406 ± 0.251 %
|
| 294 |
+
95 8.2902 ± 0.1547 0.15213 ± 0.00667 0.27802 ± 0.00469 17.280 ± 0.218 % 81.416 ± 0.250 %
|
| 295 |
+
96 8.3906 ± 0.1559 0.15122 ± 0.00662 0.27732 ± 0.00465 17.221 ± 0.217 % 81.348 ± 0.249 %
|
| 296 |
+
97 8.4770 ± 0.1568 0.15080 ± 0.00656 0.27589 ± 0.00460 17.157 ± 0.216 % 81.322 ± 0.248 %
|
| 297 |
+
98 8.6276 ± 0.1594 0.15027 ± 0.00651 0.27436 ± 0.00455 17.079 ± 0.215 % 81.325 ± 0.247 %
|
| 298 |
+
99 8.7516 ± 0.1611 0.14897 ± 0.00646 0.27328 ± 0.00451 17.011 ± 0.213 % 81.252 ± 0.246 %
|
| 299 |
+
100 8.7823 ± 0.1609 0.14759 ± 0.00641 0.27243 ± 0.00447 16.962 ± 0.212 % 81.224 ± 0.245 %
|
| 300 |
+
101 8.8091 ± 0.1607 0.14580 ± 0.00637 0.27181 ± 0.00448 16.922 ± 0.211 % 81.277 ± 0.243 %
|
| 301 |
+
102 8.8818 ± 0.1619 0.14613 ± 0.00633 0.27193 ± 0.00445 16.886 ± 0.210 % 81.261 ± 0.242 %
|
| 302 |
+
103 8.8533 ± 0.1607 0.14628 ± 0.00630 0.27125 ± 0.00442 16.889 ± 0.209 % 81.264 ± 0.241 %
|
| 303 |
+
104 8.8013 ± 0.1588 0.14790 ± 0.00630 0.27305 ± 0.00445 17.024 ± 0.209 % 81.278 ± 0.240 %
|
| 304 |
+
105 8.6825 ± 0.1556 0.14882 ± 0.00629 0.27414 ± 0.00445 17.148 ± 0.208 % 81.348 ± 0.238 %
|
| 305 |
+
106 8.5422 ± 0.1521 0.14983 ± 0.00627 0.27404 ± 0.00443 17.249 ± 0.208 % 81.443 ± 0.236 %
|
| 306 |
+
107 8.5986 ± 0.1523 0.14868 ± 0.00622 0.27253 ± 0.00439 17.183 ± 0.207 % 81.415 ± 0.235 %
|
| 307 |
+
108 8.6047 ± 0.1517 0.14761 ± 0.00617 0.27168 ± 0.00436 17.146 ± 0.205 % 81.423 ± 0.234 %
|
| 308 |
+
109 8.6257 ± 0.1514 0.14732 ± 0.00613 0.27097 ± 0.00432 17.115 ± 0.204 % 81.414 ± 0.233 %
|
| 309 |
+
110 8.6573 ± 0.1513 0.14628 ± 0.00609 0.26988 ± 0.00429 17.069 ± 0.203 % 81.390 ± 0.232 %
|
| 310 |
+
111 8.7043 ± 0.1513 0.14539 ± 0.00605 0.26884 ± 0.00425 17.017 ± 0.202 % 81.406 ± 0.231 %
|
| 311 |
+
112 8.7047 ± 0.1505 0.14409 ± 0.00601 0.26761 ± 0.00422 16.972 ± 0.201 % 81.415 ± 0.230 %
|
| 312 |
+
113 8.7114 ± 0.1498 0.14348 ± 0.00596 0.26628 ± 0.00418 16.918 ± 0.200 % 81.444 ± 0.229 %
|
| 313 |
+
114 8.7242 ± 0.1494 0.14262 ± 0.00592 0.26515 ± 0.00415 16.863 ± 0.199 % 81.465 ± 0.228 %
|
| 314 |
+
115 8.7053 ± 0.1484 0.14279 ± 0.00590 0.26538 ± 0.00412 16.881 ± 0.198 % 81.456 ± 0.227 %
|
| 315 |
+
116 8.7261 ± 0.1482 0.14658 ± 0.00591 0.27019 ± 0.00415 17.083 ± 0.197 % 81.322 ± 0.227 %
|
| 316 |
+
117 8.7017 ± 0.1469 0.15734 ± 0.00601 0.27982 ± 0.00426 17.589 ± 0.198 % 81.143 ± 0.226 %
|
| 317 |
+
118 8.6762 ± 0.1458 0.16727 ± 0.00610 0.28778 ± 0.00434 17.984 ± 0.199 % 81.004 ± 0.226 %
|
| 318 |
+
119 8.6490 ± 0.1445 0.17757 ± 0.00619 0.29686 ± 0.00445 18.468 ± 0.200 % 80.847 ± 0.226 %
|
| 319 |
+
120 8.6270 ± 0.1433 0.18637 ± 0.00626 0.30510 ± 0.00452 18.855 ± 0.200 % 80.660 ± 0.226 %
|
| 320 |
+
121 8.5991 ± 0.1422 0.19477 ± 0.00633 0.31393 ± 0.00461 19.287 ± 0.200 % 80.512 ± 0.226 %

====== Perplexity statistics ======
Mean PPL(Q)                   : 8.599059 ± 0.142194
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 92.53%
Mean ln(PPL(Q)/PPL(base))     : 0.194769 ± 0.006328
Mean PPL(Q)/PPL(base)         : 1.215030 ± 0.007688
Mean PPL(Q)-PPL(base)         : 1.521820 ± 0.056629

====== KL divergence statistics ======
Mean    KLD: 0.313928 ± 0.004614
Maximum KLD: 17.506372
99.9%   KLD: 7.899748
99.0%   KLD: 4.444966
95.0%   KLD: 1.466614
90.0%   KLD: 0.644059
Median  KLD: 0.082348
10.0%   KLD: 0.000522
5.0%    KLD: 0.000086
1.0%    KLD: 0.000005
0.1%    KLD: -0.000000
Minimum KLD: -0.000004

====== Token probability statistics ======
Mean    Δp: -4.223 ± 0.107 %
Maximum Δp: 99.777%
99.9%   Δp: 78.428%
99.0%   Δp: 34.047%
95.0%   Δp: 13.412%
90.0%   Δp: 6.356%
75.0%   Δp: 0.558%
Median  Δp: -0.039%
25.0%   Δp: -3.628%
10.0%   Δp: -18.212%
5.0%    Δp: -38.882%
1.0%    Δp: -91.474%
0.1%    Δp: -99.236%
Minimum Δp: -99.964%
RMS Δp    : 19.287 ± 0.200 %
Same top p: 80.512 ± 0.226 %

llama_perf_context_print: load time = 29294.99 ms
llama_perf_context_print: prompt eval time = 66791.99 ms / 61952 tokens ( 1.08 ms per token, 927.54 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 80155.16 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1031 + (22682 = 21101 + 272 + 1308) + 421 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 951 + (22760 = 18866 + 720 + 3174) + 423 |
llama_memory_breakdown_print: | - Host | 79042 = 78938 + 0 + 104 |
```
kld_data/unsloth/Q2_K_L/MiniMax-M2.5-Q2_K_L.md
ADDED
@@ -0,0 +1,371 @@
### MiniMax-M2.5-Q2_K_L (unsloth)
|
| 2 |
+
|
| 3 |
+
77.71 GiB (2.92 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q2_K_L/MiniMax-M2.5-Q2_K_L-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 42159 used, -18288 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 42646 used, -18774 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 84806 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 39110 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38539 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7098 MiB used, 16773 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1308 MiB used, 22562 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 17 layers ( 1 overflowing), 22682 MiB used, 1189 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 46 layers (33 overflowing), 22760 MiB used, 1111 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.79 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q2_K_L/MiniMax-M2.5-Q2_K_L-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 10
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q2_K: 248 tensors
|
| 87 |
+
llama_model_loader: - type q3_K: 124 tensors
|
| 88 |
+
llama_model_loader: - type q4_K: 63 tensors
|
| 89 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = Q2_K - Medium
|
| 92 |
+
print_info: file size = 77.71 GiB (2.92 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46952.16 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 32137.11 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 21101.19 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18866.21 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 272.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 720.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1309.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 129 (with bs=4096), 67 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 23.13 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 5.36 seconds per pass - ETA 1.35 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.7772 ± 1.3167 0.06149 ± 0.03920 0.15100 ± 0.02777 11.889 ± 1.687 % 86.275 ± 2.159 %
|
| 201 |
+
2 5.0652 ± 0.6329 0.08018 ± 0.02670 0.10902 ± 0.01488 10.118 ± 1.217 % 88.824 ± 1.397 %
|
| 202 |
+
3 4.7566 ± 0.4798 0.05805 ± 0.02610 0.13653 ± 0.01287 12.397 ± 0.980 % 88.105 ± 1.171 %
|
| 203 |
+
4 5.4222 ± 0.4904 0.05573 ± 0.02207 0.13997 ± 0.01044 12.470 ± 0.822 % 86.176 ± 1.081 %
|
| 204 |
+
5 5.1563 ± 0.4141 0.05201 ± 0.02217 0.14428 ± 0.01009 12.410 ± 0.769 % 86.588 ± 0.955 %
|
| 205 |
+
6 6.3055 ± 0.4958 0.05024 ± 0.02106 0.15667 ± 0.00907 12.016 ± 0.687 % 85.556 ± 0.899 %
|
| 206 |
+
7 5.9410 ± 0.4190 0.06254 ± 0.02075 0.18447 ± 0.01030 13.618 ± 0.694 % 84.594 ± 0.855 %
|
| 207 |
+
8 6.6863 ± 0.4464 0.05722 ± 0.01886 0.17876 ± 0.00913 13.071 ± 0.638 % 84.216 ± 0.807 %
|
| 208 |
+
9 6.5612 ± 0.4101 0.05761 ± 0.01716 0.17158 ± 0.00833 12.559 ± 0.593 % 84.488 ± 0.756 %
|
| 209 |
+
10 5.9941 ± 0.3488 0.05626 ± 0.01572 0.16471 ± 0.00759 12.493 ± 0.546 % 84.941 ± 0.708 %
|
| 210 |
+
11 6.5476 ± 0.3694 0.05276 ± 0.01493 0.16595 ± 0.00704 12.488 ± 0.508 % 84.385 ± 0.686 %
|
| 211 |
+
12 7.2416 ± 0.3965 0.05201 ± 0.01407 0.16417 ± 0.00656 12.129 ± 0.482 % 84.150 ± 0.660 %
|
| 212 |
+
13 7.4910 ± 0.3902 0.04861 ± 0.01323 0.15971 ± 0.00609 11.881 ± 0.456 % 84.193 ± 0.634 %
|
| 213 |
+
14 8.0880 ± 0.4091 0.05003 ± 0.01301 0.16264 ± 0.00592 11.811 ± 0.443 % 83.641 ± 0.619 %
|
| 214 |
+
15 8.4556 ± 0.4144 0.04836 ± 0.01241 0.16097 ± 0.00555 11.673 ± 0.422 % 83.477 ± 0.601 %
|
| 215 |
+
16 8.6979 ± 0.4117 0.04441 ± 0.01187 0.15662 ± 0.00524 11.491 ± 0.404 % 83.578 ± 0.580 %
|
| 216 |
+
17 8.9639 ± 0.4151 0.04742 ± 0.01203 0.16486 ± 0.00537 11.515 ± 0.394 % 83.368 ± 0.566 %
|
| 217 |
+
18 8.4675 ± 0.3788 0.04966 ± 0.01183 0.16847 ± 0.00547 11.623 ± 0.386 % 83.573 ± 0.547 %
|
| 218 |
+
19 8.5869 ± 0.3726 0.04787 ± 0.01154 0.16483 ± 0.00521 11.544 ± 0.371 % 83.736 ± 0.530 %
|
| 219 |
+
20 8.6512 ± 0.3656 0.04746 ± 0.01132 0.17074 ± 0.00517 11.648 ± 0.356 % 83.549 ± 0.519 %
|
| 220 |
+
21 8.6064 ± 0.3542 0.04483 ± 0.01096 0.16941 ± 0.00500 11.668 ± 0.349 % 83.772 ± 0.504 %
|
| 221 |
+
22 8.9342 ± 0.3624 0.04454 ± 0.01070 0.17029 ± 0.00483 11.624 ± 0.338 % 83.387 ± 0.497 %
|
| 222 |
+
23 8.9699 ± 0.3577 0.04733 ± 0.01064 0.17666 ± 0.00514 11.820 ± 0.333 % 83.257 ± 0.488 %
|
| 223 |
+
24 9.3864 ± 0.3684 0.04664 ± 0.01037 0.17663 ± 0.00498 11.714 ± 0.324 % 83.154 ± 0.478 %
|
| 224 |
+
25 9.3973 ± 0.3618 0.04920 ± 0.01035 0.18359 ± 0.00518 12.094 ± 0.326 % 82.824 ± 0.472 %
|
| 225 |
+
26 9.1624 ± 0.3429 0.09248 ± 0.01137 0.22367 ± 0.00693 15.042 ± 0.390 % 82.051 ± 0.471 %
|
| 226 |
+
27 9.0112 ± 0.3288 0.12979 ± 0.01206 0.26054 ± 0.00797 17.210 ± 0.407 % 81.365 ± 0.469 %
|
| 227 |
+
28 9.1310 ± 0.3282 0.12931 ± 0.01179 0.25883 ± 0.00772 17.112 ± 0.397 % 81.317 ± 0.461 %
|
| 228 |
+
29 9.0431 ± 0.3201 0.12910 ± 0.01157 0.25833 ± 0.00751 17.016 ± 0.388 % 81.420 ± 0.452 %
|
| 229 |
+
30 8.4763 ± 0.2928 0.13158 ± 0.01137 0.25417 ± 0.00739 17.005 ± 0.384 % 81.895 ± 0.440 %
|
| 230 |
+
31 7.9641 ± 0.2684 0.12862 ± 0.01104 0.24915 ± 0.00721 16.868 ± 0.377 % 82.353 ± 0.429 %
|
| 231 |
+
32 7.7471 ± 0.2548 0.12624 ± 0.01078 0.24543 ± 0.00700 16.754 ± 0.369 % 82.475 ± 0.421 %
|
| 232 |
+
33 7.5849 ± 0.2442 0.12497 ± 0.01057 0.24328 ± 0.00681 16.667 ± 0.360 % 82.531 ± 0.414 %
|
| 233 |
+
34 7.8080 ± 0.2492 0.12693 ± 0.01040 0.24748 ± 0.00670 16.622 ± 0.353 % 82.295 ± 0.410 %
|
| 234 |
+
35 7.9081 ± 0.2505 0.12503 ± 0.01032 0.25130 ± 0.00663 16.654 ± 0.346 % 82.230 ± 0.405 %
|
| 235 |
+
36 7.9710 ± 0.2499 0.12407 ± 0.01013 0.25000 ± 0.00648 16.584 ± 0.340 % 82.288 ± 0.398 %
|
| 236 |
+
37 8.0766 ± 0.2499 0.13495 ± 0.01024 0.26051 ± 0.00671 17.266 ± 0.345 % 82.077 ± 0.395 %
|
| 237 |
+
38 8.3027 ± 0.2549 0.13282 ± 0.01003 0.25911 ± 0.00661 17.127 ± 0.339 % 82.033 ± 0.390 %
|
| 238 |
+
39 8.3595 ± 0.2533 0.14658 ± 0.01018 0.27096 ± 0.00688 17.665 ± 0.341 % 81.850 ± 0.387 %
|
| 239 |
+
40 8.3283 ± 0.2482 0.17777 ± 0.01064 0.30119 ± 0.00758 18.840 ± 0.343 % 81.255 ± 0.386 %
|
| 240 |
+
41 8.3727 ± 0.2458 0.21391 ± 0.01125 0.33773 ± 0.00832 20.294 ± 0.348 % 80.603 ± 0.387 %
|
| 241 |
+
42 8.3327 ± 0.2409 0.24045 ± 0.01160 0.36615 ± 0.00877 21.468 ± 0.348 % 80.065 ± 0.386 %
|
| 242 |
+
43 8.2354 ± 0.2345 0.25984 ± 0.01182 0.38497 ± 0.00902 22.221 ± 0.346 % 79.754 ± 0.384 %
|
| 243 |
+
44 8.1165 ± 0.2274 0.25162 ± 0.01161 0.37933 ± 0.00882 22.040 ± 0.341 % 79.866 ± 0.379 %
|
| 244 |
+
45 8.2697 ± 0.2302 0.24843 ± 0.01142 0.37648 ± 0.00865 21.838 ± 0.337 % 79.878 ± 0.374 %
|
| 245 |
+
46 8.4146 ± 0.2320 0.24372 ± 0.01121 0.37226 ± 0.00848 21.654 ± 0.333 % 79.855 ± 0.370 %
|
| 246 |
+
47 8.5787 ± 0.2346 0.24083 ± 0.01100 0.36704 ± 0.00831 21.457 ± 0.329 % 79.908 ± 0.366 %
|
| 247 |
+
48 8.3983 ± 0.2260 0.23623 ± 0.01080 0.36202 ± 0.00814 21.300 ± 0.325 % 79.959 ± 0.362 %
|
| 248 |
+
49 8.4765 ± 0.2255 0.23048 ± 0.01067 0.36274 ± 0.00814 21.150 ± 0.321 % 79.936 ± 0.358 %
|
| 249 |
+
50 8.5838 ± 0.2272 0.22891 ± 0.01052 0.36039 ± 0.00801 21.023 ± 0.318 % 79.890 ± 0.355 %
|
| 250 |
+
51 8.6891 ± 0.2279 0.22570 ± 0.01034 0.35625 ± 0.00786 20.865 ± 0.314 % 79.977 ± 0.351 %
|
| 251 |
+
52 8.7428 ± 0.2266 0.22255 ± 0.01023 0.35466 ± 0.00772 20.719 ± 0.311 % 79.872 ± 0.348 %
|
| 252 |
+
53 8.8372 ± 0.2266 0.21751 ± 0.01008 0.35116 ± 0.00758 20.569 ± 0.307 % 79.837 ± 0.345 %
|
| 253 |
+
54 8.8806 ± 0.2251 0.21463 ± 0.00992 0.34738 ± 0.00745 20.411 ± 0.304 % 79.826 ± 0.342 %
|
| 254 |
+
55 8.9155 ± 0.2235 0.21151 ± 0.00976 0.34355 ± 0.00732 20.257 ± 0.301 % 79.815 ± 0.339 %
|
| 255 |
+
56 8.9433 ± 0.2222 0.20890 ± 0.00962 0.33999 ± 0.00720 20.116 ± 0.298 % 79.839 ± 0.336 %
|
| 256 |
+
57 8.9416 ± 0.2203 0.20877 ± 0.00956 0.33993 ± 0.00710 20.071 ± 0.295 % 79.842 ± 0.333 %
|
| 257 |
+
58 8.9500 ± 0.2186 0.20833 ± 0.00945 0.33781 ± 0.00700 19.990 ± 0.292 % 79.899 ± 0.330 %
|
| 258 |
+
59 8.8742 ± 0.2144 0.20496 ± 0.00931 0.33339 ± 0.00688 19.857 ± 0.289 % 80.033 ± 0.326 %
|
| 259 |
+
60 8.8688 ± 0.2126 0.20287 ± 0.00918 0.33038 ± 0.00678 19.742 ± 0.286 % 80.092 ± 0.323 %
|
| 260 |
+
61 8.9013 ± 0.2115 0.19988 ± 0.00906 0.32793 ± 0.00668 19.627 ± 0.283 % 80.122 ± 0.320 %
|
| 261 |
+
62 8.8474 ± 0.2088 0.19708 ± 0.00897 0.32534 ± 0.00659 19.536 ± 0.280 % 80.215 ± 0.317 %
|
| 262 |
+
63 8.8761 ± 0.2083 0.19399 ± 0.00887 0.32333 ± 0.00650 19.404 ± 0.278 % 80.212 ± 0.314 %
|
| 263 |
+
64 8.8315 ± 0.2052 0.19154 ± 0.00877 0.32123 ± 0.00641 19.302 ± 0.275 % 80.257 ± 0.312 %
|
| 264 |
+
65 8.7878 ± 0.2024 0.18774 ± 0.00868 0.31961 ± 0.00633 19.218 ± 0.273 % 80.302 ± 0.309 %
|
| 265 |
+
66 8.8094 ± 0.2016 0.18520 ± 0.00858 0.31769 ± 0.00624 19.122 ± 0.271 % 80.345 ± 0.306 %
|
| 266 |
+
67 8.8024 ± 0.1999 0.18346 ± 0.00849 0.31562 ± 0.00616 19.013 ± 0.268 % 80.433 ± 0.304 %
|
| 267 |
+
68 8.7315 ± 0.1966 0.18135 ± 0.00838 0.31274 ± 0.00608 18.897 ± 0.266 % 80.536 ± 0.301 %
|
| 268 |
+
69 8.7537 ± 0.1957 0.17943 ± 0.00829 0.31063 ± 0.00599 18.801 ± 0.264 % 80.574 ± 0.298 %
|
| 269 |
+
70 8.7033 ± 0.1928 0.17745 ± 0.00820 0.30888 ± 0.00592 18.729 ± 0.262 % 80.672 ± 0.296 %
|
| 270 |
+
71 8.6702 ± 0.1907 0.17562 ± 0.00811 0.30653 ± 0.00584 18.643 ± 0.259 % 80.740 ± 0.293 %
|
| 271 |
+
72 8.6848 ± 0.1901 0.17513 ± 0.00804 0.30490 ± 0.00578 18.574 ± 0.257 % 80.768 ± 0.291 %
|
| 272 |
+
73 8.6713 ± 0.1883 0.17284 ± 0.00795 0.30325 ± 0.00571 18.504 ± 0.255 % 80.774 ± 0.289 %
|
| 273 |
+
74 8.6515 ± 0.1864 0.17138 ± 0.00787 0.30167 ± 0.00564 18.416 ± 0.253 % 80.795 ± 0.287 %
|
| 274 |
+
75 8.6408 ± 0.1849 0.16923 ± 0.00780 0.30080 ± 0.00557 18.356 ± 0.251 % 80.784 ± 0.285 %
|
| 275 |
+
76 8.7056 ± 0.1852 0.16822 ± 0.00772 0.29989 ± 0.00551 18.289 ± 0.249 % 80.764 ± 0.283 %
|
| 276 |
+
77 8.6848 ± 0.1836 0.16609 ± 0.00768 0.29838 ± 0.00545 18.201 ± 0.247 % 80.830 ± 0.281 %
|
| 277 |
+
78 8.6910 ± 0.1827 0.16498 ± 0.00762 0.29719 ± 0.00539 18.144 ± 0.245 % 80.845 ± 0.279 %
|
| 278 |
+
79 8.6926 ± 0.1816 0.16371 ± 0.00755 0.29581 ± 0.00533 18.076 ± 0.244 % 80.884 ± 0.277 %
|
| 279 |
+
80 8.6793 ± 0.1805 0.16192 ± 0.00751 0.29658 ± 0.00530 18.063 ± 0.242 % 80.863 ± 0.275 %
|
| 280 |
+
81 8.6388 ± 0.1786 0.16036 ± 0.00744 0.29483 ± 0.00524 17.993 ± 0.240 % 80.934 ± 0.273 %
|
| 281 |
+
82 8.6148 ± 0.1768 0.16049 ± 0.00736 0.29345 ± 0.00518 17.933 ± 0.238 % 80.918 ± 0.272 %
|
| 282 |
+
83 8.6446 ± 0.1761 0.15963 ± 0.00729 0.29190 ± 0.00512 17.864 ± 0.236 % 80.869 ± 0.270 %
|
| 283 |
+
84 8.6485 ± 0.1747 0.15790 ± 0.00722 0.29013 ± 0.00506 17.786 ± 0.234 % 80.845 ± 0.269 %
|
| 284 |
+
85 8.6350 ± 0.1731 0.15690 ± 0.00715 0.28839 ± 0.00500 17.713 ± 0.233 % 80.858 ± 0.267 %
|
| 285 |
+
86 8.5567 ± 0.1700 0.15683 ± 0.00709 0.28696 ± 0.00495 17.668 ± 0.231 % 80.885 ± 0.266 %
|
| 286 |
+
87 8.4908 ± 0.1672 0.15690 ± 0.00703 0.28582 ± 0.00490 17.645 ± 0.229 % 80.883 ± 0.264 %
|
| 287 |
+
88 8.4126 ± 0.1643 0.15597 ± 0.00697 0.28430 ± 0.00485 17.584 ± 0.227 % 80.949 ± 0.262 %
|
| 288 |
+
89 8.3257 ± 0.1611 0.15557 ± 0.00690 0.28278 ± 0.00480 17.541 ± 0.226 % 80.987 ± 0.260 %
|
| 289 |
+
90 8.2629 ± 0.1586 0.15570 ± 0.00685 0.28145 ± 0.00475 17.487 ± 0.224 % 81.024 ± 0.259 %
|
| 290 |
+
91 8.1991 ± 0.1562 0.15506 ± 0.00680 0.28007 ± 0.00471 17.440 ± 0.223 % 81.069 ± 0.257 %
|
| 291 |
+
92 8.1308 ± 0.1536 0.15475 ± 0.00674 0.27886 ± 0.00466 17.399 ± 0.221 % 81.078 ± 0.256 %
|
| 292 |
+
93 8.1366 ± 0.1531 0.15297 ± 0.00671 0.27877 ± 0.00464 17.365 ± 0.220 % 81.113 ± 0.254 %
|
| 293 |
+
94 8.1659 ± 0.1527 0.15223 ± 0.00665 0.27714 ± 0.00459 17.295 ± 0.218 % 81.151 ± 0.253 %
|
| 294 |
+
95 8.2868 ± 0.1546 0.15172 ± 0.00661 0.27600 ± 0.00455 17.220 ± 0.217 % 81.143 ± 0.251 %
|
| 295 |
+
96 8.3897 ± 0.1559 0.15111 ± 0.00656 0.27530 ± 0.00450 17.163 ± 0.216 % 81.070 ± 0.250 %
|
| 296 |
+
97 8.4763 ± 0.1568 0.15072 ± 0.00650 0.27389 ± 0.00446 17.096 ± 0.215 % 81.035 ± 0.249 %
|
| 297 |
+
98 8.6257 ± 0.1593 0.15005 ± 0.00646 0.27233 ± 0.00442 17.017 ± 0.213 % 81.044 ± 0.248 %
|
| 298 |
+
99 8.7525 ± 0.1612 0.14907 ± 0.00641 0.27130 ± 0.00437 16.952 ± 0.212 % 80.974 ± 0.247 %
|
| 299 |
+
100 8.7854 ± 0.1610 0.14794 ± 0.00636 0.27034 ± 0.00434 16.897 ± 0.211 % 80.965 ± 0.246 %
|
| 300 |
+
101 8.8103 ± 0.1608 0.14594 ± 0.00632 0.26994 ± 0.00437 16.856 ± 0.210 % 81.013 ± 0.244 %
|
| 301 |
+
102 8.8793 ± 0.1618 0.14584 ± 0.00628 0.26996 ± 0.00434 16.817 ± 0.208 % 81.015 ± 0.243 %
|
| 302 |
+
103 8.8488 ± 0.1606 0.14577 ± 0.00624 0.26919 ± 0.00431 16.824 ± 0.208 % 81.009 ± 0.242 %
|
| 303 |
+
104 8.7977 ± 0.1587 0.14749 ± 0.00624 0.27088 ± 0.00433 16.960 ± 0.208 % 81.029 ± 0.241 %
|
| 304 |
+
105 8.6805 ± 0.1556 0.14859 ± 0.00623 0.27211 ± 0.00434 17.104 ± 0.208 % 81.083 ± 0.239 %
|
| 305 |
+
106 8.5383 ± 0.1520 0.14938 ± 0.00620 0.27186 ± 0.00432 17.189 ± 0.207 % 81.173 ± 0.238 %
|
| 306 |
+
107 8.5937 ± 0.1522 0.14811 ± 0.00615 0.27032 ± 0.00428 17.123 ± 0.206 % 81.162 ± 0.237 %
|
| 307 |
+
108 8.5995 ± 0.1515 0.14700 ± 0.00611 0.26940 ± 0.00425 17.087 ± 0.205 % 81.187 ± 0.236 %
|
| 308 |
+
109 8.6199 ± 0.1512 0.14665 ± 0.00607 0.26860 ± 0.00421 17.049 ± 0.203 % 81.180 ± 0.234 %
|
| 309 |
+
110 8.6516 ± 0.1511 0.14563 ± 0.00603 0.26761 ± 0.00418 17.007 ± 0.202 % 81.162 ± 0.233 %
|
| 310 |
+
111 8.6991 ± 0.1512 0.14479 ± 0.00599 0.26664 ± 0.00415 16.956 ± 0.201 % 81.148 ± 0.232 %
|
| 311 |
+
112 8.6995 ± 0.1504 0.14350 ± 0.00594 0.26538 ± 0.00411 16.911 ± 0.200 % 81.148 ± 0.231 %
|
| 312 |
+
113 8.7070 ± 0.1497 0.14298 ± 0.00590 0.26406 ± 0.00408 16.858 ± 0.199 % 81.176 ± 0.230 %
|
| 313 |
+
114 8.7193 ± 0.1493 0.14206 ± 0.00586 0.26292 ± 0.00404 16.802 ± 0.198 % 81.204 ± 0.229 %
|
| 314 |
+
115 8.7009 ± 0.1483 0.14229 ± 0.00583 0.26303 ± 0.00402 16.812 ± 0.197 % 81.221 ± 0.228 %
|
| 315 |
+
116 8.7203 ± 0.1481 0.14591 ± 0.00585 0.26779 ± 0.00404 17.013 ± 0.197 % 81.105 ± 0.228 %
|
| 316 |
+
117 8.6933 ± 0.1468 0.15638 ± 0.00594 0.27735 ± 0.00416 17.527 ± 0.198 % 80.942 ± 0.227 %
|
| 317 |
+
118 8.6681 ± 0.1456 0.16633 ± 0.00604 0.28529 ± 0.00425 17.917 ± 0.198 % 80.804 ± 0.227 %
|
| 318 |
+
119 8.6432 ± 0.1444 0.17690 ± 0.00614 0.29466 ± 0.00436 18.415 ± 0.200 % 80.623 ± 0.227 %
|
| 319 |
+
120 8.6202 ± 0.1432 0.18559 ± 0.00622 0.30270 ± 0.00444 18.781 ± 0.200 % 80.458 ± 0.227 %
|
| 320 |
+
121 8.5908 ± 0.1421 0.19381 ± 0.00628 0.31141 ± 0.00453 19.217 ± 0.200 % 80.301 ± 0.226 %
|
| 321 |
+
|
| 322 |
+
====== Perplexity statistics ======
|
| 323 |
+
Mean PPL(Q) : 8.590786 ± 0.142066
|
| 324 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 325 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 92.64%
|
| 326 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.193806 ± 0.006281
|
| 327 |
+
Mean PPL(Q)/PPL(base) : 1.213861 ± 0.007625
|
| 328 |
+
Mean PPL(Q)-PPL(base) : 1.513547 ± 0.056230
|
| 329 |
+
|
| 330 |
+
====== KL divergence statistics ======
|
| 331 |
+
Mean KLD: 0.311413 ± 0.004532
|
| 332 |
+
Maximum KLD: 19.883234
|
| 333 |
+
99.9% KLD: 7.724108
|
| 334 |
+
99.0% KLD: 4.343384
|
| 335 |
+
95.0% KLD: 1.484256
|
| 336 |
+
90.0% KLD: 0.644883
|
| 337 |
+
Median KLD: 0.082115
|
| 338 |
+
10.0% KLD: 0.000529
|
| 339 |
+
5.0% KLD: 0.000084
|
| 340 |
+
1.0% KLD: 0.000005
|
| 341 |
+
0.1% KLD: 0.000000
|
| 342 |
+
Minimum KLD: -0.000003
|
| 343 |
+
|
| 344 |
+
====== Token probability statistics ======
|
| 345 |
+
Mean Δp: -4.196 ± 0.107 %
|
| 346 |
+
Maximum Δp: 99.923%
|
| 347 |
+
99.9% Δp: 76.443%
|
| 348 |
+
99.0% Δp: 34.084%
|
| 349 |
+
95.0% Δp: 13.223%
|
| 350 |
+
90.0% Δp: 6.471%
|
| 351 |
+
75.0% Δp: 0.556%
|
| 352 |
+
Median Δp: -0.039%
|
| 353 |
+
25.0% Δp: -3.596%
|
| 354 |
+
10.0% Δp: -17.933%
|
| 355 |
+
5.0% Δp: -38.879%
|
| 356 |
+
1.0% Δp: -91.576%
|
| 357 |
+
0.1% Δp: -99.161%
|
| 358 |
+
Minimum Δp: -99.964%
|
| 359 |
+
RMS Δp : 19.217 ± 0.200 %
|
| 360 |
+
Same top p: 80.301 ± 0.226 %
|
| 361 |
+
|
| 362 |
+
llama_perf_context_print: load time = 30860.28 ms
|
| 363 |
+
llama_perf_context_print: prompt eval time = 66866.53 ms / 61952 tokens ( 1.08 ms per token, 926.50 tokens per second)
|
| 364 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 365 |
+
llama_perf_context_print: total time = 80620.39 ms / 61953 tokens
|
| 366 |
+
llama_perf_context_print: graphs reused = 0
|
| 367 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1031 + (22682 = 21101 + 272 + 1308) + 421 |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 951 + (22760 = 18866 + 720 + 3174) + 423 |
|
| 370 |
+
llama_memory_breakdown_print: | - Host | 79193 = 79089 + 0 + 104 |
|
| 371 |
+
```
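The perplexity block above reports both the mean log-ratio and the mean ratio of PPL(Q) to PPL(base). As a quick reference, the hypothetical sketch below shows how perplexity and that log-ratio follow from the probabilities assigned to the correct next tokens; the arrays here are illustrative stand-ins, not values from these runs.

```python
# Hypothetical illustration: perplexity is exp(mean negative log-likelihood) over the
# correct next tokens, and ln(PPL(Q)/PPL(base)) is the difference of those means.
import numpy as np

def perplexity(p_correct):
    """p_correct: probability the model assigned to each actual next token."""
    return float(np.exp(-np.mean(np.log(p_correct))))

# Illustrative stand-in probabilities (not taken from the logs above).
p_base  = np.array([0.40, 0.12, 0.55, 0.08, 0.30])
p_quant = np.array([0.35, 0.10, 0.50, 0.07, 0.28])

ppl_base, ppl_quant = perplexity(p_base), perplexity(p_quant)
log_ratio = np.log(ppl_quant / ppl_base)   # compare to "Mean ln(PPL(Q)/PPL(base))"
print(ppl_base, ppl_quant, log_ratio, ppl_quant / ppl_base)
```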
|
kld_data/unsloth/Q3_K_M/MiniMax-M2.5-Q3_K_M.md
ADDED
|
@@ -0,0 +1,372 @@
| 1 |
+
### MiniMax-M2.5-Q3_K_M (unsloth)
|
| 2 |
+
|
| 3 |
+
101.76 GiB (3.82 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q3_K_M/MiniMax-M2.5-Q3_K_M-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 55121 used, -31249 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 54391 used, -30519 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 109513 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 63817 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37988 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7910 MiB used, 15961 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1722 MiB used, 22148 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 13 layers ( 1 overflowing), 22678 MiB used, 1193 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 50 layers (41 overflowing), 22487 MiB used, 1384 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 5.03 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q3_K_M/MiniMax-M2.5-Q3_K_M-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 12
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q3_K: 249 tensors
|
| 87 |
+
llama_model_loader: - type q4_K: 181 tensors
|
| 88 |
+
llama_model_loader: - type q5_K: 5 tensors
|
| 89 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = Q3_K - Medium
|
| 92 |
+
print_info: file size = 101.76 GiB (3.82 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46719.44 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47027.86 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 9950.45 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 20891.29 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18529.31 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 208.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 784.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1579.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 165 (with bs=4096), 87 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 22.99 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 6.21 seconds per pass - ETA 1.55 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.3629 ± 1.2215 -0.00159 ± 0.03200 0.06052 ± 0.00949 9.366 ± 1.719 % 90.980 ± 1.797 %
|
| 202 |
+
2 4.7038 ± 0.5704 0.00616 ± 0.01987 0.04340 ± 0.00521 7.201 ± 1.134 % 92.353 ± 1.178 %
|
| 203 |
+
3 4.5691 ± 0.4586 0.01783 ± 0.01648 0.05242 ± 0.00509 7.670 ± 0.801 % 91.895 ± 0.987 %
|
| 204 |
+
4 5.1755 ± 0.4642 0.00916 ± 0.01445 0.05430 ± 0.00442 7.553 ± 0.668 % 90.980 ± 0.897 %
|
| 205 |
+
5 5.0223 ± 0.4046 0.02567 ± 0.01526 0.06459 ± 0.00647 8.489 ± 0.707 % 90.824 ± 0.809 %
|
| 206 |
+
6 6.0813 ± 0.4766 0.01404 ± 0.01402 0.06953 ± 0.00563 8.087 ± 0.622 % 90.261 ± 0.758 %
|
| 207 |
+
7 5.6779 ± 0.3984 0.01724 ± 0.01299 0.07892 ± 0.00539 9.083 ± 0.564 % 89.580 ± 0.723 %
|
| 208 |
+
8 6.3968 ± 0.4261 0.01295 ± 0.01205 0.07663 ± 0.00478 8.703 ± 0.517 % 89.559 ± 0.677 %
|
| 209 |
+
9 6.2808 ± 0.3899 0.01393 ± 0.01093 0.07285 ± 0.00430 8.330 ± 0.481 % 89.325 ± 0.645 %
|
| 210 |
+
10 5.7594 ± 0.3325 0.01631 ± 0.01007 0.06986 ± 0.00391 8.291 ± 0.441 % 89.412 ± 0.609 %
|
| 211 |
+
11 6.3110 ± 0.3522 0.01595 ± 0.00956 0.06955 ± 0.00361 8.126 ± 0.412 % 89.198 ± 0.586 %
|
| 212 |
+
12 7.0158 ± 0.3811 0.02034 ± 0.00906 0.06926 ± 0.00335 7.962 ± 0.389 % 88.856 ± 0.569 %
|
| 213 |
+
13 7.2758 ± 0.3764 0.01946 ± 0.00847 0.06702 ± 0.00312 7.741 ± 0.370 % 88.929 ± 0.545 %
|
| 214 |
+
14 7.8824 ± 0.3983 0.02429 ± 0.00835 0.06817 ± 0.00310 7.784 ± 0.373 % 88.711 ± 0.530 %
|
| 215 |
+
15 8.2454 ± 0.4029 0.02319 ± 0.00798 0.06733 ± 0.00291 7.655 ± 0.356 % 88.680 ± 0.512 %
|
| 216 |
+
16 8.5099 ± 0.4025 0.02256 ± 0.00757 0.06512 ± 0.00274 7.486 ± 0.342 % 88.824 ± 0.493 %
|
| 217 |
+
17 8.7581 ± 0.4046 0.02420 ± 0.00751 0.06817 ± 0.00272 7.398 ± 0.328 % 88.489 ± 0.485 %
|
| 218 |
+
18 8.2497 ± 0.3677 0.02361 ± 0.00728 0.06759 ± 0.00262 7.299 ± 0.316 % 88.889 ± 0.464 %
|
| 219 |
+
19 8.3766 ± 0.3625 0.02308 ± 0.00707 0.06641 ± 0.00249 7.329 ± 0.302 % 88.937 ± 0.451 %
|
| 220 |
+
20 8.4128 ± 0.3548 0.01952 ± 0.00709 0.06843 ± 0.00246 7.444 ± 0.298 % 89.000 ± 0.438 %
|
| 221 |
+
21 8.3718 ± 0.3442 0.01720 ± 0.00686 0.06776 ± 0.00236 7.432 ± 0.286 % 89.076 ± 0.426 %
|
| 222 |
+
22 8.7064 ± 0.3529 0.01871 ± 0.00672 0.06871 ± 0.00229 7.395 ± 0.276 % 88.895 ± 0.420 %
|
| 223 |
+
23 8.7117 ± 0.3465 0.01811 ± 0.00700 0.07357 ± 0.00259 7.733 ± 0.288 % 88.815 ± 0.412 %
|
| 224 |
+
24 9.1035 ± 0.3560 0.01604 ± 0.00679 0.07289 ± 0.00249 7.644 ± 0.281 % 88.873 ± 0.402 %
|
| 225 |
+
25 9.0711 ± 0.3481 0.01387 ± 0.00669 0.07455 ± 0.00251 7.759 ± 0.278 % 88.784 ± 0.395 %
|
| 226 |
+
26 8.5310 ± 0.3177 0.02108 ± 0.00697 0.08648 ± 0.00322 8.988 ± 0.317 % 88.733 ± 0.388 %
|
| 227 |
+
27 8.2232 ± 0.2983 0.03827 ± 0.00744 0.10128 ± 0.00385 10.563 ± 0.352 % 88.308 ± 0.387 %
|
| 228 |
+
28 8.3340 ± 0.2975 0.03798 ± 0.00728 0.10082 ± 0.00372 10.490 ± 0.343 % 88.221 ± 0.382 %
|
| 229 |
+
29 8.2535 ± 0.2895 0.03774 ± 0.00710 0.09961 ± 0.00361 10.414 ± 0.336 % 88.357 ± 0.373 %
|
| 230 |
+
30 7.7201 ± 0.2639 0.03813 ± 0.00691 0.09731 ± 0.00351 10.334 ± 0.329 % 88.706 ± 0.362 %
|
| 231 |
+
31 7.2679 ± 0.2422 0.03714 ± 0.00671 0.09599 ± 0.00345 10.296 ± 0.325 % 88.982 ± 0.352 %
|
| 232 |
+
32 7.0855 ± 0.2305 0.03697 ± 0.00654 0.09442 ± 0.00334 10.234 ± 0.317 % 89.081 ± 0.345 %
|
| 233 |
+
33 6.9480 ± 0.2210 0.03725 ± 0.00640 0.09315 ± 0.00325 10.177 ± 0.310 % 89.138 ± 0.339 %
|
| 234 |
+
34 7.1471 ± 0.2253 0.03849 ± 0.00634 0.09480 ± 0.00319 10.121 ± 0.303 % 89.008 ± 0.336 %
|
| 235 |
+
35 7.2737 ± 0.2281 0.04141 ± 0.00633 0.09778 ± 0.00318 10.241 ± 0.296 % 88.762 ± 0.334 %
|
| 236 |
+
36 7.3348 ± 0.2275 0.04090 ± 0.00623 0.09705 ± 0.00310 10.160 ± 0.291 % 88.791 ± 0.329 %
|
| 237 |
+
37 7.3733 ± 0.2258 0.04384 ± 0.00625 0.10074 ± 0.00326 10.462 ± 0.294 % 88.702 ± 0.326 %
|
| 238 |
+
38 7.5896 ± 0.2305 0.04302 ± 0.00613 0.09981 ± 0.00318 10.381 ± 0.289 % 88.700 ± 0.322 %
|
| 239 |
+
39 7.5441 ± 0.2258 0.04395 ± 0.00607 0.10244 ± 0.00321 10.623 ± 0.288 % 88.648 ± 0.318 %
|
| 240 |
+
40 7.3660 ± 0.2162 0.05499 ± 0.00625 0.11283 ± 0.00346 11.468 ± 0.295 % 88.392 ± 0.317 %
|
| 241 |
+
41 7.2159 ± 0.2079 0.06522 ± 0.00651 0.12595 ± 0.00383 12.393 ± 0.300 % 88.063 ± 0.317 %
|
| 242 |
+
42 7.0379 ± 0.1991 0.07157 ± 0.00674 0.13664 ± 0.00409 13.163 ± 0.304 % 87.843 ± 0.316 %
|
| 243 |
+
43 6.8748 ± 0.1912 0.07926 ± 0.00692 0.14626 ± 0.00444 13.732 ± 0.307 % 87.706 ± 0.314 %
|
| 244 |
+
44 6.8108 ± 0.1864 0.07623 ± 0.00680 0.14427 ± 0.00434 13.624 ± 0.303 % 87.718 ± 0.310 %
|
| 245 |
+
45 6.9518 ± 0.1891 0.07483 ± 0.00671 0.14356 ± 0.00426 13.511 ± 0.299 % 87.651 ± 0.307 %
|
| 246 |
+
46 7.0895 ± 0.1910 0.07236 ± 0.00659 0.14173 ± 0.00417 13.388 ± 0.295 % 87.621 ± 0.304 %
|
| 247 |
+
47 7.2446 ± 0.1937 0.07180 ± 0.00646 0.13970 ± 0.00408 13.261 ± 0.292 % 87.685 ± 0.300 %
|
| 248 |
+
48 7.1135 ± 0.1872 0.07019 ± 0.00635 0.13752 ± 0.00400 13.152 ± 0.288 % 87.778 ± 0.296 %
|
| 249 |
+
49 7.2337 ± 0.1885 0.07193 ± 0.00634 0.13942 ± 0.00400 13.147 ± 0.285 % 87.683 ± 0.294 %
|
| 250 |
+
50 7.3326 ± 0.1903 0.07136 ± 0.00624 0.13825 ± 0.00393 13.044 ± 0.282 % 87.671 ± 0.291 %
|
| 251 |
+
51 7.4431 ± 0.1916 0.07091 ± 0.00613 0.13666 ± 0.00385 12.940 ± 0.278 % 87.712 ± 0.288 %
|
| 252 |
+
52 7.5160 ± 0.1915 0.07136 ± 0.00607 0.13643 ± 0.00378 12.861 ± 0.275 % 87.579 ± 0.286 %
|
| 253 |
+
53 7.6249 ± 0.1924 0.06995 ± 0.00598 0.13513 ± 0.00372 12.786 ± 0.272 % 87.577 ± 0.284 %
|
| 254 |
+
54 7.6769 ± 0.1915 0.06896 ± 0.00589 0.13358 ± 0.00365 12.687 ± 0.269 % 87.589 ± 0.281 %
|
| 255 |
+
55 7.7280 ± 0.1908 0.06857 ± 0.00579 0.13206 ± 0.00358 12.594 ± 0.266 % 87.586 ± 0.278 %
|
| 256 |
+
56 7.7605 ± 0.1899 0.06704 ± 0.00570 0.13058 ± 0.00352 12.499 ± 0.263 % 87.619 ± 0.276 %
|
| 257 |
+
57 7.7705 ± 0.1885 0.06839 ± 0.00568 0.13114 ± 0.00349 12.480 ± 0.260 % 87.582 ± 0.274 %
|
| 258 |
+
58 7.7699 ± 0.1867 0.06694 ± 0.00561 0.12999 ± 0.00343 12.416 ± 0.258 % 87.586 ± 0.271 %
|
| 259 |
+
59 7.7166 ± 0.1835 0.06519 ± 0.00553 0.12824 ± 0.00338 12.327 ± 0.255 % 87.690 ± 0.268 %
|
| 260 |
+
60 7.7299 ± 0.1824 0.06542 ± 0.00547 0.12748 ± 0.00333 12.275 ± 0.252 % 87.680 ± 0.266 %
|
| 261 |
+
61 7.7781 ± 0.1820 0.06501 ± 0.00540 0.12665 ± 0.00328 12.209 ± 0.250 % 87.676 ± 0.264 %
|
| 262 |
+
62 7.7397 ± 0.1799 0.06332 ± 0.00536 0.12596 ± 0.00324 12.166 ± 0.247 % 87.755 ± 0.261 %
|
| 263 |
+
63 7.7840 ± 0.1801 0.06271 ± 0.00531 0.12585 ± 0.00321 12.102 ± 0.245 % 87.700 ± 0.259 %
|
| 264 |
+
64 7.7572 ± 0.1776 0.06183 ± 0.00526 0.12503 ± 0.00316 12.040 ± 0.242 % 87.690 ± 0.257 %
|
| 265 |
+
65 7.7449 ± 0.1760 0.06141 ± 0.00521 0.12453 ± 0.00312 11.990 ± 0.240 % 87.698 ± 0.255 %
|
| 266 |
+
66 7.7789 ± 0.1756 0.06079 ± 0.00516 0.12386 ± 0.00308 11.938 ± 0.238 % 87.701 ± 0.253 %
|
| 267 |
+
67 7.7910 ± 0.1748 0.06140 ± 0.00511 0.12344 ± 0.00304 11.884 ± 0.235 % 87.714 ± 0.251 %
|
| 268 |
+
68 7.7412 ± 0.1721 0.06096 ± 0.00506 0.12244 ± 0.00300 11.821 ± 0.233 % 87.785 ± 0.249 %
|
| 269 |
+
69 7.7684 ± 0.1716 0.06002 ± 0.00500 0.12168 ± 0.00295 11.753 ± 0.231 % 87.820 ± 0.247 %
|
| 270 |
+
70 7.7372 ± 0.1694 0.05978 ± 0.00495 0.12105 ± 0.00292 11.727 ± 0.229 % 87.804 ± 0.245 %
|
| 271 |
+
71 7.7149 ± 0.1678 0.05888 ± 0.00489 0.12015 ± 0.00288 11.670 ± 0.227 % 87.871 ± 0.243 %
|
| 272 |
+
72 7.7297 ± 0.1672 0.05862 ± 0.00487 0.11968 ± 0.00285 11.638 ± 0.225 % 87.919 ± 0.241 %
|
| 273 |
+
73 7.7315 ± 0.1660 0.05812 ± 0.00482 0.11901 ± 0.00281 11.593 ± 0.223 % 87.945 ± 0.239 %
|
| 274 |
+
74 7.7206 ± 0.1645 0.05754 ± 0.00478 0.11844 ± 0.00278 11.544 ± 0.221 % 87.986 ± 0.237 %
|
| 275 |
+
75 7.7242 ± 0.1635 0.05709 ± 0.00474 0.11889 ± 0.00276 11.546 ± 0.219 % 87.937 ± 0.236 %
|
| 276 |
+
76 7.7862 ± 0.1639 0.05660 ± 0.00470 0.11824 ± 0.00272 11.498 ± 0.218 % 87.957 ± 0.234 %
|
| 277 |
+
77 7.7841 ± 0.1629 0.05660 ± 0.00466 0.11773 ± 0.00269 11.461 ± 0.216 % 87.965 ± 0.232 %
|
| 278 |
+
78 7.7906 ± 0.1620 0.05561 ± 0.00462 0.11711 ± 0.00266 11.412 ± 0.214 % 87.989 ± 0.231 %
|
| 279 |
+
79 7.8032 ± 0.1614 0.05576 ± 0.00459 0.11675 ± 0.00263 11.376 ± 0.212 % 87.972 ± 0.229 %
|
| 280 |
+
80 7.7943 ± 0.1605 0.05438 ± 0.00457 0.11676 ± 0.00260 11.359 ± 0.210 % 87.961 ± 0.228 %
|
| 281 |
+
81 7.7689 ± 0.1590 0.05421 ± 0.00453 0.11621 ± 0.00258 11.311 ± 0.209 % 88.008 ± 0.226 %
|
| 282 |
+
82 7.7501 ± 0.1575 0.05472 ± 0.00449 0.11555 ± 0.00255 11.280 ± 0.207 % 88.039 ± 0.224 %
|
| 283 |
+
83 7.7809 ± 0.1570 0.05437 ± 0.00444 0.11484 ± 0.00252 11.229 ± 0.205 % 88.065 ± 0.223 %
|
| 284 |
+
84 7.7935 ± 0.1560 0.05381 ± 0.00440 0.11409 ± 0.00249 11.179 ± 0.204 % 88.053 ± 0.222 %
|
| 285 |
+
85 7.7865 ± 0.1546 0.05346 ± 0.00436 0.11329 ± 0.00246 11.133 ± 0.202 % 88.074 ± 0.220 %
|
| 286 |
+
86 7.7095 ± 0.1517 0.05256 ± 0.00432 0.11258 ± 0.00243 11.094 ± 0.201 % 88.121 ± 0.218 %
|
| 287 |
+
87 7.6465 ± 0.1491 0.05216 ± 0.00428 0.11191 ± 0.00241 11.060 ± 0.199 % 88.145 ± 0.217 %
|
| 288 |
+
88 7.5852 ± 0.1466 0.05245 ± 0.00424 0.11139 ± 0.00238 11.023 ± 0.198 % 88.168 ± 0.216 %
|
| 289 |
+
89 7.5078 ± 0.1438 0.05217 ± 0.00420 0.11072 ± 0.00236 10.990 ± 0.197 % 88.231 ± 0.214 %
|
| 290 |
+
90 7.4493 ± 0.1415 0.05205 ± 0.00417 0.11006 ± 0.00234 10.946 ± 0.195 % 88.288 ± 0.212 %
|
| 291 |
+
91 7.3935 ± 0.1394 0.05164 ± 0.00413 0.10944 ± 0.00231 10.911 ± 0.194 % 88.326 ± 0.211 %
|
| 292 |
+
92 7.3305 ± 0.1370 0.05113 ± 0.00409 0.10883 ± 0.00229 10.879 ± 0.192 % 88.303 ± 0.210 %
|
| 293 |
+
93 7.3480 ± 0.1368 0.05102 ± 0.00407 0.10910 ± 0.00228 10.874 ± 0.191 % 88.286 ± 0.209 %
|
| 294 |
+
94 7.3780 ± 0.1366 0.05077 ± 0.00404 0.10841 ± 0.00226 10.829 ± 0.190 % 88.323 ± 0.207 %
|
| 295 |
+
95 7.4916 ± 0.1383 0.05085 ± 0.00401 0.10814 ± 0.00224 10.787 ± 0.189 % 88.322 ± 0.206 %
|
| 296 |
+
96 7.5824 ± 0.1394 0.04993 ± 0.00398 0.10782 ± 0.00221 10.751 ± 0.188 % 88.243 ± 0.206 %
|
| 297 |
+
97 7.6597 ± 0.1402 0.04941 ± 0.00395 0.10725 ± 0.00219 10.706 ± 0.187 % 88.203 ± 0.205 %
|
| 298 |
+
98 7.8011 ± 0.1426 0.04957 ± 0.00392 0.10668 ± 0.00217 10.658 ± 0.186 % 88.175 ± 0.204 %
|
| 299 |
+
99 7.9205 ± 0.1444 0.04918 ± 0.00389 0.10634 ± 0.00215 10.616 ± 0.184 % 88.132 ± 0.204 %
|
| 300 |
+
100 7.9521 ± 0.1443 0.04829 ± 0.00386 0.10622 ± 0.00213 10.583 ± 0.183 % 88.125 ± 0.203 %
|
| 301 |
+
101 7.9833 ± 0.1443 0.04737 ± 0.00385 0.10662 ± 0.00219 10.595 ± 0.183 % 88.119 ± 0.202 %
|
| 302 |
+
102 8.0477 ± 0.1453 0.04751 ± 0.00383 0.10651 ± 0.00217 10.580 ± 0.182 % 88.105 ± 0.201 %
|
| 303 |
+
103 8.0195 ± 0.1442 0.04737 ± 0.00380 0.10604 ± 0.00215 10.556 ± 0.180 % 88.155 ± 0.199 %
|
| 304 |
+
104 7.9607 ± 0.1423 0.04751 ± 0.00380 0.10636 ± 0.00215 10.614 ± 0.180 % 88.179 ± 0.198 %
|
| 305 |
+
105 7.8546 ± 0.1395 0.04860 ± 0.00381 0.10715 ± 0.00216 10.711 ± 0.180 % 88.224 ± 0.197 %
|
| 306 |
+
106 7.7206 ± 0.1362 0.04870 ± 0.00380 0.10687 ± 0.00216 10.723 ± 0.179 % 88.309 ± 0.195 %
|
| 307 |
+
107 7.7774 ± 0.1365 0.04830 ± 0.00377 0.10627 ± 0.00214 10.685 ± 0.178 % 88.309 ± 0.195 %
|
| 308 |
+
108 7.7880 ± 0.1360 0.04788 ± 0.00375 0.10586 ± 0.00212 10.657 ± 0.177 % 88.293 ± 0.194 %
|
| 309 |
+
109 7.8097 ± 0.1358 0.04794 ± 0.00372 0.10556 ± 0.00210 10.642 ± 0.176 % 88.271 ± 0.193 %
|
| 310 |
+
110 7.8449 ± 0.1358 0.04774 ± 0.00369 0.10507 ± 0.00208 10.610 ± 0.175 % 88.260 ± 0.192 %
|
| 311 |
+
111 7.8924 ± 0.1360 0.04748 ± 0.00367 0.10460 ± 0.00207 10.573 ± 0.174 % 88.260 ± 0.191 %
|
| 312 |
+
112 7.8974 ± 0.1354 0.04677 ± 0.00364 0.10403 ± 0.00205 10.540 ± 0.173 % 88.291 ± 0.190 %
|
| 313 |
+
113 7.9084 ± 0.1348 0.04678 ± 0.00362 0.10349 ± 0.00203 10.505 ± 0.172 % 88.294 ± 0.189 %
|
| 314 |
+
114 7.9220 ± 0.1346 0.04616 ± 0.00360 0.10311 ± 0.00201 10.472 ± 0.171 % 88.287 ± 0.189 %
|
| 315 |
+
115 7.9093 ± 0.1337 0.04691 ± 0.00359 0.10355 ± 0.00201 10.518 ± 0.171 % 88.280 ± 0.188 %
|
| 316 |
+
116 7.9188 ± 0.1333 0.04950 ± 0.00362 0.10645 ± 0.00205 10.696 ± 0.171 % 88.161 ± 0.188 %
|
| 317 |
+
117 7.8405 ± 0.1312 0.05313 ± 0.00364 0.10963 ± 0.00209 10.968 ± 0.172 % 88.111 ± 0.187 %
|
| 318 |
+
118 7.7602 ± 0.1290 0.05569 ± 0.00367 0.11233 ± 0.00213 11.158 ± 0.172 % 88.072 ± 0.187 %
|
| 319 |
+
119 7.6832 ± 0.1269 0.05916 ± 0.00372 0.11547 ± 0.00217 11.490 ± 0.174 % 88.014 ± 0.186 %
|
| 320 |
+
120 7.6116 ± 0.1250 0.06115 ± 0.00377 0.11932 ± 0.00224 11.751 ± 0.174 % 87.925 ± 0.186 %
|
| 321 |
+
121 7.5469 ± 0.1232 0.06425 ± 0.00381 0.12262 ± 0.00230 12.005 ± 0.175 % 87.850 ± 0.186 %
|
| 322 |
+
|
| 323 |
+
====== Perplexity statistics ======
|
| 324 |
+
Mean PPL(Q) : 7.546896 ± 0.123231
|
| 325 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 326 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.25%
|
| 327 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.064252 ± 0.003811
|
| 328 |
+
Mean PPL(Q)/PPL(base) : 1.066361 ± 0.004064
|
| 329 |
+
Mean PPL(Q)-PPL(base) : 0.469656 ± 0.029228
|
| 330 |
+
|
| 331 |
+
====== KL divergence statistics ======
|
| 332 |
+
Mean KLD: 0.122623 ± 0.002300
|
| 333 |
+
Maximum KLD: 14.374887
|
| 334 |
+
99.9% KLD: 4.831515
|
| 335 |
+
99.0% KLD: 2.028420
|
| 336 |
+
95.0% KLD: 0.478294
|
| 337 |
+
90.0% KLD: 0.218426
|
| 338 |
+
Median KLD: 0.027433
|
| 339 |
+
10.0% KLD: 0.000141
|
| 340 |
+
5.0% KLD: 0.000023
|
| 341 |
+
1.0% KLD: 0.000001
|
| 342 |
+
0.1% KLD: -0.000001
|
| 343 |
+
Minimum KLD: -0.000006
|
| 344 |
+
|
| 345 |
+
====== Token probability statistics ======
|
| 346 |
+
Mean Δp: -1.434 ± 0.068 %
|
| 347 |
+
Maximum Δp: 99.035%
|
| 348 |
+
99.9% Δp: 69.050%
|
| 349 |
+
99.0% Δp: 26.457%
|
| 350 |
+
95.0% Δp: 9.206%
|
| 351 |
+
90.0% Δp: 4.655%
|
| 352 |
+
75.0% Δp: 0.550%
|
| 353 |
+
Median Δp: -0.003%
|
| 354 |
+
25.0% Δp: -1.384%
|
| 355 |
+
10.0% Δp: -7.503%
|
| 356 |
+
5.0% Δp: -15.646%
|
| 357 |
+
1.0% Δp: -58.682%
|
| 358 |
+
0.1% Δp: -94.293%
|
| 359 |
+
Minimum Δp: -99.551%
|
| 360 |
+
RMS Δp : 12.005 ± 0.175 %
|
| 361 |
+
Same top p: 87.850 ± 0.186 %
|
| 362 |
+
|
| 363 |
+
llama_perf_context_print: load time = 41965.86 ms
|
| 364 |
+
llama_perf_context_print: prompt eval time = 78849.06 ms / 61952 tokens ( 1.27 ms per token, 785.70 tokens per second)
|
| 365 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 366 |
+
llama_perf_context_print: total time = 93746.14 ms / 61953 tokens
|
| 367 |
+
llama_perf_context_print: graphs reused = 0
|
| 368 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1035 + ( 22678 = 20891 + 208 + 1578) + 421 |
|
| 370 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1195 + ( 22487 = 18529 + 784 + 3174) + 451 |
|
| 371 |
+
llama_memory_breakdown_print: | - Host | 103801 = 103697 + 0 + 104 |
|
| 372 |
+
```
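
For readers comparing the summary blocks above across quants, here is a minimal sketch of what the per-token quantities in these logs measure. It assumes two softmax distributions over the vocabulary (base BF16 reference vs. quantized model) and mirrors the usual definitions of KL divergence, Δp of the observed token, and top-token agreement; it is an illustration only, not the llama.cpp implementation, and the toy logits and function names are hypothetical.

```python
# Illustrative only: what the per-token numbers in these logs measure.
# NOT the llama.cpp implementation; it just mirrors the definitions
# (KL divergence of the quantized distribution from the base distribution,
# Δp of the observed token, and top-token agreement) on toy data.
import numpy as np

def softmax(logits):
    z = logits - logits.max()
    e = np.exp(z)
    return e / e.sum()

def token_metrics(base_logits, quant_logits, target_id):
    p = softmax(base_logits)   # base (BF16 reference) distribution
    q = softmax(quant_logits)  # quantized-model distribution
    kld = float(np.sum(p * (np.log(p) - np.log(q))))  # KL(P || Q)
    delta_p = float(q[target_id] - p[target_id])      # Δp of the observed token
    same_top = bool(p.argmax() == q.argmax())          # "Same top p" event
    return kld, delta_p, same_top

# Toy example with a 5-token vocabulary (hypothetical logits):
base = np.array([2.0, 1.0, 0.5, -1.0, -2.0])
quant = np.array([1.8, 1.1, 0.4, -0.9, -2.1])
print(token_metrics(base, quant, target_id=0))
```

The "Mean KLD", "Mean Δp", and "Same top p" lines in the summary blocks aggregate these per-token values over all evaluated tokens; the slightly negative KLD percentiles and minima (e.g. -0.000006) are presumably just floating-point round-off around zero, since KL divergence is non-negative by definition.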
|
kld_data/unsloth/Q3_K_S/MiniMax-M2.5-Q3_K_S.md
ADDED
|
@@ -0,0 +1,369 @@
| 1 |
+
### MiniMax-M2.5-Q3_K_S (unsloth)
|
| 2 |
+
|
| 3 |
+
91.91 GiB (3.45 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q3_K_S/MiniMax-M2.5-Q3_K_S-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 49703 used, -25831 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 49718 used, -25846 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 99421 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 53725 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38314 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7439 MiB used, 16431 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1425 MiB used, 22445 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 15 layers ( 1 overflowing), 22769 MiB used, 1102 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 48 layers (38 overflowing), 22726 MiB used, 1145 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.84 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q3_K_S/MiniMax-M2.5-Q3_K_S-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 11
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q3_K: 435 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q3_K - Small
|
| 90 |
+
print_info: file size = 91.91 GiB (3.45 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 46940.91 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46670.97 MiB
|
| 162 |
+
load_tensors: CUDA0 model buffer size = 21103.48 MiB
|
| 163 |
+
load_tensors: CUDA1 model buffer size = 18800.45 MiB
|
| 164 |
+
....................................................................................................
|
| 165 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 166 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 167 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 168 |
+
llama_context: constructing llama_context
|
| 169 |
+
llama_context: n_seq_max = 8
|
| 170 |
+
llama_context: n_ctx = 4096
|
| 171 |
+
llama_context: n_ctx_seq = 512
|
| 172 |
+
llama_context: n_batch = 4096
|
| 173 |
+
llama_context: n_ubatch = 4096
|
| 174 |
+
llama_context: causal_attn = 1
|
| 175 |
+
llama_context: flash_attn = enabled
|
| 176 |
+
llama_context: kv_unified = false
|
| 177 |
+
llama_context: freq_base = 5000000.0
|
| 178 |
+
llama_context: freq_scale = 1
|
| 179 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 180 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 181 |
+
llama_kv_cache: CUDA0 KV buffer size = 240.00 MiB
|
| 182 |
+
llama_kv_cache: CUDA1 KV buffer size = 752.00 MiB
|
| 183 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 184 |
+
sched_reserve: reserving ...
|
| 185 |
+
sched_reserve: CUDA0 compute buffer size = 1426.00 MiB
|
| 186 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 187 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 188 |
+
sched_reserve: graph nodes = 4099
|
| 189 |
+
sched_reserve: graph splits = 151 (with bs=4096), 79 (with bs=1)
|
| 190 |
+
sched_reserve: reserve took 23.19 ms, sched copies = 1
|
| 191 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 192 |
+
|
| 193 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 194 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 195 |
+
kl_divergence: 5.85 seconds per pass - ETA 1.47 minutes
|
| 196 |
+
|
| 197 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 198 |
+
1 6.5674 ± 1.2704 0.03005 ± 0.03787 0.09729 ± 0.01192 10.314 ± 1.108 % 86.667 ± 2.133 %
|
| 199 |
+
2 4.7267 ± 0.5748 0.01102 ± 0.02372 0.07201 ± 0.00681 8.594 ± 0.790 % 89.608 ± 1.353 %
|
| 200 |
+
3 4.5633 ± 0.4578 0.01656 ± 0.02068 0.08533 ± 0.00691 9.817 ± 0.771 % 89.673 ± 1.101 %
|
| 201 |
+
4 5.2151 ± 0.4695 0.01677 ± 0.01775 0.08654 ± 0.00566 9.520 ± 0.646 % 89.608 ± 0.956 %
|
| 202 |
+
5 5.0721 ± 0.4118 0.03553 ± 0.01836 0.09154 ± 0.00566 9.627 ± 0.583 % 89.490 ± 0.859 %
|
| 203 |
+
6 6.2129 ± 0.4921 0.03544 ± 0.01678 0.10196 ± 0.00532 9.235 ± 0.516 % 88.301 ± 0.822 %
|
| 204 |
+
7 5.8330 ± 0.4135 0.04418 ± 0.01641 0.12401 ± 0.00765 10.710 ± 0.538 % 87.339 ± 0.787 %
|
| 205 |
+
8 6.5639 ± 0.4417 0.03874 ± 0.01522 0.12137 ± 0.00679 10.336 ± 0.491 % 86.667 ± 0.753 %
|
| 206 |
+
9 6.4198 ± 0.4023 0.03581 ± 0.01381 0.11512 ± 0.00610 9.901 ± 0.457 % 87.059 ± 0.701 %
|
| 207 |
+
10 5.8602 ± 0.3409 0.03366 ± 0.01263 0.11007 ± 0.00554 9.861 ± 0.421 % 87.294 ± 0.660 %
|
| 208 |
+
11 6.4204 ± 0.3609 0.03314 ± 0.01192 0.11015 ± 0.00510 9.690 ± 0.393 % 87.094 ± 0.633 %
|
| 209 |
+
12 7.1037 ± 0.3876 0.03279 ± 0.01135 0.10941 ± 0.00474 9.443 ± 0.371 % 86.797 ± 0.612 %
|
| 210 |
+
13 7.3584 ± 0.3824 0.03075 ± 0.01072 0.10636 ± 0.00441 9.208 ± 0.352 % 86.878 ± 0.587 %
|
| 211 |
+
14 7.9255 ± 0.4012 0.02973 ± 0.01047 0.10897 ± 0.00441 9.227 ± 0.355 % 86.359 ± 0.575 %
|
| 212 |
+
15 8.3034 ± 0.4065 0.03019 ± 0.01005 0.10819 ± 0.00414 9.141 ± 0.338 % 86.248 ± 0.557 %
|
| 213 |
+
16 8.5595 ± 0.4051 0.02837 ± 0.00957 0.10510 ± 0.00390 8.997 ± 0.323 % 86.127 ± 0.541 %
|
| 214 |
+
17 8.8267 ± 0.4080 0.03199 ± 0.00985 0.11055 ± 0.00397 8.963 ± 0.312 % 85.952 ± 0.528 %
|
| 215 |
+
18 8.3118 ± 0.3708 0.03112 ± 0.00957 0.11064 ± 0.00385 8.928 ± 0.304 % 86.209 ± 0.509 %
|
| 216 |
+
19 8.4155 ± 0.3645 0.02771 ± 0.00921 0.10775 ± 0.00366 8.857 ± 0.292 % 86.275 ± 0.494 %
|
| 217 |
+
20 8.4860 ± 0.3587 0.02818 ± 0.00913 0.11202 ± 0.00371 8.934 ± 0.281 % 86.216 ± 0.483 %
|
| 218 |
+
21 8.4405 ± 0.3475 0.02537 ± 0.00882 0.11088 ± 0.00356 8.941 ± 0.273 % 86.312 ± 0.470 %
|
| 219 |
+
22 8.7986 ± 0.3577 0.02926 ± 0.00860 0.11214 ± 0.00344 8.944 ± 0.265 % 86.025 ± 0.463 %
|
| 220 |
+
23 8.8248 ± 0.3519 0.03101 ± 0.00859 0.11570 ± 0.00346 9.221 ± 0.277 % 85.882 ± 0.455 %
|
| 221 |
+
24 9.2276 ± 0.3620 0.02958 ± 0.00835 0.11535 ± 0.00334 9.129 ± 0.269 % 85.866 ± 0.445 %
|
| 222 |
+
25 9.2178 ± 0.3549 0.02991 ± 0.00819 0.11878 ± 0.00335 9.317 ± 0.264 % 85.804 ± 0.437 %
|
| 223 |
+
26 8.7746 ± 0.3278 0.04924 ± 0.00858 0.13901 ± 0.00437 11.386 ± 0.343 % 85.535 ± 0.432 %
|
| 224 |
+
27 8.4719 ± 0.3085 0.06806 ± 0.00935 0.16353 ± 0.00559 13.023 ± 0.369 % 85.171 ± 0.428 %
|
| 225 |
+
28 8.5866 ± 0.3079 0.06783 ± 0.00917 0.16328 ± 0.00542 12.992 ± 0.359 % 85.098 ± 0.421 %
|
| 226 |
+
29 8.5306 ± 0.3013 0.07077 ± 0.00900 0.16243 ± 0.00527 12.926 ± 0.352 % 85.193 ± 0.413 %
|
| 227 |
+
30 7.9867 ± 0.2750 0.07209 ± 0.00884 0.16014 ± 0.00523 12.959 ± 0.350 % 85.621 ± 0.401 %
|
| 228 |
+
31 7.5205 ± 0.2526 0.07131 ± 0.00865 0.15864 ± 0.00516 12.979 ± 0.348 % 85.933 ± 0.391 %
|
| 229 |
+
32 7.3330 ± 0.2404 0.07130 ± 0.00843 0.15641 ± 0.00501 12.919 ± 0.339 % 86.078 ± 0.383 %
|
| 230 |
+
33 7.1746 ± 0.2298 0.06935 ± 0.00826 0.15552 ± 0.00487 12.945 ± 0.330 % 86.061 ± 0.378 %
|
| 231 |
+
34 7.3930 ± 0.2349 0.07231 ± 0.00819 0.15833 ± 0.00477 12.941 ± 0.323 % 85.871 ± 0.374 %
|
| 232 |
+
35 7.5016 ± 0.2367 0.07225 ± 0.00820 0.16279 ± 0.00474 13.084 ± 0.317 % 85.602 ± 0.372 %
|
| 233 |
+
36 7.5584 ± 0.2360 0.07093 ± 0.00807 0.16144 ± 0.00463 12.971 ± 0.311 % 85.664 ± 0.366 %
|
| 234 |
+
37 7.6217 ± 0.2351 0.07698 ± 0.00811 0.16601 ± 0.00474 13.331 ± 0.314 % 85.607 ± 0.361 %
|
| 235 |
+
38 7.8385 ± 0.2397 0.07529 ± 0.00795 0.16462 ± 0.00462 13.222 ± 0.309 % 85.552 ± 0.357 %
|
| 236 |
+
39 7.8270 ± 0.2359 0.08077 ± 0.00793 0.16980 ± 0.00473 13.598 ± 0.310 % 85.500 ± 0.353 %
|
| 237 |
+
40 7.6820 ± 0.2272 0.09699 ± 0.00814 0.18676 ± 0.00507 14.556 ± 0.314 % 85.127 ± 0.352 %
|
| 238 |
+
41 7.5863 ± 0.2204 0.11528 ± 0.00855 0.20796 ± 0.00559 15.700 ± 0.320 % 84.687 ± 0.352 %
|
| 239 |
+
42 7.4490 ± 0.2126 0.12834 ± 0.00885 0.22429 ± 0.00595 16.614 ± 0.323 % 84.416 ± 0.350 %
|
| 240 |
+
43 7.3090 ± 0.2051 0.14050 ± 0.00906 0.23764 ± 0.00623 17.218 ± 0.323 % 84.195 ± 0.348 %
|
| 241 |
+
44 7.2307 ± 0.1996 0.13606 ± 0.00889 0.23432 ± 0.00610 17.080 ± 0.319 % 84.260 ± 0.344 %
|
| 242 |
+
45 7.3740 ± 0.2024 0.13379 ± 0.00874 0.23267 ± 0.00597 16.921 ± 0.315 % 84.148 ± 0.341 %
|
| 243 |
+
46 7.5150 ± 0.2043 0.13066 ± 0.00859 0.23052 ± 0.00586 16.769 ± 0.311 % 84.109 ± 0.338 %
|
| 244 |
+
47 7.6705 ± 0.2069 0.12893 ± 0.00843 0.22719 ± 0.00574 16.610 ± 0.308 % 84.147 ± 0.334 %
|
| 245 |
+
48 7.5193 ± 0.1996 0.12567 ± 0.00828 0.22419 ± 0.00562 16.497 ± 0.304 % 84.191 ± 0.330 %
|
| 246 |
+
49 7.6127 ± 0.1999 0.12299 ± 0.00821 0.22716 ± 0.00573 16.457 ± 0.300 % 84.106 ± 0.327 %
|
| 247 |
+
50 7.7155 ± 0.2014 0.12226 ± 0.00811 0.22592 ± 0.00563 16.360 ± 0.297 % 84.047 ± 0.324 %
|
| 248 |
+
51 7.8260 ± 0.2026 0.12108 ± 0.00797 0.22332 ± 0.00553 16.233 ± 0.294 % 84.121 ± 0.320 %
|
| 249 |
+
52 7.8884 ± 0.2020 0.11971 ± 0.00789 0.22297 ± 0.00543 16.148 ± 0.290 % 84.042 ± 0.318 %
|
| 250 |
+
53 7.9913 ± 0.2026 0.11689 ± 0.00779 0.22110 ± 0.00534 16.043 ± 0.286 % 83.966 ± 0.316 %
|
| 251 |
+
54 8.0450 ± 0.2016 0.11581 ± 0.00767 0.21873 ± 0.00524 15.925 ± 0.283 % 83.951 ± 0.313 %
|
| 252 |
+
55 8.0951 ± 0.2007 0.11498 ± 0.00754 0.21646 ± 0.00515 15.808 ± 0.281 % 83.950 ± 0.310 %
|
| 253 |
+
56 8.1301 ± 0.1998 0.11358 ± 0.00743 0.21416 ± 0.00506 15.688 ± 0.278 % 83.992 ± 0.307 %
|
| 254 |
+
57 8.1493 ± 0.1986 0.11598 ± 0.00742 0.21533 ± 0.00501 15.678 ± 0.275 % 83.949 ± 0.304 %
|
| 255 |
+
58 8.1496 ± 0.1968 0.11465 ± 0.00733 0.21340 ± 0.00493 15.590 ± 0.272 % 84.030 ± 0.301 %
|
| 256 |
+
59 8.0873 ± 0.1932 0.11211 ± 0.00722 0.21062 ± 0.00485 15.481 ± 0.269 % 84.108 ± 0.298 %
|
| 257 |
+
60 8.0939 ± 0.1918 0.11145 ± 0.00712 0.20905 ± 0.00478 15.418 ± 0.267 % 84.124 ± 0.295 %
|
| 258 |
+
61 8.1389 ± 0.1913 0.11035 ± 0.00703 0.20738 ± 0.00470 15.329 ± 0.264 % 84.127 ± 0.293 %
|
| 259 |
+
62 8.0914 ± 0.1887 0.10776 ± 0.00696 0.20578 ± 0.00464 15.262 ± 0.261 % 84.231 ± 0.290 %
|
| 260 |
+
63 8.1296 ± 0.1887 0.10615 ± 0.00689 0.20500 ± 0.00459 15.175 ± 0.259 % 84.233 ± 0.288 %
|
| 261 |
+
64 8.0924 ± 0.1858 0.10414 ± 0.00682 0.20380 ± 0.00452 15.101 ± 0.256 % 84.252 ± 0.285 %
|
| 262 |
+
65 8.0699 ± 0.1838 0.10252 ± 0.00676 0.20293 ± 0.00446 15.038 ± 0.254 % 84.259 ± 0.283 %
|
| 263 |
+
66 8.1011 ± 0.1833 0.10137 ± 0.00668 0.20177 ± 0.00440 14.956 ± 0.252 % 84.272 ± 0.281 %
|
| 264 |
+
67 8.1149 ± 0.1824 0.10214 ± 0.00662 0.20099 ± 0.00434 14.908 ± 0.249 % 84.302 ± 0.278 %
|
| 265 |
+
68 8.0596 ± 0.1796 0.10128 ± 0.00654 0.19938 ± 0.00429 14.835 ± 0.247 % 84.377 ± 0.276 %
|
| 266 |
+
69 8.0897 ± 0.1791 0.10054 ± 0.00647 0.19818 ± 0.00423 14.763 ± 0.245 % 84.388 ± 0.274 %
|
| 267 |
+
70 8.0512 ± 0.1766 0.09956 ± 0.00641 0.19724 ± 0.00417 14.724 ± 0.243 % 84.370 ± 0.272 %
|
| 268 |
+
71 8.0275 ± 0.1750 0.09860 ± 0.00634 0.19605 ± 0.00413 14.664 ± 0.241 % 84.430 ± 0.269 %
|
| 269 |
+
72 8.0410 ± 0.1743 0.09811 ± 0.00631 0.19507 ± 0.00408 14.617 ± 0.239 % 84.439 ± 0.268 %
|
| 270 |
+
73 8.0365 ± 0.1728 0.09682 ± 0.00624 0.19404 ± 0.00403 14.554 ± 0.237 % 84.443 ± 0.266 %
|
| 271 |
+
74 8.0253 ± 0.1713 0.09625 ± 0.00618 0.19310 ± 0.00397 14.494 ± 0.234 % 84.457 ± 0.264 %
|
| 272 |
+
75 8.0346 ± 0.1705 0.09650 ± 0.00614 0.19279 ± 0.00393 14.458 ± 0.232 % 84.439 ± 0.262 %
|
| 273 |
+
76 8.0937 ± 0.1707 0.09533 ± 0.00608 0.19212 ± 0.00389 14.401 ± 0.230 % 84.448 ± 0.260 %
|
| 274 |
+
77 8.0858 ± 0.1695 0.09463 ± 0.00604 0.19106 ± 0.00384 14.346 ± 0.229 % 84.507 ± 0.258 %
|
| 275 |
+
78 8.0966 ± 0.1688 0.09413 ± 0.00599 0.19004 ± 0.00380 14.278 ± 0.227 % 84.550 ± 0.256 %
|
| 276 |
+
79 8.1004 ± 0.1678 0.09315 ± 0.00594 0.18948 ± 0.00376 14.244 ± 0.225 % 84.532 ± 0.255 %
|
| 277 |
+
80 8.0924 ± 0.1671 0.09191 ± 0.00592 0.19017 ± 0.00375 14.224 ± 0.223 % 84.529 ± 0.253 %
|
| 278 |
+
81 8.0613 ± 0.1654 0.09116 ± 0.00587 0.18932 ± 0.00371 14.172 ± 0.222 % 84.580 ± 0.251 %
|
| 279 |
+
82 8.0431 ± 0.1639 0.09183 ± 0.00581 0.18840 ± 0.00367 14.120 ± 0.220 % 84.586 ± 0.250 %
|
| 280 |
+
83 8.0766 ± 0.1634 0.09167 ± 0.00576 0.18751 ± 0.00363 14.067 ± 0.218 % 84.555 ± 0.248 %
|
| 281 |
+
84 8.0881 ± 0.1623 0.09091 ± 0.00570 0.18630 ± 0.00359 14.009 ± 0.217 % 84.528 ± 0.247 %
|
| 282 |
+
85 8.0791 ± 0.1609 0.09035 ± 0.00565 0.18514 ± 0.00355 13.961 ± 0.215 % 84.558 ± 0.245 %
|
| 283 |
+
86 8.0026 ± 0.1579 0.08987 ± 0.00560 0.18424 ± 0.00351 13.916 ± 0.213 % 84.592 ± 0.244 %
|
| 284 |
+
87 7.9388 ± 0.1553 0.08968 ± 0.00555 0.18354 ± 0.00347 13.901 ± 0.212 % 84.629 ± 0.242 %
|
| 285 |
+
88 7.8733 ± 0.1527 0.08972 ± 0.00550 0.18274 ± 0.00344 13.856 ± 0.210 % 84.661 ± 0.241 %
|
| 286 |
+
89 7.7956 ± 0.1499 0.08978 ± 0.00545 0.18178 ± 0.00340 13.811 ± 0.208 % 84.746 ± 0.239 %
|
| 287 |
+
90 7.7400 ± 0.1476 0.09032 ± 0.00541 0.18112 ± 0.00337 13.780 ± 0.207 % 84.776 ± 0.237 %
|
| 288 |
+
91 7.6881 ± 0.1455 0.09070 ± 0.00537 0.18050 ± 0.00334 13.756 ± 0.206 % 84.805 ± 0.236 %
|
| 289 |
+
92 7.6286 ± 0.1432 0.09099 ± 0.00533 0.17999 ± 0.00331 13.738 ± 0.204 % 84.834 ± 0.234 %
|
| 290 |
+
93 7.6476 ± 0.1431 0.09098 ± 0.00533 0.18127 ± 0.00339 13.739 ± 0.203 % 84.816 ± 0.233 %
|
| 291 |
+
94 7.6775 ± 0.1428 0.09055 ± 0.00528 0.18014 ± 0.00336 13.678 ± 0.202 % 84.856 ± 0.232 %
|
| 292 |
+
95 7.7932 ± 0.1445 0.09032 ± 0.00525 0.17978 ± 0.00332 13.633 ± 0.200 % 84.826 ± 0.231 %
|
| 293 |
+
96 7.8930 ± 0.1458 0.09008 ± 0.00521 0.17940 ± 0.00329 13.585 ± 0.199 % 84.706 ± 0.230 %
|
| 294 |
+
97 7.9737 ± 0.1466 0.08959 ± 0.00516 0.17850 ± 0.00326 13.527 ± 0.198 % 84.661 ± 0.229 %
|
| 295 |
+
98 8.1223 ± 0.1492 0.08991 ± 0.00513 0.17759 ± 0.00323 13.469 ± 0.197 % 84.626 ± 0.228 %
|
| 296 |
+
99 8.2462 ± 0.1510 0.08949 ± 0.00509 0.17698 ± 0.00320 13.415 ± 0.196 % 84.615 ± 0.227 %
|
| 297 |
+
100 8.2737 ± 0.1507 0.08794 ± 0.00506 0.17674 ± 0.00317 13.378 ± 0.194 % 84.608 ± 0.226 %
|
| 298 |
+
101 8.3022 ± 0.1506 0.08654 ± 0.00503 0.17711 ± 0.00320 13.377 ± 0.194 % 84.620 ± 0.225 %
|
| 299 |
+
102 8.3696 ± 0.1517 0.08672 ± 0.00501 0.17712 ± 0.00318 13.356 ± 0.192 % 84.587 ± 0.224 %
|
| 300 |
+
103 8.3402 ± 0.1506 0.08657 ± 0.00497 0.17619 ± 0.00315 13.322 ± 0.191 % 84.622 ± 0.223 %
|
| 301 |
+
104 8.2786 ± 0.1486 0.08667 ± 0.00495 0.17668 ± 0.00314 13.439 ± 0.191 % 84.627 ± 0.221 %
|
| 302 |
+
105 8.1534 ± 0.1453 0.08594 ± 0.00495 0.17704 ± 0.00313 13.520 ± 0.191 % 84.691 ± 0.220 %
|
| 303 |
+
106 8.0171 ± 0.1419 0.08639 ± 0.00494 0.17667 ± 0.00313 13.557 ± 0.191 % 84.784 ± 0.218 %
|
| 304 |
+
107 8.0748 ± 0.1422 0.08583 ± 0.00490 0.17572 ± 0.00310 13.506 ± 0.190 % 84.812 ± 0.217 %
|
| 305 |
+
108 8.0823 ± 0.1416 0.08497 ± 0.00486 0.17516 ± 0.00307 13.478 ± 0.189 % 84.808 ± 0.216 %
|
| 306 |
+
109 8.1010 ± 0.1414 0.08456 ± 0.00483 0.17462 ± 0.00305 13.446 ± 0.187 % 84.817 ± 0.215 %
|
| 307 |
+
110 8.1345 ± 0.1413 0.08400 ± 0.00480 0.17392 ± 0.00302 13.415 ± 0.186 % 84.816 ± 0.214 %
|
| 308 |
+
111 8.1806 ± 0.1414 0.08334 ± 0.00477 0.17326 ± 0.00300 13.371 ± 0.185 % 84.780 ± 0.214 %
|
| 309 |
+
112 8.1842 ± 0.1408 0.08244 ± 0.00473 0.17238 ± 0.00297 13.331 ± 0.184 % 84.783 ± 0.213 %
|
| 310 |
+
113 8.1973 ± 0.1402 0.08266 ± 0.00470 0.17155 ± 0.00294 13.294 ± 0.183 % 84.779 ± 0.212 %
|
| 311 |
+
114 8.2101 ± 0.1399 0.08188 ± 0.00467 0.17079 ± 0.00292 13.254 ± 0.182 % 84.782 ± 0.211 %
|
| 312 |
+
115 8.1962 ± 0.1390 0.08254 ± 0.00466 0.17119 ± 0.00291 13.286 ± 0.181 % 84.750 ± 0.210 %
|
| 313 |
+
116 8.1986 ± 0.1384 0.08422 ± 0.00469 0.17536 ± 0.00296 13.483 ± 0.182 % 84.621 ± 0.210 %
|
| 314 |
+
117 8.1410 ± 0.1367 0.09074 ± 0.00476 0.18109 ± 0.00304 13.862 ± 0.183 % 84.511 ± 0.209 %
|
| 315 |
+
118 8.0798 ± 0.1348 0.09605 ± 0.00481 0.18591 ± 0.00310 14.151 ± 0.184 % 84.413 ± 0.209 %
|
| 316 |
+
119 8.0237 ± 0.1332 0.10253 ± 0.00492 0.19191 ± 0.00321 14.519 ± 0.185 % 84.333 ± 0.209 %
|
| 317 |
+
120 7.9762 ± 0.1316 0.10794 ± 0.00498 0.19750 ± 0.00327 14.865 ± 0.186 % 84.232 ± 0.208 %
|
| 318 |
+
121 7.9176 ± 0.1300 0.11221 ± 0.00502 0.20212 ± 0.00332 15.149 ± 0.187 % 84.152 ± 0.208 %
|
| 319 |
+
|
| 320 |
+
====== Perplexity statistics ======
|
| 321 |
+
Mean PPL(Q) : 7.917621 ± 0.129973
|
| 322 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 323 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 95.25%
|
| 324 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.112207 ± 0.005024
|
| 325 |
+
Mean PPL(Q)/PPL(base) : 1.118744 ± 0.005621
|
| 326 |
+
Mean PPL(Q)-PPL(base) : 0.840381 ± 0.040702
|
| 327 |
+
|
| 328 |
+
====== KL divergence statistics ======
|
| 329 |
+
Mean KLD: 0.202119 ± 0.003320
|
| 330 |
+
Maximum KLD: 15.605315
|
| 331 |
+
99.9% KLD: 6.406550
|
| 332 |
+
99.0% KLD: 3.069579
|
| 333 |
+
95.0% KLD: 0.855274
|
| 334 |
+
90.0% KLD: 0.390511
|
| 335 |
+
Median KLD: 0.051354
|
| 336 |
+
10.0% KLD: 0.000254
|
| 337 |
+
5.0% KLD: 0.000044
|
| 338 |
+
1.0% KLD: 0.000002
|
| 339 |
+
0.1% KLD: -0.000001
|
| 340 |
+
Minimum KLD: -0.000004
|
| 341 |
+
|
| 342 |
+
====== Token probability statistics ======
|
| 343 |
+
Mean Δp: -2.448 ± 0.085 %
|
| 344 |
+
Maximum Δp: 99.015%
|
| 345 |
+
99.9% Δp: 77.255%
|
| 346 |
+
99.0% Δp: 31.663%
|
| 347 |
+
95.0% Δp: 11.539%
|
| 348 |
+
90.0% Δp: 5.604%
|
| 349 |
+
75.0% Δp: 0.561%
|
| 350 |
+
Median Δp: -0.013%
|
| 351 |
+
25.0% Δp: -2.300%
|
| 352 |
+
10.0% Δp: -11.397%
|
| 353 |
+
5.0% Δp: -23.594%
|
| 354 |
+
1.0% Δp: -77.397%
|
| 355 |
+
0.1% Δp: -98.089%
|
| 356 |
+
Minimum Δp: -99.954%
|
| 357 |
+
RMS Δp : 15.149 ± 0.187 %
|
| 358 |
+
Same top p: 84.152 ± 0.208 %
|
| 359 |
+
|
| 360 |
+
llama_perf_context_print: load time = 36536.26 ms
|
| 361 |
+
llama_perf_context_print: prompt eval time = 72738.22 ms / 61952 tokens ( 1.17 ms per token, 851.71 tokens per second)
|
| 362 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 363 |
+
llama_perf_context_print: total time = 87308.68 ms / 61953 tokens
|
| 364 |
+
llama_perf_context_print: graphs reused = 0
|
| 365 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 366 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 945 + (22769 = 21103 + 240 + 1425) + 419 |
|
| 367 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 955 + (22726 = 18800 + 752 + 3174) + 452 |
|
| 368 |
+
llama_memory_breakdown_print: | - Host | 93715 = 93611 + 0 + 104 |
|
| 369 |
+
```
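
As a quick consistency check on how the summary lines above relate, the ratio and difference rows follow from the two mean-PPL rows: exp(0.112207) ≈ 1.1187, in line with the reported Mean PPL(Q)/PPL(base) of 1.118744, and 7.917621 - 7.077240 = 0.840381, matching the reported Mean PPL(Q)-PPL(base). The same identities appear to hold for the other quants in this upload, so they can be used to sanity-check transcribed numbers when comparing files.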
|
kld_data/unsloth/Q4_0/MiniMax-M2.5-Q4_0.md
ADDED
|
@@ -0,0 +1,371 @@
| 1 |
+
### MiniMax-M2.5-Q4_0 (unsloth)
|
| 2 |
+
|
| 3 |
+
120.60 GiB (4.53 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_0/MiniMax-M2.5-Q4_0-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 65073 used, -41202 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 63655 used, -39783 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 128729 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 83033 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37894 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8316 MiB used, 15555 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1419 MiB used, 22451 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 11 layers ( 1 overflowing), 22406 MiB used, 1465 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 52 layers (44 overflowing), 22679 MiB used, 1192 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 5.23 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_0/MiniMax-M2.5-Q4_0-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 2
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_0: 428 tensors
|
| 87 |
+
llama_model_loader: - type q4_1: 7 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 89 |
+
print_info: file format = GGUF V3 (latest)
|
| 90 |
+
print_info: file type = Q4_0
|
| 91 |
+
print_info: file size = 120.60 GiB (4.53 BPW)
|
| 92 |
+
load: 0 unused tokens
|
| 93 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 94 |
+
load: printing all EOG tokens:
|
| 95 |
+
load: - 200004 ('<fim_pad>')
|
| 96 |
+
load: - 200005 ('<reponame>')
|
| 97 |
+
load: - 200020 ('[e~[')
|
| 98 |
+
load: special tokens cache size = 54
|
| 99 |
+
load: token to piece cache size = 1.3355 MB
|
| 100 |
+
print_info: arch = minimax-m2
|
| 101 |
+
print_info: vocab_only = 0
|
| 102 |
+
print_info: no_alloc = 0
|
| 103 |
+
print_info: n_ctx_train = 196608
|
| 104 |
+
print_info: n_embd = 3072
|
| 105 |
+
print_info: n_embd_inp = 3072
|
| 106 |
+
print_info: n_layer = 62
|
| 107 |
+
print_info: n_head = 48
|
| 108 |
+
print_info: n_head_kv = 8
|
| 109 |
+
print_info: n_rot = 64
|
| 110 |
+
print_info: n_swa = 0
|
| 111 |
+
print_info: is_swa_any = 0
|
| 112 |
+
print_info: n_embd_head_k = 128
|
| 113 |
+
print_info: n_embd_head_v = 128
|
| 114 |
+
print_info: n_gqa = 6
|
| 115 |
+
print_info: n_embd_k_gqa = 1024
|
| 116 |
+
print_info: n_embd_v_gqa = 1024
|
| 117 |
+
print_info: f_norm_eps = 0.0e+00
|
| 118 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 119 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 120 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 121 |
+
print_info: f_logit_scale = 0.0e+00
|
| 122 |
+
print_info: f_attn_scale = 0.0e+00
|
| 123 |
+
print_info: n_ff = 1536
|
| 124 |
+
print_info: n_expert = 256
|
| 125 |
+
print_info: n_expert_used = 8
|
| 126 |
+
print_info: n_expert_groups = 0
|
| 127 |
+
print_info: n_group_used = 0
|
| 128 |
+
print_info: causal attn = 1
|
| 129 |
+
print_info: pooling type = 0
|
| 130 |
+
print_info: rope type = 2
|
| 131 |
+
print_info: rope scaling = linear
|
| 132 |
+
print_info: freq_base_train = 5000000.0
|
| 133 |
+
print_info: freq_scale_train = 1
|
| 134 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 135 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 136 |
+
print_info: rope_finetuned = unknown
|
| 137 |
+
print_info: model type = 230B.A10B
|
| 138 |
+
print_info: model params = 228.69 B
|
| 139 |
+
print_info: general.name = Minimax-M2.5
|
| 140 |
+
print_info: vocab type = BPE
|
| 141 |
+
print_info: n_vocab = 200064
|
| 142 |
+
print_info: n_merges = 199744
|
| 143 |
+
print_info: BOS token = 200034 ']~!b['
|
| 144 |
+
print_info: EOS token = 200020 '[e~['
|
| 145 |
+
print_info: UNK token = 200021 ']!d~['
|
| 146 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 147 |
+
print_info: LF token = 10 'Ċ'
|
| 148 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 149 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 150 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 151 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 155 |
+
print_info: EOG token = 200020 '[e~['
|
| 156 |
+
print_info: max token length = 256
|
| 157 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 158 |
+
load_tensors: offloading output layer to GPU
|
| 159 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 160 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46830.93 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47296.24 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 28888.49 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 20882.44 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18689.33 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 176.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 816.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1348.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 181 (with bs=4096), 95 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 24.48 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 6.33 seconds per pass - ETA 1.58 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.3780 ± 1.2116 0.00078 ± 0.02424 0.04450 ± 0.00743 8.435 ± 1.323 % 89.804 ± 1.899 %
|
| 201 |
+
2 4.6932 ± 0.5600 0.00389 ± 0.01442 0.03046 ± 0.00389 6.425 ± 0.880 % 92.549 ± 1.164 %
|
| 202 |
+
3 4.5012 ± 0.4423 0.00286 ± 0.01247 0.03402 ± 0.00318 6.481 ± 0.614 % 92.288 ± 0.965 %
|
| 203 |
+
4 5.1413 ± 0.4527 0.00252 ± 0.01120 0.03659 ± 0.00273 6.497 ± 0.525 % 91.667 ± 0.866 %
|
| 204 |
+
5 4.9111 ± 0.3873 0.00328 ± 0.01121 0.03725 ± 0.00276 6.402 ± 0.450 % 92.078 ± 0.757 %
|
| 205 |
+
6 5.9848 ± 0.4620 -0.00196 ± 0.01077 0.04242 ± 0.00295 6.073 ± 0.402 % 92.026 ± 0.693 %
|
| 206 |
+
7 5.5710 ± 0.3857 -0.00176 ± 0.01031 0.05050 ± 0.00331 6.732 ± 0.430 % 91.541 ± 0.659 %
|
| 207 |
+
8 6.2928 ± 0.4139 -0.00344 ± 0.00959 0.04962 ± 0.00294 6.475 ± 0.393 % 91.324 ± 0.623 %
|
| 208 |
+
9 6.2003 ± 0.3811 0.00102 ± 0.00884 0.04807 ± 0.00269 6.245 ± 0.363 % 91.373 ± 0.586 %
|
| 209 |
+
10 5.6784 ± 0.3246 0.00214 ± 0.00820 0.04627 ± 0.00245 6.285 ± 0.331 % 91.490 ± 0.553 %
|
| 210 |
+
11 6.2248 ± 0.3445 0.00220 ± 0.00798 0.04637 ± 0.00228 6.164 ± 0.309 % 91.444 ± 0.528 %
|
| 211 |
+
12 6.8922 ± 0.3702 0.00255 ± 0.00771 0.04823 ± 0.00255 5.996 ± 0.292 % 91.209 ± 0.512 %
|
| 212 |
+
13 7.1626 ± 0.3669 0.00377 ± 0.00725 0.04667 ± 0.00236 5.896 ± 0.276 % 91.192 ± 0.492 %
|
| 213 |
+
14 7.7496 ± 0.3886 0.00730 ± 0.00718 0.04725 ± 0.00226 5.971 ± 0.271 % 90.924 ± 0.481 %
|
| 214 |
+
15 8.1141 ± 0.3939 0.00713 ± 0.00687 0.04718 ± 0.00212 5.930 ± 0.256 % 91.059 ± 0.461 %
|
| 215 |
+
16 8.3673 ± 0.3932 0.00565 ± 0.00654 0.04607 ± 0.00200 5.845 ± 0.245 % 91.029 ± 0.447 %
|
| 216 |
+
17 8.5989 ± 0.3946 0.00585 ± 0.00643 0.04698 ± 0.00193 5.790 ± 0.235 % 91.050 ± 0.434 %
|
| 217 |
+
18 8.0925 ± 0.3580 0.00437 ± 0.00625 0.04696 ± 0.00186 5.810 ± 0.227 % 91.264 ± 0.417 %
|
| 218 |
+
19 8.2453 ± 0.3552 0.00728 ± 0.00602 0.04566 ± 0.00177 5.764 ± 0.218 % 91.290 ± 0.405 %
|
| 219 |
+
20 8.3304 ± 0.3503 0.00967 ± 0.00602 0.04782 ± 0.00178 5.842 ± 0.210 % 91.235 ± 0.396 %
|
| 220 |
+
21 8.2880 ± 0.3398 0.00714 ± 0.00583 0.04757 ± 0.00171 5.877 ± 0.202 % 91.317 ± 0.385 %
|
| 221 |
+
22 8.6064 ± 0.3481 0.00716 ± 0.00570 0.04837 ± 0.00166 5.906 ± 0.197 % 91.123 ± 0.380 %
|
| 222 |
+
23 8.6152 ± 0.3417 0.00698 ± 0.00579 0.05033 ± 0.00183 6.188 ± 0.236 % 91.100 ± 0.372 %
|
| 223 |
+
24 9.0075 ± 0.3516 0.00544 ± 0.00560 0.04989 ± 0.00176 6.098 ± 0.230 % 91.144 ± 0.363 %
|
| 224 |
+
25 9.0084 ± 0.3455 0.00693 ± 0.00555 0.05134 ± 0.00174 6.222 ± 0.225 % 91.137 ± 0.356 %
|
| 225 |
+
26 8.4803 ± 0.3158 0.01513 ± 0.00569 0.05974 ± 0.00225 7.632 ± 0.297 % 90.920 ± 0.353 %
|
| 226 |
+
27 8.1391 ± 0.2952 0.02800 ± 0.00626 0.07176 ± 0.00317 8.967 ± 0.335 % 90.704 ± 0.350 %
|
| 227 |
+
28 8.2576 ± 0.2950 0.02877 ± 0.00614 0.07165 ± 0.00307 8.879 ± 0.327 % 90.630 ± 0.345 %
|
| 228 |
+
29 8.1850 ± 0.2873 0.02941 ± 0.00602 0.07132 ± 0.00298 8.829 ± 0.319 % 90.710 ± 0.338 %
|
| 229 |
+
30 7.6572 ± 0.2620 0.02995 ± 0.00594 0.07038 ± 0.00297 8.818 ± 0.315 % 90.980 ± 0.328 %
|
| 230 |
+
31 7.2144 ± 0.2406 0.02975 ± 0.00578 0.06929 ± 0.00290 8.782 ± 0.313 % 91.208 ± 0.319 %
|
| 231 |
+
32 7.0230 ± 0.2287 0.02810 ± 0.00563 0.06817 ± 0.00281 8.738 ± 0.305 % 91.189 ± 0.314 %
|
| 232 |
+
33 6.8749 ± 0.2188 0.02668 ± 0.00552 0.06726 ± 0.00273 8.687 ± 0.297 % 91.147 ± 0.310 %
|
| 233 |
+
34 7.0621 ± 0.2227 0.02652 ± 0.00547 0.06815 ± 0.00267 8.673 ± 0.292 % 91.130 ± 0.305 %
|
| 234 |
+
35 7.1924 ± 0.2257 0.03017 ± 0.00547 0.07064 ± 0.00266 8.800 ± 0.288 % 90.992 ± 0.303 %
|
| 235 |
+
36 7.2592 ± 0.2257 0.03054 ± 0.00537 0.06976 ± 0.00259 8.722 ± 0.283 % 91.013 ± 0.299 %
|
| 236 |
+
37 7.2810 ± 0.2237 0.03124 ± 0.00540 0.07107 ± 0.00260 8.873 ± 0.283 % 91.012 ± 0.294 %
|
| 237 |
+
38 7.5001 ± 0.2284 0.03116 ± 0.00528 0.07075 ± 0.00253 8.828 ± 0.278 % 90.908 ± 0.292 %
|
| 238 |
+
39 7.4641 ± 0.2240 0.03329 ± 0.00528 0.07315 ± 0.00259 9.074 ± 0.278 % 90.749 ± 0.291 %
|
| 239 |
+
40 7.2500 ± 0.2132 0.03912 ± 0.00537 0.08056 ± 0.00279 9.685 ± 0.281 % 90.490 ± 0.290 %
|
| 240 |
+
41 7.0726 ± 0.2041 0.04516 ± 0.00541 0.08597 ± 0.00283 10.185 ± 0.273 % 90.244 ± 0.290 %
|
| 241 |
+
42 6.8915 ± 0.1955 0.05054 ± 0.00553 0.09099 ± 0.00294 10.573 ± 0.271 % 90.168 ± 0.288 %
|
| 242 |
+
43 6.7083 ± 0.1869 0.05474 ± 0.00555 0.09468 ± 0.00301 10.798 ± 0.269 % 90.114 ± 0.285 %
|
| 243 |
+
44 6.6533 ± 0.1825 0.05284 ± 0.00545 0.09313 ± 0.00294 10.710 ± 0.265 % 90.169 ± 0.281 %
|
| 244 |
+
45 6.7962 ± 0.1853 0.05220 ± 0.00539 0.09293 ± 0.00288 10.623 ± 0.262 % 90.100 ± 0.279 %
|
| 245 |
+
46 6.9365 ± 0.1874 0.05054 ± 0.00531 0.09198 ± 0.00282 10.529 ± 0.259 % 90.060 ± 0.276 %
|
| 246 |
+
47 7.0881 ± 0.1900 0.04997 ± 0.00521 0.09070 ± 0.00277 10.436 ± 0.255 % 90.113 ± 0.273 %
|
| 247 |
+
48 6.9635 ± 0.1836 0.04888 ± 0.00511 0.08935 ± 0.00271 10.352 ± 0.252 % 90.196 ± 0.269 %
|
| 248 |
+
49 7.0532 ± 0.1839 0.04667 ± 0.00512 0.09250 ± 0.00294 10.333 ± 0.249 % 90.076 ± 0.267 %
|
| 249 |
+
50 7.1528 ± 0.1856 0.04653 ± 0.00505 0.09179 ± 0.00288 10.267 ± 0.246 % 90.031 ± 0.265 %
|
| 250 |
+
51 7.2597 ± 0.1867 0.04597 ± 0.00497 0.09090 ± 0.00283 10.187 ± 0.243 % 90.073 ± 0.262 %
|
| 251 |
+
52 7.3284 ± 0.1865 0.04608 ± 0.00491 0.09092 ± 0.00278 10.133 ± 0.240 % 89.962 ± 0.261 %
|
| 252 |
+
53 7.4401 ± 0.1876 0.04541 ± 0.00484 0.09018 ± 0.00272 10.066 ± 0.237 % 89.900 ± 0.259 %
|
| 253 |
+
54 7.4919 ± 0.1867 0.04457 ± 0.00477 0.08934 ± 0.00268 9.996 ± 0.234 % 89.877 ± 0.257 %
|
| 254 |
+
55 7.5429 ± 0.1860 0.04433 ± 0.00470 0.08832 ± 0.00263 9.920 ± 0.232 % 89.918 ± 0.254 %
|
| 255 |
+
56 7.5789 ± 0.1852 0.04336 ± 0.00464 0.08747 ± 0.00258 9.852 ± 0.230 % 89.881 ± 0.252 %
|
| 256 |
+
57 7.5803 ± 0.1836 0.04360 ± 0.00460 0.08788 ± 0.00255 9.885 ± 0.227 % 89.818 ± 0.251 %
|
| 257 |
+
58 7.5837 ± 0.1820 0.04268 ± 0.00455 0.08716 ± 0.00251 9.841 ± 0.224 % 89.784 ± 0.249 %
|
| 258 |
+
59 7.5392 ± 0.1790 0.04192 ± 0.00449 0.08609 ± 0.00247 9.775 ± 0.222 % 89.850 ± 0.246 %
|
| 259 |
+
60 7.5491 ± 0.1778 0.04175 ± 0.00443 0.08552 ± 0.00243 9.728 ± 0.219 % 89.817 ± 0.245 %
|
| 260 |
+
61 7.5898 ± 0.1772 0.04050 ± 0.00437 0.08494 ± 0.00240 9.673 ± 0.217 % 89.862 ± 0.242 %
|
| 261 |
+
62 7.5554 ± 0.1753 0.03921 ± 0.00434 0.08431 ± 0.00236 9.640 ± 0.215 % 89.873 ± 0.240 %
|
| 262 |
+
63 7.5967 ± 0.1753 0.03835 ± 0.00431 0.08404 ± 0.00233 9.584 ± 0.213 % 89.866 ± 0.238 %
|
| 263 |
+
64 7.5806 ± 0.1732 0.03881 ± 0.00426 0.08351 ± 0.00229 9.537 ± 0.211 % 89.859 ± 0.236 %
|
| 264 |
+
65 7.5667 ± 0.1715 0.03813 ± 0.00422 0.08316 ± 0.00226 9.508 ± 0.208 % 89.864 ± 0.234 %
|
| 265 |
+
66 7.6021 ± 0.1712 0.03780 ± 0.00419 0.08293 ± 0.00223 9.476 ± 0.206 % 89.834 ± 0.233 %
|
| 266 |
+
67 7.6113 ± 0.1702 0.03808 ± 0.00414 0.08261 ± 0.00220 9.436 ± 0.204 % 89.839 ± 0.231 %
|
| 267 |
+
68 7.5654 ± 0.1677 0.03799 ± 0.00411 0.08187 ± 0.00217 9.383 ± 0.202 % 89.908 ± 0.229 %
|
| 268 |
+
69 7.5994 ± 0.1674 0.03803 ± 0.00407 0.08156 ± 0.00214 9.343 ± 0.200 % 89.935 ± 0.227 %
|
| 269 |
+
70 7.5656 ± 0.1651 0.03735 ± 0.00403 0.08115 ± 0.00211 9.315 ± 0.198 % 89.944 ± 0.225 %
|
| 270 |
+
71 7.5451 ± 0.1635 0.03663 ± 0.00399 0.08060 ± 0.00208 9.275 ± 0.197 % 90.003 ± 0.223 %
|
| 271 |
+
72 7.5665 ± 0.1631 0.03729 ± 0.00396 0.08049 ± 0.00206 9.267 ± 0.195 % 89.989 ± 0.222 %
|
| 272 |
+
73 7.5689 ± 0.1619 0.03687 ± 0.00392 0.08006 ± 0.00204 9.223 ± 0.193 % 89.981 ± 0.220 %
|
| 273 |
+
74 7.5575 ± 0.1604 0.03618 ± 0.00389 0.07984 ± 0.00201 9.189 ± 0.191 % 89.989 ± 0.218 %
|
| 274 |
+
75 7.5605 ± 0.1595 0.03567 ± 0.00385 0.07968 ± 0.00199 9.174 ± 0.190 % 89.956 ± 0.217 %
|
| 275 |
+
76 7.6235 ± 0.1599 0.03548 ± 0.00382 0.07945 ± 0.00196 9.155 ± 0.188 % 89.964 ± 0.216 %
|
| 276 |
+
77 7.6196 ± 0.1588 0.03524 ± 0.00379 0.07906 ± 0.00194 9.116 ± 0.186 % 89.962 ± 0.214 %
|
| 277 |
+
78 7.6315 ± 0.1582 0.03497 ± 0.00376 0.07871 ± 0.00191 9.079 ± 0.185 % 89.980 ± 0.213 %
|
| 278 |
+
79 7.6340 ± 0.1572 0.03384 ± 0.00373 0.07849 ± 0.00189 9.048 ± 0.183 % 89.983 ± 0.212 %
|
| 279 |
+
80 7.6332 ± 0.1567 0.03349 ± 0.00373 0.07866 ± 0.00188 9.048 ± 0.182 % 89.951 ± 0.211 %
|
| 280 |
+
81 7.6078 ± 0.1552 0.03326 ± 0.00369 0.07830 ± 0.00186 9.010 ± 0.180 % 90.002 ± 0.209 %
|
| 281 |
+
82 7.5890 ± 0.1537 0.03371 ± 0.00366 0.07795 ± 0.00183 8.981 ± 0.179 % 90.024 ± 0.207 %
|
| 282 |
+
83 7.6179 ± 0.1532 0.03320 ± 0.00363 0.07753 ± 0.00181 8.945 ± 0.177 % 90.012 ± 0.206 %
|
| 283 |
+
84 7.6296 ± 0.1521 0.03255 ± 0.00359 0.07705 ± 0.00179 8.908 ± 0.176 % 90.009 ± 0.205 %
|
| 284 |
+
85 7.6237 ± 0.1509 0.03234 ± 0.00356 0.07660 ± 0.00177 8.870 ± 0.175 % 90.016 ± 0.204 %
|
| 285 |
+
86 7.5586 ± 0.1482 0.03279 ± 0.00353 0.07620 ± 0.00175 8.847 ± 0.173 % 90.018 ± 0.202 %
|
| 286 |
+
87 7.5028 ± 0.1459 0.03318 ± 0.00350 0.07581 ± 0.00173 8.819 ± 0.172 % 90.034 ± 0.201 %
|
| 287 |
+
88 7.4429 ± 0.1435 0.03351 ± 0.00347 0.07549 ± 0.00171 8.807 ± 0.170 % 90.049 ± 0.200 %
|
| 288 |
+
89 7.3717 ± 0.1408 0.03386 ± 0.00343 0.07505 ± 0.00170 8.783 ± 0.169 % 90.055 ± 0.199 %
|
| 289 |
+
90 7.3173 ± 0.1386 0.03416 ± 0.00341 0.07467 ± 0.00168 8.756 ± 0.168 % 90.096 ± 0.197 %
|
| 290 |
+
91 7.2626 ± 0.1365 0.03377 ± 0.00338 0.07422 ± 0.00166 8.727 ± 0.166 % 90.149 ± 0.196 %
|
| 291 |
+
92 7.2047 ± 0.1343 0.03382 ± 0.00335 0.07389 ± 0.00164 8.715 ± 0.165 % 90.145 ± 0.195 %
|
| 292 |
+
93 7.2201 ± 0.1341 0.03346 ± 0.00334 0.07427 ± 0.00166 8.721 ± 0.165 % 90.129 ± 0.194 %
|
| 293 |
+
94 7.2492 ± 0.1338 0.03315 ± 0.00331 0.07384 ± 0.00165 8.684 ± 0.164 % 90.134 ± 0.193 %
|
| 294 |
+
95 7.3590 ± 0.1355 0.03298 ± 0.00329 0.07362 ± 0.00163 8.648 ± 0.163 % 90.105 ± 0.192 %
|
| 295 |
+
96 7.4526 ± 0.1367 0.03266 ± 0.00327 0.07350 ± 0.00162 8.617 ± 0.161 % 90.065 ± 0.191 %
|
| 296 |
+
97 7.5288 ± 0.1374 0.03218 ± 0.00324 0.07319 ± 0.00160 8.582 ± 0.160 % 90.026 ± 0.191 %
|
| 297 |
+
98 7.6671 ± 0.1398 0.03223 ± 0.00322 0.07283 ± 0.00158 8.547 ± 0.160 % 89.988 ± 0.190 %
|
| 298 |
+
99 7.7860 ± 0.1416 0.03206 ± 0.00320 0.07269 ± 0.00157 8.513 ± 0.159 % 89.915 ± 0.190 %
|
| 299 |
+
100 7.8189 ± 0.1415 0.03139 ± 0.00318 0.07268 ± 0.00155 8.491 ± 0.157 % 89.886 ± 0.189 %
|
| 300 |
+
101 7.8546 ± 0.1416 0.03111 ± 0.00317 0.07261 ± 0.00155 8.473 ± 0.157 % 89.901 ± 0.188 %
|
| 301 |
+
102 7.9179 ± 0.1426 0.03125 ± 0.00315 0.07260 ± 0.00153 8.476 ± 0.156 % 89.896 ± 0.187 %
|
| 302 |
+
103 7.8909 ± 0.1416 0.03120 ± 0.00313 0.07223 ± 0.00152 8.460 ± 0.155 % 89.911 ± 0.186 %
|
| 303 |
+
104 7.8318 ± 0.1396 0.03120 ± 0.00311 0.07213 ± 0.00151 8.473 ± 0.154 % 89.959 ± 0.185 %
|
| 304 |
+
105 7.7178 ± 0.1367 0.03103 ± 0.00311 0.07263 ± 0.00152 8.592 ± 0.156 % 89.998 ± 0.183 %
|
| 305 |
+
106 7.5837 ± 0.1333 0.03081 ± 0.00310 0.07236 ± 0.00152 8.616 ± 0.156 % 90.070 ± 0.182 %
|
| 306 |
+
107 7.6428 ± 0.1337 0.03085 ± 0.00307 0.07193 ± 0.00150 8.584 ± 0.156 % 90.101 ± 0.181 %
|
| 307 |
+
108 7.6537 ± 0.1333 0.03048 ± 0.00305 0.07166 ± 0.00149 8.561 ± 0.155 % 90.109 ± 0.180 %
|
| 308 |
+
109 7.6748 ± 0.1331 0.03052 ± 0.00303 0.07150 ± 0.00148 8.541 ± 0.154 % 90.121 ± 0.179 %
|
| 309 |
+
110 7.7114 ± 0.1332 0.03058 ± 0.00301 0.07121 ± 0.00146 8.516 ± 0.153 % 90.132 ± 0.178 %
|
| 310 |
+
111 7.7596 ± 0.1334 0.03050 ± 0.00299 0.07097 ± 0.00145 8.489 ± 0.152 % 90.132 ± 0.177 %
|
| 311 |
+
112 7.7678 ± 0.1328 0.03022 ± 0.00297 0.07066 ± 0.00144 8.466 ± 0.151 % 90.126 ± 0.177 %
|
| 312 |
+
113 7.7768 ± 0.1322 0.03001 ± 0.00295 0.07036 ± 0.00143 8.445 ± 0.150 % 90.130 ± 0.176 %
|
| 313 |
+
114 7.7926 ± 0.1320 0.02969 ± 0.00294 0.07012 ± 0.00142 8.419 ± 0.149 % 90.134 ± 0.175 %
|
| 314 |
+
115 7.7760 ± 0.1311 0.02991 ± 0.00294 0.07038 ± 0.00141 8.458 ± 0.149 % 90.138 ± 0.174 %
|
| 315 |
+
116 7.7698 ± 0.1304 0.03050 ± 0.00296 0.07175 ± 0.00142 8.586 ± 0.149 % 90.081 ± 0.174 %
|
| 316 |
+
117 7.6784 ± 0.1280 0.03223 ± 0.00298 0.07387 ± 0.00146 8.877 ± 0.152 % 90.012 ± 0.174 %
|
| 317 |
+
118 7.5864 ± 0.1257 0.03304 ± 0.00299 0.07533 ± 0.00148 9.006 ± 0.152 % 89.983 ± 0.173 %
|
| 318 |
+
119 7.4969 ± 0.1234 0.03461 ± 0.00300 0.07690 ± 0.00150 9.192 ± 0.153 % 89.975 ± 0.172 %
|
| 319 |
+
120 7.4259 ± 0.1215 0.03645 ± 0.00305 0.07932 ± 0.00156 9.439 ± 0.155 % 89.908 ± 0.172 %
|
| 320 |
+
121 7.3495 ± 0.1196 0.03774 ± 0.00306 0.08116 ± 0.00159 9.622 ± 0.156 % 89.882 ± 0.172 %
|
| 321 |
+
|
| 322 |
+
====== Perplexity statistics ======
|
| 323 |
+
Mean PPL(Q) : 7.349464 ± 0.119566
|
| 324 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 325 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.22%
|
| 326 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.037743 ± 0.003065
|
| 327 |
+
Mean PPL(Q)/PPL(base) : 1.038465 ± 0.003182
|
| 328 |
+
Mean PPL(Q)-PPL(base) : 0.272225 ± 0.022708
|
| 329 |
+
|
| 330 |
+
====== KL divergence statistics ======
|
| 331 |
+
Mean KLD: 0.081161 ± 0.001589
|
| 332 |
+
Maximum KLD: 9.655190
|
| 333 |
+
99.9% KLD: 3.576555
|
| 334 |
+
99.0% KLD: 1.201161
|
| 335 |
+
95.0% KLD: 0.301211
|
| 336 |
+
90.0% KLD: 0.150507
|
| 337 |
+
Median KLD: 0.020212
|
| 338 |
+
10.0% KLD: 0.000097
|
| 339 |
+
5.0% KLD: 0.000016
|
| 340 |
+
1.0% KLD: 0.000001
|
| 341 |
+
0.1% KLD: -0.000001
|
| 342 |
+
Minimum KLD: -0.000004
|
| 343 |
+
|
| 344 |
+
====== Token probability statistics ======
|
| 345 |
+
Mean Δp: -0.955 ± 0.055 %
|
| 346 |
+
Maximum Δp: 90.107%
|
| 347 |
+
99.9% Δp: 63.169%
|
| 348 |
+
99.0% Δp: 22.243%
|
| 349 |
+
95.0% Δp: 7.951%
|
| 350 |
+
90.0% Δp: 4.032%
|
| 351 |
+
75.0% Δp: 0.430%
|
| 352 |
+
Median Δp: -0.003%
|
| 353 |
+
25.0% Δp: -1.128%
|
| 354 |
+
10.0% Δp: -6.194%
|
| 355 |
+
5.0% Δp: -11.813%
|
| 356 |
+
1.0% Δp: -42.549%
|
| 357 |
+
0.1% Δp: -85.917%
|
| 358 |
+
Minimum Δp: -98.687%
|
| 359 |
+
RMS Δp : 9.622 ± 0.156 %
|
| 360 |
+
Same top p: 89.882 ± 0.172 %
|
| 361 |
+
|
| 362 |
+
llama_perf_context_print: load time = 52413.09 ms
|
| 363 |
+
llama_perf_context_print: prompt eval time = 87711.77 ms / 61952 tokens ( 1.42 ms per token, 706.31 tokens per second)
|
| 364 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 365 |
+
llama_perf_context_print: total time = 103047.82 ms / 61953 tokens
|
| 366 |
+
llama_perf_context_print: graphs reused = 0
|
| 367 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1275 + ( 22406 = 20882 + 176 + 1347) + 452 |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1003 + ( 22679 = 18689 + 816 + 3174) + 451 |
|
| 370 |
+
llama_memory_breakdown_print: | - Host | 123119 = 123015 + 0 + 104 |
|
| 371 |
+
```
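
As a quick sanity check on the summary block above, the printed mean perplexity ratio, mean log-ratio, and mean difference are mutually consistent: the ratio agrees with the exponential of the mean log-ratio. A minimal sketch using only the numbers printed above (no new measurements):

```python
import math

# Values copied from the "Perplexity statistics" block above.
mean_ppl_q    = 7.349464   # Mean PPL(Q)
mean_ppl_base = 7.077240   # Mean PPL(base)
mean_ln_ratio = 0.037743   # Mean ln(PPL(Q)/PPL(base))

print(math.exp(mean_ln_ratio))     # ~1.0385 -> matches the reported Mean PPL(Q)/PPL(base)
print(mean_ppl_q / mean_ppl_base)  # ~1.0385 as well
print(mean_ppl_q - mean_ppl_base)  # ~0.2722 -> matches the reported Mean PPL(Q)-PPL(base)
```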
|
kld_data/unsloth/Q4_1/MiniMax-M2.5-Q4_1.md
ADDED
|
@@ -0,0 +1,370 @@
|
| 1 |
+
### MiniMax-M2.5-Q4_1 (unsloth)
|
| 2 |
+
|
| 3 |
+
133.38 GiB (5.01 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_1/MiniMax-M2.5-Q4_1-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 71565 used, -47694 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 70214 used, -46342 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 141780 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 96084 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37732 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8623 MiB used, 15248 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1491 MiB used, 22379 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 10 layers ( 1 overflowing), 22825 MiB used, 1046 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 53 layers (46 overflowing), 22567 MiB used, 1304 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.08 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_1/MiniMax-M2.5-Q4_1-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 3
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_1: 435 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q4_1
|
| 90 |
+
print_info: file size = 133.38 GiB (5.01 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 47087.96 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47418.35 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 41570.44 MiB
|
| 163 |
+
load_tensors: CUDA0 model buffer size = 21173.02 MiB
|
| 164 |
+
load_tensors: CUDA1 model buffer size = 18561.51 MiB
|
| 165 |
+
....................................................................................................
|
| 166 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 167 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 168 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 169 |
+
llama_context: constructing llama_context
|
| 170 |
+
llama_context: n_seq_max = 8
|
| 171 |
+
llama_context: n_ctx = 4096
|
| 172 |
+
llama_context: n_ctx_seq = 512
|
| 173 |
+
llama_context: n_batch = 4096
|
| 174 |
+
llama_context: n_ubatch = 4096
|
| 175 |
+
llama_context: causal_attn = 1
|
| 176 |
+
llama_context: flash_attn = enabled
|
| 177 |
+
llama_context: kv_unified = false
|
| 178 |
+
llama_context: freq_base = 5000000.0
|
| 179 |
+
llama_context: freq_scale = 1
|
| 180 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 181 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 182 |
+
llama_kv_cache: CUDA0 KV buffer size = 160.00 MiB
|
| 183 |
+
llama_kv_cache: CUDA1 KV buffer size = 832.00 MiB
|
| 184 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 185 |
+
sched_reserve: reserving ...
|
| 186 |
+
sched_reserve: CUDA0 compute buffer size = 1492.00 MiB
|
| 187 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 188 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 189 |
+
sched_reserve: graph nodes = 4099
|
| 190 |
+
sched_reserve: graph splits = 185 (with bs=4096), 97 (with bs=1)
|
| 191 |
+
sched_reserve: reserve took 22.97 ms, sched copies = 1
|
| 192 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 193 |
+
|
| 194 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 195 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 196 |
+
kl_divergence: 6.92 seconds per pass - ETA 1.73 minutes
|
| 197 |
+
|
| 198 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 199 |
+
1 6.4158 ± 1.2367 0.00669 ± 0.02016 0.02598 ± 0.00278 5.582 ± 0.723 % 92.549 ± 1.648 %
|
| 200 |
+
2 4.7329 ± 0.5734 0.01233 ± 0.01263 0.01971 ± 0.00161 4.558 ± 0.469 % 94.510 ± 1.010 %
|
| 201 |
+
3 4.5197 ± 0.4487 0.00696 ± 0.01135 0.02358 ± 0.00202 5.621 ± 0.602 % 95.294 ± 0.766 %
|
| 202 |
+
4 5.1505 ± 0.4567 0.00431 ± 0.00994 0.02778 ± 0.00226 5.836 ± 0.554 % 95.000 ± 0.683 %
|
| 203 |
+
5 4.9280 ± 0.3897 0.00671 ± 0.00997 0.03101 ± 0.00344 6.294 ± 0.639 % 94.824 ± 0.621 %
|
| 204 |
+
6 5.9938 ± 0.4660 -0.00047 ± 0.00951 0.03493 ± 0.00302 6.062 ± 0.559 % 94.510 ± 0.583 %
|
| 205 |
+
7 5.5787 ± 0.3876 -0.00038 ± 0.00917 0.03951 ± 0.00282 6.455 ± 0.479 % 93.838 ± 0.569 %
|
| 206 |
+
8 6.3076 ± 0.4166 -0.00109 ± 0.00850 0.03858 ± 0.00250 6.204 ± 0.437 % 93.676 ± 0.539 %
|
| 207 |
+
9 6.1943 ± 0.3814 0.00006 ± 0.00780 0.03698 ± 0.00225 6.006 ± 0.403 % 93.551 ± 0.513 %
|
| 208 |
+
10 5.6721 ± 0.3251 0.00103 ± 0.00719 0.03540 ± 0.00204 5.951 ± 0.369 % 93.490 ± 0.489 %
|
| 209 |
+
11 6.2153 ± 0.3449 0.00068 ± 0.00680 0.03551 ± 0.00188 5.838 ± 0.344 % 93.333 ± 0.471 %
|
| 210 |
+
12 6.8850 ± 0.3715 0.00152 ± 0.00642 0.03583 ± 0.00178 5.694 ± 0.324 % 92.876 ± 0.465 %
|
| 211 |
+
13 7.1510 ± 0.3677 0.00215 ± 0.00606 0.03571 ± 0.00175 5.563 ± 0.306 % 92.790 ± 0.449 %
|
| 212 |
+
14 7.7248 ± 0.3878 0.00408 ± 0.00589 0.03652 ± 0.00173 5.619 ± 0.311 % 92.549 ± 0.440 %
|
| 213 |
+
15 8.0866 ± 0.3926 0.00373 ± 0.00564 0.03644 ± 0.00164 5.641 ± 0.294 % 92.418 ± 0.428 %
|
| 214 |
+
16 8.3358 ± 0.3917 0.00188 ± 0.00543 0.03558 ± 0.00154 5.573 ± 0.280 % 92.549 ± 0.411 %
|
| 215 |
+
17 8.5658 ± 0.3931 0.00199 ± 0.00539 0.03664 ± 0.00150 5.495 ± 0.269 % 92.457 ± 0.401 %
|
| 216 |
+
18 8.0758 ± 0.3577 0.00231 ± 0.00520 0.03659 ± 0.00145 5.435 ± 0.259 % 92.571 ± 0.387 %
|
| 217 |
+
19 8.1900 ± 0.3527 0.00055 ± 0.00503 0.03573 ± 0.00138 5.407 ± 0.249 % 92.528 ± 0.378 %
|
| 218 |
+
20 8.2496 ± 0.3465 -0.00007 ± 0.00499 0.03681 ± 0.00136 5.442 ± 0.238 % 92.392 ± 0.371 %
|
| 219 |
+
21 8.2124 ± 0.3362 -0.00203 ± 0.00486 0.03667 ± 0.00131 5.480 ± 0.231 % 92.381 ± 0.363 %
|
| 220 |
+
22 8.5293 ± 0.3445 -0.00183 ± 0.00476 0.03711 ± 0.00126 5.460 ± 0.223 % 92.210 ± 0.358 %
|
| 221 |
+
23 8.5417 ± 0.3384 -0.00159 ± 0.00494 0.03891 ± 0.00133 5.553 ± 0.217 % 92.140 ± 0.351 %
|
| 222 |
+
24 8.9399 ± 0.3486 -0.00209 ± 0.00480 0.03864 ± 0.00128 5.475 ± 0.211 % 92.075 ± 0.345 %
|
| 223 |
+
25 8.9259 ± 0.3415 -0.00228 ± 0.00473 0.03959 ± 0.00128 5.672 ± 0.219 % 91.922 ± 0.341 %
|
| 224 |
+
26 8.3548 ± 0.3104 0.00021 ± 0.00475 0.04453 ± 0.00161 6.463 ± 0.257 % 91.855 ± 0.336 %
|
| 225 |
+
27 7.9696 ± 0.2882 0.00695 ± 0.00498 0.05270 ± 0.00206 7.544 ± 0.290 % 91.634 ± 0.334 %
|
| 226 |
+
28 8.0933 ± 0.2882 0.00867 ± 0.00492 0.05294 ± 0.00201 7.512 ± 0.285 % 91.569 ± 0.329 %
|
| 227 |
+
29 8.0191 ± 0.2804 0.00893 ± 0.00481 0.05267 ± 0.00195 7.457 ± 0.279 % 91.616 ± 0.322 %
|
| 228 |
+
30 7.5003 ± 0.2557 0.00925 ± 0.00470 0.05138 ± 0.00191 7.374 ± 0.274 % 91.869 ± 0.312 %
|
| 229 |
+
31 7.0729 ± 0.2351 0.00994 ± 0.00463 0.05072 ± 0.00187 7.353 ± 0.269 % 92.081 ± 0.304 %
|
| 230 |
+
32 6.8923 ± 0.2235 0.00932 ± 0.00453 0.04999 ± 0.00181 7.338 ± 0.262 % 92.083 ± 0.299 %
|
| 231 |
+
33 6.7581 ± 0.2143 0.00955 ± 0.00443 0.04923 ± 0.00176 7.299 ± 0.256 % 92.109 ± 0.294 %
|
| 232 |
+
34 6.9371 ± 0.2178 0.00867 ± 0.00442 0.04977 ± 0.00172 7.263 ± 0.250 % 92.018 ± 0.291 %
|
| 233 |
+
35 7.0453 ± 0.2200 0.00950 ± 0.00438 0.05079 ± 0.00169 7.329 ± 0.245 % 91.866 ± 0.289 %
|
| 234 |
+
36 7.1075 ± 0.2196 0.00942 ± 0.00430 0.05042 ± 0.00165 7.282 ± 0.240 % 91.841 ± 0.286 %
|
| 235 |
+
37 7.1310 ± 0.2177 0.01043 ± 0.00434 0.05237 ± 0.00175 7.506 ± 0.245 % 91.828 ± 0.282 %
|
| 236 |
+
38 7.3523 ± 0.2226 0.01125 ± 0.00426 0.05193 ± 0.00171 7.446 ± 0.241 % 91.816 ± 0.278 %
|
| 237 |
+
39 7.3070 ± 0.2180 0.01202 ± 0.00424 0.05353 ± 0.00175 7.624 ± 0.240 % 91.735 ± 0.276 %
|
| 238 |
+
40 7.0793 ± 0.2071 0.01528 ± 0.00436 0.05900 ± 0.00190 8.245 ± 0.249 % 91.559 ± 0.275 %
|
| 239 |
+
41 6.8875 ± 0.1977 0.01864 ± 0.00452 0.06561 ± 0.00213 8.967 ± 0.259 % 91.373 ± 0.275 %
|
| 240 |
+
42 6.6974 ± 0.1888 0.02197 ± 0.00465 0.07030 ± 0.00225 9.542 ± 0.264 % 91.289 ± 0.273 %
|
| 241 |
+
43 6.5118 ± 0.1804 0.02501 ± 0.00474 0.07411 ± 0.00240 9.895 ± 0.265 % 91.272 ± 0.270 %
|
| 242 |
+
44 6.4561 ± 0.1760 0.02275 ± 0.00466 0.07302 ± 0.00235 9.810 ± 0.261 % 91.337 ± 0.266 %
|
| 243 |
+
45 6.5966 ± 0.1788 0.02239 ± 0.00460 0.07275 ± 0.00230 9.743 ± 0.258 % 91.268 ± 0.264 %
|
| 244 |
+
46 6.7369 ± 0.1810 0.02135 ± 0.00452 0.07203 ± 0.00226 9.657 ± 0.254 % 91.287 ± 0.260 %
|
| 245 |
+
47 6.8905 ± 0.1837 0.02169 ± 0.00444 0.07108 ± 0.00221 9.567 ± 0.251 % 91.289 ± 0.258 %
|
| 246 |
+
48 6.7733 ± 0.1777 0.02118 ± 0.00436 0.07001 ± 0.00216 9.489 ± 0.248 % 91.356 ± 0.254 %
|
| 247 |
+
49 6.8802 ± 0.1788 0.02182 ± 0.00442 0.07368 ± 0.00248 9.588 ± 0.249 % 91.180 ± 0.254 %
|
| 248 |
+
50 6.9898 ± 0.1810 0.02348 ± 0.00436 0.07309 ± 0.00244 9.520 ± 0.246 % 91.153 ± 0.252 %
|
| 249 |
+
51 7.0951 ± 0.1821 0.02303 ± 0.00429 0.07226 ± 0.00239 9.448 ± 0.243 % 91.226 ± 0.248 %
|
| 250 |
+
52 7.1588 ± 0.1817 0.02266 ± 0.00425 0.07241 ± 0.00235 9.413 ± 0.240 % 91.139 ± 0.247 %
|
| 251 |
+
53 7.2696 ± 0.1828 0.02223 ± 0.00419 0.07188 ± 0.00231 9.362 ± 0.238 % 91.165 ± 0.244 %
|
| 252 |
+
54 7.3205 ± 0.1819 0.02143 ± 0.00413 0.07113 ± 0.00227 9.296 ± 0.235 % 91.126 ± 0.242 %
|
| 253 |
+
55 7.3718 ± 0.1813 0.02139 ± 0.00407 0.07037 ± 0.00223 9.230 ± 0.232 % 91.123 ± 0.240 %
|
| 254 |
+
56 7.4098 ± 0.1807 0.02081 ± 0.00401 0.06958 ± 0.00219 9.161 ± 0.230 % 91.134 ± 0.238 %
|
| 255 |
+
57 7.4077 ± 0.1789 0.02057 ± 0.00399 0.06982 ± 0.00216 9.162 ± 0.227 % 91.111 ± 0.236 %
|
| 256 |
+
58 7.4170 ± 0.1776 0.02046 ± 0.00394 0.06947 ± 0.00213 9.135 ± 0.225 % 91.068 ± 0.235 %
|
| 257 |
+
59 7.3782 ± 0.1749 0.02034 ± 0.00389 0.06854 ± 0.00209 9.066 ± 0.223 % 91.127 ± 0.232 %
|
| 258 |
+
60 7.3919 ± 0.1738 0.02071 ± 0.00385 0.06819 ± 0.00206 9.021 ± 0.220 % 91.065 ± 0.231 %
|
| 259 |
+
61 7.4389 ± 0.1735 0.02042 ± 0.00381 0.06792 ± 0.00204 8.975 ± 0.218 % 91.077 ± 0.229 %
|
| 260 |
+
62 7.4065 ± 0.1716 0.01932 ± 0.00379 0.06752 ± 0.00201 8.945 ± 0.215 % 91.120 ± 0.226 %
|
| 261 |
+
63 7.4458 ± 0.1715 0.01829 ± 0.00376 0.06745 ± 0.00200 8.897 ± 0.213 % 91.111 ± 0.225 %
|
| 262 |
+
64 7.4245 ± 0.1693 0.01799 ± 0.00372 0.06711 ± 0.00197 8.856 ± 0.211 % 91.085 ± 0.223 %
|
| 263 |
+
65 7.4140 ± 0.1677 0.01776 ± 0.00368 0.06670 ± 0.00194 8.814 ± 0.209 % 91.095 ± 0.221 %
|
| 264 |
+
66 7.4541 ± 0.1676 0.01815 ± 0.00365 0.06649 ± 0.00191 8.782 ± 0.207 % 91.064 ± 0.220 %
|
| 265 |
+
67 7.4650 ± 0.1667 0.01866 ± 0.00361 0.06633 ± 0.00189 8.737 ± 0.205 % 91.062 ± 0.218 %
|
| 266 |
+
68 7.4235 ± 0.1643 0.01905 ± 0.00358 0.06578 ± 0.00186 8.684 ± 0.203 % 91.107 ± 0.216 %
|
| 267 |
+
69 7.4526 ± 0.1639 0.01852 ± 0.00355 0.06539 ± 0.00183 8.638 ± 0.201 % 91.134 ± 0.214 %
|
| 268 |
+
70 7.4195 ± 0.1617 0.01785 ± 0.00352 0.06516 ± 0.00181 8.619 ± 0.200 % 91.165 ± 0.212 %
|
| 269 |
+
71 7.4040 ± 0.1602 0.01775 ± 0.00348 0.06468 ± 0.00179 8.572 ± 0.198 % 91.196 ± 0.211 %
|
| 270 |
+
72 7.4269 ± 0.1599 0.01866 ± 0.00347 0.06440 ± 0.00177 8.539 ± 0.196 % 91.209 ± 0.209 %
|
| 271 |
+
73 7.4360 ± 0.1589 0.01916 ± 0.00344 0.06399 ± 0.00174 8.505 ± 0.195 % 91.201 ± 0.208 %
|
| 272 |
+
74 7.4282 ± 0.1576 0.01892 ± 0.00342 0.06397 ± 0.00172 8.477 ± 0.193 % 91.155 ± 0.207 %
|
| 273 |
+
75 7.4281 ± 0.1565 0.01800 ± 0.00340 0.06413 ± 0.00170 8.485 ± 0.191 % 91.085 ± 0.206 %
|
| 274 |
+
76 7.4928 ± 0.1571 0.01819 ± 0.00338 0.06404 ± 0.00168 8.474 ± 0.189 % 91.078 ± 0.205 %
|
| 275 |
+
77 7.4888 ± 0.1560 0.01793 ± 0.00335 0.06372 ± 0.00166 8.430 ± 0.187 % 91.123 ± 0.203 %
|
| 276 |
+
78 7.5014 ± 0.1553 0.01778 ± 0.00332 0.06354 ± 0.00164 8.418 ± 0.186 % 91.106 ± 0.202 %
|
| 277 |
+
79 7.5138 ± 0.1547 0.01798 ± 0.00330 0.06340 ± 0.00163 8.382 ± 0.185 % 91.119 ± 0.200 %
|
| 278 |
+
80 7.5177 ± 0.1543 0.01825 ± 0.00332 0.06355 ± 0.00161 8.369 ± 0.183 % 91.103 ± 0.199 %
|
| 279 |
+
81 7.4938 ± 0.1529 0.01817 ± 0.00329 0.06326 ± 0.00159 8.340 ± 0.181 % 91.126 ± 0.198 %
|
| 280 |
+
82 7.4784 ± 0.1514 0.01903 ± 0.00326 0.06290 ± 0.00158 8.308 ± 0.180 % 91.148 ± 0.196 %
|
| 281 |
+
83 7.5097 ± 0.1510 0.01889 ± 0.00323 0.06249 ± 0.00156 8.270 ± 0.179 % 91.146 ± 0.195 %
|
| 282 |
+
84 7.5267 ± 0.1501 0.01897 ± 0.00320 0.06207 ± 0.00154 8.236 ± 0.177 % 91.120 ± 0.194 %
|
| 283 |
+
85 7.5219 ± 0.1489 0.01889 ± 0.00316 0.06165 ± 0.00152 8.203 ± 0.176 % 91.114 ± 0.193 %
|
| 284 |
+
86 7.4555 ± 0.1462 0.01907 ± 0.00314 0.06135 ± 0.00151 8.177 ± 0.174 % 91.140 ± 0.192 %
|
| 285 |
+
87 7.3959 ± 0.1437 0.01884 ± 0.00311 0.06095 ± 0.00149 8.144 ± 0.173 % 91.161 ± 0.191 %
|
| 286 |
+
88 7.3366 ± 0.1413 0.01912 ± 0.00308 0.06064 ± 0.00147 8.127 ± 0.172 % 91.176 ± 0.189 %
|
| 287 |
+
89 7.2654 ± 0.1387 0.01934 ± 0.00306 0.06033 ± 0.00146 8.105 ± 0.171 % 91.201 ± 0.188 %
|
| 288 |
+
90 7.2089 ± 0.1365 0.01925 ± 0.00304 0.06001 ± 0.00144 8.073 ± 0.169 % 91.233 ± 0.187 %
|
| 289 |
+
91 7.1580 ± 0.1344 0.01926 ± 0.00301 0.05961 ± 0.00143 8.043 ± 0.168 % 91.261 ± 0.185 %
|
| 290 |
+
92 7.1016 ± 0.1322 0.01940 ± 0.00298 0.05932 ± 0.00141 8.025 ± 0.167 % 91.257 ± 0.184 %
|
| 291 |
+
93 7.1224 ± 0.1322 0.01983 ± 0.00299 0.05984 ± 0.00146 8.030 ± 0.166 % 91.238 ± 0.184 %
|
| 292 |
+
94 7.1537 ± 0.1320 0.01989 ± 0.00296 0.05947 ± 0.00145 7.998 ± 0.165 % 91.252 ± 0.182 %
|
| 293 |
+
95 7.2628 ± 0.1337 0.01983 ± 0.00295 0.05942 ± 0.00143 7.971 ± 0.164 % 91.212 ± 0.182 %
|
| 294 |
+
96 7.3545 ± 0.1348 0.01942 ± 0.00293 0.05946 ± 0.00142 7.943 ± 0.163 % 91.148 ± 0.182 %
|
| 295 |
+
97 7.4312 ± 0.1356 0.01913 ± 0.00290 0.05920 ± 0.00141 7.912 ± 0.162 % 91.110 ± 0.181 %
|
| 296 |
+
98 7.5710 ± 0.1380 0.01962 ± 0.00289 0.05893 ± 0.00140 7.877 ± 0.161 % 91.088 ± 0.180 %
|
| 297 |
+
99 7.6881 ± 0.1398 0.01941 ± 0.00287 0.05880 ± 0.00138 7.844 ± 0.160 % 91.075 ± 0.179 %
|
| 298 |
+
100 7.7218 ± 0.1397 0.01890 ± 0.00285 0.05885 ± 0.00138 7.823 ± 0.159 % 91.039 ± 0.179 %
|
| 299 |
+
101 7.7565 ± 0.1398 0.01855 ± 0.00283 0.05898 ± 0.00140 7.834 ± 0.159 % 91.023 ± 0.178 %
|
| 300 |
+
102 7.8185 ± 0.1408 0.01860 ± 0.00282 0.05893 ± 0.00139 7.824 ± 0.158 % 91.027 ± 0.177 %
|
| 301 |
+
103 7.7885 ± 0.1397 0.01814 ± 0.00281 0.05866 ± 0.00137 7.814 ± 0.157 % 91.041 ± 0.176 %
|
| 302 |
+
104 7.7306 ± 0.1378 0.01819 ± 0.00279 0.05888 ± 0.00138 7.867 ± 0.157 % 91.063 ± 0.175 %
|
| 303 |
+
105 7.6225 ± 0.1350 0.01861 ± 0.00280 0.05898 ± 0.00137 7.915 ± 0.157 % 91.104 ± 0.174 %
|
| 304 |
+
106 7.4921 ± 0.1317 0.01865 ± 0.00279 0.05887 ± 0.00137 7.955 ± 0.159 % 91.162 ± 0.173 %
|
| 305 |
+
107 7.5491 ± 0.1321 0.01851 ± 0.00276 0.05855 ± 0.00136 7.930 ± 0.158 % 91.156 ± 0.172 %
|
| 306 |
+
108 7.5605 ± 0.1316 0.01824 ± 0.00274 0.05822 ± 0.00134 7.903 ± 0.157 % 91.209 ± 0.171 %
|
| 307 |
+
109 7.5807 ± 0.1314 0.01818 ± 0.00273 0.05807 ± 0.00133 7.889 ± 0.156 % 91.225 ± 0.170 %
|
| 308 |
+
110 7.6159 ± 0.1315 0.01813 ± 0.00271 0.05781 ± 0.00132 7.868 ± 0.155 % 91.226 ± 0.169 %
|
| 309 |
+
111 7.6631 ± 0.1316 0.01799 ± 0.00269 0.05757 ± 0.00131 7.841 ± 0.154 % 91.231 ± 0.168 %
|
| 310 |
+
112 7.6708 ± 0.1311 0.01765 ± 0.00267 0.05727 ± 0.00130 7.817 ± 0.153 % 91.243 ± 0.167 %
|
| 311 |
+
113 7.6808 ± 0.1305 0.01758 ± 0.00265 0.05702 ± 0.00129 7.795 ± 0.152 % 91.261 ± 0.166 %
|
| 312 |
+
114 7.6985 ± 0.1303 0.01754 ± 0.00264 0.05682 ± 0.00128 7.771 ± 0.151 % 91.266 ± 0.166 %
|
| 313 |
+
115 7.6806 ± 0.1294 0.01756 ± 0.00263 0.05695 ± 0.00127 7.781 ± 0.150 % 91.263 ± 0.165 %
|
| 314 |
+
116 7.6792 ± 0.1289 0.01878 ± 0.00264 0.05840 ± 0.00129 7.903 ± 0.150 % 91.183 ± 0.165 %
|
| 315 |
+
117 7.5906 ± 0.1266 0.02074 ± 0.00267 0.06006 ± 0.00131 8.091 ± 0.150 % 91.145 ± 0.164 %
|
| 316 |
+
118 7.5026 ± 0.1243 0.02193 ± 0.00268 0.06154 ± 0.00133 8.247 ± 0.150 % 91.113 ± 0.164 %
|
| 317 |
+
119 7.4167 ± 0.1221 0.02386 ± 0.00271 0.06321 ± 0.00137 8.455 ± 0.151 % 91.112 ± 0.163 %
|
| 318 |
+
120 7.3420 ± 0.1201 0.02509 ± 0.00273 0.06466 ± 0.00138 8.638 ± 0.150 % 91.049 ± 0.163 %
|
| 319 |
+
121 7.2651 ± 0.1182 0.02620 ± 0.00273 0.06615 ± 0.00140 8.817 ± 0.151 % 91.006 ± 0.163 %
|
| 320 |
+
|
| 321 |
+
====== Perplexity statistics ======
|
| 322 |
+
Mean PPL(Q) : 7.265121 ± 0.118153
|
| 323 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 324 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.58%
|
| 325 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.026201 ± 0.002730
|
| 326 |
+
Mean PPL(Q)/PPL(base) : 1.026547 ± 0.002803
|
| 327 |
+
Mean PPL(Q)-PPL(base) : 0.187881 ± 0.019941
|
| 328 |
+
|
| 329 |
+
====== KL divergence statistics ======
|
| 330 |
+
Mean KLD: 0.066154 ± 0.001400
|
| 331 |
+
Maximum KLD: 10.726225
|
| 332 |
+
99.9% KLD: 3.200900
|
| 333 |
+
99.0% KLD: 1.036349
|
| 334 |
+
95.0% KLD: 0.242768
|
| 335 |
+
90.0% KLD: 0.117684
|
| 336 |
+
Median KLD: 0.015232
|
| 337 |
+
10.0% KLD: 0.000071
|
| 338 |
+
5.0% KLD: 0.000012
|
| 339 |
+
1.0% KLD: 0.000000
|
| 340 |
+
0.1% KLD: -0.000002
|
| 341 |
+
Minimum KLD: -0.000010
|
| 342 |
+
|
| 343 |
+
====== Token probability statistics ======
|
| 344 |
+
Mean Δp: -0.668 ± 0.050 %
|
| 345 |
+
Maximum Δp: 97.286%
|
| 346 |
+
99.9% Δp: 65.042%
|
| 347 |
+
99.0% Δp: 22.120%
|
| 348 |
+
95.0% Δp: 7.395%
|
| 349 |
+
90.0% Δp: 3.644%
|
| 350 |
+
75.0% Δp: 0.412%
|
| 351 |
+
Median Δp: -0.002%
|
| 352 |
+
25.0% Δp: -0.911%
|
| 353 |
+
10.0% Δp: -5.108%
|
| 354 |
+
5.0% Δp: -9.877%
|
| 355 |
+
1.0% Δp: -35.755%
|
| 356 |
+
0.1% Δp: -78.081%
|
| 357 |
+
Minimum Δp: -98.534%
|
| 358 |
+
RMS Δp : 8.817 ± 0.151 %
|
| 359 |
+
Same top p: 91.006 ± 0.163 %
|
| 360 |
+
|
| 361 |
+
llama_perf_context_print: load time = 51774.17 ms
|
| 362 |
+
llama_perf_context_print: prompt eval time = 96882.83 ms / 61952 tokens ( 1.56 ms per token, 639.45 tokens per second)
|
| 363 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 364 |
+
llama_perf_context_print: total time = 112647.78 ms / 61953 tokens
|
| 365 |
+
llama_perf_context_print: graphs reused = 0
|
| 366 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 367 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 857 + ( 22825 = 21173 + 160 + 1491) + 452 |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1147 + ( 22567 = 18561 + 832 + 3174) + 419 |
|
| 369 |
+
llama_memory_breakdown_print: | - Host | 136180 = 136076 + 0 + 104 |
|
| 370 |
+
```
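
For readers unfamiliar with the per-token metric behind the `Mean KLD` lines in these logs: it is the KL divergence between the base (BF16) model's next-token distribution and the quantized model's next-token distribution, averaged over all evaluated positions. A minimal sketch of that quantity, assuming two logit vectors over the same vocabulary (the function name `token_kld` is illustrative, not part of llama.cpp):

```python
import numpy as np

def token_kld(logits_base: np.ndarray, logits_q: np.ndarray) -> float:
    """KL(P_base || P_quant) for a single token position, in nats."""
    logp = logits_base - np.logaddexp.reduce(logits_base)  # log-softmax of base logits
    logq = logits_q - np.logaddexp.reduce(logits_q)        # log-softmax of quant logits
    p = np.exp(logp)
    return float(np.sum(p * (logp - logq)))

# "Mean KLD" above is this value averaged over every evaluated token position;
# "Same top p" is the fraction of positions where both models agree on the
# most probable next token.
```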
|
kld_data/unsloth/Q4_K_M/MiniMax-M2.5-Q4_K_M.md
ADDED
|
@@ -0,0 +1,370 @@
|
| 1 |
+
### MiniMax-M2.5-Q4_K_M (unsloth)
|
| 2 |
+
|
| 3 |
+
128.83 GiB (4.84 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_K_M/MiniMax-M2.5-Q4_K_M-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 69036 used, -45164 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 68122 used, -44250 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 137158 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 91462 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37646 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8564 MiB used, 15306 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1644 MiB used, 22226 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 10 layers ( 1 overflowing), 22297 MiB used, 1573 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 53 layers (46 overflowing), 22334 MiB used, 1537 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.09 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_K_M/MiniMax-M2.5-Q4_K_M-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 15
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 375 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 61 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q4_K - Medium
|
| 90 |
+
print_info: file size = 128.83 GiB (4.84 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 46606.85 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47406.97 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 37425.01 MiB
|
| 163 |
+
load_tensors: CUDA0 model buffer size = 20492.96 MiB
|
| 164 |
+
load_tensors: CUDA1 model buffer size = 18328.02 MiB
|
| 165 |
+
....................................................................................................
|
| 166 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 167 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 168 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 169 |
+
llama_context: constructing llama_context
|
| 170 |
+
llama_context: n_seq_max = 8
|
| 171 |
+
llama_context: n_ctx = 4096
|
| 172 |
+
llama_context: n_ctx_seq = 512
|
| 173 |
+
llama_context: n_batch = 4096
|
| 174 |
+
llama_context: n_ubatch = 4096
|
| 175 |
+
llama_context: causal_attn = 1
|
| 176 |
+
llama_context: flash_attn = enabled
|
| 177 |
+
llama_context: kv_unified = false
|
| 178 |
+
llama_context: freq_base = 5000000.0
|
| 179 |
+
llama_context: freq_scale = 1
|
| 180 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 181 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 182 |
+
llama_kv_cache: CUDA0 KV buffer size = 160.00 MiB
|
| 183 |
+
llama_kv_cache: CUDA1 KV buffer size = 832.00 MiB
|
| 184 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 185 |
+
sched_reserve: reserving ...
|
| 186 |
+
sched_reserve: CUDA0 compute buffer size = 1645.00 MiB
|
| 187 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 188 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 189 |
+
sched_reserve: graph nodes = 4099
|
| 190 |
+
sched_reserve: graph splits = 189 (with bs=4096), 101 (with bs=1)
|
| 191 |
+
sched_reserve: reserve took 22.77 ms, sched copies = 1
|
| 192 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 193 |
+
|
| 194 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 195 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 196 |
+
kl_divergence: 6.78 seconds per pass - ETA 1.70 minutes
|
| 197 |
+
|
| 198 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 199 |
+
1 6.3550 ± 1.2163 -0.00282 ± 0.01855 0.02818 ± 0.00289 5.919 ± 0.628 % 92.549 ± 1.648 %
|
| 200 |
+
2 4.7048 ± 0.5651 0.00638 ± 0.01137 0.01951 ± 0.00163 4.462 ± 0.426 % 94.314 ± 1.026 %
|
| 201 |
+
3 4.5286 ± 0.4495 0.00893 ± 0.01076 0.02395 ± 0.00193 5.446 ± 0.427 % 94.248 ± 0.842 %
|
| 202 |
+
4 5.1353 ± 0.4550 0.00136 ± 0.00988 0.02694 ± 0.00207 5.626 ± 0.442 % 93.725 ± 0.760 %
|
| 203 |
+
5 4.9426 ± 0.3928 0.00968 ± 0.01007 0.02756 ± 0.00201 5.580 ± 0.391 % 93.804 ± 0.675 %
|
| 204 |
+
6 6.0541 ± 0.4731 0.00956 ± 0.00946 0.03093 ± 0.00183 5.471 ± 0.344 % 93.399 ± 0.635 %
|
| 205 |
+
7 5.6243 ± 0.3929 0.00776 ± 0.00929 0.03485 ± 0.00204 6.086 ± 0.387 % 93.109 ± 0.600 %
|
| 206 |
+
8 6.3735 ± 0.4240 0.00930 ± 0.00849 0.03417 ± 0.00182 5.834 ± 0.354 % 92.843 ± 0.571 %
|
| 207 |
+
9 6.2566 ± 0.3881 0.01007 ± 0.00777 0.03289 ± 0.00165 5.721 ± 0.330 % 92.898 ± 0.536 %
|
| 208 |
+
10 5.7259 ± 0.3311 0.01048 ± 0.00721 0.03157 ± 0.00150 5.693 ± 0.302 % 92.745 ± 0.514 %
|
| 209 |
+
11 6.2689 ± 0.3506 0.00926 ± 0.00684 0.03214 ± 0.00140 5.604 ± 0.282 % 92.585 ± 0.495 %
|
| 210 |
+
12 6.9444 ± 0.3775 0.01010 ± 0.00651 0.03410 ± 0.00183 5.585 ± 0.279 % 92.190 ± 0.485 %
|
| 211 |
+
13 7.2100 ± 0.3732 0.01036 ± 0.00611 0.03317 ± 0.00170 5.445 ± 0.265 % 92.187 ± 0.466 %
|
| 212 |
+
14 7.7819 ± 0.3929 0.01145 ± 0.00595 0.03312 ± 0.00164 5.319 ± 0.252 % 92.045 ± 0.453 %
|
| 213 |
+
15 8.1504 ± 0.3980 0.01159 ± 0.00572 0.03294 ± 0.00154 5.312 ± 0.239 % 91.974 ± 0.439 %
|
| 214 |
+
16 8.3803 ± 0.3953 0.00721 ± 0.00551 0.03221 ± 0.00144 5.215 ± 0.229 % 92.108 ± 0.422 %
|
| 215 |
+
17 8.6080 ± 0.3964 0.00690 ± 0.00537 0.03322 ± 0.00141 5.134 ± 0.220 % 92.065 ± 0.411 %
|
| 216 |
+
18 8.1197 ± 0.3608 0.00772 ± 0.00522 0.03343 ± 0.00138 5.104 ± 0.214 % 92.244 ± 0.395 %
|
| 217 |
+
19 8.2531 ± 0.3566 0.00822 ± 0.00504 0.03264 ± 0.00131 5.090 ± 0.205 % 92.301 ± 0.383 %
|
| 218 |
+
20 8.3060 ± 0.3499 0.00674 ± 0.00497 0.03345 ± 0.00127 5.114 ± 0.198 % 92.255 ± 0.374 %
|
| 219 |
+
21 8.2768 ± 0.3400 0.00579 ± 0.00481 0.03352 ± 0.00127 5.125 ± 0.195 % 92.362 ± 0.363 %
|
| 220 |
+
22 8.5932 ± 0.3482 0.00563 ± 0.00471 0.03409 ± 0.00124 5.147 ± 0.188 % 92.139 ± 0.359 %
|
| 221 |
+
23 8.5924 ± 0.3414 0.00433 ± 0.00480 0.03619 ± 0.00134 5.277 ± 0.188 % 92.123 ± 0.352 %
|
| 222 |
+
24 8.9860 ± 0.3513 0.00305 ± 0.00466 0.03592 ± 0.00129 5.197 ± 0.183 % 92.092 ± 0.345 %
|
| 223 |
+
25 8.9659 ± 0.3441 0.00221 ± 0.00459 0.03690 ± 0.00128 5.320 ± 0.180 % 92.047 ± 0.339 %
|
| 224 |
+
26 8.3848 ± 0.3123 0.00380 ± 0.00479 0.04158 ± 0.00160 6.317 ± 0.264 % 91.961 ± 0.334 %
|
| 225 |
+
27 7.9596 ± 0.2884 0.00569 ± 0.00492 0.04906 ± 0.00205 7.193 ± 0.281 % 91.779 ± 0.331 %
|
| 226 |
+
28 8.0894 ± 0.2887 0.00819 ± 0.00484 0.04914 ± 0.00199 7.155 ± 0.274 % 91.737 ± 0.326 %
|
| 227 |
+
29 8.0068 ± 0.2805 0.00740 ± 0.00475 0.04857 ± 0.00192 7.072 ± 0.267 % 91.832 ± 0.318 %
|
| 228 |
+
30 7.4909 ± 0.2558 0.00800 ± 0.00464 0.04723 ± 0.00187 6.988 ± 0.262 % 92.078 ± 0.309 %
|
| 229 |
+
31 7.0605 ± 0.2351 0.00818 ± 0.00455 0.04653 ± 0.00182 6.985 ± 0.258 % 92.258 ± 0.301 %
|
| 230 |
+
32 6.8836 ± 0.2237 0.00806 ± 0.00445 0.04588 ± 0.00177 6.974 ± 0.251 % 92.292 ± 0.295 %
|
| 231 |
+
33 6.7520 ± 0.2146 0.00864 ± 0.00437 0.04517 ± 0.00172 6.928 ± 0.245 % 92.276 ± 0.291 %
|
| 232 |
+
34 6.9362 ± 0.2184 0.00854 ± 0.00435 0.04581 ± 0.00168 6.909 ± 0.239 % 92.168 ± 0.289 %
|
| 233 |
+
35 7.0443 ± 0.2205 0.00937 ± 0.00430 0.04684 ± 0.00166 7.025 ± 0.236 % 91.989 ± 0.287 %
|
| 234 |
+
36 7.1012 ± 0.2200 0.00853 ± 0.00425 0.04657 ± 0.00162 6.984 ± 0.231 % 91.961 ± 0.284 %
|
| 235 |
+
37 7.1224 ± 0.2180 0.00922 ± 0.00429 0.04851 ± 0.00173 7.228 ± 0.241 % 91.913 ± 0.281 %
|
| 236 |
+
38 7.3405 ± 0.2228 0.00965 ± 0.00422 0.04837 ± 0.00169 7.195 ± 0.237 % 91.909 ± 0.277 %
|
| 237 |
+
39 7.2899 ± 0.2180 0.00967 ± 0.00417 0.04943 ± 0.00169 7.317 ± 0.235 % 91.865 ± 0.274 %
|
| 238 |
+
40 7.0586 ± 0.2069 0.01236 ± 0.00428 0.05402 ± 0.00181 7.886 ± 0.241 % 91.696 ± 0.273 %
|
| 239 |
+
41 6.8622 ± 0.1974 0.01496 ± 0.00442 0.05985 ± 0.00209 8.515 ± 0.249 % 91.516 ± 0.273 %
|
| 240 |
+
42 6.6717 ± 0.1885 0.01814 ± 0.00453 0.06381 ± 0.00216 8.983 ± 0.252 % 91.447 ± 0.270 %
|
| 241 |
+
43 6.4769 ± 0.1798 0.01963 ± 0.00457 0.06734 ± 0.00230 9.244 ± 0.252 % 91.427 ± 0.267 %
|
| 242 |
+
44 6.4228 ± 0.1754 0.01758 ± 0.00450 0.06646 ± 0.00225 9.169 ± 0.248 % 91.453 ± 0.264 %
|
| 243 |
+
45 6.5594 ± 0.1781 0.01673 ± 0.00446 0.06645 ± 0.00220 9.104 ± 0.245 % 91.425 ± 0.261 %
|
| 244 |
+
46 6.7005 ± 0.1803 0.01594 ± 0.00438 0.06575 ± 0.00216 9.020 ± 0.241 % 91.407 ± 0.259 %
|
| 245 |
+
47 6.8519 ± 0.1829 0.01607 ± 0.00430 0.06486 ± 0.00211 8.938 ± 0.239 % 91.389 ± 0.256 %
|
| 246 |
+
48 6.7384 ± 0.1770 0.01602 ± 0.00422 0.06393 ± 0.00207 8.866 ± 0.236 % 91.462 ± 0.253 %
|
| 247 |
+
49 6.8326 ± 0.1775 0.01488 ± 0.00426 0.06752 ± 0.00263 8.908 ± 0.236 % 91.309 ± 0.252 %
|
| 248 |
+
50 6.9401 ± 0.1796 0.01634 ± 0.00421 0.06711 ± 0.00258 8.852 ± 0.233 % 91.239 ± 0.250 %
|
| 249 |
+
51 7.0484 ± 0.1809 0.01642 ± 0.00414 0.06636 ± 0.00253 8.785 ± 0.230 % 91.303 ± 0.247 %
|
| 250 |
+
52 7.1116 ± 0.1806 0.01605 ± 0.00410 0.06649 ± 0.00248 8.749 ± 0.227 % 91.259 ± 0.245 %
|
| 251 |
+
53 7.2243 ± 0.1818 0.01598 ± 0.00404 0.06587 ± 0.00244 8.692 ± 0.225 % 91.254 ± 0.243 %
|
| 252 |
+
54 7.2760 ± 0.1810 0.01534 ± 0.00398 0.06519 ± 0.00239 8.630 ± 0.222 % 91.220 ± 0.241 %
|
| 253 |
+
55 7.3283 ± 0.1804 0.01547 ± 0.00392 0.06452 ± 0.00235 8.569 ± 0.220 % 91.251 ± 0.239 %
|
| 254 |
+
56 7.3658 ± 0.1797 0.01484 ± 0.00386 0.06386 ± 0.00231 8.509 ± 0.217 % 91.275 ± 0.236 %
|
| 255 |
+
57 7.3704 ± 0.1781 0.01552 ± 0.00384 0.06389 ± 0.00228 8.491 ± 0.214 % 91.242 ± 0.234 %
|
| 256 |
+
58 7.3760 ± 0.1767 0.01491 ± 0.00380 0.06348 ± 0.00224 8.466 ± 0.212 % 91.237 ± 0.233 %
|
| 257 |
+
59 7.3359 ± 0.1740 0.01459 ± 0.00374 0.06262 ± 0.00220 8.405 ± 0.210 % 91.293 ± 0.230 %
|
| 258 |
+
60 7.3465 ± 0.1728 0.01456 ± 0.00369 0.06218 ± 0.00217 8.359 ± 0.208 % 91.288 ± 0.228 %
|
| 259 |
+
61 7.3938 ± 0.1725 0.01433 ± 0.00365 0.06183 ± 0.00213 8.316 ± 0.205 % 91.308 ± 0.226 %
|
| 260 |
+
62 7.3650 ± 0.1706 0.01370 ± 0.00362 0.06136 ± 0.00210 8.286 ± 0.203 % 91.360 ± 0.223 %
|
| 261 |
+
63 7.4113 ± 0.1709 0.01364 ± 0.00360 0.06122 ± 0.00207 8.240 ± 0.201 % 91.348 ± 0.222 %
|
| 262 |
+
64 7.3895 ± 0.1686 0.01327 ± 0.00356 0.06094 ± 0.00204 8.200 ± 0.199 % 91.373 ± 0.220 %
|
| 263 |
+
65 7.3826 ± 0.1672 0.01351 ± 0.00353 0.06070 ± 0.00201 8.166 ± 0.197 % 91.403 ± 0.218 %
|
| 264 |
+
66 7.4184 ± 0.1669 0.01335 ± 0.00350 0.06044 ± 0.00199 8.144 ± 0.195 % 91.355 ± 0.217 %
|
| 265 |
+
67 7.4318 ± 0.1661 0.01420 ± 0.00350 0.06056 ± 0.00197 8.116 ± 0.193 % 91.384 ± 0.215 %
|
| 266 |
+
68 7.3868 ± 0.1636 0.01410 ± 0.00347 0.06006 ± 0.00194 8.068 ± 0.191 % 91.419 ± 0.213 %
|
| 267 |
+
69 7.4199 ± 0.1632 0.01411 ± 0.00343 0.05977 ± 0.00191 8.032 ± 0.189 % 91.458 ± 0.211 %
|
| 268 |
+
70 7.3862 ± 0.1610 0.01336 ± 0.00340 0.05945 ± 0.00189 8.001 ± 0.187 % 91.501 ± 0.209 %
|
| 269 |
+
71 7.3705 ± 0.1596 0.01322 ± 0.00336 0.05910 ± 0.00186 7.971 ± 0.186 % 91.527 ± 0.207 %
|
| 270 |
+
72 7.3924 ± 0.1592 0.01401 ± 0.00336 0.05889 ± 0.00184 7.936 ± 0.184 % 91.503 ± 0.206 %
|
| 271 |
+
73 7.4022 ± 0.1582 0.01459 ± 0.00333 0.05864 ± 0.00181 7.912 ± 0.183 % 91.507 ± 0.204 %
|
| 272 |
+
74 7.3956 ± 0.1569 0.01453 ± 0.00331 0.05861 ± 0.00179 7.885 ± 0.181 % 91.479 ± 0.203 %
|
| 273 |
+
75 7.4004 ± 0.1560 0.01427 ± 0.00329 0.05856 ± 0.00177 7.874 ± 0.179 % 91.441 ± 0.202 %
|
| 274 |
+
76 7.4648 ± 0.1565 0.01445 ± 0.00326 0.05840 ± 0.00175 7.861 ± 0.177 % 91.424 ± 0.201 %
|
| 275 |
+
77 7.4607 ± 0.1554 0.01417 ± 0.00323 0.05813 ± 0.00173 7.825 ± 0.176 % 91.464 ± 0.199 %
|
| 276 |
+
78 7.4735 ± 0.1548 0.01405 ± 0.00321 0.05805 ± 0.00171 7.813 ± 0.174 % 91.468 ± 0.198 %
|
| 277 |
+
79 7.4856 ± 0.1542 0.01421 ± 0.00319 0.05782 ± 0.00169 7.783 ± 0.173 % 91.482 ± 0.197 %
|
| 278 |
+
80 7.4923 ± 0.1538 0.01486 ± 0.00320 0.05818 ± 0.00168 7.773 ± 0.171 % 91.475 ± 0.196 %
|
| 279 |
+
81 7.4649 ± 0.1523 0.01429 ± 0.00317 0.05796 ± 0.00166 7.750 ± 0.170 % 91.484 ± 0.194 %
|
| 280 |
+
82 7.4473 ± 0.1508 0.01486 ± 0.00314 0.05761 ± 0.00164 7.721 ± 0.169 % 91.492 ± 0.193 %
|
| 281 |
+
83 7.4791 ± 0.1503 0.01480 ± 0.00311 0.05724 ± 0.00162 7.685 ± 0.167 % 91.505 ± 0.192 %
|
| 282 |
+
84 7.4966 ± 0.1495 0.01496 ± 0.00308 0.05686 ± 0.00160 7.654 ± 0.166 % 91.499 ± 0.191 %
|
| 283 |
+
85 7.4911 ± 0.1482 0.01479 ± 0.00305 0.05647 ± 0.00158 7.621 ± 0.165 % 91.506 ± 0.189 %
|
| 284 |
+
86 7.4251 ± 0.1456 0.01497 ± 0.00303 0.05609 ± 0.00156 7.595 ± 0.164 % 91.500 ± 0.188 %
|
| 285 |
+
87 7.3650 ± 0.1431 0.01465 ± 0.00300 0.05575 ± 0.00155 7.567 ± 0.162 % 91.517 ± 0.187 %
|
| 286 |
+
88 7.3062 ± 0.1408 0.01497 ± 0.00297 0.05542 ± 0.00153 7.538 ± 0.161 % 91.529 ± 0.186 %
|
| 287 |
+
89 7.2350 ± 0.1381 0.01515 ± 0.00295 0.05511 ± 0.00151 7.517 ± 0.160 % 91.558 ± 0.185 %
|
| 288 |
+
90 7.1804 ± 0.1360 0.01528 ± 0.00293 0.05478 ± 0.00150 7.487 ± 0.159 % 91.595 ± 0.183 %
|
| 289 |
+
91 7.1287 ± 0.1339 0.01516 ± 0.00290 0.05439 ± 0.00148 7.459 ± 0.158 % 91.640 ± 0.182 %
|
| 290 |
+
92 7.0703 ± 0.1317 0.01500 ± 0.00287 0.05404 ± 0.00146 7.434 ± 0.156 % 91.645 ± 0.181 %
|
| 291 |
+
93 7.0924 ± 0.1316 0.01561 ± 0.00287 0.05452 ± 0.00148 7.488 ± 0.159 % 91.609 ± 0.180 %
|
| 292 |
+
94 7.1240 ± 0.1315 0.01572 ± 0.00285 0.05417 ± 0.00146 7.457 ± 0.158 % 91.619 ± 0.179 %
|
| 293 |
+
95 7.2334 ± 0.1331 0.01577 ± 0.00283 0.05402 ± 0.00145 7.431 ± 0.157 % 91.571 ± 0.179 %
|
| 294 |
+
96 7.3243 ± 0.1342 0.01530 ± 0.00281 0.05406 ± 0.00144 7.404 ± 0.155 % 91.532 ± 0.178 %
|
| 295 |
+
97 7.3990 ± 0.1349 0.01479 ± 0.00279 0.05383 ± 0.00143 7.376 ± 0.154 % 91.510 ± 0.177 %
|
| 296 |
+
98 7.5390 ± 0.1374 0.01538 ± 0.00278 0.05365 ± 0.00141 7.346 ± 0.154 % 91.477 ± 0.177 %
|
| 297 |
+
99 7.6568 ± 0.1392 0.01533 ± 0.00276 0.05353 ± 0.00140 7.316 ± 0.153 % 91.428 ± 0.176 %
|
| 298 |
+
100 7.6914 ± 0.1391 0.01495 ± 0.00274 0.05347 ± 0.00139 7.300 ± 0.152 % 91.404 ± 0.176 %
|
| 299 |
+
101 7.7275 ± 0.1393 0.01479 ± 0.00273 0.05384 ± 0.00146 7.324 ± 0.153 % 91.396 ± 0.175 %
|
| 300 |
+
102 7.7897 ± 0.1402 0.01492 ± 0.00272 0.05376 ± 0.00145 7.313 ± 0.152 % 91.376 ± 0.174 %
|
| 301 |
+
103 7.7605 ± 0.1392 0.01453 ± 0.00270 0.05349 ± 0.00144 7.311 ± 0.151 % 91.388 ± 0.173 %
|
| 302 |
+
104 7.7043 ± 0.1373 0.01478 ± 0.00269 0.05364 ± 0.00143 7.364 ± 0.151 % 91.403 ± 0.172 %
|
| 303 |
+
105 7.5940 ± 0.1345 0.01487 ± 0.00269 0.05374 ± 0.00142 7.406 ± 0.151 % 91.436 ± 0.171 %
|
| 304 |
+
106 7.4634 ± 0.1312 0.01482 ± 0.00268 0.05362 ± 0.00141 7.453 ± 0.152 % 91.491 ± 0.170 %
|
| 305 |
+
107 7.5206 ± 0.1316 0.01473 ± 0.00266 0.05335 ± 0.00140 7.432 ± 0.151 % 91.482 ± 0.169 %
|
| 306 |
+
108 7.5321 ± 0.1311 0.01447 ± 0.00264 0.05304 ± 0.00139 7.405 ± 0.150 % 91.503 ± 0.168 %
|
| 307 |
+
109 7.5540 ± 0.1310 0.01465 ± 0.00262 0.05285 ± 0.00138 7.386 ± 0.149 % 91.509 ± 0.167 %
|
| 308 |
+
110 7.5891 ± 0.1310 0.01459 ± 0.00260 0.05261 ± 0.00136 7.365 ± 0.148 % 91.512 ± 0.166 %
|
| 309 |
+
111 7.6357 ± 0.1311 0.01440 ± 0.00259 0.05240 ± 0.00135 7.339 ± 0.148 % 91.500 ± 0.166 %
|
| 310 |
+
112 7.6427 ± 0.1306 0.01398 ± 0.00257 0.05213 ± 0.00134 7.316 ± 0.147 % 91.520 ± 0.165 %
|
| 311 |
+
113 7.6506 ± 0.1300 0.01364 ± 0.00255 0.05190 ± 0.00133 7.295 ± 0.146 % 91.522 ± 0.164 %
|
| 312 |
+
114 7.6663 ± 0.1298 0.01335 ± 0.00254 0.05170 ± 0.00132 7.270 ± 0.145 % 91.514 ± 0.163 %
|
| 313 |
+
115 7.6497 ± 0.1288 0.01354 ± 0.00253 0.05179 ± 0.00131 7.289 ± 0.144 % 91.529 ± 0.163 %
|
| 314 |
+
116 7.6480 ± 0.1283 0.01470 ± 0.00254 0.05302 ± 0.00132 7.416 ± 0.144 % 91.474 ± 0.162 %
|
| 315 |
+
117 7.5554 ± 0.1259 0.01608 ± 0.00255 0.05442 ± 0.00133 7.574 ± 0.145 % 91.446 ± 0.162 %
|
| 316 |
+
118 7.4655 ± 0.1236 0.01698 ± 0.00255 0.05579 ± 0.00135 7.752 ± 0.146 % 91.416 ± 0.161 %
|
| 317 |
+
119 7.3776 ± 0.1214 0.01857 ± 0.00257 0.05696 ± 0.00136 7.927 ± 0.147 % 91.402 ± 0.161 %
|
| 318 |
+
120 7.3005 ± 0.1193 0.01942 ± 0.00257 0.05828 ± 0.00137 8.096 ± 0.147 % 91.366 ± 0.161 %
|
| 319 |
+
121 7.2244 ± 0.1174 0.02058 ± 0.00259 0.05940 ± 0.00138 8.247 ± 0.148 % 91.369 ± 0.160 %
|
| 320 |
+
|
| 321 |
+
====== Perplexity statistics ======
|
| 322 |
+
Mean PPL(Q) : 7.224376 ± 0.117420
|
| 323 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 324 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.73%
|
| 325 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.020577 ± 0.002586
|
| 326 |
+
Mean PPL(Q)/PPL(base) : 1.020790 ± 0.002639
|
| 327 |
+
Mean PPL(Q)-PPL(base) : 0.147136 ± 0.018739
|
| 328 |
+
|
| 329 |
+
====== KL divergence statistics ======
|
| 330 |
+
Mean KLD: 0.059396 ± 0.001379
|
| 331 |
+
Maximum KLD: 13.867476
|
| 332 |
+
99.9% KLD: 2.728867
|
| 333 |
+
99.0% KLD: 0.916992
|
| 334 |
+
95.0% KLD: 0.209868
|
| 335 |
+
90.0% KLD: 0.106275
|
| 336 |
+
Median KLD: 0.013731
|
| 337 |
+
10.0% KLD: 0.000063
|
| 338 |
+
5.0% KLD: 0.000011
|
| 339 |
+
1.0% KLD: 0.000000
|
| 340 |
+
0.1% KLD: -0.000002
|
| 341 |
+
Minimum KLD: -0.000008
|
| 342 |
+
|
| 343 |
+
====== Token probability statistics ======
|
| 344 |
+
Mean Δp: -0.495 ± 0.047 %
|
| 345 |
+
Maximum Δp: 98.983%
|
| 346 |
+
99.9% Δp: 62.163%
|
| 347 |
+
99.0% Δp: 21.613%
|
| 348 |
+
95.0% Δp: 7.080%
|
| 349 |
+
90.0% Δp: 3.568%
|
| 350 |
+
75.0% Δp: 0.448%
|
| 351 |
+
Median Δp: -0.001%
|
| 352 |
+
25.0% Δp: -0.802%
|
| 353 |
+
10.0% Δp: -4.696%
|
| 354 |
+
5.0% Δp: -9.330%
|
| 355 |
+
1.0% Δp: -30.774%
|
| 356 |
+
0.1% Δp: -76.276%
|
| 357 |
+
Minimum Δp: -99.429%
|
| 358 |
+
RMS Δp : 8.247 ± 0.148 %
|
| 359 |
+
Same top p: 91.369 ± 0.160 %
|
| 360 |
+
|
| 361 |
+
llama_perf_context_print: load time = 47320.88 ms
|
| 362 |
+
llama_perf_context_print: prompt eval time = 94835.36 ms / 61952 tokens ( 1.53 ms per token, 653.26 tokens per second)
|
| 363 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 364 |
+
llama_perf_context_print: total time = 110650.19 ms / 61953 tokens
|
| 365 |
+
llama_perf_context_print: graphs reused = 0
|
| 366 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 367 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1383 + ( 22297 = 20492 + 160 + 1644) + 453 |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1347 + ( 22334 = 18328 + 832 + 3174) + 453 |
|
| 369 |
+
llama_memory_breakdown_print: | - Host | 131542 = 131438 + 0 + 104 |
|
| 370 |
+
```
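For orientation when reading these reports: "Mean KLD" averages, over every scored token position, the KL divergence from the BF16 reference distribution to the quantized model's distribution, and Δp tracks how the probability of the observed token shifts. A minimal sketch of those definitions, assuming the usual formulation of llama-perplexity's --kl-divergence mode (the symbols below are illustrative, not taken from the logs):

```latex
% Assumed formulation: p = BF16 reference distribution, q = quantized model's
% distribution over the vocabulary V at token position t, T = number of scored tokens.
\mathrm{Mean\ KLD} \;=\; \frac{1}{T} \sum_{t=1}^{T} \sum_{i \in V} p_{t,i}\,\ln\frac{p_{t,i}}{q_{t,i}}
% \Delta p at position t is the probability shift on the observed token x_t, so a
% negative "Mean \Delta p" means the quant assigns slightly less mass to it on average.
\Delta p_t \;=\; q_{t,x_t} - p_{t,x_t}
```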
|
kld_data/unsloth/Q4_K_S/MiniMax-M2.5-Q4_K_S.md
ADDED
|
@@ -0,0 +1,371 @@
|
| 1 |
+
### MiniMax-M2.5-Q4_K_S (unsloth)
|
| 2 |
+
|
| 3 |
+
121.10 GiB (4.55 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_K_S/MiniMax-M2.5-Q4_K_S-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 65579 used, -41707 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 63655 used, -39783 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 129234 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 83539 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37821 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8390 MiB used, 15481 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1491 MiB used, 22379 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 11 layers ( 1 overflowing), 22263 MiB used, 1607 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 52 layers (45 overflowing), 22682 MiB used, 1189 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.08 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q4_K_S/MiniMax-M2.5-Q4_K_S-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 14
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 424 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 11 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 89 |
+
print_info: file format = GGUF V3 (latest)
|
| 90 |
+
print_info: file type = Q4_K - Small
|
| 91 |
+
print_info: file size = 121.10 GiB (4.55 BPW)
|
| 92 |
+
load: 0 unused tokens
|
| 93 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 94 |
+
load: printing all EOG tokens:
|
| 95 |
+
load: - 200004 ('<fim_pad>')
|
| 96 |
+
load: - 200005 ('<reponame>')
|
| 97 |
+
load: - 200020 ('[e~[')
|
| 98 |
+
load: special tokens cache size = 54
|
| 99 |
+
load: token to piece cache size = 1.3355 MB
|
| 100 |
+
print_info: arch = minimax-m2
|
| 101 |
+
print_info: vocab_only = 0
|
| 102 |
+
print_info: no_alloc = 0
|
| 103 |
+
print_info: n_ctx_train = 196608
|
| 104 |
+
print_info: n_embd = 3072
|
| 105 |
+
print_info: n_embd_inp = 3072
|
| 106 |
+
print_info: n_layer = 62
|
| 107 |
+
print_info: n_head = 48
|
| 108 |
+
print_info: n_head_kv = 8
|
| 109 |
+
print_info: n_rot = 64
|
| 110 |
+
print_info: n_swa = 0
|
| 111 |
+
print_info: is_swa_any = 0
|
| 112 |
+
print_info: n_embd_head_k = 128
|
| 113 |
+
print_info: n_embd_head_v = 128
|
| 114 |
+
print_info: n_gqa = 6
|
| 115 |
+
print_info: n_embd_k_gqa = 1024
|
| 116 |
+
print_info: n_embd_v_gqa = 1024
|
| 117 |
+
print_info: f_norm_eps = 0.0e+00
|
| 118 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 119 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 120 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 121 |
+
print_info: f_logit_scale = 0.0e+00
|
| 122 |
+
print_info: f_attn_scale = 0.0e+00
|
| 123 |
+
print_info: n_ff = 1536
|
| 124 |
+
print_info: n_expert = 256
|
| 125 |
+
print_info: n_expert_used = 8
|
| 126 |
+
print_info: n_expert_groups = 0
|
| 127 |
+
print_info: n_group_used = 0
|
| 128 |
+
print_info: causal attn = 1
|
| 129 |
+
print_info: pooling type = 0
|
| 130 |
+
print_info: rope type = 2
|
| 131 |
+
print_info: rope scaling = linear
|
| 132 |
+
print_info: freq_base_train = 5000000.0
|
| 133 |
+
print_info: freq_scale_train = 1
|
| 134 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 135 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 136 |
+
print_info: rope_finetuned = unknown
|
| 137 |
+
print_info: model type = 230B.A10B
|
| 138 |
+
print_info: model params = 228.69 B
|
| 139 |
+
print_info: general.name = Minimax-M2.5
|
| 140 |
+
print_info: vocab type = BPE
|
| 141 |
+
print_info: n_vocab = 200064
|
| 142 |
+
print_info: n_merges = 199744
|
| 143 |
+
print_info: BOS token = 200034 ']~!b['
|
| 144 |
+
print_info: EOS token = 200020 '[e~['
|
| 145 |
+
print_info: UNK token = 200021 ']!d~['
|
| 146 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 147 |
+
print_info: LF token = 10 'Ċ'
|
| 148 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 149 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 150 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 151 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 155 |
+
print_info: EOG token = 200020 '[e~['
|
| 156 |
+
print_info: max token length = 256
|
| 157 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 158 |
+
load_tensors: offloading output layer to GPU
|
| 159 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 160 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46664.76 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47272.58 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 29536.49 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 20739.94 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18692.33 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 176.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 816.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1348.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 179 (with bs=4096), 93 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 22.72 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 6.35 seconds per pass - ETA 1.60 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.5061 ± 1.2599 0.02067 ± 0.02423 0.03067 ± 0.00369 6.143 ± 0.729 % 90.980 ± 1.797 %
|
| 201 |
+
2 4.7367 ± 0.5719 0.01312 ± 0.01489 0.02133 ± 0.00202 4.702 ± 0.486 % 93.922 ± 1.059 %
|
| 202 |
+
3 4.5219 ± 0.4494 0.00743 ± 0.01284 0.02620 ± 0.00217 5.777 ± 0.486 % 94.118 ± 0.851 %
|
| 203 |
+
4 5.1598 ± 0.4590 0.00612 ± 0.01174 0.03108 ± 0.00253 6.604 ± 0.587 % 93.431 ± 0.776 %
|
| 204 |
+
5 4.9350 ± 0.3935 0.00813 ± 0.01134 0.03069 ± 0.00226 6.398 ± 0.503 % 93.176 ± 0.706 %
|
| 205 |
+
6 6.0333 ± 0.4722 0.00611 ± 0.01039 0.03446 ± 0.00206 6.128 ± 0.443 % 92.876 ± 0.658 %
|
| 206 |
+
7        5.6221 ±   0.3938    0.00736 ±  0.00996     0.03969 ±  0.00235     6.824 ±  0.451 %    92.549 ±  0.622 %
|
| 207 |
+
8 6.3441 ± 0.4213 0.00467 ± 0.00911 0.03904 ± 0.00209 6.536 ± 0.413 % 92.157 ± 0.595 %
|
| 208 |
+
9 6.2335 ± 0.3860 0.00637 ± 0.00830 0.03757 ± 0.00189 6.332 ± 0.381 % 92.200 ± 0.560 %
|
| 209 |
+
10 5.7152 ± 0.3297 0.00861 ± 0.00773 0.03602 ± 0.00173 6.283 ± 0.351 % 92.275 ± 0.529 %
|
| 210 |
+
11 6.2607 ± 0.3494 0.00796 ± 0.00732 0.03600 ± 0.00159 6.135 ± 0.328 % 92.228 ± 0.506 %
|
| 211 |
+
12 6.9476 ± 0.3771 0.01057 ± 0.00696 0.03785 ± 0.00196 6.084 ± 0.320 % 91.895 ± 0.493 %
|
| 212 |
+
13 7.2132 ± 0.3728 0.01081 ± 0.00653 0.03703 ± 0.00184 5.948 ± 0.302 % 91.946 ± 0.473 %
|
| 213 |
+
14 7.7923 ± 0.3928 0.01279 ± 0.00632 0.03676 ± 0.00174 5.783 ± 0.289 % 91.737 ± 0.461 %
|
| 214 |
+
15 8.1531 ± 0.3972 0.01193 ± 0.00603 0.03633 ± 0.00163 5.737 ± 0.275 % 91.712 ± 0.446 %
|
| 215 |
+
16 8.3820 ± 0.3946 0.00741 ± 0.00580 0.03550 ± 0.00154 5.644 ± 0.263 % 91.814 ± 0.429 %
|
| 216 |
+
17 8.6139 ± 0.3960 0.00759 ± 0.00564 0.03635 ± 0.00149 5.543 ± 0.253 % 91.765 ± 0.418 %
|
| 217 |
+
18 8.1236 ± 0.3605 0.00821 ± 0.00549 0.03673 ± 0.00146 5.502 ± 0.243 % 91.961 ± 0.401 %
|
| 218 |
+
19 8.2566 ± 0.3562 0.00865 ± 0.00533 0.03590 ± 0.00139 5.472 ± 0.233 % 91.950 ± 0.391 %
|
| 219 |
+
20 8.3172 ± 0.3501 0.00808 ± 0.00527 0.03703 ± 0.00136 5.512 ± 0.227 % 91.804 ± 0.384 %
|
| 220 |
+
21 8.2845 ± 0.3400 0.00671 ± 0.00509 0.03679 ± 0.00132 5.534 ± 0.222 % 91.877 ± 0.373 %
|
| 221 |
+
22 8.5965 ± 0.3479 0.00601 ± 0.00498 0.03762 ± 0.00129 5.599 ± 0.215 % 91.622 ± 0.370 %
|
| 222 |
+
23 8.6110 ± 0.3420 0.00649 ± 0.00511 0.04016 ± 0.00141 5.786 ± 0.220 % 91.543 ± 0.363 %
|
| 223 |
+
24 9.0087 ± 0.3522 0.00557 ± 0.00497 0.03978 ± 0.00136 5.706 ± 0.214 % 91.520 ± 0.356 %
|
| 224 |
+
25 8.9853 ± 0.3446 0.00436 ± 0.00485 0.04066 ± 0.00133 5.750 ± 0.206 % 91.467 ± 0.350 %
|
| 225 |
+
26 8.4195 ± 0.3136 0.00793 ± 0.00510 0.04591 ± 0.00178 6.668 ± 0.271 % 91.357 ± 0.345 %
|
| 226 |
+
27 8.0266 ± 0.2908 0.01407 ± 0.00530 0.05417 ± 0.00226 7.629 ± 0.297 % 91.155 ± 0.342 %
|
| 227 |
+
28 8.1553 ± 0.2909 0.01630 ± 0.00528 0.05508 ± 0.00227 7.646 ± 0.293 % 90.966 ± 0.339 %
|
| 228 |
+
29 8.0745 ± 0.2829 0.01582 ± 0.00517 0.05464 ± 0.00220 7.569 ± 0.286 % 91.062 ± 0.332 %
|
| 229 |
+
30 7.5532 ± 0.2580 0.01628 ± 0.00504 0.05329 ± 0.00214 7.497 ± 0.280 % 91.346 ± 0.321 %
|
| 230 |
+
31 7.1159 ± 0.2369 0.01601 ± 0.00492 0.05234 ± 0.00208 7.438 ± 0.274 % 91.550 ± 0.313 %
|
| 231 |
+
32 6.9347 ± 0.2254 0.01545 ± 0.00481 0.05149 ± 0.00202 7.410 ± 0.267 % 91.593 ± 0.307 %
|
| 232 |
+
33 6.7988 ± 0.2161 0.01556 ± 0.00470 0.05072 ± 0.00196 7.371 ± 0.260 % 91.622 ± 0.302 %
|
| 233 |
+
34 6.9819 ± 0.2198 0.01511 ± 0.00468 0.05161 ± 0.00192 7.341 ± 0.254 % 91.499 ± 0.300 %
|
| 234 |
+
35 7.0975 ± 0.2222 0.01688 ± 0.00468 0.05292 ± 0.00190 7.454 ± 0.251 % 91.361 ± 0.297 %
|
| 235 |
+
36 7.1528 ± 0.2216 0.01577 ± 0.00460 0.05262 ± 0.00185 7.400 ± 0.246 % 91.362 ± 0.293 %
|
| 236 |
+
37 7.1688 ± 0.2194 0.01571 ± 0.00461 0.05415 ± 0.00190 7.574 ± 0.244 % 91.256 ± 0.291 %
|
| 237 |
+
38 7.3835 ± 0.2240 0.01549 ± 0.00452 0.05390 ± 0.00185 7.522 ± 0.240 % 91.228 ± 0.287 %
|
| 238 |
+
39 7.3353 ± 0.2193 0.01589 ± 0.00450 0.05539 ± 0.00187 7.707 ± 0.242 % 91.151 ± 0.285 %
|
| 239 |
+
40 7.1011 ± 0.2081 0.01836 ± 0.00460 0.06078 ± 0.00201 8.316 ± 0.250 % 90.971 ± 0.284 %
|
| 240 |
+
41 6.9109 ± 0.1988 0.02204 ± 0.00474 0.06702 ± 0.00225 8.995 ± 0.264 % 90.780 ± 0.283 %
|
| 241 |
+
42 6.7152 ± 0.1897 0.02463 ± 0.00488 0.07199 ± 0.00240 9.438 ± 0.265 % 90.682 ± 0.281 %
|
| 242 |
+
43 6.5218 ± 0.1810 0.02655 ± 0.00494 0.07705 ± 0.00263 9.888 ± 0.272 % 90.643 ± 0.278 %
|
| 243 |
+
44 6.4692 ± 0.1766 0.02477 ± 0.00485 0.07597 ± 0.00257 9.802 ± 0.268 % 90.722 ± 0.274 %
|
| 244 |
+
45 6.6130 ± 0.1796 0.02488 ± 0.00478 0.07564 ± 0.00252 9.727 ± 0.264 % 90.693 ± 0.271 %
|
| 245 |
+
46        6.7527 ±   0.1817    0.02369 ±  0.00470     0.07483 ±  0.00246     9.646 ±  0.261 %    90.665 ±  0.269 %
|
| 246 |
+
47 6.9039 ± 0.1844 0.02363 ± 0.00462 0.07387 ± 0.00241 9.560 ± 0.257 % 90.638 ± 0.266 %
|
| 247 |
+
48 6.7866 ± 0.1783 0.02315 ± 0.00453 0.07274 ± 0.00236 9.477 ± 0.254 % 90.727 ± 0.262 %
|
| 248 |
+
49 6.8819 ± 0.1789 0.02208 ± 0.00461 0.07563 ± 0.00254 9.544 ± 0.255 % 90.540 ± 0.262 %
|
| 249 |
+
50 6.9800 ± 0.1805 0.02208 ± 0.00455 0.07508 ± 0.00249 9.486 ± 0.252 % 90.463 ± 0.260 %
|
| 250 |
+
51 7.0894 ± 0.1819 0.02223 ± 0.00447 0.07417 ± 0.00244 9.412 ± 0.249 % 90.565 ± 0.256 %
|
| 251 |
+
52 7.1561 ± 0.1817 0.02228 ± 0.00441 0.07419 ± 0.00240 9.369 ± 0.245 % 90.573 ± 0.254 %
|
| 252 |
+
53 7.2689 ± 0.1829 0.02214 ± 0.00436 0.07368 ± 0.00236 9.328 ± 0.242 % 90.603 ± 0.251 %
|
| 253 |
+
54 7.3215 ± 0.1821 0.02158 ± 0.00429 0.07283 ± 0.00232 9.258 ± 0.240 % 90.552 ± 0.249 %
|
| 254 |
+
55 7.3723 ± 0.1814 0.02146 ± 0.00422 0.07199 ± 0.00228 9.190 ± 0.237 % 90.581 ± 0.247 %
|
| 255 |
+
56 7.4108 ± 0.1807 0.02093 ± 0.00416 0.07122 ± 0.00224 9.122 ± 0.235 % 90.609 ± 0.244 %
|
| 256 |
+
57 7.4147 ± 0.1792 0.02152 ± 0.00414 0.07121 ± 0.00220 9.106 ± 0.232 % 90.561 ± 0.243 %
|
| 257 |
+
58 7.4199 ± 0.1777 0.02086 ± 0.00409 0.07078 ± 0.00217 9.076 ± 0.229 % 90.561 ± 0.240 %
|
| 258 |
+
59 7.3784 ± 0.1749 0.02036 ± 0.00402 0.06982 ± 0.00213 9.011 ± 0.227 % 90.628 ± 0.238 %
|
| 259 |
+
60 7.3917 ± 0.1739 0.02069 ± 0.00398 0.06946 ± 0.00210 8.963 ± 0.225 % 90.601 ± 0.236 %
|
| 260 |
+
61 7.4380 ± 0.1735 0.02029 ± 0.00393 0.06905 ± 0.00207 8.917 ± 0.222 % 90.653 ± 0.233 %
|
| 261 |
+
62 7.4044 ± 0.1715 0.01903 ± 0.00390 0.06858 ± 0.00204 8.879 ± 0.220 % 90.708 ± 0.231 %
|
| 262 |
+
63 7.4537 ± 0.1718 0.01935 ± 0.00387 0.06823 ± 0.00201 8.830 ± 0.218 % 90.694 ± 0.229 %
|
| 263 |
+
64 7.4343 ± 0.1696 0.01931 ± 0.00382 0.06781 ± 0.00198 8.779 ± 0.215 % 90.735 ± 0.227 %
|
| 264 |
+
65 7.4272 ± 0.1682 0.01953 ± 0.00380 0.06753 ± 0.00196 8.755 ± 0.213 % 90.775 ± 0.225 %
|
| 265 |
+
66 7.4690 ± 0.1681 0.02014 ± 0.00377 0.06737 ± 0.00193 8.733 ± 0.211 % 90.725 ± 0.224 %
|
| 266 |
+
67 7.4788 ± 0.1672 0.02051 ± 0.00374 0.06721 ± 0.00190 8.697 ± 0.209 % 90.781 ± 0.221 %
|
| 267 |
+
68 7.4330 ± 0.1647 0.02034 ± 0.00370 0.06667 ± 0.00188 8.649 ± 0.207 % 90.813 ± 0.219 %
|
| 268 |
+
69 7.4648 ± 0.1643 0.02015 ± 0.00367 0.06630 ± 0.00185 8.605 ± 0.205 % 90.867 ± 0.217 %
|
| 269 |
+
70 7.4335 ± 0.1621 0.01973 ± 0.00363 0.06591 ± 0.00183 8.571 ± 0.203 % 90.891 ± 0.215 %
|
| 270 |
+
71 7.4181 ± 0.1607 0.01966 ± 0.00359 0.06554 ± 0.00180 8.538 ± 0.201 % 90.936 ± 0.213 %
|
| 271 |
+
72 7.4395 ± 0.1604 0.02036 ± 0.00360 0.06529 ± 0.00178 8.501 ± 0.199 % 90.937 ± 0.212 %
|
| 272 |
+
73 7.4488 ± 0.1593 0.02087 ± 0.00357 0.06514 ± 0.00176 8.487 ± 0.198 % 90.889 ± 0.211 %
|
| 273 |
+
74 7.4396 ± 0.1579 0.02046 ± 0.00354 0.06506 ± 0.00174 8.452 ± 0.196 % 90.859 ± 0.210 %
|
| 274 |
+
75 7.4435 ± 0.1570 0.02008 ± 0.00352 0.06514 ± 0.00172 8.453 ± 0.194 % 90.829 ± 0.209 %
|
| 275 |
+
76 7.5089 ± 0.1575 0.02034 ± 0.00350 0.06503 ± 0.00171 8.434 ± 0.192 % 90.820 ± 0.207 %
|
| 276 |
+
77 7.5066 ± 0.1565 0.02030 ± 0.00347 0.06476 ± 0.00169 8.393 ± 0.190 % 90.833 ± 0.206 %
|
| 277 |
+
78 7.5208 ± 0.1559 0.02037 ± 0.00345 0.06461 ± 0.00167 8.381 ± 0.189 % 90.840 ± 0.205 %
|
| 278 |
+
79 7.5306 ± 0.1552 0.02021 ± 0.00342 0.06431 ± 0.00165 8.341 ± 0.187 % 90.841 ± 0.203 %
|
| 279 |
+
80 7.5354 ± 0.1549 0.02060 ± 0.00343 0.06447 ± 0.00163 8.322 ± 0.185 % 90.848 ± 0.202 %
|
| 280 |
+
81 7.5071 ± 0.1533 0.01993 ± 0.00340 0.06421 ± 0.00161 8.290 ± 0.184 % 90.850 ± 0.201 %
|
| 281 |
+
82 7.4891 ± 0.1518 0.02046 ± 0.00337 0.06384 ± 0.00159 8.259 ± 0.182 % 90.885 ± 0.199 %
|
| 282 |
+
83 7.5202 ± 0.1513 0.02029 ± 0.00334 0.06342 ± 0.00158 8.221 ± 0.181 % 90.876 ± 0.198 %
|
| 283 |
+
84 7.5344 ± 0.1504 0.01999 ± 0.00330 0.06296 ± 0.00156 8.184 ± 0.180 % 90.887 ± 0.197 %
|
| 284 |
+
85 7.5294 ± 0.1491 0.01989 ± 0.00327 0.06254 ± 0.00154 8.148 ± 0.178 % 90.884 ± 0.196 %
|
| 285 |
+
86 7.4606 ± 0.1464 0.01974 ± 0.00324 0.06219 ± 0.00152 8.123 ± 0.177 % 90.889 ± 0.194 %
|
| 286 |
+
87 7.4009 ± 0.1439 0.01952 ± 0.00321 0.06180 ± 0.00151 8.095 ± 0.176 % 90.913 ± 0.193 %
|
| 287 |
+
88 7.3399 ± 0.1415 0.01957 ± 0.00318 0.06138 ± 0.00149 8.061 ± 0.174 % 90.954 ± 0.191 %
|
| 288 |
+
89 7.2685 ± 0.1389 0.01977 ± 0.00316 0.06108 ± 0.00148 8.039 ± 0.173 % 90.972 ± 0.190 %
|
| 289 |
+
90 7.2139 ± 0.1367 0.01993 ± 0.00313 0.06070 ± 0.00146 8.006 ± 0.172 % 91.020 ± 0.189 %
|
| 290 |
+
91 7.1608 ± 0.1346 0.01965 ± 0.00311 0.06028 ± 0.00144 7.975 ± 0.171 % 91.058 ± 0.187 %
|
| 291 |
+
92 7.1031 ± 0.1324 0.01962 ± 0.00308 0.05999 ± 0.00143 7.970 ± 0.170 % 91.061 ± 0.186 %
|
| 292 |
+
93 7.1213 ± 0.1322 0.01968 ± 0.00307 0.06098 ± 0.00154 8.012 ± 0.171 % 91.052 ± 0.185 %
|
| 293 |
+
94 7.1517 ± 0.1320 0.01961 ± 0.00304 0.06061 ± 0.00153 7.982 ± 0.170 % 91.060 ± 0.184 %
|
| 294 |
+
95 7.2592 ± 0.1337 0.01933 ± 0.00302 0.06050 ± 0.00151 7.950 ± 0.169 % 91.030 ± 0.184 %
|
| 295 |
+
96 7.3503 ± 0.1348 0.01884 ± 0.00300 0.06041 ± 0.00150 7.925 ± 0.168 % 90.989 ± 0.183 %
|
| 296 |
+
97 7.4258 ± 0.1355 0.01841 ± 0.00298 0.06011 ± 0.00148 7.892 ± 0.167 % 90.952 ± 0.182 %
|
| 297 |
+
98 7.5660 ± 0.1380 0.01896 ± 0.00297 0.05986 ± 0.00147 7.860 ± 0.166 % 90.888 ± 0.182 %
|
| 298 |
+
99 7.6839 ± 0.1397 0.01887 ± 0.00295 0.05972 ± 0.00145 7.827 ± 0.165 % 90.858 ± 0.181 %
|
| 299 |
+
100 7.7197 ± 0.1397 0.01862 ± 0.00293 0.05969 ± 0.00144 7.807 ± 0.164 % 90.835 ± 0.181 %
|
| 300 |
+
101 7.7542 ± 0.1398 0.01825 ± 0.00291 0.05976 ± 0.00144 7.825 ± 0.164 % 90.841 ± 0.180 %
|
| 301 |
+
102 7.8191 ± 0.1409 0.01869 ± 0.00290 0.05978 ± 0.00143 7.826 ± 0.163 % 90.830 ± 0.179 %
|
| 302 |
+
103 7.7914 ± 0.1399 0.01851 ± 0.00288 0.05948 ± 0.00142 7.817 ± 0.162 % 90.843 ± 0.178 %
|
| 303 |
+
104 7.7355 ± 0.1380 0.01882 ± 0.00287 0.05962 ± 0.00142 7.864 ± 0.162 % 90.867 ± 0.177 %
|
| 304 |
+
105 7.6229 ± 0.1351 0.01867 ± 0.00287 0.05979 ± 0.00141 7.927 ± 0.163 % 90.906 ± 0.176 %
|
| 305 |
+
106 7.4924 ± 0.1318 0.01870 ± 0.00286 0.05967 ± 0.00141 7.965 ± 0.163 % 90.966 ± 0.174 %
|
| 306 |
+
107 7.5485 ± 0.1321 0.01842 ± 0.00284 0.05935 ± 0.00140 7.941 ± 0.162 % 90.955 ± 0.174 %
|
| 307 |
+
108 7.5605 ± 0.1317 0.01824 ± 0.00281 0.05902 ± 0.00138 7.914 ± 0.161 % 90.984 ± 0.173 %
|
| 308 |
+
109 7.5830 ± 0.1316 0.01848 ± 0.00280 0.05887 ± 0.00137 7.898 ± 0.160 % 90.973 ± 0.172 %
|
| 309 |
+
110 7.6192 ± 0.1316 0.01855 ± 0.00278 0.05860 ± 0.00136 7.877 ± 0.159 % 90.973 ± 0.171 %
|
| 310 |
+
111 7.6674 ± 0.1318 0.01855 ± 0.00276 0.05836 ± 0.00135 7.849 ± 0.159 % 90.973 ± 0.170 %
|
| 311 |
+
112 7.6757 ± 0.1313 0.01829 ± 0.00274 0.05807 ± 0.00134 7.828 ± 0.158 % 90.977 ± 0.170 %
|
| 312 |
+
113 7.6842 ± 0.1306 0.01803 ± 0.00272 0.05780 ± 0.00133 7.806 ± 0.157 % 90.998 ± 0.169 %
|
| 313 |
+
114 7.7015 ± 0.1305 0.01794 ± 0.00270 0.05761 ± 0.00132 7.780 ± 0.156 % 91.015 ± 0.168 %
|
| 314 |
+
115 7.6840 ± 0.1296 0.01800 ± 0.00270 0.05782 ± 0.00131 7.818 ± 0.156 % 91.045 ± 0.167 %
|
| 315 |
+
116 7.6843 ± 0.1290 0.01943 ± 0.00271 0.05902 ± 0.00132 7.937 ± 0.155 % 90.974 ± 0.167 %
|
| 316 |
+
117 7.5946 ± 0.1267 0.02126 ± 0.00273 0.06073 ± 0.00134 8.122 ± 0.155 % 90.950 ± 0.166 %
|
| 317 |
+
118 7.5061 ± 0.1244 0.02240 ± 0.00275 0.06229 ± 0.00136 8.308 ± 0.156 % 90.924 ± 0.166 %
|
| 318 |
+
119 7.4170 ± 0.1222 0.02390 ± 0.00276 0.06356 ± 0.00137 8.477 ± 0.156 % 90.914 ± 0.165 %
|
| 319 |
+
120 7.3407 ± 0.1201 0.02491 ± 0.00278 0.06512 ± 0.00139 8.673 ± 0.156 % 90.866 ± 0.165 %
|
| 320 |
+
121 7.2617 ± 0.1181 0.02574 ± 0.00277 0.06637 ± 0.00141 8.835 ± 0.156 % 90.848 ± 0.164 %
|
| 321 |
+
|
| 322 |
+
====== Perplexity statistics ======
|
| 323 |
+
Mean PPL(Q) : 7.261744 ± 0.118146
|
| 324 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 325 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 98.54%
|
| 326 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.025736 ± 0.002775
|
| 327 |
+
Mean PPL(Q)/PPL(base) : 1.026070 ± 0.002847
|
| 328 |
+
Mean PPL(Q)-PPL(base) : 0.184504 ± 0.020245
|
| 329 |
+
|
| 330 |
+
====== KL divergence statistics ======
|
| 331 |
+
Mean KLD: 0.066373 ± 0.001410
|
| 332 |
+
Maximum KLD: 10.885548
|
| 333 |
+
99.9% KLD: 3.265465
|
| 334 |
+
99.0% KLD: 1.020737
|
| 335 |
+
95.0% KLD: 0.242942
|
| 336 |
+
90.0% KLD: 0.118923
|
| 337 |
+
Median KLD: 0.015188
|
| 338 |
+
10.0% KLD: 0.000072
|
| 339 |
+
5.0% KLD: 0.000012
|
| 340 |
+
1.0% KLD: 0.000000
|
| 341 |
+
0.1% KLD: -0.000002
|
| 342 |
+
Minimum KLD: -0.000012
|
| 343 |
+
|
| 344 |
+
====== Token probability statistics ======
|
| 345 |
+
Mean Δp: -0.573 ± 0.050 %
|
| 346 |
+
Maximum Δp: 98.147%
|
| 347 |
+
99.9% Δp: 65.027%
|
| 348 |
+
99.0% Δp: 22.124%
|
| 349 |
+
95.0% Δp: 7.421%
|
| 350 |
+
90.0% Δp: 3.765%
|
| 351 |
+
75.0% Δp: 0.461%
|
| 352 |
+
Median Δp: -0.001%
|
| 353 |
+
25.0% Δp: -0.861%
|
| 354 |
+
10.0% Δp: -4.932%
|
| 355 |
+
5.0% Δp: -9.856%
|
| 356 |
+
1.0% Δp: -33.451%
|
| 357 |
+
0.1% Δp: -83.438%
|
| 358 |
+
Minimum Δp: -99.560%
|
| 359 |
+
RMS Δp : 8.835 ± 0.156 %
|
| 360 |
+
Same top p: 90.848 ± 0.164 %
|
| 361 |
+
|
| 362 |
+
llama_perf_context_print: load time = 43931.90 ms
|
| 363 |
+
llama_perf_context_print: prompt eval time = 88507.25 ms / 61952 tokens ( 1.43 ms per token, 699.97 tokens per second)
|
| 364 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 365 |
+
llama_perf_context_print: total time = 103815.00 ms / 61953 tokens
|
| 366 |
+
llama_perf_context_print: graphs reused = 0
|
| 367 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1451 + ( 22263 = 20739 + 176 + 1347) + 419 |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 999 + ( 22682 = 18692 + 816 + 3174) + 452 |
|
| 370 |
+
llama_memory_breakdown_print: | - Host | 123577 = 123473 + 0 + 104 |
|
| 371 |
+
```
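Each of these per-quant reports ends with the same three summary blocks (perplexity, KL divergence, token probability), so they can be compared mechanically. Below is a small, hypothetical Python sketch for pulling those summary numbers out of a saved log; the function name and regex are illustrative assumptions, not utilities shipped in this repository:

```python
# Hypothetical helper: extract the "====== ... statistics ======" summary values
# (e.g. "Mean PPL(Q)", "Mean KLD", "RMS Δp") from one of the logs above.
import re
from pathlib import Path

def parse_kld_summary(path: str) -> dict[str, float]:
    """Return {metric name: value} for the summary sections at the end of a log."""
    text = Path(path).read_text(encoding="utf-8")
    # Only look at the tail of the log, from the first summary header onward, so
    # ordinary loader lines such as "kl_divergence: 6.35 seconds per pass" are skipped.
    start = text.find("====== Perplexity statistics ======")
    if start < 0:
        return {}
    stats: dict[str, float] = {}
    # Summary lines look like "Mean KLD: 0.066373 ± 0.001410" or "99.0% Δp: 22.124%";
    # capture the metric name and the first number after the colon.
    for name, value in re.findall(r"^\s*([^:\n]+?)\s*:\s*(-?\d+(?:\.\d+)?)",
                                  text[start:], flags=re.M):
        stats[name.strip()] = float(value)
    return stats

if __name__ == "__main__":
    # Assumed local copy of the report above; adjust the path as needed.
    summary = parse_kld_summary("MiniMax-M2.5-Q4_K_S.md")
    print(summary.get("Mean KLD"), summary.get("Mean PPL(Q)"))
```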
|
kld_data/unsloth/Q5_K_M/MiniMax-M2.5-Q5_K_M.md
ADDED
|
@@ -0,0 +1,371 @@
|
| 1 |
+
### MiniMax-M2.5-Q5_K_M (unsloth)
|
| 2 |
+
|
| 3 |
+
151.15 GiB (5.68 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q5_K_M/MiniMax-M2.5-Q5_K_M-00001-of-00005.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 80862 used, -56991 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 79073 used, -55202 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 159936 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 114241 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37268 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 9167 MiB used, 14704 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1788 MiB used, 22082 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 9 layers ( 1 overflowing), 22299 MiB used, 1572 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 54 layers (49 overflowing), 22353 MiB used, 1518 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.13 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 4 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q5_K_M/MiniMax-M2.5-Q5_K_M-00001-of-00005.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 17
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 5
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q5_K: 375 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 61 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q5_K - Medium
|
| 90 |
+
print_info: file size = 151.15 GiB (5.68 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 47045.94 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47463.00 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46948.88 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 12777.31 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 20366.13 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18331.10 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 144.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 848.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1789.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 195 (with bs=4096), 101 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 24.42 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 8.56 seconds per pass - ETA 2.15 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.4177 ± 1.2350 0.00698 ± 0.01540 0.01766 ± 0.00327 4.639 ± 1.121 % 93.333 ± 1.565 %
|
| 201 |
+
2 4.6880 ± 0.5634 0.00280 ± 0.00984 0.01234 ± 0.00171 3.640 ± 0.724 % 95.294 ± 0.939 %
|
| 202 |
+
3 4.4819 ± 0.4439 -0.00143 ± 0.00923 0.01338 ± 0.00139 4.036 ± 0.497 % 95.425 ± 0.756 %
|
| 203 |
+
4 5.1102 ± 0.4541 -0.00354 ± 0.00811 0.01463 ± 0.00135 4.305 ± 0.484 % 95.588 ± 0.643 %
|
| 204 |
+
5 4.9287 ± 0.3946 0.00685 ± 0.00881 0.01549 ± 0.00145 4.677 ± 0.511 % 95.451 ± 0.584 %
|
| 205 |
+
6 6.0476 ± 0.4748 0.00847 ± 0.00822 0.01826 ± 0.00138 4.585 ± 0.442 % 94.771 ± 0.569 %
|
| 206 |
+
7 5.6338 ± 0.3961 0.00944 ± 0.00741 0.01933 ± 0.00127 4.648 ± 0.381 % 94.454 ± 0.542 %
|
| 207 |
+
8 6.3396 ± 0.4221 0.00397 ± 0.00684 0.01933 ± 0.00113 4.512 ± 0.346 % 94.216 ± 0.517 %
|
| 208 |
+
9 6.2177 ± 0.3857 0.00383 ± 0.00627 0.01851 ± 0.00102 4.362 ± 0.319 % 94.248 ± 0.486 %
|
| 209 |
+
10 5.6895 ± 0.3285 0.00409 ± 0.00574 0.01773 ± 0.00093 4.308 ± 0.292 % 94.510 ± 0.451 %
|
| 210 |
+
11 6.2510 ± 0.3498 0.00641 ± 0.00545 0.01800 ± 0.00086 4.193 ± 0.273 % 94.153 ± 0.443 %
|
| 211 |
+
12 6.9179 ± 0.3759 0.00628 ± 0.00521 0.01808 ± 0.00081 4.081 ± 0.257 % 93.856 ± 0.434 %
|
| 212 |
+
13 7.1863 ± 0.3721 0.00707 ± 0.00488 0.01749 ± 0.00075 3.979 ± 0.244 % 93.756 ± 0.420 %
|
| 213 |
+
14 7.7364 ± 0.3901 0.00559 ± 0.00473 0.01745 ± 0.00071 3.887 ± 0.232 % 93.754 ± 0.405 %
|
| 214 |
+
15 8.0935 ± 0.3947 0.00458 ± 0.00452 0.01771 ± 0.00070 3.879 ± 0.222 % 93.752 ± 0.391 %
|
| 215 |
+
16 8.3454 ± 0.3939 0.00304 ± 0.00430 0.01731 ± 0.00066 3.821 ± 0.211 % 93.799 ± 0.378 %
|
| 216 |
+
17 8.5648 ± 0.3942 0.00187 ± 0.00421 0.01768 ± 0.00065 3.757 ± 0.203 % 93.818 ± 0.366 %
|
| 217 |
+
18 8.0643 ± 0.3581 0.00088 ± 0.00406 0.01773 ± 0.00065 3.759 ± 0.195 % 93.922 ± 0.353 %
|
| 218 |
+
19 8.1974 ± 0.3541 0.00145 ± 0.00391 0.01735 ± 0.00062 3.739 ± 0.187 % 93.973 ± 0.342 %
|
| 219 |
+
20 8.2562 ± 0.3475 0.00072 ± 0.00386 0.01836 ± 0.00064 3.763 ± 0.178 % 93.882 ± 0.336 %
|
| 220 |
+
21 8.2351 ± 0.3382 0.00074 ± 0.00374 0.01846 ± 0.00062 3.765 ± 0.171 % 93.968 ± 0.325 %
|
| 221 |
+
22 8.5593 ± 0.3467 0.00168 ± 0.00369 0.01920 ± 0.00063 3.842 ± 0.168 % 93.904 ± 0.319 %
|
| 222 |
+
23 8.5792 ± 0.3409 0.00279 ± 0.00377 0.02040 ± 0.00071 4.129 ± 0.202 % 93.913 ± 0.312 %
|
| 223 |
+
24 8.9844 ± 0.3514 0.00287 ± 0.00367 0.02023 ± 0.00068 4.073 ± 0.197 % 93.889 ± 0.306 %
|
| 224 |
+
25 8.9674 ± 0.3442 0.00236 ± 0.00360 0.02072 ± 0.00067 4.110 ± 0.189 % 93.788 ± 0.302 %
|
| 225 |
+
26 8.3879 ± 0.3127 0.00417 ± 0.00369 0.02344 ± 0.00092 4.891 ± 0.247 % 93.725 ± 0.298 %
|
| 226 |
+
27 7.9486 ± 0.2883 0.00431 ± 0.00378 0.02581 ± 0.00104 5.375 ± 0.264 % 93.740 ± 0.292 %
|
| 227 |
+
28 8.0708 ± 0.2882 0.00589 ± 0.00374 0.02624 ± 0.00102 5.376 ± 0.256 % 93.585 ± 0.290 %
|
| 228 |
+
29 7.9963 ± 0.2805 0.00608 ± 0.00366 0.02631 ± 0.00100 5.361 ± 0.250 % 93.523 ± 0.286 %
|
| 229 |
+
30 7.4774 ± 0.2555 0.00619 ± 0.00356 0.02567 ± 0.00097 5.322 ± 0.245 % 93.725 ± 0.277 %
|
| 230 |
+
31 7.0426 ± 0.2346 0.00566 ± 0.00346 0.02520 ± 0.00095 5.299 ± 0.239 % 93.865 ± 0.270 %
|
| 231 |
+
32 6.8652 ± 0.2232 0.00538 ± 0.00338 0.02486 ± 0.00092 5.286 ± 0.233 % 93.885 ± 0.265 %
|
| 232 |
+
33 6.7329 ± 0.2140 0.00581 ± 0.00331 0.02453 ± 0.00089 5.253 ± 0.227 % 93.868 ± 0.262 %
|
| 233 |
+
34 6.9239 ± 0.2181 0.00676 ± 0.00330 0.02493 ± 0.00087 5.246 ± 0.222 % 93.783 ± 0.259 %
|
| 234 |
+
35 7.0319 ± 0.2203 0.00760 ± 0.00330 0.02547 ± 0.00086 5.318 ± 0.217 % 93.725 ± 0.257 %
|
| 235 |
+
36 7.0940 ± 0.2200 0.00751 ± 0.00324 0.02524 ± 0.00084 5.278 ± 0.213 % 93.736 ± 0.253 %
|
| 236 |
+
37 7.1138 ± 0.2177 0.00802 ± 0.00322 0.02543 ± 0.00084 5.326 ± 0.209 % 93.810 ± 0.248 %
|
| 237 |
+
38 7.3260 ± 0.2223 0.00767 ± 0.00315 0.02539 ± 0.00082 5.305 ± 0.207 % 93.798 ± 0.245 %
|
| 238 |
+
39 7.2808 ± 0.2177 0.00842 ± 0.00312 0.02595 ± 0.00083 5.417 ± 0.205 % 93.756 ± 0.243 %
|
| 239 |
+
40 7.0397 ± 0.2063 0.00968 ± 0.00314 0.02798 ± 0.00089 5.718 ± 0.202 % 93.706 ± 0.240 %
|
| 240 |
+
41 6.8325 ± 0.1964 0.01063 ± 0.00320 0.02997 ± 0.00095 6.122 ± 0.207 % 93.639 ± 0.239 %
|
| 241 |
+
42 6.6260 ± 0.1870 0.01126 ± 0.00329 0.03188 ± 0.00106 6.466 ± 0.211 % 93.623 ± 0.236 %
|
| 242 |
+
43 6.4272 ± 0.1782 0.01194 ± 0.00333 0.03329 ± 0.00109 6.715 ± 0.214 % 93.598 ± 0.234 %
|
| 243 |
+
44 6.3791 ± 0.1740 0.01076 ± 0.00327 0.03283 ± 0.00107 6.658 ± 0.211 % 93.636 ± 0.230 %
|
| 244 |
+
45 6.5210 ± 0.1769 0.01086 ± 0.00324 0.03305 ± 0.00106 6.627 ± 0.208 % 93.612 ± 0.228 %
|
| 245 |
+
46 6.6574 ± 0.1789 0.00949 ± 0.00320 0.03284 ± 0.00103 6.572 ± 0.205 % 93.572 ± 0.226 %
|
| 246 |
+
47 6.8070 ± 0.1816 0.00949 ± 0.00314 0.03242 ± 0.00101 6.512 ± 0.203 % 93.534 ± 0.225 %
|
| 247 |
+
48 6.6897 ± 0.1755 0.00877 ± 0.00308 0.03197 ± 0.00099 6.459 ± 0.200 % 93.546 ± 0.222 %
|
| 248 |
+
49 6.7938 ± 0.1765 0.00919 ± 0.00314 0.03641 ± 0.00190 6.573 ± 0.202 % 93.373 ± 0.223 %
|
| 249 |
+
50 6.8896 ± 0.1781 0.00905 ± 0.00311 0.03627 ± 0.00187 6.535 ± 0.199 % 93.333 ± 0.221 %
|
| 250 |
+
51 7.0005 ± 0.1795 0.00961 ± 0.00305 0.03590 ± 0.00183 6.501 ± 0.197 % 93.395 ± 0.218 %
|
| 251 |
+
52 7.0637 ± 0.1792 0.00929 ± 0.00302 0.03590 ± 0.00180 6.469 ± 0.194 % 93.386 ± 0.216 %
|
| 252 |
+
53 7.1755 ± 0.1803 0.00920 ± 0.00298 0.03560 ± 0.00176 6.423 ± 0.192 % 93.393 ± 0.214 %
|
| 253 |
+
54 7.2312 ± 0.1796 0.00916 ± 0.00294 0.03527 ± 0.00173 6.380 ± 0.189 % 93.406 ± 0.212 %
|
| 254 |
+
55 7.2825 ± 0.1790 0.00920 ± 0.00289 0.03488 ± 0.00170 6.332 ± 0.187 % 93.419 ± 0.209 %
|
| 255 |
+
56 7.3210 ± 0.1784 0.00875 ± 0.00285 0.03456 ± 0.00167 6.288 ± 0.185 % 93.424 ± 0.207 %
|
| 256 |
+
57 7.3179 ± 0.1767 0.00837 ± 0.00284 0.03459 ± 0.00164 6.271 ± 0.183 % 93.443 ± 0.205 %
|
| 257 |
+
58 7.3249 ± 0.1753 0.00796 ± 0.00281 0.03432 ± 0.00162 6.251 ± 0.181 % 93.455 ± 0.203 %
|
| 258 |
+
59 7.2870 ± 0.1726 0.00790 ± 0.00276 0.03386 ± 0.00159 6.209 ± 0.179 % 93.486 ± 0.201 %
|
| 259 |
+
60 7.2963 ± 0.1714 0.00769 ± 0.00273 0.03364 ± 0.00156 6.176 ± 0.177 % 93.444 ± 0.200 %
|
| 260 |
+
61 7.3448 ± 0.1711 0.00769 ± 0.00269 0.03340 ± 0.00154 6.145 ± 0.175 % 93.455 ± 0.198 %
|
| 261 |
+
62 7.3219 ± 0.1695 0.00783 ± 0.00268 0.03318 ± 0.00152 6.125 ± 0.174 % 93.510 ± 0.196 %
|
| 262 |
+
63 7.3702 ± 0.1697 0.00809 ± 0.00267 0.03323 ± 0.00150 6.097 ± 0.172 % 93.501 ± 0.194 %
|
| 263 |
+
64 7.3545 ± 0.1677 0.00853 ± 0.00264 0.03304 ± 0.00148 6.069 ± 0.170 % 93.499 ± 0.193 %
|
| 264 |
+
65 7.3440 ± 0.1662 0.00827 ± 0.00262 0.03294 ± 0.00145 6.050 ± 0.168 % 93.502 ± 0.191 %
|
| 265 |
+
66 7.3809 ± 0.1659 0.00828 ± 0.00260 0.03295 ± 0.00143 6.024 ± 0.167 % 93.476 ± 0.190 %
|
| 266 |
+
67 7.3877 ± 0.1649 0.00826 ± 0.00258 0.03294 ± 0.00141 6.012 ± 0.165 % 93.462 ± 0.189 %
|
| 267 |
+
68 7.3461 ± 0.1625 0.00858 ± 0.00257 0.03271 ± 0.00139 5.987 ± 0.164 % 93.478 ± 0.188 %
|
| 268 |
+
69 7.3815 ± 0.1622 0.00893 ± 0.00254 0.03255 ± 0.00137 5.963 ± 0.162 % 93.498 ± 0.186 %
|
| 269 |
+
70 7.3518 ± 0.1601 0.00868 ± 0.00253 0.03245 ± 0.00136 5.940 ± 0.160 % 93.507 ± 0.184 %
|
| 270 |
+
71 7.3387 ± 0.1588 0.00890 ± 0.00251 0.03236 ± 0.00134 5.916 ± 0.159 % 93.549 ± 0.183 %
|
| 271 |
+
72 7.3570 ± 0.1583 0.00920 ± 0.00251 0.03228 ± 0.00132 5.900 ± 0.157 % 93.540 ± 0.181 %
|
| 272 |
+
73 7.3628 ± 0.1572 0.00927 ± 0.00249 0.03219 ± 0.00130 5.886 ± 0.156 % 93.511 ± 0.181 %
|
| 273 |
+
74 7.3535 ± 0.1558 0.00882 ± 0.00247 0.03215 ± 0.00129 5.867 ± 0.154 % 93.498 ± 0.179 %
|
| 274 |
+
75 7.3552 ± 0.1549 0.00814 ± 0.00245 0.03235 ± 0.00128 5.887 ± 0.153 % 93.485 ± 0.178 %
|
| 275 |
+
76 7.4154 ± 0.1553 0.00780 ± 0.00243 0.03220 ± 0.00126 5.873 ± 0.151 % 93.478 ± 0.177 %
|
| 276 |
+
77 7.4118 ± 0.1542 0.00758 ± 0.00242 0.03206 ± 0.00125 5.851 ± 0.150 % 93.466 ± 0.176 %
|
| 277 |
+
78 7.4263 ± 0.1536 0.00772 ± 0.00241 0.03195 ± 0.00123 5.830 ± 0.149 % 93.469 ± 0.175 %
|
| 278 |
+
79 7.4371 ± 0.1529 0.00772 ± 0.00239 0.03185 ± 0.00122 5.807 ± 0.148 % 93.477 ± 0.174 %
|
| 279 |
+
80 7.4447 ± 0.1526 0.00849 ± 0.00241 0.03183 ± 0.00120 5.797 ± 0.146 % 93.471 ± 0.173 %
|
| 280 |
+
81 7.4220 ± 0.1512 0.00853 ± 0.00239 0.03175 ± 0.00119 5.776 ± 0.145 % 93.469 ± 0.172 %
|
| 281 |
+
82 7.4027 ± 0.1497 0.00886 ± 0.00237 0.03158 ± 0.00117 5.760 ± 0.144 % 93.482 ± 0.171 %
|
| 282 |
+
83 7.4311 ± 0.1492 0.00837 ± 0.00235 0.03138 ± 0.00116 5.733 ± 0.143 % 93.470 ± 0.170 %
|
| 283 |
+
84 7.4486 ± 0.1483 0.00855 ± 0.00233 0.03119 ± 0.00115 5.708 ± 0.142 % 93.478 ± 0.169 %
|
| 284 |
+
85 7.4436 ± 0.1471 0.00842 ± 0.00230 0.03097 ± 0.00113 5.683 ± 0.140 % 93.486 ± 0.168 %
|
| 285 |
+
86 7.3757 ± 0.1444 0.00830 ± 0.00228 0.03078 ± 0.00112 5.664 ± 0.139 % 93.534 ± 0.166 %
|
| 286 |
+
87 7.3163 ± 0.1420 0.00801 ± 0.00226 0.03056 ± 0.00111 5.644 ± 0.138 % 93.554 ± 0.165 %
|
| 287 |
+
88 7.2564 ± 0.1396 0.00813 ± 0.00224 0.03038 ± 0.00109 5.630 ± 0.137 % 93.556 ± 0.164 %
|
| 288 |
+
89 7.1841 ± 0.1369 0.00809 ± 0.00222 0.03021 ± 0.00108 5.613 ± 0.136 % 93.571 ± 0.163 %
|
| 289 |
+
90 7.1302 ± 0.1348 0.00827 ± 0.00220 0.03001 ± 0.00107 5.591 ± 0.135 % 93.595 ± 0.162 %
|
| 290 |
+
91 7.0807 ± 0.1328 0.00840 ± 0.00218 0.02979 ± 0.00106 5.569 ± 0.134 % 93.626 ± 0.160 %
|
| 291 |
+
92 7.0245 ± 0.1306 0.00848 ± 0.00216 0.02966 ± 0.00105 5.558 ± 0.133 % 93.627 ± 0.159 %
|
| 292 |
+
93 7.0443 ± 0.1306 0.00882 ± 0.00216 0.03017 ± 0.00109 5.550 ± 0.132 % 93.603 ± 0.159 %
|
| 293 |
+
94 7.0738 ± 0.1303 0.00866 ± 0.00214 0.02998 ± 0.00108 5.529 ± 0.131 % 93.596 ± 0.158 %
|
| 294 |
+
95 7.1804 ± 0.1320 0.00842 ± 0.00213 0.03002 ± 0.00107 5.522 ± 0.130 % 93.552 ± 0.158 %
|
| 295 |
+
96 7.2720 ± 0.1331 0.00814 ± 0.00211 0.03002 ± 0.00106 5.508 ± 0.129 % 93.542 ± 0.157 %
|
| 296 |
+
97 7.3482 ± 0.1339 0.00791 ± 0.00210 0.02990 ± 0.00105 5.484 ± 0.128 % 93.531 ± 0.156 %
|
| 297 |
+
98 7.4868 ± 0.1363 0.00844 ± 0.00209 0.02976 ± 0.00104 5.460 ± 0.128 % 93.509 ± 0.156 %
|
| 298 |
+
99 7.6051 ± 0.1380 0.00856 ± 0.00208 0.02974 ± 0.00103 5.442 ± 0.127 % 93.448 ± 0.156 %
|
| 299 |
+
100 7.6419 ± 0.1381 0.00849 ± 0.00207 0.02976 ± 0.00102 5.431 ± 0.126 % 93.447 ± 0.155 %
|
| 300 |
+
101 7.6792 ± 0.1382 0.00854 ± 0.00206 0.02975 ± 0.00102 5.415 ± 0.125 % 93.461 ± 0.154 %
|
| 301 |
+
102 7.7383 ± 0.1391 0.00830 ± 0.00205 0.02977 ± 0.00101 5.403 ± 0.124 % 93.460 ± 0.153 %
|
| 302 |
+
103 7.7119 ± 0.1381 0.00825 ± 0.00204 0.02961 ± 0.00100 5.390 ± 0.123 % 93.470 ± 0.152 %
|
| 303 |
+
104 7.6541 ± 0.1363 0.00824 ± 0.00203 0.02962 ± 0.00099 5.413 ± 0.122 % 93.499 ± 0.151 %
|
| 304 |
+
105 7.5429 ± 0.1334 0.00812 ± 0.00202 0.02970 ± 0.00099 5.474 ± 0.124 % 93.524 ± 0.150 %
|
| 305 |
+
106 7.4124 ± 0.1301 0.00797 ± 0.00200 0.02951 ± 0.00098 5.469 ± 0.124 % 93.574 ± 0.149 %
|
| 306 |
+
107 7.4698 ± 0.1305 0.00795 ± 0.00199 0.02933 ± 0.00097 5.447 ± 0.123 % 93.564 ± 0.149 %
|
| 307 |
+
108 7.4805 ± 0.1301 0.00760 ± 0.00197 0.02920 ± 0.00096 5.431 ± 0.122 % 93.591 ± 0.148 %
|
| 308 |
+
109 7.5004 ± 0.1299 0.00753 ± 0.00196 0.02909 ± 0.00095 5.416 ± 0.121 % 93.603 ± 0.147 %
|
| 309 |
+
110 7.5348 ± 0.1299 0.00741 ± 0.00195 0.02897 ± 0.00094 5.405 ± 0.121 % 93.608 ± 0.146 %
|
| 310 |
+
111 7.5817 ± 0.1301 0.00732 ± 0.00194 0.02887 ± 0.00094 5.387 ± 0.120 % 93.609 ± 0.145 %
|
| 311 |
+
112 7.5894 ± 0.1295 0.00698 ± 0.00192 0.02872 ± 0.00093 5.372 ± 0.119 % 93.617 ± 0.145 %
|
| 312 |
+
113 7.5996 ± 0.1289 0.00696 ± 0.00191 0.02864 ± 0.00092 5.363 ± 0.118 % 93.618 ± 0.144 %
|
| 313 |
+
114 7.6165 ± 0.1288 0.00683 ± 0.00190 0.02855 ± 0.00091 5.347 ± 0.118 % 93.605 ± 0.143 %
|
| 314 |
+
115 7.5981 ± 0.1278 0.00676 ± 0.00190 0.02864 ± 0.00091 5.361 ± 0.118 % 93.586 ± 0.143 %
|
| 315 |
+
116 7.5931 ± 0.1272 0.00749 ± 0.00190 0.02929 ± 0.00091 5.448 ± 0.118 % 93.523 ± 0.143 %
|
| 316 |
+
117 7.4960 ± 0.1248 0.00820 ± 0.00191 0.02982 ± 0.00091 5.576 ± 0.117 % 93.524 ± 0.142 %
|
| 317 |
+
118 7.4026 ± 0.1224 0.00851 ± 0.00192 0.03041 ± 0.00091 5.700 ± 0.120 % 93.513 ± 0.142 %
|
| 318 |
+
119 7.3044 ± 0.1200 0.00861 ± 0.00192 0.03079 ± 0.00091 5.782 ± 0.120 % 93.511 ± 0.141 %
|
| 319 |
+
120 7.2230 ± 0.1179 0.00875 ± 0.00193 0.03138 ± 0.00091 5.883 ± 0.120 % 93.507 ± 0.141 %
|
| 320 |
+
121 7.1427 ± 0.1159 0.00920 ± 0.00193 0.03192 ± 0.00091 5.998 ± 0.121 % 93.508 ± 0.140 %

====== Perplexity statistics ======
Mean PPL(Q) : 7.142653 ± 0.115912
Mean PPL(base) : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 99.29%
Mean ln(PPL(Q)/PPL(base)) : 0.009200 ± 0.001927
Mean PPL(Q)/PPL(base) : 1.009243 ± 0.001945
Mean PPL(Q)-PPL(base) : 0.065414 ± 0.013786

====== KL divergence statistics ======
Mean KLD: 0.031922 ± 0.000913
Maximum KLD: 15.861979
99.9% KLD: 1.532143
99.0% KLD: 0.434627
95.0% KLD: 0.112822
90.0% KLD: 0.058731
Median KLD: 0.007488
10.0% KLD: 0.000034
5.0% KLD: 0.000005
1.0% KLD: 0.000000
0.1% KLD: -0.000002
Minimum KLD: -0.000009

====== Token probability statistics ======
Mean Δp: -0.248 ± 0.034 %
Maximum Δp: 88.707%
99.9% Δp: 50.362%
99.0% Δp: 15.872%
95.0% Δp: 5.434%
90.0% Δp: 2.719%
75.0% Δp: 0.344%
Median Δp: -0.000%
25.0% Δp: -0.563%
10.0% Δp: -3.379%
5.0% Δp: -6.405%
1.0% Δp: -19.839%
0.1% Δp: -58.740%
Minimum Δp: -83.023%
RMS Δp : 5.998 ± 0.121 %
Same top p: 93.508 ± 0.140 %

llama_perf_context_print: load time = 62090.31 ms
llama_perf_context_print: prompt eval time = 109451.69 ms / 61952 tokens ( 1.77 ms per token, 566.02 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 125021.22 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1413 + ( 22299 = 20366 + 144 + 1788) + 422 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1329 + ( 22353 = 18331 + 848 + 3174) + 452 |
llama_memory_breakdown_print: | - Host | 154339 = 154235 + 0 + 104 |
```
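The summary blocks above report the mean and percentile KL divergence, the change in token probability (Δp), and top-token agreement between the quantized model and the BF16 reference. As a rough illustrative sketch only — not llama.cpp's implementation, and the exact definitions used by `llama-perplexity` may differ — statistics of this kind can be derived from per-token probability distributions as follows (the function name `kld_report` and the use of NumPy are assumptions for the example):

```python
# Illustrative sketch only -- NOT the llama.cpp implementation.
# Given per-token probability distributions from a BF16 reference model and a
# quantized model, compute summary statistics analogous to the blocks above.
import numpy as np

def kld_report(p_base: np.ndarray, p_quant: np.ndarray) -> dict:
    """p_base, p_quant: (n_tokens, n_vocab) arrays of softmax probabilities."""
    eps = 1e-12
    # Per-token KL(base || quant), summed over the vocabulary.
    kld = np.sum(p_base * (np.log(p_base + eps) - np.log(p_quant + eps)), axis=-1)
    # Change in probability assigned to the reference model's top-1 token.
    top_base = np.argmax(p_base, axis=-1)
    rows = np.arange(p_base.shape[0])
    delta_p = p_quant[rows, top_base] - p_base[rows, top_base]
    return {
        "mean_kld": float(kld.mean()),
        "median_kld": float(np.median(kld)),
        "kld_99pct": float(np.percentile(kld, 99.0)),
        "rms_delta_p": float(np.sqrt(np.mean(delta_p ** 2))),
        "same_top_p": float(np.mean(np.argmax(p_quant, axis=-1) == top_base)),
    }
```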
|
kld_data/unsloth/Q5_K_S/MiniMax-M2.5-Q5_K_S.md
ADDED
|
@@ -0,0 +1,371 @@
### MiniMax-M2.5-Q5_K_S (unsloth)

146.66 GiB (5.51 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q5_K_S/MiniMax-M2.5-Q5_K_S-00001-of-00005.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 78561 used, -54690 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 76772 used, -52901 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 155334 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 109639 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37433 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 9002 MiB used, 14869 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1635 MiB used, 22235 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 9 layers ( 1 overflowing), 22659 MiB used, 1212 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 54 layers (48 overflowing), 22826 MiB used, 1044 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.08 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 4 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q5_K_S/MiniMax-M2.5-Q5_K_S-00001-of-00005.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 16
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 5
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q5_K: 435 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 1 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q5_K - Small
|
| 90 |
+
print_info: file size = 146.66 GiB (5.51 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 46974.48 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47337.62 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47334.61 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 8015.78 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 20879.34 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18804.94 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 144.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 848.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1636.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 189 (with bs=4096), 97 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 23.27 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 7.57 seconds per pass - ETA 1.90 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.4212 ± 1.2396 0.00754 ± 0.01623 0.01895 ± 0.00325 4.879 ± 0.961 % 93.333 ± 1.565 %
|
| 201 |
+
2 4.6411 ± 0.5581 -0.00726 ± 0.01165 0.01489 ± 0.00184 3.939 ± 0.610 % 95.294 ± 0.939 %
|
| 202 |
+
3 4.4653 ± 0.4427 -0.00516 ± 0.00980 0.01495 ± 0.00142 4.173 ± 0.424 % 95.294 ± 0.766 %
|
| 203 |
+
4 5.0482 ± 0.4452 -0.01575 ± 0.00836 0.01686 ± 0.00198 4.340 ± 0.508 % 95.392 ± 0.657 %
|
| 204 |
+
5 4.8790 ± 0.3891 -0.00329 ± 0.01045 0.01722 ± 0.00179 4.588 ± 0.477 % 95.137 ± 0.603 %
|
| 205 |
+
6 5.9561 ± 0.4655 -0.00678 ± 0.00958 0.01956 ± 0.00163 4.450 ± 0.414 % 94.771 ± 0.569 %
|
| 206 |
+
7 5.5467 ± 0.3883 -0.00614 ± 0.00859 0.02021 ± 0.00145 4.450 ± 0.359 % 94.566 ± 0.537 %
|
| 207 |
+
8 6.2529 ± 0.4148 -0.00980 ± 0.00776 0.01980 ± 0.00128 4.261 ± 0.329 % 94.510 ± 0.504 %
|
| 208 |
+
9 6.1369 ± 0.3792 -0.00925 ± 0.00700 0.01906 ± 0.00115 4.170 ± 0.304 % 94.510 ± 0.476 %
|
| 209 |
+
10 5.6240 ± 0.3237 -0.00748 ± 0.00643 0.01845 ± 0.00105 4.136 ± 0.278 % 94.627 ± 0.447 %
|
| 210 |
+
11 6.1677 ± 0.3435 -0.00701 ± 0.00604 0.01879 ± 0.00098 4.055 ± 0.259 % 94.581 ± 0.428 %
|
| 211 |
+
12 6.8501 ± 0.3710 -0.00358 ± 0.00571 0.01940 ± 0.00097 3.946 ± 0.244 % 94.412 ± 0.415 %
|
| 212 |
+
13 7.1166 ± 0.3671 -0.00267 ± 0.00535 0.01892 ± 0.00090 3.868 ± 0.230 % 94.419 ± 0.399 %
|
| 213 |
+
14 7.6709 ± 0.3855 -0.00292 ± 0.00513 0.01931 ± 0.00088 3.877 ± 0.220 % 94.426 ± 0.384 %
|
| 214 |
+
15 8.0307 ± 0.3905 -0.00320 ± 0.00490 0.01940 ± 0.00083 3.871 ± 0.209 % 94.484 ± 0.369 %
|
| 215 |
+
16 8.2862 ± 0.3901 -0.00409 ± 0.00465 0.01892 ± 0.00079 3.820 ± 0.199 % 94.608 ± 0.354 %
|
| 216 |
+
17 8.4742 ± 0.3889 -0.00876 ± 0.00474 0.02009 ± 0.00082 3.772 ± 0.190 % 94.579 ± 0.344 %
|
| 217 |
+
18 7.9803 ± 0.3532 -0.00960 ± 0.00455 0.01997 ± 0.00079 3.748 ± 0.184 % 94.662 ± 0.332 %
|
| 218 |
+
19 8.1192 ± 0.3497 -0.00813 ± 0.00437 0.01950 ± 0.00075 3.737 ± 0.176 % 94.675 ± 0.323 %
|
| 219 |
+
20 8.1918 ± 0.3441 -0.00711 ± 0.00428 0.02005 ± 0.00074 3.775 ± 0.168 % 94.686 ± 0.314 %
|
| 220 |
+
21 8.1689 ± 0.3347 -0.00734 ± 0.00413 0.01994 ± 0.00071 3.768 ± 0.161 % 94.641 ± 0.308 %
|
| 221 |
+
22 8.4842 ± 0.3426 -0.00714 ± 0.00403 0.02035 ± 0.00069 3.808 ± 0.158 % 94.421 ± 0.306 %
|
| 222 |
+
23 8.5165 ± 0.3376 -0.00454 ± 0.00412 0.02146 ± 0.00073 4.098 ± 0.188 % 94.254 ± 0.304 %
|
| 223 |
+
24 8.9207 ± 0.3481 -0.00425 ± 0.00399 0.02142 ± 0.00070 4.054 ± 0.183 % 94.216 ± 0.298 %
|
| 224 |
+
25 8.9046 ± 0.3411 -0.00466 ± 0.00391 0.02204 ± 0.00070 4.121 ± 0.178 % 94.118 ± 0.295 %
|
| 225 |
+
26 8.3326 ± 0.3101 -0.00244 ± 0.00394 0.02510 ± 0.00103 4.881 ± 0.239 % 94.072 ± 0.290 %
|
| 226 |
+
27 7.9175 ± 0.2867 0.00039 ± 0.00404 0.02779 ± 0.00116 5.443 ± 0.257 % 93.914 ± 0.288 %
|
| 227 |
+
28 8.0445 ± 0.2868 0.00262 ± 0.00398 0.02806 ± 0.00113 5.434 ± 0.249 % 93.754 ± 0.286 %
|
| 228 |
+
29 7.9667 ± 0.2791 0.00237 ± 0.00390 0.02800 ± 0.00110 5.393 ± 0.243 % 93.766 ± 0.281 %
|
| 229 |
+
30 7.4501 ± 0.2543 0.00254 ± 0.00379 0.02742 ± 0.00109 5.395 ± 0.247 % 93.961 ± 0.272 %
|
| 230 |
+
31 7.0180 ± 0.2335 0.00215 ± 0.00369 0.02699 ± 0.00107 5.402 ± 0.243 % 94.080 ± 0.265 %
|
| 231 |
+
32 6.8370 ± 0.2219 0.00126 ± 0.00360 0.02657 ± 0.00104 5.381 ± 0.236 % 94.118 ± 0.260 %
|
| 232 |
+
33 6.7050 ± 0.2128 0.00167 ± 0.00352 0.02621 ± 0.00101 5.344 ± 0.231 % 94.082 ± 0.257 %
|
| 233 |
+
34 6.8892 ± 0.2166 0.00174 ± 0.00350 0.02691 ± 0.00099 5.350 ± 0.225 % 93.979 ± 0.255 %
|
| 234 |
+
35 6.9915 ± 0.2186 0.00184 ± 0.00347 0.02758 ± 0.00098 5.432 ± 0.218 % 93.804 ± 0.255 %
|
| 235 |
+
36 7.0571 ± 0.2184 0.00230 ± 0.00341 0.02740 ± 0.00096 5.403 ± 0.214 % 93.824 ± 0.251 %
|
| 236 |
+
37 7.0793 ± 0.2164 0.00315 ± 0.00337 0.02746 ± 0.00094 5.435 ± 0.210 % 93.821 ± 0.248 %
|
| 237 |
+
38 7.2915 ± 0.2209 0.00295 ± 0.00330 0.02734 ± 0.00092 5.392 ± 0.207 % 93.767 ± 0.246 %
|
| 238 |
+
39 7.2459 ± 0.2163 0.00362 ± 0.00325 0.02788 ± 0.00091 5.499 ± 0.204 % 93.715 ± 0.243 %
|
| 239 |
+
40 7.0060 ± 0.2050 0.00489 ± 0.00328 0.03027 ± 0.00098 5.868 ± 0.207 % 93.637 ± 0.242 %
|
| 240 |
+
41 6.7993 ± 0.1952 0.00576 ± 0.00333 0.03255 ± 0.00106 6.297 ± 0.217 % 93.534 ± 0.241 %
|
| 241 |
+
42 6.6021 ± 0.1861 0.00764 ± 0.00342 0.03459 ± 0.00116 6.598 ± 0.218 % 93.529 ± 0.238 %
|
| 242 |
+
43 6.4093 ± 0.1775 0.00915 ± 0.00345 0.03554 ± 0.00117 6.755 ± 0.215 % 93.525 ± 0.235 %
|
| 243 |
+
44 6.3634 ± 0.1734 0.00829 ± 0.00340 0.03503 ± 0.00114 6.700 ± 0.212 % 93.574 ± 0.232 %
|
| 244 |
+
45 6.5014 ± 0.1762 0.00786 ± 0.00338 0.03536 ± 0.00113 6.683 ± 0.209 % 93.499 ± 0.230 %
|
| 245 |
+
46 6.6393 ± 0.1782 0.00676 ± 0.00333 0.03522 ± 0.00110 6.635 ± 0.206 % 93.436 ± 0.229 %
|
| 246 |
+
47 6.7917 ± 0.1809 0.00724 ± 0.00327 0.03474 ± 0.00108 6.572 ± 0.203 % 93.425 ± 0.226 %
|
| 247 |
+
48 6.6763 ± 0.1750 0.00677 ± 0.00321 0.03424 ± 0.00106 6.519 ± 0.201 % 93.464 ± 0.223 %
|
| 248 |
+
49 6.7852 ± 0.1761 0.00793 ± 0.00328 0.03729 ± 0.00149 6.569 ± 0.200 % 93.317 ± 0.223 %
|
| 249 |
+
50 6.8816 ± 0.1778 0.00789 ± 0.00324 0.03715 ± 0.00147 6.527 ± 0.197 % 93.302 ± 0.221 %
|
| 250 |
+
51 6.9891 ± 0.1791 0.00799 ± 0.00319 0.03683 ± 0.00144 6.489 ± 0.195 % 93.333 ± 0.219 %
|
| 251 |
+
52 7.0484 ± 0.1787 0.00713 ± 0.00317 0.03688 ± 0.00141 6.463 ± 0.192 % 93.326 ± 0.217 %
|
| 252 |
+
53 7.1603 ± 0.1799 0.00708 ± 0.00313 0.03655 ± 0.00139 6.430 ± 0.189 % 93.326 ± 0.215 %
|
| 253 |
+
54 7.2161 ± 0.1792 0.00706 ± 0.00308 0.03620 ± 0.00136 6.387 ± 0.187 % 93.333 ± 0.213 %
|
| 254 |
+
55 7.2656 ± 0.1785 0.00687 ± 0.00303 0.03586 ± 0.00134 6.340 ± 0.185 % 93.319 ± 0.211 %
|
| 255 |
+
56 7.3059 ± 0.1779 0.00668 ± 0.00299 0.03551 ± 0.00131 6.298 ± 0.183 % 93.319 ± 0.209 %
|
| 256 |
+
57 7.3084 ± 0.1764 0.00707 ± 0.00297 0.03555 ± 0.00129 6.290 ± 0.181 % 93.299 ± 0.207 %
|
| 257 |
+
58 7.3161 ± 0.1750 0.00676 ± 0.00293 0.03527 ± 0.00127 6.272 ± 0.178 % 93.293 ± 0.206 %
|
| 258 |
+
59 7.2782 ± 0.1723 0.00669 ± 0.00289 0.03483 ± 0.00125 6.231 ± 0.176 % 93.360 ± 0.203 %
|
| 259 |
+
60 7.2882 ± 0.1712 0.00659 ± 0.00285 0.03463 ± 0.00123 6.203 ± 0.174 % 93.320 ± 0.202 %
|
| 260 |
+
61 7.3376 ± 0.1709 0.00670 ± 0.00282 0.03450 ± 0.00121 6.203 ± 0.173 % 93.314 ± 0.200 %
|
| 261 |
+
62 7.3145 ± 0.1693 0.00681 ± 0.00282 0.03432 ± 0.00120 6.194 ± 0.172 % 93.346 ± 0.198 %
|
| 262 |
+
63 7.3577 ± 0.1694 0.00638 ± 0.00280 0.03421 ± 0.00118 6.159 ± 0.170 % 93.315 ± 0.197 %
|
| 263 |
+
64 7.3396 ± 0.1672 0.00650 ± 0.00277 0.03406 ± 0.00116 6.135 ± 0.168 % 93.309 ± 0.196 %
|
| 264 |
+
65 7.3327 ± 0.1658 0.00672 ± 0.00275 0.03392 ± 0.00115 6.123 ± 0.166 % 93.339 ± 0.194 %
|
| 265 |
+
66 7.3705 ± 0.1656 0.00687 ± 0.00272 0.03386 ± 0.00113 6.102 ± 0.165 % 93.304 ± 0.193 %
|
| 266 |
+
67 7.3767 ± 0.1646 0.00676 ± 0.00269 0.03383 ± 0.00112 6.084 ± 0.163 % 93.281 ± 0.192 %
|
| 267 |
+
68 7.3333 ± 0.1622 0.00684 ± 0.00268 0.03361 ± 0.00110 6.056 ± 0.161 % 93.310 ± 0.190 %
|
| 268 |
+
69 7.3672 ± 0.1619 0.00699 ± 0.00266 0.03345 ± 0.00108 6.035 ± 0.160 % 93.333 ± 0.188 %
|
| 269 |
+
70 7.3363 ± 0.1597 0.00657 ± 0.00264 0.03335 ± 0.00107 6.009 ± 0.158 % 93.345 ± 0.187 %
|
| 270 |
+
71 7.3222 ± 0.1583 0.00664 ± 0.00261 0.03321 ± 0.00106 5.981 ± 0.157 % 93.378 ± 0.185 %
|
| 271 |
+
72 7.3425 ± 0.1579 0.00724 ± 0.00260 0.03311 ± 0.00104 5.965 ± 0.155 % 93.388 ± 0.183 %
|
| 272 |
+
73 7.3505 ± 0.1569 0.00759 ± 0.00258 0.03298 ± 0.00103 5.943 ± 0.154 % 93.371 ± 0.182 %
|
| 273 |
+
74 7.3421 ± 0.1555 0.00727 ± 0.00255 0.03292 ± 0.00102 5.925 ± 0.152 % 93.365 ± 0.181 %
|
| 274 |
+
75 7.3446 ± 0.1546 0.00671 ± 0.00253 0.03295 ± 0.00100 5.926 ± 0.151 % 93.380 ± 0.180 %
|
| 275 |
+
76 7.4053 ± 0.1550 0.00644 ± 0.00251 0.03287 ± 0.00099 5.912 ± 0.149 % 93.344 ± 0.179 %
|
| 276 |
+
77 7.4044 ± 0.1540 0.00660 ± 0.00249 0.03271 ± 0.00098 5.882 ± 0.148 % 93.333 ± 0.178 %
|
| 277 |
+
78 7.4197 ± 0.1534 0.00683 ± 0.00247 0.03258 ± 0.00097 5.856 ± 0.147 % 93.348 ± 0.177 %
|
| 278 |
+
79 7.4286 ± 0.1527 0.00658 ± 0.00245 0.03245 ± 0.00095 5.832 ± 0.146 % 93.383 ± 0.175 %
|
| 279 |
+
80 7.4324 ± 0.1523 0.00684 ± 0.00248 0.03247 ± 0.00094 5.821 ± 0.144 % 93.358 ± 0.174 %
|
| 280 |
+
81 7.4097 ± 0.1508 0.00687 ± 0.00246 0.03237 ± 0.00093 5.797 ± 0.143 % 93.387 ± 0.173 %
|
| 281 |
+
82 7.3886 ± 0.1493 0.00696 ± 0.00243 0.03223 ± 0.00092 5.776 ± 0.142 % 93.396 ± 0.172 %
|
| 282 |
+
83 7.4184 ± 0.1488 0.00666 ± 0.00241 0.03203 ± 0.00091 5.749 ± 0.141 % 93.385 ± 0.171 %
|
| 283 |
+
84 7.4364 ± 0.1480 0.00690 ± 0.00239 0.03183 ± 0.00090 5.723 ± 0.140 % 93.385 ± 0.170 %
|
| 284 |
+
85 7.4305 ± 0.1468 0.00666 ± 0.00236 0.03164 ± 0.00089 5.703 ± 0.139 % 93.375 ± 0.169 %
|
| 285 |
+
86 7.3628 ± 0.1441 0.00655 ± 0.00234 0.03145 ± 0.00088 5.684 ± 0.137 % 93.397 ± 0.168 %
|
| 286 |
+
87 7.3039 ± 0.1416 0.00632 ± 0.00232 0.03125 ± 0.00087 5.665 ± 0.136 % 93.423 ± 0.166 %
|
| 287 |
+
88 7.2423 ± 0.1392 0.00618 ± 0.00230 0.03107 ± 0.00086 5.655 ± 0.135 % 93.440 ± 0.165 %
|
| 288 |
+
89 7.1688 ± 0.1366 0.00596 ± 0.00228 0.03092 ± 0.00086 5.637 ± 0.134 % 93.466 ± 0.164 %
|
| 289 |
+
90 7.1165 ± 0.1344 0.00634 ± 0.00226 0.03075 ± 0.00085 5.621 ± 0.133 % 93.490 ± 0.163 %
|
| 290 |
+
91 7.0651 ± 0.1324 0.00620 ± 0.00224 0.03058 ± 0.00084 5.607 ± 0.132 % 93.506 ± 0.162 %
|
| 291 |
+
92 7.0091 ± 0.1302 0.00629 ± 0.00222 0.03043 ± 0.00083 5.592 ± 0.131 % 93.508 ± 0.161 %
|
| 292 |
+
93 7.0337 ± 0.1303 0.00730 ± 0.00223 0.03149 ± 0.00093 5.674 ± 0.139 % 93.472 ± 0.160 %
|
| 293 |
+
94 7.0651 ± 0.1301 0.00742 ± 0.00221 0.03130 ± 0.00092 5.653 ± 0.138 % 93.475 ± 0.160 %
|
| 294 |
+
95 7.1695 ± 0.1317 0.00689 ± 0.00220 0.03133 ± 0.00091 5.646 ± 0.137 % 93.441 ± 0.159 %
|
| 295 |
+
96 7.2605 ± 0.1328 0.00655 ± 0.00218 0.03132 ± 0.00090 5.628 ± 0.136 % 93.407 ± 0.159 %
|
| 296 |
+
97 7.3364 ± 0.1336 0.00630 ± 0.00217 0.03119 ± 0.00089 5.604 ± 0.135 % 93.390 ± 0.158 %
|
| 297 |
+
98 7.4729 ± 0.1360 0.00658 ± 0.00217 0.03109 ± 0.00089 5.587 ± 0.135 % 93.337 ± 0.158 %
|
| 298 |
+
99 7.5936 ± 0.1378 0.00704 ± 0.00215 0.03100 ± 0.00088 5.567 ± 0.134 % 93.274 ± 0.158 %
|
| 299 |
+
100 7.6260 ± 0.1377 0.00641 ± 0.00214 0.03104 ± 0.00087 5.554 ± 0.133 % 93.251 ± 0.157 %
|
| 300 |
+
101 7.6617 ± 0.1378 0.00624 ± 0.00213 0.03144 ± 0.00097 5.583 ± 0.136 % 93.217 ± 0.157 %
|
| 301 |
+
102 7.7214 ± 0.1387 0.00612 ± 0.00212 0.03142 ± 0.00096 5.577 ± 0.135 % 93.206 ± 0.156 %
|
| 302 |
+
103 7.6965 ± 0.1378 0.00626 ± 0.00211 0.03131 ± 0.00095 5.568 ± 0.134 % 93.215 ± 0.155 %
|
| 303 |
+
104 7.6377 ± 0.1359 0.00609 ± 0.00210 0.03146 ± 0.00095 5.634 ± 0.134 % 93.232 ± 0.154 %
|
| 304 |
+
105 7.5296 ± 0.1331 0.00635 ± 0.00210 0.03156 ± 0.00095 5.685 ± 0.135 % 93.262 ± 0.153 %
|
| 305 |
+
106 7.3990 ± 0.1298 0.00616 ± 0.00208 0.03132 ± 0.00094 5.671 ± 0.134 % 93.322 ± 0.152 %
|
| 306 |
+
107 7.4565 ± 0.1302 0.00617 ± 0.00206 0.03115 ± 0.00093 5.649 ± 0.134 % 93.315 ± 0.151 %
|
| 307 |
+
108 7.4677 ± 0.1298 0.00589 ± 0.00205 0.03100 ± 0.00092 5.632 ± 0.133 % 93.333 ± 0.150 %
|
| 308 |
+
109 7.4877 ± 0.1296 0.00584 ± 0.00203 0.03089 ± 0.00091 5.620 ± 0.132 % 93.359 ± 0.149 %
|
| 309 |
+
110 7.5223 ± 0.1296 0.00575 ± 0.00202 0.03078 ± 0.00091 5.607 ± 0.131 % 93.351 ± 0.149 %
|
| 310 |
+
111 7.5702 ± 0.1298 0.00579 ± 0.00201 0.03067 ± 0.00090 5.588 ± 0.130 % 93.347 ± 0.148 %
|
| 311 |
+
112 7.5783 ± 0.1293 0.00552 ± 0.00199 0.03053 ± 0.00089 5.572 ± 0.130 % 93.347 ± 0.147 %
|
| 312 |
+
113 7.5894 ± 0.1287 0.00561 ± 0.00198 0.03038 ± 0.00088 5.555 ± 0.129 % 93.354 ± 0.147 %
|
| 313 |
+
114 7.6073 ± 0.1286 0.00563 ± 0.00197 0.03027 ± 0.00087 5.538 ± 0.128 % 93.354 ± 0.146 %
|
| 314 |
+
115 7.5890 ± 0.1276 0.00557 ± 0.00197 0.03035 ± 0.00087 5.557 ± 0.128 % 93.344 ± 0.146 %
|
| 315 |
+
116 7.5840 ± 0.1270 0.00630 ± 0.00197 0.03098 ± 0.00087 5.656 ± 0.129 % 93.293 ± 0.145 %
|
| 316 |
+
117 7.4888 ± 0.1246 0.00723 ± 0.00197 0.03153 ± 0.00087 5.773 ± 0.128 % 93.296 ± 0.145 %
|
| 317 |
+
118 7.3970 ± 0.1222 0.00775 ± 0.00198 0.03211 ± 0.00087 5.885 ± 0.128 % 93.274 ± 0.144 %
|
| 318 |
+
119 7.3007 ± 0.1199 0.00810 ± 0.00197 0.03242 ± 0.00087 5.958 ± 0.128 % 93.290 ± 0.144 %
|
| 319 |
+
120 7.2211 ± 0.1178 0.00849 ± 0.00198 0.03314 ± 0.00088 6.090 ± 0.129 % 93.284 ± 0.143 %
|
| 320 |
+
121 7.1431 ± 0.1159 0.00926 ± 0.00199 0.03376 ± 0.00088 6.213 ± 0.128 % 93.285 ± 0.142 %

====== Perplexity statistics ======
Mean PPL(Q) : 7.143114 ± 0.115919
Mean PPL(base) : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 99.24%
Mean ln(PPL(Q)/PPL(base)) : 0.009265 ± 0.001992
Mean PPL(Q)/PPL(base) : 1.009308 ± 0.002011
Mean PPL(Q)-PPL(base) : 0.065874 ± 0.014247

====== KL divergence statistics ======
Mean KLD: 0.033760 ± 0.000879
Maximum KLD: 11.391310
99.9% KLD: 1.588454
99.0% KLD: 0.452087
95.0% KLD: 0.122497
90.0% KLD: 0.062966
Median KLD: 0.008101
10.0% KLD: 0.000037
5.0% KLD: 0.000006
1.0% KLD: 0.000000
0.1% KLD: -0.000002
Minimum KLD: -0.000011

====== Token probability statistics ======
Mean Δp: -0.273 ± 0.035 %
Maximum Δp: 98.972%
99.9% Δp: 48.648%
99.0% Δp: 16.258%
95.0% Δp: 5.556%
90.0% Δp: 2.870%
75.0% Δp: 0.373%
Median Δp: -0.000%
25.0% Δp: -0.540%
10.0% Δp: -3.529%
5.0% Δp: -6.455%
1.0% Δp: -21.343%
0.1% Δp: -61.381%
Minimum Δp: -97.513%
RMS Δp : 6.213 ± 0.128 %
Same top p: 93.285 ± 0.142 %

llama_perf_context_print: load time = 57217.13 ms
llama_perf_context_print: prompt eval time = 105639.72 ms / 61952 tokens ( 1.71 ms per token, 586.45 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 120866.86 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1055 + ( 22659 = 20879 + 144 + 1635) + 419 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 887 + ( 22826 = 18804 + 848 + 3174) + 420 |
llama_memory_breakdown_print: | - Host | 149766 = 149662 + 0 + 104 |
```
|
kld_data/unsloth/Q6_K/MiniMax-M2.5-Q6_K.md
ADDED
|
@@ -0,0 +1,370 @@
### MiniMax-M2.5-Q6_K (unsloth)

174.86 GiB (6.57 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q6_K/MiniMax-M2.5-Q6_K-00001-of-00005.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 93428 used, -69556 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 90710 used, -66838 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 184138 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 138443 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 36781 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 9807 MiB used, 14064 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1941 MiB used, 21929 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 8 layers ( 1 overflowing), 22212 MiB used, 1659 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 55 layers (51 overflowing), 22612 MiB used, 1259 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.10 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 4 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q6_K/MiniMax-M2.5-Q6_K-00001-of-00005.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 18
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 5
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q6_K: 436 tensors
|
| 87 |
+
print_info: file format = GGUF V3 (latest)
|
| 88 |
+
print_info: file type = Q6_K
|
| 89 |
+
print_info: file size = 174.86 GiB (6.57 BPW)
|
| 90 |
+
load: 0 unused tokens
|
| 91 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 92 |
+
load: printing all EOG tokens:
|
| 93 |
+
load: - 200004 ('<fim_pad>')
|
| 94 |
+
load: - 200005 ('<reponame>')
|
| 95 |
+
load: - 200020 ('[e~[')
|
| 96 |
+
load: special tokens cache size = 54
|
| 97 |
+
load: token to piece cache size = 1.3355 MB
|
| 98 |
+
print_info: arch = minimax-m2
|
| 99 |
+
print_info: vocab_only = 0
|
| 100 |
+
print_info: no_alloc = 0
|
| 101 |
+
print_info: n_ctx_train = 196608
|
| 102 |
+
print_info: n_embd = 3072
|
| 103 |
+
print_info: n_embd_inp = 3072
|
| 104 |
+
print_info: n_layer = 62
|
| 105 |
+
print_info: n_head = 48
|
| 106 |
+
print_info: n_head_kv = 8
|
| 107 |
+
print_info: n_rot = 64
|
| 108 |
+
print_info: n_swa = 0
|
| 109 |
+
print_info: is_swa_any = 0
|
| 110 |
+
print_info: n_embd_head_k = 128
|
| 111 |
+
print_info: n_embd_head_v = 128
|
| 112 |
+
print_info: n_gqa = 6
|
| 113 |
+
print_info: n_embd_k_gqa = 1024
|
| 114 |
+
print_info: n_embd_v_gqa = 1024
|
| 115 |
+
print_info: f_norm_eps = 0.0e+00
|
| 116 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 117 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 118 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 119 |
+
print_info: f_logit_scale = 0.0e+00
|
| 120 |
+
print_info: f_attn_scale = 0.0e+00
|
| 121 |
+
print_info: n_ff = 1536
|
| 122 |
+
print_info: n_expert = 256
|
| 123 |
+
print_info: n_expert_used = 8
|
| 124 |
+
print_info: n_expert_groups = 0
|
| 125 |
+
print_info: n_group_used = 0
|
| 126 |
+
print_info: causal attn = 1
|
| 127 |
+
print_info: pooling type = 0
|
| 128 |
+
print_info: rope type = 2
|
| 129 |
+
print_info: rope scaling = linear
|
| 130 |
+
print_info: freq_base_train = 5000000.0
|
| 131 |
+
print_info: freq_scale_train = 1
|
| 132 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 133 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 134 |
+
print_info: rope_finetuned = unknown
|
| 135 |
+
print_info: model type = 230B.A10B
|
| 136 |
+
print_info: model params = 228.69 B
|
| 137 |
+
print_info: general.name = Minimax-M2.5
|
| 138 |
+
print_info: vocab type = BPE
|
| 139 |
+
print_info: n_vocab = 200064
|
| 140 |
+
print_info: n_merges = 199744
|
| 141 |
+
print_info: BOS token = 200034 ']~!b['
|
| 142 |
+
print_info: EOS token = 200020 '[e~['
|
| 143 |
+
print_info: UNK token = 200021 ']!d~['
|
| 144 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 145 |
+
print_info: LF token = 10 'Ċ'
|
| 146 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 147 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 148 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 149 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 150 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 151 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200020 '[e~['
|
| 154 |
+
print_info: max token length = 256
|
| 155 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 156 |
+
load_tensors: offloading output layer to GPU
|
| 157 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 158 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 159 |
+
load_tensors: CPU_Mapped model buffer size = 46440.88 MiB
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 46905.08 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46905.08 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 38287.56 MiB
|
| 163 |
+
load_tensors: CUDA0 model buffer size = 20142.04 MiB
|
| 164 |
+
load_tensors: CUDA1 model buffer size = 18574.08 MiB
|
| 165 |
+
....................................................................................................
|
| 166 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 167 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 168 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 169 |
+
llama_context: constructing llama_context
|
| 170 |
+
llama_context: n_seq_max = 8
|
| 171 |
+
llama_context: n_ctx = 4096
|
| 172 |
+
llama_context: n_ctx_seq = 512
|
| 173 |
+
llama_context: n_batch = 4096
|
| 174 |
+
llama_context: n_ubatch = 4096
|
| 175 |
+
llama_context: causal_attn = 1
|
| 176 |
+
llama_context: flash_attn = enabled
|
| 177 |
+
llama_context: kv_unified = false
|
| 178 |
+
llama_context: freq_base = 5000000.0
|
| 179 |
+
llama_context: freq_scale = 1
|
| 180 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 181 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 182 |
+
llama_kv_cache: CUDA0 KV buffer size = 128.00 MiB
|
| 183 |
+
llama_kv_cache: CUDA1 KV buffer size = 864.00 MiB
|
| 184 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 185 |
+
sched_reserve: reserving ...
|
| 186 |
+
sched_reserve: CUDA0 compute buffer size = 1942.00 MiB
|
| 187 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 188 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 189 |
+
sched_reserve: graph nodes = 4099
|
| 190 |
+
sched_reserve: graph splits = 203 (with bs=4096), 105 (with bs=1)
|
| 191 |
+
sched_reserve: reserve took 22.43 ms, sched copies = 1
|
| 192 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 193 |
+
|
| 194 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 195 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 196 |
+
kl_divergence: 10.10 seconds per pass - ETA 2.53 minutes
|
| 197 |
+
|
| 198 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 199 |
+
1 6.4555 ± 1.2560 0.01286 ± 0.01242 0.00866 ± 0.00099 2.838 ± 0.355 % 94.510 ± 1.429 %
|
| 200 |
+
2 4.6874 ± 0.5668 0.00267 ± 0.00810 0.00636 ± 0.00059 2.408 ± 0.256 % 96.667 ± 0.796 %
|
| 201 |
+
3 4.5188 ± 0.4523 0.00675 ± 0.00822 0.00728 ± 0.00056 2.884 ± 0.221 % 96.340 ± 0.679 %
|
| 202 |
+
4 5.1557 ± 0.4629 0.00531 ± 0.00717 0.00839 ± 0.00059 3.151 ± 0.265 % 96.471 ± 0.578 %
|
| 203 |
+
5 4.9544 ± 0.3984 0.01206 ± 0.00792 0.00912 ± 0.00067 3.360 ± 0.319 % 96.471 ± 0.517 %
|
| 204 |
+
6 6.0494 ± 0.4754 0.00877 ± 0.00740 0.01094 ± 0.00069 3.237 ± 0.278 % 95.882 ± 0.508 %
|
| 205 |
+
7 5.6350 ± 0.3961 0.00965 ± 0.00663 0.01144 ± 0.00065 3.382 ± 0.253 % 95.630 ± 0.484 %
|
| 206 |
+
8 6.3483 ± 0.4225 0.00534 ± 0.00608 0.01166 ± 0.00059 3.289 ± 0.229 % 95.490 ± 0.460 %
|
| 207 |
+
9 6.2318 ± 0.3865 0.00610 ± 0.00552 0.01136 ± 0.00054 3.221 ± 0.210 % 95.425 ± 0.436 %
|
| 208 |
+
10 5.6971 ± 0.3287 0.00543 ± 0.00506 0.01085 ± 0.00049 3.188 ± 0.193 % 95.490 ± 0.411 %
|
| 209 |
+
11 6.2402 ± 0.3483 0.00468 ± 0.00478 0.01154 ± 0.00053 3.215 ± 0.186 % 95.294 ± 0.400 %
|
| 210 |
+
12 6.9074 ± 0.3745 0.00477 ± 0.00456 0.01222 ± 0.00060 3.154 ± 0.174 % 95.163 ± 0.388 %
|
| 211 |
+
13 7.1720 ± 0.3703 0.00509 ± 0.00426 0.01196 ± 0.00056 3.101 ± 0.164 % 95.023 ± 0.378 %
|
| 212 |
+
14 7.7287 ± 0.3890 0.00460 ± 0.00415 0.01228 ± 0.00061 3.121 ± 0.162 % 94.958 ± 0.366 %
|
| 213 |
+
15 8.0840 ± 0.3936 0.00341 ± 0.00394 0.01238 ± 0.00058 3.124 ± 0.155 % 94.902 ± 0.356 %
|
| 214 |
+
16 8.3409 ± 0.3933 0.00249 ± 0.00373 0.01211 ± 0.00055 3.068 ± 0.148 % 94.951 ± 0.343 %
|
| 215 |
+
17 8.5484 ± 0.3933 -0.00005 ± 0.00371 0.01267 ± 0.00055 3.050 ± 0.142 % 94.948 ± 0.333 %
|
| 216 |
+
18 8.0439 ± 0.3569 -0.00165 ± 0.00358 0.01252 ± 0.00053 3.015 ± 0.136 % 95.076 ± 0.319 %
|
| 217 |
+
19 8.1715 ± 0.3529 -0.00171 ± 0.00344 0.01219 ± 0.00050 2.990 ± 0.131 % 95.046 ± 0.312 %
|
| 218 |
+
20 8.2194 ± 0.3457 -0.00374 ± 0.00337 0.01248 ± 0.00049 3.036 ± 0.125 % 95.098 ± 0.302 %
|
| 219 |
+
21 8.1985 ± 0.3365 -0.00372 ± 0.00327 0.01256 ± 0.00047 3.039 ± 0.120 % 95.163 ± 0.293 %
|
| 220 |
+
22 8.5144 ± 0.3446 -0.00358 ± 0.00321 0.01280 ± 0.00046 3.040 ± 0.115 % 95.098 ± 0.288 %
|
| 221 |
+
23 8.5366 ± 0.3391 -0.00218 ± 0.00329 0.01435 ± 0.00063 3.366 ± 0.176 % 94.970 ± 0.285 %
|
| 222 |
+
24 8.9430 ± 0.3497 -0.00175 ± 0.00318 0.01418 ± 0.00060 3.319 ± 0.171 % 94.918 ± 0.281 %
|
| 223 |
+
25 8.9252 ± 0.3426 -0.00235 ± 0.00314 0.01449 ± 0.00059 3.389 ± 0.166 % 94.776 ± 0.279 %
|
| 224 |
+
26 8.3239 ± 0.3102 -0.00349 ± 0.00314 0.01518 ± 0.00061 3.734 ± 0.178 % 94.857 ± 0.271 %
|
| 225 |
+
27 7.8829 ± 0.2858 -0.00399 ± 0.00310 0.01632 ± 0.00065 4.120 ± 0.192 % 94.887 ± 0.265 %
|
| 226 |
+
28 8.0059 ± 0.2860 -0.00219 ± 0.00304 0.01651 ± 0.00063 4.123 ± 0.186 % 94.790 ± 0.263 %
|
| 227 |
+
29 7.9291 ± 0.2782 -0.00236 ± 0.00297 0.01649 ± 0.00062 4.077 ± 0.182 % 94.861 ± 0.257 %
|
| 228 |
+
30 7.4162 ± 0.2535 -0.00203 ± 0.00288 0.01603 ± 0.00060 4.027 ± 0.178 % 95.033 ± 0.248 %
|
| 229 |
+
31 6.9885 ± 0.2328 -0.00206 ± 0.00281 0.01576 ± 0.00058 3.980 ± 0.175 % 95.168 ± 0.241 %
|
| 230 |
+
32 6.8165 ± 0.2217 -0.00174 ± 0.00275 0.01555 ± 0.00057 3.973 ± 0.170 % 95.184 ± 0.237 %
|
| 231 |
+
33 6.6836 ± 0.2125 -0.00153 ± 0.00269 0.01533 ± 0.00055 3.954 ± 0.166 % 95.187 ± 0.233 %
|
| 232 |
+
34 6.8712 ± 0.2164 -0.00088 ± 0.00271 0.01586 ± 0.00055 3.974 ± 0.162 % 95.063 ± 0.233 %
|
| 233 |
+
35 6.9728 ± 0.2183 -0.00084 ± 0.00268 0.01626 ± 0.00054 3.998 ± 0.157 % 94.980 ± 0.231 %
|
| 234 |
+
36 7.0352 ± 0.2180 -0.00082 ± 0.00263 0.01611 ± 0.00053 3.969 ± 0.154 % 95.011 ± 0.227 %
|
| 235 |
+
37 7.0547 ± 0.2159 -0.00033 ± 0.00261 0.01627 ± 0.00052 4.019 ± 0.151 % 94.987 ± 0.225 %
|
| 236 |
+
38 7.2703 ± 0.2206 0.00003 ± 0.00256 0.01619 ± 0.00051 3.989 ± 0.148 % 94.964 ± 0.222 %
|
| 237 |
+
39 7.2242 ± 0.2160 0.00062 ± 0.00254 0.01659 ± 0.00052 4.061 ± 0.148 % 94.882 ± 0.221 %
|
| 238 |
+
40 6.9875 ± 0.2048 0.00223 ± 0.00258 0.01779 ± 0.00056 4.364 ± 0.159 % 94.843 ± 0.219 %
|
| 239 |
+
41 6.7699 ± 0.1946 0.00142 ± 0.00261 0.01869 ± 0.00058 4.622 ± 0.158 % 94.854 ± 0.216 %
|
| 240 |
+
42 6.5642 ± 0.1854 0.00189 ± 0.00262 0.01940 ± 0.00059 4.825 ± 0.158 % 94.865 ± 0.213 %
|
| 241 |
+
43 6.3621 ± 0.1764 0.00176 ± 0.00262 0.01997 ± 0.00061 4.942 ± 0.157 % 94.875 ± 0.211 %
|
| 242 |
+
44 6.3200 ± 0.1725 0.00145 ± 0.00257 0.01968 ± 0.00060 4.903 ± 0.155 % 94.866 ± 0.208 %
|
| 243 |
+
45 6.4603 ± 0.1754 0.00151 ± 0.00255 0.02000 ± 0.00060 4.886 ± 0.152 % 94.797 ± 0.207 %
|
| 244 |
+
46 6.5984 ± 0.1775 0.00057 ± 0.00252 0.01985 ± 0.00058 4.848 ± 0.150 % 94.791 ± 0.205 %
|
| 245 |
+
47 6.7481 ± 0.1801 0.00081 ± 0.00247 0.01966 ± 0.00057 4.808 ± 0.148 % 94.802 ± 0.203 %
|
| 246 |
+
48 6.6354 ± 0.1742 0.00062 ± 0.00243 0.01940 ± 0.00056 4.775 ± 0.146 % 94.837 ± 0.200 %
|
| 247 |
+
49 6.7362 ± 0.1752 0.00068 ± 0.00263 0.02178 ± 0.00092 4.939 ± 0.160 % 94.654 ± 0.201 %
|
| 248 |
+
50 6.8368 ± 0.1770 0.00135 ± 0.00260 0.02178 ± 0.00090 4.915 ± 0.158 % 94.635 ± 0.200 %
|
| 249 |
+
51 6.9435 ± 0.1782 0.00143 ± 0.00256 0.02158 ± 0.00088 4.882 ± 0.156 % 94.687 ± 0.197 %
|
| 250 |
+
52 7.0042 ± 0.1779 0.00082 ± 0.00254 0.02167 ± 0.00087 4.872 ± 0.153 % 94.653 ± 0.195 %
|
| 251 |
+
53 7.1133 ± 0.1789 0.00050 ± 0.00251 0.02155 ± 0.00085 4.850 ± 0.151 % 94.650 ± 0.194 %
|
| 252 |
+
54 7.1671 ± 0.1782 0.00025 ± 0.00247 0.02135 ± 0.00084 4.814 ± 0.150 % 94.655 ± 0.192 %
|
| 253 |
+
55 7.2190 ± 0.1776 0.00044 ± 0.00244 0.02114 ± 0.00082 4.780 ± 0.148 % 94.667 ± 0.190 %
|
| 254 |
+
56 7.2605 ± 0.1771 0.00044 ± 0.00240 0.02097 ± 0.00081 4.753 ± 0.146 % 94.664 ± 0.188 %
|
| 255 |
+
57 7.2618 ± 0.1755 0.00068 ± 0.00238 0.02100 ± 0.00080 4.754 ± 0.144 % 94.627 ± 0.187 %
|
| 256 |
+
58 7.2738 ± 0.1743 0.00096 ± 0.00235 0.02082 ± 0.00078 4.736 ± 0.142 % 94.652 ± 0.185 %
|
| 257 |
+
59 7.2372 ± 0.1716 0.00104 ± 0.00232 0.02055 ± 0.00077 4.704 ± 0.141 % 94.709 ± 0.183 %
|
| 258 |
+
60 7.2472 ± 0.1705 0.00094 ± 0.00229 0.02041 ± 0.00076 4.679 ± 0.139 % 94.699 ± 0.181 %
|
| 259 |
+
61 7.2964 ± 0.1702 0.00107 ± 0.00226 0.02038 ± 0.00075 4.658 ± 0.138 % 94.696 ± 0.180 %
|
| 260 |
+
62 7.2753 ± 0.1686 0.00144 ± 0.00228 0.02068 ± 0.00083 4.722 ± 0.150 % 94.738 ± 0.178 %
|
| 261 |
+
63 7.3202 ± 0.1688 0.00128 ± 0.00227 0.02063 ± 0.00082 4.696 ± 0.148 % 94.753 ± 0.176 %
|
| 262 |
+
64 7.3027 ± 0.1667 0.00146 ± 0.00225 0.02055 ± 0.00081 4.680 ± 0.147 % 94.755 ± 0.175 %
|
| 263 |
+
65 7.2937 ± 0.1652 0.00140 ± 0.00223 0.02055 ± 0.00080 4.692 ± 0.146 % 94.769 ± 0.173 %
|
| 264 |
+
66 7.3290 ± 0.1649 0.00122 ± 0.00221 0.02057 ± 0.00079 4.673 ± 0.144 % 94.753 ± 0.172 %
|
| 265 |
+
67 7.3393 ± 0.1640 0.00169 ± 0.00219 0.02073 ± 0.00079 4.654 ± 0.143 % 94.738 ± 0.171 %
|
| 266 |
+
68 7.2978 ± 0.1617 0.00198 ± 0.00219 0.02061 ± 0.00078 4.632 ± 0.142 % 94.769 ± 0.169 %
|
| 267 |
+
69 7.3276 ± 0.1612 0.00160 ± 0.00217 0.02054 ± 0.00077 4.609 ± 0.140 % 94.800 ± 0.167 %
|
| 268 |
+
70 7.2987 ± 0.1591 0.00144 ± 0.00216 0.02045 ± 0.00076 4.594 ± 0.139 % 94.812 ± 0.166 %
|
| 269 |
+
71 7.2848 ± 0.1577 0.00153 ± 0.00214 0.02038 ± 0.00075 4.584 ± 0.138 % 94.819 ± 0.165 %
|
| 270 |
+
72 7.3057 ± 0.1573 0.00221 ± 0.00215 0.02041 ± 0.00074 4.573 ± 0.136 % 94.760 ± 0.164 %
|
| 271 |
+
73 7.3125 ± 0.1563 0.00240 ± 0.00214 0.02038 ± 0.00073 4.565 ± 0.135 % 94.741 ± 0.164 %
|
| 272 |
+
74 7.3052 ± 0.1550 0.00223 ± 0.00212 0.02040 ± 0.00072 4.554 ± 0.133 % 94.722 ± 0.163 %
|
| 273 |
+
75 7.3060 ± 0.1540 0.00143 ± 0.00210 0.02052 ± 0.00071 4.554 ± 0.132 % 94.682 ± 0.162 %
|
| 274 |
+
76 7.3668 ± 0.1544 0.00124 ± 0.00208 0.02044 ± 0.00071 4.546 ± 0.131 % 94.670 ± 0.161 %
|
| 275 |
+
77 7.3613 ± 0.1532 0.00076 ± 0.00209 0.02037 ± 0.00070 4.525 ± 0.130 % 94.668 ± 0.160 %
|
| 276 |
+
78 7.3767 ± 0.1527 0.00101 ± 0.00207 0.02029 ± 0.00069 4.507 ± 0.128 % 94.686 ± 0.159 %
|
| 277 |
+
79 7.3851 ± 0.1519 0.00069 ± 0.00207 0.02035 ± 0.00068 4.501 ± 0.128 % 94.698 ± 0.158 %
|
| 278 |
+
80 7.3899 ± 0.1516 0.00111 ± 0.00209 0.02037 ± 0.00068 4.498 ± 0.127 % 94.696 ± 0.157 %
|
| 279 |
+
81 7.3686 ± 0.1502 0.00131 ± 0.00207 0.02045 ± 0.00068 4.486 ± 0.125 % 94.713 ± 0.156 %
|
| 280 |
+
82 7.3487 ± 0.1487 0.00153 ± 0.00205 0.02038 ± 0.00067 4.474 ± 0.124 % 94.730 ± 0.155 %
|
| 281 |
+
83 7.3800 ± 0.1482 0.00147 ± 0.00203 0.02025 ± 0.00066 4.453 ± 0.123 % 94.722 ± 0.154 %
|
| 282 |
+
84 7.3972 ± 0.1474 0.00162 ± 0.00201 0.02012 ± 0.00066 4.436 ± 0.122 % 94.725 ± 0.153 %
|
| 283 |
+
85 7.3940 ± 0.1462 0.00173 ± 0.00199 0.02000 ± 0.00065 4.420 ± 0.121 % 94.699 ± 0.152 %
|
| 284 |
+
86 7.3283 ± 0.1436 0.00185 ± 0.00197 0.01987 ± 0.00064 4.405 ± 0.120 % 94.724 ± 0.151 %
|
| 285 |
+
87 7.2720 ± 0.1412 0.00195 ± 0.00196 0.01975 ± 0.00063 4.387 ± 0.120 % 94.713 ± 0.150 %
|
| 286 |
+
88 7.2109 ± 0.1388 0.00184 ± 0.00194 0.01963 ± 0.00063 4.375 ± 0.119 % 94.733 ± 0.149 %
|
| 287 |
+
89 7.1395 ± 0.1362 0.00186 ± 0.00192 0.01953 ± 0.00062 4.363 ± 0.118 % 94.752 ± 0.148 %
|
| 288 |
+
90 7.0854 ± 0.1340 0.00196 ± 0.00190 0.01942 ± 0.00062 4.346 ± 0.117 % 94.776 ± 0.147 %
|
| 289 |
+
91 7.0351 ± 0.1320 0.00195 ± 0.00188 0.01932 ± 0.00061 4.337 ± 0.116 % 94.794 ± 0.146 %
|
| 290 |
+
92 6.9792 ± 0.1299 0.00202 ± 0.00187 0.01921 ± 0.00060 4.329 ± 0.115 % 94.795 ± 0.145 %
|
| 291 |
+
93 6.9976 ± 0.1298 0.00215 ± 0.00186 0.01948 ± 0.00062 4.355 ± 0.117 % 94.767 ± 0.145 %
|
| 292 |
+
94 7.0269 ± 0.1295 0.00200 ± 0.00184 0.01937 ± 0.00061 4.338 ± 0.116 % 94.777 ± 0.144 %
|
| 293 |
+
95 7.1348 ± 0.1312 0.00205 ± 0.00183 0.01947 ± 0.00061 4.340 ± 0.115 % 94.741 ± 0.143 %
|
| 294 |
+
96 7.2256 ± 0.1323 0.00173 ± 0.00182 0.01949 ± 0.00060 4.325 ± 0.115 % 94.710 ± 0.143 %
|
| 295 |
+
97 7.3015 ± 0.1331 0.00153 ± 0.00181 0.01941 ± 0.00060 4.307 ± 0.114 % 94.700 ± 0.142 %
|
| 296 |
+
98 7.4389 ± 0.1355 0.00202 ± 0.00181 0.01935 ± 0.00059 4.291 ± 0.113 % 94.674 ± 0.142 %
|
| 297 |
+
99 7.5561 ± 0.1372 0.00210 ± 0.00180 0.01936 ± 0.00059 4.275 ± 0.112 % 94.644 ± 0.142 %
|
| 298 |
+
100 7.5924 ± 0.1373 0.00200 ± 0.00178 0.01937 ± 0.00058 4.273 ± 0.112 % 94.647 ± 0.141 %
|
| 299 |
+
101 7.6295 ± 0.1374 0.00204 ± 0.00177 0.01933 ± 0.00058 4.266 ± 0.111 % 94.622 ± 0.141 %
|
| 300 |
+
102 7.6912 ± 0.1384 0.00220 ± 0.00177 0.01932 ± 0.00057 4.260 ± 0.110 % 94.625 ± 0.140 %
|
| 301 |
+
103 7.6666 ± 0.1374 0.00236 ± 0.00176 0.01923 ± 0.00057 4.248 ± 0.109 % 94.639 ± 0.139 %
|
| 302 |
+
104 7.6095 ± 0.1356 0.00240 ± 0.00175 0.01926 ± 0.00056 4.274 ± 0.109 % 94.668 ± 0.138 %
|
| 303 |
+
105 7.4988 ± 0.1327 0.00225 ± 0.00175 0.01921 ± 0.00056 4.279 ± 0.108 % 94.704 ± 0.137 %
|
| 304 |
+
106 7.3704 ± 0.1295 0.00229 ± 0.00173 0.01910 ± 0.00056 4.280 ± 0.108 % 94.747 ± 0.136 %
|
| 305 |
+
107 7.4271 ± 0.1298 0.00221 ± 0.00172 0.01900 ± 0.00055 4.264 ± 0.107 % 94.759 ± 0.135 %
|
| 306 |
+
108 7.4402 ± 0.1295 0.00220 ± 0.00170 0.01890 ± 0.00055 4.251 ± 0.106 % 94.760 ± 0.134 %
|
| 307 |
+
109 7.4608 ± 0.1293 0.00224 ± 0.00169 0.01883 ± 0.00054 4.239 ± 0.106 % 94.769 ± 0.134 %
|
| 308 |
+
110 7.4973 ± 0.1294 0.00242 ± 0.00168 0.01876 ± 0.00054 4.229 ± 0.105 % 94.777 ± 0.133 %
|
| 309 |
+
111 7.5443 ± 0.1295 0.00237 ± 0.00167 0.01869 ± 0.00053 4.215 ± 0.104 % 94.771 ± 0.132 %
|
| 310 |
+
112 7.5527 ± 0.1290 0.00214 ± 0.00166 0.01859 ± 0.00053 4.204 ± 0.104 % 94.783 ± 0.132 %
|
| 311 |
+
113 7.5638 ± 0.1285 0.00223 ± 0.00164 0.01854 ± 0.00052 4.195 ± 0.103 % 94.780 ± 0.131 %
|
| 312 |
+
114 7.5813 ± 0.1283 0.00220 ± 0.00164 0.01854 ± 0.00052 4.182 ± 0.102 % 94.761 ± 0.131 %
|
| 313 |
+
115 7.5652 ± 0.1274 0.00242 ± 0.00164 0.01863 ± 0.00052 4.211 ± 0.102 % 94.752 ± 0.130 %
|
| 314 |
+
116 7.5534 ± 0.1267 0.00225 ± 0.00164 0.01886 ± 0.00051 4.260 ± 0.102 % 94.713 ± 0.130 %
|
| 315 |
+
117 7.4546 ± 0.1242 0.00266 ± 0.00163 0.01907 ± 0.00051 4.314 ± 0.100 % 94.714 ± 0.130 %
|
| 316 |
+
118 7.3592 ± 0.1218 0.00263 ± 0.00163 0.01935 ± 0.00051 4.392 ± 0.100 % 94.706 ± 0.129 %
|
| 317 |
+
119 7.2614 ± 0.1194 0.00270 ± 0.00163 0.01953 ± 0.00051 4.450 ± 0.099 % 94.714 ± 0.128 %
|
| 318 |
+
120 7.1802 ± 0.1173 0.00280 ± 0.00164 0.01995 ± 0.00052 4.560 ± 0.100 % 94.690 ± 0.128 %
|
| 319 |
+
121 7.0956 ± 0.1153 0.00259 ± 0.00163 0.02013 ± 0.00052 4.610 ± 0.099 % 94.711 ± 0.127 %
|
| 320 |
+
|
| 321 |
+
====== Perplexity statistics ======
|
| 322 |
+
Mean PPL(Q) : 7.095623 ± 0.115273
|
| 323 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 324 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.49%
|
| 325 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.002594 ± 0.001633
|
| 326 |
+
Mean PPL(Q)/PPL(base) : 1.002597 ± 0.001637
|
| 327 |
+
Mean PPL(Q)-PPL(base) : 0.018383 ± 0.011593
|
| 328 |
+
|
| 329 |
+
====== KL divergence statistics ======
|
| 330 |
+
Mean KLD: 0.020127 ± 0.000519
|
| 331 |
+
Maximum KLD: 6.100366
|
| 332 |
+
99.9% KLD: 1.024550
|
| 333 |
+
99.0% KLD: 0.261472
|
| 334 |
+
95.0% KLD: 0.071629
|
| 335 |
+
90.0% KLD: 0.038561
|
| 336 |
+
Median KLD: 0.004884
|
| 337 |
+
10.0% KLD: 0.000020
|
| 338 |
+
5.0% KLD: 0.000004
|
| 339 |
+
1.0% KLD: -0.000000
|
| 340 |
+
0.1% KLD: -0.000003
|
| 341 |
+
Minimum KLD: -0.000030
|
| 342 |
+
|
| 343 |
+
====== Token probability statistics ======
|
| 344 |
+
Mean Δp: -0.024 ± 0.026 %
|
| 345 |
+
Maximum Δp: 78.814%
|
| 346 |
+
99.9% Δp: 40.507%
|
| 347 |
+
99.0% Δp: 13.488%
|
| 348 |
+
95.0% Δp: 4.770%
|
| 349 |
+
90.0% Δp: 2.435%
|
| 350 |
+
75.0% Δp: 0.338%
|
| 351 |
+
Median Δp: 0.000%
|
| 352 |
+
25.0% Δp: -0.379%
|
| 353 |
+
10.0% Δp: -2.513%
|
| 354 |
+
5.0% Δp: -4.751%
|
| 355 |
+
1.0% Δp: -13.809%
|
| 356 |
+
0.1% Δp: -41.212%
|
| 357 |
+
Minimum Δp: -99.775%
|
| 358 |
+
RMS Δp : 4.610 ± 0.099 %
|
| 359 |
+
Same top p: 94.711 ± 0.127 %
|
| 360 |
+
|
| 361 |
+
llama_perf_context_print: load time = 67124.39 ms
|
| 362 |
+
llama_perf_context_print: prompt eval time = 125282.78 ms / 61952 tokens ( 2.02 ms per token, 494.50 tokens per second)
|
| 363 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 364 |
+
llama_perf_context_print: total time = 142970.73 ms / 61953 tokens
|
| 365 |
+
llama_perf_context_print: graphs reused = 0
|
| 366 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 367 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1501 + ( 22212 = 20142 + 128 + 1941) + 421 |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1069 + ( 22612 = 18574 + 864 + 3174) + 453 |
|
| 369 |
+
llama_memory_breakdown_print: | - Host | 178642 = 178538 + 0 + 104 |
|
| 370 |
+
```
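The summary blocks above ("KL divergence statistics", "Token probability statistics") aggregate over every evaluated token position (121 chunks of 512 tokens, i.e. the 61952 tokens shown in the perf line). As a rough illustrative sketch only, not llama.cpp's actual implementation (which computes this in C++ against the logits saved with `--kl-divergence-base`), the per-token quantity that "Mean KLD" averages can be written as below; the array names and the KL(base || quantized) direction are assumptions made for the example.

```python
import numpy as np

def log_softmax(logits: np.ndarray) -> np.ndarray:
    # max-subtraction keeps exp() from overflowing
    z = logits - logits.max()
    return z - np.log(np.exp(z).sum())

def token_kld(logits_base: np.ndarray, logits_quant: np.ndarray) -> float:
    """KL(P_base || P_quant) for one token position over the vocabulary."""
    logp = log_softmax(logits_base)   # reference (BF16) model distribution
    logq = log_softmax(logits_quant)  # quantized model distribution
    return float(np.sum(np.exp(logp) * (logp - logq)))

# "Mean KLD" in the summary is then the mean of token_kld(...) over all
# evaluated positions; the percentile rows (99.9%, 99.0%, ...) are
# quantiles of the same per-token values.
```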
|
kld_data/unsloth/Q8_0/MiniMax-M2.5-Q8_0.md
ADDED
|
@@ -0,0 +1,371 @@
|
| 1 |
+
### MiniMax-M2.5-Q8_0 (unsloth)
|
| 2 |
+
|
| 3 |
+
226.43 GiB (8.51 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q8_0/MiniMax-M2.5-Q8_0-00001-of-00006.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 120537 used, -96666 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 116267 used, -92395 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 236805 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 191109 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 35451 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 11416 MiB used, 12455 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 2499 MiB used, 21371 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 6 layers ( 1 overflowing), 22463 MiB used, 1408 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 57 layers (54 overflowing), 22050 MiB used, 1821 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.18 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 5 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/Q8_0/MiniMax-M2.5-Q8_0-00001-of-00006.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 7
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 6
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q8_0: 436 tensors
|
| 87 |
+
print_info: file format = GGUF V3 (latest)
|
| 88 |
+
print_info: file type = Q8_0
|
| 89 |
+
print_info: file size = 226.43 GiB (8.51 BPW)
|
| 90 |
+
load: 0 unused tokens
|
| 91 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 92 |
+
load: printing all EOG tokens:
|
| 93 |
+
load: - 200004 ('<fim_pad>')
|
| 94 |
+
load: - 200005 ('<reponame>')
|
| 95 |
+
load: - 200020 ('[e~[')
|
| 96 |
+
load: special tokens cache size = 54
|
| 97 |
+
load: token to piece cache size = 1.3355 MB
|
| 98 |
+
print_info: arch = minimax-m2
|
| 99 |
+
print_info: vocab_only = 0
|
| 100 |
+
print_info: no_alloc = 0
|
| 101 |
+
print_info: n_ctx_train = 196608
|
| 102 |
+
print_info: n_embd = 3072
|
| 103 |
+
print_info: n_embd_inp = 3072
|
| 104 |
+
print_info: n_layer = 62
|
| 105 |
+
print_info: n_head = 48
|
| 106 |
+
print_info: n_head_kv = 8
|
| 107 |
+
print_info: n_rot = 64
|
| 108 |
+
print_info: n_swa = 0
|
| 109 |
+
print_info: is_swa_any = 0
|
| 110 |
+
print_info: n_embd_head_k = 128
|
| 111 |
+
print_info: n_embd_head_v = 128
|
| 112 |
+
print_info: n_gqa = 6
|
| 113 |
+
print_info: n_embd_k_gqa = 1024
|
| 114 |
+
print_info: n_embd_v_gqa = 1024
|
| 115 |
+
print_info: f_norm_eps = 0.0e+00
|
| 116 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 117 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 118 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 119 |
+
print_info: f_logit_scale = 0.0e+00
|
| 120 |
+
print_info: f_attn_scale = 0.0e+00
|
| 121 |
+
print_info: n_ff = 1536
|
| 122 |
+
print_info: n_expert = 256
|
| 123 |
+
print_info: n_expert_used = 8
|
| 124 |
+
print_info: n_expert_groups = 0
|
| 125 |
+
print_info: n_group_used = 0
|
| 126 |
+
print_info: causal attn = 1
|
| 127 |
+
print_info: pooling type = 0
|
| 128 |
+
print_info: rope type = 2
|
| 129 |
+
print_info: rope scaling = linear
|
| 130 |
+
print_info: freq_base_train = 5000000.0
|
| 131 |
+
print_info: freq_scale_train = 1
|
| 132 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 133 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 134 |
+
print_info: rope_finetuned = unknown
|
| 135 |
+
print_info: model type = 230B.A10B
|
| 136 |
+
print_info: model params = 228.69 B
|
| 137 |
+
print_info: general.name = Minimax-M2.5
|
| 138 |
+
print_info: vocab type = BPE
|
| 139 |
+
print_info: n_vocab = 200064
|
| 140 |
+
print_info: n_merges = 199744
|
| 141 |
+
print_info: BOS token = 200034 ']~!b['
|
| 142 |
+
print_info: EOS token = 200020 '[e~['
|
| 143 |
+
print_info: UNK token = 200021 ']!d~['
|
| 144 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 145 |
+
print_info: LF token = 10 'Ċ'
|
| 146 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 147 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 148 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 149 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 150 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 151 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200020 '[e~['
|
| 154 |
+
print_info: max token length = 256
|
| 155 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 156 |
+
load_tensors: offloading output layer to GPU
|
| 157 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 158 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 159 |
+
load_tensors: CPU_Mapped model buffer size = 46527.54 MiB
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 47087.13 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47084.12 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47128.79 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 43367.46 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 19867.06 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 17980.67 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 96.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 896.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 2500.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 221 (with bs=4096), 117 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 22.09 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 10.94 seconds per pass - ETA 2.75 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.4337 ± 1.2337 0.00949 ± 0.01159 0.00740 ± 0.00128 2.939 ± 0.488 % 96.863 ± 1.094 %
|
| 201 |
+
2 4.6863 ± 0.5613 0.00242 ± 0.00689 0.00551 ± 0.00069 2.346 ± 0.316 % 97.451 ± 0.699 %
|
| 202 |
+
3 4.5152 ± 0.4478 0.00595 ± 0.00713 0.00608 ± 0.00065 2.499 ± 0.235 % 97.255 ± 0.591 %
|
| 203 |
+
4 5.1523 ± 0.4588 0.00466 ± 0.00596 0.00695 ± 0.00067 2.700 ± 0.239 % 96.765 ± 0.554 %
|
| 204 |
+
5 4.9498 ± 0.3959 0.01112 ± 0.00718 0.00713 ± 0.00065 2.804 ± 0.240 % 96.706 ± 0.500 %
|
| 205 |
+
6 6.0523 ± 0.4742 0.00925 ± 0.00653 0.00850 ± 0.00063 2.667 ± 0.211 % 96.340 ± 0.480 %
|
| 206 |
+
7 5.6238 ± 0.3946 0.00767 ± 0.00576 0.00890 ± 0.00057 2.715 ± 0.187 % 96.190 ± 0.453 %
|
| 207 |
+
8 6.3736 ± 0.4253 0.00932 ± 0.00527 0.00887 ± 0.00051 2.635 ± 0.170 % 95.833 ± 0.443 %
|
| 208 |
+
9 6.2537 ± 0.3891 0.00960 ± 0.00480 0.00860 ± 0.00046 2.578 ± 0.158 % 95.991 ± 0.410 %
|
| 209 |
+
10 5.7080 ± 0.3301 0.00734 ± 0.00441 0.00838 ± 0.00043 2.601 ± 0.143 % 95.961 ± 0.390 %
|
| 210 |
+
11 6.2492 ± 0.3495 0.00611 ± 0.00413 0.00851 ± 0.00040 2.582 ± 0.134 % 96.007 ± 0.370 %
|
| 211 |
+
12 6.9125 ± 0.3753 0.00550 ± 0.00393 0.00867 ± 0.00038 2.575 ± 0.125 % 95.882 ± 0.359 %
|
| 212 |
+
13 7.1767 ± 0.3710 0.00574 ± 0.00368 0.00860 ± 0.00035 2.527 ± 0.118 % 95.777 ± 0.349 %
|
| 213 |
+
14 7.7514 ± 0.3912 0.00752 ± 0.00356 0.00901 ± 0.00046 2.553 ± 0.117 % 95.770 ± 0.337 %
|
| 214 |
+
15 8.1203 ± 0.3965 0.00789 ± 0.00339 0.00906 ± 0.00043 2.533 ± 0.110 % 95.608 ± 0.331 %
|
| 215 |
+
16 8.3750 ± 0.3961 0.00658 ± 0.00323 0.00885 ± 0.00041 2.489 ± 0.105 % 95.760 ± 0.316 %
|
| 216 |
+
17 8.6089 ± 0.3974 0.00700 ± 0.00312 0.00890 ± 0.00039 2.444 ± 0.101 % 95.755 ± 0.306 %
|
| 217 |
+
18 8.1072 ± 0.3611 0.00619 ± 0.00299 0.00883 ± 0.00037 2.424 ± 0.098 % 95.882 ± 0.293 %
|
| 218 |
+
19 8.2243 ± 0.3565 0.00473 ± 0.00288 0.00874 ± 0.00036 2.437 ± 0.096 % 95.810 ± 0.288 %
|
| 219 |
+
20 8.2920 ± 0.3503 0.00506 ± 0.00281 0.00896 ± 0.00035 2.463 ± 0.092 % 95.941 ± 0.276 %
|
| 220 |
+
21 8.2580 ± 0.3401 0.00351 ± 0.00273 0.00908 ± 0.00034 2.515 ± 0.092 % 95.985 ± 0.268 %
|
| 221 |
+
22 8.5735 ± 0.3482 0.00333 ± 0.00271 0.00940 ± 0.00038 2.518 ± 0.088 % 95.865 ± 0.266 %
|
| 222 |
+
23 8.5897 ± 0.3421 0.00401 ± 0.00283 0.01024 ± 0.00052 2.544 ± 0.087 % 95.772 ± 0.263 %
|
| 223 |
+
24 8.9924 ± 0.3523 0.00376 ± 0.00274 0.01015 ± 0.00050 2.512 ± 0.084 % 95.752 ± 0.258 %
|
| 224 |
+
25 8.9746 ± 0.3452 0.00317 ± 0.00269 0.01052 ± 0.00049 2.615 ± 0.094 % 95.733 ± 0.253 %
|
| 225 |
+
26 8.3791 ± 0.3130 0.00312 ± 0.00265 0.01101 ± 0.00050 2.942 ± 0.127 % 95.807 ± 0.246 %
|
| 226 |
+
27 7.9362 ± 0.2885 0.00275 ± 0.00270 0.01238 ± 0.00059 3.519 ± 0.191 % 95.686 ± 0.245 %
|
| 227 |
+
28 8.0573 ± 0.2885 0.00420 ± 0.00267 0.01271 ± 0.00058 3.505 ± 0.185 % 95.588 ± 0.243 %
|
| 228 |
+
29 7.9784 ± 0.2806 0.00384 ± 0.00262 0.01268 ± 0.00056 3.494 ± 0.180 % 95.565 ± 0.239 %
|
| 229 |
+
30 7.4607 ± 0.2557 0.00396 ± 0.00254 0.01234 ± 0.00055 3.461 ± 0.176 % 95.699 ± 0.232 %
|
| 230 |
+
31 7.0296 ± 0.2348 0.00381 ± 0.00250 0.01215 ± 0.00053 3.456 ± 0.172 % 95.800 ± 0.226 %
|
| 231 |
+
32 6.8533 ± 0.2235 0.00365 ± 0.00245 0.01202 ± 0.00052 3.462 ± 0.167 % 95.846 ± 0.221 %
|
| 232 |
+
33 6.7204 ± 0.2142 0.00395 ± 0.00240 0.01187 ± 0.00050 3.456 ± 0.162 % 95.841 ± 0.218 %
|
| 233 |
+
34 6.9085 ± 0.2182 0.00453 ± 0.00239 0.01223 ± 0.00049 3.456 ± 0.158 % 95.767 ± 0.216 %
|
| 234 |
+
35 7.0146 ± 0.2203 0.00513 ± 0.00238 0.01256 ± 0.00049 3.506 ± 0.153 % 95.686 ± 0.215 %
|
| 235 |
+
36 7.0757 ± 0.2200 0.00492 ± 0.00234 0.01241 ± 0.00047 3.483 ± 0.150 % 95.697 ± 0.212 %
|
| 236 |
+
37 7.0955 ± 0.2178 0.00544 ± 0.00237 0.01244 ± 0.00047 3.515 ± 0.148 % 95.718 ± 0.208 %
|
| 237 |
+
38 7.3079 ± 0.2224 0.00520 ± 0.00233 0.01244 ± 0.00046 3.503 ± 0.145 % 95.666 ± 0.207 %
|
| 238 |
+
39 7.2573 ± 0.2176 0.00520 ± 0.00231 0.01256 ± 0.00045 3.520 ± 0.142 % 95.666 ± 0.204 %
|
| 239 |
+
40 7.0139 ± 0.2061 0.00601 ± 0.00231 0.01325 ± 0.00047 3.730 ± 0.147 % 95.618 ± 0.203 %
|
| 240 |
+
41 6.8074 ± 0.1963 0.00694 ± 0.00235 0.01402 ± 0.00050 4.010 ± 0.155 % 95.600 ± 0.201 %
|
| 241 |
+
42 6.5969 ± 0.1868 0.00686 ± 0.00235 0.01441 ± 0.00050 4.128 ± 0.157 % 95.668 ± 0.197 %
|
| 242 |
+
43 6.3947 ± 0.1778 0.00686 ± 0.00233 0.01470 ± 0.00050 4.187 ± 0.154 % 95.705 ± 0.194 %
|
| 243 |
+
44 6.3537 ± 0.1739 0.00676 ± 0.00229 0.01450 ± 0.00049 4.157 ± 0.151 % 95.740 ± 0.191 %
|
| 244 |
+
45 6.4921 ± 0.1767 0.00643 ± 0.00228 0.01474 ± 0.00048 4.151 ± 0.148 % 95.678 ± 0.190 %
|
| 245 |
+
46 6.6329 ± 0.1789 0.00580 ± 0.00224 0.01462 ± 0.00047 4.118 ± 0.146 % 95.669 ± 0.188 %
|
| 246 |
+
47 6.7827 ± 0.1815 0.00593 ± 0.00220 0.01444 ± 0.00046 4.080 ± 0.145 % 95.636 ± 0.187 %
|
| 247 |
+
48 6.6707 ± 0.1757 0.00593 ± 0.00216 0.01424 ± 0.00046 4.054 ± 0.143 % 95.678 ± 0.184 %
|
| 248 |
+
49 6.7709 ± 0.1765 0.00581 ± 0.00239 0.01845 ± 0.00167 4.348 ± 0.182 % 95.534 ± 0.185 %
|
| 249 |
+
50 6.8676 ± 0.1781 0.00585 ± 0.00236 0.01838 ± 0.00164 4.335 ± 0.180 % 95.506 ± 0.183 %
|
| 250 |
+
51 6.9733 ± 0.1793 0.00573 ± 0.00232 0.01819 ± 0.00161 4.310 ± 0.177 % 95.571 ± 0.180 %
|
| 251 |
+
52 7.0373 ± 0.1790 0.00555 ± 0.00230 0.01823 ± 0.00158 4.286 ± 0.175 % 95.596 ± 0.178 %
|
| 252 |
+
53 7.1481 ± 0.1801 0.00538 ± 0.00227 0.01809 ± 0.00155 4.261 ± 0.173 % 95.620 ± 0.176 %
|
| 253 |
+
54 7.2049 ± 0.1795 0.00552 ± 0.00224 0.01796 ± 0.00152 4.244 ± 0.171 % 95.628 ± 0.174 %
|
| 254 |
+
55 7.2569 ± 0.1789 0.00568 ± 0.00221 0.01776 ± 0.00149 4.213 ± 0.169 % 95.601 ± 0.173 %
|
| 255 |
+
56 7.2978 ± 0.1783 0.00558 ± 0.00218 0.01760 ± 0.00147 4.192 ± 0.167 % 95.581 ± 0.172 %
|
| 256 |
+
57 7.2974 ± 0.1767 0.00556 ± 0.00215 0.01754 ± 0.00144 4.178 ± 0.164 % 95.583 ± 0.170 %
|
| 257 |
+
58 7.3066 ± 0.1754 0.00546 ± 0.00213 0.01738 ± 0.00142 4.161 ± 0.162 % 95.592 ± 0.169 %
|
| 258 |
+
59 7.2671 ± 0.1726 0.00516 ± 0.00210 0.01714 ± 0.00139 4.130 ± 0.161 % 95.626 ± 0.167 %
|
| 259 |
+
60 7.2767 ± 0.1714 0.00501 ± 0.00207 0.01708 ± 0.00137 4.127 ± 0.159 % 95.582 ± 0.166 %
|
| 260 |
+
61 7.3267 ± 0.1712 0.00522 ± 0.00204 0.01702 ± 0.00135 4.103 ± 0.157 % 95.616 ± 0.164 %
|
| 261 |
+
62 7.3039 ± 0.1695 0.00536 ± 0.00203 0.01690 ± 0.00133 4.097 ± 0.155 % 95.648 ± 0.162 %
|
| 262 |
+
63 7.3535 ± 0.1699 0.00582 ± 0.00203 0.01691 ± 0.00131 4.081 ± 0.153 % 95.618 ± 0.162 %
|
| 263 |
+
64 7.3360 ± 0.1678 0.00600 ± 0.00201 0.01687 ± 0.00129 4.081 ± 0.151 % 95.600 ± 0.161 %
|
| 264 |
+
65 7.3258 ± 0.1663 0.00578 ± 0.00199 0.01678 ± 0.00127 4.075 ± 0.150 % 95.620 ± 0.159 %
|
| 265 |
+
66 7.3592 ± 0.1659 0.00533 ± 0.00198 0.01684 ± 0.00125 4.074 ± 0.147 % 95.579 ± 0.158 %
|
| 266 |
+
67 7.3687 ± 0.1650 0.00568 ± 0.00197 0.01696 ± 0.00123 4.074 ± 0.146 % 95.546 ± 0.158 %
|
| 267 |
+
68 7.3271 ± 0.1626 0.00598 ± 0.00198 0.01688 ± 0.00121 4.053 ± 0.144 % 95.559 ± 0.156 %
|
| 268 |
+
69 7.3594 ± 0.1622 0.00593 ± 0.00196 0.01682 ± 0.00120 4.036 ± 0.143 % 95.584 ± 0.155 %
|
| 269 |
+
70 7.3296 ± 0.1601 0.00566 ± 0.00195 0.01681 ± 0.00118 4.023 ± 0.141 % 95.591 ± 0.154 %
|
| 270 |
+
71 7.3147 ± 0.1587 0.00561 ± 0.00193 0.01685 ± 0.00117 4.023 ± 0.140 % 95.592 ± 0.153 %
|
| 271 |
+
72 7.3405 ± 0.1584 0.00696 ± 0.00195 0.01688 ± 0.00115 4.011 ± 0.139 % 95.556 ± 0.152 %
|
| 272 |
+
73 7.3476 ± 0.1574 0.00719 ± 0.00194 0.01685 ± 0.00114 3.998 ± 0.137 % 95.536 ± 0.151 %
|
| 273 |
+
74 7.3417 ± 0.1561 0.00722 ± 0.00192 0.01684 ± 0.00112 3.988 ± 0.136 % 95.517 ± 0.151 %
|
| 274 |
+
75 7.3443 ± 0.1552 0.00666 ± 0.00191 0.01696 ± 0.00111 4.008 ± 0.135 % 95.477 ± 0.150 %
|
| 275 |
+
76 7.4046 ± 0.1556 0.00634 ± 0.00189 0.01695 ± 0.00110 4.015 ± 0.134 % 95.454 ± 0.150 %
|
| 276 |
+
77 7.4020 ± 0.1545 0.00627 ± 0.00187 0.01686 ± 0.00108 3.998 ± 0.133 % 95.462 ± 0.149 %
|
| 277 |
+
78 7.4162 ± 0.1539 0.00636 ± 0.00186 0.01680 ± 0.00107 3.989 ± 0.132 % 95.490 ± 0.147 %
|
| 278 |
+
79 7.4244 ± 0.1531 0.00600 ± 0.00185 0.01687 ± 0.00106 3.988 ± 0.131 % 95.478 ± 0.146 %
|
| 279 |
+
80 7.4317 ± 0.1528 0.00674 ± 0.00189 0.01695 ± 0.00104 4.006 ± 0.129 % 95.441 ± 0.146 %
|
| 280 |
+
81 7.4074 ± 0.1513 0.00656 ± 0.00188 0.01699 ± 0.00103 3.999 ± 0.128 % 95.410 ± 0.146 %
|
| 281 |
+
82 7.3866 ± 0.1498 0.00668 ± 0.00186 0.01689 ± 0.00102 3.989 ± 0.127 % 95.442 ± 0.144 %
|
| 282 |
+
83 7.4183 ± 0.1493 0.00665 ± 0.00184 0.01677 ± 0.00101 3.972 ± 0.126 % 95.417 ± 0.144 %
|
| 283 |
+
84 7.4351 ± 0.1485 0.00673 ± 0.00182 0.01665 ± 0.00100 3.955 ± 0.125 % 95.420 ± 0.143 %
|
| 284 |
+
85 7.4311 ± 0.1473 0.00674 ± 0.00180 0.01653 ± 0.00098 3.938 ± 0.124 % 95.419 ± 0.142 %
|
| 285 |
+
86 7.3628 ± 0.1446 0.00655 ± 0.00179 0.01647 ± 0.00097 3.929 ± 0.123 % 95.440 ± 0.141 %
|
| 286 |
+
87 7.3051 ± 0.1422 0.00648 ± 0.00177 0.01638 ± 0.00096 3.915 ± 0.122 % 95.443 ± 0.140 %
|
| 287 |
+
88 7.2443 ± 0.1398 0.00645 ± 0.00175 0.01630 ± 0.00095 3.907 ± 0.121 % 95.477 ± 0.139 %
|
| 288 |
+
89 7.1712 ± 0.1371 0.00629 ± 0.00174 0.01619 ± 0.00094 3.893 ± 0.120 % 95.479 ± 0.138 %
|
| 289 |
+
90 7.1159 ± 0.1349 0.00625 ± 0.00172 0.01608 ± 0.00093 3.878 ± 0.119 % 95.481 ± 0.137 %
|
| 290 |
+
91 7.0649 ± 0.1329 0.00617 ± 0.00170 0.01599 ± 0.00092 3.866 ± 0.118 % 95.492 ± 0.136 %
|
| 291 |
+
92 7.0080 ± 0.1307 0.00614 ± 0.00169 0.01590 ± 0.00091 3.864 ± 0.117 % 95.494 ± 0.135 %
|
| 292 |
+
93 7.0201 ± 0.1304 0.00537 ± 0.00169 0.01644 ± 0.00097 3.929 ± 0.125 % 95.463 ± 0.135 %
|
| 293 |
+
94 7.0500 ± 0.1302 0.00529 ± 0.00168 0.01634 ± 0.00096 3.915 ± 0.124 % 95.448 ± 0.135 %
|
| 294 |
+
95 7.1579 ± 0.1318 0.00527 ± 0.00167 0.01633 ± 0.00095 3.903 ± 0.124 % 95.439 ± 0.134 %
|
| 295 |
+
96 7.2495 ± 0.1330 0.00504 ± 0.00166 0.01636 ± 0.00094 3.895 ± 0.123 % 95.413 ± 0.134 %
|
| 296 |
+
97 7.3259 ± 0.1338 0.00486 ± 0.00165 0.01630 ± 0.00093 3.881 ± 0.122 % 95.395 ± 0.133 %
|
| 297 |
+
98 7.4628 ± 0.1361 0.00523 ± 0.00165 0.01624 ± 0.00092 3.865 ± 0.121 % 95.374 ± 0.133 %
|
| 298 |
+
99 7.5800 ± 0.1379 0.00525 ± 0.00164 0.01626 ± 0.00091 3.853 ± 0.120 % 95.306 ± 0.133 %
|
| 299 |
+
100 7.6168 ± 0.1379 0.00521 ± 0.00164 0.01644 ± 0.00091 3.844 ± 0.119 % 95.275 ± 0.133 %
|
| 300 |
+
101 7.6518 ± 0.1380 0.00496 ± 0.00162 0.01642 ± 0.00090 3.836 ± 0.119 % 95.251 ± 0.133 %
|
| 301 |
+
102 7.7128 ± 0.1390 0.00500 ± 0.00162 0.01635 ± 0.00089 3.825 ± 0.118 % 95.267 ± 0.132 %
|
| 302 |
+
103 7.6880 ± 0.1380 0.00516 ± 0.00161 0.01626 ± 0.00088 3.814 ± 0.117 % 95.275 ± 0.131 %
|
| 303 |
+
104 7.6316 ± 0.1362 0.00530 ± 0.00160 0.01628 ± 0.00087 3.843 ± 0.116 % 95.283 ± 0.130 %
|
| 304 |
+
105 7.5202 ± 0.1333 0.00510 ± 0.00159 0.01627 ± 0.00087 3.846 ± 0.115 % 95.298 ± 0.129 %
|
| 305 |
+
106 7.3912 ± 0.1301 0.00511 ± 0.00158 0.01615 ± 0.00086 3.837 ± 0.114 % 95.342 ± 0.128 %
|
| 306 |
+
107 7.4493 ± 0.1304 0.00520 ± 0.00157 0.01606 ± 0.00085 3.823 ± 0.113 % 95.331 ± 0.128 %
|
| 307 |
+
108 7.4621 ± 0.1300 0.00514 ± 0.00156 0.01597 ± 0.00084 3.811 ± 0.113 % 95.341 ± 0.127 %
|
| 308 |
+
109 7.4825 ± 0.1299 0.00514 ± 0.00155 0.01589 ± 0.00084 3.801 ± 0.112 % 95.348 ± 0.126 %
|
| 309 |
+
110 7.5172 ± 0.1299 0.00507 ± 0.00154 0.01584 ± 0.00083 3.795 ± 0.111 % 95.333 ± 0.126 %
|
| 310 |
+
111 7.5638 ± 0.1301 0.00495 ± 0.00153 0.01578 ± 0.00082 3.786 ± 0.110 % 95.337 ± 0.125 %
|
| 311 |
+
112 7.5726 ± 0.1295 0.00477 ± 0.00151 0.01570 ± 0.00081 3.775 ± 0.110 % 95.326 ± 0.125 %
|
| 312 |
+
113 7.5830 ± 0.1290 0.00477 ± 0.00150 0.01562 ± 0.00081 3.764 ± 0.109 % 95.325 ± 0.124 %
|
| 313 |
+
114 7.5999 ± 0.1288 0.00466 ± 0.00150 0.01560 ± 0.00080 3.752 ± 0.109 % 95.329 ± 0.124 %
|
| 314 |
+
115 7.5815 ± 0.1279 0.00457 ± 0.00149 0.01565 ± 0.00079 3.758 ± 0.108 % 95.321 ± 0.123 %
|
| 315 |
+
116 7.5705 ± 0.1271 0.00452 ± 0.00150 0.01583 ± 0.00079 3.796 ± 0.107 % 95.297 ± 0.123 %
|
| 316 |
+
117 7.4690 ± 0.1246 0.00458 ± 0.00149 0.01600 ± 0.00078 3.848 ± 0.105 % 95.297 ± 0.123 %
|
| 317 |
+
118 7.3718 ± 0.1221 0.00435 ± 0.00149 0.01624 ± 0.00078 3.925 ± 0.106 % 95.281 ± 0.122 %
|
| 318 |
+
119 7.2732 ± 0.1197 0.00432 ± 0.00148 0.01632 ± 0.00078 3.960 ± 0.105 % 95.291 ± 0.122 %
|
| 319 |
+
120 7.1913 ± 0.1176 0.00435 ± 0.00149 0.01662 ± 0.00078 4.066 ± 0.106 % 95.278 ± 0.121 %
|
| 320 |
+
121 7.1086 ± 0.1156 0.00442 ± 0.00148 0.01681 ± 0.00077 4.136 ± 0.105 % 95.281 ± 0.121 %
|
| 321 |
+
|
| 322 |
+
====== Perplexity statistics ======
|
| 323 |
+
Mean PPL(Q) : 7.108575 ± 0.115604
|
| 324 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 325 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.58%
|
| 326 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.004418 ± 0.001484
|
| 327 |
+
Mean PPL(Q)/PPL(base) : 1.004428 ± 0.001491
|
| 328 |
+
Mean PPL(Q)-PPL(base) : 0.031335 ± 0.010580
|
| 329 |
+
|
| 330 |
+
====== KL divergence statistics ======
|
| 331 |
+
Mean KLD: 0.016814 ± 0.000772
|
| 332 |
+
Maximum KLD: 13.118722
|
| 333 |
+
99.9% KLD: 0.988008
|
| 334 |
+
99.0% KLD: 0.195046
|
| 335 |
+
95.0% KLD: 0.055750
|
| 336 |
+
90.0% KLD: 0.030438
|
| 337 |
+
Median KLD: 0.003656
|
| 338 |
+
10.0% KLD: 0.000015
|
| 339 |
+
5.0% KLD: 0.000003
|
| 340 |
+
1.0% KLD: -0.000000
|
| 341 |
+
0.1% KLD: -0.000003
|
| 342 |
+
Minimum KLD: -0.000009
|
| 343 |
+
|
| 344 |
+
====== Token probability statistics ======
|
| 345 |
+
Mean Δp: -0.016 ± 0.024 %
|
| 346 |
+
Maximum Δp: 96.224%
|
| 347 |
+
99.9% Δp: 37.836%
|
| 348 |
+
99.0% Δp: 12.005%
|
| 349 |
+
95.0% Δp: 4.001%
|
| 350 |
+
90.0% Δp: 2.076%
|
| 351 |
+
75.0% Δp: 0.293%
|
| 352 |
+
Median Δp: 0.000%
|
| 353 |
+
25.0% Δp: -0.326%
|
| 354 |
+
10.0% Δp: -2.211%
|
| 355 |
+
5.0% Δp: -4.178%
|
| 356 |
+
1.0% Δp: -11.482%
|
| 357 |
+
0.1% Δp: -34.414%
|
| 358 |
+
Minimum Δp: -68.203%
|
| 359 |
+
RMS Δp : 4.136 ± 0.105 %
|
| 360 |
+
Same top p: 95.281 ± 0.121 %
|
| 361 |
+
|
| 362 |
+
llama_perf_context_print: load time = 86828.35 ms
|
| 363 |
+
llama_perf_context_print: prompt eval time = 155038.17 ms / 61952 tokens ( 2.50 ms per token, 399.59 tokens per second)
|
| 364 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 365 |
+
llama_perf_context_print: total time = 173350.95 ms / 61953 tokens
|
| 366 |
+
llama_perf_context_print: graphs reused = 0
|
| 367 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1219 + ( 22463 = 19867 + 96 + 2499) + 452 |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1631 + ( 22050 = 17980 + 896 + 3174) + 452 |
|
| 370 |
+
llama_memory_breakdown_print: | - Host | 231299 = 231195 + 0 + 104 |
|
| 371 |
+
```
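The perplexity summary rows can be sanity-checked against each other: exponentiating "Mean ln(PPL(Q)/PPL(base))" reproduces "Mean PPL(Q)/PPL(base)", and multiplying by the base perplexity recovers "Mean PPL(Q)". A tiny check using the Q8_0 numbers reported above (values copied from the summary; this is only arithmetic, not part of the tool's output):

```python
import math

mean_ln_ratio = 0.004418   # Mean ln(PPL(Q)/PPL(base)) from the Q8_0 summary
ppl_base      = 7.077240   # Mean PPL(base)

ppl_ratio = math.exp(mean_ln_ratio)   # ~1.004428, matches "Mean PPL(Q)/PPL(base)"
ppl_q     = ppl_base * ppl_ratio      # ~7.1086,   matches "Mean PPL(Q)" (7.108575)
print(f"{ppl_ratio:.6f} {ppl_q:.6f}")
```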
|
kld_data/unsloth/UD-IQ1_M/MiniMax-M2.5-UD-IQ1_M.md
ADDED
|
@@ -0,0 +1,375 @@
|
| 1 |
+
### MiniMax-M2.5-UD-IQ1_M (unsloth)
|
| 2 |
+
|
| 3 |
+
63.73 GiB (2.39 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ1_M/MiniMax-M2.5-UD-IQ1_M-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 33815 used, -9943 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 36678 used, -12806 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 70493 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 24798 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37999 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7249 MiB used, 16622 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1344 MiB used, 22526 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 22 layers ( 1 overflowing), 22695 MiB used, 1176 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 41 layers (25 overflowing), 22814 MiB used, 1057 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.19 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ1_M/MiniMax-M2.5-UD-IQ1_M-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 31
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 20 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
llama_model_loader: - type iq2_xxs: 32 tensors
|
| 90 |
+
llama_model_loader: - type iq3_xxs: 39 tensors
|
| 91 |
+
llama_model_loader: - type iq3_s: 10 tensors
|
| 92 |
+
llama_model_loader: - type iq4_xs: 231 tensors
|
| 93 |
+
llama_model_loader: - type iq1_m: 92 tensors
|
| 94 |
+
print_info: file format = GGUF V3 (latest)
|
| 95 |
+
print_info: file type = IQ1_M - 1.75 bpw
|
| 96 |
+
print_info: file size = 63.73 GiB (2.39 BPW)
|
| 97 |
+
load: 0 unused tokens
|
| 98 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 99 |
+
load: printing all EOG tokens:
|
| 100 |
+
load: - 200004 ('<fim_pad>')
|
| 101 |
+
load: - 200005 ('<reponame>')
|
| 102 |
+
load: - 200020 ('[e~[')
|
| 103 |
+
load: special tokens cache size = 54
|
| 104 |
+
load: token to piece cache size = 1.3355 MB
|
| 105 |
+
print_info: arch = minimax-m2
|
| 106 |
+
print_info: vocab_only = 0
|
| 107 |
+
print_info: no_alloc = 0
|
| 108 |
+
print_info: n_ctx_train = 196608
|
| 109 |
+
print_info: n_embd = 3072
|
| 110 |
+
print_info: n_embd_inp = 3072
|
| 111 |
+
print_info: n_layer = 62
|
| 112 |
+
print_info: n_head = 48
|
| 113 |
+
print_info: n_head_kv = 8
|
| 114 |
+
print_info: n_rot = 64
|
| 115 |
+
print_info: n_swa = 0
|
| 116 |
+
print_info: is_swa_any = 0
|
| 117 |
+
print_info: n_embd_head_k = 128
|
| 118 |
+
print_info: n_embd_head_v = 128
|
| 119 |
+
print_info: n_gqa = 6
|
| 120 |
+
print_info: n_embd_k_gqa = 1024
|
| 121 |
+
print_info: n_embd_v_gqa = 1024
|
| 122 |
+
print_info: f_norm_eps = 0.0e+00
|
| 123 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 124 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 125 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 126 |
+
print_info: f_logit_scale = 0.0e+00
|
| 127 |
+
print_info: f_attn_scale = 0.0e+00
|
| 128 |
+
print_info: n_ff = 1536
|
| 129 |
+
print_info: n_expert = 256
|
| 130 |
+
print_info: n_expert_used = 8
|
| 131 |
+
print_info: n_expert_groups = 0
|
| 132 |
+
print_info: n_group_used = 0
|
| 133 |
+
print_info: causal attn = 1
|
| 134 |
+
print_info: pooling type = 0
|
| 135 |
+
print_info: rope type = 2
|
| 136 |
+
print_info: rope scaling = linear
|
| 137 |
+
print_info: freq_base_train = 5000000.0
|
| 138 |
+
print_info: freq_scale_train = 1
|
| 139 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 140 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 141 |
+
print_info: rope_finetuned = unknown
|
| 142 |
+
print_info: model type = 230B.A10B
|
| 143 |
+
print_info: model params = 228.69 B
|
| 144 |
+
print_info: general.name = Minimax-M2.5
|
| 145 |
+
print_info: vocab type = BPE
|
| 146 |
+
print_info: n_vocab = 200064
|
| 147 |
+
print_info: n_merges = 199744
|
| 148 |
+
print_info: BOS token = 200034 ']~!b['
|
| 149 |
+
print_info: EOS token = 200020 '[e~['
|
| 150 |
+
print_info: UNK token = 200021 ']!d~['
|
| 151 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: LF token = 10 'Ċ'
|
| 153 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 154 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 155 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 156 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 157 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 158 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 159 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 160 |
+
print_info: EOG token = 200020 '[e~['
|
| 161 |
+
print_info: max token length = 256
|
| 162 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 163 |
+
load_tensors: offloading output layer to GPU
|
| 164 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 165 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 166 |
+
load_tensors: CPU_Mapped model buffer size = 46917.99 MiB
|
| 167 |
+
load_tensors: CPU_Mapped model buffer size = 17839.78 MiB
|
| 168 |
+
load_tensors: CUDA0 model buffer size = 20998.23 MiB
|
| 169 |
+
load_tensors: CUDA1 model buffer size = 19000.01 MiB
|
| 170 |
+
....................................................................................................
|
| 171 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 172 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 173 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 174 |
+
llama_context: constructing llama_context
|
| 175 |
+
llama_context: n_seq_max = 8
|
| 176 |
+
llama_context: n_ctx = 4096
|
| 177 |
+
llama_context: n_ctx_seq = 512
|
| 178 |
+
llama_context: n_batch = 4096
|
| 179 |
+
llama_context: n_ubatch = 4096
|
| 180 |
+
llama_context: causal_attn = 1
|
| 181 |
+
llama_context: flash_attn = enabled
|
| 182 |
+
llama_context: kv_unified = false
|
| 183 |
+
llama_context: freq_base = 5000000.0
|
| 184 |
+
llama_context: freq_scale = 1
|
| 185 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 186 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 187 |
+
llama_kv_cache: CUDA0 KV buffer size = 352.00 MiB
|
| 188 |
+
llama_kv_cache: CUDA1 KV buffer size = 640.00 MiB
|
| 189 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 190 |
+
sched_reserve: reserving ...
|
| 191 |
+
sched_reserve: CUDA0 compute buffer size = 1345.00 MiB
|
| 192 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 193 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 194 |
+
sched_reserve: graph nodes = 4099
|
| 195 |
+
sched_reserve: graph splits = 103 (with bs=4096), 57 (with bs=1)
|
| 196 |
+
sched_reserve: reserve took 23.60 ms, sched copies = 1
|
| 197 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 198 |
+
|
| 199 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 200 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 201 |
+
kl_divergence: 5.03 seconds per pass - ETA 1.27 minutes
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
1 7.1209 ± 1.4160 0.11096 ± 0.05322 0.22589 ± 0.03684 15.291 ± 2.179 % 84.314 ± 2.282 %
|
| 205 |
+
2 5.1244 ± 0.6368 0.09181 ± 0.03356 0.16595 ± 0.02085 13.006 ± 1.560 % 86.667 ± 1.507 %
|
| 206 |
+
3 5.0126 ± 0.5071 0.11046 ± 0.03157 0.23309 ± 0.01993 16.335 ± 1.146 % 85.098 ± 1.288 %
|
| 207 |
+
4 5.5946 ± 0.5023 0.08702 ± 0.02858 0.23282 ± 0.01709 15.740 ± 0.965 % 84.412 ± 1.136 %
|
| 208 |
+
5 5.7161 ± 0.4763 0.15507 ± 0.03129 0.26794 ± 0.01864 17.030 ± 0.912 % 83.608 ± 1.037 %
|
| 209 |
+
6 6.9366 ± 0.5616 0.14563 ± 0.02826 0.28803 ± 0.01683 16.555 ± 0.808 % 82.157 ± 0.979 %
|
| 210 |
+
7 6.6792 ± 0.4880 0.17966 ± 0.02756 0.33798 ± 0.01864 18.133 ± 0.780 % 81.513 ± 0.919 %
|
| 211 |
+
8 7.4335 ± 0.5104 0.16314 ± 0.02517 0.32268 ± 0.01649 17.426 ± 0.719 % 81.422 ± 0.861 %
|
| 212 |
+
9 7.2346 ± 0.4628 0.15530 ± 0.02293 0.30627 ± 0.01489 16.746 ± 0.668 % 81.874 ± 0.804 %
|
| 213 |
+
10 6.5562 ± 0.3911 0.14588 ± 0.02119 0.29167 ± 0.01352 16.525 ± 0.619 % 82.196 ± 0.758 %
|
| 214 |
+
11 7.1487 ± 0.4108 0.14059 ± 0.01978 0.28766 ± 0.01247 16.266 ± 0.588 % 81.569 ± 0.732 %
|
| 215 |
+
12 7.9006 ± 0.4404 0.13911 ± 0.01862 0.28040 ± 0.01154 15.768 ± 0.558 % 81.569 ± 0.701 %
|
| 216 |
+
13 8.1476 ± 0.4322 0.13262 ± 0.01749 0.26964 ± 0.01076 15.347 ± 0.532 % 81.750 ± 0.671 %
|
| 217 |
+
14 8.7454 ± 0.4506 0.12817 ± 0.01706 0.27021 ± 0.01058 15.105 ± 0.512 % 81.373 ± 0.652 %
|
| 218 |
+
15 9.0898 ± 0.4519 0.12068 ± 0.01615 0.26379 ± 0.00991 14.774 ± 0.490 % 81.359 ± 0.630 %
|
| 219 |
+
16 9.3201 ± 0.4468 0.11349 ± 0.01535 0.25464 ± 0.00932 14.478 ± 0.470 % 81.373 ± 0.610 %
|
| 220 |
+
17 9.6265 ± 0.4529 0.11873 ± 0.01513 0.26596 ± 0.00948 14.271 ± 0.454 % 81.338 ± 0.592 %
|
| 221 |
+
18 9.1162 ± 0.4155 0.12349 ± 0.01506 0.27077 ± 0.00946 14.286 ± 0.443 % 81.503 ± 0.573 %
|
| 222 |
+
19 9.2098 ± 0.4071 0.11790 ± 0.01447 0.26401 ± 0.00907 14.134 ± 0.427 % 81.651 ± 0.556 %
|
| 223 |
+
20 9.3665 ± 0.4044 0.12690 ± 0.01443 0.27546 ± 0.00906 14.516 ± 0.418 % 81.451 ± 0.544 %
|
| 224 |
+
21 9.3528 ± 0.3939 0.12800 ± 0.01411 0.27369 ± 0.00883 14.453 ± 0.407 % 81.550 ± 0.530 %
|
| 225 |
+
22 9.7614 ± 0.4060 0.13310 ± 0.01374 0.27541 ± 0.00849 14.383 ± 0.393 % 81.141 ± 0.522 %
|
| 226 |
+
23 9.8854 ± 0.4046 0.14451 ± 0.01384 0.28510 ± 0.00877 14.844 ± 0.397 % 81.023 ± 0.512 %
|
| 227 |
+
24 10.3091 ± 0.4138 0.14041 ± 0.01345 0.28515 ± 0.00847 14.712 ± 0.387 % 80.719 ± 0.504 %
|
| 228 |
+
25 10.3365 ± 0.4071 0.14445 ± 0.01324 0.28966 ± 0.00828 14.957 ± 0.379 % 80.439 ± 0.497 %
|
| 229 |
+
26 10.2186 ± 0.3915 0.20159 ± 0.01441 0.34239 ± 0.00980 18.306 ± 0.422 % 79.321 ± 0.497 %
|
| 230 |
+
27 10.3187 ± 0.3873 0.26526 ± 0.01567 0.40358 ± 0.01135 21.006 ± 0.435 % 78.257 ± 0.497 %
|
| 231 |
+
28 10.4252 ± 0.3847 0.26185 ± 0.01535 0.40206 ± 0.01102 20.919 ± 0.425 % 78.151 ± 0.489 %
|
| 232 |
+
29 10.3026 ± 0.3736 0.25951 ± 0.01501 0.39932 ± 0.01072 20.754 ± 0.417 % 78.296 ± 0.479 %
|
| 233 |
+
30 9.6526 ± 0.3419 0.26153 ± 0.01479 0.39453 ± 0.01062 20.759 ± 0.412 % 78.824 ± 0.467 %
|
| 234 |
+
31 9.1040 ± 0.3155 0.26239 ± 0.01454 0.39172 ± 0.01050 20.893 ± 0.407 % 79.152 ± 0.457 %
|
| 235 |
+
32 8.8261 ± 0.2988 0.25663 ± 0.01419 0.38528 ± 0.01019 20.781 ± 0.398 % 79.265 ± 0.449 %
|
| 236 |
+
33 8.5834 ± 0.2843 0.24864 ± 0.01384 0.37965 ± 0.00990 20.639 ± 0.390 % 79.335 ± 0.441 %
|
| 237 |
+
34 8.7942 ± 0.2881 0.24587 ± 0.01363 0.38510 ± 0.00976 20.543 ± 0.383 % 78.985 ± 0.438 %
|
| 238 |
+
35 8.9277 ± 0.2904 0.24630 ± 0.01350 0.39206 ± 0.00964 20.588 ± 0.375 % 78.745 ± 0.433 %
|
| 239 |
+
36 9.0034 ± 0.2899 0.24586 ± 0.01325 0.39092 ± 0.00944 20.525 ± 0.368 % 78.682 ± 0.427 %
|
| 240 |
+
37 9.1647 ± 0.2913 0.26134 ± 0.01335 0.40612 ± 0.00970 21.280 ± 0.369 % 78.378 ± 0.424 %
|
| 241 |
+
38 9.3908 ± 0.2954 0.25597 ± 0.01307 0.40175 ± 0.00946 21.075 ± 0.363 % 78.349 ± 0.418 %
|
| 242 |
+
39 9.4649 ± 0.2940 0.27078 ± 0.01322 0.41518 ± 0.00963 21.575 ± 0.361 % 78.200 ± 0.414 %
|
| 243 |
+
40 9.6852 ± 0.2972 0.32871 ± 0.01406 0.46903 ± 0.01066 23.304 ± 0.364 % 77.304 ± 0.415 %
|
| 244 |
+
41 9.8980 ± 0.2999 0.38127 ± 0.01473 0.52059 ± 0.01145 24.968 ± 0.364 % 76.365 ± 0.416 %
|
| 245 |
+
42 10.0254 ± 0.3002 0.42538 ± 0.01522 0.56340 ± 0.01203 26.329 ± 0.363 % 75.649 ± 0.415 %
|
| 246 |
+
43 10.0445 ± 0.2972 0.45842 ± 0.01553 0.59580 ± 0.01244 27.206 ± 0.361 % 75.239 ± 0.412 %
|
| 247 |
+
44 9.8845 ± 0.2879 0.44869 ± 0.01524 0.58646 ± 0.01218 26.979 ± 0.356 % 75.419 ± 0.407 %
|
| 248 |
+
45 10.0577 ± 0.2910 0.44417 ± 0.01498 0.58107 ± 0.01194 26.767 ± 0.352 % 75.355 ± 0.402 %
|
| 249 |
+
46 10.1946 ± 0.2918 0.43561 ± 0.01471 0.57406 ± 0.01170 26.514 ± 0.348 % 75.379 ± 0.398 %
|
| 250 |
+
47 10.3422 ± 0.2931 0.42778 ± 0.01443 0.56536 ± 0.01147 26.272 ± 0.344 % 75.519 ± 0.393 %
|
| 251 |
+
48 10.0975 ± 0.2819 0.42049 ± 0.01417 0.55653 ± 0.01125 26.056 ± 0.340 % 75.703 ± 0.388 %
|
| 252 |
+
49 10.2071 ± 0.2821 0.41627 ± 0.01401 0.55683 ± 0.01120 25.914 ± 0.337 % 75.654 ± 0.384 %
|
| 253 |
+
50 10.2935 ± 0.2824 0.41054 ± 0.01383 0.55334 ± 0.01101 25.761 ± 0.333 % 75.624 ± 0.380 %
|
| 254 |
+
51 10.3985 ± 0.2825 0.40530 ± 0.01359 0.54670 ± 0.01080 25.559 ± 0.329 % 75.640 ± 0.376 %
|
| 255 |
+
52 10.4660 ± 0.2810 0.40245 ± 0.01341 0.54528 ± 0.01063 25.429 ± 0.325 % 75.513 ± 0.373 %
|
| 256 |
+
53 10.5592 ± 0.2803 0.39553 ± 0.01320 0.53974 ± 0.01044 25.249 ± 0.322 % 75.486 ± 0.370 %
|
| 257 |
+
54 10.5910 ± 0.2780 0.39076 ± 0.01299 0.53315 ± 0.01026 25.048 ± 0.319 % 75.512 ± 0.366 %
|
| 258 |
+
55 10.6122 ± 0.2755 0.38573 ± 0.01279 0.52639 ± 0.01008 24.858 ± 0.316 % 75.529 ± 0.363 %
|
| 259 |
+
56 10.6262 ± 0.2733 0.38132 ± 0.01261 0.52148 ± 0.00992 24.701 ± 0.312 % 75.532 ± 0.360 %
|
| 260 |
+
57 10.6042 ± 0.2700 0.37930 ± 0.01249 0.52276 ± 0.00982 24.664 ± 0.309 % 75.480 ± 0.357 %
|
| 261 |
+
58 10.5939 ± 0.2673 0.37696 ± 0.01235 0.51867 ± 0.00967 24.543 ± 0.306 % 75.592 ± 0.353 %
|
| 262 |
+
59 10.4782 ± 0.2617 0.37111 ± 0.01217 0.51215 ± 0.00952 24.376 ± 0.303 % 75.753 ± 0.349 %
|
| 263 |
+
60 10.4530 ± 0.2587 0.36722 ± 0.01201 0.50815 ± 0.00938 24.245 ± 0.301 % 75.817 ± 0.346 %
|
| 264 |
+
61 10.4776 ± 0.2570 0.36293 ± 0.01184 0.50334 ± 0.00924 24.075 ± 0.298 % 75.905 ± 0.343 %
|
| 265 |
+
62 10.4039 ± 0.2535 0.35913 ± 0.01173 0.50036 ± 0.00914 23.993 ± 0.295 % 75.984 ± 0.340 %
|
| 266 |
+
63 10.4413 ± 0.2530 0.35641 ± 0.01162 0.49839 ± 0.00904 23.877 ± 0.293 % 76.022 ± 0.337 %
|
| 267 |
+
64 10.3729 ± 0.2487 0.35241 ± 0.01150 0.49468 ± 0.00891 23.755 ± 0.291 % 76.091 ± 0.334 %
|
| 268 |
+
65 10.3294 ± 0.2457 0.34938 ± 0.01139 0.49240 ± 0.00881 23.643 ± 0.288 % 76.157 ± 0.331 %
|
| 269 |
+
66 10.3333 ± 0.2441 0.34475 ± 0.01125 0.48945 ± 0.00869 23.544 ± 0.286 % 76.209 ± 0.328 %
|
| 270 |
+
67 10.3056 ± 0.2415 0.34112 ± 0.01113 0.48595 ± 0.00857 23.420 ± 0.283 % 76.313 ± 0.325 %
|
| 271 |
+
68 10.1881 ± 0.2365 0.33563 ± 0.01098 0.48084 ± 0.00845 23.276 ± 0.281 % 76.488 ± 0.322 %
|
| 272 |
+
69 10.1903 ± 0.2349 0.33139 ± 0.01087 0.47747 ± 0.00834 23.162 ± 0.279 % 76.522 ± 0.320 %
|
| 273 |
+
70 10.1042 ± 0.2307 0.32669 ± 0.01076 0.47479 ± 0.00826 23.064 ± 0.276 % 76.571 ± 0.317 %
|
| 274 |
+
71 10.0466 ± 0.2276 0.32297 ± 0.01063 0.47047 ± 0.00815 22.935 ± 0.274 % 76.725 ± 0.314 %
|
| 275 |
+
72 10.0461 ± 0.2264 0.32074 ± 0.01054 0.46820 ± 0.00808 22.823 ± 0.272 % 76.836 ± 0.311 %
|
| 276 |
+
73 10.0231 ± 0.2240 0.31771 ± 0.01042 0.46494 ± 0.00798 22.729 ± 0.270 % 76.857 ± 0.309 %
|
| 277 |
+
74 9.9704 ± 0.2210 0.31327 ± 0.01031 0.46186 ± 0.00788 22.629 ± 0.268 % 76.889 ± 0.307 %
|
| 278 |
+
75 9.9586 ± 0.2194 0.31117 ± 0.01023 0.45981 ± 0.00779 22.562 ± 0.265 % 76.920 ± 0.305 %
|
| 279 |
+
76 10.0156 ± 0.2192 0.30839 ± 0.01013 0.45756 ± 0.00770 22.483 ± 0.263 % 76.940 ± 0.303 %
|
| 280 |
+
77 9.9875 ± 0.2172 0.30585 ± 0.01005 0.45489 ± 0.00761 22.409 ± 0.262 % 76.980 ± 0.300 %
|
| 281 |
+
78 9.9832 ± 0.2159 0.30360 ± 0.00995 0.45289 ± 0.00753 22.309 ± 0.260 % 76.988 ± 0.298 %
|
| 282 |
+
79 9.9685 ± 0.2141 0.30066 ± 0.00986 0.45017 ± 0.00744 22.211 ± 0.258 % 77.022 ± 0.296 %
|
| 283 |
+
80 9.9532 ± 0.2130 0.29889 ± 0.00979 0.45014 ± 0.00738 22.161 ± 0.256 % 77.025 ± 0.295 %
|
| 284 |
+
81 9.8975 ± 0.2106 0.29637 ± 0.00969 0.44734 ± 0.00730 22.060 ± 0.254 % 77.124 ± 0.292 %
|
| 285 |
+
82 9.8596 ± 0.2082 0.29546 ± 0.00959 0.44491 ± 0.00722 21.994 ± 0.252 % 77.145 ± 0.290 %
|
| 286 |
+
83 9.8895 ± 0.2074 0.29417 ± 0.00950 0.44216 ± 0.00714 21.910 ± 0.250 % 77.151 ± 0.289 %
|
| 287 |
+
84 9.8855 ± 0.2056 0.29159 ± 0.00941 0.43907 ± 0.00706 21.812 ± 0.248 % 77.166 ± 0.287 %
|
| 288 |
+
85 9.8485 ± 0.2032 0.28839 ± 0.00932 0.43589 ± 0.00698 21.715 ± 0.247 % 77.190 ± 0.285 %
|
| 289 |
+
86 9.7341 ± 0.1990 0.28574 ± 0.00922 0.43295 ± 0.00691 21.626 ± 0.245 % 77.287 ± 0.283 %
|
| 290 |
+
87 9.6397 ± 0.1954 0.28381 ± 0.00913 0.42988 ± 0.00683 21.537 ± 0.243 % 77.377 ± 0.281 %
|
| 291 |
+
88 9.5505 ± 0.1920 0.28284 ± 0.00905 0.42749 ± 0.00677 21.458 ± 0.242 % 77.473 ± 0.279 %
|
| 292 |
+
89 9.4342 ± 0.1880 0.28056 ± 0.00896 0.42438 ± 0.00670 21.381 ± 0.240 % 77.555 ± 0.277 %
|
| 293 |
+
90 9.3427 ± 0.1847 0.27852 ± 0.00888 0.42189 ± 0.00664 21.299 ± 0.239 % 77.647 ± 0.275 %
|
| 294 |
+
91 9.2559 ± 0.1816 0.27629 ± 0.00880 0.41932 ± 0.00657 21.221 ± 0.237 % 77.738 ± 0.273 %
|
| 295 |
+
92 9.1633 ± 0.1782 0.27429 ± 0.00871 0.41635 ± 0.00650 21.145 ± 0.236 % 77.826 ± 0.271 %
|
| 296 |
+
93 9.1691 ± 0.1776 0.27243 ± 0.00866 0.41645 ± 0.00647 21.117 ± 0.234 % 77.820 ± 0.270 %
|
| 297 |
+
94 9.1872 ± 0.1768 0.27007 ± 0.00858 0.41362 ± 0.00640 21.024 ± 0.233 % 77.885 ± 0.268 %
|
| 298 |
+
95 9.3224 ± 0.1790 0.26949 ± 0.00852 0.41190 ± 0.00634 20.945 ± 0.231 % 77.882 ± 0.267 %
|
| 299 |
+
96 9.4310 ± 0.1803 0.26810 ± 0.00845 0.41068 ± 0.00628 20.868 ± 0.230 % 77.835 ± 0.265 %
|
| 300 |
+
97 9.5198 ± 0.1811 0.26682 ± 0.00838 0.40843 ± 0.00622 20.786 ± 0.228 % 77.772 ± 0.264 %
|
| 301 |
+
98 9.6868 ± 0.1839 0.26606 ± 0.00831 0.40617 ± 0.00616 20.693 ± 0.227 % 77.727 ± 0.263 %
|
| 302 |
+
99 9.8219 ± 0.1857 0.26435 ± 0.00824 0.40429 ± 0.00610 20.610 ± 0.226 % 77.663 ± 0.262 %
|
| 303 |
+
100 9.8531 ± 0.1853 0.26263 ± 0.00818 0.40354 ± 0.00605 20.561 ± 0.225 % 77.667 ± 0.261 %
|
| 304 |
+
101 9.8997 ± 0.1856 0.26252 ± 0.00814 0.40356 ± 0.00604 20.532 ± 0.224 % 77.678 ± 0.259 %
|
| 305 |
+
102 9.9830 ± 0.1867 0.26301 ± 0.00810 0.40494 ± 0.00602 20.535 ± 0.222 % 77.601 ± 0.259 %
|
| 306 |
+
103 9.9466 ± 0.1853 0.26272 ± 0.00804 0.40372 ± 0.00597 20.553 ± 0.221 % 77.620 ± 0.257 %
|
| 307 |
+
104 9.9093 ± 0.1836 0.26648 ± 0.00806 0.40696 ± 0.00603 20.726 ± 0.221 % 77.568 ± 0.256 %
|
| 308 |
+
105 9.7824 ± 0.1801 0.26809 ± 0.00805 0.40867 ± 0.00603 20.881 ± 0.220 % 77.595 ± 0.255 %
|
| 309 |
+
106 9.6566 ± 0.1766 0.27246 ± 0.00805 0.41227 ± 0.00606 21.152 ± 0.220 % 77.617 ± 0.254 %
|
| 310 |
+
107 9.7111 ± 0.1766 0.27035 ± 0.00798 0.40963 ± 0.00601 21.068 ± 0.219 % 77.592 ± 0.252 %
|
| 311 |
+
108 9.7139 ± 0.1759 0.26886 ± 0.00793 0.40854 ± 0.00597 21.021 ± 0.218 % 77.560 ± 0.251 %
|
| 312 |
+
109 9.7470 ± 0.1758 0.26954 ± 0.00789 0.40814 ± 0.00593 21.003 ± 0.217 % 77.554 ± 0.250 %
|
| 313 |
+
110 9.7836 ± 0.1756 0.26859 ± 0.00784 0.40669 ± 0.00588 20.952 ± 0.216 % 77.554 ± 0.249 %
|
| 314 |
+
111 9.8322 ± 0.1756 0.26723 ± 0.00779 0.40492 ± 0.00583 20.880 ± 0.215 % 77.541 ± 0.248 %
|
| 315 |
+
112 9.8320 ± 0.1747 0.26587 ± 0.00773 0.40289 ± 0.00578 20.819 ± 0.214 % 77.595 ± 0.247 %
|
| 316 |
+
113 9.8336 ± 0.1737 0.26466 ± 0.00767 0.40069 ± 0.00574 20.750 ± 0.212 % 77.633 ± 0.245 %
|
| 317 |
+
114 9.8418 ± 0.1732 0.26316 ± 0.00762 0.39862 ± 0.00569 20.682 ± 0.211 % 77.654 ± 0.244 %
|
| 318 |
+
115 9.8143 ± 0.1719 0.26271 ± 0.00759 0.39903 ± 0.00566 20.695 ± 0.211 % 77.647 ± 0.243 %
|
| 319 |
+
116 9.8766 ± 0.1725 0.27042 ± 0.00764 0.40811 ± 0.00575 20.920 ± 0.210 % 77.485 ± 0.243 %
|
| 320 |
+
117 9.9138 ± 0.1724 0.28775 ± 0.00781 0.42427 ± 0.00593 21.549 ± 0.211 % 77.218 ± 0.243 %
|
| 321 |
+
118 9.9330 ± 0.1721 0.30254 ± 0.00794 0.43628 ± 0.00604 21.980 ± 0.211 % 77.036 ± 0.242 %
|
| 322 |
+
119 9.9778 ± 0.1722 0.32049 ± 0.00810 0.45309 ± 0.00624 22.576 ± 0.212 % 76.800 ± 0.242 %
|
| 323 |
+
120 9.9918 ± 0.1716 0.33324 ± 0.00820 0.46613 ± 0.00634 23.058 ± 0.211 % 76.562 ± 0.242 %
|
| 324 |
+
121 10.0105 ± 0.1713 0.34675 ± 0.00829 0.47889 ± 0.00645 23.529 ± 0.211 % 76.364 ± 0.242 %
|
| 325 |
+
|
| 326 |
+
====== Perplexity statistics ======
|
| 327 |
+
Mean PPL(Q) : 10.010472 ± 0.171308
|
| 328 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 329 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 87.75%
|
| 330 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.346748 ± 0.008286
|
| 331 |
+
Mean PPL(Q)/PPL(base) : 1.414460 ± 0.011720
|
| 332 |
+
Mean PPL(Q)-PPL(base) : 2.933232 ± 0.089724
|
| 333 |
+
|
| 334 |
+
====== KL divergence statistics ======
|
| 335 |
+
Mean KLD: 0.478890 ± 0.006451
|
| 336 |
+
Maximum KLD: 21.323242
|
| 337 |
+
99.9% KLD: 9.954420
|
| 338 |
+
99.0% KLD: 6.009495
|
| 339 |
+
95.0% KLD: 2.622951
|
| 340 |
+
90.0% KLD: 1.090507
|
| 341 |
+
Median KLD: 0.120992
|
| 342 |
+
10.0% KLD: 0.000874
|
| 343 |
+
5.0% KLD: 0.000141
|
| 344 |
+
1.0% KLD: 0.000006
|
| 345 |
+
0.1% KLD: 0.000000
|
| 346 |
+
Minimum KLD: -0.000003
|
| 347 |
+
|
| 348 |
+
====== Token probability statistics ======
|
| 349 |
+
Mean Δp: -6.219 ± 0.129 %
|
| 350 |
+
Maximum Δp: 99.309%
|
| 351 |
+
99.9% Δp: 82.469%
|
| 352 |
+
99.0% Δp: 37.119%
|
| 353 |
+
95.0% Δp: 14.629%
|
| 354 |
+
90.0% Δp: 7.115%
|
| 355 |
+
75.0% Δp: 0.513%
|
| 356 |
+
Median Δp: -0.071%
|
| 357 |
+
25.0% Δp: -5.199%
|
| 358 |
+
10.0% Δp: -26.713%
|
| 359 |
+
5.0% Δp: -60.999%
|
| 360 |
+
1.0% Δp: -97.106%
|
| 361 |
+
0.1% Δp: -99.748%
|
| 362 |
+
Minimum Δp: -99.988%
|
| 363 |
+
RMS Δp : 23.529 ± 0.211 %
|
| 364 |
+
Same top p: 76.364 ± 0.242 %
|
| 365 |
+
|
| 366 |
+
llama_perf_context_print: load time = 26428.44 ms
|
| 367 |
+
llama_perf_context_print: prompt eval time = 65950.70 ms / 61952 tokens ( 1.06 ms per token, 939.37 tokens per second)
|
| 368 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 369 |
+
llama_perf_context_print: total time = 78753.55 ms / 61953 tokens
|
| 370 |
+
llama_perf_context_print: graphs reused = 0
|
| 371 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 372 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 483 + (22695 = 20998 + 352 + 1344) + 956 |
|
| 373 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 365 + (22814 = 19000 + 640 + 3174) + 955 |
|
| 374 |
+
llama_memory_breakdown_print: | - Host | 64861 = 64757 + 0 + 104 |
|
| 375 |
+
```
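
The two statistics blocks above are internally consistent, which gives a quick way to sanity-check a transcription of these logs: the printed perplexity ratio is the exponential of the mean log-ratio, and the absolute gap is the difference of the two mean perplexities. A minimal sketch of that check, using only the figures printed in the UD-IQ1_M summary above (nothing is re-run):

```python
import math

# Values copied from the "Perplexity statistics" block above.
mean_ppl_q = 10.010472      # Mean PPL(Q)
mean_ppl_base = 7.077240    # Mean PPL(base)
mean_log_ratio = 0.346748   # Mean ln(PPL(Q)/PPL(base))

# Both should reproduce the printed ratio of 1.414460, up to rounding.
print(math.exp(mean_log_ratio))     # ~1.41446
print(mean_ppl_q / mean_ppl_base)   # ~1.41447

# And the printed difference of 2.933232.
print(mean_ppl_q - mean_ppl_base)   # ~2.93323
```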
|
kld_data/unsloth/UD-IQ1_S/MiniMax-M2.5-UD-IQ1_S.md
ADDED
|
@@ -0,0 +1,377 @@
|
| 1 |
+
### MiniMax-M2.5-UD-IQ1_S (unsloth)
|
| 2 |
+
|
| 3 |
+
58.85 GiB (2.21 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ1_S/MiniMax-M2.5-UD-IQ1_S-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 30387 used, -6515 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 35112 used, -11240 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 65499 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 19803 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 38241 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 6880 MiB used, 16990 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1344 MiB used, 22526 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24 layers ( 1 overflowing), 22521 MiB used, 1349 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 39 layers (22 overflowing), 22619 MiB used, 1252 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.17 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ1_S/MiniMax-M2.5-UD-IQ1_S-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 24
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 18 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
llama_model_loader: - type iq2_xxs: 24 tensors
|
| 90 |
+
llama_model_loader: - type iq3_xxs: 100 tensors
|
| 91 |
+
llama_model_loader: - type iq1_s: 86 tensors
|
| 92 |
+
llama_model_loader: - type iq3_s: 108 tensors
|
| 93 |
+
llama_model_loader: - type iq2_s: 35 tensors
|
| 94 |
+
llama_model_loader: - type iq4_xs: 39 tensors
|
| 95 |
+
llama_model_loader: - type iq1_m: 14 tensors
|
| 96 |
+
print_info: file format = GGUF V3 (latest)
|
| 97 |
+
print_info: file type = IQ1_S - 1.5625 bpw
|
| 98 |
+
print_info: file size = 58.85 GiB (2.21 BPW)
|
| 99 |
+
load: 0 unused tokens
|
| 100 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 101 |
+
load: printing all EOG tokens:
|
| 102 |
+
load: - 200004 ('<fim_pad>')
|
| 103 |
+
load: - 200005 ('<reponame>')
|
| 104 |
+
load: - 200020 ('[e~[')
|
| 105 |
+
load: special tokens cache size = 54
|
| 106 |
+
load: token to piece cache size = 1.3355 MB
|
| 107 |
+
print_info: arch = minimax-m2
|
| 108 |
+
print_info: vocab_only = 0
|
| 109 |
+
print_info: no_alloc = 0
|
| 110 |
+
print_info: n_ctx_train = 196608
|
| 111 |
+
print_info: n_embd = 3072
|
| 112 |
+
print_info: n_embd_inp = 3072
|
| 113 |
+
print_info: n_layer = 62
|
| 114 |
+
print_info: n_head = 48
|
| 115 |
+
print_info: n_head_kv = 8
|
| 116 |
+
print_info: n_rot = 64
|
| 117 |
+
print_info: n_swa = 0
|
| 118 |
+
print_info: is_swa_any = 0
|
| 119 |
+
print_info: n_embd_head_k = 128
|
| 120 |
+
print_info: n_embd_head_v = 128
|
| 121 |
+
print_info: n_gqa = 6
|
| 122 |
+
print_info: n_embd_k_gqa = 1024
|
| 123 |
+
print_info: n_embd_v_gqa = 1024
|
| 124 |
+
print_info: f_norm_eps = 0.0e+00
|
| 125 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 126 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 127 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 128 |
+
print_info: f_logit_scale = 0.0e+00
|
| 129 |
+
print_info: f_attn_scale = 0.0e+00
|
| 130 |
+
print_info: n_ff = 1536
|
| 131 |
+
print_info: n_expert = 256
|
| 132 |
+
print_info: n_expert_used = 8
|
| 133 |
+
print_info: n_expert_groups = 0
|
| 134 |
+
print_info: n_group_used = 0
|
| 135 |
+
print_info: causal attn = 1
|
| 136 |
+
print_info: pooling type = 0
|
| 137 |
+
print_info: rope type = 2
|
| 138 |
+
print_info: rope scaling = linear
|
| 139 |
+
print_info: freq_base_train = 5000000.0
|
| 140 |
+
print_info: freq_scale_train = 1
|
| 141 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 142 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 143 |
+
print_info: rope_finetuned = unknown
|
| 144 |
+
print_info: model type = 230B.A10B
|
| 145 |
+
print_info: model params = 228.69 B
|
| 146 |
+
print_info: general.name = Minimax-M2.5
|
| 147 |
+
print_info: vocab type = BPE
|
| 148 |
+
print_info: n_vocab = 200064
|
| 149 |
+
print_info: n_merges = 199744
|
| 150 |
+
print_info: BOS token = 200034 ']~!b['
|
| 151 |
+
print_info: EOS token = 200020 '[e~['
|
| 152 |
+
print_info: UNK token = 200021 ']!d~['
|
| 153 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: LF token = 10 'Ċ'
|
| 155 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 156 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 157 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 158 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 159 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 160 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 161 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 162 |
+
print_info: EOG token = 200020 '[e~['
|
| 163 |
+
print_info: max token length = 256
|
| 164 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 165 |
+
load_tensors: offloading output layer to GPU
|
| 166 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 167 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 168 |
+
load_tensors: CPU_Mapped model buffer size = 46940.58 MiB
|
| 169 |
+
load_tensors: CPU_Mapped model buffer size = 12821.11 MiB
|
| 170 |
+
load_tensors: CUDA0 model buffer size = 20792.93 MiB
|
| 171 |
+
load_tensors: CUDA1 model buffer size = 18837.02 MiB
|
| 172 |
+
....................................................................................................
|
| 173 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 174 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 175 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 176 |
+
llama_context: constructing llama_context
|
| 177 |
+
llama_context: n_seq_max = 8
|
| 178 |
+
llama_context: n_ctx = 4096
|
| 179 |
+
llama_context: n_ctx_seq = 512
|
| 180 |
+
llama_context: n_batch = 4096
|
| 181 |
+
llama_context: n_ubatch = 4096
|
| 182 |
+
llama_context: causal_attn = 1
|
| 183 |
+
llama_context: flash_attn = enabled
|
| 184 |
+
llama_context: kv_unified = false
|
| 185 |
+
llama_context: freq_base = 5000000.0
|
| 186 |
+
llama_context: freq_scale = 1
|
| 187 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 188 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 189 |
+
llama_kv_cache: CUDA0 KV buffer size = 384.00 MiB
|
| 190 |
+
llama_kv_cache: CUDA1 KV buffer size = 608.00 MiB
|
| 191 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 192 |
+
sched_reserve: reserving ...
|
| 193 |
+
sched_reserve: CUDA0 compute buffer size = 1345.00 MiB
|
| 194 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 195 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 196 |
+
sched_reserve: graph nodes = 4099
|
| 197 |
+
sched_reserve: graph splits = 89 (with bs=4096), 49 (with bs=1)
|
| 198 |
+
sched_reserve: reserve took 23.59 ms, sched copies = 1
|
| 199 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 200 |
+
|
| 201 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 202 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 203 |
+
kl_divergence: 3.85 seconds per pass - ETA 0.97 minutes
|
| 204 |
+
|
| 205 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 206 |
+
1 7.2545 ± 1.3959 0.12955 ± 0.06769 0.31147 ± 0.05266 16.565 ± 2.001 % 80.784 ± 2.472 %
|
| 207 |
+
2 5.1589 ± 0.6335 0.09850 ± 0.04073 0.22320 ± 0.02907 13.474 ± 1.380 % 83.725 ± 1.636 %
|
| 208 |
+
3 5.0617 ± 0.5009 0.12022 ± 0.03749 0.27499 ± 0.02400 17.147 ± 1.095 % 82.614 ± 1.371 %
|
| 209 |
+
4 5.6876 ± 0.5013 0.10351 ± 0.03280 0.29341 ± 0.02058 17.369 ± 0.945 % 80.882 ± 1.232 %
|
| 210 |
+
5 5.8157 ± 0.4726 0.17234 ± 0.03215 0.31803 ± 0.02037 17.849 ± 0.893 % 80.314 ± 1.114 %
|
| 211 |
+
6 7.1883 ± 0.5734 0.18126 ± 0.03005 0.34750 ± 0.01943 17.579 ± 0.822 % 79.346 ± 1.035 %
|
| 212 |
+
7 7.0584 ± 0.5127 0.23487 ± 0.03016 0.40570 ± 0.02141 19.492 ± 0.804 % 78.375 ± 0.975 %
|
| 213 |
+
8 7.7815 ± 0.5307 0.20890 ± 0.02788 0.39121 ± 0.01896 18.812 ± 0.736 % 78.088 ± 0.916 %
|
| 214 |
+
9 7.5867 ± 0.4835 0.20283 ± 0.02552 0.37473 ± 0.01723 18.208 ± 0.683 % 78.606 ± 0.856 %
|
| 215 |
+
10 6.8211 ± 0.4038 0.18549 ± 0.02363 0.35986 ± 0.01577 18.181 ± 0.636 % 78.824 ± 0.809 %
|
| 216 |
+
11 7.4382 ± 0.4257 0.18030 ± 0.02213 0.35694 ± 0.01463 17.971 ± 0.601 % 78.324 ± 0.778 %
|
| 217 |
+
12 8.1439 ± 0.4516 0.16944 ± 0.02082 0.35107 ± 0.01365 17.464 ± 0.570 % 78.235 ± 0.746 %
|
| 218 |
+
13 8.3734 ± 0.4412 0.15996 ± 0.01948 0.33795 ± 0.01269 17.039 ± 0.543 % 78.371 ± 0.715 %
|
| 219 |
+
14 8.9859 ± 0.4594 0.15530 ± 0.01920 0.34192 ± 0.01240 16.850 ± 0.521 % 77.563 ± 0.698 %
|
| 220 |
+
15 9.3397 ± 0.4606 0.14780 ± 0.01821 0.33566 ± 0.01162 16.661 ± 0.497 % 77.673 ± 0.673 %
|
| 221 |
+
16 9.6074 ± 0.4583 0.14386 ± 0.01723 0.32651 ± 0.01095 16.394 ± 0.478 % 77.598 ± 0.653 %
|
| 222 |
+
17 9.9714 ± 0.4665 0.15393 ± 0.01692 0.33782 ± 0.01109 16.215 ± 0.464 % 77.647 ± 0.633 %
|
| 223 |
+
18 9.3710 ± 0.4236 0.15106 ± 0.01659 0.34172 ± 0.01094 16.191 ± 0.454 % 77.865 ± 0.613 %
|
| 224 |
+
19 9.4888 ± 0.4161 0.14775 ± 0.01596 0.33334 ± 0.01044 16.047 ± 0.437 % 78.039 ± 0.595 %
|
| 225 |
+
20 9.6678 ± 0.4144 0.15856 ± 0.01582 0.34863 ± 0.01041 16.413 ± 0.429 % 77.490 ± 0.585 %
|
| 226 |
+
21 9.6223 ± 0.4025 0.15641 ± 0.01533 0.34404 ± 0.01002 16.346 ± 0.416 % 77.778 ± 0.568 %
|
| 227 |
+
22 10.0680 ± 0.4164 0.16402 ± 0.01497 0.34950 ± 0.00976 16.333 ± 0.404 % 77.380 ± 0.559 %
|
| 228 |
+
23 10.1286 ± 0.4113 0.16881 ± 0.01490 0.36082 ± 0.01001 16.626 ± 0.402 % 77.442 ± 0.546 %
|
| 229 |
+
24 10.5627 ± 0.4216 0.16471 ± 0.01446 0.35918 ± 0.00964 16.447 ± 0.392 % 77.157 ± 0.537 %
|
| 230 |
+
25 10.6023 ± 0.4148 0.16984 ± 0.01428 0.36556 ± 0.00951 16.755 ± 0.385 % 77.004 ± 0.527 %
|
| 231 |
+
26 10.5161 ± 0.4009 0.23029 ± 0.01561 0.42230 ± 0.01107 19.918 ± 0.421 % 76.109 ± 0.524 %
|
| 232 |
+
27 10.6706 ± 0.3986 0.29880 ± 0.01680 0.48949 ± 0.01256 22.723 ± 0.433 % 74.858 ± 0.523 %
|
| 233 |
+
28 10.7890 ± 0.3958 0.29616 ± 0.01656 0.48822 ± 0.01227 22.658 ± 0.424 % 74.790 ± 0.514 %
|
| 234 |
+
29 10.6513 ± 0.3843 0.29278 ± 0.01626 0.48696 ± 0.01196 22.592 ± 0.415 % 74.970 ± 0.504 %
|
| 235 |
+
30 9.9537 ± 0.3511 0.29225 ± 0.01602 0.47879 ± 0.01181 22.490 ± 0.409 % 75.660 ± 0.491 %
|
| 236 |
+
31 9.3653 ± 0.3230 0.29068 ± 0.01567 0.47188 ± 0.01157 22.495 ± 0.404 % 76.154 ± 0.479 %
|
| 237 |
+
32 9.1100 ± 0.3071 0.28829 ± 0.01532 0.46529 ± 0.01126 22.388 ± 0.396 % 76.336 ± 0.471 %
|
| 238 |
+
33 8.8645 ± 0.2923 0.28087 ± 0.01497 0.45852 ± 0.01094 22.245 ± 0.387 % 76.506 ± 0.462 %
|
| 239 |
+
34 9.0996 ± 0.2972 0.28002 ± 0.01472 0.46307 ± 0.01076 22.071 ± 0.380 % 76.263 ± 0.457 %
|
| 240 |
+
35 9.2582 ± 0.3000 0.28265 ± 0.01461 0.47074 ± 0.01062 22.142 ± 0.372 % 76.034 ± 0.452 %
|
| 241 |
+
36 9.3033 ± 0.2985 0.27863 ± 0.01436 0.46796 ± 0.01039 22.005 ± 0.366 % 76.057 ± 0.445 %
|
| 242 |
+
37 9.4715 ± 0.3001 0.29427 ± 0.01443 0.48320 ± 0.01058 22.750 ± 0.365 % 75.718 ± 0.441 %
|
| 243 |
+
38 9.7189 ± 0.3050 0.29032 ± 0.01414 0.47886 ± 0.01034 22.563 ± 0.360 % 75.769 ± 0.435 %
|
| 244 |
+
39 9.8521 ± 0.3064 0.31087 ± 0.01438 0.49575 ± 0.01067 23.042 ± 0.358 % 75.606 ± 0.431 %
|
| 245 |
+
40 10.0144 ± 0.3079 0.36213 ± 0.01504 0.54574 ± 0.01155 24.446 ± 0.358 % 74.853 ± 0.430 %
|
| 246 |
+
41 10.2840 ± 0.3129 0.41952 ± 0.01581 0.60038 ± 0.01241 26.041 ± 0.358 % 73.955 ± 0.429 %
|
| 247 |
+
42 10.5142 ± 0.3170 0.47299 ± 0.01640 0.64961 ± 0.01312 27.454 ± 0.358 % 73.165 ± 0.428 %
|
| 248 |
+
43 10.5945 ± 0.3161 0.51174 ± 0.01677 0.68549 ± 0.01353 28.403 ± 0.356 % 72.649 ± 0.426 %
|
| 249 |
+
44 10.4188 ± 0.3060 0.50133 ± 0.01645 0.67592 ± 0.01324 28.187 ± 0.351 % 72.772 ± 0.420 %
|
| 250 |
+
45 10.5901 ± 0.3091 0.49575 ± 0.01617 0.67118 ± 0.01299 27.952 ± 0.347 % 72.749 ± 0.416 %
|
| 251 |
+
46 10.7245 ± 0.3094 0.48628 ± 0.01589 0.66404 ± 0.01273 27.710 ± 0.343 % 72.762 ± 0.411 %
|
| 252 |
+
47 10.8735 ± 0.3104 0.47787 ± 0.01559 0.65415 ± 0.01248 27.454 ± 0.340 % 72.933 ± 0.406 %
|
| 253 |
+
48 10.6170 ± 0.2987 0.47065 ± 0.01531 0.64462 ± 0.01224 27.244 ± 0.336 % 73.145 ± 0.401 %
|
| 254 |
+
49 10.6739 ± 0.2969 0.46098 ± 0.01509 0.64242 ± 0.01215 27.064 ± 0.332 % 73.261 ± 0.396 %
|
| 255 |
+
50 10.7836 ± 0.2978 0.45705 ± 0.01489 0.63964 ± 0.01195 26.925 ± 0.328 % 73.278 ± 0.392 %
|
| 256 |
+
51 10.8894 ± 0.2976 0.45142 ± 0.01464 0.63198 ± 0.01173 26.721 ± 0.325 % 73.310 ± 0.388 %
|
| 257 |
+
52 10.9562 ± 0.2960 0.44822 ± 0.01447 0.63055 ± 0.01155 26.586 ± 0.321 % 73.243 ± 0.384 %
|
| 258 |
+
53 11.0541 ± 0.2954 0.44133 ± 0.01424 0.62430 ± 0.01135 26.415 ± 0.318 % 73.163 ± 0.381 %
|
| 259 |
+
54 11.0764 ± 0.2925 0.43557 ± 0.01402 0.61771 ± 0.01115 26.219 ± 0.315 % 73.166 ± 0.378 %
|
| 260 |
+
55 11.0912 ± 0.2897 0.42988 ± 0.01380 0.61049 ± 0.01096 26.022 ± 0.312 % 73.205 ± 0.374 %
|
| 261 |
+
56 11.0944 ± 0.2868 0.42444 ± 0.01359 0.60411 ± 0.01078 25.831 ± 0.309 % 73.263 ± 0.370 %
|
| 262 |
+
57 11.1176 ± 0.2849 0.42658 ± 0.01348 0.60570 ± 0.01067 25.794 ± 0.306 % 73.223 ± 0.367 %
|
| 263 |
+
58 11.1094 ± 0.2821 0.42448 ± 0.01333 0.60156 ± 0.01052 25.673 ± 0.303 % 73.340 ± 0.364 %
|
| 264 |
+
59 10.9793 ± 0.2758 0.41782 ± 0.01314 0.59402 ± 0.01036 25.505 ± 0.300 % 73.473 ± 0.360 %
|
| 265 |
+
60 10.9578 ± 0.2728 0.41438 ± 0.01296 0.58936 ± 0.01021 25.384 ± 0.297 % 73.562 ± 0.357 %
|
| 266 |
+
61 11.0139 ± 0.2721 0.41285 ± 0.01281 0.58586 ± 0.01007 25.235 ± 0.294 % 73.545 ± 0.354 %
|
| 267 |
+
62 10.9570 ± 0.2691 0.41094 ± 0.01272 0.58357 ± 0.00998 25.191 ± 0.292 % 73.675 ± 0.350 %
|
| 268 |
+
63 10.9859 ± 0.2683 0.40725 ± 0.01260 0.58091 ± 0.00986 25.065 ± 0.290 % 73.719 ± 0.347 %
|
| 269 |
+
64 10.8935 ± 0.2631 0.40137 ± 0.01247 0.57677 ± 0.00972 24.947 ± 0.287 % 73.781 ± 0.344 %
|
| 270 |
+
65 10.8269 ± 0.2592 0.39641 ± 0.01234 0.57488 ± 0.00962 24.845 ± 0.284 % 73.888 ± 0.341 %
|
| 271 |
+
66 10.8273 ± 0.2573 0.39145 ± 0.01220 0.57142 ± 0.00949 24.724 ± 0.282 % 73.933 ± 0.338 %
|
| 272 |
+
67 10.8095 ± 0.2549 0.38886 ± 0.01206 0.56805 ± 0.00936 24.598 ± 0.280 % 74.047 ± 0.335 %
|
| 273 |
+
68 10.6838 ± 0.2496 0.38313 ± 0.01191 0.56276 ± 0.00924 24.457 ± 0.277 % 74.158 ± 0.332 %
|
| 274 |
+
69 10.7017 ± 0.2483 0.38035 ± 0.01179 0.55968 ± 0.00913 24.345 ± 0.275 % 74.180 ± 0.330 %
|
| 275 |
+
70 10.6198 ± 0.2443 0.37646 ± 0.01167 0.55769 ± 0.00904 24.275 ± 0.273 % 74.258 ± 0.327 %
|
| 276 |
+
71 10.5553 ± 0.2411 0.37236 ± 0.01153 0.55327 ± 0.00893 24.156 ± 0.271 % 74.399 ± 0.324 %
|
| 277 |
+
72 10.5470 ± 0.2396 0.36940 ± 0.01142 0.54955 ± 0.00882 24.035 ± 0.269 % 74.499 ± 0.322 %
|
| 278 |
+
73 10.5430 ± 0.2378 0.36828 ± 0.01131 0.54751 ± 0.00874 23.943 ± 0.267 % 74.445 ± 0.320 %
|
| 279 |
+
74 10.4877 ± 0.2346 0.36385 ± 0.01119 0.54411 ± 0.00863 23.844 ± 0.265 % 74.494 ± 0.317 %
|
| 280 |
+
75 10.4691 ± 0.2326 0.36116 ± 0.01110 0.54266 ± 0.00853 23.801 ± 0.263 % 74.484 ± 0.315 %
|
| 281 |
+
76 10.5227 ± 0.2322 0.35778 ± 0.01100 0.54038 ± 0.00844 23.716 ± 0.261 % 74.479 ± 0.313 %
|
| 282 |
+
77 10.4916 ± 0.2300 0.35510 ± 0.01089 0.53701 ± 0.00834 23.624 ± 0.259 % 74.561 ± 0.311 %
|
| 283 |
+
78 10.4982 ± 0.2288 0.35389 ± 0.01081 0.53602 ± 0.00827 23.564 ± 0.257 % 74.550 ± 0.309 %
|
| 284 |
+
79 10.4977 ± 0.2275 0.35239 ± 0.01073 0.53409 ± 0.00818 23.506 ± 0.255 % 74.540 ± 0.307 %
|
| 285 |
+
80 10.4926 ± 0.2265 0.35166 ± 0.01065 0.53522 ± 0.00813 23.465 ± 0.253 % 74.500 ± 0.305 %
|
| 286 |
+
81 10.4302 ± 0.2238 0.34879 ± 0.01055 0.53156 ± 0.00804 23.370 ± 0.252 % 74.582 ± 0.303 %
|
| 287 |
+
82 10.3846 ± 0.2212 0.34734 ± 0.01044 0.52871 ± 0.00795 23.291 ± 0.250 % 74.582 ± 0.301 %
|
| 288 |
+
83 10.4171 ± 0.2203 0.34615 ± 0.01035 0.52615 ± 0.00786 23.203 ± 0.248 % 74.562 ± 0.299 %
|
| 289 |
+
84 10.4088 ± 0.2183 0.34317 ± 0.01025 0.52312 ± 0.00777 23.104 ± 0.246 % 74.585 ± 0.297 %
|
| 290 |
+
85 10.3690 ± 0.2158 0.33989 ± 0.01016 0.52000 ± 0.00769 23.011 ± 0.244 % 74.588 ± 0.296 %
|
| 291 |
+
86 10.2478 ± 0.2113 0.33717 ± 0.01006 0.51665 ± 0.00761 22.936 ± 0.243 % 74.656 ± 0.294 %
|
| 292 |
+
87 10.1426 ± 0.2074 0.33466 ± 0.00996 0.51299 ± 0.00753 22.842 ± 0.241 % 74.717 ± 0.292 %
|
| 293 |
+
88 10.0389 ± 0.2036 0.33271 ± 0.00986 0.50979 ± 0.00745 22.758 ± 0.240 % 74.826 ± 0.290 %
|
| 294 |
+
89 9.9193 ± 0.1994 0.33070 ± 0.00977 0.50641 ± 0.00737 22.676 ± 0.238 % 74.893 ± 0.288 %
|
| 295 |
+
90 9.8188 ± 0.1958 0.32823 ± 0.00968 0.50309 ± 0.00730 22.588 ± 0.236 % 74.972 ± 0.286 %
|
| 296 |
+
91 9.7195 ± 0.1924 0.32516 ± 0.00959 0.50010 ± 0.00723 22.512 ± 0.235 % 75.057 ± 0.284 %
|
| 297 |
+
92 9.6149 ± 0.1887 0.32240 ± 0.00950 0.49699 ± 0.00716 22.435 ± 0.233 % 75.128 ± 0.282 %
|
| 298 |
+
93 9.6016 ± 0.1875 0.31852 ± 0.00945 0.49694 ± 0.00711 22.409 ± 0.232 % 75.109 ± 0.281 %
|
| 299 |
+
94 9.6182 ± 0.1867 0.31591 ± 0.00936 0.49379 ± 0.00704 22.314 ± 0.230 % 75.148 ± 0.279 %
|
| 300 |
+
95 9.7470 ± 0.1885 0.31402 ± 0.00928 0.49149 ± 0.00697 22.226 ± 0.229 % 75.179 ± 0.278 %
|
| 301 |
+
96 9.8521 ± 0.1896 0.31179 ± 0.00921 0.48982 ± 0.00691 22.132 ± 0.227 % 75.131 ± 0.276 %
|
| 302 |
+
97 9.9536 ± 0.1907 0.31138 ± 0.00914 0.48752 ± 0.00684 22.060 ± 0.226 % 75.068 ± 0.275 %
|
| 303 |
+
98 10.1193 ± 0.1934 0.30974 ± 0.00906 0.48505 ± 0.00678 21.962 ± 0.225 % 75.058 ± 0.274 %
|
| 304 |
+
99 10.2621 ± 0.1954 0.30819 ± 0.00899 0.48321 ± 0.00671 21.890 ± 0.224 % 74.977 ± 0.273 %
|
| 305 |
+
100 10.2951 ± 0.1949 0.30652 ± 0.00893 0.48251 ± 0.00666 21.842 ± 0.222 % 74.992 ± 0.271 %
|
| 306 |
+
101 10.3230 ± 0.1946 0.30439 ± 0.00887 0.48161 ± 0.00664 21.806 ± 0.221 % 75.034 ± 0.270 %
|
| 307 |
+
102 10.4123 ± 0.1960 0.30511 ± 0.00884 0.48309 ± 0.00662 21.795 ± 0.220 % 74.967 ± 0.269 %
|
| 308 |
+
103 10.3863 ± 0.1947 0.30598 ± 0.00880 0.48277 ± 0.00659 21.861 ± 0.219 % 74.978 ± 0.267 %
|
| 309 |
+
104 10.3575 ± 0.1931 0.31071 ± 0.00879 0.48572 ± 0.00660 22.004 ± 0.219 % 74.925 ± 0.266 %
|
| 310 |
+
105 10.2181 ± 0.1893 0.31167 ± 0.00876 0.48681 ± 0.00658 22.154 ± 0.218 % 74.988 ± 0.265 %
|
| 311 |
+
106 10.1017 ± 0.1860 0.31751 ± 0.00878 0.49124 ± 0.00664 22.440 ± 0.218 % 75.013 ± 0.263 %
|
| 312 |
+
107 10.1565 ± 0.1860 0.31519 ± 0.00871 0.48819 ± 0.00658 22.351 ± 0.217 % 75.005 ± 0.262 %
|
| 313 |
+
108 10.1575 ± 0.1851 0.31351 ± 0.00864 0.48699 ± 0.00653 22.294 ± 0.216 % 75.018 ± 0.261 %
|
| 314 |
+
109 10.1841 ± 0.1849 0.31340 ± 0.00859 0.48611 ± 0.00648 22.266 ± 0.215 % 75.028 ± 0.260 %
|
| 315 |
+
110 10.2137 ± 0.1845 0.31162 ± 0.00854 0.48471 ± 0.00644 22.210 ± 0.214 % 75.005 ± 0.259 %
|
| 316 |
+
111 10.2561 ± 0.1843 0.30945 ± 0.00848 0.48290 ± 0.00639 22.132 ± 0.213 % 74.994 ± 0.257 %
|
| 317 |
+
112 10.2505 ± 0.1832 0.30755 ± 0.00842 0.48056 ± 0.00634 22.073 ± 0.211 % 75.018 ± 0.256 %
|
| 318 |
+
113 10.2453 ± 0.1820 0.30567 ± 0.00836 0.47795 ± 0.00628 22.002 ± 0.210 % 75.051 ± 0.255 %
|
| 319 |
+
114 10.2442 ± 0.1813 0.30323 ± 0.00830 0.47577 ± 0.00623 21.936 ± 0.209 % 75.091 ± 0.254 %
|
| 320 |
+
115 10.2324 ± 0.1802 0.30442 ± 0.00827 0.47680 ± 0.00621 21.980 ± 0.208 % 75.076 ± 0.253 %
|
| 321 |
+
116 10.3061 ± 0.1809 0.31299 ± 0.00832 0.48562 ± 0.00627 22.233 ± 0.208 % 74.888 ± 0.252 %
|
| 322 |
+
117 10.3716 ± 0.1815 0.33290 ± 0.00850 0.50312 ± 0.00648 22.816 ± 0.209 % 74.627 ± 0.252 %
|
| 323 |
+
118 10.4027 ± 0.1816 0.34875 ± 0.00865 0.51685 ± 0.00662 23.264 ± 0.209 % 74.440 ± 0.251 %
|
| 324 |
+
119 10.4599 ± 0.1819 0.36768 ± 0.00882 0.53420 ± 0.00681 23.833 ± 0.210 % 74.190 ± 0.251 %
|
| 325 |
+
120 10.5059 ± 0.1821 0.38342 ± 0.00895 0.54987 ± 0.00695 24.331 ± 0.209 % 73.935 ± 0.251 %
|
| 326 |
+
121 10.5458 ± 0.1821 0.39885 ± 0.00903 0.56357 ± 0.00705 24.837 ± 0.210 % 73.713 ± 0.251 %
|
| 327 |
+
|
| 328 |
+
====== Perplexity statistics ======
|
| 329 |
+
Mean PPL(Q) : 10.545816 ± 0.182117
|
| 330 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 331 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 85.61%
|
| 332 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.398845 ± 0.009030
|
| 333 |
+
Mean PPL(Q)/PPL(base) : 1.490103 ± 0.013455
|
| 334 |
+
Mean PPL(Q)-PPL(base) : 3.468577 ± 0.102926
|
| 335 |
+
|
| 336 |
+
====== KL divergence statistics ======
|
| 337 |
+
Mean KLD: 0.563569 ± 0.007049
|
| 338 |
+
Maximum KLD: 19.248781
|
| 339 |
+
99.9% KLD: 10.335377
|
| 340 |
+
99.0% KLD: 6.580419
|
| 341 |
+
95.0% KLD: 2.957068
|
| 342 |
+
90.0% KLD: 1.339546
|
| 343 |
+
Median KLD: 0.157771
|
| 344 |
+
10.0% KLD: 0.001290
|
| 345 |
+
5.0% KLD: 0.000202
|
| 346 |
+
1.0% KLD: 0.000011
|
| 347 |
+
0.1% KLD: 0.000000
|
| 348 |
+
Minimum KLD: -0.000003
|
| 349 |
+
|
| 350 |
+
====== Token probability statistics ======
|
| 351 |
+
Mean Δp: -7.067 ± 0.136 %
|
| 352 |
+
Maximum Δp: 98.664%
|
| 353 |
+
99.9% Δp: 79.946%
|
| 354 |
+
99.0% Δp: 39.711%
|
| 355 |
+
95.0% Δp: 15.406%
|
| 356 |
+
90.0% Δp: 7.470%
|
| 357 |
+
75.0% Δp: 0.460%
|
| 358 |
+
Median Δp: -0.139%
|
| 359 |
+
25.0% Δp: -6.781%
|
| 360 |
+
10.0% Δp: -30.853%
|
| 361 |
+
5.0% Δp: -67.276%
|
| 362 |
+
1.0% Δp: -97.535%
|
| 363 |
+
0.1% Δp: -99.845%
|
| 364 |
+
Minimum Δp: -99.988%
|
| 365 |
+
RMS Δp : 24.837 ± 0.210 %
|
| 366 |
+
Same top p: 73.713 ± 0.251 %
|
| 367 |
+
|
| 368 |
+
llama_perf_context_print: load time = 25148.68 ms
|
| 369 |
+
llama_perf_context_print: prompt eval time = 50826.40 ms / 61952 tokens ( 0.82 ms per token, 1218.89 tokens per second)
|
| 370 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 371 |
+
llama_perf_context_print: total time = 62208.21 ms / 61953 tokens
|
| 372 |
+
llama_perf_context_print: graphs reused = 0
|
| 373 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 374 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 659 + (22521 = 20792 + 384 + 1344) + 953 |
|
| 375 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 605 + (22619 = 18837 + 608 + 3174) + 910 |
|
| 376 |
+
llama_memory_breakdown_print: | - Host | 59865 = 59761 + 0 + 104 |
|
| 377 |
+
```
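The summary blocks above (perplexity, KL divergence, token probability) are all derived from the same per-token quantities: the probability each model assigns to the observed token, the two full softmax distributions, and each model's top-1 prediction. The following is a minimal NumPy sketch of those definitions, assuming full logit matrices are available; the function names are illustrative and this is not the llama.cpp implementation itself.

```python
import numpy as np

def softmax(logits):
    # Numerically stable softmax over the vocabulary axis.
    z = logits - logits.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def summarize(logits_base, logits_quant, targets):
    """Illustrative definitions of the metrics reported above.

    logits_base, logits_quant : [n_tokens, n_vocab] logits from the BF16
                                reference and from the quantized model.
    targets                   : [n_tokens] observed token ids.
    """
    p = softmax(logits_base)   # reference distribution P
    q = softmax(logits_quant)  # quantized distribution Q
    idx = np.arange(len(targets))

    # Perplexity over the observed tokens for each model.
    ppl_base  = np.exp(-np.mean(np.log(p[idx, targets])))
    ppl_quant = np.exp(-np.mean(np.log(q[idx, targets])))

    # Per-position KL divergence D_KL(P || Q).
    kld = np.sum(p * (np.log(p) - np.log(q)), axis=-1)

    # Δp: change in probability assigned to the observed token, in percent.
    dp = 100.0 * (q[idx, targets] - p[idx, targets])

    # Agreement of the greedy (top-1) predictions.
    same_top = np.mean(p.argmax(axis=-1) == q.argmax(axis=-1))

    return {
        "Mean PPL(Q)": float(ppl_quant),
        "Mean PPL(base)": float(ppl_base),
        "Mean ln(PPL(Q)/PPL(base))": float(np.log(ppl_quant / ppl_base)),
        "Mean KLD": float(kld.mean()),
        "Median KLD": float(np.median(kld)),
        "Mean Δp": float(dp.mean()),
        "RMS Δp": float(np.sqrt(np.mean(dp ** 2))),
        "Same top p": float(same_top),
    }
```

As a quick consistency check against the numbers above: exp(0.398845) ≈ 1.4901, which matches the reported Mean PPL(Q)/PPL(base) of 1.490103.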
|
kld_data/unsloth/UD-IQ2_M/MiniMax-M2.5-UD-IQ2_M.md
ADDED
|
@@ -0,0 +1,375 @@
| 1 |
+
### MiniMax-M2.5-UD-IQ2_M (unsloth)
|
| 2 |
+
|
| 3 |
+
72.82 GiB (2.74 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ2_M/MiniMax-M2.5-UD-IQ2_M-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 38810 used, -14938 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 40992 used, -17120 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 79802 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 34106 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37981 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7404 MiB used, 16467 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1416 MiB used, 22454 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 19 layers ( 1 overflowing), 22724 MiB used, 1147 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 44 layers (30 overflowing), 22553 MiB used, 1318 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.20 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ2_M/MiniMax-M2.5-UD-IQ2_M-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 29
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 16 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
llama_model_loader: - type iq2_xs: 98 tensors
|
| 90 |
+
llama_model_loader: - type iq3_xxs: 41 tensors
|
| 91 |
+
llama_model_loader: - type iq3_s: 9 tensors
|
| 92 |
+
llama_model_loader: - type iq2_s: 26 tensors
|
| 93 |
+
llama_model_loader: - type iq4_xs: 234 tensors
|
| 94 |
+
print_info: file format = GGUF V3 (latest)
|
| 95 |
+
print_info: file type = IQ2_M - 2.7 bpw
|
| 96 |
+
print_info: file size = 72.82 GiB (2.74 BPW)
|
| 97 |
+
load: 0 unused tokens
|
| 98 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 99 |
+
load: printing all EOG tokens:
|
| 100 |
+
load: - 200004 ('<fim_pad>')
|
| 101 |
+
load: - 200005 ('<reponame>')
|
| 102 |
+
load: - 200020 ('[e~[')
|
| 103 |
+
load: special tokens cache size = 54
|
| 104 |
+
load: token to piece cache size = 1.3355 MB
|
| 105 |
+
print_info: arch = minimax-m2
|
| 106 |
+
print_info: vocab_only = 0
|
| 107 |
+
print_info: no_alloc = 0
|
| 108 |
+
print_info: n_ctx_train = 196608
|
| 109 |
+
print_info: n_embd = 3072
|
| 110 |
+
print_info: n_embd_inp = 3072
|
| 111 |
+
print_info: n_layer = 62
|
| 112 |
+
print_info: n_head = 48
|
| 113 |
+
print_info: n_head_kv = 8
|
| 114 |
+
print_info: n_rot = 64
|
| 115 |
+
print_info: n_swa = 0
|
| 116 |
+
print_info: is_swa_any = 0
|
| 117 |
+
print_info: n_embd_head_k = 128
|
| 118 |
+
print_info: n_embd_head_v = 128
|
| 119 |
+
print_info: n_gqa = 6
|
| 120 |
+
print_info: n_embd_k_gqa = 1024
|
| 121 |
+
print_info: n_embd_v_gqa = 1024
|
| 122 |
+
print_info: f_norm_eps = 0.0e+00
|
| 123 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 124 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 125 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 126 |
+
print_info: f_logit_scale = 0.0e+00
|
| 127 |
+
print_info: f_attn_scale = 0.0e+00
|
| 128 |
+
print_info: n_ff = 1536
|
| 129 |
+
print_info: n_expert = 256
|
| 130 |
+
print_info: n_expert_used = 8
|
| 131 |
+
print_info: n_expert_groups = 0
|
| 132 |
+
print_info: n_group_used = 0
|
| 133 |
+
print_info: causal attn = 1
|
| 134 |
+
print_info: pooling type = 0
|
| 135 |
+
print_info: rope type = 2
|
| 136 |
+
print_info: rope scaling = linear
|
| 137 |
+
print_info: freq_base_train = 5000000.0
|
| 138 |
+
print_info: freq_scale_train = 1
|
| 139 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 140 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 141 |
+
print_info: rope_finetuned = unknown
|
| 142 |
+
print_info: model type = 230B.A10B
|
| 143 |
+
print_info: model params = 228.69 B
|
| 144 |
+
print_info: general.name = Minimax-M2.5
|
| 145 |
+
print_info: vocab type = BPE
|
| 146 |
+
print_info: n_vocab = 200064
|
| 147 |
+
print_info: n_merges = 199744
|
| 148 |
+
print_info: BOS token = 200034 ']~!b['
|
| 149 |
+
print_info: EOS token = 200020 '[e~['
|
| 150 |
+
print_info: UNK token = 200021 ']!d~['
|
| 151 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: LF token = 10 'Ċ'
|
| 153 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 154 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 155 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 156 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 157 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 158 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 159 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 160 |
+
print_info: EOG token = 200020 '[e~['
|
| 161 |
+
print_info: max token length = 256
|
| 162 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 163 |
+
load_tensors: offloading output layer to GPU
|
| 164 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 165 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 166 |
+
load_tensors: CPU_Mapped model buffer size = 47117.04 MiB
|
| 167 |
+
load_tensors: CPU_Mapped model buffer size = 26968.51 MiB
|
| 168 |
+
load_tensors: CUDA0 model buffer size = 21003.14 MiB
|
| 169 |
+
load_tensors: CUDA1 model buffer size = 18691.54 MiB
|
| 170 |
+
....................................................................................................
|
| 171 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 172 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 173 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 174 |
+
llama_context: constructing llama_context
|
| 175 |
+
llama_context: n_seq_max = 8
|
| 176 |
+
llama_context: n_ctx = 4096
|
| 177 |
+
llama_context: n_ctx_seq = 512
|
| 178 |
+
llama_context: n_batch = 4096
|
| 179 |
+
llama_context: n_ubatch = 4096
|
| 180 |
+
llama_context: causal_attn = 1
|
| 181 |
+
llama_context: flash_attn = enabled
|
| 182 |
+
llama_context: kv_unified = false
|
| 183 |
+
llama_context: freq_base = 5000000.0
|
| 184 |
+
llama_context: freq_scale = 1
|
| 185 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 186 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 187 |
+
llama_kv_cache: CUDA0 KV buffer size = 304.00 MiB
|
| 188 |
+
llama_kv_cache: CUDA1 KV buffer size = 688.00 MiB
|
| 189 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 190 |
+
sched_reserve: reserving ...
|
| 191 |
+
sched_reserve: CUDA0 compute buffer size = 1417.00 MiB
|
| 192 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 193 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 194 |
+
sched_reserve: graph nodes = 4099
|
| 195 |
+
sched_reserve: graph splits = 123 (with bs=4096), 65 (with bs=1)
|
| 196 |
+
sched_reserve: reserve took 23.69 ms, sched copies = 1
|
| 197 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 198 |
+
|
| 199 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 200 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 201 |
+
kl_divergence: 4.99 seconds per pass - ETA 1.25 minutes
|
| 202 |
+
|
| 203 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 204 |
+
1 6.8483 ± 1.3654 0.07193 ± 0.04448 0.09984 ± 0.00997 9.178 ± 1.007 % 87.451 ± 2.079 %
|
| 205 |
+
2 5.1270 ± 0.6520 0.09231 ± 0.02911 0.07871 ± 0.00711 9.243 ± 1.034 % 90.392 ± 1.306 %
|
| 206 |
+
3 4.8027 ± 0.4868 0.06768 ± 0.02411 0.09953 ± 0.00721 10.518 ± 0.829 % 89.281 ± 1.119 %
|
| 207 |
+
4 5.4344 ± 0.4868 0.05798 ± 0.02066 0.10818 ± 0.00661 10.304 ± 0.679 % 88.137 ± 1.013 %
|
| 208 |
+
5 5.4556 ± 0.4472 0.10843 ± 0.02152 0.13329 ± 0.01067 11.657 ± 0.771 % 88.157 ± 0.905 %
|
| 209 |
+
6 6.6446 ± 0.5312 0.10262 ± 0.02025 0.15231 ± 0.01002 11.760 ± 0.710 % 87.255 ± 0.853 %
|
| 210 |
+
7 6.2543 ± 0.4502 0.11392 ± 0.01960 0.17948 ± 0.01128 12.844 ± 0.671 % 86.050 ± 0.820 %
|
| 211 |
+
8 6.9504 ± 0.4694 0.09594 ± 0.01791 0.17509 ± 0.01010 12.406 ± 0.617 % 85.343 ± 0.783 %
|
| 212 |
+
9 6.8149 ± 0.4282 0.09553 ± 0.01650 0.17006 ± 0.00939 12.270 ± 0.595 % 85.403 ± 0.737 %
|
| 213 |
+
10 6.2005 ± 0.3632 0.09011 ± 0.01517 0.16132 ± 0.00854 12.129 ± 0.549 % 85.725 ± 0.693 %
|
| 214 |
+
11 6.8081 ± 0.3867 0.09177 ± 0.01472 0.16383 ± 0.00814 12.027 ± 0.527 % 84.920 ± 0.676 %
|
| 215 |
+
12 7.4846 ± 0.4109 0.08502 ± 0.01380 0.16183 ± 0.00755 11.728 ± 0.497 % 84.739 ± 0.650 %
|
| 216 |
+
13 7.7230 ± 0.4028 0.07911 ± 0.01293 0.15655 ± 0.00704 11.416 ± 0.474 % 84.736 ± 0.625 %
|
| 217 |
+
14 8.3141 ± 0.4221 0.07761 ± 0.01254 0.15667 ± 0.00683 11.300 ± 0.458 % 84.426 ± 0.607 %
|
| 218 |
+
15 8.6788 ± 0.4254 0.07441 ± 0.01190 0.15408 ± 0.00641 11.097 ± 0.439 % 84.418 ± 0.586 %
|
| 219 |
+
16 8.9003 ± 0.4210 0.06741 ± 0.01132 0.14865 ± 0.00602 10.867 ± 0.421 % 84.657 ± 0.564 %
|
| 220 |
+
17 9.1626 ± 0.4236 0.06934 ± 0.01125 0.16106 ± 0.00642 10.930 ± 0.413 % 84.060 ± 0.556 %
|
| 221 |
+
18 8.6481 ± 0.3866 0.07077 ± 0.01101 0.16187 ± 0.00627 10.933 ± 0.407 % 84.423 ± 0.535 %
|
| 222 |
+
19 8.7951 ± 0.3828 0.07183 ± 0.01068 0.15852 ± 0.00598 10.878 ± 0.391 % 84.396 ± 0.521 %
|
| 223 |
+
20 8.8522 ± 0.3756 0.07043 ± 0.01063 0.16482 ± 0.00601 11.019 ± 0.388 % 84.137 ± 0.512 %
|
| 224 |
+
21 8.8483 ± 0.3663 0.07255 ± 0.01029 0.16305 ± 0.00578 11.024 ± 0.378 % 84.388 ± 0.496 %
|
| 225 |
+
22 9.2435 ± 0.3791 0.07858 ± 0.01017 0.16347 ± 0.00555 10.946 ± 0.365 % 84.153 ± 0.488 %
|
| 226 |
+
23 9.2859 ± 0.3738 0.08194 ± 0.01025 0.17043 ± 0.00569 11.286 ± 0.364 % 84.075 ± 0.478 %
|
| 227 |
+
24 9.6889 ± 0.3833 0.07836 ± 0.00995 0.16937 ± 0.00547 11.151 ± 0.354 % 84.003 ± 0.469 %
|
| 228 |
+
25 9.7064 ± 0.3770 0.08155 ± 0.00987 0.17422 ± 0.00542 11.399 ± 0.343 % 83.780 ± 0.462 %
|
| 229 |
+
26 9.4040 ± 0.3546 0.11851 ± 0.01082 0.21057 ± 0.00677 14.463 ± 0.403 % 83.032 ± 0.461 %
|
| 230 |
+
27 9.3195 ± 0.3433 0.16342 ± 0.01194 0.25419 ± 0.00827 16.898 ± 0.422 % 82.251 ± 0.461 %
|
| 231 |
+
28 9.4414 ± 0.3424 0.16273 ± 0.01173 0.25217 ± 0.00800 16.742 ± 0.413 % 82.185 ± 0.453 %
|
| 232 |
+
29 9.3115 ± 0.3315 0.15835 ± 0.01148 0.25001 ± 0.00778 16.592 ± 0.404 % 82.380 ± 0.443 %
|
| 233 |
+
30 8.7312 ± 0.3037 0.16120 ± 0.01145 0.24772 ± 0.00776 16.569 ± 0.399 % 82.837 ± 0.431 %
|
| 234 |
+
31 8.2151 ± 0.2789 0.15965 ± 0.01115 0.24402 ± 0.00760 16.508 ± 0.393 % 83.264 ± 0.420 %
|
| 235 |
+
32 7.9689 ± 0.2641 0.15446 ± 0.01087 0.23949 ± 0.00737 16.390 ± 0.384 % 83.333 ± 0.413 %
|
| 236 |
+
33 7.7903 ± 0.2526 0.15169 ± 0.01061 0.23541 ± 0.00716 16.255 ± 0.376 % 83.434 ± 0.405 %
|
| 237 |
+
34 8.0114 ± 0.2575 0.15265 ± 0.01044 0.23777 ± 0.00701 16.131 ± 0.369 % 83.299 ± 0.401 %
|
| 238 |
+
35 8.1620 ± 0.2610 0.15663 ± 0.01044 0.24461 ± 0.00697 16.249 ± 0.361 % 83.070 ± 0.397 %
|
| 239 |
+
36 8.2196 ± 0.2600 0.15478 ± 0.01026 0.24285 ± 0.00680 16.132 ± 0.354 % 83.039 ± 0.392 %
|
| 240 |
+
37 8.3086 ± 0.2595 0.16328 ± 0.01037 0.25232 ± 0.00712 16.565 ± 0.355 % 82.872 ± 0.388 %
|
| 241 |
+
38 8.5197 ± 0.2634 0.15862 ± 0.01015 0.24988 ± 0.00696 16.412 ± 0.350 % 82.848 ± 0.383 %
|
| 242 |
+
39 8.5311 ± 0.2602 0.16690 ± 0.01026 0.25999 ± 0.00720 16.871 ± 0.347 % 82.665 ± 0.380 %
|
| 243 |
+
40 8.5369 ± 0.2564 0.20252 ± 0.01083 0.29349 ± 0.00788 18.317 ± 0.350 % 81.990 ± 0.381 %
|
| 244 |
+
41 8.5114 ± 0.2515 0.23035 ± 0.01124 0.32390 ± 0.00838 19.528 ± 0.349 % 81.416 ± 0.380 %
|
| 245 |
+
42 8.4850 ± 0.2476 0.25856 ± 0.01177 0.35190 ± 0.00899 20.563 ± 0.350 % 80.999 ± 0.379 %
|
| 246 |
+
43 8.3856 ± 0.2408 0.27792 ± 0.01192 0.37137 ± 0.00918 21.436 ± 0.349 % 80.629 ± 0.377 %
|
| 247 |
+
44 8.2670 ± 0.2336 0.27000 ± 0.01170 0.36541 ± 0.00899 21.240 ± 0.344 % 80.793 ± 0.372 %
|
| 248 |
+
45 8.4182 ± 0.2363 0.26623 ± 0.01152 0.36205 ± 0.00880 21.064 ± 0.340 % 80.784 ± 0.368 %
|
| 249 |
+
46 8.5504 ± 0.2373 0.25973 ± 0.01133 0.35768 ± 0.00862 20.884 ± 0.336 % 80.776 ± 0.364 %
|
| 250 |
+
47 8.7057 ± 0.2395 0.25552 ± 0.01111 0.35221 ± 0.00845 20.698 ± 0.332 % 80.826 ± 0.360 %
|
| 251 |
+
48 8.5089 ± 0.2303 0.24931 ± 0.01090 0.34663 ± 0.00828 20.524 ± 0.328 % 80.964 ± 0.355 %
|
| 252 |
+
49 8.6411 ± 0.2316 0.24971 ± 0.01089 0.34895 ± 0.00836 20.397 ± 0.325 % 80.864 ± 0.352 %
|
| 253 |
+
50 8.7198 ± 0.2321 0.24462 ± 0.01071 0.34525 ± 0.00820 20.243 ± 0.321 % 80.886 ± 0.348 %
|
| 254 |
+
51 8.8298 ± 0.2329 0.24176 ± 0.01053 0.34083 ± 0.00804 20.071 ± 0.318 % 81.000 ± 0.344 %
|
| 255 |
+
52 8.8883 ± 0.2318 0.23905 ± 0.01040 0.33949 ± 0.00791 19.957 ± 0.314 % 80.875 ± 0.342 %
|
| 256 |
+
53 8.9880 ± 0.2319 0.23442 ± 0.01024 0.33556 ± 0.00777 19.796 ± 0.311 % 80.910 ± 0.338 %
|
| 257 |
+
54 9.0308 ± 0.2303 0.23139 ± 0.01008 0.33168 ± 0.00763 19.645 ± 0.308 % 80.930 ± 0.335 %
|
| 258 |
+
55 9.0726 ± 0.2288 0.22898 ± 0.00993 0.32773 ± 0.00750 19.496 ± 0.304 % 80.934 ± 0.332 %
|
| 259 |
+
56 9.0858 ± 0.2269 0.22471 ± 0.00977 0.32387 ± 0.00737 19.348 ± 0.301 % 80.959 ± 0.329 %
|
| 260 |
+
57 9.0927 ± 0.2252 0.22552 ± 0.00968 0.32376 ± 0.00728 19.316 ± 0.298 % 80.936 ± 0.326 %
|
| 261 |
+
58 9.0819 ± 0.2229 0.22297 ± 0.00955 0.32068 ± 0.00716 19.199 ± 0.295 % 80.994 ± 0.323 %
|
| 262 |
+
59 9.0035 ± 0.2187 0.21943 ± 0.00941 0.31631 ± 0.00704 19.053 ± 0.292 % 81.163 ± 0.319 %
|
| 263 |
+
60 8.9949 ± 0.2167 0.21699 ± 0.00929 0.31364 ± 0.00694 18.957 ± 0.290 % 81.183 ± 0.316 %
|
| 264 |
+
61 9.0285 ± 0.2156 0.21407 ± 0.00917 0.31104 ± 0.00683 18.858 ± 0.287 % 81.241 ± 0.313 %
|
| 265 |
+
62 8.9692 ± 0.2126 0.21076 ± 0.00906 0.30852 ± 0.00673 18.794 ± 0.284 % 81.322 ± 0.310 %
|
| 266 |
+
63 9.0320 ± 0.2132 0.21141 ± 0.00897 0.30675 ± 0.00664 18.701 ± 0.281 % 81.351 ± 0.307 %
|
| 267 |
+
64 8.9930 ± 0.2102 0.20966 ± 0.00887 0.30500 ± 0.00655 18.622 ± 0.279 % 81.360 ± 0.305 %
|
| 268 |
+
65 8.9568 ± 0.2076 0.20680 ± 0.00878 0.30338 ± 0.00646 18.539 ± 0.276 % 81.406 ± 0.302 %
|
| 269 |
+
66 8.9759 ± 0.2065 0.20392 ± 0.00869 0.30164 ± 0.00637 18.486 ± 0.273 % 81.408 ± 0.300 %
|
| 270 |
+
67 8.9688 ± 0.2049 0.20219 ± 0.00859 0.29997 ± 0.00629 18.398 ± 0.271 % 81.463 ± 0.297 %
|
| 271 |
+
68 8.8901 ± 0.2013 0.19934 ± 0.00848 0.29683 ± 0.00620 18.286 ± 0.269 % 81.574 ± 0.294 %
|
| 272 |
+
69 8.9100 ± 0.2003 0.19712 ± 0.00839 0.29476 ± 0.00612 18.199 ± 0.267 % 81.620 ± 0.292 %
|
| 273 |
+
70 8.8596 ± 0.1974 0.19524 ± 0.00830 0.29340 ± 0.00606 18.096 ± 0.265 % 81.686 ± 0.290 %
|
| 274 |
+
71 8.8224 ± 0.1952 0.19303 ± 0.00819 0.29090 ± 0.00598 17.997 ± 0.262 % 81.756 ± 0.287 %
|
| 275 |
+
72 8.8358 ± 0.1945 0.19237 ± 0.00812 0.28922 ± 0.00592 17.942 ± 0.260 % 81.814 ± 0.285 %
|
| 276 |
+
73 8.8191 ± 0.1926 0.18974 ± 0.00803 0.28730 ± 0.00584 17.853 ± 0.258 % 81.789 ± 0.283 %
|
| 277 |
+
74 8.7930 ± 0.1905 0.18760 ± 0.00794 0.28562 ± 0.00576 17.764 ± 0.256 % 81.802 ± 0.281 %
|
| 278 |
+
75 8.7825 ± 0.1890 0.18550 ± 0.00787 0.28487 ± 0.00570 17.718 ± 0.254 % 81.820 ± 0.279 %
|
| 279 |
+
76 8.8481 ± 0.1893 0.18445 ± 0.00779 0.28368 ± 0.00563 17.659 ± 0.252 % 81.847 ± 0.277 %
|
| 280 |
+
77 8.8317 ± 0.1878 0.18287 ± 0.00772 0.28192 ± 0.00557 17.586 ± 0.250 % 81.889 ± 0.275 %
|
| 281 |
+
78 8.8372 ± 0.1869 0.18166 ± 0.00765 0.28116 ± 0.00551 17.567 ± 0.249 % 81.905 ± 0.273 %
|
| 282 |
+
79 8.8346 ± 0.1857 0.17991 ± 0.00758 0.27981 ± 0.00545 17.496 ± 0.247 % 81.941 ± 0.271 %
|
| 283 |
+
80 8.8170 ± 0.1845 0.17767 ± 0.00754 0.28061 ± 0.00541 17.463 ± 0.245 % 81.892 ± 0.270 %
|
| 284 |
+
81 8.7815 ± 0.1827 0.17674 ± 0.00746 0.27897 ± 0.00535 17.385 ± 0.243 % 81.927 ± 0.268 %
|
| 285 |
+
82 8.7525 ± 0.1808 0.17635 ± 0.00739 0.27725 ± 0.00529 17.320 ± 0.241 % 81.970 ± 0.266 %
|
| 286 |
+
83 8.7776 ± 0.1800 0.17490 ± 0.00731 0.27524 ± 0.00522 17.238 ± 0.240 % 81.961 ± 0.264 %
|
| 287 |
+
84 8.7827 ± 0.1786 0.17330 ± 0.00724 0.27312 ± 0.00516 17.154 ± 0.238 % 81.993 ± 0.263 %
|
| 288 |
+
85 8.7672 ± 0.1770 0.17208 ± 0.00716 0.27114 ± 0.00511 17.076 ± 0.236 % 81.993 ± 0.261 %
|
| 289 |
+
86 8.6692 ± 0.1734 0.16988 ± 0.00710 0.26932 ± 0.00505 17.014 ± 0.235 % 82.034 ± 0.259 %
|
| 290 |
+
87 8.5912 ± 0.1704 0.16866 ± 0.00704 0.26737 ± 0.00500 16.942 ± 0.233 % 82.078 ± 0.258 %
|
| 291 |
+
88 8.5088 ± 0.1673 0.16734 ± 0.00697 0.26549 ± 0.00494 16.868 ± 0.232 % 82.130 ± 0.256 %
|
| 292 |
+
89 8.4133 ± 0.1640 0.16603 ± 0.00690 0.26360 ± 0.00489 16.797 ± 0.230 % 82.186 ± 0.254 %
|
| 293 |
+
90 8.3384 ± 0.1612 0.16480 ± 0.00684 0.26166 ± 0.00484 16.722 ± 0.228 % 82.261 ± 0.252 %
|
| 294 |
+
91 8.2673 ± 0.1586 0.16333 ± 0.00678 0.25999 ± 0.00479 16.675 ± 0.227 % 82.340 ± 0.250 %
|
| 295 |
+
92 8.1850 ± 0.1557 0.16139 ± 0.00671 0.25829 ± 0.00474 16.614 ± 0.225 % 82.357 ± 0.249 %
|
| 296 |
+
93 8.1927 ± 0.1552 0.15983 ± 0.00667 0.25842 ± 0.00474 16.577 ± 0.224 % 82.328 ± 0.248 %
|
| 297 |
+
94 8.2130 ± 0.1546 0.15798 ± 0.00661 0.25691 ± 0.00470 16.508 ± 0.222 % 82.411 ± 0.246 %
|
| 298 |
+
95 8.3276 ± 0.1563 0.15664 ± 0.00656 0.25589 ± 0.00466 16.445 ± 0.221 % 82.436 ± 0.244 %
|
| 299 |
+
96 8.4180 ± 0.1573 0.15448 ± 0.00651 0.25511 ± 0.00461 16.385 ± 0.220 % 82.345 ± 0.244 %
|
| 300 |
+
97 8.5002 ± 0.1580 0.15353 ± 0.00645 0.25376 ± 0.00457 16.322 ± 0.218 % 82.292 ± 0.243 %
|
| 301 |
+
98 8.6476 ± 0.1605 0.15257 ± 0.00640 0.25221 ± 0.00452 16.246 ± 0.217 % 82.309 ± 0.241 %
|
| 302 |
+
99 8.7773 ± 0.1624 0.15191 ± 0.00635 0.25118 ± 0.00448 16.183 ± 0.216 % 82.309 ± 0.240 %
|
| 303 |
+
100 8.8146 ± 0.1623 0.15125 ± 0.00631 0.25074 ± 0.00444 16.156 ± 0.215 % 82.263 ± 0.239 %
|
| 304 |
+
101 8.8518 ± 0.1623 0.15064 ± 0.00626 0.25000 ± 0.00441 16.116 ± 0.214 % 82.279 ± 0.238 %
|
| 305 |
+
102 8.9290 ± 0.1636 0.15142 ± 0.00624 0.25068 ± 0.00438 16.110 ± 0.212 % 82.268 ± 0.237 %
|
| 306 |
+
103 8.9035 ± 0.1625 0.15193 ± 0.00620 0.25017 ± 0.00436 16.138 ± 0.212 % 82.311 ± 0.235 %
|
| 307 |
+
104 8.8513 ± 0.1606 0.15357 ± 0.00618 0.25123 ± 0.00435 16.256 ± 0.211 % 82.319 ± 0.234 %
|
| 308 |
+
105 8.7359 ± 0.1575 0.15495 ± 0.00618 0.25270 ± 0.00438 16.376 ± 0.211 % 82.375 ± 0.233 %
|
| 309 |
+
106 8.5991 ± 0.1540 0.15647 ± 0.00617 0.25354 ± 0.00440 16.495 ± 0.211 % 82.442 ± 0.231 %
|
| 310 |
+
107 8.6565 ± 0.1542 0.15539 ± 0.00612 0.25185 ± 0.00436 16.429 ± 0.210 % 82.456 ± 0.230 %
|
| 311 |
+
108 8.6679 ± 0.1537 0.15493 ± 0.00608 0.25153 ± 0.00433 16.394 ± 0.208 % 82.440 ± 0.229 %
|
| 312 |
+
109 8.6899 ± 0.1535 0.15473 ± 0.00604 0.25077 ± 0.00429 16.354 ± 0.207 % 82.443 ± 0.228 %
|
| 313 |
+
110 8.7217 ± 0.1533 0.15370 ± 0.00599 0.24951 ± 0.00426 16.307 ± 0.206 % 82.467 ± 0.227 %
|
| 314 |
+
111 8.7711 ± 0.1534 0.15303 ± 0.00595 0.24854 ± 0.00422 16.252 ± 0.205 % 82.448 ± 0.226 %
|
| 315 |
+
112 8.7738 ± 0.1526 0.15200 ± 0.00591 0.24731 ± 0.00419 16.205 ± 0.204 % 82.461 ± 0.225 %
|
| 316 |
+
113 8.7794 ± 0.1519 0.15127 ± 0.00587 0.24594 ± 0.00415 16.147 ± 0.203 % 82.485 ± 0.224 %
|
| 317 |
+
114 8.7954 ± 0.1516 0.15075 ± 0.00582 0.24467 ± 0.00412 16.092 ± 0.202 % 82.521 ± 0.223 %
|
| 318 |
+
115 8.7729 ± 0.1504 0.15054 ± 0.00580 0.24482 ± 0.00409 16.103 ± 0.201 % 82.517 ± 0.222 %
|
| 319 |
+
116 8.8041 ± 0.1504 0.15547 ± 0.00584 0.25150 ± 0.00416 16.392 ± 0.201 % 82.346 ± 0.222 %
|
| 320 |
+
117 8.7830 ± 0.1492 0.16665 ± 0.00596 0.26133 ± 0.00427 16.915 ± 0.202 % 82.169 ± 0.222 %
|
| 321 |
+
118 8.7608 ± 0.1481 0.17697 ± 0.00607 0.27057 ± 0.00437 17.377 ± 0.203 % 82.017 ± 0.221 %
|
| 322 |
+
119 8.7375 ± 0.1469 0.18775 ± 0.00617 0.28054 ± 0.00450 17.893 ± 0.204 % 81.862 ± 0.221 %
|
| 323 |
+
120 8.7162 ± 0.1458 0.19666 ± 0.00627 0.28935 ± 0.00459 18.328 ± 0.203 % 81.660 ± 0.221 %
|
| 324 |
+
121 8.7003 ± 0.1449 0.20648 ± 0.00634 0.29868 ± 0.00470 18.785 ± 0.204 % 81.491 ± 0.221 %
|
| 325 |
+
|
| 326 |
+
====== Perplexity statistics ======
|
| 327 |
+
Mean PPL(Q) : 8.700339 ± 0.144862
|
| 328 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 329 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 92.57%
|
| 330 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.206478 ± 0.006342
|
| 331 |
+
Mean PPL(Q)/PPL(base) : 1.229341 ± 0.007796
|
| 332 |
+
Mean PPL(Q)-PPL(base) : 1.623099 ± 0.058276
|
| 333 |
+
|
| 334 |
+
====== KL divergence statistics ======
|
| 335 |
+
Mean KLD: 0.298683 ± 0.004697
|
| 336 |
+
Maximum KLD: 16.626404
|
| 337 |
+
99.9% KLD: 8.051671
|
| 338 |
+
99.0% KLD: 4.460805
|
| 339 |
+
95.0% KLD: 1.470186
|
| 340 |
+
90.0% KLD: 0.588759
|
| 341 |
+
Median KLD: 0.065393
|
| 342 |
+
10.0% KLD: 0.000416
|
| 343 |
+
5.0% KLD: 0.000062
|
| 344 |
+
1.0% KLD: 0.000003
|
| 345 |
+
0.1% KLD: -0.000001
|
| 346 |
+
Minimum KLD: -0.000004
|
| 347 |
+
|
| 348 |
+
====== Token probability statistics ======
|
| 349 |
+
Mean Δp: -4.179 ± 0.104 %
|
| 350 |
+
Maximum Δp: 97.156%
|
| 351 |
+
99.9% Δp: 74.673%
|
| 352 |
+
99.0% Δp: 30.773%
|
| 353 |
+
95.0% Δp: 11.678%
|
| 354 |
+
90.0% Δp: 5.691%
|
| 355 |
+
75.0% Δp: 0.475%
|
| 356 |
+
Median Δp: -0.027%
|
| 357 |
+
25.0% Δp: -3.293%
|
| 358 |
+
10.0% Δp: -16.104%
|
| 359 |
+
5.0% Δp: -36.994%
|
| 360 |
+
1.0% Δp: -91.500%
|
| 361 |
+
0.1% Δp: -99.389%
|
| 362 |
+
Minimum Δp: -99.982%
|
| 363 |
+
RMS Δp : 18.785 ± 0.204 %
|
| 364 |
+
Same top p: 81.491 ± 0.221 %
|
| 365 |
+
|
| 366 |
+
llama_perf_context_print: load time = 30584.59 ms
|
| 367 |
+
llama_perf_context_print: prompt eval time = 58490.19 ms / 61952 tokens ( 0.94 ms per token, 1059.19 tokens per second)
|
| 368 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 369 |
+
llama_perf_context_print: total time = 70899.93 ms / 61953 tokens
|
| 370 |
+
llama_perf_context_print: graphs reused = 0
|
| 371 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 372 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 955 + (22724 = 21003 + 304 + 1416) + 455 |
|
| 373 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1127 + (22553 = 18691 + 688 + 3174) + 453 |
|
| 374 |
+
llama_memory_breakdown_print: | - Host | 74189 = 74085 + 0 + 104 |
|
| 375 |
+
```
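Every per-quant log in this dataset ends with the same summary block, so the headline numbers (file size/BPW, Mean KLD, Mean PPL(Q), Same top p) can be scraped into one comparison table across quants. Below is a rough sketch of such a scraper; `parse_log`, the regular expressions, and the `kld_data/**/*.md` glob are assumptions made for illustration, not tooling that ships with this repository.

```python
import re
from pathlib import Path

# Headline fields as they are printed in the llama-perplexity logs above.
PATTERNS = {
    "size_gib": re.compile(r"file size\s*=\s*([\d.]+) GiB \(([\d.]+) BPW\)"),
    "mean_kld": re.compile(r"Mean\s+KLD:\s*([-\d.]+)"),
    "mean_ppl_q": re.compile(r"Mean PPL\(Q\)\s*:\s*([-\d.]+)"),
    "same_top_p": re.compile(r"Same top p:\s*([-\d.]+)"),
}

def parse_log(path):
    """Extract the headline metrics from one benchmark log (hypothetical helper)."""
    text = Path(path).read_text(encoding="utf-8")
    row = {"name": Path(path).stem}
    m = PATTERNS["size_gib"].search(text)
    if m:
        row["size_gib"] = float(m.group(1))
        row["bpw"] = float(m.group(2))
    for key in ("mean_kld", "mean_ppl_q", "same_top_p"):
        m = PATTERNS[key].search(text)
        if m:
            row[key] = float(m.group(1))
    return row

if __name__ == "__main__":
    # Aggregate every quant's log and print them ordered by file size.
    rows = [parse_log(p) for p in sorted(Path("kld_data").rglob("*.md"))]
    for row in sorted(rows, key=lambda r: r.get("size_gib", 0.0)):
        print(row)
```

One caveat for a scraper like this: the Δ and ± characters in the logs mean the files should be read as UTF-8, as done above.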
|
kld_data/unsloth/UD-IQ2_XXS/MiniMax-M2.5-UD-IQ2_XXS.md
ADDED
|
@@ -0,0 +1,376 @@
| 1 |
+
### MiniMax-M2.5-UD-IQ2_XXS (unsloth)
|
| 2 |
+
|
| 3 |
+
69.02 GiB (2.59 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ2_XXS/MiniMax-M2.5-UD-IQ2_XXS-00001-of-00003.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 36578 used, -12706 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 39336 used, -15464 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 75914 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 30218 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37981 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7332 MiB used, 16539 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1416 MiB used, 22454 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 20 layers ( 1 overflowing), 22801 MiB used, 1070 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 43 layers (28 overflowing), 22839 MiB used, 1032 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.15 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ2_XXS/MiniMax-M2.5-UD-IQ2_XXS-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 19
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 16 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
llama_model_loader: - type iq2_xxs: 98 tensors
|
| 90 |
+
llama_model_loader: - type iq2_xs: 10 tensors
|
| 91 |
+
llama_model_loader: - type iq3_xxs: 41 tensors
|
| 92 |
+
llama_model_loader: - type iq3_s: 9 tensors
|
| 93 |
+
llama_model_loader: - type iq2_s: 16 tensors
|
| 94 |
+
llama_model_loader: - type iq4_xs: 234 tensors
|
| 95 |
+
print_info: file format = GGUF V3 (latest)
|
| 96 |
+
print_info: file type = IQ2_XXS - 2.0625 bpw
|
| 97 |
+
print_info: file size = 69.02 GiB (2.59 BPW)
|
| 98 |
+
load: 0 unused tokens
|
| 99 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 100 |
+
load: printing all EOG tokens:
|
| 101 |
+
load: - 200004 ('<fim_pad>')
|
| 102 |
+
load: - 200005 ('<reponame>')
|
| 103 |
+
load: - 200020 ('[e~[')
|
| 104 |
+
load: special tokens cache size = 54
|
| 105 |
+
load: token to piece cache size = 1.3355 MB
|
| 106 |
+
print_info: arch = minimax-m2
|
| 107 |
+
print_info: vocab_only = 0
|
| 108 |
+
print_info: no_alloc = 0
|
| 109 |
+
print_info: n_ctx_train = 196608
|
| 110 |
+
print_info: n_embd = 3072
|
| 111 |
+
print_info: n_embd_inp = 3072
|
| 112 |
+
print_info: n_layer = 62
|
| 113 |
+
print_info: n_head = 48
|
| 114 |
+
print_info: n_head_kv = 8
|
| 115 |
+
print_info: n_rot = 64
|
| 116 |
+
print_info: n_swa = 0
|
| 117 |
+
print_info: is_swa_any = 0
|
| 118 |
+
print_info: n_embd_head_k = 128
|
| 119 |
+
print_info: n_embd_head_v = 128
|
| 120 |
+
print_info: n_gqa = 6
|
| 121 |
+
print_info: n_embd_k_gqa = 1024
|
| 122 |
+
print_info: n_embd_v_gqa = 1024
|
| 123 |
+
print_info: f_norm_eps = 0.0e+00
|
| 124 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 125 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 126 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 127 |
+
print_info: f_logit_scale = 0.0e+00
|
| 128 |
+
print_info: f_attn_scale = 0.0e+00
|
| 129 |
+
print_info: n_ff = 1536
|
| 130 |
+
print_info: n_expert = 256
|
| 131 |
+
print_info: n_expert_used = 8
|
| 132 |
+
print_info: n_expert_groups = 0
|
| 133 |
+
print_info: n_group_used = 0
|
| 134 |
+
print_info: causal attn = 1
|
| 135 |
+
print_info: pooling type = 0
|
| 136 |
+
print_info: rope type = 2
|
| 137 |
+
print_info: rope scaling = linear
|
| 138 |
+
print_info: freq_base_train = 5000000.0
|
| 139 |
+
print_info: freq_scale_train = 1
|
| 140 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 141 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 142 |
+
print_info: rope_finetuned = unknown
|
| 143 |
+
print_info: model type = 230B.A10B
|
| 144 |
+
print_info: model params = 228.69 B
|
| 145 |
+
print_info: general.name = Minimax-M2.5
|
| 146 |
+
print_info: vocab type = BPE
|
| 147 |
+
print_info: n_vocab = 200064
|
| 148 |
+
print_info: n_merges = 199744
|
| 149 |
+
print_info: BOS token = 200034 ']~!b['
|
| 150 |
+
print_info: EOS token = 200020 '[e~['
|
| 151 |
+
print_info: UNK token = 200021 ']!d~['
|
| 152 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: LF token = 10 'Ċ'
|
| 154 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 155 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 156 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 157 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 158 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 159 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 160 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 161 |
+
print_info: EOG token = 200020 '[e~['
|
| 162 |
+
print_info: max token length = 256
|
| 163 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 164 |
+
load_tensors: offloading output layer to GPU
|
| 165 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 166 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 167 |
+
load_tensors: CPU_Mapped model buffer size = 47021.33 MiB
|
| 168 |
+
load_tensors: CPU_Mapped model buffer size = 23176.22 MiB
|
| 169 |
+
load_tensors: CUDA0 model buffer size = 21064.50 MiB
|
| 170 |
+
load_tensors: CUDA1 model buffer size = 18993.18 MiB
|
| 171 |
+
....................................................................................................
|
| 172 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 173 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 174 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 175 |
+
llama_context: constructing llama_context
|
| 176 |
+
llama_context: n_seq_max = 8
|
| 177 |
+
llama_context: n_ctx = 4096
|
| 178 |
+
llama_context: n_ctx_seq = 512
|
| 179 |
+
llama_context: n_batch = 4096
|
| 180 |
+
llama_context: n_ubatch = 4096
|
| 181 |
+
llama_context: causal_attn = 1
|
| 182 |
+
llama_context: flash_attn = enabled
|
| 183 |
+
llama_context: kv_unified = false
|
| 184 |
+
llama_context: freq_base = 5000000.0
|
| 185 |
+
llama_context: freq_scale = 1
|
| 186 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 187 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 188 |
+
llama_kv_cache: CUDA0 KV buffer size = 320.00 MiB
|
| 189 |
+
llama_kv_cache: CUDA1 KV buffer size = 672.00 MiB
|
| 190 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 191 |
+
sched_reserve: reserving ...
|
| 192 |
+
sched_reserve: CUDA0 compute buffer size = 1417.00 MiB
|
| 193 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 194 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 195 |
+
sched_reserve: graph nodes = 4099
|
| 196 |
+
sched_reserve: graph splits = 113 (with bs=4096), 61 (with bs=1)
|
| 197 |
+
sched_reserve: reserve took 22.98 ms, sched copies = 1
|
| 198 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 199 |
+
|
| 200 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 201 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 202 |
+
kl_divergence: 4.01 seconds per pass - ETA 1.00 minutes
|
| 203 |
+
|
| 204 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 205 |
+
1 7.1505 ± 1.3994 0.11511 ± 0.05072 0.17671 ± 0.03186 12.582 ± 1.992 % 84.314 ± 2.282 %
|
| 206 |
+
2 5.1367 ± 0.6386 0.09420 ± 0.03365 0.13005 ± 0.01772 10.887 ± 1.342 % 88.235 ± 1.428 %
|
| 207 |
+
3 4.8420 ± 0.4842 0.07584 ± 0.02757 0.14094 ± 0.01335 12.375 ± 0.999 % 87.843 ± 1.182 %
|
| 208 |
+
4 5.4514 ± 0.4857 0.06109 ± 0.02361 0.14022 ± 0.01067 11.642 ± 0.816 % 87.353 ± 1.041 %
|
| 209 |
+
5 5.4902 ± 0.4511 0.11474 ± 0.02757 0.17542 ± 0.01520 12.742 ± 0.828 % 86.980 ± 0.943 %
|
| 210 |
+
6 6.7319 ± 0.5415 0.11567 ± 0.02589 0.19146 ± 0.01370 12.547 ± 0.767 % 85.490 ± 0.901 %
|
| 211 |
+
7 6.3000 ± 0.4559 0.12121 ± 0.02451 0.21974 ± 0.01443 13.704 ± 0.725 % 84.762 ± 0.851 %
|
| 212 |
+
8 7.0469 ± 0.4793 0.10973 ± 0.02219 0.21393 ± 0.01286 13.312 ± 0.661 % 84.020 ± 0.811 %
|
| 213 |
+
9 6.9062 ± 0.4381 0.10885 ± 0.02017 0.20468 ± 0.01168 12.963 ± 0.624 % 84.575 ± 0.754 %
|
| 214 |
+
10 6.3046 ± 0.3725 0.10676 ± 0.01850 0.19408 ± 0.01059 12.782 ± 0.574 % 85.020 ± 0.707 %
|
| 215 |
+
11 6.8822 ± 0.3926 0.10260 ± 0.01726 0.19149 ± 0.00974 12.565 ± 0.543 % 84.706 ± 0.680 %
|
| 216 |
+
12 7.5778 ± 0.4188 0.09739 ± 0.01621 0.18880 ± 0.00905 12.184 ± 0.515 % 84.641 ± 0.652 %
|
| 217 |
+
13 7.8289 ± 0.4114 0.09272 ± 0.01514 0.18201 ± 0.00842 11.914 ± 0.490 % 84.827 ± 0.623 %
|
| 218 |
+
14 8.4371 ± 0.4312 0.09229 ± 0.01456 0.18255 ± 0.00809 11.779 ± 0.472 % 84.342 ± 0.608 %
|
| 219 |
+
15 8.8061 ± 0.4346 0.08897 ± 0.01378 0.18023 ± 0.00759 11.645 ± 0.449 % 84.157 ± 0.590 %
|
| 220 |
+
16 9.0034 ± 0.4283 0.07893 ± 0.01310 0.17380 ± 0.00713 11.383 ± 0.432 % 84.485 ± 0.567 %
|
| 221 |
+
17 9.3236 ± 0.4353 0.08676 ± 0.01308 0.18462 ± 0.00726 11.434 ± 0.425 % 84.014 ± 0.557 %
|
| 222 |
+
18 8.8389 ± 0.3992 0.09259 ± 0.01317 0.19419 ± 0.00785 11.519 ± 0.419 % 84.161 ± 0.539 %
|
| 223 |
+
19 8.9321 ± 0.3915 0.08728 ± 0.01269 0.19007 ± 0.00750 11.435 ± 0.403 % 84.293 ± 0.523 %
|
| 224 |
+
20 9.0205 ± 0.3857 0.08927 ± 0.01249 0.19803 ± 0.00746 11.564 ± 0.392 % 84.020 ± 0.513 %
|
| 225 |
+
21 9.0110 ± 0.3762 0.09077 ± 0.01210 0.19709 ± 0.00725 11.533 ± 0.381 % 84.071 ± 0.500 %
|
| 226 |
+
22 9.4200 ± 0.3895 0.09749 ± 0.01182 0.19775 ± 0.00695 11.478 ± 0.367 % 83.815 ± 0.492 %
|
| 227 |
+
23 9.4815 ± 0.3854 0.10279 ± 0.01192 0.20523 ± 0.00702 11.860 ± 0.366 % 83.581 ± 0.484 %
|
| 228 |
+
24 9.8891 ± 0.3951 0.09881 ± 0.01164 0.20529 ± 0.00678 11.759 ± 0.358 % 83.366 ± 0.476 %
|
| 229 |
+
25 9.9099 ± 0.3883 0.10231 ± 0.01153 0.21079 ± 0.00669 12.042 ± 0.353 % 83.153 ± 0.469 %
|
| 230 |
+
26 9.6836 ± 0.3693 0.14781 ± 0.01274 0.25385 ± 0.00836 15.061 ± 0.410 % 82.368 ± 0.468 %
|
| 231 |
+
27 9.6659 ± 0.3602 0.19991 ± 0.01382 0.30767 ± 0.00983 18.037 ± 0.432 % 81.293 ± 0.470 %
|
| 232 |
+
28 9.7746 ± 0.3584 0.19742 ± 0.01351 0.30570 ± 0.00951 17.905 ± 0.422 % 81.190 ± 0.463 %
|
| 233 |
+
29 9.6524 ± 0.3478 0.19431 ± 0.01319 0.30241 ± 0.00923 17.734 ± 0.412 % 81.339 ± 0.453 %
|
| 234 |
+
30 9.0450 ± 0.3186 0.19652 ± 0.01312 0.29821 ± 0.00915 17.691 ± 0.407 % 81.830 ± 0.441 %
|
| 235 |
+
31 8.5013 ± 0.2923 0.19390 ± 0.01279 0.29451 ± 0.00897 17.723 ± 0.402 % 82.214 ± 0.430 %
|
| 236 |
+
32 8.2439 ± 0.2766 0.18838 ± 0.01247 0.28942 ± 0.00870 17.623 ± 0.392 % 82.316 ± 0.422 %
|
| 237 |
+
33 8.0392 ± 0.2637 0.18314 ± 0.01216 0.28478 ± 0.00845 17.501 ± 0.384 % 82.472 ± 0.414 %
|
| 238 |
+
34 8.2670 ± 0.2685 0.18405 ± 0.01196 0.28860 ± 0.00829 17.421 ± 0.376 % 82.157 ± 0.411 %
|
| 239 |
+
35 8.4022 ± 0.2709 0.18563 ± 0.01182 0.29476 ± 0.00818 17.524 ± 0.368 % 81.871 ± 0.408 %
|
| 240 |
+
36 8.4447 ± 0.2693 0.18180 ± 0.01162 0.29296 ± 0.00798 17.473 ± 0.361 % 81.699 ± 0.404 %
|
| 241 |
+
37 8.5548 ± 0.2694 0.19247 ± 0.01173 0.30309 ± 0.00822 17.998 ± 0.361 % 81.463 ± 0.400 %
|
| 242 |
+
38 8.7761 ± 0.2736 0.18827 ± 0.01148 0.29945 ± 0.00802 17.834 ± 0.355 % 81.548 ± 0.394 %
|
| 243 |
+
39 8.7898 ± 0.2701 0.19678 ± 0.01151 0.30969 ± 0.00820 18.259 ± 0.354 % 81.398 ± 0.390 %
|
| 244 |
+
40 8.8047 ± 0.2662 0.23340 ± 0.01201 0.34638 ± 0.00882 19.841 ± 0.355 % 80.657 ± 0.391 %
|
| 245 |
+
41 8.9197 ± 0.2661 0.27720 ± 0.01269 0.39016 ± 0.00961 21.433 ± 0.357 % 79.770 ± 0.393 %
|
| 246 |
+
42 8.9251 ± 0.2625 0.30913 ± 0.01314 0.42435 ± 0.01017 22.728 ± 0.357 % 79.169 ± 0.392 %
|
| 247 |
+
43 8.8941 ± 0.2582 0.33678 ± 0.01344 0.44981 ± 0.01055 23.586 ± 0.355 % 78.805 ± 0.390 %
|
| 248 |
+
44 8.7577 ± 0.2502 0.32766 ± 0.01319 0.44288 ± 0.01033 23.377 ± 0.351 % 78.948 ± 0.385 %
|
| 249 |
+
45 8.9090 ± 0.2528 0.32290 ± 0.01297 0.43820 ± 0.01011 23.172 ± 0.346 % 78.885 ± 0.381 %
|
| 250 |
+
46 9.0421 ± 0.2537 0.31564 ± 0.01273 0.43243 ± 0.00990 22.949 ± 0.342 % 78.951 ± 0.376 %
|
| 251 |
+
47 9.2084 ± 0.2561 0.31167 ± 0.01249 0.42611 ± 0.00970 22.736 ± 0.339 % 79.040 ± 0.372 %
|
| 252 |
+
48 8.9959 ± 0.2462 0.30497 ± 0.01225 0.41889 ± 0.00951 22.533 ± 0.335 % 79.208 ± 0.367 %
|
| 253 |
+
49 9.1106 ± 0.2467 0.30262 ± 0.01210 0.42031 ± 0.00956 22.394 ± 0.331 % 79.176 ± 0.363 %
|
| 254 |
+
50 9.1970 ± 0.2473 0.29790 ± 0.01191 0.41605 ± 0.00938 22.248 ± 0.327 % 79.216 ± 0.359 %
|
| 255 |
+
51 9.2962 ± 0.2475 0.29323 ± 0.01170 0.41048 ± 0.00921 22.060 ± 0.324 % 79.308 ± 0.355 %
|
| 256 |
+
52 9.3444 ± 0.2458 0.28910 ± 0.01156 0.40843 ± 0.00904 21.946 ± 0.320 % 79.201 ± 0.352 %
|
| 257 |
+
53 9.4367 ± 0.2455 0.28314 ± 0.01139 0.40396 ± 0.00888 21.788 ± 0.317 % 79.193 ± 0.349 %
|
| 258 |
+
54 9.4730 ± 0.2436 0.27920 ± 0.01121 0.39909 ± 0.00873 21.623 ± 0.314 % 79.245 ± 0.346 %
|
| 259 |
+
55 9.5030 ± 0.2416 0.27533 ± 0.01102 0.39407 ± 0.00858 21.453 ± 0.311 % 79.244 ± 0.342 %
|
| 260 |
+
56 9.5148 ± 0.2396 0.27085 ± 0.01085 0.38949 ± 0.00843 21.298 ± 0.308 % 79.272 ± 0.339 %
|
| 261 |
+
57 9.5065 ± 0.2371 0.27003 ± 0.01075 0.39000 ± 0.00833 21.272 ± 0.304 % 79.229 ± 0.336 %
|
| 262 |
+
58 9.4962 ± 0.2346 0.26758 ± 0.01062 0.38707 ± 0.00822 21.158 ± 0.302 % 79.337 ± 0.333 %
|
| 263 |
+
59 9.4015 ± 0.2298 0.26268 ± 0.01046 0.38189 ± 0.00809 21.009 ± 0.299 % 79.501 ± 0.329 %
|
| 264 |
+
60 9.3937 ± 0.2278 0.26036 ± 0.01033 0.37876 ± 0.00797 20.908 ± 0.296 % 79.556 ± 0.326 %
|
| 265 |
+
61 9.4129 ± 0.2262 0.25577 ± 0.01019 0.37508 ± 0.00784 20.770 ± 0.293 % 79.576 ± 0.323 %
|
| 266 |
+
62 9.3592 ± 0.2233 0.25332 ± 0.01011 0.37336 ± 0.00775 20.730 ± 0.291 % 79.652 ± 0.320 %
|
| 267 |
+
63 9.4165 ± 0.2238 0.25310 ± 0.01000 0.37085 ± 0.00764 20.614 ± 0.288 % 79.745 ± 0.317 %
|
| 268 |
+
64 9.3682 ± 0.2204 0.25053 ± 0.00987 0.36808 ± 0.00754 20.500 ± 0.286 % 79.773 ± 0.314 %
|
| 269 |
+
65 9.3296 ± 0.2177 0.24758 ± 0.00978 0.36639 ± 0.00744 20.431 ± 0.283 % 79.807 ± 0.312 %
|
| 270 |
+
66 9.3491 ± 0.2166 0.24466 ± 0.00966 0.36366 ± 0.00734 20.332 ± 0.281 % 79.810 ± 0.309 %
|
| 271 |
+
67 9.3415 ± 0.2148 0.24290 ± 0.00955 0.36134 ± 0.00724 20.228 ± 0.278 % 79.842 ± 0.307 %
|
| 272 |
+
68 9.2543 ± 0.2109 0.23949 ± 0.00942 0.35759 ± 0.00714 20.101 ± 0.276 % 79.988 ± 0.304 %
|
| 273 |
+
69 9.2701 ± 0.2098 0.23675 ± 0.00932 0.35485 ± 0.00704 19.983 ± 0.274 % 80.057 ± 0.301 %
|
| 274 |
+
70 9.2084 ± 0.2066 0.23386 ± 0.00921 0.35259 ± 0.00696 19.887 ± 0.272 % 80.095 ± 0.299 %
|
| 275 |
+
71 9.1657 ± 0.2042 0.23120 ± 0.00910 0.34951 ± 0.00687 19.773 ± 0.269 % 80.171 ± 0.296 %
|
| 276 |
+
72 9.1803 ± 0.2034 0.23061 ± 0.00901 0.34675 ± 0.00678 19.684 ± 0.267 % 80.240 ± 0.294 %
|
| 277 |
+
73 9.1731 ± 0.2018 0.22910 ± 0.00890 0.34440 ± 0.00669 19.596 ± 0.265 % 80.231 ± 0.292 %
|
| 278 |
+
74 9.1380 ± 0.1994 0.22609 ± 0.00881 0.34221 ± 0.00661 19.507 ± 0.263 % 80.286 ± 0.290 %
|
| 279 |
+
75 9.1292 ± 0.1978 0.22421 ± 0.00873 0.34106 ± 0.00653 19.469 ± 0.260 % 80.282 ± 0.288 %
|
| 280 |
+
76 9.1859 ± 0.1978 0.22191 ± 0.00863 0.33946 ± 0.00645 19.389 ± 0.258 % 80.258 ± 0.286 %
|
| 281 |
+
77 9.1719 ± 0.1963 0.22066 ± 0.00855 0.33769 ± 0.00638 19.339 ± 0.256 % 80.285 ± 0.284 %
|
| 282 |
+
78 9.1613 ± 0.1948 0.21768 ± 0.00847 0.33602 ± 0.00631 19.261 ± 0.254 % 80.302 ± 0.282 %
|
| 283 |
+
79 9.1565 ± 0.1935 0.21569 ± 0.00838 0.33439 ± 0.00623 19.170 ± 0.253 % 80.347 ± 0.280 %
|
| 284 |
+
80 9.1527 ± 0.1927 0.21504 ± 0.00833 0.33505 ± 0.00618 19.132 ± 0.251 % 80.284 ± 0.279 %
|
| 285 |
+
81 9.1154 ± 0.1908 0.21406 ± 0.00825 0.33313 ± 0.00612 19.047 ± 0.249 % 80.368 ± 0.276 %
|
| 286 |
+
82 9.0818 ± 0.1887 0.21328 ± 0.00817 0.33127 ± 0.00605 18.964 ± 0.247 % 80.445 ± 0.274 %
|
| 287 |
+
83 9.0973 ± 0.1876 0.21068 ± 0.00809 0.32907 ± 0.00598 18.881 ± 0.245 % 80.425 ± 0.273 %
|
| 288 |
+
84 9.0973 ± 0.1861 0.20849 ± 0.00801 0.32667 ± 0.00591 18.799 ± 0.243 % 80.434 ± 0.271 %
|
| 289 |
+
85 9.0720 ± 0.1841 0.20625 ± 0.00793 0.32425 ± 0.00585 18.714 ± 0.242 % 80.411 ± 0.270 %
|
| 290 |
+
86 8.9695 ± 0.1804 0.20393 ± 0.00784 0.32195 ± 0.00578 18.637 ± 0.240 % 80.492 ± 0.268 %
|
| 291 |
+
87 8.8812 ± 0.1770 0.20185 ± 0.00777 0.31944 ± 0.00572 18.555 ± 0.238 % 80.559 ± 0.266 %
|
| 292 |
+
88 8.7908 ± 0.1737 0.19995 ± 0.00769 0.31719 ± 0.00566 18.475 ± 0.237 % 80.664 ± 0.264 %
|
| 293 |
+
89 8.6893 ± 0.1702 0.19831 ± 0.00761 0.31493 ± 0.00560 18.408 ± 0.235 % 80.718 ± 0.262 %
|
| 294 |
+
90 8.6059 ± 0.1671 0.19638 ± 0.00754 0.31243 ± 0.00554 18.326 ± 0.234 % 80.802 ± 0.260 %
|
| 295 |
+
91 8.5275 ± 0.1643 0.19432 ± 0.00747 0.31023 ± 0.00549 18.260 ± 0.232 % 80.875 ± 0.258 %
|
| 296 |
+
92 8.4402 ± 0.1612 0.19210 ± 0.00739 0.30835 ± 0.00543 18.196 ± 0.231 % 80.934 ± 0.256 %
|
| 297 |
+
93 8.4462 ± 0.1607 0.19031 ± 0.00736 0.30904 ± 0.00542 18.174 ± 0.229 % 80.923 ± 0.255 %
|
| 298 |
+
94 8.4658 ± 0.1601 0.18829 ± 0.00729 0.30710 ± 0.00537 18.097 ± 0.228 % 80.964 ± 0.254 %
|
| 299 |
+
95 8.5841 ± 0.1618 0.18698 ± 0.00724 0.30600 ± 0.00532 18.024 ± 0.227 % 80.945 ± 0.252 %
|
| 300 |
+
96 8.6772 ± 0.1628 0.18480 ± 0.00718 0.30472 ± 0.00526 17.954 ± 0.225 % 80.854 ± 0.251 %
|
| 301 |
+
97 8.7617 ± 0.1636 0.18384 ± 0.00711 0.30307 ± 0.00521 17.880 ± 0.224 % 80.821 ± 0.250 %
|
| 302 |
+
98 8.9187 ± 0.1663 0.18345 ± 0.00706 0.30132 ± 0.00516 17.799 ± 0.223 % 80.824 ± 0.249 %
|
| 303 |
+
99 9.0505 ± 0.1682 0.18256 ± 0.00700 0.29992 ± 0.00511 17.733 ± 0.222 % 80.800 ± 0.248 %
|
| 304 |
+
100 9.0933 ± 0.1682 0.18239 ± 0.00697 0.29993 ± 0.00508 17.703 ± 0.220 % 80.765 ± 0.247 %
|
| 305 |
+
101 9.1275 ± 0.1682 0.18131 ± 0.00692 0.29925 ± 0.00506 17.680 ± 0.219 % 80.800 ± 0.245 %
|
| 306 |
+
102 9.1956 ± 0.1692 0.18084 ± 0.00689 0.29942 ± 0.00502 17.648 ± 0.218 % 80.827 ± 0.244 %
|
| 307 |
+
103 9.1672 ± 0.1679 0.18113 ± 0.00685 0.29896 ± 0.00500 17.688 ± 0.217 % 80.849 ± 0.243 %
|
| 308 |
+
104 9.1083 ± 0.1658 0.18219 ± 0.00683 0.30018 ± 0.00499 17.787 ± 0.216 % 80.841 ± 0.242 %
|
| 309 |
+
105 8.9915 ± 0.1626 0.18379 ± 0.00683 0.30222 ± 0.00500 17.972 ± 0.216 % 80.874 ± 0.240 %
|
| 310 |
+
106 8.8556 ± 0.1591 0.18586 ± 0.00683 0.30359 ± 0.00502 18.122 ± 0.216 % 80.947 ± 0.239 %
|
| 311 |
+
107 8.9107 ± 0.1592 0.18433 ± 0.00677 0.30161 ± 0.00498 18.047 ± 0.215 % 80.949 ± 0.238 %
|
| 312 |
+
108 8.9226 ± 0.1588 0.18388 ± 0.00672 0.30124 ± 0.00495 18.006 ± 0.214 % 80.908 ± 0.237 %
|
| 313 |
+
109 8.9404 ± 0.1584 0.18316 ± 0.00668 0.30045 ± 0.00491 17.979 ± 0.212 % 80.881 ± 0.236 %
|
| 314 |
+
110 8.9707 ± 0.1582 0.18185 ± 0.00663 0.29914 ± 0.00487 17.925 ± 0.211 % 80.852 ± 0.235 %
|
| 315 |
+
111 9.0198 ± 0.1583 0.18100 ± 0.00659 0.29788 ± 0.00483 17.863 ± 0.210 % 80.862 ± 0.234 %
|
| 316 |
+
112 9.0288 ± 0.1577 0.18065 ± 0.00654 0.29647 ± 0.00479 17.807 ± 0.209 % 80.875 ± 0.233 %
|
| 317 |
+
113 9.0357 ± 0.1569 0.18004 ± 0.00649 0.29483 ± 0.00475 17.743 ± 0.208 % 80.881 ± 0.232 %
|
| 318 |
+
114 9.0475 ± 0.1565 0.17901 ± 0.00644 0.29330 ± 0.00471 17.681 ± 0.207 % 80.918 ± 0.230 %
|
| 319 |
+
115 9.0283 ± 0.1555 0.17923 ± 0.00642 0.29346 ± 0.00469 17.694 ± 0.206 % 80.921 ± 0.229 %
|
| 320 |
+
116 9.0647 ± 0.1554 0.18464 ± 0.00645 0.30003 ± 0.00473 17.959 ± 0.206 % 80.747 ± 0.229 %
|
| 321 |
+
117 9.0722 ± 0.1549 0.19904 ± 0.00660 0.31298 ± 0.00490 18.543 ± 0.207 % 80.523 ± 0.229 %
|
| 322 |
+
118 9.0580 ± 0.1539 0.21033 ± 0.00670 0.32344 ± 0.00501 18.983 ± 0.207 % 80.359 ± 0.229 %
|
| 323 |
+
119 9.0555 ± 0.1531 0.22350 ± 0.00684 0.33496 ± 0.00515 19.522 ± 0.207 % 80.168 ± 0.229 %
|
| 324 |
+
120 9.0447 ± 0.1522 0.23366 ± 0.00694 0.34581 ± 0.00524 19.990 ± 0.207 % 79.944 ± 0.229 %
|
| 325 |
+
121 9.0338 ± 0.1513 0.24409 ± 0.00701 0.35518 ± 0.00533 20.422 ± 0.207 % 79.767 ± 0.229 %

====== Perplexity statistics ======
Mean PPL(Q)                   : 9.033840 ± 0.151314
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 90.99%
Mean ln(PPL(Q)/PPL(base))     : 0.244094 ± 0.007007
Mean PPL(Q)/PPL(base)         : 1.276464 ± 0.008944
Mean PPL(Q)-PPL(base)         : 1.956600 ± 0.066985

====== KL divergence statistics ======
Mean    KLD: 0.355179 ± 0.005328
Maximum KLD: 19.298819
99.9%   KLD: 8.835881
99.0%   KLD: 5.058808
95.0%   KLD: 1.803043
90.0%   KLD: 0.736885
Median  KLD: 0.079771
10.0%   KLD: 0.000532
5.0%    KLD: 0.000084
1.0%    KLD: 0.000004
0.1%    KLD: -0.000000
Minimum KLD: -0.000003

====== Token probability statistics ======
Mean    Δp: -4.850 ± 0.113 %
Maximum Δp: 99.455%
99.9%   Δp: 76.421%
99.0%   Δp: 32.687%
95.0%   Δp: 12.563%
90.0%   Δp: 6.058%
75.0%   Δp: 0.474%
Median  Δp: -0.041%
25.0%   Δp: -4.003%
10.0%   Δp: -18.567%
5.0%    Δp: -45.571%
1.0%    Δp: -93.948%
0.1%    Δp: -99.583%
Minimum Δp: -99.957%
RMS Δp    : 20.422 ± 0.207 %
Same top p: 79.767 ± 0.229 %

llama_perf_context_print: load time = 26697.85 ms
llama_perf_context_print: prompt eval time = 54369.88 ms / 61952 tokens ( 0.88 ms per token, 1139.45 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 65794.86 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 909 + (22801 = 21064 + 320 + 1416) + 423 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 841 + (22839 = 18993 + 672 + 3174) + 454 |
llama_memory_breakdown_print: | - Host | 70301 = 70197 + 0 + 104 |
```
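
The summary blocks above reduce the token-by-token comparison between the quantized model and the BF16 reference to a handful of numbers. As a rough guide to how such figures can be reproduced, here is a minimal NumPy sketch that recomputes the same kind of statistics from two aligned logit dumps. It assumes the usual definitions (per-position KL divergence of the reference distribution against the quantized one, Δp as the change in probability assigned to the observed token, "Same top p" as top-1 agreement); the function name and array layout are invented for the example, and this is an illustration, not the code path llama-perplexity actually uses.

```python
import numpy as np

def kld_summary(logits_base, logits_q, target_ids):
    """Recompute KLD / Δp style statistics from two aligned logit arrays.

    logits_base, logits_q: float arrays of shape (n_tokens, n_vocab)
    target_ids: int array of shape (n_tokens,) with the observed next tokens
    (all names and shapes are assumptions made for this sketch)
    """
    def softmax(x):
        x = x - x.max(axis=-1, keepdims=True)
        e = np.exp(x)
        return e / e.sum(axis=-1, keepdims=True)

    p = softmax(logits_base)   # reference (BF16) distribution per position
    q = softmax(logits_q)      # quantized distribution per position

    # per-position KL divergence of the reference against the quantized model
    kld = (p * (np.log(p) - np.log(q))).sum(axis=-1)

    # change in probability assigned to the token that actually occurred
    idx = np.arange(len(target_ids))
    dp = q[idx, target_ids] - p[idx, target_ids]

    # fraction of positions where both models agree on the top-1 token
    same_top = (p.argmax(axis=-1) == q.argmax(axis=-1)).mean()

    return {
        "Mean KLD": kld.mean(),
        "Median KLD": np.median(kld),
        "Mean Δp %": 100 * dp.mean(),
        "RMS Δp %": 100 * np.sqrt((dp ** 2).mean()),
        "Same top p %": 100 * same_top,
    }
```

Fed the BF16 and IQ3_S logits for the same 61,952 calibration tokens, this should land in the neighbourhood of the Mean KLD ≈ 0.355 and Same top p ≈ 79.8 % reported above, up to the exact conventions llama.cpp uses.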
kld_data/unsloth/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS.md
ADDED
@@ -0,0 +1,373 @@
### MiniMax-M2.5-UD-IQ3_XXS (unsloth)

86.90 GiB (3.26 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS-00001-of-00003.gguf
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 45929 used, -22057 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 48288 used, -24416 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 94217 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 48522 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37848 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7627 MiB used, 16244 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1542 MiB used, 22328 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 16 layers ( 1 overflowing), 22645 MiB used, 1226 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 47 layers (36 overflowing), 22822 MiB used, 1049 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.25 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-IQ3_XXS/MiniMax-M2.5-UD-IQ3_XXS-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 23
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 1 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 20 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
llama_model_loader: - type iq3_xxs: 131 tensors
|
| 90 |
+
llama_model_loader: - type iq3_s: 42 tensors
|
| 91 |
+
llama_model_loader: - type iq4_xs: 231 tensors
|
| 92 |
+
print_info: file format = GGUF V3 (latest)
|
| 93 |
+
print_info: file type = IQ3_XXS - 3.0625 bpw
|
| 94 |
+
print_info: file size = 86.90 GiB (3.26 BPW)
|
| 95 |
+
load: 0 unused tokens
|
| 96 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 97 |
+
load: printing all EOG tokens:
|
| 98 |
+
load: - 200004 ('<fim_pad>')
|
| 99 |
+
load: - 200005 ('<reponame>')
|
| 100 |
+
load: - 200020 ('[e~[')
|
| 101 |
+
load: special tokens cache size = 54
|
| 102 |
+
load: token to piece cache size = 1.3355 MB
|
| 103 |
+
print_info: arch = minimax-m2
|
| 104 |
+
print_info: vocab_only = 0
|
| 105 |
+
print_info: no_alloc = 0
|
| 106 |
+
print_info: n_ctx_train = 196608
|
| 107 |
+
print_info: n_embd = 3072
|
| 108 |
+
print_info: n_embd_inp = 3072
|
| 109 |
+
print_info: n_layer = 62
|
| 110 |
+
print_info: n_head = 48
|
| 111 |
+
print_info: n_head_kv = 8
|
| 112 |
+
print_info: n_rot = 64
|
| 113 |
+
print_info: n_swa = 0
|
| 114 |
+
print_info: is_swa_any = 0
|
| 115 |
+
print_info: n_embd_head_k = 128
|
| 116 |
+
print_info: n_embd_head_v = 128
|
| 117 |
+
print_info: n_gqa = 6
|
| 118 |
+
print_info: n_embd_k_gqa = 1024
|
| 119 |
+
print_info: n_embd_v_gqa = 1024
|
| 120 |
+
print_info: f_norm_eps = 0.0e+00
|
| 121 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 122 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 123 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 124 |
+
print_info: f_logit_scale = 0.0e+00
|
| 125 |
+
print_info: f_attn_scale = 0.0e+00
|
| 126 |
+
print_info: n_ff = 1536
|
| 127 |
+
print_info: n_expert = 256
|
| 128 |
+
print_info: n_expert_used = 8
|
| 129 |
+
print_info: n_expert_groups = 0
|
| 130 |
+
print_info: n_group_used = 0
|
| 131 |
+
print_info: causal attn = 1
|
| 132 |
+
print_info: pooling type = 0
|
| 133 |
+
print_info: rope type = 2
|
| 134 |
+
print_info: rope scaling = linear
|
| 135 |
+
print_info: freq_base_train = 5000000.0
|
| 136 |
+
print_info: freq_scale_train = 1
|
| 137 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 138 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 139 |
+
print_info: rope_finetuned = unknown
|
| 140 |
+
print_info: model type = 230B.A10B
|
| 141 |
+
print_info: model params = 228.69 B
|
| 142 |
+
print_info: general.name = Minimax-M2.5
|
| 143 |
+
print_info: vocab type = BPE
|
| 144 |
+
print_info: n_vocab = 200064
|
| 145 |
+
print_info: n_merges = 199744
|
| 146 |
+
print_info: BOS token = 200034 ']~!b['
|
| 147 |
+
print_info: EOS token = 200020 '[e~['
|
| 148 |
+
print_info: UNK token = 200021 ']!d~['
|
| 149 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 150 |
+
print_info: LF token = 10 'Ċ'
|
| 151 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 152 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 153 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 154 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 157 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 158 |
+
print_info: EOG token = 200020 '[e~['
|
| 159 |
+
print_info: max token length = 256
|
| 160 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 161 |
+
load_tensors: offloading output layer to GPU
|
| 162 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 163 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 47003.29 MiB
|
| 165 |
+
load_tensors: CPU_Mapped model buffer size = 41497.82 MiB
|
| 166 |
+
load_tensors: CUDA0 model buffer size = 20846.05 MiB
|
| 167 |
+
load_tensors: CUDA1 model buffer size = 18912.20 MiB
|
| 168 |
+
....................................................................................................
|
| 169 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 170 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 171 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 172 |
+
llama_context: constructing llama_context
|
| 173 |
+
llama_context: n_seq_max = 8
|
| 174 |
+
llama_context: n_ctx = 4096
|
| 175 |
+
llama_context: n_ctx_seq = 512
|
| 176 |
+
llama_context: n_batch = 4096
|
| 177 |
+
llama_context: n_ubatch = 4096
|
| 178 |
+
llama_context: causal_attn = 1
|
| 179 |
+
llama_context: flash_attn = enabled
|
| 180 |
+
llama_context: kv_unified = false
|
| 181 |
+
llama_context: freq_base = 5000000.0
|
| 182 |
+
llama_context: freq_scale = 1
|
| 183 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 184 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 185 |
+
llama_kv_cache: CUDA0 KV buffer size = 256.00 MiB
|
| 186 |
+
llama_kv_cache: CUDA1 KV buffer size = 736.00 MiB
|
| 187 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 188 |
+
sched_reserve: reserving ...
|
| 189 |
+
sched_reserve: CUDA0 compute buffer size = 1543.00 MiB
|
| 190 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 191 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 192 |
+
sched_reserve: graph nodes = 4099
|
| 193 |
+
sched_reserve: graph splits = 143 (with bs=4096), 75 (with bs=1)
|
| 194 |
+
sched_reserve: reserve took 23.39 ms, sched copies = 1
|
| 195 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 196 |
+
|
| 197 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 198 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 199 |
+
kl_divergence: 5.18 seconds per pass - ETA 1.30 minutes
|
| 200 |
+
|
| 201 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 202 |
+
1 6.3465 ± 1.1982 -0.00416 ± 0.03616 0.06480 ± 0.00839 8.500 ± 1.200 % 90.588 ± 1.832 %
|
| 203 |
+
2 4.7680 ± 0.5729 0.01972 ± 0.02183 0.04892 ± 0.00480 7.437 ± 0.822 % 92.353 ± 1.178 %
|
| 204 |
+
3 4.5829 ± 0.4500 0.02085 ± 0.01933 0.06209 ± 0.00534 8.974 ± 0.815 % 91.765 ± 0.995 %
|
| 205 |
+
4 5.1655 ± 0.4546 0.00722 ± 0.01642 0.06503 ± 0.00498 8.667 ± 0.685 % 91.275 ± 0.884 %
|
| 206 |
+
5 5.0436 ± 0.4029 0.02991 ± 0.01726 0.07029 ± 0.00578 8.846 ± 0.682 % 91.373 ± 0.787 %
|
| 207 |
+
6 6.2039 ± 0.4882 0.03400 ± 0.01614 0.08098 ± 0.00533 8.520 ± 0.596 % 89.869 ± 0.772 %
|
| 208 |
+
7 5.8471 ± 0.4140 0.04660 ± 0.01523 0.09566 ± 0.00583 9.092 ± 0.528 % 88.908 ± 0.744 %
|
| 209 |
+
8 6.5530 ± 0.4384 0.03707 ± 0.01383 0.09222 ± 0.00516 8.719 ± 0.483 % 88.725 ± 0.700 %
|
| 210 |
+
9 6.4293 ± 0.4008 0.03729 ± 0.01270 0.08903 ± 0.00474 8.463 ± 0.450 % 88.932 ± 0.655 %
|
| 211 |
+
10 5.8784 ± 0.3416 0.03677 ± 0.01165 0.08420 ± 0.00430 8.360 ± 0.415 % 89.255 ± 0.613 %
|
| 212 |
+
11 6.4172 ± 0.3600 0.03264 ± 0.01093 0.08427 ± 0.00402 8.193 ± 0.388 % 88.913 ± 0.593 %
|
| 213 |
+
12 7.0766 ± 0.3845 0.02896 ± 0.01032 0.08394 ± 0.00376 7.961 ± 0.367 % 88.856 ± 0.569 %
|
| 214 |
+
13 7.3320 ± 0.3791 0.02714 ± 0.00972 0.08154 ± 0.00353 7.756 ± 0.349 % 88.959 ± 0.544 %
|
| 215 |
+
14 7.9243 ± 0.3991 0.02959 ± 0.00954 0.08379 ± 0.00372 7.845 ± 0.363 % 88.711 ± 0.530 %
|
| 216 |
+
15 8.2674 ± 0.4020 0.02585 ± 0.00906 0.08268 ± 0.00349 7.797 ± 0.344 % 88.680 ± 0.512 %
|
| 217 |
+
16 8.5247 ± 0.4012 0.02429 ± 0.00860 0.08046 ± 0.00329 7.726 ± 0.328 % 88.750 ± 0.495 %
|
| 218 |
+
17 8.8210 ± 0.4062 0.03135 ± 0.00869 0.08542 ± 0.00341 7.781 ± 0.331 % 88.512 ± 0.484 %
|
| 219 |
+
18 8.3458 ± 0.3717 0.03520 ± 0.00850 0.08770 ± 0.00345 7.952 ± 0.337 % 88.562 ± 0.470 %
|
| 220 |
+
19 8.4204 ± 0.3648 0.02829 ± 0.00836 0.08969 ± 0.00351 8.180 ± 0.333 % 88.545 ± 0.458 %
|
| 221 |
+
20 8.4728 ± 0.3575 0.02662 ± 0.00825 0.09205 ± 0.00343 8.333 ± 0.324 % 88.471 ± 0.447 %
|
| 222 |
+
21 8.4505 ± 0.3480 0.02656 ± 0.00798 0.09086 ± 0.00329 8.270 ± 0.313 % 88.609 ± 0.434 %
|
| 223 |
+
22 8.7806 ± 0.3567 0.02720 ± 0.00777 0.09130 ± 0.00317 8.239 ± 0.303 % 88.360 ± 0.428 %
|
| 224 |
+
23 8.8221 ± 0.3520 0.03071 ± 0.00787 0.09585 ± 0.00328 8.604 ± 0.307 % 88.218 ± 0.421 %
|
| 225 |
+
24 9.2346 ± 0.3627 0.03034 ± 0.00765 0.09519 ± 0.00316 8.491 ± 0.299 % 88.219 ± 0.412 %
|
| 226 |
+
25 9.2512 ± 0.3570 0.03353 ± 0.00760 0.09773 ± 0.00314 8.644 ± 0.294 % 88.094 ± 0.406 %
|
| 227 |
+
26 8.8469 ± 0.3314 0.05745 ± 0.00838 0.12183 ± 0.00458 11.347 ± 0.387 % 87.677 ± 0.404 %
|
| 228 |
+
27 8.5919 ± 0.3139 0.08213 ± 0.00907 0.14578 ± 0.00555 13.206 ± 0.403 % 87.204 ± 0.403 %
|
| 229 |
+
28 8.7150 ± 0.3134 0.08268 ± 0.00885 0.14470 ± 0.00537 13.154 ± 0.394 % 87.101 ± 0.397 %
|
| 230 |
+
29 8.6111 ± 0.3041 0.08016 ± 0.00866 0.14369 ± 0.00521 13.023 ± 0.385 % 87.221 ± 0.388 %
|
| 231 |
+
30 8.0621 ± 0.2778 0.08148 ± 0.00864 0.14183 ± 0.00519 12.977 ± 0.380 % 87.569 ± 0.377 %
|
| 232 |
+
31 7.5926 ± 0.2554 0.08085 ± 0.00844 0.13888 ± 0.00505 12.842 ± 0.373 % 87.932 ± 0.366 %
|
| 233 |
+
32 7.3742 ± 0.2420 0.07690 ± 0.00822 0.13602 ± 0.00490 12.715 ± 0.365 % 87.953 ± 0.360 %
|
| 234 |
+
33 7.2125 ± 0.2315 0.07463 ± 0.00802 0.13375 ± 0.00476 12.607 ± 0.358 % 88.010 ± 0.354 %
|
| 235 |
+
34 7.4018 ± 0.2353 0.07351 ± 0.00790 0.13540 ± 0.00466 12.520 ± 0.350 % 87.809 ± 0.351 %
|
| 236 |
+
35 7.5286 ± 0.2376 0.07584 ± 0.00789 0.14033 ± 0.00465 12.720 ± 0.343 % 87.574 ± 0.349 %
|
| 237 |
+
36 7.5827 ± 0.2367 0.07414 ± 0.00775 0.13920 ± 0.00454 12.665 ± 0.337 % 87.516 ± 0.345 %
|
| 238 |
+
37 7.6327 ± 0.2354 0.07842 ± 0.00776 0.14202 ± 0.00456 12.883 ± 0.333 % 87.419 ± 0.341 %
|
| 239 |
+
38 7.8477 ± 0.2397 0.07647 ± 0.00760 0.14045 ± 0.00445 12.765 ± 0.328 % 87.399 ± 0.337 %
|
| 240 |
+
39 7.8194 ± 0.2352 0.07980 ± 0.00755 0.14579 ± 0.00458 13.156 ± 0.329 % 87.280 ± 0.334 %
|
| 241 |
+
40 7.7063 ± 0.2276 0.10014 ± 0.00794 0.16692 ± 0.00517 14.414 ± 0.331 % 86.765 ± 0.336 %
|
| 242 |
+
41 7.5873 ± 0.2202 0.11541 ± 0.00823 0.18278 ± 0.00550 15.294 ± 0.330 % 86.399 ± 0.335 %
|
| 243 |
+
42 7.4452 ± 0.2124 0.12782 ± 0.00840 0.19599 ± 0.00570 16.114 ± 0.330 % 86.172 ± 0.334 %
|
| 244 |
+
43 7.2841 ± 0.2044 0.13709 ± 0.00858 0.20716 ± 0.00590 16.675 ± 0.327 % 85.974 ± 0.332 %
|
| 245 |
+
44 7.2109 ± 0.1991 0.13332 ± 0.00842 0.20374 ± 0.00577 16.511 ± 0.323 % 86.114 ± 0.326 %
|
| 246 |
+
45 7.3628 ± 0.2022 0.13227 ± 0.00828 0.20173 ± 0.00565 16.364 ± 0.319 % 86.074 ± 0.323 %
|
| 247 |
+
46 7.5049 ± 0.2041 0.12931 ± 0.00813 0.19921 ± 0.00553 16.208 ± 0.315 % 86.130 ± 0.319 %
|
| 248 |
+
47 7.6592 ± 0.2066 0.12745 ± 0.00798 0.19618 ± 0.00542 16.054 ± 0.311 % 86.149 ± 0.316 %
|
| 249 |
+
48 7.5121 ± 0.1995 0.12471 ± 0.00783 0.19321 ± 0.00531 15.921 ± 0.308 % 86.266 ± 0.311 %
|
| 250 |
+
49 7.6050 ± 0.1996 0.12198 ± 0.00778 0.19387 ± 0.00526 15.831 ± 0.304 % 86.098 ± 0.310 %
|
| 251 |
+
50 7.6924 ± 0.2006 0.11926 ± 0.00766 0.19228 ± 0.00517 15.728 ± 0.300 % 86.086 ± 0.307 %
|
| 252 |
+
51 7.7978 ± 0.2015 0.11747 ± 0.00753 0.18992 ± 0.00507 15.601 ± 0.297 % 86.136 ± 0.303 %
|
| 253 |
+
52 7.8520 ± 0.2005 0.11509 ± 0.00746 0.18952 ± 0.00499 15.515 ± 0.293 % 86.063 ± 0.301 %
|
| 254 |
+
53 7.9499 ± 0.2009 0.11169 ± 0.00735 0.18740 ± 0.00490 15.401 ± 0.290 % 86.045 ± 0.298 %
|
| 255 |
+
54 7.9975 ± 0.1997 0.10988 ± 0.00723 0.18514 ± 0.00481 15.279 ± 0.287 % 86.042 ± 0.295 %
|
| 256 |
+
55 8.0431 ± 0.1987 0.10854 ± 0.00711 0.18275 ± 0.00472 15.156 ± 0.284 % 86.018 ± 0.293 %
|
| 257 |
+
56 8.0702 ± 0.1975 0.10617 ± 0.00700 0.18065 ± 0.00464 15.046 ± 0.282 % 86.015 ± 0.290 %
|
| 258 |
+
57 8.0724 ± 0.1959 0.10651 ± 0.00694 0.18118 ± 0.00460 15.044 ± 0.278 % 85.972 ± 0.288 %
|
| 259 |
+
58 8.0856 ± 0.1947 0.10676 ± 0.00687 0.17970 ± 0.00452 14.950 ± 0.275 % 85.991 ± 0.285 %
|
| 260 |
+
59 8.0266 ± 0.1912 0.10457 ± 0.00677 0.17713 ± 0.00445 14.838 ± 0.273 % 86.162 ± 0.282 %
|
| 261 |
+
60 8.0341 ± 0.1899 0.10402 ± 0.00668 0.17567 ± 0.00438 14.759 ± 0.270 % 86.144 ± 0.279 %
|
| 262 |
+
61 8.0736 ± 0.1892 0.10229 ± 0.00660 0.17410 ± 0.00431 14.668 ± 0.267 % 86.133 ± 0.277 %
|
| 263 |
+
62 8.0423 ± 0.1873 0.10167 ± 0.00654 0.17281 ± 0.00425 14.618 ± 0.265 % 86.199 ± 0.274 %
|
| 264 |
+
63 8.0934 ± 0.1876 0.10168 ± 0.00649 0.17222 ± 0.00420 14.565 ± 0.263 % 86.187 ± 0.272 %
|
| 265 |
+
64 8.0745 ± 0.1853 0.10193 ± 0.00642 0.17132 ± 0.00414 14.492 ± 0.261 % 86.158 ± 0.270 %
|
| 266 |
+
65 8.0455 ± 0.1831 0.09949 ± 0.00635 0.17015 ± 0.00408 14.424 ± 0.258 % 86.226 ± 0.268 %
|
| 267 |
+
66 8.0760 ± 0.1825 0.09828 ± 0.00628 0.16913 ± 0.00403 14.358 ± 0.256 % 86.239 ± 0.266 %
|
| 268 |
+
67 8.0806 ± 0.1814 0.09791 ± 0.00621 0.16806 ± 0.00397 14.278 ± 0.253 % 86.263 ± 0.263 %
|
| 269 |
+
68 8.0216 ± 0.1785 0.09654 ± 0.00613 0.16626 ± 0.00391 14.186 ± 0.251 % 86.384 ± 0.260 %
|
| 270 |
+
69 8.0484 ± 0.1778 0.09543 ± 0.00606 0.16504 ± 0.00386 14.113 ± 0.249 % 86.417 ± 0.258 %
|
| 271 |
+
70 8.0123 ± 0.1755 0.09472 ± 0.00600 0.16448 ± 0.00382 14.041 ± 0.247 % 86.437 ± 0.256 %
|
| 272 |
+
71 7.9849 ± 0.1736 0.09328 ± 0.00593 0.16329 ± 0.00377 13.981 ± 0.245 % 86.490 ± 0.254 %
|
| 273 |
+
72 8.0031 ± 0.1731 0.09339 ± 0.00590 0.16213 ± 0.00372 13.906 ± 0.243 % 86.509 ± 0.252 %
|
| 274 |
+
73 7.9977 ± 0.1716 0.09197 ± 0.00583 0.16114 ± 0.00367 13.839 ± 0.241 % 86.527 ± 0.250 %
|
| 275 |
+
74 7.9769 ± 0.1698 0.09020 ± 0.00577 0.16046 ± 0.00362 13.778 ± 0.239 % 86.460 ± 0.249 %
|
| 276 |
+
75 7.9770 ± 0.1687 0.08929 ± 0.00572 0.15988 ± 0.00358 13.738 ± 0.236 % 86.473 ± 0.247 %
|
| 277 |
+
76 8.0364 ± 0.1689 0.08823 ± 0.00568 0.15940 ± 0.00354 13.697 ± 0.235 % 86.429 ± 0.246 %
|
| 278 |
+
77 8.0216 ± 0.1674 0.08665 ± 0.00562 0.15836 ± 0.00350 13.633 ± 0.233 % 86.453 ± 0.244 %
|
| 279 |
+
78 8.0287 ± 0.1666 0.08572 ± 0.00557 0.15761 ± 0.00346 13.577 ± 0.231 % 86.456 ± 0.243 %
|
| 280 |
+
79 8.0342 ± 0.1656 0.08494 ± 0.00552 0.15692 ± 0.00342 13.518 ± 0.229 % 86.463 ± 0.241 %
|
| 281 |
+
80 8.0329 ± 0.1651 0.08453 ± 0.00552 0.15760 ± 0.00341 13.496 ± 0.227 % 86.446 ± 0.240 %
|
| 282 |
+
81 8.0062 ± 0.1636 0.08430 ± 0.00546 0.15686 ± 0.00337 13.443 ± 0.225 % 86.478 ± 0.238 %
|
| 283 |
+
82 7.9856 ± 0.1620 0.08466 ± 0.00542 0.15603 ± 0.00333 13.386 ± 0.224 % 86.480 ± 0.236 %
|
| 284 |
+
83 8.0165 ± 0.1614 0.08419 ± 0.00536 0.15489 ± 0.00330 13.328 ± 0.222 % 86.473 ± 0.235 %
|
| 285 |
+
84 8.0292 ± 0.1604 0.08359 ± 0.00531 0.15367 ± 0.00326 13.264 ± 0.221 % 86.466 ± 0.234 %
|
| 286 |
+
85 8.0189 ± 0.1590 0.08287 ± 0.00525 0.15247 ± 0.00322 13.201 ± 0.219 % 86.501 ± 0.232 %
|
| 287 |
+
86 7.9420 ± 0.1560 0.08228 ± 0.00520 0.15156 ± 0.00319 13.158 ± 0.217 % 86.521 ± 0.231 %
|
| 288 |
+
87 7.8753 ± 0.1534 0.08164 ± 0.00516 0.15066 ± 0.00315 13.112 ± 0.216 % 86.558 ± 0.229 %
|
| 289 |
+
88 7.8048 ± 0.1507 0.08099 ± 0.00510 0.14974 ± 0.00312 13.064 ± 0.214 % 86.618 ± 0.227 %
|
| 290 |
+
89 7.7226 ± 0.1478 0.08037 ± 0.00505 0.14856 ± 0.00309 13.009 ± 0.213 % 86.667 ± 0.226 %
|
| 291 |
+
90 7.6536 ± 0.1453 0.07911 ± 0.00500 0.14740 ± 0.00306 12.950 ± 0.212 % 86.723 ± 0.224 %
|
| 292 |
+
91 7.5938 ± 0.1430 0.07836 ± 0.00496 0.14636 ± 0.00303 12.900 ± 0.210 % 86.796 ± 0.222 %
|
| 293 |
+
92 7.5295 ± 0.1406 0.07792 ± 0.00491 0.14552 ± 0.00300 12.856 ± 0.209 % 86.829 ± 0.221 %
|
| 294 |
+
93 7.5450 ± 0.1404 0.07747 ± 0.00489 0.14562 ± 0.00298 12.848 ± 0.208 % 86.818 ± 0.220 %
|
| 295 |
+
94 7.5687 ± 0.1400 0.07628 ± 0.00485 0.14468 ± 0.00295 12.796 ± 0.206 % 86.850 ± 0.218 %
|
| 296 |
+
95 7.6800 ± 0.1417 0.07568 ± 0.00481 0.14424 ± 0.00292 12.748 ± 0.205 % 86.811 ± 0.217 %
|
| 297 |
+
96 7.7731 ± 0.1428 0.07477 ± 0.00477 0.14370 ± 0.00289 12.700 ± 0.204 % 86.744 ± 0.217 %
|
| 298 |
+
97 7.8524 ± 0.1436 0.07426 ± 0.00473 0.14295 ± 0.00286 12.646 ± 0.202 % 86.695 ± 0.216 %
|
| 299 |
+
98 7.9973 ± 0.1461 0.07440 ± 0.00470 0.14215 ± 0.00283 12.591 ± 0.201 % 86.687 ± 0.215 %
|
| 300 |
+
99 8.1220 ± 0.1479 0.07431 ± 0.00466 0.14164 ± 0.00281 12.544 ± 0.200 % 86.635 ± 0.214 %
|
| 301 |
+
100 8.1609 ± 0.1480 0.07421 ± 0.00463 0.14142 ± 0.00278 12.512 ± 0.199 % 86.604 ± 0.213 %
|
| 302 |
+
101 8.1959 ± 0.1480 0.07365 ± 0.00459 0.14101 ± 0.00276 12.482 ± 0.198 % 86.605 ± 0.212 %
|
| 303 |
+
102 8.2631 ± 0.1491 0.07391 ± 0.00457 0.14104 ± 0.00274 12.473 ± 0.197 % 86.571 ± 0.211 %
|
| 304 |
+
103 8.2373 ± 0.1481 0.07417 ± 0.00454 0.14038 ± 0.00272 12.452 ± 0.195 % 86.625 ± 0.210 %
|
| 305 |
+
104 8.1787 ± 0.1461 0.07453 ± 0.00453 0.14102 ± 0.00271 12.535 ± 0.195 % 86.640 ± 0.209 %
|
| 306 |
+
105 8.0591 ± 0.1430 0.07431 ± 0.00452 0.14107 ± 0.00270 12.575 ± 0.194 % 86.715 ± 0.207 %
|
| 307 |
+
106 7.9221 ± 0.1396 0.07447 ± 0.00450 0.14061 ± 0.00268 12.581 ± 0.193 % 86.804 ± 0.206 %
|
| 308 |
+
107 7.9779 ± 0.1399 0.07375 ± 0.00446 0.13969 ± 0.00266 12.530 ± 0.192 % 86.817 ± 0.205 %
|
| 309 |
+
108 7.9875 ± 0.1394 0.07318 ± 0.00443 0.13938 ± 0.00264 12.504 ± 0.191 % 86.819 ± 0.204 %
|
| 310 |
+
109 8.0093 ± 0.1392 0.07318 ± 0.00440 0.13897 ± 0.00261 12.474 ± 0.190 % 86.807 ± 0.203 %
|
| 311 |
+
110 8.0445 ± 0.1392 0.07288 ± 0.00437 0.13852 ± 0.00260 12.447 ± 0.189 % 86.788 ± 0.202 %
|
| 312 |
+
111 8.0900 ± 0.1393 0.07221 ± 0.00434 0.13799 ± 0.00258 12.403 ± 0.188 % 86.773 ± 0.201 %
|
| 313 |
+
112 8.0962 ± 0.1387 0.07162 ± 0.00431 0.13727 ± 0.00255 12.363 ± 0.187 % 86.789 ± 0.200 %
|
| 314 |
+
113 8.1047 ± 0.1380 0.07130 ± 0.00428 0.13658 ± 0.00253 12.320 ± 0.186 % 86.799 ± 0.199 %
|
| 315 |
+
114 8.1183 ± 0.1377 0.07065 ± 0.00425 0.13587 ± 0.00251 12.276 ± 0.185 % 86.842 ± 0.198 %
|
| 316 |
+
115 8.0992 ± 0.1367 0.07063 ± 0.00424 0.13615 ± 0.00250 12.298 ± 0.184 % 86.864 ± 0.197 %
|
| 317 |
+
116 8.1173 ± 0.1366 0.07425 ± 0.00427 0.13954 ± 0.00252 12.496 ± 0.184 % 86.711 ± 0.197 %
|
| 318 |
+
117 8.0484 ± 0.1346 0.07930 ± 0.00434 0.14459 ± 0.00262 12.818 ± 0.185 % 86.660 ± 0.197 %
|
| 319 |
+
118 7.9822 ± 0.1326 0.08390 ± 0.00438 0.14912 ± 0.00267 13.137 ± 0.186 % 86.567 ± 0.197 %
|
| 320 |
+
119 7.9128 ± 0.1307 0.08861 ± 0.00444 0.15433 ± 0.00276 13.526 ± 0.187 % 86.469 ± 0.196 %
|
| 321 |
+
120 7.8636 ± 0.1291 0.09372 ± 0.00451 0.15948 ± 0.00284 13.849 ± 0.187 % 86.363 ± 0.196 %
|
| 322 |
+
121 7.8049 ± 0.1274 0.09787 ± 0.00453 0.16373 ± 0.00288 14.174 ± 0.188 % 86.271 ± 0.196 %

====== Perplexity statistics ======
Mean PPL(Q)                   : 7.804892 ± 0.127436
Mean PPL(base)                : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 96.11%
Mean ln(PPL(Q)/PPL(base))     : 0.097867 ± 0.004533
Mean PPL(Q)/PPL(base)         : 1.102816 ± 0.004999
Mean PPL(Q)-PPL(base)         : 0.727652 ± 0.036143

====== KL divergence statistics ======
Mean    KLD: 0.163725 ± 0.002878
Maximum KLD: 9.271864
99.9%   KLD: 5.711734
99.0%   KLD: 2.694207
95.0%   KLD: 0.687143
90.0%   KLD: 0.299838
Median  KLD: 0.034524
10.0%   KLD: 0.000195
5.0%    KLD: 0.000030
1.0%    KLD: 0.000002
0.1%    KLD: -0.000001
Minimum KLD: -0.000005

====== Token probability statistics ======
Mean    Δp: -2.365 ± 0.080 %
Maximum Δp: 94.447%
99.9%   Δp: 74.181%
99.0%   Δp: 27.025%
95.0%   Δp: 9.443%
90.0%   Δp: 4.425%
75.0%   Δp: 0.422%
Median  Δp: -0.011%
25.0%   Δp: -2.025%
10.0%   Δp: -9.900%
5.0%    Δp: -20.650%
1.0%    Δp: -74.846%
0.1%    Δp: -97.304%
Minimum Δp: -99.748%
RMS Δp    : 14.174 ± 0.188 %
Same top p: 86.271 ± 0.196 %

llama_perf_context_print: load time = 33173.38 ms
llama_perf_context_print: prompt eval time = 65346.89 ms / 61952 tokens ( 1.05 ms per token, 948.05 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 77968.92 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1067 + (22645 = 20846 + 256 + 1542) + 422 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 857 + (22822 = 18912 + 736 + 3174) + 455 |
llama_memory_breakdown_print: | - Host | 88605 = 88501 + 0 + 104 |
```
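
A quick consistency check that can be read directly off the perplexity block above: with the standard definition of perplexity as the exponentiated mean negative log-likelihood, the mean log-ratio, the PPL ratio and the two mean PPL values are tied together. Plugging in the numbers reported for this quant (the derivation below assumes that standard definition):

```latex
% PPL = exp( -(1/N) * sum_i ln p(x_i | x_{<i}) )   (assumed standard definition)
\[
\ln\frac{\mathrm{PPL}(Q)}{\mathrm{PPL}(\mathrm{base})}
  = \frac{1}{N}\sum_{i=1}^{N}\Big[ \ln p_{\mathrm{base}}(x_i \mid x_{<i}) - \ln p_{Q}(x_i \mid x_{<i}) \Big]
  \approx 0.097867
\]
\[
\frac{\mathrm{PPL}(Q)}{\mathrm{PPL}(\mathrm{base})} \approx e^{0.097867} \approx 1.1028,
\qquad
7.07724 \times 1.1028 \approx 7.805 = \mathrm{PPL}(Q).
\]
```

This matches the reported Mean PPL(Q)/PPL(base) of 1.102816 and Mean PPL(Q) of 7.804892, and the same check applies to every quant in this set.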
kld_data/unsloth/UD-Q2_K_XL/MiniMax-M2.5-UD-Q2_K_XL.md
ADDED
@@ -0,0 +1,372 @@
### MiniMax-M2.5-UD-Q2_K_XL (unsloth)

80.01 GiB (3.01 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q2_K_XL/MiniMax-M2.5-UD-Q2_K_XL-00001-of-00003.gguf
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 43186 used, -19314 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 43976 used, -20104 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 87162 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 41467 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37865 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7619 MiB used, 16252 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1461 MiB used, 22409 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 17 layers ( 1 overflowing), 22687 MiB used, 1184 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 46 layers (34 overflowing), 22561 MiB used, 1310 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 3.92 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 2 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q2_K_XL/MiniMax-M2.5-UD-Q2_K_XL-00001-of-00003.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 10
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 3
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q2_K: 124 tensors
|
| 87 |
+
llama_model_loader: - type q3_K: 50 tensors
|
| 88 |
+
llama_model_loader: - type q4_K: 235 tensors
|
| 89 |
+
llama_model_loader: - type q5_K: 16 tensors
|
| 90 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 91 |
+
print_info: file format = GGUF V3 (latest)
|
| 92 |
+
print_info: file type = Q2_K - Medium
|
| 93 |
+
print_info: file size = 80.01 GiB (3.01 BPW)
|
| 94 |
+
load: 0 unused tokens
|
| 95 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 96 |
+
load: printing all EOG tokens:
|
| 97 |
+
load: - 200004 ('<fim_pad>')
|
| 98 |
+
load: - 200005 ('<reponame>')
|
| 99 |
+
load: - 200020 ('[e~[')
|
| 100 |
+
load: special tokens cache size = 54
|
| 101 |
+
load: token to piece cache size = 1.3355 MB
|
| 102 |
+
print_info: arch = minimax-m2
|
| 103 |
+
print_info: vocab_only = 0
|
| 104 |
+
print_info: no_alloc = 0
|
| 105 |
+
print_info: n_ctx_train = 196608
|
| 106 |
+
print_info: n_embd = 3072
|
| 107 |
+
print_info: n_embd_inp = 3072
|
| 108 |
+
print_info: n_layer = 62
|
| 109 |
+
print_info: n_head = 48
|
| 110 |
+
print_info: n_head_kv = 8
|
| 111 |
+
print_info: n_rot = 64
|
| 112 |
+
print_info: n_swa = 0
|
| 113 |
+
print_info: is_swa_any = 0
|
| 114 |
+
print_info: n_embd_head_k = 128
|
| 115 |
+
print_info: n_embd_head_v = 128
|
| 116 |
+
print_info: n_gqa = 6
|
| 117 |
+
print_info: n_embd_k_gqa = 1024
|
| 118 |
+
print_info: n_embd_v_gqa = 1024
|
| 119 |
+
print_info: f_norm_eps = 0.0e+00
|
| 120 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 121 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 122 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 123 |
+
print_info: f_logit_scale = 0.0e+00
|
| 124 |
+
print_info: f_attn_scale = 0.0e+00
|
| 125 |
+
print_info: n_ff = 1536
|
| 126 |
+
print_info: n_expert = 256
|
| 127 |
+
print_info: n_expert_used = 8
|
| 128 |
+
print_info: n_expert_groups = 0
|
| 129 |
+
print_info: n_group_used = 0
|
| 130 |
+
print_info: causal attn = 1
|
| 131 |
+
print_info: pooling type = 0
|
| 132 |
+
print_info: rope type = 2
|
| 133 |
+
print_info: rope scaling = linear
|
| 134 |
+
print_info: freq_base_train = 5000000.0
|
| 135 |
+
print_info: freq_scale_train = 1
|
| 136 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 137 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 138 |
+
print_info: rope_finetuned = unknown
|
| 139 |
+
print_info: model type = 230B.A10B
|
| 140 |
+
print_info: model params = 228.69 B
|
| 141 |
+
print_info: general.name = Minimax-M2.5
|
| 142 |
+
print_info: vocab type = BPE
|
| 143 |
+
print_info: n_vocab = 200064
|
| 144 |
+
print_info: n_merges = 199744
|
| 145 |
+
print_info: BOS token = 200034 ']~!b['
|
| 146 |
+
print_info: EOS token = 200020 '[e~['
|
| 147 |
+
print_info: UNK token = 200021 ']!d~['
|
| 148 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 149 |
+
print_info: LF token = 10 'Ċ'
|
| 150 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 151 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 152 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 153 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 155 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 156 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 157 |
+
print_info: EOG token = 200020 '[e~['
|
| 158 |
+
print_info: max token length = 256
|
| 159 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 160 |
+
load_tensors: offloading output layer to GPU
|
| 161 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 162 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47118.90 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 34306.49 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 20953.35 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18667.83 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 272.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 720.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1462.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 139 (with bs=4096), 75 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 22.74 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 5.52 seconds per pass - ETA 1.38 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.8329 ± 1.3600 0.06968 ± 0.03479 0.10747 ± 0.01680 10.288 ± 1.852 % 89.804 ± 1.899 %
|
| 202 |
+
2 5.0387 ± 0.6365 0.07493 ± 0.02296 0.07914 ± 0.00911 8.408 ± 1.172 % 92.157 ± 1.192 %
|
| 203 |
+
3 4.6835 ± 0.4720 0.04256 ± 0.02129 0.09611 ± 0.00848 9.842 ± 0.910 % 91.765 ± 0.995 %
|
| 204 |
+
4 5.3595 ± 0.4867 0.04410 ± 0.01893 0.10031 ± 0.00719 9.889 ± 0.747 % 90.588 ± 0.915 %
|
| 205 |
+
5 5.1862 ± 0.4233 0.05777 ± 0.02130 0.10783 ± 0.00802 10.266 ± 0.745 % 90.588 ± 0.818 %
|
| 206 |
+
6 6.3232 ± 0.5031 0.05304 ± 0.02053 0.12014 ± 0.00742 10.150 ± 0.676 % 88.889 ± 0.804 %
|
| 207 |
+
7 5.8953 ± 0.4184 0.05481 ± 0.01946 0.14055 ± 0.00829 11.593 ± 0.657 % 88.347 ± 0.760 %
|
| 208 |
+
8 6.6420 ± 0.4471 0.05055 ± 0.01760 0.13580 ± 0.00734 11.083 ± 0.603 % 87.990 ± 0.720 %
|
| 209 |
+
9 6.5106 ± 0.4088 0.04986 ± 0.01594 0.13036 ± 0.00668 10.702 ± 0.562 % 88.061 ± 0.677 %
|
| 210 |
+
10 5.9543 ± 0.3483 0.04960 ± 0.01462 0.12452 ± 0.00609 10.727 ± 0.516 % 88.235 ± 0.638 %
|
| 211 |
+
11 6.5077 ± 0.3678 0.04666 ± 0.01376 0.12550 ± 0.00574 10.718 ± 0.483 % 87.701 ± 0.620 %
|
| 212 |
+
12 7.2062 ± 0.3951 0.04711 ± 0.01298 0.12534 ± 0.00540 10.437 ± 0.457 % 87.386 ± 0.600 %
|
| 213 |
+
13 7.4469 ± 0.3883 0.04269 ± 0.01223 0.12147 ± 0.00502 10.244 ± 0.433 % 87.210 ± 0.580 %
|
| 214 |
+
14 8.0130 ± 0.4059 0.04071 ± 0.01207 0.12506 ± 0.00504 10.240 ± 0.421 % 86.695 ± 0.569 %
|
| 215 |
+
15 8.3782 ± 0.4107 0.03916 ± 0.01143 0.12342 ± 0.00473 10.109 ± 0.401 % 86.379 ± 0.555 %
|
| 216 |
+
16 8.6321 ± 0.4090 0.03681 ± 0.01089 0.12024 ± 0.00446 9.954 ± 0.383 % 86.520 ± 0.535 %
|
| 217 |
+
17 8.8848 ± 0.4122 0.03855 ± 0.01104 0.12548 ± 0.00449 9.904 ± 0.370 % 86.205 ± 0.524 %
|
| 218 |
+
18 8.3720 ± 0.3751 0.03833 ± 0.01076 0.12814 ± 0.00461 9.945 ± 0.364 % 86.296 ± 0.508 %
|
| 219 |
+
19 8.5024 ± 0.3700 0.03798 ± 0.01046 0.12503 ± 0.00438 9.850 ± 0.350 % 86.419 ± 0.492 %
|
| 220 |
+
20 8.5854 ± 0.3646 0.03982 ± 0.01032 0.12935 ± 0.00437 9.905 ± 0.338 % 86.137 ± 0.484 %
|
| 221 |
+
21 8.5459 ± 0.3536 0.03778 ± 0.00998 0.12876 ± 0.00426 9.942 ± 0.329 % 86.181 ± 0.472 %
|
| 222 |
+
22 8.9163 ± 0.3644 0.04254 ± 0.00979 0.13000 ± 0.00415 10.024 ± 0.325 % 85.865 ± 0.465 %
|
| 223 |
+
23 8.9871 ± 0.3614 0.04924 ± 0.01000 0.13872 ± 0.00459 10.547 ± 0.344 % 85.610 ± 0.458 %
|
| 224 |
+
24 9.3979 ± 0.3717 0.04787 ± 0.00968 0.13762 ± 0.00441 10.421 ± 0.335 % 85.474 ± 0.450 %
|
| 225 |
+
25 9.3705 ± 0.3636 0.04634 ± 0.00961 0.14235 ± 0.00452 10.646 ± 0.330 % 85.302 ± 0.444 %
|
| 226 |
+
26 9.0651 ± 0.3417 0.08181 ± 0.01051 0.17490 ± 0.00597 13.431 ± 0.396 % 84.615 ± 0.443 %
|
| 227 |
+
27 8.8436 ± 0.3252 0.11101 ± 0.01128 0.20684 ± 0.00707 15.360 ± 0.413 % 84.096 ± 0.441 %
|
| 228 |
+
28 8.9456 ± 0.3236 0.10880 ± 0.01100 0.20506 ± 0.00685 15.227 ± 0.403 % 84.062 ± 0.433 %
|
| 229 |
+
29 8.8544 ± 0.3149 0.10802 ± 0.01073 0.20322 ± 0.00663 15.115 ± 0.393 % 84.287 ± 0.423 %
|
| 230 |
+
30 8.2797 ± 0.2872 0.10811 ± 0.01046 0.19853 ± 0.00647 15.011 ± 0.386 % 84.719 ± 0.411 %
|
| 231 |
+
31 7.7818 ± 0.2634 0.10546 ± 0.01017 0.19478 ± 0.00633 14.872 ± 0.379 % 85.098 ± 0.401 %
|
| 232 |
+
32 7.5658 ± 0.2500 0.10255 ± 0.00991 0.19159 ± 0.00614 14.778 ± 0.370 % 85.208 ± 0.393 %
|
| 233 |
+
33 7.4114 ± 0.2397 0.10183 ± 0.00967 0.18872 ± 0.00597 14.660 ± 0.363 % 85.276 ± 0.386 %
|
| 234 |
+
34 7.6221 ± 0.2442 0.10284 ± 0.00955 0.19142 ± 0.00585 14.582 ± 0.355 % 85.167 ± 0.382 %
|
| 235 |
+
35 7.7276 ± 0.2457 0.10194 ± 0.00947 0.19552 ± 0.00578 14.654 ± 0.348 % 84.975 ± 0.378 %
|
| 236 |
+
36 7.7845 ± 0.2450 0.10040 ± 0.00930 0.19517 ± 0.00565 14.624 ± 0.341 % 84.978 ± 0.373 %
|
| 237 |
+
37 7.8831 ± 0.2451 0.11070 ± 0.00944 0.20533 ± 0.00597 15.294 ± 0.344 % 84.738 ± 0.370 %
|
| 238 |
+
38 8.1011 ± 0.2495 0.10825 ± 0.00922 0.20310 ± 0.00582 15.152 ± 0.339 % 84.665 ± 0.366 %
|
| 239 |
+
39 8.1295 ± 0.2468 0.11869 ± 0.00929 0.21216 ± 0.00598 15.696 ± 0.340 % 84.465 ± 0.363 %
|
| 240 |
+
40 8.0422 ± 0.2400 0.14281 ± 0.00966 0.23682 ± 0.00661 16.735 ± 0.339 % 83.941 ± 0.364 %
|
| 241 |
+
41 8.0611 ± 0.2372 0.17599 ± 0.01032 0.26897 ± 0.00740 18.090 ± 0.343 % 83.338 ± 0.364 %
|
| 242 |
+
42 7.9835 ± 0.2313 0.19764 ± 0.01065 0.29378 ± 0.00785 19.282 ± 0.344 % 82.829 ± 0.364 %
|
| 243 |
+
43 7.8584 ± 0.2240 0.21298 ± 0.01079 0.30980 ± 0.00806 19.995 ± 0.342 % 82.572 ± 0.362 %
|
| 244 |
+
44 7.7680 ± 0.2180 0.20774 ± 0.01059 0.30522 ± 0.00788 19.836 ± 0.337 % 82.620 ± 0.358 %
|
| 245 |
+
45 7.9150 ± 0.2207 0.20459 ± 0.01040 0.30252 ± 0.00772 19.657 ± 0.333 % 82.571 ± 0.354 %
|
| 246 |
+
46 8.0651 ± 0.2228 0.20130 ± 0.01021 0.29888 ± 0.00756 19.487 ± 0.329 % 82.566 ± 0.350 %
|
| 247 |
+
47 8.2245 ± 0.2253 0.19866 ± 0.01001 0.29473 ± 0.00741 19.316 ± 0.326 % 82.628 ± 0.346 %
|
| 248 |
+
48 8.0556 ± 0.2172 0.19456 ± 0.00983 0.29010 ± 0.00726 19.162 ± 0.322 % 82.696 ± 0.342 %
|
| 249 |
+
49 8.1495 ± 0.2174 0.19113 ± 0.00976 0.29252 ± 0.00734 19.067 ± 0.318 % 82.569 ± 0.339 %
|
| 250 |
+
50 8.2542 ± 0.2191 0.18976 ± 0.00962 0.29054 ± 0.00721 18.961 ± 0.314 % 82.510 ± 0.336 %
|
| 251 |
+
51 8.3544 ± 0.2198 0.18642 ± 0.00946 0.28682 ± 0.00707 18.810 ± 0.311 % 82.568 ± 0.333 %
|
| 252 |
+
52 8.4037 ± 0.2185 0.18300 ± 0.00933 0.28494 ± 0.00695 18.671 ± 0.308 % 82.541 ± 0.330 %
|
| 253 |
+
53 8.4994 ± 0.2187 0.17853 ± 0.00919 0.28201 ± 0.00682 18.549 ± 0.304 % 82.471 ± 0.327 %
|
| 254 |
+
54 8.5442 ± 0.2172 0.17601 ± 0.00904 0.27870 ± 0.00670 18.406 ± 0.301 % 82.498 ± 0.324 %
|
| 255 |
+
55 8.5806 ± 0.2158 0.17322 ± 0.00890 0.27548 ± 0.00658 18.267 ± 0.298 % 82.510 ± 0.321 %
|
| 256 |
+
56 8.6039 ± 0.2142 0.17021 ± 0.00877 0.27236 ± 0.00647 18.138 ± 0.295 % 82.514 ± 0.318 %
|
| 257 |
+
57 8.6067 ± 0.2125 0.17059 ± 0.00872 0.27241 ± 0.00638 18.118 ± 0.292 % 82.463 ± 0.315 %
|
| 258 |
+
58 8.6114 ± 0.2108 0.16978 ± 0.00862 0.27023 ± 0.00628 18.024 ± 0.289 % 82.502 ± 0.312 %
|
| 259 |
+
59 8.5431 ± 0.2070 0.16694 ± 0.00849 0.26649 ± 0.00618 17.897 ± 0.286 % 82.645 ± 0.309 %
|
| 260 |
+
60 8.5431 ± 0.2053 0.16546 ± 0.00838 0.26446 ± 0.00610 17.807 ± 0.283 % 82.660 ± 0.306 %
|
| 261 |
+
61 8.5815 ± 0.2045 0.16330 ± 0.00827 0.26229 ± 0.00601 17.705 ± 0.280 % 82.707 ± 0.303 %
|
| 262 |
+
62 8.5346 ± 0.2019 0.16108 ± 0.00819 0.26052 ± 0.00593 17.626 ± 0.277 % 82.770 ± 0.300 %
|
| 263 |
+
63 8.5678 ± 0.2016 0.15864 ± 0.00809 0.25859 ± 0.00584 17.506 ± 0.275 % 82.776 ± 0.298 %
|
| 264 |
+
64 8.5211 ± 0.1984 0.15576 ± 0.00799 0.25656 ± 0.00575 17.405 ± 0.272 % 82.770 ± 0.296 %
|
| 265 |
+
65 8.4887 ± 0.1959 0.15312 ± 0.00790 0.25527 ± 0.00568 17.314 ± 0.270 % 82.848 ± 0.293 %
|
| 266 |
+
66 8.5135 ± 0.1952 0.15103 ± 0.00781 0.25343 ± 0.00560 17.215 ± 0.268 % 82.941 ± 0.290 %
|
| 267 |
+
67 8.5154 ± 0.1939 0.15031 ± 0.00772 0.25162 ± 0.00552 17.122 ± 0.265 % 83.020 ± 0.287 %
|
| 268 |
+
68 8.4466 ± 0.1906 0.14817 ± 0.00762 0.24896 ± 0.00544 17.013 ± 0.263 % 83.126 ± 0.284 %
|
| 269 |
+
69 8.4637 ± 0.1897 0.14574 ± 0.00753 0.24687 ± 0.00537 16.920 ± 0.261 % 83.154 ± 0.282 %
|
| 270 |
+
70 8.4153 ± 0.1869 0.14380 ± 0.00745 0.24530 ± 0.00530 16.851 ± 0.259 % 83.244 ± 0.280 %
|
| 271 |
+
71 8.3793 ± 0.1848 0.14150 ± 0.00737 0.24331 ± 0.00523 16.774 ± 0.257 % 83.270 ± 0.277 %
|
| 272 |
+
72 8.3926 ± 0.1841 0.14090 ± 0.00732 0.24254 ± 0.00522 16.706 ± 0.255 % 83.339 ± 0.275 %
|
| 273 |
+
73 8.3863 ± 0.1825 0.13942 ± 0.00723 0.24098 ± 0.00516 16.641 ± 0.253 % 83.336 ± 0.273 %
|
| 274 |
+
74 8.3651 ± 0.1806 0.13772 ± 0.00715 0.23940 ± 0.00509 16.561 ± 0.251 % 83.376 ± 0.271 %
|
| 275 |
+
75 8.3558 ± 0.1793 0.13569 ± 0.00709 0.23841 ± 0.00503 16.520 ± 0.248 % 83.388 ± 0.269 %
|
| 276 |
+
76 8.4190 ± 0.1796 0.13473 ± 0.00702 0.23737 ± 0.00497 16.461 ± 0.247 % 83.426 ± 0.267 %
|
| 277 |
+
77 8.4023 ± 0.1781 0.13303 ± 0.00696 0.23595 ± 0.00491 16.384 ± 0.245 % 83.499 ± 0.265 %
|
| 278 |
+
78 8.4102 ± 0.1772 0.13213 ± 0.00691 0.23496 ± 0.00485 16.335 ± 0.243 % 83.529 ± 0.263 %
|
| 279 |
+
79 8.4044 ± 0.1760 0.12999 ± 0.00684 0.23355 ± 0.00480 16.255 ± 0.241 % 83.539 ± 0.261 %
|
| 280 |
+
80 8.4009 ± 0.1752 0.12933 ± 0.00679 0.23461 ± 0.00477 16.242 ± 0.239 % 83.529 ± 0.260 %
|
| 281 |
+
81 8.3639 ± 0.1734 0.12801 ± 0.00673 0.23317 ± 0.00472 16.181 ± 0.237 % 83.563 ± 0.258 %
|
| 282 |
+
82 8.3373 ± 0.1715 0.12775 ± 0.00666 0.23187 ± 0.00467 16.113 ± 0.236 % 83.568 ± 0.256 %
|
| 283 |
+
83 8.3652 ± 0.1709 0.12678 ± 0.00660 0.23043 ± 0.00461 16.046 ± 0.234 % 83.586 ± 0.255 %
|
| 284 |
+
84 8.3786 ± 0.1698 0.12619 ± 0.00653 0.22891 ± 0.00456 15.973 ± 0.232 % 83.562 ± 0.253 %
|
| 285 |
+
85 8.3675 ± 0.1683 0.12542 ± 0.00646 0.22742 ± 0.00451 15.905 ± 0.231 % 83.562 ± 0.252 %
|
| 286 |
+
86 8.2901 ± 0.1652 0.12517 ± 0.00641 0.22634 ± 0.00446 15.854 ± 0.229 % 83.575 ± 0.250 %
|
| 287 |
+
87 8.2190 ± 0.1623 0.12436 ± 0.00636 0.22515 ± 0.00441 15.814 ± 0.227 % 83.552 ± 0.249 %
|
| 288 |
+
88 8.1417 ± 0.1594 0.12324 ± 0.00629 0.22371 ± 0.00437 15.749 ± 0.225 % 83.627 ± 0.247 %
|
| 289 |
+
89 8.0560 ± 0.1563 0.12263 ± 0.00623 0.22251 ± 0.00432 15.709 ± 0.224 % 83.662 ± 0.245 %
|
| 290 |
+
90 7.9918 ± 0.1538 0.12234 ± 0.00618 0.22123 ± 0.00428 15.653 ± 0.222 % 83.717 ± 0.244 %
|
| 291 |
+
91 7.9258 ± 0.1513 0.12115 ± 0.00613 0.22016 ± 0.00424 15.615 ± 0.221 % 83.771 ± 0.242 %
|
| 292 |
+
92 7.8582 ± 0.1487 0.12065 ± 0.00607 0.21902 ± 0.00419 15.579 ± 0.219 % 83.798 ± 0.241 %
|
| 293 |
+
93 7.8731 ± 0.1484 0.12004 ± 0.00604 0.21886 ± 0.00418 15.539 ± 0.218 % 83.833 ± 0.239 %
|
| 294 |
+
94 7.9020 ± 0.1481 0.11938 ± 0.00599 0.21753 ± 0.00414 15.471 ± 0.216 % 83.855 ± 0.238 %
|
| 295 |
+
95 8.0186 ± 0.1499 0.11883 ± 0.00594 0.21682 ± 0.00410 15.412 ± 0.215 % 83.856 ± 0.236 %
|
| 296 |
+
96 8.1170 ± 0.1511 0.11807 ± 0.00589 0.21618 ± 0.00406 15.355 ± 0.214 % 83.791 ± 0.236 %
|
| 297 |
+
97 8.1990 ± 0.1519 0.11746 ± 0.00584 0.21494 ± 0.00402 15.294 ± 0.213 % 83.780 ± 0.234 %
|
| 298 |
+
98 8.3464 ± 0.1544 0.11712 ± 0.00580 0.21370 ± 0.00398 15.224 ± 0.212 % 83.766 ± 0.233 %
|
| 299 |
+
99 8.4750 ± 0.1564 0.11685 ± 0.00575 0.21272 ± 0.00394 15.158 ± 0.210 % 83.684 ± 0.233 %
|
| 300 |
+
100 8.5133 ± 0.1564 0.11647 ± 0.00571 0.21231 ± 0.00391 15.115 ± 0.209 % 83.655 ± 0.232 %
|
| 301 |
+
101 8.5436 ± 0.1563 0.11520 ± 0.00567 0.21162 ± 0.00389 15.075 ± 0.208 % 83.692 ± 0.230 %
|
| 302 |
+
102 8.6123 ± 0.1574 0.11531 ± 0.00563 0.21179 ± 0.00387 15.042 ± 0.207 % 83.676 ± 0.229 %
|
| 303 |
+
103 8.5783 ± 0.1561 0.11472 ± 0.00559 0.21117 ± 0.00384 15.055 ± 0.206 % 83.689 ± 0.228 %
|
| 304 |
+
104 8.5293 ± 0.1543 0.11651 ± 0.00560 0.21286 ± 0.00386 15.198 ± 0.206 % 83.703 ± 0.227 %
|
| 305 |
+
105 8.4111 ± 0.1511 0.11707 ± 0.00558 0.21375 ± 0.00386 15.320 ± 0.206 % 83.750 ± 0.225 %
|
| 306 |
+
106 8.2715 ± 0.1476 0.11763 ± 0.00556 0.21353 ± 0.00385 15.385 ± 0.205 % 83.844 ± 0.224 %
|
| 307 |
+
107 8.3266 ± 0.1478 0.11654 ± 0.00551 0.21226 ± 0.00381 15.325 ± 0.204 % 83.808 ± 0.223 %
|
| 308 |
+
108 8.3318 ± 0.1471 0.11538 ± 0.00547 0.21152 ± 0.00378 15.297 ± 0.203 % 83.798 ± 0.222 %
|
| 309 |
+
109 8.3503 ± 0.1468 0.11487 ± 0.00544 0.21106 ± 0.00375 15.275 ± 0.202 % 83.792 ± 0.221 %
|
| 310 |
+
110 8.3871 ± 0.1469 0.11458 ± 0.00540 0.21025 ± 0.00372 15.234 ± 0.201 % 83.815 ± 0.220 %
|
| 311 |
+
111 8.4333 ± 0.1469 0.11376 ± 0.00537 0.20942 ± 0.00369 15.188 ± 0.200 % 83.830 ± 0.219 %
|
| 312 |
+
112 8.4361 ± 0.1462 0.11275 ± 0.00532 0.20837 ± 0.00366 15.139 ± 0.199 % 83.852 ± 0.218 %
|
| 313 |
+
113 8.4411 ± 0.1454 0.11196 ± 0.00528 0.20718 ± 0.00363 15.088 ± 0.198 % 83.866 ± 0.217 %
|
| 314 |
+
114 8.4562 ± 0.1452 0.11142 ± 0.00525 0.20617 ± 0.00360 15.036 ± 0.197 % 83.887 ± 0.216 %
|
| 315 |
+
115 8.4428 ± 0.1443 0.11218 ± 0.00523 0.20680 ± 0.00358 15.078 ± 0.196 % 83.860 ± 0.215 %
|
| 316 |
+
116 8.4565 ± 0.1439 0.11519 ± 0.00527 0.21174 ± 0.00363 15.292 ± 0.196 % 83.742 ± 0.215 %
|
| 317 |
+
117 8.4195 ± 0.1425 0.12437 ± 0.00537 0.22025 ± 0.00375 15.781 ± 0.197 % 83.576 ± 0.214 %
|
| 318 |
+
118 8.3768 ± 0.1409 0.13215 ± 0.00544 0.22680 ± 0.00382 16.138 ± 0.197 % 83.463 ± 0.214 %
|
| 319 |
+
119 8.3341 ± 0.1394 0.14049 ± 0.00553 0.23472 ± 0.00393 16.599 ± 0.199 % 83.315 ± 0.214 %
|
| 320 |
+
120 8.2894 ± 0.1379 0.14645 ± 0.00559 0.24141 ± 0.00399 16.939 ± 0.198 % 83.157 ± 0.214 %
|
| 321 |
+
121 8.2455 ± 0.1365 0.15278 ± 0.00564 0.24873 ± 0.00407 17.363 ± 0.199 % 82.991 ± 0.214 %
|
| 322 |
+
|
| 323 |
+
====== Perplexity statistics ======
|
| 324 |
+
Mean PPL(Q) : 8.245452 ± 0.136454
|
| 325 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 326 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 94.07%
|
| 327 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.152778 ± 0.005643
|
| 328 |
+
Mean PPL(Q)/PPL(base) : 1.165066 ± 0.006575
|
| 329 |
+
Mean PPL(Q)-PPL(base) : 1.168212 ± 0.048381
|
| 330 |
+
|
| 331 |
+
====== KL divergence statistics ======
|
| 332 |
+
Mean KLD: 0.248731 ± 0.004073
|
| 333 |
+
Maximum KLD: 15.101192
|
| 334 |
+
99.9% KLD: 7.450212
|
| 335 |
+
99.0% KLD: 3.810123
|
| 336 |
+
95.0% KLD: 1.125538
|
| 337 |
+
90.0% KLD: 0.465921
|
| 338 |
+
Median KLD: 0.056457
|
| 339 |
+
10.0% KLD: 0.000318
|
| 340 |
+
5.0% KLD: 0.000052
|
| 341 |
+
1.0% KLD: 0.000003
|
| 342 |
+
0.1% KLD: -0.000000
|
| 343 |
+
Minimum KLD: -0.000003
|
| 344 |
+
|
| 345 |
+
====== Token probability statistics ======
|
| 346 |
+
Mean Δp: -3.037 ± 0.097 %
|
| 347 |
+
Maximum Δp: 99.792%
|
| 348 |
+
99.9% Δp: 77.019%
|
| 349 |
+
99.0% Δp: 32.363%
|
| 350 |
+
95.0% Δp: 12.625%
|
| 351 |
+
90.0% Δp: 6.502%
|
| 352 |
+
75.0% Δp: 0.680%
|
| 353 |
+
Median Δp: -0.011%
|
| 354 |
+
25.0% Δp: -2.450%
|
| 355 |
+
10.0% Δp: -12.819%
|
| 356 |
+
5.0% Δp: -30.170%
|
| 357 |
+
1.0% Δp: -88.453%
|
| 358 |
+
0.1% Δp: -98.990%
|
| 359 |
+
Minimum Δp: -99.911%
|
| 360 |
+
RMS Δp : 17.363 ± 0.199 %
|
| 361 |
+
Same top p: 82.991 ± 0.214 %
|
| 362 |
+
|
| 363 |
+
llama_perf_context_print: load time = 33766.54 ms
|
| 364 |
+
llama_perf_context_print: prompt eval time = 67408.81 ms / 61952 tokens ( 1.09 ms per token, 919.05 tokens per second)
|
| 365 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 366 |
+
llama_perf_context_print: total time = 80106.81 ms / 61953 tokens
|
| 367 |
+
llama_perf_context_print: graphs reused = 0
|
| 368 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 993 + (22687 = 20953 + 272 + 1461) + 453 |
|
| 370 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1119 + (22561 = 18667 + 720 + 3174) + 453 |
|
| 371 |
+
llama_memory_breakdown_print: | - Host | 81529 = 81425 + 0 + 104 |
|
| 372 |
+
```
|
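Each of these per-quant logs ends with the same summary blocks (perplexity, KL divergence, token probability statistics). Below is a minimal sketch for pulling the headline numbers out of one of the uploaded .md files; it assumes the field labels keep exactly the format shown in the log above, and the function name and example path are only illustrative, not part of this repo's tooling.

```python
import re
from pathlib import Path

# Field labels copied from the summary blocks in these logs; the first
# number after each label is captured, the "± ..." uncertainty is ignored.
PATTERNS = {
    "mean_kld":   re.compile(r"Mean KLD:\s*([0-9.]+)"),
    "ppl_ratio":  re.compile(r"Mean PPL\(Q\)/PPL\(base\)\s*:\s*([0-9.]+)"),
    "rms_dp":     re.compile(r"RMS Δp\s*:\s*([0-9.]+)"),
    "same_top_p": re.compile(r"Same top p:\s*([0-9.]+)"),
}

def parse_kld_log(path: str) -> dict:
    """Return the headline stats found in one llama-perplexity KLD log."""
    text = Path(path).read_text(encoding="utf-8")
    return {k: float(m.group(1)) for k, p in PATTERNS.items() if (m := p.search(text))}

if __name__ == "__main__":
    # Hypothetical local checkout path to one of the files added in this commit.
    print(parse_kld_log("kld_data/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL.md"))
```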
kld_data/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL.md
ADDED
|
@@ -0,0 +1,372 @@
|
| 1 |
+
### MiniMax-M2.5-UD-Q3_K_XL (unsloth)
|
| 2 |
+
|
| 3 |
+
94.33 GiB (3.54 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 50827 used, -26955 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 51001 used, -27129 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 101829 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 56133 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37743 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 7858 MiB used, 16013 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1578 MiB used, 22292 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 14 layers ( 1 overflowing), 22460 MiB used, 1411 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 49 layers (39 overflowing), 22581 MiB used, 1290 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.01 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q3_K_XL/MiniMax-M2.5-UD-Q3_K_XL-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 12
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q3_K: 173 tensors
|
| 87 |
+
llama_model_loader: - type q4_K: 232 tensors
|
| 88 |
+
llama_model_loader: - type q5_K: 20 tensors
|
| 89 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 90 |
+
print_info: file format = GGUF V3 (latest)
|
| 91 |
+
print_info: file type = Q3_K - Medium
|
| 92 |
+
print_info: file size = 94.33 GiB (3.54 BPW)
|
| 93 |
+
load: 0 unused tokens
|
| 94 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 95 |
+
load: printing all EOG tokens:
|
| 96 |
+
load: - 200004 ('<fim_pad>')
|
| 97 |
+
load: - 200005 ('<reponame>')
|
| 98 |
+
load: - 200020 ('[e~[')
|
| 99 |
+
load: special tokens cache size = 54
|
| 100 |
+
load: token to piece cache size = 1.3355 MB
|
| 101 |
+
print_info: arch = minimax-m2
|
| 102 |
+
print_info: vocab_only = 0
|
| 103 |
+
print_info: no_alloc = 0
|
| 104 |
+
print_info: n_ctx_train = 196608
|
| 105 |
+
print_info: n_embd = 3072
|
| 106 |
+
print_info: n_embd_inp = 3072
|
| 107 |
+
print_info: n_layer = 62
|
| 108 |
+
print_info: n_head = 48
|
| 109 |
+
print_info: n_head_kv = 8
|
| 110 |
+
print_info: n_rot = 64
|
| 111 |
+
print_info: n_swa = 0
|
| 112 |
+
print_info: is_swa_any = 0
|
| 113 |
+
print_info: n_embd_head_k = 128
|
| 114 |
+
print_info: n_embd_head_v = 128
|
| 115 |
+
print_info: n_gqa = 6
|
| 116 |
+
print_info: n_embd_k_gqa = 1024
|
| 117 |
+
print_info: n_embd_v_gqa = 1024
|
| 118 |
+
print_info: f_norm_eps = 0.0e+00
|
| 119 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 120 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 121 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 122 |
+
print_info: f_logit_scale = 0.0e+00
|
| 123 |
+
print_info: f_attn_scale = 0.0e+00
|
| 124 |
+
print_info: n_ff = 1536
|
| 125 |
+
print_info: n_expert = 256
|
| 126 |
+
print_info: n_expert_used = 8
|
| 127 |
+
print_info: n_expert_groups = 0
|
| 128 |
+
print_info: n_group_used = 0
|
| 129 |
+
print_info: causal attn = 1
|
| 130 |
+
print_info: pooling type = 0
|
| 131 |
+
print_info: rope type = 2
|
| 132 |
+
print_info: rope scaling = linear
|
| 133 |
+
print_info: freq_base_train = 5000000.0
|
| 134 |
+
print_info: freq_scale_train = 1
|
| 135 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 136 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 137 |
+
print_info: rope_finetuned = unknown
|
| 138 |
+
print_info: model type = 230B.A10B
|
| 139 |
+
print_info: model params = 228.69 B
|
| 140 |
+
print_info: general.name = Minimax-M2.5
|
| 141 |
+
print_info: vocab type = BPE
|
| 142 |
+
print_info: n_vocab = 200064
|
| 143 |
+
print_info: n_merges = 199744
|
| 144 |
+
print_info: BOS token = 200034 ']~!b['
|
| 145 |
+
print_info: EOS token = 200020 '[e~['
|
| 146 |
+
print_info: UNK token = 200021 ']!d~['
|
| 147 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 148 |
+
print_info: LF token = 10 'Ċ'
|
| 149 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 150 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 151 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 152 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 155 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 156 |
+
print_info: EOG token = 200020 '[e~['
|
| 157 |
+
print_info: max token length = 256
|
| 158 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 159 |
+
load_tensors: offloading output layer to GPU
|
| 160 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 161 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47144.51 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47300.88 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 1641.01 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 20657.32 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18639.11 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 224.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 768.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1579.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 157 (with bs=4096), 83 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 23.01 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 5.38 seconds per pass - ETA 1.35 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.5826 ± 1.2794 0.03236 ± 0.02691 0.05069 ± 0.00515 7.140 ± 0.813 % 92.157 ± 1.687 %
|
| 202 |
+
2 4.7866 ± 0.5816 0.02361 ± 0.01705 0.04209 ± 0.00480 6.929 ± 1.113 % 93.529 ± 1.090 %
|
| 203 |
+
3 4.5785 ± 0.4594 0.01989 ± 0.01574 0.05137 ± 0.00446 8.554 ± 0.817 % 92.810 ± 0.935 %
|
| 204 |
+
4 5.1917 ± 0.4658 0.01228 ± 0.01446 0.05398 ± 0.00439 8.100 ± 0.694 % 92.745 ± 0.813 %
|
| 205 |
+
5 5.0470 ± 0.4077 0.03057 ± 0.01705 0.06582 ± 0.00730 8.517 ± 0.697 % 92.314 ± 0.746 %
|
| 206 |
+
6 6.0840 ± 0.4768 0.01448 ± 0.01544 0.07148 ± 0.00635 8.169 ± 0.610 % 91.373 ± 0.718 %
|
| 207 |
+
7 5.6698 ± 0.3971 0.01581 ± 0.01457 0.08423 ± 0.00660 8.885 ± 0.531 % 90.420 ± 0.697 %
|
| 208 |
+
8 6.3681 ± 0.4229 0.00845 ± 0.01351 0.08194 ± 0.00584 8.530 ± 0.486 % 89.951 ± 0.666 %
|
| 209 |
+
9 6.2525 ± 0.3868 0.00941 ± 0.01218 0.07774 ± 0.00524 8.188 ± 0.452 % 89.804 ± 0.632 %
|
| 210 |
+
10 5.7144 ± 0.3289 0.00847 ± 0.01114 0.07407 ± 0.00477 8.172 ± 0.420 % 89.922 ± 0.596 %
|
| 211 |
+
11 6.2751 ± 0.3495 0.01026 ± 0.01054 0.07413 ± 0.00442 8.184 ± 0.415 % 89.661 ± 0.575 %
|
| 212 |
+
12 6.9480 ± 0.3764 0.01062 ± 0.00995 0.07409 ± 0.00425 8.014 ± 0.395 % 89.346 ± 0.558 %
|
| 213 |
+
13 7.2078 ± 0.3717 0.01006 ± 0.00933 0.07156 ± 0.00394 7.811 ± 0.375 % 89.502 ± 0.532 %
|
| 214 |
+
14 7.7713 ± 0.3905 0.01009 ± 0.00911 0.07355 ± 0.00410 7.891 ± 0.379 % 89.300 ± 0.517 %
|
| 215 |
+
15 8.1323 ± 0.3951 0.00937 ± 0.00870 0.07246 ± 0.00384 7.738 ± 0.362 % 89.229 ± 0.501 %
|
| 216 |
+
16 8.3858 ± 0.3940 0.00786 ± 0.00828 0.07020 ± 0.00361 7.592 ± 0.346 % 89.461 ± 0.481 %
|
| 217 |
+
17 8.6498 ± 0.3971 0.01174 ± 0.00831 0.07423 ± 0.00355 7.534 ± 0.331 % 89.273 ± 0.470 %
|
| 218 |
+
18 8.1523 ± 0.3611 0.01173 ± 0.00805 0.07487 ± 0.00344 7.572 ± 0.323 % 89.434 ± 0.454 %
|
| 219 |
+
19 8.2769 ± 0.3563 0.01110 ± 0.00774 0.07289 ± 0.00326 7.492 ± 0.310 % 89.659 ± 0.437 %
|
| 220 |
+
20 8.3446 ± 0.3503 0.01138 ± 0.00767 0.07515 ± 0.00320 7.559 ± 0.304 % 89.569 ± 0.428 %
|
| 221 |
+
21 8.3159 ± 0.3403 0.01049 ± 0.00742 0.07441 ± 0.00306 7.547 ± 0.292 % 89.636 ± 0.417 %
|
| 222 |
+
22 8.6283 ± 0.3479 0.00971 ± 0.00722 0.07509 ± 0.00294 7.531 ± 0.282 % 89.340 ± 0.412 %
|
| 223 |
+
23 8.6315 ± 0.3415 0.00887 ± 0.00737 0.07811 ± 0.00302 7.688 ± 0.283 % 89.190 ± 0.405 %
|
| 224 |
+
24 9.0295 ± 0.3517 0.00787 ± 0.00716 0.07742 ± 0.00290 7.608 ± 0.274 % 89.150 ± 0.398 %
|
| 225 |
+
25 9.0200 ± 0.3450 0.00821 ± 0.00707 0.07985 ± 0.00290 7.835 ± 0.273 % 88.988 ± 0.392 %
|
| 226 |
+
26 8.5181 ± 0.3162 0.01957 ± 0.00740 0.09308 ± 0.00351 9.530 ± 0.330 % 88.688 ± 0.389 %
|
| 227 |
+
27 8.1641 ± 0.2951 0.03105 ± 0.00770 0.10754 ± 0.00417 10.655 ± 0.346 % 88.439 ± 0.385 %
|
| 228 |
+
28 8.2800 ± 0.2947 0.03148 ± 0.00752 0.10718 ± 0.00403 10.571 ± 0.337 % 88.389 ± 0.379 %
|
| 229 |
+
29 8.2118 ± 0.2875 0.03268 ± 0.00736 0.10670 ± 0.00392 10.519 ± 0.330 % 88.425 ± 0.372 %
|
| 230 |
+
30 7.6891 ± 0.2624 0.03411 ± 0.00724 0.10557 ± 0.00395 10.579 ± 0.334 % 88.758 ± 0.361 %
|
| 231 |
+
31 7.2397 ± 0.2409 0.03325 ± 0.00705 0.10414 ± 0.00387 10.565 ± 0.329 % 89.007 ± 0.352 %
|
| 232 |
+
32 7.0609 ± 0.2294 0.03348 ± 0.00686 0.10230 ± 0.00376 10.491 ± 0.322 % 89.154 ± 0.344 %
|
| 233 |
+
33 6.9226 ± 0.2199 0.03360 ± 0.00671 0.10066 ± 0.00365 10.433 ± 0.314 % 89.210 ± 0.338 %
|
| 234 |
+
34 7.1153 ± 0.2240 0.03403 ± 0.00665 0.10203 ± 0.00356 10.389 ± 0.307 % 89.112 ± 0.335 %
|
| 235 |
+
35 7.2296 ± 0.2262 0.03533 ± 0.00664 0.10545 ± 0.00356 10.559 ± 0.302 % 88.863 ± 0.333 %
|
| 236 |
+
36 7.2872 ± 0.2256 0.03439 ± 0.00653 0.10486 ± 0.00348 10.502 ± 0.296 % 88.813 ± 0.329 %
|
| 237 |
+
37 7.3349 ± 0.2243 0.03863 ± 0.00653 0.10871 ± 0.00360 10.867 ± 0.300 % 88.723 ± 0.326 %
|
| 238 |
+
38 7.5588 ± 0.2291 0.03895 ± 0.00641 0.10788 ± 0.00352 10.781 ± 0.295 % 88.648 ± 0.322 %
|
| 239 |
+
39 7.5304 ± 0.2250 0.04213 ± 0.00643 0.11166 ± 0.00363 11.076 ± 0.297 % 88.567 ± 0.319 %
|
| 240 |
+
40 7.3304 ± 0.2149 0.05015 ± 0.00663 0.12237 ± 0.00388 11.669 ± 0.294 % 88.363 ± 0.318 %
|
| 241 |
+
41 7.1772 ± 0.2064 0.05984 ± 0.00684 0.13540 ± 0.00418 12.621 ± 0.296 % 87.967 ± 0.318 %
|
| 242 |
+
42 7.0077 ± 0.1979 0.06727 ± 0.00701 0.14687 ± 0.00441 13.504 ± 0.302 % 87.703 ± 0.317 %
|
| 243 |
+
43 6.8467 ± 0.1900 0.07516 ± 0.00709 0.15577 ± 0.00465 14.042 ± 0.305 % 87.560 ± 0.315 %
|
| 244 |
+
44 6.7817 ± 0.1852 0.07196 ± 0.00697 0.15349 ± 0.00455 13.927 ± 0.300 % 87.576 ± 0.311 %
|
| 245 |
+
45 6.9220 ± 0.1881 0.07054 ± 0.00686 0.15233 ± 0.00446 13.798 ± 0.296 % 87.495 ± 0.309 %
|
| 246 |
+
46 7.0667 ± 0.1903 0.06915 ± 0.00674 0.15045 ± 0.00436 13.675 ± 0.293 % 87.451 ± 0.306 %
|
| 247 |
+
47 7.2188 ± 0.1929 0.06823 ± 0.00662 0.14822 ± 0.00427 13.545 ± 0.290 % 87.476 ± 0.302 %
|
| 248 |
+
48 7.0877 ± 0.1864 0.06656 ± 0.00649 0.14595 ± 0.00419 13.435 ± 0.286 % 87.598 ± 0.298 %
|
| 249 |
+
49 7.1924 ± 0.1872 0.06621 ± 0.00652 0.15032 ± 0.00446 13.413 ± 0.284 % 87.435 ± 0.297 %
|
| 250 |
+
50 7.2877 ± 0.1888 0.06521 ± 0.00643 0.14917 ± 0.00437 13.324 ± 0.280 % 87.412 ± 0.294 %
|
| 251 |
+
51 7.3942 ± 0.1900 0.06432 ± 0.00632 0.14726 ± 0.00429 13.217 ± 0.277 % 87.489 ± 0.290 %
|
| 252 |
+
52 7.4615 ± 0.1898 0.06408 ± 0.00625 0.14677 ± 0.00421 13.144 ± 0.274 % 87.391 ± 0.288 %
|
| 253 |
+
53 7.5663 ± 0.1906 0.06223 ± 0.00616 0.14527 ± 0.00413 13.058 ± 0.270 % 87.333 ± 0.286 %
|
| 254 |
+
54 7.6185 ± 0.1897 0.06133 ± 0.00606 0.14359 ± 0.00406 12.959 ± 0.268 % 87.349 ± 0.283 %
|
| 255 |
+
55 7.6717 ± 0.1891 0.06126 ± 0.00596 0.14196 ± 0.00399 12.866 ± 0.265 % 87.373 ± 0.280 %
|
| 256 |
+
56 7.7049 ± 0.1881 0.05985 ± 0.00587 0.14033 ± 0.00392 12.774 ± 0.262 % 87.409 ± 0.278 %
|
| 257 |
+
57 7.7134 ± 0.1866 0.06101 ± 0.00588 0.14128 ± 0.00388 12.763 ± 0.259 % 87.355 ± 0.276 %
|
| 258 |
+
58 7.7239 ± 0.1853 0.06100 ± 0.00581 0.14014 ± 0.00382 12.708 ± 0.256 % 87.356 ± 0.273 %
|
| 259 |
+
59 7.6742 ± 0.1822 0.05968 ± 0.00572 0.13814 ± 0.00376 12.611 ± 0.253 % 87.438 ± 0.270 %
|
| 260 |
+
60 7.6901 ± 0.1812 0.06027 ± 0.00566 0.13737 ± 0.00371 12.572 ± 0.251 % 87.444 ± 0.268 %
|
| 261 |
+
61 7.7370 ± 0.1808 0.05971 ± 0.00559 0.13640 ± 0.00365 12.507 ± 0.249 % 87.425 ± 0.266 %
|
| 262 |
+
62 7.7001 ± 0.1787 0.05819 ± 0.00555 0.13558 ± 0.00360 12.478 ± 0.246 % 87.457 ± 0.263 %
|
| 263 |
+
63 7.7408 ± 0.1787 0.05714 ± 0.00550 0.13512 ± 0.00356 12.414 ± 0.244 % 87.445 ± 0.261 %
|
| 264 |
+
64 7.7151 ± 0.1763 0.05640 ± 0.00544 0.13408 ± 0.00351 12.339 ± 0.241 % 87.488 ± 0.259 %
|
| 265 |
+
65 7.7042 ± 0.1747 0.05615 ± 0.00540 0.13355 ± 0.00347 12.279 ± 0.239 % 87.548 ± 0.256 %
|
| 266 |
+
66 7.7394 ± 0.1744 0.05570 ± 0.00535 0.13291 ± 0.00342 12.228 ± 0.237 % 87.546 ± 0.255 %
|
| 267 |
+
67 7.7513 ± 0.1736 0.05630 ± 0.00529 0.13224 ± 0.00337 12.170 ± 0.234 % 87.586 ± 0.252 %
|
| 268 |
+
68 7.7010 ± 0.1709 0.05576 ± 0.00523 0.13105 ± 0.00332 12.101 ± 0.232 % 87.659 ± 0.250 %
|
| 269 |
+
69 7.7325 ± 0.1705 0.05539 ± 0.00518 0.13024 ± 0.00328 12.040 ± 0.230 % 87.695 ± 0.248 %
|
| 270 |
+
70 7.6990 ± 0.1683 0.05483 ± 0.00512 0.12949 ± 0.00323 11.984 ± 0.228 % 87.725 ± 0.246 %
|
| 271 |
+
71 7.6769 ± 0.1667 0.05395 ± 0.00507 0.12857 ± 0.00319 11.928 ± 0.226 % 87.733 ± 0.244 %
|
| 272 |
+
72 7.6939 ± 0.1662 0.05399 ± 0.00506 0.12826 ± 0.00316 11.903 ± 0.225 % 87.767 ± 0.242 %
|
| 273 |
+
73 7.6955 ± 0.1649 0.05346 ± 0.00500 0.12745 ± 0.00312 11.854 ± 0.223 % 87.752 ± 0.240 %
|
| 274 |
+
74 7.6802 ± 0.1634 0.05229 ± 0.00495 0.12668 ± 0.00308 11.798 ± 0.221 % 87.769 ± 0.239 %
|
| 275 |
+
75 7.6831 ± 0.1624 0.05175 ± 0.00491 0.12641 ± 0.00305 11.771 ± 0.219 % 87.739 ± 0.237 %
|
| 276 |
+
76 7.7436 ± 0.1628 0.05111 ± 0.00486 0.12586 ± 0.00301 11.730 ± 0.217 % 87.719 ± 0.236 %
|
| 277 |
+
77 7.7438 ± 0.1619 0.05141 ± 0.00483 0.12520 ± 0.00298 11.685 ± 0.215 % 87.762 ± 0.234 %
|
| 278 |
+
78 7.7510 ± 0.1611 0.05052 ± 0.00479 0.12453 ± 0.00294 11.641 ± 0.214 % 87.783 ± 0.232 %
|
| 279 |
+
79 7.7540 ± 0.1601 0.04944 ± 0.00476 0.12417 ± 0.00291 11.617 ± 0.212 % 87.769 ± 0.231 %
|
| 280 |
+
80 7.7499 ± 0.1594 0.04866 ± 0.00474 0.12462 ± 0.00289 11.599 ± 0.210 % 87.755 ± 0.230 %
|
| 281 |
+
81 7.7211 ± 0.1579 0.04805 ± 0.00469 0.12412 ± 0.00286 11.563 ± 0.209 % 87.800 ± 0.228 %
|
| 282 |
+
82 7.7012 ± 0.1563 0.04839 ± 0.00465 0.12344 ± 0.00283 11.522 ± 0.207 % 87.814 ± 0.226 %
|
| 283 |
+
83 7.7325 ± 0.1558 0.04813 ± 0.00461 0.12268 ± 0.00279 11.470 ± 0.206 % 87.838 ± 0.225 %
|
| 284 |
+
84 7.7500 ± 0.1549 0.04821 ± 0.00456 0.12177 ± 0.00276 11.416 ± 0.204 % 87.820 ± 0.223 %
|
| 285 |
+
85 7.7451 ± 0.1536 0.04813 ± 0.00451 0.12090 ± 0.00273 11.366 ± 0.203 % 87.852 ± 0.222 %
|
| 286 |
+
86 7.6722 ± 0.1508 0.04772 ± 0.00447 0.12018 ± 0.00270 11.325 ± 0.201 % 87.893 ± 0.220 %
|
| 287 |
+
87 7.6072 ± 0.1482 0.04701 ± 0.00443 0.11950 ± 0.00267 11.303 ± 0.199 % 87.920 ± 0.219 %
|
| 288 |
+
88 7.5466 ± 0.1457 0.04734 ± 0.00439 0.11879 ± 0.00264 11.268 ± 0.198 % 87.959 ± 0.217 %
|
| 289 |
+
89 7.4732 ± 0.1430 0.04755 ± 0.00435 0.11822 ± 0.00262 11.232 ± 0.196 % 87.997 ± 0.216 %
|
| 290 |
+
90 7.4166 ± 0.1408 0.04765 ± 0.00431 0.11763 ± 0.00259 11.189 ± 0.195 % 88.039 ± 0.214 %
|
| 291 |
+
91 7.3638 ± 0.1387 0.04761 ± 0.00428 0.11695 ± 0.00256 11.165 ± 0.194 % 88.072 ± 0.213 %
|
| 292 |
+
92 7.3031 ± 0.1364 0.04738 ± 0.00424 0.11639 ± 0.00254 11.145 ± 0.192 % 88.099 ± 0.211 %
|
| 293 |
+
93 7.3229 ± 0.1362 0.04759 ± 0.00421 0.11668 ± 0.00253 11.148 ± 0.191 % 88.109 ± 0.210 %
|
| 294 |
+
94 7.3523 ± 0.1360 0.04728 ± 0.00418 0.11588 ± 0.00250 11.101 ± 0.190 % 88.127 ± 0.209 %
|
| 295 |
+
95 7.4616 ± 0.1376 0.04683 ± 0.00415 0.11544 ± 0.00248 11.058 ± 0.189 % 88.124 ± 0.208 %
|
| 296 |
+
96 7.5547 ± 0.1388 0.04627 ± 0.00412 0.11509 ± 0.00245 11.016 ± 0.188 % 88.043 ± 0.207 %
|
| 297 |
+
97 7.6308 ± 0.1395 0.04563 ± 0.00408 0.11445 ± 0.00243 10.969 ± 0.187 % 88.017 ± 0.206 %
|
| 298 |
+
98 7.7737 ± 0.1420 0.04605 ± 0.00405 0.11378 ± 0.00241 10.919 ± 0.186 % 88.003 ± 0.206 %
|
| 299 |
+
99 7.8951 ± 0.1438 0.04598 ± 0.00402 0.11339 ± 0.00238 10.875 ± 0.185 % 87.974 ± 0.205 %
|
| 300 |
+
100 7.9249 ± 0.1436 0.04486 ± 0.00399 0.11323 ± 0.00237 10.845 ± 0.183 % 87.984 ± 0.204 %
|
| 301 |
+
101 7.9557 ± 0.1436 0.04390 ± 0.00397 0.11360 ± 0.00242 10.844 ± 0.183 % 87.987 ± 0.203 %
|
| 302 |
+
102 8.0226 ± 0.1447 0.04438 ± 0.00396 0.11361 ± 0.00240 10.828 ± 0.182 % 87.955 ± 0.202 %
|
| 303 |
+
103 7.9938 ± 0.1436 0.04416 ± 0.00393 0.11305 ± 0.00238 10.798 ± 0.180 % 87.995 ± 0.201 %
|
| 304 |
+
104 7.9355 ± 0.1417 0.04434 ± 0.00392 0.11342 ± 0.00238 10.838 ± 0.179 % 88.009 ± 0.199 %
|
| 305 |
+
105 7.8255 ± 0.1388 0.04490 ± 0.00391 0.11335 ± 0.00236 10.848 ± 0.178 % 88.082 ± 0.198 %
|
| 306 |
+
106 7.6952 ± 0.1356 0.04540 ± 0.00391 0.11304 ± 0.00236 10.858 ± 0.178 % 88.172 ± 0.196 %
|
| 307 |
+
107 7.7525 ± 0.1359 0.04509 ± 0.00387 0.11239 ± 0.00234 10.821 ± 0.177 % 88.202 ± 0.195 %
|
| 308 |
+
108 7.7633 ± 0.1354 0.04470 ± 0.00385 0.11206 ± 0.00232 10.811 ± 0.176 % 88.192 ± 0.194 %
|
| 309 |
+
109 7.7833 ± 0.1352 0.04456 ± 0.00382 0.11178 ± 0.00230 10.783 ± 0.175 % 88.196 ± 0.194 %
|
| 310 |
+
110 7.8187 ± 0.1353 0.04440 ± 0.00380 0.11134 ± 0.00228 10.757 ± 0.174 % 88.168 ± 0.193 %
|
| 311 |
+
111 7.8657 ± 0.1354 0.04409 ± 0.00377 0.11085 ± 0.00226 10.722 ± 0.173 % 88.147 ± 0.192 %
|
| 312 |
+
112 7.8713 ± 0.1348 0.04346 ± 0.00374 0.11029 ± 0.00224 10.690 ± 0.172 % 88.151 ± 0.191 %
|
| 313 |
+
113 7.8813 ± 0.1342 0.04336 ± 0.00372 0.10970 ± 0.00223 10.655 ± 0.171 % 88.169 ± 0.190 %
|
| 314 |
+
114 7.8959 ± 0.1340 0.04286 ± 0.00370 0.10926 ± 0.00221 10.619 ± 0.170 % 88.184 ± 0.189 %
|
| 315 |
+
115 7.8855 ± 0.1332 0.04390 ± 0.00369 0.10986 ± 0.00221 10.654 ± 0.170 % 88.160 ± 0.189 %
|
| 316 |
+
116 7.8888 ± 0.1326 0.04570 ± 0.00371 0.11279 ± 0.00224 10.878 ± 0.170 % 88.056 ± 0.189 %
|
| 317 |
+
117 7.8132 ± 0.1306 0.04964 ± 0.00375 0.11620 ± 0.00228 11.158 ± 0.171 % 88.004 ± 0.188 %
|
| 318 |
+
118 7.7322 ± 0.1284 0.05208 ± 0.00378 0.11935 ± 0.00231 11.395 ± 0.171 % 87.933 ± 0.188 %
|
| 319 |
+
119 7.6522 ± 0.1262 0.05513 ± 0.00383 0.12260 ± 0.00235 11.702 ± 0.173 % 87.886 ± 0.187 %
|
| 320 |
+
120 7.5902 ± 0.1245 0.05834 ± 0.00390 0.12673 ± 0.00242 11.976 ± 0.173 % 87.791 ± 0.187 %
|
| 321 |
+
121 7.5275 ± 0.1227 0.06168 ± 0.00394 0.13043 ± 0.00247 12.257 ± 0.175 % 87.704 ± 0.187 %
|
| 322 |
+
|
| 323 |
+
====== Perplexity statistics ======
|
| 324 |
+
Mean PPL(Q) : 7.527545 ± 0.122738
|
| 325 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 326 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 97.06%
|
| 327 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.061685 ± 0.003937
|
| 328 |
+
Mean PPL(Q)/PPL(base) : 1.063627 ± 0.004188
|
| 329 |
+
Mean PPL(Q)-PPL(base) : 0.450305 ± 0.029935
|
| 330 |
+
|
| 331 |
+
====== KL divergence statistics ======
|
| 332 |
+
Mean KLD: 0.130427 ± 0.002466
|
| 333 |
+
Maximum KLD: 15.723516
|
| 334 |
+
99.9% KLD: 5.405243
|
| 335 |
+
99.0% KLD: 2.102021
|
| 336 |
+
95.0% KLD: 0.508409
|
| 337 |
+
90.0% KLD: 0.237711
|
| 338 |
+
Median KLD: 0.028882
|
| 339 |
+
10.0% KLD: 0.000148
|
| 340 |
+
5.0% KLD: 0.000024
|
| 341 |
+
1.0% KLD: 0.000001
|
| 342 |
+
0.1% KLD: -0.000001
|
| 343 |
+
Minimum KLD: -0.000004
|
| 344 |
+
|
| 345 |
+
====== Token probability statistics ======
|
| 346 |
+
Mean Δp: -1.373 ± 0.069 %
|
| 347 |
+
Maximum Δp: 99.654%
|
| 348 |
+
99.9% Δp: 71.001%
|
| 349 |
+
99.0% Δp: 27.309%
|
| 350 |
+
95.0% Δp: 9.813%
|
| 351 |
+
90.0% Δp: 5.009%
|
| 352 |
+
75.0% Δp: 0.628%
|
| 353 |
+
Median Δp: -0.002%
|
| 354 |
+
25.0% Δp: -1.251%
|
| 355 |
+
10.0% Δp: -7.498%
|
| 356 |
+
5.0% Δp: -16.279%
|
| 357 |
+
1.0% Δp: -60.177%
|
| 358 |
+
0.1% Δp: -93.782%
|
| 359 |
+
Minimum Δp: -99.858%
|
| 360 |
+
RMS Δp : 12.257 ± 0.175 %
|
| 361 |
+
Same top p: 87.704 ± 0.187 %
|
| 362 |
+
|
| 363 |
+
llama_perf_context_print: load time = 35553.43 ms
|
| 364 |
+
llama_perf_context_print: prompt eval time = 73992.73 ms / 61952 tokens ( 1.19 ms per token, 837.27 tokens per second)
|
| 365 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 366 |
+
llama_perf_context_print: total time = 86623.74 ms / 61953 tokens
|
| 367 |
+
llama_perf_context_print: graphs reused = 0
|
| 368 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1253 + (22460 = 20657 + 224 + 1578) + 420 |
|
| 370 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1099 + (22581 = 18639 + 768 + 3174) + 454 |
|
| 371 |
+
llama_memory_breakdown_print: | - Host | 96190 = 96086 + 0 + 104 |
|
| 372 |
+
```
|
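The BPW figure quoted in each log header follows directly from the reported file size and parameter count (here 94.33 GiB and 228.69 B params for UD-Q3_K_XL). A quick sanity check, assuming GiB means 2^30 bytes as llama.cpp reports it:

```python
# Bits per weight = file size in bits / parameter count.
# Numbers taken from the UD-Q3_K_XL log above; GiB = 2**30 bytes is an assumption.
file_size_gib = 94.33
n_params = 228.69e9

bpw = 8 * file_size_gib * 2**30 / n_params
print(f"{bpw:.2f} BPW")  # ~3.54, matching "file size = 94.33 GiB (3.54 BPW)"
```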
kld_data/unsloth/UD-Q4_K_XL/MiniMax-M2.5-UD-Q4_K_XL.md
ADDED
|
@@ -0,0 +1,371 @@
|
| 1 |
+
### MiniMax-M2.5-UD-Q4_K_XL (unsloth)
|
| 2 |
+
|
| 3 |
+
122.30 GiB (4.59 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q4_K_XL/MiniMax-M2.5-UD-Q4_K_XL-00001-of-00004.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 65461 used, -41589 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 65001 used, -41129 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 130463 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 84767 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37744 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 8322 MiB used, 15549 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1491 MiB used, 22379 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 11 layers ( 1 overflowing), 22506 MiB used, 1365 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 52 layers (44 overflowing), 22729 MiB used, 1142 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.04 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 3 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q4_K_XL/MiniMax-M2.5-UD-Q4_K_XL-00001-of-00004.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 15
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 4
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 386 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 39 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 11 tensors
|
| 89 |
+
print_info: file format = GGUF V3 (latest)
|
| 90 |
+
print_info: file type = Q4_K - Medium
|
| 91 |
+
print_info: file size = 122.30 GiB (4.59 BPW)
|
| 92 |
+
load: 0 unused tokens
|
| 93 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 94 |
+
load: printing all EOG tokens:
|
| 95 |
+
load: - 200004 ('<fim_pad>')
|
| 96 |
+
load: - 200005 ('<reponame>')
|
| 97 |
+
load: - 200020 ('[e~[')
|
| 98 |
+
load: special tokens cache size = 54
|
| 99 |
+
load: token to piece cache size = 1.3355 MB
|
| 100 |
+
print_info: arch = minimax-m2
|
| 101 |
+
print_info: vocab_only = 0
|
| 102 |
+
print_info: no_alloc = 0
|
| 103 |
+
print_info: n_ctx_train = 196608
|
| 104 |
+
print_info: n_embd = 3072
|
| 105 |
+
print_info: n_embd_inp = 3072
|
| 106 |
+
print_info: n_layer = 62
|
| 107 |
+
print_info: n_head = 48
|
| 108 |
+
print_info: n_head_kv = 8
|
| 109 |
+
print_info: n_rot = 64
|
| 110 |
+
print_info: n_swa = 0
|
| 111 |
+
print_info: is_swa_any = 0
|
| 112 |
+
print_info: n_embd_head_k = 128
|
| 113 |
+
print_info: n_embd_head_v = 128
|
| 114 |
+
print_info: n_gqa = 6
|
| 115 |
+
print_info: n_embd_k_gqa = 1024
|
| 116 |
+
print_info: n_embd_v_gqa = 1024
|
| 117 |
+
print_info: f_norm_eps = 0.0e+00
|
| 118 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 119 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 120 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 121 |
+
print_info: f_logit_scale = 0.0e+00
|
| 122 |
+
print_info: f_attn_scale = 0.0e+00
|
| 123 |
+
print_info: n_ff = 1536
|
| 124 |
+
print_info: n_expert = 256
|
| 125 |
+
print_info: n_expert_used = 8
|
| 126 |
+
print_info: n_expert_groups = 0
|
| 127 |
+
print_info: n_group_used = 0
|
| 128 |
+
print_info: causal attn = 1
|
| 129 |
+
print_info: pooling type = 0
|
| 130 |
+
print_info: rope type = 2
|
| 131 |
+
print_info: rope scaling = linear
|
| 132 |
+
print_info: freq_base_train = 5000000.0
|
| 133 |
+
print_info: freq_scale_train = 1
|
| 134 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 135 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 136 |
+
print_info: rope_finetuned = unknown
|
| 137 |
+
print_info: model type = 230B.A10B
|
| 138 |
+
print_info: model params = 228.69 B
|
| 139 |
+
print_info: general.name = Minimax-M2.5
|
| 140 |
+
print_info: vocab type = BPE
|
| 141 |
+
print_info: n_vocab = 200064
|
| 142 |
+
print_info: n_merges = 199744
|
| 143 |
+
print_info: BOS token = 200034 ']~!b['
|
| 144 |
+
print_info: EOS token = 200020 '[e~['
|
| 145 |
+
print_info: UNK token = 200021 ']!d~['
|
| 146 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 147 |
+
print_info: LF token = 10 'Ċ'
|
| 148 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 149 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 150 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 151 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 155 |
+
print_info: EOG token = 200020 '[e~['
|
| 156 |
+
print_info: max token length = 256
|
| 157 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 158 |
+
load_tensors: offloading output layer to GPU
|
| 159 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 160 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46786.77 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47146.57 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 30813.21 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 20838.29 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18739.39 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 176.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 816.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 1492.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 181 (with bs=4096), 95 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 22.71 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 6.71 seconds per pass - ETA 1.68 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.4038 ± 1.2361 0.00482 ± 0.01802 0.02480 ± 0.00272 5.585 ± 0.692 % 93.333 ± 1.565 %
|
| 201 |
+
2 4.7486 ± 0.5749 0.01563 ± 0.01122 0.01880 ± 0.00161 4.465 ± 0.453 % 95.098 ± 0.957 %
|
| 202 |
+
3 4.5280 ± 0.4504 0.00880 ± 0.01013 0.02254 ± 0.00197 5.391 ± 0.519 % 94.641 ± 0.815 %
|
| 203 |
+
4 5.1363 ± 0.4557 0.00155 ± 0.00964 0.02667 ± 0.00245 5.585 ± 0.504 % 94.314 ± 0.725 %
|
| 204 |
+
5 4.9339 ± 0.3921 0.00790 ± 0.01016 0.02835 ± 0.00300 5.772 ± 0.567 % 94.039 ± 0.663 %
|
| 205 |
+
6 6.0092 ± 0.4687 0.00210 ± 0.00961 0.03244 ± 0.00263 5.635 ± 0.490 % 93.464 ± 0.632 %
|
| 206 |
+
7 5.5913 ± 0.3907 0.00188 ± 0.00936 0.03649 ± 0.00267 6.299 ± 0.475 % 93.389 ± 0.588 %
|
| 207 |
+
8 6.3433 ± 0.4220 0.00455 ± 0.00855 0.03593 ± 0.00236 6.046 ± 0.434 % 92.990 ± 0.565 %
|
| 208 |
+
9 6.2238 ± 0.3858 0.00481 ± 0.00783 0.03444 ± 0.00212 5.842 ± 0.401 % 93.028 ± 0.532 %
|
| 209 |
+
10 5.6969 ± 0.3288 0.00540 ± 0.00737 0.03362 ± 0.00195 5.953 ± 0.364 % 92.941 ± 0.507 %
|
| 210 |
+
11 6.2484 ± 0.3493 0.00599 ± 0.00705 0.03397 ± 0.00179 5.869 ± 0.337 % 92.834 ± 0.487 %
|
| 211 |
+
12 6.9396 ± 0.3769 0.00942 ± 0.00672 0.03505 ± 0.00181 5.716 ± 0.318 % 92.451 ± 0.478 %
|
| 212 |
+
13 7.2121 ± 0.3731 0.01066 ± 0.00631 0.03395 ± 0.00168 5.581 ± 0.301 % 92.609 ± 0.454 %
|
| 213 |
+
14 7.7788 ± 0.3922 0.01105 ± 0.00616 0.03401 ± 0.00159 5.489 ± 0.285 % 92.353 ± 0.445 %
|
| 214 |
+
15 8.1538 ± 0.3977 0.01201 ± 0.00586 0.03377 ± 0.00149 5.446 ± 0.271 % 92.105 ± 0.436 %
|
| 215 |
+
16 8.3944 ± 0.3958 0.00889 ± 0.00565 0.03358 ± 0.00148 5.361 ± 0.259 % 92.132 ± 0.422 %
|
| 216 |
+
17 8.6273 ± 0.3974 0.00914 ± 0.00556 0.03460 ± 0.00144 5.322 ± 0.251 % 92.157 ± 0.408 %
|
| 217 |
+
18 8.1316 ± 0.3614 0.00920 ± 0.00541 0.03471 ± 0.00141 5.283 ± 0.241 % 92.353 ± 0.392 %
|
| 218 |
+
19 8.2399 ± 0.3557 0.00662 ± 0.00524 0.03391 ± 0.00134 5.240 ± 0.232 % 92.363 ± 0.382 %
|
| 219 |
+
20 8.2819 ± 0.3483 0.00384 ± 0.00522 0.03546 ± 0.00139 5.277 ± 0.223 % 92.176 ± 0.376 %
|
| 220 |
+
21 8.2562 ± 0.3386 0.00329 ± 0.00507 0.03531 ± 0.00134 5.346 ± 0.219 % 92.288 ± 0.365 %
|
| 221 |
+
22 8.5849 ± 0.3476 0.00467 ± 0.00495 0.03609 ± 0.00130 5.394 ± 0.214 % 92.103 ± 0.360 %
|
| 222 |
+
23 8.5826 ± 0.3408 0.00318 ± 0.00506 0.03840 ± 0.00142 5.516 ± 0.209 % 92.055 ± 0.353 %
|
| 223 |
+
24 8.9816 ± 0.3510 0.00256 ± 0.00490 0.03802 ± 0.00136 5.439 ± 0.203 % 91.928 ± 0.348 %
|
| 224 |
+
25 8.9589 ± 0.3435 0.00142 ± 0.00483 0.03912 ± 0.00135 5.603 ± 0.209 % 91.718 ± 0.345 %
|
| 225 |
+
26 8.3693 ± 0.3116 0.00195 ± 0.00492 0.04283 ± 0.00147 6.340 ± 0.218 % 91.659 ± 0.340 %
|
| 226 |
+
27 7.9681 ± 0.2886 0.00675 ± 0.00516 0.05165 ± 0.00211 7.512 ± 0.282 % 91.431 ± 0.337 %
|
| 227 |
+
28 8.0942 ± 0.2886 0.00878 ± 0.00509 0.05186 ± 0.00204 7.476 ± 0.276 % 91.246 ± 0.334 %
|
| 228 |
+
29 8.0202 ± 0.2808 0.00907 ± 0.00498 0.05135 ± 0.00198 7.397 ± 0.270 % 91.359 ± 0.327 %
|
| 229 |
+
30 7.5013 ± 0.2560 0.00939 ± 0.00486 0.05006 ± 0.00192 7.335 ± 0.264 % 91.608 ± 0.317 %
|
| 230 |
+
31 7.0710 ± 0.2353 0.00967 ± 0.00475 0.04930 ± 0.00189 7.286 ± 0.260 % 91.853 ± 0.308 %
|
| 231 |
+
32 6.8914 ± 0.2238 0.00918 ± 0.00464 0.04853 ± 0.00183 7.254 ± 0.254 % 91.949 ± 0.301 %
|
| 232 |
+
33 6.7634 ± 0.2149 0.01034 ± 0.00454 0.04787 ± 0.00178 7.213 ± 0.248 % 91.979 ± 0.296 %
|
| 233 |
+
34 6.9455 ± 0.2185 0.00987 ± 0.00452 0.04858 ± 0.00174 7.197 ± 0.242 % 91.845 ± 0.294 %
|
| 234 |
+
35 7.0643 ± 0.2212 0.01219 ± 0.00453 0.05012 ± 0.00174 7.343 ± 0.240 % 91.630 ± 0.293 %
|
| 235 |
+
36 7.1232 ± 0.2207 0.01163 ± 0.00445 0.04989 ± 0.00170 7.304 ± 0.235 % 91.536 ± 0.291 %
|
| 236 |
+
37 7.1433 ± 0.2185 0.01216 ± 0.00441 0.05147 ± 0.00178 7.441 ± 0.236 % 91.489 ± 0.287 %
|
| 237 |
+
38 7.3562 ± 0.2231 0.01179 ± 0.00433 0.05123 ± 0.00173 7.411 ± 0.232 % 91.517 ± 0.283 %
|
| 238 |
+
39 7.3060 ± 0.2182 0.01188 ± 0.00431 0.05273 ± 0.00177 7.602 ± 0.234 % 91.443 ± 0.281 %
|
| 239 |
+
40 7.0717 ± 0.2071 0.01421 ± 0.00443 0.05813 ± 0.00195 8.177 ± 0.241 % 91.284 ± 0.279 %
|
| 240 |
+
41 6.8885 ± 0.1980 0.01878 ± 0.00462 0.06458 ± 0.00226 8.898 ± 0.259 % 91.095 ± 0.279 %
|
| 241 |
+
42 6.6899 ± 0.1889 0.02086 ± 0.00469 0.06842 ± 0.00233 9.312 ± 0.262 % 91.027 ± 0.276 %
|
| 242 |
+
43 6.5052 ± 0.1805 0.02400 ± 0.00479 0.07263 ± 0.00251 9.668 ± 0.267 % 90.999 ± 0.273 %
|
| 243 |
+
44 6.4511 ± 0.1761 0.02197 ± 0.00470 0.07151 ± 0.00245 9.580 ± 0.263 % 91.096 ± 0.269 %
|
| 244 |
+
45 6.5903 ± 0.1789 0.02142 ± 0.00464 0.07123 ± 0.00240 9.511 ± 0.259 % 91.085 ± 0.266 %
|
| 245 |
+
46 6.7324 ± 0.1811 0.02069 ± 0.00456 0.07058 ± 0.00235 9.437 ± 0.256 % 91.066 ± 0.263 %
|
| 246 |
+
47 6.8806 ± 0.1835 0.02025 ± 0.00448 0.06962 ± 0.00230 9.350 ± 0.253 % 91.039 ± 0.261 %
|
| 247 |
+
48 6.7628 ± 0.1775 0.01963 ± 0.00439 0.06858 ± 0.00226 9.267 ± 0.250 % 91.062 ± 0.258 %
|
| 248 |
+
49 6.8392 ± 0.1774 0.01585 ± 0.00448 0.07339 ± 0.00295 9.371 ± 0.251 % 90.892 ± 0.257 %
|
| 249 |
+
50 6.9399 ± 0.1792 0.01632 ± 0.00442 0.07297 ± 0.00290 9.319 ± 0.248 % 90.878 ± 0.255 %
|
| 250 |
+
51 7.0477 ± 0.1805 0.01633 ± 0.00435 0.07205 ± 0.00284 9.246 ± 0.245 % 90.957 ± 0.251 %
|
| 251 |
+
52 7.1131 ± 0.1803 0.01626 ± 0.00430 0.07208 ± 0.00279 9.204 ± 0.242 % 90.928 ± 0.249 %
|
| 252 |
+
53 7.2276 ± 0.1816 0.01643 ± 0.00424 0.07149 ± 0.00274 9.171 ± 0.239 % 90.958 ± 0.247 %
|
| 253 |
+
54 7.2806 ± 0.1807 0.01597 ± 0.00418 0.07062 ± 0.00269 9.101 ± 0.237 % 90.959 ± 0.244 %
|
| 254 |
+
55 7.3348 ± 0.1802 0.01636 ± 0.00411 0.06986 ± 0.00264 9.038 ± 0.234 % 90.973 ± 0.242 %
|
| 255 |
+
56 7.3762 ± 0.1796 0.01626 ± 0.00405 0.06908 ± 0.00260 8.974 ± 0.231 % 91.015 ± 0.239 %
|
| 256 |
+
57 7.3776 ± 0.1780 0.01650 ± 0.00402 0.06916 ± 0.00256 8.959 ± 0.229 % 90.960 ± 0.238 %
|
| 257 |
+
58 7.3861 ± 0.1766 0.01629 ± 0.00398 0.06882 ± 0.00252 8.946 ± 0.226 % 90.953 ± 0.236 %
|
| 258 |
+
59 7.3483 ± 0.1740 0.01628 ± 0.00392 0.06788 ± 0.00248 8.882 ± 0.224 % 91.034 ± 0.233 %
|
| 259 |
+
60 7.3619 ± 0.1729 0.01664 ± 0.00388 0.06765 ± 0.00245 8.843 ± 0.222 % 91.000 ± 0.231 %
|
| 260 |
+
61 7.4088 ± 0.1726 0.01636 ± 0.00384 0.06725 ± 0.00242 8.790 ± 0.219 % 91.025 ± 0.229 %
|
| 261 |
+
62 7.3751 ± 0.1706 0.01507 ± 0.00382 0.06674 ± 0.00238 8.747 ± 0.217 % 91.101 ± 0.226 %
|
| 262 |
+
63 7.4196 ± 0.1707 0.01476 ± 0.00379 0.06653 ± 0.00235 8.704 ± 0.215 % 91.111 ± 0.225 %
|
| 263 |
+
64 7.3996 ± 0.1685 0.01464 ± 0.00375 0.06620 ± 0.00231 8.664 ± 0.213 % 91.103 ± 0.223 %
|
| 264 |
+
65 7.3921 ± 0.1671 0.01480 ± 0.00372 0.06585 ± 0.00228 8.619 ± 0.211 % 91.125 ± 0.221 %
|
| 265 |
+
66 7.4351 ± 0.1671 0.01559 ± 0.00369 0.06564 ± 0.00224 8.583 ± 0.208 % 91.117 ± 0.219 %
|
| 266 |
+
67 7.4406 ± 0.1661 0.01539 ± 0.00365 0.06567 ± 0.00222 8.571 ± 0.207 % 91.121 ± 0.218 %
|
| 267 |
+
68 7.3975 ± 0.1637 0.01555 ± 0.00362 0.06509 ± 0.00219 8.521 ± 0.205 % 91.165 ± 0.216 %
|
| 268 |
+
69 7.4286 ± 0.1633 0.01530 ± 0.00359 0.06474 ± 0.00216 8.478 ± 0.204 % 91.219 ± 0.213 %
|
| 269 |
+
70 7.3960 ± 0.1611 0.01469 ± 0.00355 0.06438 ± 0.00213 8.452 ± 0.202 % 91.255 ± 0.211 %
|
| 270 |
+
71 7.3794 ± 0.1596 0.01443 ± 0.00351 0.06394 ± 0.00210 8.409 ± 0.200 % 91.301 ± 0.209 %
|
| 271 |
+
72 7.3982 ± 0.1591 0.01479 ± 0.00352 0.06376 ± 0.00208 8.377 ± 0.198 % 91.253 ± 0.209 %
|
| 272 |
+
73 7.4075 ± 0.1581 0.01531 ± 0.00349 0.06349 ± 0.00205 8.348 ± 0.196 % 91.238 ± 0.207 %
|
| 273 |
+
74 7.3985 ± 0.1567 0.01492 ± 0.00346 0.06342 ± 0.00202 8.321 ± 0.195 % 91.219 ± 0.206 %
|
| 274 |
+
75 7.4025 ± 0.1558 0.01455 ± 0.00344 0.06339 ± 0.00200 8.327 ± 0.192 % 91.200 ± 0.205 %
|
| 275 |
+
76 7.4658 ± 0.1563 0.01458 ± 0.00341 0.06329 ± 0.00198 8.313 ± 0.191 % 91.171 ± 0.204 %
|
| 276 |
+
77 7.4624 ± 0.1552 0.01440 ± 0.00338 0.06301 ± 0.00195 8.267 ± 0.189 % 91.179 ± 0.202 %
|
| 277 |
+
78 7.4741 ± 0.1545 0.01413 ± 0.00335 0.06286 ± 0.00193 8.255 ± 0.188 % 91.181 ± 0.201 %
|
| 278 |
+
79 7.4832 ± 0.1538 0.01390 ± 0.00333 0.06264 ± 0.00191 8.222 ± 0.186 % 91.214 ± 0.199 %
|
| 279 |
+
80 7.4867 ± 0.1534 0.01412 ± 0.00333 0.06265 ± 0.00189 8.196 ± 0.185 % 91.221 ± 0.198 %
|
| 280 |
+
81 7.4600 ± 0.1519 0.01364 ± 0.00330 0.06253 ± 0.00187 8.168 ± 0.183 % 91.247 ± 0.197 %
|
| 281 |
+
82 7.4428 ± 0.1504 0.01426 ± 0.00327 0.06218 ± 0.00185 8.136 ± 0.182 % 91.282 ± 0.195 %
|
| 282 |
+
83 7.4739 ± 0.1500 0.01411 ± 0.00324 0.06176 ± 0.00183 8.098 ± 0.180 % 91.311 ± 0.194 %
|
| 283 |
+
84 7.4901 ± 0.1491 0.01410 ± 0.00321 0.06132 ± 0.00181 8.062 ± 0.179 % 91.312 ± 0.192 %
|
| 284 |
+
85 7.4867 ± 0.1479 0.01420 ± 0.00317 0.06084 ± 0.00179 8.025 ± 0.178 % 91.345 ± 0.191 %
|
| 285 |
+
86 7.4194 ± 0.1452 0.01421 ± 0.00314 0.06043 ± 0.00177 7.996 ± 0.176 % 91.336 ± 0.190 %
|
| 286 |
+
87 7.3609 ± 0.1428 0.01409 ± 0.00311 0.06002 ± 0.00175 7.964 ± 0.175 % 91.364 ± 0.189 %
|
| 287 |
+
88 7.3020 ± 0.1405 0.01439 ± 0.00308 0.05963 ± 0.00173 7.936 ± 0.174 % 91.395 ± 0.187 %
|
| 288 |
+
89 7.2299 ± 0.1378 0.01444 ± 0.00306 0.05932 ± 0.00171 7.911 ± 0.172 % 91.421 ± 0.186 %
|
| 289 |
+
90 7.1739 ± 0.1356 0.01438 ± 0.00303 0.05902 ± 0.00169 7.883 ± 0.171 % 91.451 ± 0.185 %
|
| 290 |
+
91 7.1209 ± 0.1335 0.01406 ± 0.00301 0.05863 ± 0.00167 7.853 ± 0.170 % 91.493 ± 0.183 %
|
| 291 |
+
92 7.0635 ± 0.1313 0.01403 ± 0.00298 0.05827 ± 0.00166 7.826 ± 0.169 % 91.500 ± 0.182 %
|
| 292 |
+
93 7.0804 ± 0.1311 0.01393 ± 0.00297 0.05847 ± 0.00165 7.818 ± 0.167 % 91.465 ± 0.181 %
|
| 293 |
+
94 7.1106 ± 0.1309 0.01385 ± 0.00295 0.05806 ± 0.00163 7.785 ± 0.166 % 91.481 ± 0.180 %
|
| 294 |
+
95 7.2204 ± 0.1326 0.01398 ± 0.00293 0.05789 ± 0.00162 7.757 ± 0.165 % 91.455 ± 0.180 %
|
| 295 |
+
96 7.3106 ± 0.1337 0.01343 ± 0.00291 0.05781 ± 0.00160 7.731 ± 0.164 % 91.426 ± 0.179 %
|
| 296 |
+
97 7.3876 ± 0.1345 0.01325 ± 0.00289 0.05752 ± 0.00158 7.699 ± 0.163 % 91.381 ± 0.178 %
|
| 297 |
+
98 7.5270 ± 0.1370 0.01379 ± 0.00288 0.05730 ± 0.00157 7.669 ± 0.162 % 91.345 ± 0.178 %
|
| 298 |
+
99 7.6463 ± 0.1387 0.01396 ± 0.00286 0.05715 ± 0.00155 7.638 ± 0.161 % 91.301 ± 0.177 %
|
| 299 |
+
100 7.6822 ± 0.1388 0.01375 ± 0.00284 0.05721 ± 0.00155 7.619 ± 0.160 % 91.275 ± 0.177 %
|
| 300 |
+
101 7.7147 ± 0.1388 0.01314 ± 0.00282 0.05736 ± 0.00157 7.629 ± 0.160 % 91.279 ± 0.176 %
|
| 301 |
+
102 7.7762 ± 0.1398 0.01318 ± 0.00281 0.05733 ± 0.00156 7.625 ± 0.159 % 91.269 ± 0.175 %
|
| 302 |
+
103 7.7473 ± 0.1387 0.01283 ± 0.00279 0.05703 ± 0.00154 7.615 ± 0.158 % 91.281 ± 0.174 %
|
| 303 |
+
104 7.6912 ± 0.1369 0.01307 ± 0.00278 0.05719 ± 0.00154 7.664 ± 0.158 % 91.301 ± 0.173 %
|
| 304 |
+
105 7.5801 ± 0.1340 0.01304 ± 0.00278 0.05723 ± 0.00153 7.698 ± 0.158 % 91.331 ± 0.172 %
|
| 305 |
+
106 7.4503 ± 0.1308 0.01307 ± 0.00277 0.05713 ± 0.00153 7.734 ± 0.159 % 91.391 ± 0.171 %
|
| 306 |
+
107 7.5076 ± 0.1311 0.01299 ± 0.00275 0.05678 ± 0.00151 7.702 ± 0.158 % 91.395 ± 0.170 %
|
| 307 |
+
108 7.5210 ± 0.1307 0.01299 ± 0.00273 0.05647 ± 0.00150 7.676 ± 0.157 % 91.427 ± 0.169 %
|
| 308 |
+
109 7.5427 ± 0.1306 0.01315 ± 0.00271 0.05630 ± 0.00149 7.655 ± 0.156 % 91.430 ± 0.168 %
|
| 309 |
+
110 7.5781 ± 0.1306 0.01315 ± 0.00269 0.05605 ± 0.00147 7.633 ± 0.155 % 91.444 ± 0.167 %
|
| 310 |
+
111 7.6259 ± 0.1308 0.01312 ± 0.00268 0.05582 ± 0.00146 7.608 ± 0.154 % 91.440 ± 0.166 %
|
| 311 |
+
112 7.6328 ± 0.1302 0.01268 ± 0.00266 0.05556 ± 0.00145 7.586 ± 0.153 % 91.453 ± 0.165 %
|
| 312 |
+
113 7.6417 ± 0.1296 0.01247 ± 0.00264 0.05532 ± 0.00144 7.565 ± 0.153 % 91.466 ± 0.165 %
|
| 313 |
+
114 7.6577 ± 0.1294 0.01223 ± 0.00263 0.05512 ± 0.00142 7.539 ± 0.152 % 91.465 ± 0.164 %
|
| 314 |
+
115 7.6430 ± 0.1286 0.01265 ± 0.00262 0.05524 ± 0.00142 7.560 ± 0.151 % 91.482 ± 0.163 %
|
| 315 |
+
116 7.6471 ± 0.1281 0.01459 ± 0.00265 0.05688 ± 0.00143 7.737 ± 0.152 % 91.393 ± 0.163 %
|
| 316 |
+
117 7.5578 ± 0.1258 0.01640 ± 0.00267 0.05841 ± 0.00146 7.900 ± 0.151 % 91.359 ± 0.163 %
|
| 317 |
+
118 7.4693 ± 0.1235 0.01749 ± 0.00268 0.05968 ± 0.00147 8.047 ± 0.152 % 91.343 ± 0.162 %
|
| 318 |
+
119 7.3760 ± 0.1212 0.01837 ± 0.00268 0.06073 ± 0.00147 8.205 ± 0.152 % 91.346 ± 0.161 %
|
| 319 |
+
120 7.3022 ± 0.1192 0.01966 ± 0.00270 0.06240 ± 0.00149 8.438 ± 0.153 % 91.288 ± 0.161 %
|
| 320 |
+
121 7.2261 ± 0.1173 0.02082 ± 0.00271 0.06396 ± 0.00151 8.666 ± 0.154 % 91.233 ± 0.161 %

====== Perplexity statistics ======
Mean PPL(Q) : 7.226141 ± 0.117342
Mean PPL(base) : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 98.60%
Mean ln(PPL(Q)/PPL(base)) : 0.020821 ± 0.002714
Mean PPL(Q)/PPL(base) : 1.021039 ± 0.002772
Mean PPL(Q)-PPL(base) : 0.148901 ± 0.019641

====== KL divergence statistics ======
Mean KLD: 0.063959 ± 0.001510
Maximum KLD: 16.526793
99.9% KLD: 3.390165
99.0% KLD: 0.990056
95.0% KLD: 0.230491
90.0% KLD: 0.111804
Median KLD: 0.014112
10.0% KLD: 0.000065
5.0% KLD: 0.000011
1.0% KLD: 0.000000
0.1% KLD: -0.000002
Minimum KLD: -0.000004

====== Token probability statistics ======
Mean Δp: -0.525 ± 0.049 %
Maximum Δp: 97.605%
99.9% Δp: 66.834%
99.0% Δp: 22.195%
95.0% Δp: 7.260%
90.0% Δp: 3.683%
75.0% Δp: 0.444%
Median Δp: -0.001%
25.0% Δp: -0.822%
10.0% Δp: -4.746%
5.0% Δp: -9.491%
1.0% Δp: -33.469%
0.1% Δp: -80.314%
Minimum Δp: -99.345%
RMS Δp : 8.666 ± 0.154 %
Same top p: 91.233 ± 0.161 %

llama_perf_context_print: load time = 53103.25 ms
llama_perf_context_print: prompt eval time = 89730.89 ms / 61952 tokens ( 1.45 ms per token, 690.42 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 103696.70 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1175 + ( 22506 = 20838 + 176 + 1491) + 453 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 953 + ( 22729 = 18739 + 816 + 3174) + 451 |
llama_memory_breakdown_print: | - Host | 124850 = 124746 + 0 + 104 |
```
kld_data/unsloth/UD-Q5_K_XL/MiniMax-M2.5-UD-Q5_K_XL.md
ADDED
@@ -0,0 +1,372 @@
### MiniMax-M2.5-UD-Q5_K_XL (unsloth)

150.65 GiB (5.66 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q5_K_XL/MiniMax-M2.5-UD-Q5_K_XL-00001-of-00005.gguf
ggml_cuda_init: found 2 CUDA devices:
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 78649 used, -54777 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 80776 used, -56905 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 159426 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 113730 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 37103 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 9178 MiB used, 14693 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 1941 MiB used, 21929 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 10 layers ( 1 overflowing), 22657 MiB used, 1214 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 53 layers (48 overflowing), 22330 MiB used, 1541 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.40 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 4 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q5_K_XL/MiniMax-M2.5-UD-Q5_K_XL-00001-of-00005.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 17
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 5
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q4_K: 20 tensors
|
| 87 |
+
llama_model_loader: - type q5_K: 312 tensors
|
| 88 |
+
llama_model_loader: - type q6_K: 104 tensors
|
| 89 |
+
print_info: file format = GGUF V3 (latest)
|
| 90 |
+
print_info: file type = Q5_K - Medium
|
| 91 |
+
print_info: file size = 150.65 GiB (5.66 BPW)
|
| 92 |
+
load: 0 unused tokens
|
| 93 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 94 |
+
load: printing all EOG tokens:
|
| 95 |
+
load: - 200004 ('<fim_pad>')
|
| 96 |
+
load: - 200005 ('<reponame>')
|
| 97 |
+
load: - 200020 ('[e~[')
|
| 98 |
+
load: special tokens cache size = 54
|
| 99 |
+
load: token to piece cache size = 1.3355 MB
|
| 100 |
+
print_info: arch = minimax-m2
|
| 101 |
+
print_info: vocab_only = 0
|
| 102 |
+
print_info: no_alloc = 0
|
| 103 |
+
print_info: n_ctx_train = 196608
|
| 104 |
+
print_info: n_embd = 3072
|
| 105 |
+
print_info: n_embd_inp = 3072
|
| 106 |
+
print_info: n_layer = 62
|
| 107 |
+
print_info: n_head = 48
|
| 108 |
+
print_info: n_head_kv = 8
|
| 109 |
+
print_info: n_rot = 64
|
| 110 |
+
print_info: n_swa = 0
|
| 111 |
+
print_info: is_swa_any = 0
|
| 112 |
+
print_info: n_embd_head_k = 128
|
| 113 |
+
print_info: n_embd_head_v = 128
|
| 114 |
+
print_info: n_gqa = 6
|
| 115 |
+
print_info: n_embd_k_gqa = 1024
|
| 116 |
+
print_info: n_embd_v_gqa = 1024
|
| 117 |
+
print_info: f_norm_eps = 0.0e+00
|
| 118 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 119 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 120 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 121 |
+
print_info: f_logit_scale = 0.0e+00
|
| 122 |
+
print_info: f_attn_scale = 0.0e+00
|
| 123 |
+
print_info: n_ff = 1536
|
| 124 |
+
print_info: n_expert = 256
|
| 125 |
+
print_info: n_expert_used = 8
|
| 126 |
+
print_info: n_expert_groups = 0
|
| 127 |
+
print_info: n_group_used = 0
|
| 128 |
+
print_info: causal attn = 1
|
| 129 |
+
print_info: pooling type = 0
|
| 130 |
+
print_info: rope type = 2
|
| 131 |
+
print_info: rope scaling = linear
|
| 132 |
+
print_info: freq_base_train = 5000000.0
|
| 133 |
+
print_info: freq_scale_train = 1
|
| 134 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 135 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 136 |
+
print_info: rope_finetuned = unknown
|
| 137 |
+
print_info: model type = 230B.A10B
|
| 138 |
+
print_info: model params = 228.69 B
|
| 139 |
+
print_info: general.name = Minimax-M2.5
|
| 140 |
+
print_info: vocab type = BPE
|
| 141 |
+
print_info: n_vocab = 200064
|
| 142 |
+
print_info: n_merges = 199744
|
| 143 |
+
print_info: BOS token = 200034 ']~!b['
|
| 144 |
+
print_info: EOS token = 200020 '[e~['
|
| 145 |
+
print_info: UNK token = 200021 ']!d~['
|
| 146 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 147 |
+
print_info: LF token = 10 'Ċ'
|
| 148 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 149 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 150 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 151 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 152 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 153 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 154 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 155 |
+
print_info: EOG token = 200020 '[e~['
|
| 156 |
+
print_info: max token length = 256
|
| 157 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 158 |
+
load_tensors: offloading output layer to GPU
|
| 159 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 160 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46445.34 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46831.97 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 46775.65 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 13703.68 MiB
|
| 165 |
+
load_tensors: CUDA0 model buffer size = 20555.13 MiB
|
| 166 |
+
load_tensors: CUDA1 model buffer size = 18324.67 MiB
|
| 167 |
+
....................................................................................................
|
| 168 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 169 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 170 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 171 |
+
llama_context: constructing llama_context
|
| 172 |
+
llama_context: n_seq_max = 8
|
| 173 |
+
llama_context: n_ctx = 4096
|
| 174 |
+
llama_context: n_ctx_seq = 512
|
| 175 |
+
llama_context: n_batch = 4096
|
| 176 |
+
llama_context: n_ubatch = 4096
|
| 177 |
+
llama_context: causal_attn = 1
|
| 178 |
+
llama_context: flash_attn = enabled
|
| 179 |
+
llama_context: kv_unified = false
|
| 180 |
+
llama_context: freq_base = 5000000.0
|
| 181 |
+
llama_context: freq_scale = 1
|
| 182 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 183 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 184 |
+
llama_kv_cache: CUDA0 KV buffer size = 160.00 MiB
|
| 185 |
+
llama_kv_cache: CUDA1 KV buffer size = 832.00 MiB
|
| 186 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 187 |
+
sched_reserve: reserving ...
|
| 188 |
+
sched_reserve: CUDA0 compute buffer size = 1942.00 MiB
|
| 189 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 190 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 191 |
+
sched_reserve: graph nodes = 4099
|
| 192 |
+
sched_reserve: graph splits = 191 (with bs=4096), 99 (with bs=1)
|
| 193 |
+
sched_reserve: reserve took 22.57 ms, sched copies = 1
|
| 194 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 195 |
+
|
| 196 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 197 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 198 |
+
kl_divergence: 7.84 seconds per pass - ETA 1.97 minutes
|
| 199 |
+
|
| 200 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 201 |
+
1 6.3212 ± 1.2131 -0.00815 ± 0.01526 0.01624 ± 0.00361 4.902 ± 1.208 % 93.725 ± 1.522 %
|
| 202 |
+
2 4.6159 ± 0.5515 -0.01270 ± 0.00901 0.01152 ± 0.00190 3.806 ± 0.794 % 96.078 ± 0.860 %
|
| 203 |
+
3 4.4497 ± 0.4408 -0.00865 ± 0.00922 0.01277 ± 0.00151 4.126 ± 0.532 % 95.686 ± 0.735 %
|
| 204 |
+
4 5.0906 ± 0.4521 -0.00738 ± 0.00797 0.01565 ± 0.00168 4.575 ± 0.529 % 95.588 ± 0.643 %
|
| 205 |
+
5 4.9203 ± 0.3938 0.00515 ± 0.00918 0.01692 ± 0.00164 4.761 ± 0.494 % 95.608 ± 0.574 %
|
| 206 |
+
6 6.0368 ± 0.4735 0.00668 ± 0.00846 0.01868 ± 0.00148 4.593 ± 0.433 % 94.902 ± 0.563 %
|
| 207 |
+
7 5.6272 ± 0.3957 0.00828 ± 0.00763 0.01946 ± 0.00133 4.656 ± 0.374 % 94.846 ± 0.523 %
|
| 208 |
+
8 6.3453 ± 0.4236 0.00487 ± 0.00698 0.01900 ± 0.00117 4.498 ± 0.340 % 94.706 ± 0.496 %
|
| 209 |
+
9 6.2235 ± 0.3870 0.00477 ± 0.00633 0.01818 ± 0.00106 4.360 ± 0.315 % 94.728 ± 0.467 %
|
| 210 |
+
10 5.6915 ± 0.3291 0.00445 ± 0.00578 0.01741 ± 0.00096 4.291 ± 0.289 % 94.863 ± 0.437 %
|
| 211 |
+
11 6.2390 ± 0.3490 0.00448 ± 0.00543 0.01751 ± 0.00089 4.189 ± 0.270 % 94.724 ± 0.422 %
|
| 212 |
+
12 6.9092 ± 0.3754 0.00503 ± 0.00512 0.01765 ± 0.00083 4.074 ± 0.255 % 94.379 ± 0.416 %
|
| 213 |
+
13 7.1723 ± 0.3710 0.00512 ± 0.00479 0.01721 ± 0.00077 3.977 ± 0.241 % 94.389 ± 0.400 %
|
| 214 |
+
14 7.7261 ± 0.3893 0.00425 ± 0.00468 0.01793 ± 0.00090 3.985 ± 0.230 % 94.342 ± 0.387 %
|
| 215 |
+
15 8.0828 ± 0.3938 0.00327 ± 0.00446 0.01791 ± 0.00085 3.982 ± 0.220 % 94.353 ± 0.373 %
|
| 216 |
+
16 8.3427 ± 0.3937 0.00271 ± 0.00422 0.01749 ± 0.00080 3.917 ± 0.211 % 94.485 ± 0.357 %
|
| 217 |
+
17 8.5456 ± 0.3931 -0.00038 ± 0.00427 0.01834 ± 0.00079 3.942 ± 0.205 % 94.418 ± 0.349 %
|
| 218 |
+
18 8.0417 ± 0.3568 -0.00193 ± 0.00411 0.01827 ± 0.00076 3.945 ± 0.196 % 94.444 ± 0.338 %
|
| 219 |
+
19 8.1610 ± 0.3522 -0.00300 ± 0.00394 0.01772 ± 0.00072 3.887 ± 0.189 % 94.489 ± 0.328 %
|
| 220 |
+
20 8.2261 ± 0.3462 -0.00293 ± 0.00385 0.01850 ± 0.00072 3.924 ± 0.182 % 94.529 ± 0.318 %
|
| 221 |
+
21 8.2025 ± 0.3366 -0.00323 ± 0.00372 0.01840 ± 0.00070 3.903 ± 0.175 % 94.659 ± 0.307 %
|
| 222 |
+
22 8.5309 ± 0.3453 -0.00165 ± 0.00367 0.01894 ± 0.00068 3.919 ± 0.168 % 94.528 ± 0.304 %
|
| 223 |
+
23 8.5551 ± 0.3398 -0.00002 ± 0.00379 0.02013 ± 0.00081 4.103 ± 0.197 % 94.493 ± 0.298 %
|
| 224 |
+
24 8.9588 ± 0.3503 0.00002 ± 0.00367 0.01988 ± 0.00078 4.037 ± 0.192 % 94.444 ± 0.293 %
|
| 225 |
+
25 8.9423 ± 0.3432 -0.00043 ± 0.00359 0.02039 ± 0.00077 4.126 ± 0.189 % 94.322 ± 0.290 %
|
| 226 |
+
26 8.3648 ± 0.3119 0.00141 ± 0.00362 0.02269 ± 0.00094 4.823 ± 0.238 % 94.238 ± 0.286 %
|
| 227 |
+
27 7.9284 ± 0.2876 0.00176 ± 0.00371 0.02524 ± 0.00107 5.419 ± 0.260 % 94.205 ± 0.282 %
|
| 228 |
+
28 8.0462 ± 0.2873 0.00284 ± 0.00364 0.02539 ± 0.00104 5.378 ± 0.253 % 94.062 ± 0.280 %
|
| 229 |
+
29 7.9698 ± 0.2796 0.00277 ± 0.00355 0.02512 ± 0.00101 5.317 ± 0.247 % 94.118 ± 0.274 %
|
| 230 |
+
30 7.4549 ± 0.2548 0.00318 ± 0.00349 0.02465 ± 0.00099 5.312 ± 0.243 % 94.288 ± 0.265 %
|
| 231 |
+
31 7.0210 ± 0.2339 0.00258 ± 0.00341 0.02432 ± 0.00097 5.337 ± 0.238 % 94.396 ± 0.259 %
|
| 232 |
+
32 6.8419 ± 0.2224 0.00197 ± 0.00333 0.02392 ± 0.00094 5.306 ± 0.232 % 94.387 ± 0.255 %
|
| 233 |
+
33 6.7088 ± 0.2133 0.00223 ± 0.00325 0.02353 ± 0.00091 5.267 ± 0.227 % 94.415 ± 0.250 %
|
| 234 |
+
34 6.8992 ± 0.2172 0.00318 ± 0.00326 0.02407 ± 0.00090 5.266 ± 0.221 % 94.221 ± 0.251 %
|
| 235 |
+
35 6.9963 ± 0.2189 0.00253 ± 0.00323 0.02459 ± 0.00089 5.287 ± 0.215 % 94.118 ± 0.249 %
|
| 236 |
+
36 7.0586 ± 0.2187 0.00250 ± 0.00317 0.02438 ± 0.00086 5.247 ± 0.211 % 94.118 ± 0.246 %
|
| 237 |
+
37 7.0786 ± 0.2165 0.00305 ± 0.00315 0.02470 ± 0.00087 5.312 ± 0.207 % 94.139 ± 0.242 %
|
| 238 |
+
38 7.2909 ± 0.2211 0.00287 ± 0.00310 0.02463 ± 0.00085 5.277 ± 0.203 % 94.066 ± 0.240 %
|
| 239 |
+
39 7.2462 ± 0.2165 0.00367 ± 0.00307 0.02549 ± 0.00088 5.442 ± 0.207 % 94.037 ± 0.237 %
|
| 240 |
+
40 7.0059 ± 0.2052 0.00486 ± 0.00309 0.02726 ± 0.00092 5.713 ± 0.205 % 94.039 ± 0.234 %
|
| 241 |
+
41 6.8012 ± 0.1954 0.00603 ± 0.00314 0.02922 ± 0.00096 6.132 ± 0.210 % 93.945 ± 0.233 %
|
| 242 |
+
42 6.5898 ± 0.1858 0.00578 ± 0.00322 0.03111 ± 0.00105 6.443 ± 0.216 % 93.950 ± 0.230 %
|
| 243 |
+
43 6.3896 ± 0.1769 0.00607 ± 0.00322 0.03237 ± 0.00108 6.644 ± 0.215 % 93.935 ± 0.228 %
|
| 244 |
+
44 6.3447 ± 0.1729 0.00535 ± 0.00316 0.03187 ± 0.00106 6.583 ± 0.213 % 93.984 ± 0.224 %
|
| 245 |
+
45 6.4851 ± 0.1757 0.00535 ± 0.00315 0.03222 ± 0.00104 6.570 ± 0.209 % 93.882 ± 0.224 %
|
| 246 |
+
46 6.6255 ± 0.1779 0.00469 ± 0.00309 0.03189 ± 0.00102 6.512 ± 0.207 % 93.870 ± 0.221 %
|
| 247 |
+
47 6.7752 ± 0.1806 0.00481 ± 0.00304 0.03147 ± 0.00100 6.450 ± 0.204 % 93.859 ± 0.219 %
|
| 248 |
+
48 6.6615 ± 0.1747 0.00454 ± 0.00298 0.03101 ± 0.00098 6.401 ± 0.202 % 93.881 ± 0.217 %
|
| 249 |
+
49 6.7582 ± 0.1754 0.00393 ± 0.00310 0.03469 ± 0.00160 6.533 ± 0.208 % 93.758 ± 0.216 %
|
| 250 |
+
50 6.8521 ± 0.1769 0.00358 ± 0.00305 0.03448 ± 0.00157 6.489 ± 0.205 % 93.725 ± 0.215 %
|
| 251 |
+
51 6.9611 ± 0.1782 0.00397 ± 0.00300 0.03413 ± 0.00154 6.448 ± 0.203 % 93.779 ± 0.212 %
|
| 252 |
+
52 7.0219 ± 0.1779 0.00335 ± 0.00299 0.03425 ± 0.00151 6.422 ± 0.200 % 93.741 ± 0.210 %
|
| 253 |
+
53 7.1330 ± 0.1790 0.00327 ± 0.00295 0.03399 ± 0.00149 6.385 ± 0.197 % 93.762 ± 0.208 %
|
| 254 |
+
54 7.1890 ± 0.1784 0.00331 ± 0.00291 0.03365 ± 0.00146 6.341 ± 0.195 % 93.791 ± 0.206 %
|
| 255 |
+
55 7.2408 ± 0.1778 0.00345 ± 0.00287 0.03331 ± 0.00143 6.300 ± 0.193 % 93.761 ± 0.204 %
|
| 256 |
+
56 7.2809 ± 0.1772 0.00326 ± 0.00282 0.03297 ± 0.00141 6.255 ± 0.191 % 93.796 ± 0.202 %
|
| 257 |
+
57 7.2804 ± 0.1755 0.00323 ± 0.00281 0.03316 ± 0.00139 6.261 ± 0.189 % 93.801 ± 0.200 %
|
| 258 |
+
58 7.2949 ± 0.1744 0.00386 ± 0.00278 0.03293 ± 0.00137 6.255 ± 0.188 % 93.759 ± 0.199 %
|
| 259 |
+
59 7.2558 ± 0.1717 0.00361 ± 0.00274 0.03249 ± 0.00135 6.213 ± 0.186 % 93.825 ± 0.196 %
|
| 260 |
+
60 7.2680 ± 0.1706 0.00381 ± 0.00270 0.03235 ± 0.00133 6.196 ± 0.183 % 93.784 ± 0.195 %
|
| 261 |
+
61 7.3178 ± 0.1704 0.00401 ± 0.00267 0.03215 ± 0.00131 6.166 ± 0.181 % 93.790 ± 0.194 %
|
| 262 |
+
62 7.2949 ± 0.1688 0.00414 ± 0.00266 0.03193 ± 0.00129 6.145 ± 0.180 % 93.846 ± 0.191 %
|
| 263 |
+
63 7.3435 ± 0.1690 0.00445 ± 0.00265 0.03183 ± 0.00127 6.112 ± 0.178 % 93.844 ± 0.190 %
|
| 264 |
+
64 7.3250 ± 0.1669 0.00451 ± 0.00262 0.03167 ± 0.00125 6.080 ± 0.176 % 93.824 ± 0.188 %
|
| 265 |
+
65 7.3162 ± 0.1654 0.00447 ± 0.00261 0.03154 ± 0.00123 6.074 ± 0.174 % 93.840 ± 0.187 %
|
| 266 |
+
66 7.3529 ± 0.1651 0.00447 ± 0.00258 0.03150 ± 0.00121 6.049 ± 0.172 % 93.821 ± 0.186 %
|
| 267 |
+
67 7.3581 ± 0.1641 0.00424 ± 0.00255 0.03152 ± 0.00120 6.029 ± 0.170 % 93.784 ± 0.185 %
|
| 268 |
+
68 7.3170 ± 0.1618 0.00461 ± 0.00254 0.03127 ± 0.00118 5.995 ± 0.169 % 93.835 ± 0.183 %
|
| 269 |
+
69 7.3509 ± 0.1615 0.00478 ± 0.00253 0.03116 ± 0.00117 5.966 ± 0.167 % 93.851 ± 0.181 %
|
| 270 |
+
70 7.3205 ± 0.1593 0.00442 ± 0.00251 0.03101 ± 0.00115 5.939 ± 0.166 % 93.854 ± 0.180 %
|
| 271 |
+
71 7.3057 ± 0.1579 0.00439 ± 0.00249 0.03091 ± 0.00113 5.910 ± 0.164 % 93.869 ± 0.178 %
|
| 272 |
+
72 7.3250 ± 0.1574 0.00485 ± 0.00248 0.03088 ± 0.00112 5.888 ± 0.163 % 93.856 ± 0.177 %
|
| 273 |
+
73 7.3351 ± 0.1565 0.00550 ± 0.00246 0.03076 ± 0.00111 5.868 ± 0.161 % 93.854 ± 0.176 %
|
| 274 |
+
74 7.3276 ± 0.1552 0.00530 ± 0.00244 0.03067 ± 0.00109 5.848 ± 0.160 % 93.837 ± 0.175 %
|
| 275 |
+
75 7.3305 ± 0.1542 0.00478 ± 0.00242 0.03077 ± 0.00108 5.850 ± 0.158 % 93.825 ± 0.174 %
|
| 276 |
+
76 7.3895 ± 0.1546 0.00431 ± 0.00240 0.03070 ± 0.00107 5.846 ± 0.157 % 93.762 ± 0.174 %
|
| 277 |
+
77 7.3875 ± 0.1536 0.00430 ± 0.00240 0.03059 ± 0.00105 5.818 ± 0.156 % 93.751 ± 0.173 %
|
| 278 |
+
78 7.4054 ± 0.1531 0.00490 ± 0.00238 0.03049 ± 0.00104 5.796 ± 0.154 % 93.776 ± 0.171 %
|
| 279 |
+
79 7.4156 ± 0.1524 0.00482 ± 0.00236 0.03042 ± 0.00103 5.772 ± 0.153 % 93.800 ± 0.170 %
|
| 280 |
+
80 7.4198 ± 0.1520 0.00514 ± 0.00238 0.03055 ± 0.00102 5.776 ± 0.151 % 93.794 ± 0.169 %
|
| 281 |
+
81 7.3974 ± 0.1506 0.00521 ± 0.00236 0.03058 ± 0.00101 5.759 ± 0.150 % 93.803 ± 0.168 %
|
| 282 |
+
82 7.3775 ± 0.1491 0.00545 ± 0.00234 0.03040 ± 0.00100 5.736 ± 0.149 % 93.812 ± 0.167 %
|
| 283 |
+
83 7.4095 ± 0.1486 0.00546 ± 0.00232 0.03021 ± 0.00099 5.711 ± 0.148 % 93.796 ± 0.166 %
|
| 284 |
+
84 7.4282 ± 0.1478 0.00580 ± 0.00230 0.02999 ± 0.00098 5.686 ± 0.147 % 93.814 ± 0.165 %
|
| 285 |
+
85 7.4246 ± 0.1466 0.00586 ± 0.00227 0.02980 ± 0.00097 5.662 ± 0.145 % 93.809 ± 0.164 %
|
| 286 |
+
86 7.3589 ± 0.1440 0.00601 ± 0.00225 0.02960 ± 0.00095 5.641 ± 0.144 % 93.830 ± 0.162 %
|
| 287 |
+
87 7.2996 ± 0.1416 0.00573 ± 0.00223 0.02941 ± 0.00094 5.620 ± 0.143 % 93.843 ± 0.161 %
|
| 288 |
+
88 7.2398 ± 0.1392 0.00584 ± 0.00221 0.02924 ± 0.00093 5.609 ± 0.142 % 93.850 ± 0.160 %
|
| 289 |
+
89 7.1683 ± 0.1366 0.00588 ± 0.00219 0.02914 ± 0.00092 5.596 ± 0.141 % 93.849 ± 0.159 %
|
| 290 |
+
90 7.1144 ± 0.1344 0.00604 ± 0.00218 0.02901 ± 0.00092 5.576 ± 0.140 % 93.878 ± 0.158 %
|
| 291 |
+
91 7.0663 ± 0.1325 0.00636 ± 0.00216 0.02881 ± 0.00091 5.559 ± 0.139 % 93.898 ± 0.157 %
|
| 292 |
+
92 7.0108 ± 0.1303 0.00654 ± 0.00214 0.02868 ± 0.00090 5.557 ± 0.138 % 93.913 ± 0.156 %
|
| 293 |
+
93 7.0341 ± 0.1304 0.00737 ± 0.00215 0.02941 ± 0.00096 5.600 ± 0.141 % 93.882 ± 0.156 %
|
| 294 |
+
94 7.0652 ± 0.1302 0.00744 ± 0.00213 0.02923 ± 0.00095 5.579 ± 0.140 % 93.901 ± 0.155 %
|
| 295 |
+
95 7.1713 ± 0.1318 0.00715 ± 0.00212 0.02929 ± 0.00094 5.579 ± 0.139 % 93.866 ± 0.154 %
|
| 296 |
+
96 7.2644 ± 0.1330 0.00709 ± 0.00211 0.02926 ± 0.00093 5.559 ± 0.138 % 93.811 ± 0.154 %
|
| 297 |
+
97 7.3416 ± 0.1338 0.00700 ± 0.00209 0.02914 ± 0.00092 5.535 ± 0.138 % 93.794 ± 0.153 %
|
| 298 |
+
98 7.4784 ± 0.1361 0.00731 ± 0.00209 0.02903 ± 0.00092 5.517 ± 0.137 % 93.762 ± 0.153 %
|
| 299 |
+
99 7.5962 ± 0.1379 0.00738 ± 0.00207 0.02898 ± 0.00091 5.497 ± 0.136 % 93.694 ± 0.153 %
|
| 300 |
+
100 7.6343 ± 0.1380 0.00750 ± 0.00207 0.02909 ± 0.00090 5.497 ± 0.135 % 93.698 ± 0.152 %
|
| 301 |
+
101 7.6701 ± 0.1381 0.00734 ± 0.00205 0.02903 ± 0.00090 5.481 ± 0.134 % 93.691 ± 0.152 %
|
| 302 |
+
102 7.7294 ± 0.1390 0.00715 ± 0.00205 0.02904 ± 0.00089 5.474 ± 0.133 % 93.691 ± 0.151 %
|
| 303 |
+
103 7.7042 ± 0.1381 0.00726 ± 0.00204 0.02891 ± 0.00088 5.461 ± 0.132 % 93.710 ± 0.150 %
|
| 304 |
+
104 7.6460 ± 0.1362 0.00718 ± 0.00203 0.02908 ± 0.00088 5.537 ± 0.135 % 93.725 ± 0.149 %
|
| 305 |
+
105 7.5347 ± 0.1333 0.00703 ± 0.00202 0.02923 ± 0.00089 5.593 ± 0.137 % 93.740 ± 0.148 %
|
| 306 |
+
106 7.4055 ± 0.1301 0.00703 ± 0.00201 0.02904 ± 0.00088 5.589 ± 0.137 % 93.792 ± 0.147 %
|
| 307 |
+
107 7.4617 ± 0.1304 0.00686 ± 0.00199 0.02886 ± 0.00087 5.566 ± 0.136 % 93.777 ± 0.146 %
|
| 308 |
+
108 7.4730 ± 0.1300 0.00660 ± 0.00197 0.02870 ± 0.00086 5.547 ± 0.135 % 93.787 ± 0.145 %
|
| 309 |
+
109 7.4930 ± 0.1298 0.00655 ± 0.00196 0.02861 ± 0.00086 5.533 ± 0.134 % 93.801 ± 0.145 %
|
| 310 |
+
110 7.5288 ± 0.1298 0.00662 ± 0.00195 0.02848 ± 0.00085 5.517 ± 0.133 % 93.815 ± 0.144 %
|
| 311 |
+
111 7.5758 ± 0.1300 0.00654 ± 0.00193 0.02838 ± 0.00084 5.501 ± 0.133 % 93.789 ± 0.143 %
|
| 312 |
+
112 7.5847 ± 0.1295 0.00636 ± 0.00192 0.02824 ± 0.00083 5.487 ± 0.132 % 93.803 ± 0.143 %
|
| 313 |
+
113 7.5961 ± 0.1289 0.00650 ± 0.00190 0.02813 ± 0.00083 5.472 ± 0.131 % 93.805 ± 0.142 %
|
| 314 |
+
114 7.6146 ± 0.1288 0.00658 ± 0.00190 0.02809 ± 0.00082 5.455 ± 0.130 % 93.787 ± 0.142 %
|
| 315 |
+
115 7.5988 ± 0.1279 0.00685 ± 0.00189 0.02818 ± 0.00082 5.455 ± 0.129 % 93.770 ± 0.141 %
|
| 316 |
+
116 7.5923 ± 0.1273 0.00739 ± 0.00189 0.02880 ± 0.00082 5.529 ± 0.128 % 93.715 ± 0.141 %
|
| 317 |
+
117 7.4951 ± 0.1248 0.00808 ± 0.00189 0.02919 ± 0.00082 5.622 ± 0.127 % 93.722 ± 0.140 %
|
| 318 |
+
118 7.3985 ± 0.1224 0.00797 ± 0.00189 0.02982 ± 0.00083 5.714 ± 0.126 % 93.706 ± 0.140 %
|
| 319 |
+
119 7.2989 ± 0.1199 0.00785 ± 0.00190 0.03008 ± 0.00082 5.758 ± 0.125 % 93.709 ± 0.139 %
|
| 320 |
+
120 7.2172 ± 0.1179 0.00795 ± 0.00190 0.03057 ± 0.00082 5.841 ± 0.125 % 93.703 ± 0.139 %
|
| 321 |
+
121 7.1368 ± 0.1159 0.00838 ± 0.00190 0.03092 ± 0.00082 5.907 ± 0.124 % 93.716 ± 0.138 %

====== Perplexity statistics ======
Mean PPL(Q) : 7.136782 ± 0.115879
Mean PPL(base) : 7.077240 ± 0.114279
Cor(ln(PPL(Q)), ln(PPL(base))): 99.31%
Mean ln(PPL(Q)/PPL(base)) : 0.008378 ± 0.001902
Mean PPL(Q)/PPL(base) : 1.008413 ± 0.001918
Mean PPL(Q)-PPL(base) : 0.059542 ± 0.013596

====== KL divergence statistics ======
Mean KLD: 0.030918 ± 0.000823
Maximum KLD: 10.278928
99.9% KLD: 1.663864
99.0% KLD: 0.421479
95.0% KLD: 0.108696
90.0% KLD: 0.056246
Median KLD: 0.007063
10.0% KLD: 0.000031
5.0% KLD: 0.000005
1.0% KLD: 0.000000
0.1% KLD: -0.000003
Minimum KLD: -0.000005

====== Token probability statistics ======
Mean Δp: -0.186 ± 0.034 %
Maximum Δp: 96.566%
99.9% Δp: 52.836%
99.0% Δp: 15.958%
95.0% Δp: 5.429%
90.0% Δp: 2.777%
75.0% Δp: 0.347%
Median Δp: -0.000%
25.0% Δp: -0.522%
10.0% Δp: -3.236%
5.0% Δp: -6.139%
1.0% Δp: -18.928%
0.1% Δp: -54.258%
Minimum Δp: -95.246%
RMS Δp : 5.907 ± 0.124 %
Same top p: 93.716 ± 0.138 %

llama_perf_context_print: load time = 58901.25 ms
llama_perf_context_print: prompt eval time = 108548.05 ms / 61952 tokens ( 1.75 ms per token, 570.73 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 123699.13 ms / 61953 tokens
llama_perf_context_print: graphs reused = 0
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1057 + ( 22657 = 20555 + 160 + 1941) + 420 |
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1351 + ( 22330 = 18324 + 832 + 3174) + 452 |
llama_memory_breakdown_print: | - Host | 153860 = 153756 + 0 + 104 |
```
kld_data/unsloth/UD-Q6_K_XL/MiniMax-M2.5-UD-Q6_K_XL.md
ADDED
@@ -0,0 +1,371 @@
### MiniMax-M2.5-UD-Q6_K_XL (unsloth)

180.94 GiB (6.80 BPW)

```txt
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q6_K_XL/MiniMax-M2.5-UD-Q6_K_XL-00001-of-00005.gguf
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 95009 used, -71137 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 95211 used, -71339 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 190221 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 144525 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 35721 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 10309 MiB used, 13562 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 2499 MiB used, 21371 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 7 layers ( 1 overflowing), 21983 MiB used, 1888 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 56 layers (52 overflowing), 22461 MiB used, 1410 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.13 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 4 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q6_K_XL/MiniMax-M2.5-UD-Q6_K_XL-00001-of-00005.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 18
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 5
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type q8_0: 208 tensors
|
| 87 |
+
llama_model_loader: - type q6_K: 228 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q6_K
|
| 90 |
+
print_info: file size = 180.94 GiB (6.80 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 46177.82 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 46998.08 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 46779.07 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 44700.46 MiB
|
| 164 |
+
load_tensors: CUDA0 model buffer size = 19371.22 MiB
|
| 165 |
+
load_tensors: CUDA1 model buffer size = 18407.23 MiB
|
| 166 |
+
....................................................................................................
|
| 167 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 168 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 169 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 170 |
+
llama_context: constructing llama_context
|
| 171 |
+
llama_context: n_seq_max = 8
|
| 172 |
+
llama_context: n_ctx = 4096
|
| 173 |
+
llama_context: n_ctx_seq = 512
|
| 174 |
+
llama_context: n_batch = 4096
|
| 175 |
+
llama_context: n_ubatch = 4096
|
| 176 |
+
llama_context: causal_attn = 1
|
| 177 |
+
llama_context: flash_attn = enabled
|
| 178 |
+
llama_context: kv_unified = false
|
| 179 |
+
llama_context: freq_base = 5000000.0
|
| 180 |
+
llama_context: freq_scale = 1
|
| 181 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 182 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 183 |
+
llama_kv_cache: CUDA0 KV buffer size = 112.00 MiB
|
| 184 |
+
llama_kv_cache: CUDA1 KV buffer size = 880.00 MiB
|
| 185 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 186 |
+
sched_reserve: reserving ...
|
| 187 |
+
sched_reserve: CUDA0 compute buffer size = 2500.00 MiB
|
| 188 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 189 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 190 |
+
sched_reserve: graph nodes = 4099
|
| 191 |
+
sched_reserve: graph splits = 209 (with bs=4096), 109 (with bs=1)
|
| 192 |
+
sched_reserve: reserve took 21.73 ms, sched copies = 1
|
| 193 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 194 |
+
|
| 195 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 196 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 197 |
+
kl_divergence: 9.80 seconds per pass - ETA 2.47 minutes
|
| 198 |
+
|
| 199 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 200 |
+
1 6.3661 ± 1.2152 -0.00108 ± 0.01112 0.00926 ± 0.00142 3.293 ± 0.613 % 96.863 ± 1.094 %
|
| 201 |
+
2 4.6670 ± 0.5568 -0.00170 ± 0.00712 0.00685 ± 0.00079 2.600 ± 0.395 % 97.451 ± 0.699 %
|
| 202 |
+
3 4.4853 ± 0.4428 -0.00067 ± 0.00756 0.00718 ± 0.00066 2.855 ± 0.276 % 97.124 ± 0.605 %
|
| 203 |
+
4 5.1096 ± 0.4531 -0.00365 ± 0.00634 0.00800 ± 0.00059 2.811 ± 0.225 % 97.157 ± 0.521 %
|
| 204 |
+
5 4.9177 ± 0.3914 0.00462 ± 0.00758 0.00775 ± 0.00053 2.773 ± 0.206 % 97.490 ± 0.438 %
|
| 205 |
+
6 5.9974 ± 0.4676 0.00013 ± 0.00679 0.00916 ± 0.00055 2.681 ± 0.181 % 97.059 ± 0.432 %
|
| 206 |
+
7 5.5802 ± 0.3899 -0.00011 ± 0.00602 0.00981 ± 0.00053 2.841 ± 0.162 % 96.863 ± 0.413 %
|
| 207 |
+
8 6.2917 ± 0.4171 -0.00363 ± 0.00553 0.01014 ± 0.00049 2.795 ± 0.149 % 96.618 ± 0.400 %
|
| 208 |
+
9 6.1826 ± 0.3823 -0.00183 ± 0.00500 0.00983 ± 0.00044 2.707 ± 0.138 % 96.558 ± 0.381 %
|
| 209 |
+
10 5.6680 ± 0.3267 0.00032 ± 0.00463 0.00962 ± 0.00041 2.724 ± 0.126 % 96.392 ± 0.369 %
|
| 210 |
+
11 6.2162 ± 0.3468 0.00083 ± 0.00434 0.00968 ± 0.00039 2.674 ± 0.118 % 96.435 ± 0.350 %
|
| 211 |
+
12 6.8907 ± 0.3734 0.00234 ± 0.00417 0.01030 ± 0.00051 2.669 ± 0.111 % 96.275 ± 0.342 %
|
| 212 |
+
13 7.1563 ± 0.3693 0.00289 ± 0.00395 0.01025 ± 0.00049 2.624 ± 0.105 % 96.290 ± 0.328 %
|
| 213 |
+
14 7.7138 ± 0.3882 0.00265 ± 0.00389 0.01059 ± 0.00054 2.691 ± 0.123 % 96.190 ± 0.320 %
|
| 214 |
+
15 8.0809 ± 0.3938 0.00304 ± 0.00370 0.01070 ± 0.00051 2.695 ± 0.120 % 96.052 ± 0.315 %
|
| 215 |
+
16 8.3410 ± 0.3941 0.00251 ± 0.00350 0.01043 ± 0.00048 2.653 ± 0.114 % 96.054 ± 0.305 %
|
| 216 |
+
17 8.5615 ± 0.3945 0.00149 ± 0.00339 0.01063 ± 0.00047 2.620 ± 0.110 % 95.986 ± 0.298 %
|
| 217 |
+
18 8.0676 ± 0.3588 0.00129 ± 0.00324 0.01062 ± 0.00046 2.621 ± 0.107 % 96.078 ± 0.287 %
|
| 218 |
+
19 8.1915 ± 0.3546 0.00073 ± 0.00310 0.01033 ± 0.00043 2.594 ± 0.103 % 96.058 ± 0.280 %
|
| 219 |
+
20 8.2583 ± 0.3484 0.00098 ± 0.00307 0.01080 ± 0.00044 2.664 ± 0.103 % 96.039 ± 0.273 %
|
| 220 |
+
21 8.2283 ± 0.3384 -0.00009 ± 0.00299 0.01099 ± 0.00044 2.684 ± 0.099 % 96.022 ± 0.267 %
|
| 221 |
+
22 8.5436 ± 0.3464 -0.00015 ± 0.00299 0.01152 ± 0.00045 2.747 ± 0.106 % 95.775 ± 0.269 %
|
| 222 |
+
23 8.5577 ± 0.3403 0.00028 ± 0.00309 0.01241 ± 0.00050 2.977 ± 0.137 % 95.652 ± 0.266 %
|
| 223 |
+
24 8.9633 ± 0.3508 0.00052 ± 0.00300 0.01234 ± 0.00048 2.941 ± 0.133 % 95.703 ± 0.259 %
|
| 224 |
+
25 8.9526 ± 0.3442 0.00072 ± 0.00297 0.01287 ± 0.00048 3.054 ± 0.139 % 95.561 ± 0.258 %
|
| 225 |
+
26 8.3535 ± 0.3117 0.00005 ± 0.00297 0.01345 ± 0.00053 3.322 ± 0.154 % 95.641 ± 0.251 %
|
| 226 |
+
27 7.9078 ± 0.2871 -0.00083 ± 0.00297 0.01450 ± 0.00060 3.676 ± 0.184 % 95.657 ± 0.246 %
|
| 227 |
+
28 8.0264 ± 0.2870 0.00036 ± 0.00290 0.01458 ± 0.00058 3.670 ± 0.178 % 95.574 ± 0.243 %
|
| 228 |
+
29 7.9578 ± 0.2796 0.00126 ± 0.00286 0.01464 ± 0.00056 3.659 ± 0.173 % 95.578 ± 0.239 %
|
| 229 |
+
30 7.4403 ± 0.2547 0.00122 ± 0.00277 0.01422 ± 0.00055 3.616 ± 0.170 % 95.712 ± 0.232 %
|
| 230 |
+
31 7.0111 ± 0.2339 0.00117 ± 0.00273 0.01398 ± 0.00054 3.584 ± 0.166 % 95.838 ± 0.225 %
|
| 231 |
+
32 6.8364 ± 0.2226 0.00118 ± 0.00267 0.01381 ± 0.00052 3.579 ± 0.162 % 95.821 ± 0.222 %
|
| 232 |
+
33 6.7030 ± 0.2134 0.00137 ± 0.00260 0.01364 ± 0.00051 3.562 ± 0.158 % 95.793 ± 0.219 %
|
| 233 |
+
34 6.8926 ± 0.2174 0.00222 ± 0.00260 0.01404 ± 0.00050 3.566 ± 0.154 % 95.732 ± 0.217 %
|
| 234 |
+
35 6.9985 ± 0.2196 0.00284 ± 0.00259 0.01437 ± 0.00050 3.622 ± 0.150 % 95.597 ± 0.217 %
|
| 235 |
+
36 7.0612 ± 0.2193 0.00288 ± 0.00254 0.01426 ± 0.00048 3.607 ± 0.146 % 95.621 ± 0.214 %
|
| 236 |
+
37 7.0827 ± 0.2172 0.00363 ± 0.00252 0.01433 ± 0.00048 3.629 ± 0.143 % 95.580 ± 0.212 %
|
| 237 |
+
38 7.2933 ± 0.2217 0.00320 ± 0.00248 0.01431 ± 0.00046 3.619 ± 0.140 % 95.552 ± 0.209 %
|
| 238 |
+
39 7.2441 ± 0.2169 0.00338 ± 0.00245 0.01463 ± 0.00047 3.675 ± 0.142 % 95.515 ± 0.208 %
|
| 239 |
+
40 6.9995 ± 0.2054 0.00394 ± 0.00249 0.01589 ± 0.00055 3.994 ± 0.151 % 95.480 ± 0.206 %
|
| 240 |
+
41 6.7977 ± 0.1957 0.00552 ± 0.00251 0.01664 ± 0.00056 4.183 ± 0.149 % 95.485 ± 0.203 %
|
| 241 |
+
42 6.5921 ± 0.1865 0.00613 ± 0.00251 0.01743 ± 0.00061 4.395 ± 0.158 % 95.528 ± 0.200 %
|
| 242 |
+
43 6.3870 ± 0.1773 0.00566 ± 0.00252 0.01810 ± 0.00064 4.615 ± 0.170 % 95.477 ± 0.198 %
|
| 243 |
+
44 6.3448 ± 0.1734 0.00536 ± 0.00248 0.01784 ± 0.00062 4.582 ± 0.168 % 95.499 ± 0.196 %
|
| 244 |
+
45 6.4882 ± 0.1764 0.00581 ± 0.00246 0.01812 ± 0.00062 4.564 ± 0.165 % 95.442 ± 0.195 %
|
| 245 |
+
46 6.6283 ± 0.1786 0.00511 ± 0.00242 0.01796 ± 0.00060 4.526 ± 0.163 % 95.456 ± 0.192 %
|
| 246 |
+
47 6.7772 ± 0.1812 0.00512 ± 0.00237 0.01775 ± 0.00059 4.484 ± 0.161 % 95.444 ± 0.190 %
|
| 247 |
+
48 6.6645 ± 0.1753 0.00500 ± 0.00233 0.01749 ± 0.00058 4.451 ± 0.159 % 95.458 ± 0.188 %
|
| 248 |
+
49 6.7676 ± 0.1762 0.00533 ± 0.00251 0.02074 ± 0.00127 4.589 ± 0.167 % 95.294 ± 0.189 %
|
| 249 |
+
50 6.8657 ± 0.1779 0.00556 ± 0.00247 0.02066 ± 0.00124 4.565 ± 0.165 % 95.263 ± 0.188 %
|
| 250 |
+
51 6.9725 ± 0.1791 0.00561 ± 0.00243 0.02046 ± 0.00122 4.536 ± 0.163 % 95.302 ± 0.186 %
|
| 251 |
+
52 7.0361 ± 0.1788 0.00538 ± 0.00241 0.02050 ± 0.00120 4.520 ± 0.160 % 95.256 ± 0.185 %
|
| 252 |
+
53 7.1478 ± 0.1800 0.00534 ± 0.00238 0.02032 ± 0.00117 4.490 ± 0.158 % 95.265 ± 0.183 %
|
| 253 |
+
54 7.2030 ± 0.1792 0.00525 ± 0.00235 0.02015 ± 0.00115 4.459 ± 0.156 % 95.251 ± 0.181 %
|
| 254 |
+
55 7.2537 ± 0.1786 0.00524 ± 0.00231 0.01995 ± 0.00113 4.433 ± 0.155 % 95.251 ± 0.180 %
|
| 255 |
+
56 7.2939 ± 0.1781 0.00504 ± 0.00227 0.01976 ± 0.00111 4.404 ± 0.153 % 95.245 ± 0.178 %
|
| 256 |
+
57 7.2934 ± 0.1764 0.00502 ± 0.00225 0.01974 ± 0.00109 4.390 ± 0.151 % 95.212 ± 0.177 %
|
| 257 |
+
58 7.3052 ± 0.1752 0.00527 ± 0.00223 0.01960 ± 0.00107 4.387 ± 0.149 % 95.199 ± 0.176 %
|
| 258 |
+
59 7.2688 ± 0.1726 0.00540 ± 0.00221 0.01937 ± 0.00106 4.359 ± 0.148 % 95.234 ± 0.174 %
|
| 259 |
+
60 7.2785 ± 0.1714 0.00525 ± 0.00219 0.01929 ± 0.00104 4.346 ± 0.146 % 95.176 ± 0.173 %
|
| 260 |
+
61 7.3276 ± 0.1711 0.00534 ± 0.00216 0.01925 ± 0.00103 4.324 ± 0.144 % 95.198 ± 0.171 %
|
| 261 |
+
62 7.3052 ± 0.1695 0.00555 ± 0.00215 0.01915 ± 0.00101 4.316 ± 0.143 % 95.225 ± 0.170 %
|
| 262 |
+
63 7.3520 ± 0.1697 0.00561 ± 0.00215 0.01916 ± 0.00100 4.294 ± 0.141 % 95.226 ± 0.168 %
|
| 263 |
+
64 7.3298 ± 0.1674 0.00516 ± 0.00213 0.01908 ± 0.00098 4.281 ± 0.139 % 95.184 ± 0.168 %
|
| 264 |
+
65 7.3185 ± 0.1659 0.00478 ± 0.00211 0.01904 ± 0.00097 4.286 ± 0.138 % 95.192 ± 0.166 %
|
| 265 |
+
66 7.3556 ± 0.1656 0.00484 ± 0.00209 0.01906 ± 0.00096 4.269 ± 0.136 % 95.169 ± 0.165 %
|
| 266 |
+
67 7.3640 ± 0.1646 0.00504 ± 0.00207 0.01916 ± 0.00095 4.266 ± 0.135 % 95.142 ± 0.164 %
|
| 267 |
+
68 7.3236 ± 0.1623 0.00551 ± 0.00208 0.01900 ± 0.00093 4.241 ± 0.133 % 95.173 ± 0.163 %
|
| 268 |
+
69 7.3541 ± 0.1619 0.00522 ± 0.00206 0.01894 ± 0.00092 4.227 ± 0.132 % 95.175 ± 0.162 %
|
| 269 |
+
70 7.3226 ± 0.1597 0.00470 ± 0.00204 0.01890 ± 0.00091 4.217 ± 0.130 % 95.182 ± 0.160 %
|
| 270 |
+
71 7.3093 ± 0.1584 0.00488 ± 0.00202 0.01889 ± 0.00090 4.220 ± 0.129 % 95.189 ± 0.159 %
|
| 271 |
+
72 7.3288 ± 0.1579 0.00536 ± 0.00202 0.01893 ± 0.00089 4.215 ± 0.128 % 95.163 ± 0.158 %
|
| 272 |
+
73 7.3340 ± 0.1568 0.00534 ± 0.00200 0.01890 ± 0.00087 4.210 ± 0.126 % 95.165 ± 0.157 %
|
| 273 |
+
74 7.3285 ± 0.1555 0.00541 ± 0.00199 0.01892 ± 0.00086 4.202 ± 0.125 % 95.103 ± 0.157 %
|
| 274 |
+
75 7.3315 ± 0.1546 0.00491 ± 0.00198 0.01909 ± 0.00086 4.232 ± 0.125 % 95.048 ± 0.157 %
|
| 275 |
+
76 7.3921 ± 0.1550 0.00466 ± 0.00196 0.01902 ± 0.00084 4.223 ± 0.123 % 95.046 ± 0.156 %
|
| 276 |
+
77 7.3892 ± 0.1539 0.00453 ± 0.00195 0.01895 ± 0.00083 4.205 ± 0.122 % 95.039 ± 0.155 %
|
| 277 |
+
78 7.4014 ± 0.1533 0.00436 ± 0.00193 0.01887 ± 0.00082 4.187 ± 0.121 % 95.043 ± 0.154 %
|
| 278 |
+
79 7.4119 ± 0.1526 0.00432 ± 0.00192 0.01885 ± 0.00081 4.172 ± 0.120 % 95.036 ± 0.153 %
|
| 279 |
+
80 7.4179 ± 0.1523 0.00488 ± 0.00196 0.01887 ± 0.00081 4.172 ± 0.119 % 95.025 ± 0.152 %
|
| 280 |
+
81 7.3942 ± 0.1508 0.00478 ± 0.00195 0.01890 ± 0.00080 4.168 ± 0.118 % 95.013 ± 0.151 %
|
| 281 |
+
82 7.3743 ± 0.1493 0.00501 ± 0.00193 0.01880 ± 0.00079 4.156 ± 0.117 % 95.041 ± 0.150 %
|
| 282 |
+
83 7.4056 ± 0.1488 0.00494 ± 0.00191 0.01868 ± 0.00078 4.142 ± 0.116 % 95.025 ± 0.149 %
|
| 283 |
+
84 7.4232 ± 0.1480 0.00513 ± 0.00189 0.01854 ± 0.00077 4.124 ± 0.115 % 95.037 ± 0.148 %
|
| 284 |
+
85 7.4199 ± 0.1468 0.00523 ± 0.00187 0.01840 ± 0.00076 4.106 ± 0.114 % 95.054 ± 0.147 %
|
| 285 |
+
86 7.3532 ± 0.1442 0.00524 ± 0.00185 0.01833 ± 0.00075 4.095 ± 0.113 % 95.075 ± 0.146 %
|
| 286 |
+
87 7.2960 ± 0.1418 0.00523 ± 0.00184 0.01821 ± 0.00075 4.079 ± 0.112 % 95.064 ± 0.145 %
|
| 287 |
+
88 7.2358 ± 0.1394 0.00528 ± 0.00182 0.01812 ± 0.00074 4.077 ± 0.111 % 95.076 ± 0.144 %
|
| 288 |
+
89 7.1639 ± 0.1368 0.00528 ± 0.00180 0.01802 ± 0.00073 4.065 ± 0.110 % 95.078 ± 0.144 %
|
| 289 |
+
90 7.1093 ± 0.1346 0.00534 ± 0.00178 0.01789 ± 0.00072 4.050 ± 0.109 % 95.102 ± 0.142 %
|
| 290 |
+
91 7.0586 ± 0.1326 0.00527 ± 0.00177 0.01777 ± 0.00071 4.036 ± 0.109 % 95.113 ± 0.142 %
|
| 291 |
+
92 7.0013 ± 0.1304 0.00519 ± 0.00175 0.01767 ± 0.00071 4.032 ± 0.108 % 95.102 ± 0.141 %
|
| 292 |
+
93 7.0169 ± 0.1302 0.00491 ± 0.00176 0.01814 ± 0.00073 4.036 ± 0.107 % 95.058 ± 0.141 %
|
| 293 |
+
94 7.0461 ± 0.1299 0.00473 ± 0.00174 0.01804 ± 0.00073 4.023 ± 0.106 % 95.048 ± 0.140 %
|
| 294 |
+
95 7.1535 ± 0.1316 0.00467 ± 0.00173 0.01807 ± 0.00072 4.014 ± 0.105 % 95.059 ± 0.139 %
|
| 295 |
+
96 7.2453 ± 0.1328 0.00447 ± 0.00172 0.01810 ± 0.00071 4.001 ± 0.104 % 95.012 ± 0.139 %
|
| 296 |
+
97 7.3222 ± 0.1336 0.00435 ± 0.00171 0.01804 ± 0.00070 3.986 ± 0.104 % 95.007 ± 0.138 %
|
| 297 |
+
98 7.4582 ± 0.1360 0.00461 ± 0.00172 0.01801 ± 0.00070 3.977 ± 0.103 % 94.990 ± 0.138 %
|
| 298 |
+
99 7.5764 ± 0.1377 0.00478 ± 0.00171 0.01802 ± 0.00069 3.965 ± 0.103 % 94.961 ± 0.138 %
|
| 299 |
+
100 7.6132 ± 0.1378 0.00473 ± 0.00170 0.01806 ± 0.00069 3.962 ± 0.102 % 94.937 ± 0.137 %
|
| 300 |
+
101 7.6505 ± 0.1379 0.00479 ± 0.00169 0.01805 ± 0.00068 3.952 ± 0.101 % 94.906 ± 0.137 %
|
| 301 |
+
102 7.7119 ± 0.1389 0.00489 ± 0.00169 0.01804 ± 0.00068 3.945 ± 0.100 % 94.913 ± 0.136 %
|
| 302 |
+
103 7.6870 ± 0.1379 0.00503 ± 0.00168 0.01795 ± 0.00067 3.934 ± 0.100 % 94.929 ± 0.135 %
|
| 303 |
+
104 7.6290 ± 0.1360 0.00496 ± 0.00167 0.01792 ± 0.00066 3.943 ± 0.099 % 94.947 ± 0.135 %
|
| 304 |
+
105 7.5176 ± 0.1332 0.00475 ± 0.00166 0.01792 ± 0.00066 3.968 ± 0.099 % 94.980 ± 0.133 %
|
| 305 |
+
106 7.3889 ± 0.1299 0.00479 ± 0.00165 0.01780 ± 0.00065 3.962 ± 0.098 % 95.024 ± 0.132 %
|
| 306 |
+
107 7.4460 ± 0.1303 0.00475 ± 0.00164 0.01770 ± 0.00065 3.946 ± 0.098 % 95.023 ± 0.132 %
|
| 307 |
+
108 7.4591 ± 0.1299 0.00473 ± 0.00162 0.01758 ± 0.00064 3.934 ± 0.097 % 95.029 ± 0.131 %
|
| 308 |
+
109 7.4786 ± 0.1297 0.00462 ± 0.00161 0.01751 ± 0.00064 3.923 ± 0.097 % 95.042 ± 0.130 %
|
| 309 |
+
110 7.5145 ± 0.1298 0.00472 ± 0.00160 0.01742 ± 0.00063 3.914 ± 0.096 % 95.055 ± 0.129 %
|
| 310 |
+
111 7.5617 ± 0.1299 0.00466 ± 0.00159 0.01734 ± 0.00062 3.901 ± 0.095 % 95.072 ± 0.129 %
|
| 311 |
+
112 7.5704 ± 0.1294 0.00447 ± 0.00158 0.01726 ± 0.00062 3.892 ± 0.095 % 95.060 ± 0.128 %
|
| 312 |
+
113 7.5808 ± 0.1289 0.00448 ± 0.00156 0.01718 ± 0.00061 3.881 ± 0.094 % 95.058 ± 0.128 %
|
| 313 |
+
114 7.5984 ± 0.1287 0.00446 ± 0.00156 0.01713 ± 0.00061 3.870 ± 0.094 % 95.060 ± 0.127 %
|
| 314 |
+
115 7.5813 ± 0.1278 0.00455 ± 0.00156 0.01722 ± 0.00061 3.883 ± 0.093 % 95.049 ± 0.127 %
|
| 315 |
+
116 7.5715 ± 0.1271 0.00465 ± 0.00156 0.01741 ± 0.00060 3.923 ± 0.092 % 95.024 ± 0.126 %
|
| 316 |
+
117 7.4708 ± 0.1246 0.00483 ± 0.00155 0.01752 ± 0.00060 3.962 ± 0.091 % 95.026 ± 0.126 %
|
| 317 |
+
118 7.3742 ± 0.1221 0.00466 ± 0.00155 0.01771 ± 0.00060 4.020 ± 0.091 % 95.022 ± 0.125 %
|
| 318 |
+
119 7.2774 ± 0.1197 0.00490 ± 0.00155 0.01790 ± 0.00059 4.090 ± 0.093 % 95.027 ± 0.125 %
|
| 319 |
+
120 7.1952 ± 0.1176 0.00489 ± 0.00155 0.01820 ± 0.00059 4.189 ± 0.094 % 95.010 ± 0.124 %
|
| 320 |
+
121 7.1130 ± 0.1156 0.00503 ± 0.00154 0.01839 ± 0.00059 4.249 ± 0.093 % 95.015 ± 0.124 %
|
| 321 |
+
|
| 322 |
+
====== Perplexity statistics ======
|
| 323 |
+
Mean PPL(Q) : 7.112958 ± 0.115626
|
| 324 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 325 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.55%
|
| 326 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.005034 ± 0.001544
|
| 327 |
+
Mean PPL(Q)/PPL(base) : 1.005047 ± 0.001552
|
| 328 |
+
Mean PPL(Q)-PPL(base) : 0.035718 ± 0.011009
|
| 329 |
+
|
| 330 |
+
====== KL divergence statistics ======
|
| 331 |
+
Mean KLD: 0.018391 ± 0.000591
|
| 332 |
+
Maximum KLD: 9.594548
|
| 333 |
+
99.9% KLD: 1.094286
|
| 334 |
+
99.0% KLD: 0.220442
|
| 335 |
+
95.0% KLD: 0.064967
|
| 336 |
+
90.0% KLD: 0.034229
|
| 337 |
+
Median KLD: 0.004147
|
| 338 |
+
10.0% KLD: 0.000018
|
| 339 |
+
5.0% KLD: 0.000003
|
| 340 |
+
1.0% KLD: -0.000000
|
| 341 |
+
0.1% KLD: -0.000003
|
| 342 |
+
Minimum KLD: -0.000039
|
| 343 |
+
|
| 344 |
+
====== Token probability statistics ======
|
| 345 |
+
Mean Δp: -0.049 ± 0.024 %
|
| 346 |
+
Maximum Δp: 77.764%
|
| 347 |
+
99.9% Δp: 37.605%
|
| 348 |
+
99.0% Δp: 12.226%
|
| 349 |
+
95.0% Δp: 4.283%
|
| 350 |
+
90.0% Δp: 2.184%
|
| 351 |
+
75.0% Δp: 0.303%
|
| 352 |
+
Median Δp: -0.000%
|
| 353 |
+
25.0% Δp: -0.343%
|
| 354 |
+
10.0% Δp: -2.347%
|
| 355 |
+
5.0% Δp: -4.557%
|
| 356 |
+
1.0% Δp: -12.721%
|
| 357 |
+
0.1% Δp: -37.281%
|
| 358 |
+
Minimum Δp: -72.369%
|
| 359 |
+
RMS Δp : 4.249 ± 0.093 %
|
| 360 |
+
Same top p: 95.015 ± 0.124 %
|
| 361 |
+
|
| 362 |
+
llama_perf_context_print: load time = 68778.72 ms
|
| 363 |
+
llama_perf_context_print: prompt eval time = 129078.32 ms / 61952 tokens ( 2.08 ms per token, 479.96 tokens per second)
|
| 364 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 365 |
+
llama_perf_context_print: total time = 146114.70 ms / 61953 tokens
|
| 366 |
+
llama_perf_context_print: graphs reused = 0
|
| 367 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 368 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1731 + ( 21983 = 19371 + 112 + 2499) + 420 |
|
| 369 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 1221 + ( 22461 = 18407 + 880 + 3174) + 452 |
|
| 370 |
+
llama_memory_breakdown_print: | - Host | 184759 = 184655 + 0 + 104 |
|
| 371 |
+
```
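The perplexity summary above is internally consistent: to the printed precision, the reported "Mean PPL(Q)/PPL(base)" matches the exponential of the mean log-ratio, and "Mean PPL(Q)-PPL(base)" matches the difference of the two mean perplexities. A minimal Python sanity check, using only values copied from the UD-Q6_K_XL summary above (nothing new measured):

```python
import math

# Values copied from the UD-Q6_K_XL perplexity summary above.
mean_ppl_q = 7.112958       # Mean PPL(Q)
mean_ppl_base = 7.077240    # Mean PPL(base)
mean_ln_ratio = 0.005034    # Mean ln(PPL(Q)/PPL(base))

# exp of the mean log-ratio reproduces the reported ratio of 1.005047 ...
print(f"{math.exp(mean_ln_ratio):.6f}")
# ... and the difference of the means reproduces the reported 0.035718.
print(f"{mean_ppl_q - mean_ppl_base:.6f}")
```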
|
kld_data/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL.md
ADDED
|
@@ -0,0 +1,373 @@
|
| 1 |
+
### MiniMax-M2.5-UD-Q8_K_XL (unsloth)
|
| 2 |
+
|
| 3 |
+
243.42 GiB (9.14 BPW)
|
| 4 |
+
|
| 5 |
+
```txt
|
| 6 |
+
/home/jarvis/development/llama.cpp/build/bin/llama-perplexity --threads 48 --flash-attn on --file /mnt/srv/host/resources/KLD/calibration_datav3.txt --kl-divergence-base /mnt/srv/snowdrift/ref-logits-unsloth-MiniMax-M2.5-BF16-calibration-datav3.bin --kl-divergence --batch-size 4096 --ubatch-size 4096 --model /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL-00001-of-00007.gguf
|
| 7 |
+
ggml_cuda_init: found 2 CUDA devices:
|
| 8 |
+
Device 0: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 9 |
+
Device 1: NVIDIA GeForce RTX 3090, compute capability 8.6, VMM: yes
|
| 10 |
+
build: 8067 (ff4affb4c) with GNU 14.2.1 for Linux x86_64
|
| 11 |
+
common_init_result: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on
|
| 12 |
+
llama_params_fit_impl: projected memory use with initial parameters [MiB]:
|
| 13 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 24135 total, 120537 used, -96666 free vs. target of 1024
|
| 14 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 24135 total, 133115 used, -109243 free vs. target of 1024
|
| 15 |
+
llama_params_fit_impl: projected to use 253653 MiB of device memory vs. 47743 MiB of free device memory
|
| 16 |
+
llama_params_fit_impl: cannot meet free memory targets on all devices, need to use 207957 MiB less in total
|
| 17 |
+
llama_params_fit_impl: context size set by user to 4096 -> no change
|
| 18 |
+
llama_params_fit_impl: with only dense weights in device memory there is a total surplus of 32643 MiB
|
| 19 |
+
llama_params_fit_impl: filling dense-only layers back-to-front:
|
| 20 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 63 layers, 12064 MiB used, 11807 MiB free
|
| 21 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 0 layers, 4659 MiB used, 19211 MiB free
|
| 22 |
+
llama_params_fit_impl: converting dense-only layers to full layers and filling them front-to-back with overflow to next device/system memory:
|
| 23 |
+
llama_params_fit_impl: - CUDA0 (NVIDIA GeForce RTX 3090): 5 layers ( 1 overflowing), 22114 MiB used, 1757 MiB free
|
| 24 |
+
llama_params_fit_impl: - CUDA1 (NVIDIA GeForce RTX 3090): 58 layers (55 overflowing), 22762 MiB used, 1109 MiB free
|
| 25 |
+
llama_params_fit: successfully fit params to free device memory
|
| 26 |
+
llama_params_fit: fitting params to free memory took 4.17 seconds
|
| 27 |
+
llama_model_load_from_file_impl: using device CUDA0 (NVIDIA GeForce RTX 3090) (0000:06:10.0) - 23871 MiB free
|
| 28 |
+
llama_model_load_from_file_impl: using device CUDA1 (NVIDIA GeForce RTX 3090) (0000:06:11.0) - 23871 MiB free
|
| 29 |
+
llama_model_loader: additional 6 GGUFs metadata loaded.
|
| 30 |
+
llama_model_loader: loaded meta data with 53 key-value pairs and 809 tensors from /mnt/srv/snowdrift/gguf/MiniMax-M2.5-GGUF/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL-00001-of-00007.gguf (version GGUF V3 (latest))
|
| 31 |
+
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
|
| 32 |
+
llama_model_loader: - kv 0: general.architecture str = minimax-m2
|
| 33 |
+
llama_model_loader: - kv 1: general.type str = model
|
| 34 |
+
llama_model_loader: - kv 2: general.sampling.top_k i32 = 40
|
| 35 |
+
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
|
| 36 |
+
llama_model_loader: - kv 4: general.sampling.temp f32 = 1.000000
|
| 37 |
+
llama_model_loader: - kv 5: general.name str = Minimax-M2.5
|
| 38 |
+
llama_model_loader: - kv 6: general.basename str = Minimax-M2.5
|
| 39 |
+
llama_model_loader: - kv 7: general.quantized_by str = Unsloth
|
| 40 |
+
llama_model_loader: - kv 8: general.size_label str = 256x4.9B
|
| 41 |
+
llama_model_loader: - kv 9: general.license str = other
|
| 42 |
+
llama_model_loader: - kv 10: general.license.name str = modified-mit
|
| 43 |
+
llama_model_loader: - kv 11: general.license.link str = https://github.com/MiniMax-AI/MiniMax...
|
| 44 |
+
llama_model_loader: - kv 12: general.repo_url str = https://huggingface.co/unsloth
|
| 45 |
+
llama_model_loader: - kv 13: general.base_model.count u32 = 1
|
| 46 |
+
llama_model_loader: - kv 14: general.base_model.0.name str = MiniMax M2.5
|
| 47 |
+
llama_model_loader: - kv 15: general.base_model.0.organization str = MiniMaxAI
|
| 48 |
+
llama_model_loader: - kv 16: general.base_model.0.repo_url str = https://huggingface.co/MiniMaxAI/Mini...
|
| 49 |
+
llama_model_loader: - kv 17: general.tags arr[str,2] = ["unsloth", "text-generation"]
|
| 50 |
+
llama_model_loader: - kv 18: minimax-m2.block_count u32 = 62
|
| 51 |
+
llama_model_loader: - kv 19: minimax-m2.context_length u32 = 196608
|
| 52 |
+
llama_model_loader: - kv 20: minimax-m2.embedding_length u32 = 3072
|
| 53 |
+
llama_model_loader: - kv 21: minimax-m2.feed_forward_length u32 = 1536
|
| 54 |
+
llama_model_loader: - kv 22: minimax-m2.attention.head_count u32 = 48
|
| 55 |
+
llama_model_loader: - kv 23: minimax-m2.attention.head_count_kv u32 = 8
|
| 56 |
+
llama_model_loader: - kv 24: minimax-m2.rope.freq_base f32 = 5000000.000000
|
| 57 |
+
llama_model_loader: - kv 25: minimax-m2.attention.layer_norm_rms_epsilon f32 = 0.000001
|
| 58 |
+
llama_model_loader: - kv 26: minimax-m2.expert_count u32 = 256
|
| 59 |
+
llama_model_loader: - kv 27: minimax-m2.expert_used_count u32 = 8
|
| 60 |
+
llama_model_loader: - kv 28: minimax-m2.expert_gating_func u32 = 2
|
| 61 |
+
llama_model_loader: - kv 29: minimax-m2.attention.key_length u32 = 128
|
| 62 |
+
llama_model_loader: - kv 30: minimax-m2.attention.value_length u32 = 128
|
| 63 |
+
llama_model_loader: - kv 31: minimax-m2.expert_feed_forward_length u32 = 1536
|
| 64 |
+
llama_model_loader: - kv 32: minimax-m2.rope.dimension_count u32 = 64
|
| 65 |
+
llama_model_loader: - kv 33: tokenizer.ggml.model str = gpt2
|
| 66 |
+
llama_model_loader: - kv 34: tokenizer.ggml.pre str = minimax-m2
|
| 67 |
+
llama_model_loader: - kv 35: tokenizer.ggml.tokens arr[str,200064] = ["Ā", "ā", "Ă", "ă", "Ą", "ą", ...
|
| 68 |
+
llama_model_loader: - kv 36: tokenizer.ggml.token_type arr[i32,200064] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
|
| 69 |
+
llama_model_loader: - kv 37: tokenizer.ggml.merges arr[str,199744] = ["Ġ Ġ", "Ġ t", "Ġ a", "i n", "e r...
|
| 70 |
+
llama_model_loader: - kv 38: tokenizer.ggml.bos_token_id u32 = 200034
|
| 71 |
+
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 200020
|
| 72 |
+
llama_model_loader: - kv 40: tokenizer.ggml.unknown_token_id u32 = 200021
|
| 73 |
+
llama_model_loader: - kv 41: tokenizer.ggml.padding_token_id u32 = 200004
|
| 74 |
+
llama_model_loader: - kv 42: tokenizer.ggml.add_bos_token bool = true
|
| 75 |
+
llama_model_loader: - kv 43: tokenizer.chat_template str = {# Unsloth template fixes #}\n{# -----...
|
| 76 |
+
llama_model_loader: - kv 44: general.quantization_version u32 = 2
|
| 77 |
+
llama_model_loader: - kv 45: general.file_type u32 = 7
|
| 78 |
+
llama_model_loader: - kv 46: quantize.imatrix.file str = MiniMax-M2.5-GGUF/imatrix_unsloth.gguf
|
| 79 |
+
llama_model_loader: - kv 47: quantize.imatrix.dataset str = unsloth_calibration_MiniMax-M2.5.txt
|
| 80 |
+
llama_model_loader: - kv 48: quantize.imatrix.entries_count u32 = 496
|
| 81 |
+
llama_model_loader: - kv 49: quantize.imatrix.chunks_count u32 = 81
|
| 82 |
+
llama_model_loader: - kv 50: split.no u16 = 0
|
| 83 |
+
llama_model_loader: - kv 51: split.tensors.count i32 = 809
|
| 84 |
+
llama_model_loader: - kv 52: split.count u16 = 7
|
| 85 |
+
llama_model_loader: - type f32: 373 tensors
|
| 86 |
+
llama_model_loader: - type f16: 27 tensors
|
| 87 |
+
llama_model_loader: - type q8_0: 409 tensors
|
| 88 |
+
print_info: file format = GGUF V3 (latest)
|
| 89 |
+
print_info: file type = Q8_0
|
| 90 |
+
print_info: file size = 243.42 GiB (9.14 BPW)
|
| 91 |
+
load: 0 unused tokens
|
| 92 |
+
load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect
|
| 93 |
+
load: printing all EOG tokens:
|
| 94 |
+
load: - 200004 ('<fim_pad>')
|
| 95 |
+
load: - 200005 ('<reponame>')
|
| 96 |
+
load: - 200020 ('[e~[')
|
| 97 |
+
load: special tokens cache size = 54
|
| 98 |
+
load: token to piece cache size = 1.3355 MB
|
| 99 |
+
print_info: arch = minimax-m2
|
| 100 |
+
print_info: vocab_only = 0
|
| 101 |
+
print_info: no_alloc = 0
|
| 102 |
+
print_info: n_ctx_train = 196608
|
| 103 |
+
print_info: n_embd = 3072
|
| 104 |
+
print_info: n_embd_inp = 3072
|
| 105 |
+
print_info: n_layer = 62
|
| 106 |
+
print_info: n_head = 48
|
| 107 |
+
print_info: n_head_kv = 8
|
| 108 |
+
print_info: n_rot = 64
|
| 109 |
+
print_info: n_swa = 0
|
| 110 |
+
print_info: is_swa_any = 0
|
| 111 |
+
print_info: n_embd_head_k = 128
|
| 112 |
+
print_info: n_embd_head_v = 128
|
| 113 |
+
print_info: n_gqa = 6
|
| 114 |
+
print_info: n_embd_k_gqa = 1024
|
| 115 |
+
print_info: n_embd_v_gqa = 1024
|
| 116 |
+
print_info: f_norm_eps = 0.0e+00
|
| 117 |
+
print_info: f_norm_rms_eps = 1.0e-06
|
| 118 |
+
print_info: f_clamp_kqv = 0.0e+00
|
| 119 |
+
print_info: f_max_alibi_bias = 0.0e+00
|
| 120 |
+
print_info: f_logit_scale = 0.0e+00
|
| 121 |
+
print_info: f_attn_scale = 0.0e+00
|
| 122 |
+
print_info: n_ff = 1536
|
| 123 |
+
print_info: n_expert = 256
|
| 124 |
+
print_info: n_expert_used = 8
|
| 125 |
+
print_info: n_expert_groups = 0
|
| 126 |
+
print_info: n_group_used = 0
|
| 127 |
+
print_info: causal attn = 1
|
| 128 |
+
print_info: pooling type = 0
|
| 129 |
+
print_info: rope type = 2
|
| 130 |
+
print_info: rope scaling = linear
|
| 131 |
+
print_info: freq_base_train = 5000000.0
|
| 132 |
+
print_info: freq_scale_train = 1
|
| 133 |
+
print_info: n_ctx_orig_yarn = 196608
|
| 134 |
+
print_info: rope_yarn_log_mul = 0.0000
|
| 135 |
+
print_info: rope_finetuned = unknown
|
| 136 |
+
print_info: model type = 230B.A10B
|
| 137 |
+
print_info: model params = 228.69 B
|
| 138 |
+
print_info: general.name = Minimax-M2.5
|
| 139 |
+
print_info: vocab type = BPE
|
| 140 |
+
print_info: n_vocab = 200064
|
| 141 |
+
print_info: n_merges = 199744
|
| 142 |
+
print_info: BOS token = 200034 ']~!b['
|
| 143 |
+
print_info: EOS token = 200020 '[e~['
|
| 144 |
+
print_info: UNK token = 200021 ']!d~['
|
| 145 |
+
print_info: PAD token = 200004 '<fim_pad>'
|
| 146 |
+
print_info: LF token = 10 'Ċ'
|
| 147 |
+
print_info: FIM PRE token = 200001 '<fim_prefix>'
|
| 148 |
+
print_info: FIM SUF token = 200003 '<fim_suffix>'
|
| 149 |
+
print_info: FIM MID token = 200002 '<fim_middle>'
|
| 150 |
+
print_info: FIM PAD token = 200004 '<fim_pad>'
|
| 151 |
+
print_info: FIM REP token = 200005 '<reponame>'
|
| 152 |
+
print_info: EOG token = 200004 '<fim_pad>'
|
| 153 |
+
print_info: EOG token = 200005 '<reponame>'
|
| 154 |
+
print_info: EOG token = 200020 '[e~['
|
| 155 |
+
print_info: max token length = 256
|
| 156 |
+
load_tensors: loading model tensors, this can take a while... (mmap = true, direct_io = false)
|
| 157 |
+
load_tensors: offloading output layer to GPU
|
| 158 |
+
load_tensors: offloading 61 repeating layers to GPU
|
| 159 |
+
load_tensors: offloaded 63/63 layers to GPU
|
| 160 |
+
load_tensors: CPU_Mapped model buffer size = 45808.37 MiB
|
| 161 |
+
load_tensors: CPU_Mapped model buffer size = 47084.12 MiB
|
| 162 |
+
load_tensors: CPU_Mapped model buffer size = 47128.79 MiB
|
| 163 |
+
load_tensors: CPU_Mapped model buffer size = 47435.82 MiB
|
| 164 |
+
load_tensors: CPU_Mapped model buffer size = 46627.14 MiB
|
| 165 |
+
load_tensors: CPU_Mapped model buffer size = 13894.38 MiB
|
| 166 |
+
load_tensors: CUDA0 model buffer size = 17374.38 MiB
|
| 167 |
+
load_tensors: CUDA1 model buffer size = 18676.27 MiB
|
| 168 |
+
....................................................................................................
|
| 169 |
+
common_init_result: added <fim_pad> logit bias = -inf
|
| 170 |
+
common_init_result: added <reponame> logit bias = -inf
|
| 171 |
+
common_init_result: added [e~[ logit bias = -inf
|
| 172 |
+
llama_context: constructing llama_context
|
| 173 |
+
llama_context: n_seq_max = 8
|
| 174 |
+
llama_context: n_ctx = 4096
|
| 175 |
+
llama_context: n_ctx_seq = 512
|
| 176 |
+
llama_context: n_batch = 4096
|
| 177 |
+
llama_context: n_ubatch = 4096
|
| 178 |
+
llama_context: causal_attn = 1
|
| 179 |
+
llama_context: flash_attn = enabled
|
| 180 |
+
llama_context: kv_unified = false
|
| 181 |
+
llama_context: freq_base = 5000000.0
|
| 182 |
+
llama_context: freq_scale = 1
|
| 183 |
+
llama_context: n_ctx_seq (512) < n_ctx_train (196608) -- the full capacity of the model will not be utilized
|
| 184 |
+
llama_context: CUDA_Host output buffer size = 6.11 MiB
|
| 185 |
+
llama_kv_cache: CUDA0 KV buffer size = 80.00 MiB
|
| 186 |
+
llama_kv_cache: CUDA1 KV buffer size = 912.00 MiB
|
| 187 |
+
llama_kv_cache: size = 992.00 MiB ( 512 cells, 62 layers, 8/8 seqs), K (f16): 496.00 MiB, V (f16): 496.00 MiB
|
| 188 |
+
sched_reserve: reserving ...
|
| 189 |
+
sched_reserve: CUDA0 compute buffer size = 4660.00 MiB
|
| 190 |
+
sched_reserve: CUDA1 compute buffer size = 3174.00 MiB
|
| 191 |
+
sched_reserve: CUDA_Host compute buffer size = 104.11 MiB
|
| 192 |
+
sched_reserve: graph nodes = 4099
|
| 193 |
+
sched_reserve: graph splits = 217 (with bs=4096), 111 (with bs=1)
|
| 194 |
+
sched_reserve: reserve took 22.29 ms, sched copies = 1
|
| 195 |
+
common_init_from_params: warming up the model with an empty run - please wait ... (--no-warmup to disable)
|
| 196 |
+
|
| 197 |
+
system_info: n_threads = 48 (n_threads_batch = 48) / 56 | CUDA : ARCHS = 860 | USE_GRAPHS = 1 | PEER_MAX_BATCH_SIZE = 128 | CPU : SSE3 = 1 | SSSE3 = 1 | AVX = 1 | AVX_VNNI = 1 | AVX2 = 1 | F16C = 1 | FMA = 1 | BMI2 = 1 | AVX512 = 1 | AVX512_VBMI = 1 | AVX512_VNNI = 1 | AVX512_BF16 = 1 | LLAMAFILE = 1 | OPENMP = 1 | REPACK = 1 |
|
| 198 |
+
kl_divergence: computing over 121 chunks, n_ctx=512, batch_size=4096, n_seq=8
|
| 199 |
+
kl_divergence: 12.01 seconds per pass - ETA 3.02 minutes
|
| 200 |
+
|
| 201 |
+
chunk PPL ln(PPL(Q)/PPL(base)) KL Divergence Δp RMS Same top p
|
| 202 |
+
1 6.4002 ± 1.2181 0.00426 ± 0.01411 0.00979 ± 0.00133 3.306 ± 0.393 % 95.294 ± 1.329 %
|
| 203 |
+
2 4.7051 ± 0.5627 0.00645 ± 0.00797 0.00755 ± 0.00080 2.676 ± 0.263 % 96.667 ± 0.796 %
|
| 204 |
+
3 4.5268 ± 0.4475 0.00853 ± 0.00790 0.00702 ± 0.00059 2.682 ± 0.191 % 96.340 ± 0.679 %
|
| 205 |
+
4 5.1666 ± 0.4593 0.00743 ± 0.00648 0.00762 ± 0.00062 2.696 ± 0.176 % 96.275 ± 0.593 %
|
| 206 |
+
5 4.9713 ± 0.3968 0.01546 ± 0.00737 0.00819 ± 0.00066 2.745 ± 0.161 % 96.392 ± 0.522 %
|
| 207 |
+
6 6.0536 ± 0.4730 0.00946 ± 0.00677 0.00982 ± 0.00067 2.749 ± 0.147 % 96.275 ± 0.484 %
|
| 208 |
+
7 5.6250 ± 0.3938 0.00788 ± 0.00603 0.00990 ± 0.00060 2.748 ± 0.132 % 95.910 ± 0.469 %
|
| 209 |
+
8 6.3548 ± 0.4225 0.00636 ± 0.00545 0.00977 ± 0.00053 2.681 ± 0.119 % 95.931 ± 0.438 %
|
| 210 |
+
9 6.2443 ± 0.3873 0.00809 ± 0.00502 0.00949 ± 0.00049 2.650 ± 0.112 % 95.948 ± 0.412 %
|
| 211 |
+
10 5.7022 ± 0.3288 0.00633 ± 0.00463 0.00922 ± 0.00045 2.687 ± 0.105 % 96.039 ± 0.386 %
|
| 212 |
+
11 6.2439 ± 0.3484 0.00527 ± 0.00429 0.00905 ± 0.00041 2.629 ± 0.098 % 96.114 ± 0.365 %
|
| 213 |
+
12 6.9141 ± 0.3748 0.00573 ± 0.00414 0.00982 ± 0.00054 2.637 ± 0.094 % 95.980 ± 0.355 %
|
| 214 |
+
13 7.1882 ± 0.3715 0.00734 ± 0.00391 0.00977 ± 0.00050 2.601 ± 0.089 % 95.777 ± 0.349 %
|
| 215 |
+
14 7.7446 ± 0.3903 0.00665 ± 0.00378 0.00969 ± 0.00047 2.586 ± 0.086 % 95.742 ± 0.338 %
|
| 216 |
+
15 8.1155 ± 0.3957 0.00730 ± 0.00362 0.01005 ± 0.00048 2.680 ± 0.102 % 95.582 ± 0.332 %
|
| 217 |
+
16 8.3772 ± 0.3960 0.00683 ± 0.00343 0.00979 ± 0.00045 2.654 ± 0.097 % 95.588 ± 0.322 %
|
| 218 |
+
17 8.6020 ± 0.3969 0.00620 ± 0.00331 0.00981 ± 0.00043 2.616 ± 0.094 % 95.663 ± 0.309 %
|
| 219 |
+
18 8.1043 ± 0.3607 0.00583 ± 0.00317 0.00981 ± 0.00043 2.673 ± 0.121 % 95.730 ± 0.298 %
|
| 220 |
+
19 8.2293 ± 0.3565 0.00533 ± 0.00307 0.00962 ± 0.00041 2.665 ± 0.116 % 95.624 ± 0.294 %
|
| 221 |
+
20 8.2868 ± 0.3499 0.00442 ± 0.00300 0.00991 ± 0.00041 2.661 ± 0.111 % 95.706 ± 0.284 %
|
| 222 |
+
21 8.2623 ± 0.3401 0.00403 ± 0.00291 0.01005 ± 0.00041 2.726 ± 0.118 % 95.761 ± 0.275 %
|
| 223 |
+
22 8.5743 ± 0.3480 0.00342 ± 0.00288 0.01053 ± 0.00044 2.806 ± 0.125 % 95.633 ± 0.273 %
|
| 224 |
+
23 8.5997 ± 0.3424 0.00518 ± 0.00295 0.01146 ± 0.00051 3.077 ± 0.165 % 95.482 ± 0.271 %
|
| 225 |
+
24 9.0008 ± 0.3526 0.00469 ± 0.00287 0.01139 ± 0.00049 3.039 ± 0.160 % 95.474 ± 0.266 %
|
| 226 |
+
25 8.9853 ± 0.3456 0.00437 ± 0.00281 0.01157 ± 0.00048 3.066 ± 0.155 % 95.467 ± 0.261 %
|
| 227 |
+
26 8.3908 ± 0.3134 0.00452 ± 0.00279 0.01201 ± 0.00050 3.371 ± 0.170 % 95.551 ± 0.253 %
|
| 228 |
+
27 7.9506 ± 0.2890 0.00456 ± 0.00281 0.01336 ± 0.00058 3.734 ± 0.188 % 95.483 ± 0.250 %
|
| 229 |
+
28 8.0674 ± 0.2888 0.00546 ± 0.00277 0.01356 ± 0.00057 3.723 ± 0.183 % 95.420 ± 0.247 %
|
| 230 |
+
29 7.9841 ± 0.2806 0.00456 ± 0.00273 0.01360 ± 0.00055 3.694 ± 0.178 % 95.470 ± 0.242 %
|
| 231 |
+
30 7.4670 ± 0.2557 0.00479 ± 0.00267 0.01332 ± 0.00054 3.675 ± 0.175 % 95.608 ± 0.234 %
|
| 232 |
+
31 7.0356 ± 0.2348 0.00465 ± 0.00259 0.01304 ± 0.00053 3.655 ± 0.171 % 95.737 ± 0.227 %
|
| 233 |
+
32 6.8602 ± 0.2235 0.00465 ± 0.00253 0.01291 ± 0.00051 3.657 ± 0.166 % 95.735 ± 0.224 %
|
| 234 |
+
33 6.7267 ± 0.2143 0.00489 ± 0.00248 0.01276 ± 0.00050 3.646 ± 0.162 % 95.746 ± 0.220 %
|
| 235 |
+
34 6.9104 ± 0.2180 0.00481 ± 0.00246 0.01297 ± 0.00049 3.623 ± 0.158 % 95.663 ± 0.219 %
|
| 236 |
+
35 7.0155 ± 0.2201 0.00526 ± 0.00244 0.01329 ± 0.00048 3.651 ± 0.153 % 95.585 ± 0.217 %
|
| 237 |
+
36 7.0755 ± 0.2197 0.00491 ± 0.00239 0.01316 ± 0.00047 3.626 ± 0.150 % 95.610 ± 0.214 %
|
| 238 |
+
37 7.0965 ± 0.2176 0.00558 ± 0.00238 0.01305 ± 0.00046 3.605 ± 0.147 % 95.665 ± 0.210 %
|
| 239 |
+
38 7.3116 ± 0.2222 0.00571 ± 0.00233 0.01309 ± 0.00046 3.625 ± 0.147 % 95.624 ± 0.208 %
|
| 240 |
+
39 7.2617 ± 0.2175 0.00580 ± 0.00231 0.01336 ± 0.00046 3.683 ± 0.145 % 95.606 ± 0.206 %
|
| 241 |
+
40 7.0227 ± 0.2061 0.00726 ± 0.00234 0.01405 ± 0.00047 3.850 ± 0.142 % 95.588 ± 0.203 %
|
| 242 |
+
41 6.8125 ± 0.1962 0.00769 ± 0.00238 0.01459 ± 0.00048 4.008 ± 0.140 % 95.600 ± 0.201 %
|
| 243 |
+
42 6.6033 ± 0.1868 0.00783 ± 0.00237 0.01493 ± 0.00049 4.149 ± 0.145 % 95.621 ± 0.198 %
|
| 244 |
+
43 6.3960 ± 0.1776 0.00707 ± 0.00236 0.01531 ± 0.00049 4.292 ± 0.144 % 95.613 ± 0.196 %
|
| 245 |
+
44 6.3543 ± 0.1737 0.00685 ± 0.00232 0.01510 ± 0.00048 4.259 ± 0.142 % 95.651 ± 0.193 %
|
| 246 |
+
45 6.4985 ± 0.1767 0.00740 ± 0.00231 0.01539 ± 0.00048 4.258 ± 0.140 % 95.573 ± 0.192 %
|
| 247 |
+
46 6.6397 ± 0.1789 0.00682 ± 0.00227 0.01530 ± 0.00047 4.226 ± 0.138 % 95.575 ± 0.190 %
|
| 248 |
+
47 6.7896 ± 0.1815 0.00693 ± 0.00223 0.01511 ± 0.00046 4.190 ± 0.136 % 95.578 ± 0.188 %
|
| 249 |
+
48 6.6770 ± 0.1757 0.00687 ± 0.00219 0.01492 ± 0.00045 4.164 ± 0.134 % 95.621 ± 0.185 %
|
| 250 |
+
49 6.7703 ± 0.1762 0.00573 ± 0.00234 0.01844 ± 0.00148 4.374 ± 0.164 % 95.454 ± 0.186 %
|
| 251 |
+
50 6.8708 ± 0.1781 0.00630 ± 0.00231 0.01837 ± 0.00145 4.352 ± 0.162 % 95.467 ± 0.184 %
|
| 252 |
+
51 6.9787 ± 0.1794 0.00649 ± 0.00228 0.01823 ± 0.00143 4.326 ± 0.160 % 95.479 ± 0.182 %
|
| 253 |
+
52 7.0426 ± 0.1791 0.00630 ± 0.00225 0.01826 ± 0.00140 4.304 ± 0.158 % 95.452 ± 0.181 %
|
| 254 |
+
53 7.1522 ± 0.1802 0.00595 ± 0.00222 0.01810 ± 0.00137 4.281 ± 0.156 % 95.427 ± 0.180 %
|
| 255 |
+
54 7.2070 ± 0.1794 0.00581 ± 0.00219 0.01797 ± 0.00135 4.258 ± 0.154 % 95.410 ± 0.178 %
|
| 256 |
+
55 7.2574 ± 0.1788 0.00574 ± 0.00216 0.01785 ± 0.00132 4.233 ± 0.152 % 95.401 ± 0.177 %
|
| 257 |
+
56 7.2983 ± 0.1782 0.00564 ± 0.00213 0.01772 ± 0.00130 4.215 ± 0.150 % 95.406 ± 0.175 %
|
| 258 |
+
57 7.3010 ± 0.1767 0.00606 ± 0.00211 0.01774 ± 0.00128 4.204 ± 0.148 % 95.432 ± 0.173 %
|
| 259 |
+
58 7.3076 ± 0.1753 0.00559 ± 0.00209 0.01759 ± 0.00126 4.187 ± 0.146 % 95.456 ± 0.171 %
|
| 260 |
+
59 7.2686 ± 0.1726 0.00538 ± 0.00206 0.01736 ± 0.00124 4.159 ± 0.145 % 95.494 ± 0.169 %
|
| 261 |
+
60 7.2778 ± 0.1714 0.00516 ± 0.00204 0.01724 ± 0.00122 4.138 ± 0.143 % 95.464 ± 0.168 %
|
| 262 |
+
61 7.3263 ± 0.1711 0.00516 ± 0.00201 0.01721 ± 0.00120 4.115 ± 0.141 % 95.461 ± 0.167 %
|
| 263 |
+
62 7.3037 ± 0.1694 0.00534 ± 0.00200 0.01716 ± 0.00118 4.120 ± 0.140 % 95.490 ± 0.165 %
|
| 264 |
+
63 7.3504 ± 0.1696 0.00540 ± 0.00199 0.01713 ± 0.00116 4.100 ± 0.139 % 95.481 ± 0.164 %
|
| 265 |
+
64 7.3273 ± 0.1673 0.00481 ± 0.00197 0.01710 ± 0.00115 4.090 ± 0.137 % 95.429 ± 0.163 %
|
| 266 |
+
65 7.3176 ± 0.1659 0.00466 ± 0.00196 0.01703 ± 0.00113 4.079 ± 0.136 % 95.439 ± 0.162 %
|
| 267 |
+
66 7.3548 ± 0.1656 0.00473 ± 0.00195 0.01705 ± 0.00111 4.066 ± 0.134 % 95.389 ± 0.162 %
|
| 268 |
+
67 7.3616 ± 0.1646 0.00471 ± 0.00194 0.01711 ± 0.00110 4.062 ± 0.132 % 95.382 ± 0.161 %
|
| 269 |
+
68 7.3206 ± 0.1622 0.00510 ± 0.00194 0.01700 ± 0.00108 4.040 ± 0.131 % 95.398 ± 0.159 %
|
| 270 |
+
69 7.3509 ± 0.1618 0.00478 ± 0.00192 0.01697 ± 0.00106 4.026 ± 0.130 % 95.431 ± 0.157 %
|
| 271 |
+
70 7.3226 ± 0.1597 0.00470 ± 0.00190 0.01694 ± 0.00105 4.015 ± 0.128 % 95.451 ± 0.156 %
|
| 272 |
+
71 7.3076 ± 0.1583 0.00464 ± 0.00188 0.01697 ± 0.00104 4.016 ± 0.127 % 95.465 ± 0.155 %
|
| 273 |
+
72 7.3288 ± 0.1579 0.00537 ± 0.00190 0.01700 ± 0.00103 4.009 ± 0.126 % 95.430 ± 0.154 %
|
| 274 |
+
73 7.3366 ± 0.1569 0.00570 ± 0.00188 0.01701 ± 0.00101 4.005 ± 0.124 % 95.428 ± 0.153 %
|
| 275 |
+
74 7.3289 ± 0.1556 0.00547 ± 0.00187 0.01705 ± 0.00100 3.999 ± 0.123 % 95.395 ± 0.153 %
|
| 276 |
+
75 7.3310 ± 0.1546 0.00484 ± 0.00186 0.01723 ± 0.00099 4.025 ± 0.122 % 95.357 ± 0.152 %
|
| 277 |
+
76 7.3907 ± 0.1549 0.00447 ± 0.00184 0.01722 ± 0.00098 4.029 ± 0.121 % 95.335 ± 0.151 %
|
| 278 |
+
77 7.3881 ± 0.1539 0.00438 ± 0.00185 0.01717 ± 0.00097 4.015 ± 0.120 % 95.309 ± 0.151 %
|
| 279 |
+
78 7.4025 ± 0.1533 0.00451 ± 0.00184 0.01715 ± 0.00095 4.010 ± 0.119 % 95.304 ± 0.150 %
|
| 280 |
+
79 7.4147 ± 0.1526 0.00470 ± 0.00183 0.01713 ± 0.00094 3.996 ± 0.118 % 95.304 ± 0.149 %
|
| 281 |
+
80 7.4223 ± 0.1524 0.00548 ± 0.00187 0.01718 ± 0.00093 4.008 ± 0.117 % 95.270 ± 0.149 %
|
| 282 |
+
81 7.3993 ± 0.1509 0.00547 ± 0.00186 0.01719 ± 0.00092 3.999 ± 0.116 % 95.270 ± 0.148 %
|
| 283 |
+
82 7.3800 ± 0.1494 0.00578 ± 0.00184 0.01709 ± 0.00091 3.986 ± 0.115 % 95.280 ± 0.147 %
|
| 284 |
+
83 7.4105 ± 0.1489 0.00559 ± 0.00182 0.01698 ± 0.00090 3.969 ± 0.114 % 95.266 ± 0.146 %
|
| 285 |
+
84 7.4274 ± 0.1481 0.00568 ± 0.00180 0.01687 ± 0.00089 3.953 ± 0.113 % 95.271 ± 0.145 %
|
| 286 |
+
85 7.4230 ± 0.1469 0.00565 ± 0.00178 0.01675 ± 0.00088 3.935 ± 0.112 % 95.285 ± 0.144 %
|
| 287 |
+
86 7.3546 ± 0.1442 0.00544 ± 0.00177 0.01674 ± 0.00087 3.939 ± 0.112 % 95.294 ± 0.143 %
|
| 288 |
+
87 7.2963 ± 0.1418 0.00529 ± 0.00175 0.01664 ± 0.00086 3.924 ± 0.111 % 95.290 ± 0.142 %
|
| 289 |
+
88 7.2357 ± 0.1394 0.00527 ± 0.00174 0.01653 ± 0.00085 3.911 ± 0.110 % 95.285 ± 0.141 %
|
| 290 |
+
89 7.1623 ± 0.1367 0.00505 ± 0.00172 0.01647 ± 0.00084 3.901 ± 0.109 % 95.285 ± 0.141 %
|
| 291 |
+
90 7.1072 ± 0.1345 0.00503 ± 0.00171 0.01640 ± 0.00084 3.894 ± 0.109 % 95.298 ± 0.140 %
|
| 292 |
+
91 7.0566 ± 0.1325 0.00499 ± 0.00169 0.01631 ± 0.00083 3.882 ± 0.108 % 95.320 ± 0.139 %
|
| 293 |
+
92 7.0003 ± 0.1303 0.00504 ± 0.00168 0.01625 ± 0.00082 3.876 ± 0.107 % 95.307 ± 0.138 %
|
| 294 |
+
93 7.0188 ± 0.1302 0.00519 ± 0.00169 0.01669 ± 0.00084 3.919 ± 0.111 % 95.256 ± 0.138 %
|
| 295 |
+
94 7.0480 ± 0.1300 0.00501 ± 0.00168 0.01659 ± 0.00083 3.905 ± 0.110 % 95.269 ± 0.137 %
|
| 296 |
+
95 7.1561 ± 0.1317 0.00503 ± 0.00167 0.01671 ± 0.00082 3.908 ± 0.109 % 95.232 ± 0.137 %
|
| 297 |
+
96 7.2469 ± 0.1328 0.00468 ± 0.00167 0.01678 ± 0.00081 3.905 ± 0.108 % 95.184 ± 0.137 %
|
| 298 |
+
97 7.3226 ± 0.1336 0.00441 ± 0.00165 0.01674 ± 0.00081 3.895 ± 0.108 % 95.173 ± 0.136 %
|
| 299 |
+
98 7.4593 ± 0.1360 0.00476 ± 0.00166 0.01667 ± 0.00080 3.880 ± 0.107 % 95.142 ± 0.136 %
|
| 300 |
+
99 7.5767 ± 0.1377 0.00481 ± 0.00165 0.01670 ± 0.00079 3.867 ± 0.106 % 95.108 ± 0.136 %
|
| 301 |
+
100 7.6115 ± 0.1377 0.00450 ± 0.00164 0.01681 ± 0.00079 3.860 ± 0.105 % 95.090 ± 0.135 %
|
| 302 |
+
101 7.6462 ± 0.1378 0.00422 ± 0.00164 0.01704 ± 0.00081 3.912 ± 0.114 % 95.077 ± 0.135 %
|
| 303 |
+
102 7.7083 ± 0.1388 0.00441 ± 0.00163 0.01700 ± 0.00080 3.900 ± 0.113 % 95.098 ± 0.134 %
|
| 304 |
+
103 7.6838 ± 0.1379 0.00460 ± 0.00163 0.01692 ± 0.00079 3.890 ± 0.112 % 95.104 ± 0.133 %
|
| 305 |
+
104 7.6260 ± 0.1360 0.00456 ± 0.00162 0.01688 ± 0.00078 3.898 ± 0.111 % 95.102 ± 0.133 %
|
| 306 |
+
105 7.5147 ± 0.1331 0.00436 ± 0.00161 0.01682 ± 0.00078 3.895 ± 0.110 % 95.126 ± 0.132 %
|
| 307 |
+
106 7.3856 ± 0.1299 0.00434 ± 0.00159 0.01668 ± 0.00077 3.884 ± 0.109 % 95.172 ± 0.130 %
|
| 308 |
+
107 7.4438 ± 0.1303 0.00446 ± 0.00158 0.01659 ± 0.00076 3.870 ± 0.109 % 95.166 ± 0.130 %
|
| 309 |
+
108 7.4563 ± 0.1299 0.00436 ± 0.00157 0.01649 ± 0.00075 3.857 ± 0.108 % 95.182 ± 0.129 %
|
| 310 |
+
109 7.4762 ± 0.1297 0.00429 ± 0.00156 0.01643 ± 0.00075 3.849 ± 0.107 % 95.186 ± 0.128 %
|
| 311 |
+
110 7.5111 ± 0.1297 0.00427 ± 0.00155 0.01637 ± 0.00074 3.840 ± 0.107 % 95.194 ± 0.128 %
|
| 312 |
+
111 7.5588 ± 0.1299 0.00428 ± 0.00153 0.01630 ± 0.00073 3.829 ± 0.106 % 95.199 ± 0.127 %
|
| 313 |
+
112 7.5681 ± 0.1294 0.00417 ± 0.00152 0.01622 ± 0.00073 3.821 ± 0.105 % 95.196 ± 0.127 %
|
| 314 |
+
113 7.5787 ± 0.1288 0.00420 ± 0.00151 0.01617 ± 0.00072 3.813 ± 0.105 % 95.187 ± 0.126 %
|
| 315 |
+
114 7.5965 ± 0.1287 0.00421 ± 0.00151 0.01613 ± 0.00072 3.802 ± 0.104 % 95.191 ± 0.125 %
|
| 316 |
+
115 7.5807 ± 0.1278 0.00447 ± 0.00150 0.01617 ± 0.00071 3.815 ± 0.103 % 95.171 ± 0.125 %
|
| 317 |
+
116 7.5699 ± 0.1271 0.00443 ± 0.00150 0.01636 ± 0.00071 3.870 ± 0.102 % 95.145 ± 0.125 %
|
| 318 |
+
117 7.4682 ± 0.1245 0.00448 ± 0.00150 0.01650 ± 0.00070 3.917 ± 0.101 % 95.160 ± 0.124 %
|
| 319 |
+
118 7.3729 ± 0.1221 0.00449 ± 0.00150 0.01662 ± 0.00070 3.962 ± 0.100 % 95.168 ± 0.124 %
|
| 320 |
+
119 7.2751 ± 0.1197 0.00459 ± 0.00149 0.01670 ± 0.00069 3.998 ± 0.100 % 95.182 ± 0.123 %
|
| 321 |
+
120 7.1936 ± 0.1177 0.00468 ± 0.00149 0.01694 ± 0.00069 4.088 ± 0.100 % 95.180 ± 0.122 %
|
| 322 |
+
121 7.1105 ± 0.1156 0.00468 ± 0.00149 0.01707 ± 0.00069 4.130 ± 0.099 % 95.197 ± 0.122 %
|
| 323 |
+
|
| 324 |
+
====== Perplexity statistics ======
|
| 325 |
+
Mean PPL(Q) : 7.110469 ± 0.115634
|
| 326 |
+
Mean PPL(base) : 7.077240 ± 0.114279
|
| 327 |
+
Cor(ln(PPL(Q)), ln(PPL(base))): 99.58%
|
| 328 |
+
Mean ln(PPL(Q)/PPL(base)) : 0.004684 ± 0.001486
|
| 329 |
+
Mean PPL(Q)/PPL(base) : 1.004695 ± 0.001493
|
| 330 |
+
Mean PPL(Q)-PPL(base) : 0.033229 ± 0.010599
|
| 331 |
+
|
| 332 |
+
====== KL divergence statistics ======
|
| 333 |
+
Mean KLD: 0.017066 ± 0.000686
|
| 334 |
+
Maximum KLD: 14.732626
|
| 335 |
+
99.9% KLD: 0.835423
|
| 336 |
+
99.0% KLD: 0.206448
|
| 337 |
+
95.0% KLD: 0.057932
|
| 338 |
+
90.0% KLD: 0.031650
|
| 339 |
+
Median KLD: 0.004152
|
| 340 |
+
10.0% KLD: 0.000017
|
| 341 |
+
5.0% KLD: 0.000003
|
| 342 |
+
1.0% KLD: -0.000000
|
| 343 |
+
0.1% KLD: -0.000002
|
| 344 |
+
Minimum KLD: -0.000012
|
| 345 |
+
|
| 346 |
+
====== Token probability statistics ======
|
| 347 |
+
Mean Δp: 0.002 ± 0.024 %
|
| 348 |
+
Maximum Δp: 97.571%
|
| 349 |
+
99.9% Δp: 36.779%
|
| 350 |
+
99.0% Δp: 12.020%
|
| 351 |
+
95.0% Δp: 4.292%
|
| 352 |
+
90.0% Δp: 2.248%
|
| 353 |
+
75.0% Δp: 0.323%
|
| 354 |
+
Median Δp: 0.000%
|
| 355 |
+
25.0% Δp: -0.341%
|
| 356 |
+
10.0% Δp: -2.288%
|
| 357 |
+
5.0% Δp: -4.339%
|
| 358 |
+
1.0% Δp: -11.905%
|
| 359 |
+
0.1% Δp: -34.207%
|
| 360 |
+
Minimum Δp: -63.506%
|
| 361 |
+
RMS Δp : 4.130 ± 0.099 %
|
| 362 |
+
Same top p: 95.197 ± 0.122 %
|
| 363 |
+
|
| 364 |
+
llama_perf_context_print: load time = 92183.91 ms
|
| 365 |
+
llama_perf_context_print: prompt eval time = 167490.60 ms / 61952 tokens ( 2.70 ms per token, 369.88 tokens per second)
|
| 366 |
+
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
|
| 367 |
+
llama_perf_context_print: total time = 186641.25 ms / 61953 tokens
|
| 368 |
+
llama_perf_context_print: graphs reused = 0
|
| 369 |
+
llama_memory_breakdown_print: | memory breakdown [MiB] | total free self model context compute unaccounted |
|
| 370 |
+
llama_memory_breakdown_print: | - CUDA0 (RTX 3090) | 24135 = 1213 + ( 22114 = 17374 + 80 + 4659) + 806 |
|
| 371 |
+
llama_memory_breakdown_print: | - CUDA1 (RTX 3090) | 24135 = 237 + ( 22762 = 18676 + 912 + 3174) + 1135 |
|
| 372 |
+
llama_memory_breakdown_print: | - Host | 248082 = 247978 + 0 + 104 |
|
| 373 |
+
```
|
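For comparing quants side by side, the headline numbers in each of these logs (file size/BPW, mean PPL, mean KLD, RMS Δp, same-top-p) can be scraped with a few regular expressions. A rough sketch, assuming the log wording shown above; the exact field names can shift between llama.cpp builds, and the path in the usage comment is only illustrative:

```python
import re
from pathlib import Path

# Field patterns copied from the llama-perplexity output shown above
# (assumed stable; adjust if a different llama.cpp build words them differently).
PATTERNS = {
    "file_size_gib": r"file size\s*=\s*([\d.]+) GiB",
    "bpw":           r"file size\s*=\s*[\d.]+ GiB \(([\d.]+) BPW\)",
    "mean_ppl_q":    r"Mean PPL\(Q\)\s*:\s*([\d.]+)",
    "mean_kld":      r"Mean KLD:\s*([\d.]+)",
    "rms_delta_p":   r"RMS Δp\s*:\s*([\d.]+)",
    "same_top_p":    r"Same top p:\s*([\d.]+)",
}

def parse_kld_log(path: str) -> dict:
    """Extract headline metrics from one per-quant .md log."""
    text = Path(path).read_text(encoding="utf-8")
    result = {}
    for name, pattern in PATTERNS.items():
        match = re.search(pattern, text)
        result[name] = float(match.group(1)) if match else None
    return result

# Illustrative usage (path follows this upload's layout):
# print(parse_kld_log("kld_data/unsloth/UD-Q8_K_XL/MiniMax-M2.5-UD-Q8_K_XL.md"))
```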