Delete model_diff
Browse files- model_diff/MiniMaxAI_MiniMax-M2.5-2.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-3.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-4.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-5.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-6.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-7.0bpw.txt +0 -87
- model_diff/MiniMaxAI_MiniMax-M2.5-8.0bpw.txt +0 -87
model_diff/MiniMaxAI_MiniMax-M2.5-2.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.007858 max_diff/norm: 0.001212 sqnr: 43.922771 cos_err: 0.000015
|
| 4 |
-
-- model.layers.1 rfn_err: 0.011489 max_diff/norm: 0.003147 sqnr: 39.937154 cos_err: 0.000051
|
| 5 |
-
-- model.layers.2 rfn_err: 0.024680 max_diff/norm: 0.005199 sqnr: 33.424866 cos_err: 0.000133
|
| 6 |
-
-- model.layers.3 rfn_err: 0.047014 max_diff/norm: 0.005335 sqnr: 27.375450 cos_err: 0.000201
|
| 7 |
-
-- model.layers.4 rfn_err: 0.053175 max_diff/norm: 0.010376 sqnr: 26.392067 cos_err: 0.000264
|
| 8 |
-
-- model.layers.5 rfn_err: 0.101607 max_diff/norm: 0.439889 sqnr: 25.275748 cos_err: 0.000366
|
| 9 |
-
-- model.layers.6 rfn_err: 0.104424 max_diff/norm: 0.433924 sqnr: 23.747089 cos_err: 0.000484
|
| 10 |
-
-- model.layers.7 rfn_err: 0.104557 max_diff/norm: 0.431919 sqnr: 24.505606 cos_err: 0.000609
|
| 11 |
-
-- model.layers.8 rfn_err: 0.105434 max_diff/norm: 0.429296 sqnr: 23.243343 cos_err: 0.000926
|
| 12 |
-
-- model.layers.9 rfn_err: 0.106367 max_diff/norm: 0.423224 sqnr: 22.128818 cos_err: 0.001310
|
| 13 |
-
-- model.layers.10 rfn_err: 0.104368 max_diff/norm: 0.418102 sqnr: 22.921680 cos_err: 0.001477
|
| 14 |
-
-- model.layers.11 rfn_err: 0.105033 max_diff/norm: 0.413182 sqnr: 22.373919 cos_err: 0.001895
|
| 15 |
-
-- model.layers.12 rfn_err: 0.106147 max_diff/norm: 0.409659 sqnr: 21.870182 cos_err: 0.002353
|
| 16 |
-
-- model.layers.13 rfn_err: 0.107418 max_diff/norm: 0.406745 sqnr: 21.460371 cos_err: 0.002797
|
| 17 |
-
-- model.layers.14 rfn_err: 0.108870 max_diff/norm: 0.403091 sqnr: 21.103061 cos_err: 0.003244
|
| 18 |
-
-- model.layers.15 rfn_err: 0.112406 max_diff/norm: 0.397691 sqnr: 20.229972 cos_err: 0.003911
|
| 19 |
-
-- model.layers.16 rfn_err: 0.113939 max_diff/norm: 0.391588 sqnr: 19.953552 cos_err: 0.004338
|
| 20 |
-
-- model.layers.17 rfn_err: 0.118850 max_diff/norm: 0.387389 sqnr: 19.038787 cos_err: 0.005121
|
| 21 |
-
-- model.layers.18 rfn_err: 0.122696 max_diff/norm: 0.382501 sqnr: 18.479936 cos_err: 0.006249
|
| 22 |
-
-- model.layers.19 rfn_err: 0.126383 max_diff/norm: 0.375111 sqnr: 18.069667 cos_err: 0.007387
|
| 23 |
-
-- model.layers.20 rfn_err: 0.133546 max_diff/norm: 0.371628 sqnr: 17.313982 cos_err: 0.008723
|
| 24 |
-
-- model.layers.21 rfn_err: 0.139186 max_diff/norm: 0.355202 sqnr: 16.785980 cos_err: 0.010051
|
| 25 |
-
-- model.layers.22 rfn_err: 0.146212 max_diff/norm: 0.354242 sqnr: 16.204125 cos_err: 0.011832
|
| 26 |
-
-- model.layers.23 rfn_err: 0.153624 max_diff/norm: 0.354606 sqnr: 15.559332 cos_err: 0.014324
|
| 27 |
-
-- model.layers.24 rfn_err: 0.167379 max_diff/norm: 0.376240 sqnr: 14.823083 cos_err: 0.017401
|
| 28 |
-
-- model.layers.25 rfn_err: 0.178808 max_diff/norm: 0.371875 sqnr: 14.015634 cos_err: 0.021692
|
| 29 |
-
-- model.layers.26 rfn_err: 0.191962 max_diff/norm: 0.371208 sqnr: 13.135830 cos_err: 0.027272
|
| 30 |
-
-- model.layers.27 rfn_err: 0.208002 max_diff/norm: 0.363041 sqnr: 12.251230 cos_err: 0.034009
|
| 31 |
-
-- model.layers.28 rfn_err: 0.237604 max_diff/norm: 0.358428 sqnr: 11.404306 cos_err: 0.042369
|
| 32 |
-
-- model.layers.29 rfn_err: 0.254413 max_diff/norm: 0.353930 sqnr: 10.800144 cos_err: 0.048954
|
| 33 |
-
-- model.layers.30 rfn_err: 0.276157 max_diff/norm: 0.345944 sqnr: 9.982429 cos_err: 0.058998
|
| 34 |
-
-- model.layers.31 rfn_err: 0.289523 max_diff/norm: 0.339725 sqnr: 9.545339 cos_err: 0.065212
|
| 35 |
-
-- model.layers.32 rfn_err: 0.310718 max_diff/norm: 0.328324 sqnr: 9.050561 cos_err: 0.072135
|
| 36 |
-
-- model.layers.33 rfn_err: 0.325859 max_diff/norm: 0.321673 sqnr: 8.594002 cos_err: 0.080117
|
| 37 |
-
-- model.layers.34 rfn_err: 0.336873 max_diff/norm: 0.311122 sqnr: 8.352301 cos_err: 0.084569
|
| 38 |
-
-- model.layers.35 rfn_err: 0.346817 max_diff/norm: 0.300775 sqnr: 8.178503 cos_err: 0.087901
|
| 39 |
-
-- model.layers.36 rfn_err: 0.364598 max_diff/norm: 0.298362 sqnr: 7.717051 cos_err: 0.097876
|
| 40 |
-
-- model.layers.37 rfn_err: 0.370055 max_diff/norm: 0.282943 sqnr: 7.747602 cos_err: 0.097190
|
| 41 |
-
-- model.layers.38 rfn_err: 0.383731 max_diff/norm: 0.271891 sqnr: 7.462389 cos_err: 0.105123
|
| 42 |
-
-- model.layers.39 rfn_err: 0.387421 max_diff/norm: 0.258139 sqnr: 7.546127 cos_err: 0.101936
|
| 43 |
-
-- model.layers.40 rfn_err: 0.394746 max_diff/norm: 0.238546 sqnr: 7.460694 cos_err: 0.103462
|
| 44 |
-
-- model.layers.41 rfn_err: 0.402364 max_diff/norm: 0.203700 sqnr: 7.381044 cos_err: 0.105978
|
| 45 |
-
-- model.layers.42 rfn_err: 0.412670 max_diff/norm: 0.168166 sqnr: 7.233969 cos_err: 0.110305
|
| 46 |
-
-- model.layers.43 rfn_err: 0.415765 max_diff/norm: 0.144052 sqnr: 7.316041 cos_err: 0.107760
|
| 47 |
-
-- model.layers.44 rfn_err: 0.429482 max_diff/norm: 0.141583 sqnr: 7.099236 cos_err: 0.113699
|
| 48 |
-
-- model.layers.45 rfn_err: 0.438810 max_diff/norm: 0.134049 sqnr: 6.963944 cos_err: 0.117313
|
| 49 |
-
-- model.layers.46 rfn_err: 0.439827 max_diff/norm: 0.127399 sqnr: 7.015340 cos_err: 0.115621
|
| 50 |
-
-- model.layers.47 rfn_err: 0.449468 max_diff/norm: 0.121084 sqnr: 6.893901 cos_err: 0.120122
|
| 51 |
-
-- model.layers.48 rfn_err: 0.454131 max_diff/norm: 0.114432 sqnr: 6.845831 cos_err: 0.120953
|
| 52 |
-
-- model.layers.49 rfn_err: 0.457335 max_diff/norm: 0.107741 sqnr: 6.835199 cos_err: 0.122351
|
| 53 |
-
-- model.layers.50 rfn_err: 0.460975 max_diff/norm: 0.101064 sqnr: 6.794710 cos_err: 0.123616
|
| 54 |
-
-- model.layers.51 rfn_err: 0.468401 max_diff/norm: 0.094384 sqnr: 6.697549 cos_err: 0.127102
|
| 55 |
-
-- model.layers.52 rfn_err: 0.471488 max_diff/norm: 0.086993 sqnr: 6.672907 cos_err: 0.128079
|
| 56 |
-
-- model.layers.53 rfn_err: 0.476036 max_diff/norm: 0.082251 sqnr: 6.596959 cos_err: 0.130608
|
| 57 |
-
-- model.layers.54 rfn_err: 0.478482 max_diff/norm: 0.077461 sqnr: 6.566252 cos_err: 0.131370
|
| 58 |
-
-- model.layers.55 rfn_err: 0.484951 max_diff/norm: 0.075136 sqnr: 6.461519 cos_err: 0.135141
|
| 59 |
-
-- model.layers.56 rfn_err: 0.490010 max_diff/norm: 0.072084 sqnr: 6.370898 cos_err: 0.137315
|
| 60 |
-
-- model.layers.57 rfn_err: 0.493214 max_diff/norm: 0.065704 sqnr: 6.307768 cos_err: 0.137680
|
| 61 |
-
-- model.layers.58 rfn_err: 0.492374 max_diff/norm: 0.062835 sqnr: 6.277678 cos_err: 0.136408
|
| 62 |
-
-- model.layers.59 rfn_err: 0.494073 max_diff/norm: 0.059431 sqnr: 6.242959 cos_err: 0.134536
|
| 63 |
-
-- model.layers.60 rfn_err: 0.480946 max_diff/norm: 0.051310 sqnr: 6.505622 cos_err: 0.124897
|
| 64 |
-
-- model.layers.61 rfn_err: 0.450072 max_diff/norm: 0.107757 sqnr: 7.071674 cos_err: 0.111895
|
| 65 |
-
-- model.norm rfn_err: 0.483829 max_diff/norm: 0.010220 sqnr: 6.910045 cos_err: 0.119958
|
| 66 |
-
-- A perplexity: 9.46492433
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5363
|
| 70 |
-
K = 2: 0.6564
|
| 71 |
-
K = 3: 0.7155
|
| 72 |
-
K = 4: 0.7527
|
| 73 |
-
K = 5: 0.7792
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.7699
|
| 82 |
-
K = 2: 0.4340
|
| 83 |
-
K = 3: 0.2006
|
| 84 |
-
K = 4: 0.0796
|
| 85 |
-
K = 5: 0.0289
|
| 86 |
-
-- KL divergence (A, B): 0.36735150
|
| 87 |
-
-- KL divergence (B, A): 0.42469226
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-3.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.003394 max_diff/norm: 0.000607 sqnr: 51.644513 cos_err: 0.000004
|
| 4 |
-
-- model.layers.1 rfn_err: 0.005808 max_diff/norm: 0.002926 sqnr: 46.651903 cos_err: 0.000013
|
| 5 |
-
-- model.layers.2 rfn_err: 0.011415 max_diff/norm: 0.002718 sqnr: 40.951312 cos_err: 0.000040
|
| 6 |
-
-- model.layers.3 rfn_err: 0.015681 max_diff/norm: 0.003334 sqnr: 37.879271 cos_err: 0.000062
|
| 7 |
-
-- model.layers.4 rfn_err: 0.018357 max_diff/norm: 0.004838 sqnr: 36.169828 cos_err: 0.000083
|
| 8 |
-
-- model.layers.5 rfn_err: 0.023889 max_diff/norm: 0.160353 sqnr: 34.804823 cos_err: 0.000118
|
| 9 |
-
-- model.layers.6 rfn_err: 0.024955 max_diff/norm: 0.159496 sqnr: 33.171246 cos_err: 0.000159
|
| 10 |
-
-- model.layers.7 rfn_err: 0.025661 max_diff/norm: 0.157699 sqnr: 32.518122 cos_err: 0.000202
|
| 11 |
-
-- model.layers.8 rfn_err: 0.026866 max_diff/norm: 0.156315 sqnr: 31.303216 cos_err: 0.000301
|
| 12 |
-
-- model.layers.9 rfn_err: 0.028113 max_diff/norm: 0.153931 sqnr: 30.224139 cos_err: 0.000407
|
| 13 |
-
-- model.layers.10 rfn_err: 0.029038 max_diff/norm: 0.152014 sqnr: 29.747104 cos_err: 0.000474
|
| 14 |
-
-- model.layers.11 rfn_err: 0.030416 max_diff/norm: 0.149952 sqnr: 29.029571 cos_err: 0.000603
|
| 15 |
-
-- model.layers.12 rfn_err: 0.032240 max_diff/norm: 0.148527 sqnr: 28.188072 cos_err: 0.000743
|
| 16 |
-
-- model.layers.13 rfn_err: 0.034151 max_diff/norm: 0.147561 sqnr: 27.505001 cos_err: 0.000882
|
| 17 |
-
-- model.layers.14 rfn_err: 0.036222 max_diff/norm: 0.146296 sqnr: 26.820057 cos_err: 0.001026
|
| 18 |
-
-- model.layers.15 rfn_err: 0.038649 max_diff/norm: 0.144103 sqnr: 26.183311 cos_err: 0.001212
|
| 19 |
-
-- model.layers.16 rfn_err: 0.040821 max_diff/norm: 0.141321 sqnr: 25.734528 cos_err: 0.001330
|
| 20 |
-
-- model.layers.17 rfn_err: 0.043610 max_diff/norm: 0.140069 sqnr: 25.100897 cos_err: 0.001547
|
| 21 |
-
-- model.layers.18 rfn_err: 0.046721 max_diff/norm: 0.138471 sqnr: 24.456386 cos_err: 0.001895
|
| 22 |
-
-- model.layers.19 rfn_err: 0.050462 max_diff/norm: 0.137175 sqnr: 23.788862 cos_err: 0.002255
|
| 23 |
-
-- model.layers.20 rfn_err: 0.054798 max_diff/norm: 0.134785 sqnr: 23.108732 cos_err: 0.002650
|
| 24 |
-
-- model.layers.21 rfn_err: 0.058880 max_diff/norm: 0.133462 sqnr: 22.572879 cos_err: 0.003073
|
| 25 |
-
-- model.layers.22 rfn_err: 0.064044 max_diff/norm: 0.131656 sqnr: 21.904474 cos_err: 0.003659
|
| 26 |
-
-- model.layers.23 rfn_err: 0.070224 max_diff/norm: 0.131168 sqnr: 21.062901 cos_err: 0.004548
|
| 27 |
-
-- model.layers.24 rfn_err: 0.077352 max_diff/norm: 0.127912 sqnr: 20.287736 cos_err: 0.005605
|
| 28 |
-
-- model.layers.25 rfn_err: 0.086052 max_diff/norm: 0.127358 sqnr: 19.350536 cos_err: 0.007216
|
| 29 |
-
-- model.layers.26 rfn_err: 0.097367 max_diff/norm: 0.127782 sqnr: 18.169144 cos_err: 0.009497
|
| 30 |
-
-- model.layers.27 rfn_err: 0.110078 max_diff/norm: 0.127504 sqnr: 17.233901 cos_err: 0.012311
|
| 31 |
-
-- model.layers.28 rfn_err: 0.141438 max_diff/norm: 0.124777 sqnr: 16.252857 cos_err: 0.016206
|
| 32 |
-
-- model.layers.29 rfn_err: 0.155649 max_diff/norm: 0.123986 sqnr: 15.576786 cos_err: 0.019012
|
| 33 |
-
-- model.layers.30 rfn_err: 0.171725 max_diff/norm: 0.122313 sqnr: 14.626882 cos_err: 0.023428
|
| 34 |
-
-- model.layers.31 rfn_err: 0.181566 max_diff/norm: 0.119922 sqnr: 14.148360 cos_err: 0.026096
|
| 35 |
-
-- model.layers.32 rfn_err: 0.198945 max_diff/norm: 0.117497 sqnr: 13.547689 cos_err: 0.029230
|
| 36 |
-
-- model.layers.33 rfn_err: 0.209506 max_diff/norm: 0.115155 sqnr: 13.038629 cos_err: 0.032930
|
| 37 |
-
-- model.layers.34 rfn_err: 0.216794 max_diff/norm: 0.112269 sqnr: 12.763568 cos_err: 0.034920
|
| 38 |
-
-- model.layers.35 rfn_err: 0.223848 max_diff/norm: 0.108764 sqnr: 12.565271 cos_err: 0.036753
|
| 39 |
-
-- model.layers.36 rfn_err: 0.237001 max_diff/norm: 0.107344 sqnr: 12.045963 cos_err: 0.041433
|
| 40 |
-
-- model.layers.37 rfn_err: 0.240391 max_diff/norm: 0.103187 sqnr: 12.052502 cos_err: 0.041365
|
| 41 |
-
-- model.layers.38 rfn_err: 0.249047 max_diff/norm: 0.099769 sqnr: 11.780027 cos_err: 0.045215
|
| 42 |
-
-- model.layers.39 rfn_err: 0.251593 max_diff/norm: 0.094690 sqnr: 11.874520 cos_err: 0.043901
|
| 43 |
-
-- model.layers.40 rfn_err: 0.256486 max_diff/norm: 0.092512 sqnr: 11.783108 cos_err: 0.044108
|
| 44 |
-
-- model.layers.41 rfn_err: 0.261185 max_diff/norm: 0.087722 sqnr: 11.704016 cos_err: 0.045681
|
| 45 |
-
-- model.layers.42 rfn_err: 0.269034 max_diff/norm: 0.084542 sqnr: 11.544741 cos_err: 0.048144
|
| 46 |
-
-- model.layers.43 rfn_err: 0.271123 max_diff/norm: 0.079522 sqnr: 11.648045 cos_err: 0.046260
|
| 47 |
-
-- model.layers.44 rfn_err: 0.281021 max_diff/norm: 0.076279 sqnr: 11.425829 cos_err: 0.049241
|
| 48 |
-
-- model.layers.45 rfn_err: 0.287677 max_diff/norm: 0.073810 sqnr: 11.279177 cos_err: 0.050887
|
| 49 |
-
-- model.layers.46 rfn_err: 0.288193 max_diff/norm: 0.069342 sqnr: 11.328149 cos_err: 0.050074
|
| 50 |
-
-- model.layers.47 rfn_err: 0.296293 max_diff/norm: 0.066391 sqnr: 11.184084 cos_err: 0.052388
|
| 51 |
-
-- model.layers.48 rfn_err: 0.299036 max_diff/norm: 0.063632 sqnr: 11.137132 cos_err: 0.052699
|
| 52 |
-
-- model.layers.49 rfn_err: 0.301785 max_diff/norm: 0.060063 sqnr: 11.114139 cos_err: 0.053790
|
| 53 |
-
-- model.layers.50 rfn_err: 0.304180 max_diff/norm: 0.057226 sqnr: 11.059243 cos_err: 0.054579
|
| 54 |
-
-- model.layers.51 rfn_err: 0.309586 max_diff/norm: 0.054422 sqnr: 10.955227 cos_err: 0.056505
|
| 55 |
-
-- model.layers.52 rfn_err: 0.311136 max_diff/norm: 0.050588 sqnr: 10.931107 cos_err: 0.057045
|
| 56 |
-
-- model.layers.53 rfn_err: 0.313254 max_diff/norm: 0.048196 sqnr: 10.851354 cos_err: 0.058378
|
| 57 |
-
-- model.layers.54 rfn_err: 0.315019 max_diff/norm: 0.045797 sqnr: 10.793363 cos_err: 0.058742
|
| 58 |
-
-- model.layers.55 rfn_err: 0.319939 max_diff/norm: 0.044467 sqnr: 10.657039 cos_err: 0.060996
|
| 59 |
-
-- model.layers.56 rfn_err: 0.322801 max_diff/norm: 0.041824 sqnr: 10.549990 cos_err: 0.061744
|
| 60 |
-
-- model.layers.57 rfn_err: 0.322995 max_diff/norm: 0.039723 sqnr: 10.481810 cos_err: 0.061341
|
| 61 |
-
-- model.layers.58 rfn_err: 0.318804 max_diff/norm: 0.038624 sqnr: 10.458463 cos_err: 0.059246
|
| 62 |
-
-- model.layers.59 rfn_err: 0.316041 max_diff/norm: 0.036968 sqnr: 10.455861 cos_err: 0.056453
|
| 63 |
-
-- model.layers.60 rfn_err: 0.305529 max_diff/norm: 0.032550 sqnr: 10.742234 cos_err: 0.051024
|
| 64 |
-
-- model.layers.61 rfn_err: 0.282084 max_diff/norm: 0.061164 sqnr: 11.445191 cos_err: 0.046361
|
| 65 |
-
-- model.norm rfn_err: 0.312897 max_diff/norm: 0.009568 sqnr: 11.155214 cos_err: 0.050396
|
| 66 |
-
-- A perplexity: 8.74921130
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5544
|
| 70 |
-
K = 2: 0.6738
|
| 71 |
-
K = 3: 0.7339
|
| 72 |
-
K = 4: 0.7703
|
| 73 |
-
K = 5: 0.7952
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.8640
|
| 82 |
-
K = 2: 0.6125
|
| 83 |
-
K = 3: 0.3773
|
| 84 |
-
K = 4: 0.2072
|
| 85 |
-
K = 5: 0.1040
|
| 86 |
-
-- KL divergence (A, B): 0.14842009
|
| 87 |
-
-- KL divergence (B, A): 0.15566614
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-4.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.001854 max_diff/norm: 0.000474 sqnr: 56.263819 cos_err: 0.000001
|
| 4 |
-
-- model.layers.1 rfn_err: 0.003312 max_diff/norm: 0.001508 sqnr: 53.693782 cos_err: 0.000005
|
| 5 |
-
-- model.layers.2 rfn_err: 0.006790 max_diff/norm: 0.002903 sqnr: 47.855051 cos_err: 0.000016
|
| 6 |
-
-- model.layers.3 rfn_err: 0.009778 max_diff/norm: 0.003935 sqnr: 43.142093 cos_err: 0.000026
|
| 7 |
-
-- model.layers.4 rfn_err: 0.010822 max_diff/norm: 0.004183 sqnr: 41.971633 cos_err: 0.000036
|
| 8 |
-
-- model.layers.5 rfn_err: 0.018502 max_diff/norm: 0.333859 sqnr: 39.795376 cos_err: 0.000052
|
| 9 |
-
-- model.layers.6 rfn_err: 0.019027 max_diff/norm: 0.328645 sqnr: 38.428198 cos_err: 0.000073
|
| 10 |
-
-- model.layers.7 rfn_err: 0.019311 max_diff/norm: 0.315777 sqnr: 37.685598 cos_err: 0.000094
|
| 11 |
-
-- model.layers.8 rfn_err: 0.020006 max_diff/norm: 0.301441 sqnr: 36.191342 cos_err: 0.000139
|
| 12 |
-
-- model.layers.9 rfn_err: 0.020795 max_diff/norm: 0.285230 sqnr: 34.898715 cos_err: 0.000184
|
| 13 |
-
-- model.layers.10 rfn_err: 0.021488 max_diff/norm: 0.271941 sqnr: 34.073830 cos_err: 0.000218
|
| 14 |
-
-- model.layers.11 rfn_err: 0.022412 max_diff/norm: 0.264327 sqnr: 33.236031 cos_err: 0.000273
|
| 15 |
-
-- model.layers.12 rfn_err: 0.023527 max_diff/norm: 0.257119 sqnr: 32.362971 cos_err: 0.000333
|
| 16 |
-
-- model.layers.13 rfn_err: 0.024654 max_diff/norm: 0.251269 sqnr: 31.681959 cos_err: 0.000394
|
| 17 |
-
-- model.layers.14 rfn_err: 0.025710 max_diff/norm: 0.246635 sqnr: 31.131241 cos_err: 0.000450
|
| 18 |
-
-- model.layers.15 rfn_err: 0.027181 max_diff/norm: 0.243005 sqnr: 30.451275 cos_err: 0.000529
|
| 19 |
-
-- model.layers.16 rfn_err: 0.028224 max_diff/norm: 0.235984 sqnr: 30.094617 cos_err: 0.000575
|
| 20 |
-
-- model.layers.17 rfn_err: 0.029731 max_diff/norm: 0.231063 sqnr: 29.538305 cos_err: 0.000662
|
| 21 |
-
-- model.layers.18 rfn_err: 0.031832 max_diff/norm: 0.227557 sqnr: 28.787331 cos_err: 0.000799
|
| 22 |
-
-- model.layers.19 rfn_err: 0.034139 max_diff/norm: 0.223469 sqnr: 28.072172 cos_err: 0.000945
|
| 23 |
-
-- model.layers.20 rfn_err: 0.036582 max_diff/norm: 0.215091 sqnr: 27.441542 cos_err: 0.001107
|
| 24 |
-
-- model.layers.21 rfn_err: 0.039119 max_diff/norm: 0.207371 sqnr: 26.848416 cos_err: 0.001289
|
| 25 |
-
-- model.layers.22 rfn_err: 0.042374 max_diff/norm: 0.199526 sqnr: 26.164466 cos_err: 0.001552
|
| 26 |
-
-- model.layers.23 rfn_err: 0.046192 max_diff/norm: 0.195744 sqnr: 25.379355 cos_err: 0.001945
|
| 27 |
-
-- model.layers.24 rfn_err: 0.051334 max_diff/norm: 0.188322 sqnr: 24.590973 cos_err: 0.002428
|
| 28 |
-
-- model.layers.25 rfn_err: 0.057732 max_diff/norm: 0.184453 sqnr: 23.548251 cos_err: 0.003231
|
| 29 |
-
-- model.layers.26 rfn_err: 0.065455 max_diff/norm: 0.183255 sqnr: 22.427084 cos_err: 0.004314
|
| 30 |
-
-- model.layers.27 rfn_err: 0.075206 max_diff/norm: 0.179117 sqnr: 21.443197 cos_err: 0.005829
|
| 31 |
-
-- model.layers.28 rfn_err: 0.103912 max_diff/norm: 0.172765 sqnr: 20.421422 cos_err: 0.008061
|
| 32 |
-
-- model.layers.29 rfn_err: 0.115632 max_diff/norm: 0.168113 sqnr: 19.698096 cos_err: 0.009589
|
| 33 |
-
-- model.layers.30 rfn_err: 0.127148 max_diff/norm: 0.161370 sqnr: 18.701677 cos_err: 0.011875
|
| 34 |
-
-- model.layers.31 rfn_err: 0.134375 max_diff/norm: 0.154427 sqnr: 18.184652 cos_err: 0.013281
|
| 35 |
-
-- model.layers.32 rfn_err: 0.147770 max_diff/norm: 0.147113 sqnr: 17.544938 cos_err: 0.014799
|
| 36 |
-
-- model.layers.33 rfn_err: 0.154914 max_diff/norm: 0.142185 sqnr: 17.007279 cos_err: 0.016813
|
| 37 |
-
-- model.layers.34 rfn_err: 0.159491 max_diff/norm: 0.134358 sqnr: 16.705121 cos_err: 0.017905
|
| 38 |
-
-- model.layers.35 rfn_err: 0.164361 max_diff/norm: 0.126980 sqnr: 16.478361 cos_err: 0.019041
|
| 39 |
-
-- model.layers.36 rfn_err: 0.174045 max_diff/norm: 0.124578 sqnr: 15.923690 cos_err: 0.021620
|
| 40 |
-
-- model.layers.37 rfn_err: 0.175827 max_diff/norm: 0.115278 sqnr: 15.915138 cos_err: 0.021702
|
| 41 |
-
-- model.layers.38 rfn_err: 0.181752 max_diff/norm: 0.110514 sqnr: 15.646665 cos_err: 0.024156
|
| 42 |
-
-- model.layers.39 rfn_err: 0.183469 max_diff/norm: 0.103311 sqnr: 15.743259 cos_err: 0.023519
|
| 43 |
-
-- model.layers.40 rfn_err: 0.186590 max_diff/norm: 0.098521 sqnr: 15.634178 cos_err: 0.023518
|
| 44 |
-
-- model.layers.41 rfn_err: 0.190228 max_diff/norm: 0.091429 sqnr: 15.554746 cos_err: 0.024727
|
| 45 |
-
-- model.layers.42 rfn_err: 0.196391 max_diff/norm: 0.086804 sqnr: 15.384511 cos_err: 0.026359
|
| 46 |
-
-- model.layers.43 rfn_err: 0.197996 max_diff/norm: 0.080018 sqnr: 15.496179 cos_err: 0.025006
|
| 47 |
-
-- model.layers.44 rfn_err: 0.205729 max_diff/norm: 0.076818 sqnr: 15.265893 cos_err: 0.026823
|
| 48 |
-
-- model.layers.45 rfn_err: 0.210517 max_diff/norm: 0.074625 sqnr: 15.121604 cos_err: 0.027725
|
| 49 |
-
-- model.layers.46 rfn_err: 0.210609 max_diff/norm: 0.070258 sqnr: 15.172807 cos_err: 0.027206
|
| 50 |
-
-- model.layers.47 rfn_err: 0.217327 max_diff/norm: 0.067273 sqnr: 15.027985 cos_err: 0.028692
|
| 51 |
-
-- model.layers.48 rfn_err: 0.219067 max_diff/norm: 0.064565 sqnr: 14.978627 cos_err: 0.028730
|
| 52 |
-
-- model.layers.49 rfn_err: 0.221027 max_diff/norm: 0.061065 sqnr: 14.961713 cos_err: 0.029623
|
| 53 |
-
-- model.layers.50 rfn_err: 0.222484 max_diff/norm: 0.058346 sqnr: 14.898379 cos_err: 0.030090
|
| 54 |
-
-- model.layers.51 rfn_err: 0.226745 max_diff/norm: 0.055432 sqnr: 14.786190 cos_err: 0.031318
|
| 55 |
-
-- model.layers.52 rfn_err: 0.227504 max_diff/norm: 0.051481 sqnr: 14.754768 cos_err: 0.031677
|
| 56 |
-
-- model.layers.53 rfn_err: 0.228062 max_diff/norm: 0.048984 sqnr: 14.670084 cos_err: 0.032442
|
| 57 |
-
-- model.layers.54 rfn_err: 0.228794 max_diff/norm: 0.046740 sqnr: 14.594321 cos_err: 0.032488
|
| 58 |
-
-- model.layers.55 rfn_err: 0.232104 max_diff/norm: 0.045176 sqnr: 14.434930 cos_err: 0.033913
|
| 59 |
-
-- model.layers.56 rfn_err: 0.233304 max_diff/norm: 0.042648 sqnr: 14.316571 cos_err: 0.034139
|
| 60 |
-
-- model.layers.57 rfn_err: 0.231687 max_diff/norm: 0.040295 sqnr: 14.232150 cos_err: 0.033459
|
| 61 |
-
-- model.layers.58 rfn_err: 0.225268 max_diff/norm: 0.038996 sqnr: 14.208016 cos_err: 0.031359
|
| 62 |
-
-- model.layers.59 rfn_err: 0.220112 max_diff/norm: 0.037183 sqnr: 14.211387 cos_err: 0.028587
|
| 63 |
-
-- model.layers.60 rfn_err: 0.211295 max_diff/norm: 0.032582 sqnr: 14.490349 cos_err: 0.025010
|
| 64 |
-
-- model.layers.61 rfn_err: 0.196379 max_diff/norm: 0.072403 sqnr: 15.138763 cos_err: 0.023275
|
| 65 |
-
-- model.norm rfn_err: 0.220192 max_diff/norm: 0.009406 sqnr: 14.905477 cos_err: 0.025712
|
| 66 |
-
-- A perplexity: 8.43832064
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5583
|
| 70 |
-
K = 2: 0.6787
|
| 71 |
-
K = 3: 0.7376
|
| 72 |
-
K = 4: 0.7734
|
| 73 |
-
K = 5: 0.7988
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.9118
|
| 82 |
-
K = 2: 0.7281
|
| 83 |
-
K = 3: 0.5222
|
| 84 |
-
K = 4: 0.3439
|
| 85 |
-
K = 5: 0.2105
|
| 86 |
-
-- KL divergence (A, B): 0.07256054
|
| 87 |
-
-- KL divergence (B, A): 0.07650418
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-5.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.001017 max_diff/norm: 0.000208 sqnr: 61.532980 cos_err: 0.000000
|
| 4 |
-
-- model.layers.1 rfn_err: 0.002231 max_diff/norm: 0.001100 sqnr: 58.734509 cos_err: 0.000002
|
| 5 |
-
-- model.layers.2 rfn_err: 0.004763 max_diff/norm: 0.002880 sqnr: 53.876651 cos_err: 0.000008
|
| 6 |
-
-- model.layers.3 rfn_err: 0.006568 max_diff/norm: 0.004134 sqnr: 50.205115 cos_err: 0.000014
|
| 7 |
-
-- model.layers.4 rfn_err: 0.007617 max_diff/norm: 0.003830 sqnr: 47.289655 cos_err: 0.000019
|
| 8 |
-
-- model.layers.5 rfn_err: 0.007090 max_diff/norm: 0.116742 sqnr: 44.775571 cos_err: 0.000028
|
| 9 |
-
-- model.layers.6 rfn_err: 0.007607 max_diff/norm: 0.116455 sqnr: 43.337426 cos_err: 0.000041
|
| 10 |
-
-- model.layers.7 rfn_err: 0.008447 max_diff/norm: 0.116347 sqnr: 41.213080 cos_err: 0.000054
|
| 11 |
-
-- model.layers.8 rfn_err: 0.009379 max_diff/norm: 0.115107 sqnr: 39.464045 cos_err: 0.000081
|
| 12 |
-
-- model.layers.9 rfn_err: 0.010305 max_diff/norm: 0.113122 sqnr: 38.171880 cos_err: 0.000108
|
| 13 |
-
-- model.layers.10 rfn_err: 0.011118 max_diff/norm: 0.111587 sqnr: 37.345647 cos_err: 0.000129
|
| 14 |
-
-- model.layers.11 rfn_err: 0.012104 max_diff/norm: 0.109842 sqnr: 36.323288 cos_err: 0.000161
|
| 15 |
-
-- model.layers.12 rfn_err: 0.013162 max_diff/norm: 0.108572 sqnr: 35.364393 cos_err: 0.000195
|
| 16 |
-
-- model.layers.13 rfn_err: 0.014205 max_diff/norm: 0.107720 sqnr: 34.623768 cos_err: 0.000231
|
| 17 |
-
-- model.layers.14 rfn_err: 0.015160 max_diff/norm: 0.106660 sqnr: 34.000090 cos_err: 0.000264
|
| 18 |
-
-- model.layers.15 rfn_err: 0.016444 max_diff/norm: 0.105012 sqnr: 33.290000 cos_err: 0.000310
|
| 19 |
-
-- model.layers.16 rfn_err: 0.017320 max_diff/norm: 0.103041 sqnr: 32.984575 cos_err: 0.000333
|
| 20 |
-
-- model.layers.17 rfn_err: 0.018565 max_diff/norm: 0.102205 sqnr: 32.427712 cos_err: 0.000384
|
| 21 |
-
-- model.layers.18 rfn_err: 0.020225 max_diff/norm: 0.101198 sqnr: 31.697718 cos_err: 0.000462
|
| 22 |
-
-- model.layers.19 rfn_err: 0.021969 max_diff/norm: 0.100307 sqnr: 31.051150 cos_err: 0.000542
|
| 23 |
-
-- model.layers.20 rfn_err: 0.023903 max_diff/norm: 0.098437 sqnr: 30.451744 cos_err: 0.000630
|
| 24 |
-
-- model.layers.21 rfn_err: 0.025844 max_diff/norm: 0.097368 sqnr: 29.921284 cos_err: 0.000737
|
| 25 |
-
-- model.layers.22 rfn_err: 0.028511 max_diff/norm: 0.096021 sqnr: 29.220995 cos_err: 0.000896
|
| 26 |
-
-- model.layers.23 rfn_err: 0.031631 max_diff/norm: 0.095519 sqnr: 28.430126 cos_err: 0.001145
|
| 27 |
-
-- model.layers.24 rfn_err: 0.034842 max_diff/norm: 0.093165 sqnr: 27.687440 cos_err: 0.001442
|
| 28 |
-
-- model.layers.25 rfn_err: 0.040242 max_diff/norm: 0.092788 sqnr: 26.635372 cos_err: 0.001980
|
| 29 |
-
-- model.layers.26 rfn_err: 0.046830 max_diff/norm: 0.093038 sqnr: 25.497961 cos_err: 0.002694
|
| 30 |
-
-- model.layers.27 rfn_err: 0.055636 max_diff/norm: 0.092440 sqnr: 24.501275 cos_err: 0.003792
|
| 31 |
-
-- model.layers.28 rfn_err: 0.085580 max_diff/norm: 0.090473 sqnr: 23.438816 cos_err: 0.005463
|
| 32 |
-
-- model.layers.29 rfn_err: 0.096313 max_diff/norm: 0.089205 sqnr: 22.715888 cos_err: 0.006492
|
| 33 |
-
-- model.layers.30 rfn_err: 0.106057 max_diff/norm: 0.087621 sqnr: 21.663076 cos_err: 0.008087
|
| 34 |
-
-- model.layers.31 rfn_err: 0.112199 max_diff/norm: 0.085844 sqnr: 21.099320 cos_err: 0.009051
|
| 35 |
-
-- model.layers.32 rfn_err: 0.124029 max_diff/norm: 0.083795 sqnr: 20.418774 cos_err: 0.010056
|
| 36 |
-
-- model.layers.33 rfn_err: 0.129828 max_diff/norm: 0.082055 sqnr: 19.857175 cos_err: 0.011483
|
| 37 |
-
-- model.layers.34 rfn_err: 0.133268 max_diff/norm: 0.079986 sqnr: 19.531722 cos_err: 0.012268
|
| 38 |
-
-- model.layers.35 rfn_err: 0.137318 max_diff/norm: 0.077364 sqnr: 19.286253 cos_err: 0.013129
|
| 39 |
-
-- model.layers.36 rfn_err: 0.145534 max_diff/norm: 0.076403 sqnr: 18.707049 cos_err: 0.014979
|
| 40 |
-
-- model.layers.37 rfn_err: 0.146671 max_diff/norm: 0.073246 sqnr: 18.685345 cos_err: 0.015100
|
| 41 |
-
-- model.layers.38 rfn_err: 0.151369 max_diff/norm: 0.070850 sqnr: 18.426652 cos_err: 0.017061
|
| 42 |
-
-- model.layers.39 rfn_err: 0.152925 max_diff/norm: 0.067381 sqnr: 18.532562 cos_err: 0.016655
|
| 43 |
-
-- model.layers.40 rfn_err: 0.155370 max_diff/norm: 0.065503 sqnr: 18.412829 cos_err: 0.016559
|
| 44 |
-
-- model.layers.41 rfn_err: 0.158440 max_diff/norm: 0.062154 sqnr: 18.329644 cos_err: 0.017659
|
| 45 |
-
-- model.layers.42 rfn_err: 0.163820 max_diff/norm: 0.059775 sqnr: 18.161913 cos_err: 0.018989
|
| 46 |
-
-- model.layers.43 rfn_err: 0.165392 max_diff/norm: 0.055976 sqnr: 18.276385 cos_err: 0.017872
|
| 47 |
-
-- model.layers.44 rfn_err: 0.172189 max_diff/norm: 0.053723 sqnr: 18.048376 cos_err: 0.019302
|
| 48 |
-
-- model.layers.45 rfn_err: 0.176197 max_diff/norm: 0.052196 sqnr: 17.905685 cos_err: 0.019926
|
| 49 |
-
-- model.layers.46 rfn_err: 0.175998 max_diff/norm: 0.049371 sqnr: 17.959718 cos_err: 0.019498
|
| 50 |
-
-- model.layers.47 rfn_err: 0.182210 max_diff/norm: 0.047151 sqnr: 17.813583 cos_err: 0.020710
|
| 51 |
-
-- model.layers.48 rfn_err: 0.183390 max_diff/norm: 0.045295 sqnr: 17.766797 cos_err: 0.020642
|
| 52 |
-
-- model.layers.49 rfn_err: 0.185077 max_diff/norm: 0.042782 sqnr: 17.756343 cos_err: 0.021452
|
| 53 |
-
-- model.layers.50 rfn_err: 0.185991 max_diff/norm: 0.041007 sqnr: 17.691015 cos_err: 0.021791
|
| 54 |
-
-- model.layers.51 rfn_err: 0.189697 max_diff/norm: 0.039038 sqnr: 17.577948 cos_err: 0.022782
|
| 55 |
-
-- model.layers.52 rfn_err: 0.189906 max_diff/norm: 0.036750 sqnr: 17.544695 cos_err: 0.023056
|
| 56 |
-
-- model.layers.53 rfn_err: 0.189749 max_diff/norm: 0.035126 sqnr: 17.456175 cos_err: 0.023645
|
| 57 |
-
-- model.layers.54 rfn_err: 0.189891 max_diff/norm: 0.033551 sqnr: 17.366807 cos_err: 0.023576
|
| 58 |
-
-- model.layers.55 rfn_err: 0.192427 max_diff/norm: 0.032293 sqnr: 17.188225 cos_err: 0.024681
|
| 59 |
-
-- model.layers.56 rfn_err: 0.192786 max_diff/norm: 0.030716 sqnr: 17.057778 cos_err: 0.024695
|
| 60 |
-
-- model.layers.57 rfn_err: 0.189982 max_diff/norm: 0.027926 sqnr: 16.962916 cos_err: 0.023917
|
| 61 |
-
-- model.layers.58 rfn_err: 0.182264 max_diff/norm: 0.026714 sqnr: 16.934458 cos_err: 0.021830
|
| 62 |
-
-- model.layers.59 rfn_err: 0.175773 max_diff/norm: 0.025124 sqnr: 16.934383 cos_err: 0.019174
|
| 63 |
-
-- model.layers.60 rfn_err: 0.167440 max_diff/norm: 0.020558 sqnr: 17.208983 cos_err: 0.016231
|
| 64 |
-
-- model.layers.61 rfn_err: 0.156737 max_diff/norm: 0.025630 sqnr: 17.784731 cos_err: 0.015342
|
| 65 |
-
-- model.norm rfn_err: 0.177760 max_diff/norm: 0.008254 sqnr: 17.592665 cos_err: 0.017236
|
| 66 |
-
-- A perplexity: 8.35222293
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5593
|
| 70 |
-
K = 2: 0.6798
|
| 71 |
-
K = 3: 0.7382
|
| 72 |
-
K = 4: 0.7743
|
| 73 |
-
K = 5: 0.7998
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.9344
|
| 82 |
-
K = 2: 0.7901
|
| 83 |
-
K = 3: 0.6154
|
| 84 |
-
K = 4: 0.4472
|
| 85 |
-
K = 5: 0.3056
|
| 86 |
-
-- KL divergence (A, B): 0.04801990
|
| 87 |
-
-- KL divergence (B, A): 0.04921814
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-6.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.000543 max_diff/norm: 0.000184 sqnr: 68.177185 cos_err: 0.000000
|
| 4 |
-
-- model.layers.1 rfn_err: 0.001513 max_diff/norm: 0.000866 sqnr: 65.447161 cos_err: 0.000001
|
| 5 |
-
-- model.layers.2 rfn_err: 0.003429 max_diff/norm: 0.002247 sqnr: 59.905907 cos_err: 0.000004
|
| 6 |
-
-- model.layers.3 rfn_err: 0.004892 max_diff/norm: 0.002883 sqnr: 55.291200 cos_err: 0.000008
|
| 7 |
-
-- model.layers.4 rfn_err: 0.005726 max_diff/norm: 0.003310 sqnr: 52.228319 cos_err: 0.000011
|
| 8 |
-
-- model.layers.5 rfn_err: 0.008686 max_diff/norm: 0.188249 sqnr: 49.429878 cos_err: 0.000017
|
| 9 |
-
-- model.layers.6 rfn_err: 0.009091 max_diff/norm: 0.187352 sqnr: 47.501211 cos_err: 0.000025
|
| 10 |
-
-- model.layers.7 rfn_err: 0.009506 max_diff/norm: 0.182608 sqnr: 45.382496 cos_err: 0.000034
|
| 11 |
-
-- model.layers.8 rfn_err: 0.010246 max_diff/norm: 0.180253 sqnr: 43.051331 cos_err: 0.000053
|
| 12 |
-
-- model.layers.9 rfn_err: 0.010918 max_diff/norm: 0.177052 sqnr: 41.321413 cos_err: 0.000071
|
| 13 |
-
-- model.layers.10 rfn_err: 0.011519 max_diff/norm: 0.174697 sqnr: 40.338670 cos_err: 0.000086
|
| 14 |
-
-- model.layers.11 rfn_err: 0.012264 max_diff/norm: 0.172930 sqnr: 39.208071 cos_err: 0.000108
|
| 15 |
-
-- model.layers.12 rfn_err: 0.013085 max_diff/norm: 0.171060 sqnr: 38.091909 cos_err: 0.000132
|
| 16 |
-
-- model.layers.13 rfn_err: 0.013903 max_diff/norm: 0.169636 sqnr: 37.261419 cos_err: 0.000158
|
| 17 |
-
-- model.layers.14 rfn_err: 0.014629 max_diff/norm: 0.168219 sqnr: 36.583158 cos_err: 0.000180
|
| 18 |
-
-- model.layers.15 rfn_err: 0.015628 max_diff/norm: 0.166202 sqnr: 35.780368 cos_err: 0.000212
|
| 19 |
-
-- model.layers.16 rfn_err: 0.016242 max_diff/norm: 0.163085 sqnr: 35.441346 cos_err: 0.000228
|
| 20 |
-
-- model.layers.17 rfn_err: 0.017188 max_diff/norm: 0.161503 sqnr: 34.837762 cos_err: 0.000263
|
| 21 |
-
-- model.layers.18 rfn_err: 0.018460 max_diff/norm: 0.160362 sqnr: 34.086316 cos_err: 0.000316
|
| 22 |
-
-- model.layers.19 rfn_err: 0.019807 max_diff/norm: 0.158439 sqnr: 33.417090 cos_err: 0.000372
|
| 23 |
-
-- model.layers.20 rfn_err: 0.021275 max_diff/norm: 0.156205 sqnr: 32.797049 cos_err: 0.000434
|
| 24 |
-
-- model.layers.21 rfn_err: 0.022793 max_diff/norm: 0.154088 sqnr: 32.250618 cos_err: 0.000510
|
| 25 |
-
-- model.layers.22 rfn_err: 0.025017 max_diff/norm: 0.151355 sqnr: 31.522344 cos_err: 0.000629
|
| 26 |
-
-- model.layers.23 rfn_err: 0.027571 max_diff/norm: 0.149713 sqnr: 30.720477 cos_err: 0.000811
|
| 27 |
-
-- model.layers.24 rfn_err: 0.030635 max_diff/norm: 0.143742 sqnr: 29.978750 cos_err: 0.001035
|
| 28 |
-
-- model.layers.25 rfn_err: 0.035260 max_diff/norm: 0.142449 sqnr: 28.921009 cos_err: 0.001450
|
| 29 |
-
-- model.layers.26 rfn_err: 0.041012 max_diff/norm: 0.142144 sqnr: 27.738527 cos_err: 0.001992
|
| 30 |
-
-- model.layers.27 rfn_err: 0.048649 max_diff/norm: 0.140497 sqnr: 26.724511 cos_err: 0.002842
|
| 31 |
-
-- model.layers.28 rfn_err: 0.075249 max_diff/norm: 0.138490 sqnr: 25.626220 cos_err: 0.004184
|
| 32 |
-
-- model.layers.29 rfn_err: 0.085114 max_diff/norm: 0.137145 sqnr: 24.873034 cos_err: 0.005033
|
| 33 |
-
-- model.layers.30 rfn_err: 0.093850 max_diff/norm: 0.133826 sqnr: 23.763328 cos_err: 0.006293
|
| 34 |
-
-- model.layers.31 rfn_err: 0.099443 max_diff/norm: 0.131075 sqnr: 23.143857 cos_err: 0.007078
|
| 35 |
-
-- model.layers.32 rfn_err: 0.110217 max_diff/norm: 0.127145 sqnr: 22.407360 cos_err: 0.007872
|
| 36 |
-
-- model.layers.33 rfn_err: 0.115368 max_diff/norm: 0.124526 sqnr: 21.812169 cos_err: 0.009041
|
| 37 |
-
-- model.layers.34 rfn_err: 0.118347 max_diff/norm: 0.120848 sqnr: 21.452885 cos_err: 0.009680
|
| 38 |
-
-- model.layers.35 rfn_err: 0.121983 max_diff/norm: 0.117505 sqnr: 21.180904 cos_err: 0.010409
|
| 39 |
-
-- model.layers.36 rfn_err: 0.129488 max_diff/norm: 0.115913 sqnr: 20.568714 cos_err: 0.011940
|
| 40 |
-
-- model.layers.37 rfn_err: 0.130546 max_diff/norm: 0.110626 sqnr: 20.523469 cos_err: 0.012070
|
| 41 |
-
-- model.layers.38 rfn_err: 0.134762 max_diff/norm: 0.107698 sqnr: 20.258577 cos_err: 0.013730
|
| 42 |
-
-- model.layers.39 rfn_err: 0.136458 max_diff/norm: 0.102443 sqnr: 20.352471 cos_err: 0.013488
|
| 43 |
-
-- model.layers.40 rfn_err: 0.138824 max_diff/norm: 0.099604 sqnr: 20.215974 cos_err: 0.013448
|
| 44 |
-
-- model.layers.41 rfn_err: 0.142556 max_diff/norm: 0.093639 sqnr: 20.129098 cos_err: 0.014499
|
| 45 |
-
-- model.layers.42 rfn_err: 0.147585 max_diff/norm: 0.089614 sqnr: 19.955028 cos_err: 0.015680
|
| 46 |
-
-- model.layers.43 rfn_err: 0.149174 max_diff/norm: 0.083302 sqnr: 20.070309 cos_err: 0.014730
|
| 47 |
-
-- model.layers.44 rfn_err: 0.155522 max_diff/norm: 0.079138 sqnr: 19.834189 cos_err: 0.015953
|
| 48 |
-
-- model.layers.45 rfn_err: 0.159260 max_diff/norm: 0.076222 sqnr: 19.685693 cos_err: 0.016491
|
| 49 |
-
-- model.layers.46 rfn_err: 0.159097 max_diff/norm: 0.071667 sqnr: 19.735605 cos_err: 0.016122
|
| 50 |
-
-- model.layers.47 rfn_err: 0.164939 max_diff/norm: 0.067849 sqnr: 19.584990 cos_err: 0.017165
|
| 51 |
-
-- model.layers.48 rfn_err: 0.165954 max_diff/norm: 0.064549 sqnr: 19.534291 cos_err: 0.017085
|
| 52 |
-
-- model.layers.49 rfn_err: 0.167305 max_diff/norm: 0.060484 sqnr: 19.525616 cos_err: 0.017809
|
| 53 |
-
-- model.layers.50 rfn_err: 0.168045 max_diff/norm: 0.057157 sqnr: 19.455932 cos_err: 0.018103
|
| 54 |
-
-- model.layers.51 rfn_err: 0.171497 max_diff/norm: 0.053969 sqnr: 19.336356 cos_err: 0.018967
|
| 55 |
-
-- model.layers.52 rfn_err: 0.171437 max_diff/norm: 0.050450 sqnr: 19.299448 cos_err: 0.019189
|
| 56 |
-
-- model.layers.53 rfn_err: 0.171026 max_diff/norm: 0.047735 sqnr: 19.203575 cos_err: 0.019697
|
| 57 |
-
-- model.layers.54 rfn_err: 0.170891 max_diff/norm: 0.045281 sqnr: 19.107354 cos_err: 0.019586
|
| 58 |
-
-- model.layers.55 rfn_err: 0.173012 max_diff/norm: 0.043646 sqnr: 18.910681 cos_err: 0.020545
|
| 59 |
-
-- model.layers.56 rfn_err: 0.172990 max_diff/norm: 0.041733 sqnr: 18.769338 cos_err: 0.020506
|
| 60 |
-
-- model.layers.57 rfn_err: 0.169792 max_diff/norm: 0.038496 sqnr: 18.663130 cos_err: 0.019758
|
| 61 |
-
-- model.layers.58 rfn_err: 0.161728 max_diff/norm: 0.036592 sqnr: 18.628091 cos_err: 0.017834
|
| 62 |
-
-- model.layers.59 rfn_err: 0.154931 max_diff/norm: 0.034570 sqnr: 18.614537 cos_err: 0.015379
|
| 63 |
-
-- model.layers.60 rfn_err: 0.146969 max_diff/norm: 0.030656 sqnr: 18.880935 cos_err: 0.012806
|
| 64 |
-
-- model.layers.61 rfn_err: 0.138028 max_diff/norm: 0.047421 sqnr: 19.416316 cos_err: 0.012273
|
| 65 |
-
-- model.norm rfn_err: 0.158195 max_diff/norm: 0.006963 sqnr: 19.225764 cos_err: 0.013913
|
| 66 |
-
-- A perplexity: 8.35782554
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5596
|
| 70 |
-
K = 2: 0.6798
|
| 71 |
-
K = 3: 0.7387
|
| 72 |
-
K = 4: 0.7746
|
| 73 |
-
K = 5: 0.7998
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.9449
|
| 82 |
-
K = 2: 0.8209
|
| 83 |
-
K = 3: 0.6651
|
| 84 |
-
K = 4: 0.5071
|
| 85 |
-
K = 5: 0.3670
|
| 86 |
-
-- KL divergence (A, B): 0.04015230
|
| 87 |
-
-- KL divergence (B, A): 0.04071388
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-7.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.000560 max_diff/norm: 0.000182 sqnr: 66.250859 cos_err: 0.000000
|
| 4 |
-
-- model.layers.1 rfn_err: 0.001196 max_diff/norm: 0.000613 sqnr: 65.494256 cos_err: 0.000001
|
| 5 |
-
-- model.layers.2 rfn_err: 0.002691 max_diff/norm: 0.001966 sqnr: 63.802718 cos_err: 0.000003
|
| 6 |
-
-- model.layers.3 rfn_err: 0.003877 max_diff/norm: 0.002972 sqnr: 59.584492 cos_err: 0.000005
|
| 7 |
-
-- model.layers.4 rfn_err: 0.004591 max_diff/norm: 0.002993 sqnr: 56.390129 cos_err: 0.000007
|
| 8 |
-
-- model.layers.5 rfn_err: 0.004672 max_diff/norm: 0.043094 sqnr: 53.401958 cos_err: 0.000011
|
| 9 |
-
-- model.layers.6 rfn_err: 0.005053 max_diff/norm: 0.042747 sqnr: 51.403407 cos_err: 0.000017
|
| 10 |
-
-- model.layers.7 rfn_err: 0.005484 max_diff/norm: 0.041560 sqnr: 49.283374 cos_err: 0.000024
|
| 11 |
-
-- model.layers.8 rfn_err: 0.006207 max_diff/norm: 0.040911 sqnr: 46.516407 cos_err: 0.000038
|
| 12 |
-
-- model.layers.9 rfn_err: 0.006919 max_diff/norm: 0.040032 sqnr: 44.392462 cos_err: 0.000051
|
| 13 |
-
-- model.layers.10 rfn_err: 0.007554 max_diff/norm: 0.039448 sqnr: 43.250104 cos_err: 0.000064
|
| 14 |
-
-- model.layers.11 rfn_err: 0.008283 max_diff/norm: 0.038800 sqnr: 41.952453 cos_err: 0.000080
|
| 15 |
-
-- model.layers.12 rfn_err: 0.009074 max_diff/norm: 0.038340 sqnr: 40.670537 cos_err: 0.000099
|
| 16 |
-
-- model.layers.13 rfn_err: 0.009857 max_diff/norm: 0.037903 sqnr: 39.760592 cos_err: 0.000120
|
| 17 |
-
-- model.layers.14 rfn_err: 0.010539 max_diff/norm: 0.037459 sqnr: 38.997738 cos_err: 0.000137
|
| 18 |
-
-- model.layers.15 rfn_err: 0.011476 max_diff/norm: 0.036819 sqnr: 38.084897 cos_err: 0.000162
|
| 19 |
-
-- model.layers.16 rfn_err: 0.012086 max_diff/norm: 0.036038 sqnr: 37.657304 cos_err: 0.000175
|
| 20 |
-
-- model.layers.17 rfn_err: 0.012987 max_diff/norm: 0.035555 sqnr: 36.976459 cos_err: 0.000204
|
| 21 |
-
-- model.layers.18 rfn_err: 0.014156 max_diff/norm: 0.035192 sqnr: 36.152336 cos_err: 0.000246
|
| 22 |
-
-- model.layers.19 rfn_err: 0.015402 max_diff/norm: 0.034900 sqnr: 35.448933 cos_err: 0.000290
|
| 23 |
-
-- model.layers.20 rfn_err: 0.016710 max_diff/norm: 0.034060 sqnr: 34.786421 cos_err: 0.000338
|
| 24 |
-
-- model.layers.21 rfn_err: 0.018065 max_diff/norm: 0.033554 sqnr: 34.188991 cos_err: 0.000401
|
| 25 |
-
-- model.layers.22 rfn_err: 0.020125 max_diff/norm: 0.033028 sqnr: 33.411842 cos_err: 0.000499
|
| 26 |
-
-- model.layers.23 rfn_err: 0.022564 max_diff/norm: 0.032781 sqnr: 32.553960 cos_err: 0.000653
|
| 27 |
-
-- model.layers.24 rfn_err: 0.025218 max_diff/norm: 0.031707 sqnr: 31.774962 cos_err: 0.000839
|
| 28 |
-
-- model.layers.25 rfn_err: 0.029645 max_diff/norm: 0.031183 sqnr: 30.675104 cos_err: 0.001193
|
| 29 |
-
-- model.layers.26 rfn_err: 0.035183 max_diff/norm: 0.031087 sqnr: 29.389312 cos_err: 0.001664
|
| 30 |
-
-- model.layers.27 rfn_err: 0.042841 max_diff/norm: 0.030733 sqnr: 28.315314 cos_err: 0.002444
|
| 31 |
-
-- model.layers.28 rfn_err: 0.071600 max_diff/norm: 0.030360 sqnr: 27.148526 cos_err: 0.003679
|
| 32 |
-
-- model.layers.29 rfn_err: 0.081257 max_diff/norm: 0.030133 sqnr: 26.339088 cos_err: 0.004428
|
| 33 |
-
-- model.layers.30 rfn_err: 0.089474 max_diff/norm: 0.029503 sqnr: 25.159096 cos_err: 0.005543
|
| 34 |
-
-- model.layers.31 rfn_err: 0.094679 max_diff/norm: 0.028846 sqnr: 24.468424 cos_err: 0.006236
|
| 35 |
-
-- model.layers.32 rfn_err: 0.105001 max_diff/norm: 0.028775 sqnr: 23.675020 cos_err: 0.006935
|
| 36 |
-
-- model.layers.33 rfn_err: 0.109669 max_diff/norm: 0.028754 sqnr: 23.043021 cos_err: 0.007975
|
| 37 |
-
-- model.layers.34 rfn_err: 0.112336 max_diff/norm: 0.028278 sqnr: 22.645694 cos_err: 0.008555
|
| 38 |
-
-- model.layers.35 rfn_err: 0.115750 max_diff/norm: 0.027747 sqnr: 22.345913 cos_err: 0.009223
|
| 39 |
-
-- model.layers.36 rfn_err: 0.122733 max_diff/norm: 0.027560 sqnr: 21.706496 cos_err: 0.010586
|
| 40 |
-
-- model.layers.37 rfn_err: 0.123626 max_diff/norm: 0.025725 sqnr: 21.635876 cos_err: 0.010734
|
| 41 |
-
-- model.layers.38 rfn_err: 0.127712 max_diff/norm: 0.023241 sqnr: 21.348562 cos_err: 0.012312
|
| 42 |
-
-- model.layers.39 rfn_err: 0.129427 max_diff/norm: 0.023995 sqnr: 21.426755 cos_err: 0.012149
|
| 43 |
-
-- model.layers.40 rfn_err: 0.131556 max_diff/norm: 0.021078 sqnr: 21.274276 cos_err: 0.012101
|
| 44 |
-
-- model.layers.41 rfn_err: 0.134928 max_diff/norm: 0.019668 sqnr: 21.173382 cos_err: 0.013130
|
| 45 |
-
-- model.layers.42 rfn_err: 0.139890 max_diff/norm: 0.023751 sqnr: 20.989811 cos_err: 0.014258
|
| 46 |
-
-- model.layers.43 rfn_err: 0.141563 max_diff/norm: 0.025637 sqnr: 21.096693 cos_err: 0.013388
|
| 47 |
-
-- model.layers.44 rfn_err: 0.147833 max_diff/norm: 0.025241 sqnr: 20.853674 cos_err: 0.014561
|
| 48 |
-
-- model.layers.45 rfn_err: 0.151443 max_diff/norm: 0.028335 sqnr: 20.701290 cos_err: 0.015050
|
| 49 |
-
-- model.layers.46 rfn_err: 0.151276 max_diff/norm: 0.031591 sqnr: 20.743301 cos_err: 0.014719
|
| 50 |
-
-- model.layers.47 rfn_err: 0.157034 max_diff/norm: 0.031835 sqnr: 20.584028 cos_err: 0.015708
|
| 51 |
-
-- model.layers.48 rfn_err: 0.157965 max_diff/norm: 0.031107 sqnr: 20.529386 cos_err: 0.015626
|
| 52 |
-
-- model.layers.49 rfn_err: 0.159416 max_diff/norm: 0.031311 sqnr: 20.515366 cos_err: 0.016340
|
| 53 |
-
-- model.layers.50 rfn_err: 0.160044 max_diff/norm: 0.030860 sqnr: 20.441445 cos_err: 0.016615
|
| 54 |
-
-- model.layers.51 rfn_err: 0.163510 max_diff/norm: 0.031575 sqnr: 20.315686 cos_err: 0.017451
|
| 55 |
-
-- model.layers.52 rfn_err: 0.163440 max_diff/norm: 0.028570 sqnr: 20.268164 cos_err: 0.017664
|
| 56 |
-
-- model.layers.53 rfn_err: 0.162917 max_diff/norm: 0.028204 sqnr: 20.162311 cos_err: 0.018130
|
| 57 |
-
-- model.layers.54 rfn_err: 0.162807 max_diff/norm: 0.027045 sqnr: 20.049632 cos_err: 0.018021
|
| 58 |
-
-- model.layers.55 rfn_err: 0.164703 max_diff/norm: 0.023875 sqnr: 19.840393 cos_err: 0.018919
|
| 59 |
-
-- model.layers.56 rfn_err: 0.164629 max_diff/norm: 0.023986 sqnr: 19.686238 cos_err: 0.018869
|
| 60 |
-
-- model.layers.57 rfn_err: 0.161328 max_diff/norm: 0.021253 sqnr: 19.574260 cos_err: 0.018143
|
| 61 |
-
-- model.layers.58 rfn_err: 0.153104 max_diff/norm: 0.018375 sqnr: 19.530551 cos_err: 0.016282
|
| 62 |
-
-- model.layers.59 rfn_err: 0.146190 max_diff/norm: 0.015653 sqnr: 19.507797 cos_err: 0.013920
|
| 63 |
-
-- model.layers.60 rfn_err: 0.138407 max_diff/norm: 0.010334 sqnr: 19.770672 cos_err: 0.011484
|
| 64 |
-
-- model.layers.61 rfn_err: 0.130370 max_diff/norm: 0.073108 sqnr: 20.301139 cos_err: 0.011050
|
| 65 |
-
-- model.norm rfn_err: 0.149805 max_diff/norm: 0.007261 sqnr: 20.115299 cos_err: 0.012592
|
| 66 |
-
-- A perplexity: 8.35427106
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5595
|
| 70 |
-
K = 2: 0.6799
|
| 71 |
-
K = 3: 0.7387
|
| 72 |
-
K = 4: 0.7748
|
| 73 |
-
K = 5: 0.8003
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.9510
|
| 82 |
-
K = 2: 0.8380
|
| 83 |
-
K = 3: 0.6922
|
| 84 |
-
K = 4: 0.5420
|
| 85 |
-
K = 5: 0.4046
|
| 86 |
-
-- KL divergence (A, B): 0.03484128
|
| 87 |
-
-- KL divergence (B, A): 0.03757493
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_diff/MiniMaxAI_MiniMax-M2.5-8.0bpw.txt
DELETED
|
@@ -1,87 +0,0 @@
|
|
| 1 |
-
|
| 2 |
-
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
|
| 3 |
-
-- model.layers.0 rfn_err: 0.000469 max_diff/norm: 0.000179 sqnr: 67.624667 cos_err: 0.000000
|
| 4 |
-
-- model.layers.1 rfn_err: 0.000974 max_diff/norm: 0.000531 sqnr: 68.439451 cos_err: 0.000000
|
| 5 |
-
-- model.layers.2 rfn_err: 0.002241 max_diff/norm: 0.001870 sqnr: 68.260154 cos_err: 0.000002
|
| 6 |
-
-- model.layers.3 rfn_err: 0.003263 max_diff/norm: 0.002894 sqnr: 63.013945 cos_err: 0.000004
|
| 7 |
-
-- model.layers.4 rfn_err: 0.003907 max_diff/norm: 0.003336 sqnr: 59.611947 cos_err: 0.000005
|
| 8 |
-
-- model.layers.5 rfn_err: 0.003222 max_diff/norm: 0.047131 sqnr: 56.541940 cos_err: 0.000008
|
| 9 |
-
-- model.layers.6 rfn_err: 0.003638 max_diff/norm: 0.047244 sqnr: 54.479703 cos_err: 0.000013
|
| 10 |
-
-- model.layers.7 rfn_err: 0.004107 max_diff/norm: 0.045489 sqnr: 51.914129 cos_err: 0.000019
|
| 11 |
-
-- model.layers.8 rfn_err: 0.004872 max_diff/norm: 0.045198 sqnr: 49.001552 cos_err: 0.000030
|
| 12 |
-
-- model.layers.9 rfn_err: 0.005626 max_diff/norm: 0.044496 sqnr: 46.700288 cos_err: 0.000041
|
| 13 |
-
-- model.layers.10 rfn_err: 0.006273 max_diff/norm: 0.043968 sqnr: 45.495138 cos_err: 0.000051
|
| 14 |
-
-- model.layers.11 rfn_err: 0.006999 max_diff/norm: 0.043394 sqnr: 44.036062 cos_err: 0.000065
|
| 15 |
-
-- model.layers.12 rfn_err: 0.007787 max_diff/norm: 0.042877 sqnr: 42.612123 cos_err: 0.000081
|
| 16 |
-
-- model.layers.13 rfn_err: 0.008557 max_diff/norm: 0.042569 sqnr: 41.627334 cos_err: 0.000098
|
| 17 |
-
-- model.layers.14 rfn_err: 0.009232 max_diff/norm: 0.042269 sqnr: 40.791703 cos_err: 0.000113
|
| 18 |
-
-- model.layers.15 rfn_err: 0.010146 max_diff/norm: 0.041974 sqnr: 39.771770 cos_err: 0.000135
|
| 19 |
-
-- model.layers.16 rfn_err: 0.010730 max_diff/norm: 0.041506 sqnr: 39.285714 cos_err: 0.000145
|
| 20 |
-
-- model.layers.17 rfn_err: 0.011582 max_diff/norm: 0.041059 sqnr: 38.529370 cos_err: 0.000170
|
| 21 |
-
-- model.layers.18 rfn_err: 0.012672 max_diff/norm: 0.040827 sqnr: 37.640926 cos_err: 0.000205
|
| 22 |
-
-- model.layers.19 rfn_err: 0.013846 max_diff/norm: 0.040427 sqnr: 36.894370 cos_err: 0.000243
|
| 23 |
-
-- model.layers.20 rfn_err: 0.015082 max_diff/norm: 0.039825 sqnr: 36.179127 cos_err: 0.000285
|
| 24 |
-
-- model.layers.21 rfn_err: 0.016368 max_diff/norm: 0.039220 sqnr: 35.531018 cos_err: 0.000340
|
| 25 |
-
-- model.layers.22 rfn_err: 0.018362 max_diff/norm: 0.038589 sqnr: 34.697237 cos_err: 0.000426
|
| 26 |
-
-- model.layers.23 rfn_err: 0.020675 max_diff/norm: 0.038304 sqnr: 33.781780 cos_err: 0.000562
|
| 27 |
-
-- model.layers.24 rfn_err: 0.024030 max_diff/norm: 0.037275 sqnr: 32.957620 cos_err: 0.000730
|
| 28 |
-
-- model.layers.25 rfn_err: 0.028294 max_diff/norm: 0.037068 sqnr: 31.808008 cos_err: 0.001044
|
| 29 |
-
-- model.layers.26 rfn_err: 0.033569 max_diff/norm: 0.036918 sqnr: 30.428794 cos_err: 0.001466
|
| 30 |
-
-- model.layers.27 rfn_err: 0.040863 max_diff/norm: 0.036527 sqnr: 29.294528 cos_err: 0.002164
|
| 31 |
-
-- model.layers.28 rfn_err: 0.068729 max_diff/norm: 0.035751 sqnr: 28.074716 cos_err: 0.003308
|
| 32 |
-
-- model.layers.29 rfn_err: 0.078003 max_diff/norm: 0.035327 sqnr: 27.221135 cos_err: 0.003987
|
| 33 |
-
-- model.layers.30 rfn_err: 0.085684 max_diff/norm: 0.034522 sqnr: 25.983341 cos_err: 0.005010
|
| 34 |
-
-- model.layers.31 rfn_err: 0.090742 max_diff/norm: 0.033618 sqnr: 25.235119 cos_err: 0.005663
|
| 35 |
-
-- model.layers.32 rfn_err: 0.100789 max_diff/norm: 0.032754 sqnr: 24.391638 cos_err: 0.006320
|
| 36 |
-
-- model.layers.33 rfn_err: 0.105336 max_diff/norm: 0.032467 sqnr: 23.727129 cos_err: 0.007296
|
| 37 |
-
-- model.layers.34 rfn_err: 0.107950 max_diff/norm: 0.031170 sqnr: 23.301634 cos_err: 0.007859
|
| 38 |
-
-- model.layers.35 rfn_err: 0.111258 max_diff/norm: 0.030942 sqnr: 22.984354 cos_err: 0.008479
|
| 39 |
-
-- model.layers.36 rfn_err: 0.118099 max_diff/norm: 0.030517 sqnr: 22.322384 cos_err: 0.009760
|
| 40 |
-
-- model.layers.37 rfn_err: 0.118839 max_diff/norm: 0.028324 sqnr: 22.233939 cos_err: 0.009903
|
| 41 |
-
-- model.layers.38 rfn_err: 0.122697 max_diff/norm: 0.027327 sqnr: 21.934171 cos_err: 0.011375
|
| 42 |
-
-- model.layers.39 rfn_err: 0.124401 max_diff/norm: 0.026100 sqnr: 21.999972 cos_err: 0.011230
|
| 43 |
-
-- model.layers.40 rfn_err: 0.126139 max_diff/norm: 0.025323 sqnr: 21.835296 cos_err: 0.011150
|
| 44 |
-
-- model.layers.41 rfn_err: 0.128898 max_diff/norm: 0.023867 sqnr: 21.726902 cos_err: 0.012110
|
| 45 |
-
-- model.layers.42 rfn_err: 0.133769 max_diff/norm: 0.023403 sqnr: 21.534948 cos_err: 0.013173
|
| 46 |
-
-- model.layers.43 rfn_err: 0.135472 max_diff/norm: 0.025423 sqnr: 21.634254 cos_err: 0.012356
|
| 47 |
-
-- model.layers.44 rfn_err: 0.141627 max_diff/norm: 0.024765 sqnr: 21.388501 cos_err: 0.013442
|
| 48 |
-
-- model.layers.45 rfn_err: 0.145089 max_diff/norm: 0.028550 sqnr: 21.230392 cos_err: 0.013905
|
| 49 |
-
-- model.layers.46 rfn_err: 0.145036 max_diff/norm: 0.031216 sqnr: 21.266876 cos_err: 0.013606
|
| 50 |
-
-- model.layers.47 rfn_err: 0.150609 max_diff/norm: 0.031708 sqnr: 21.100099 cos_err: 0.014522
|
| 51 |
-
-- model.layers.48 rfn_err: 0.151605 max_diff/norm: 0.030604 sqnr: 21.043640 cos_err: 0.014459
|
| 52 |
-
-- model.layers.49 rfn_err: 0.152998 max_diff/norm: 0.030138 sqnr: 21.027880 cos_err: 0.015126
|
| 53 |
-
-- model.layers.50 rfn_err: 0.153678 max_diff/norm: 0.030143 sqnr: 20.949445 cos_err: 0.015387
|
| 54 |
-
-- model.layers.51 rfn_err: 0.157058 max_diff/norm: 0.030881 sqnr: 20.818885 cos_err: 0.016170
|
| 55 |
-
-- model.layers.52 rfn_err: 0.157101 max_diff/norm: 0.028464 sqnr: 20.767393 cos_err: 0.016376
|
| 56 |
-
-- model.layers.53 rfn_err: 0.156680 max_diff/norm: 0.028216 sqnr: 20.657856 cos_err: 0.016834
|
| 57 |
-
-- model.layers.54 rfn_err: 0.156580 max_diff/norm: 0.026803 sqnr: 20.544510 cos_err: 0.016751
|
| 58 |
-
-- model.layers.55 rfn_err: 0.158494 max_diff/norm: 0.023788 sqnr: 20.331582 cos_err: 0.017598
|
| 59 |
-
-- model.layers.56 rfn_err: 0.158479 max_diff/norm: 0.021634 sqnr: 20.168725 cos_err: 0.017573
|
| 60 |
-
-- model.layers.57 rfn_err: 0.155372 max_diff/norm: 0.018350 sqnr: 20.042795 cos_err: 0.016913
|
| 61 |
-
-- model.layers.58 rfn_err: 0.147437 max_diff/norm: 0.018248 sqnr: 19.997026 cos_err: 0.015184
|
| 62 |
-
-- model.layers.59 rfn_err: 0.140684 max_diff/norm: 0.016183 sqnr: 19.970912 cos_err: 0.012968
|
| 63 |
-
-- model.layers.60 rfn_err: 0.133054 max_diff/norm: 0.010360 sqnr: 20.229823 cos_err: 0.010685
|
| 64 |
-
-- model.layers.61 rfn_err: 0.125113 max_diff/norm: 0.071410 sqnr: 20.739476 cos_err: 0.010296
|
| 65 |
-
-- model.norm rfn_err: 0.144264 max_diff/norm: 0.008163 sqnr: 20.559373 cos_err: 0.011742
|
| 66 |
-
-- A perplexity: 8.33833098
|
| 67 |
-
-- B perplexity: 8.34981264
|
| 68 |
-
-- A label in top-K:
|
| 69 |
-
K = 1: 0.5594
|
| 70 |
-
K = 2: 0.6805
|
| 71 |
-
K = 3: 0.7388
|
| 72 |
-
K = 4: 0.7750
|
| 73 |
-
K = 5: 0.8003
|
| 74 |
-
-- B label in top-K:
|
| 75 |
-
K = 1: 0.5598
|
| 76 |
-
K = 2: 0.6804
|
| 77 |
-
K = 3: 0.7390
|
| 78 |
-
K = 4: 0.7749
|
| 79 |
-
K = 5: 0.8005
|
| 80 |
-
-- Top-K agreement, A vs B:
|
| 81 |
-
K = 1: 0.9533
|
| 82 |
-
K = 2: 0.8440
|
| 83 |
-
K = 3: 0.7042
|
| 84 |
-
K = 4: 0.5587
|
| 85 |
-
K = 5: 0.4226
|
| 86 |
-
-- KL divergence (A, B): 0.03227931
|
| 87 |
-
-- KL divergence (B, A): 0.03371121
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|