MiniMax-M2.5-exl3 / model_diff_logs /MiniMaxAI_MiniMax-M2.5-3.0bpw.txt
NeuroSenko's picture
Upload 7 files
9bbe9c0 verified
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
-- model.layers.0 rfn_err: 0.003394 max_diff/norm: 0.000607 sqnr: 51.644513 cos_err: 0.000004
-- model.layers.1 rfn_err: 0.005808 max_diff/norm: 0.002926 sqnr: 46.651903 cos_err: 0.000013
-- model.layers.2 rfn_err: 0.011415 max_diff/norm: 0.002718 sqnr: 40.951312 cos_err: 0.000040
-- model.layers.3 rfn_err: 0.015681 max_diff/norm: 0.003334 sqnr: 37.879271 cos_err: 0.000062
-- model.layers.4 rfn_err: 0.018357 max_diff/norm: 0.004838 sqnr: 36.169828 cos_err: 0.000083
-- model.layers.5 rfn_err: 0.023889 max_diff/norm: 0.160353 sqnr: 34.804823 cos_err: 0.000118
-- model.layers.6 rfn_err: 0.024955 max_diff/norm: 0.159496 sqnr: 33.171246 cos_err: 0.000159
-- model.layers.7 rfn_err: 0.025661 max_diff/norm: 0.157699 sqnr: 32.518122 cos_err: 0.000202
-- model.layers.8 rfn_err: 0.026866 max_diff/norm: 0.156315 sqnr: 31.303216 cos_err: 0.000301
-- model.layers.9 rfn_err: 0.028113 max_diff/norm: 0.153931 sqnr: 30.224139 cos_err: 0.000407
-- model.layers.10 rfn_err: 0.029038 max_diff/norm: 0.152014 sqnr: 29.747104 cos_err: 0.000474
-- model.layers.11 rfn_err: 0.030416 max_diff/norm: 0.149952 sqnr: 29.029571 cos_err: 0.000603
-- model.layers.12 rfn_err: 0.032240 max_diff/norm: 0.148527 sqnr: 28.188072 cos_err: 0.000743
-- model.layers.13 rfn_err: 0.034151 max_diff/norm: 0.147561 sqnr: 27.505001 cos_err: 0.000882
-- model.layers.14 rfn_err: 0.036222 max_diff/norm: 0.146296 sqnr: 26.820057 cos_err: 0.001026
-- model.layers.15 rfn_err: 0.038649 max_diff/norm: 0.144103 sqnr: 26.183311 cos_err: 0.001212
-- model.layers.16 rfn_err: 0.040821 max_diff/norm: 0.141321 sqnr: 25.734528 cos_err: 0.001330
-- model.layers.17 rfn_err: 0.043610 max_diff/norm: 0.140069 sqnr: 25.100897 cos_err: 0.001547
-- model.layers.18 rfn_err: 0.046721 max_diff/norm: 0.138471 sqnr: 24.456386 cos_err: 0.001895
-- model.layers.19 rfn_err: 0.050462 max_diff/norm: 0.137175 sqnr: 23.788862 cos_err: 0.002255
-- model.layers.20 rfn_err: 0.054798 max_diff/norm: 0.134785 sqnr: 23.108732 cos_err: 0.002650
-- model.layers.21 rfn_err: 0.058880 max_diff/norm: 0.133462 sqnr: 22.572879 cos_err: 0.003073
-- model.layers.22 rfn_err: 0.064044 max_diff/norm: 0.131656 sqnr: 21.904474 cos_err: 0.003659
-- model.layers.23 rfn_err: 0.070224 max_diff/norm: 0.131168 sqnr: 21.062901 cos_err: 0.004548
-- model.layers.24 rfn_err: 0.077352 max_diff/norm: 0.127912 sqnr: 20.287736 cos_err: 0.005605
-- model.layers.25 rfn_err: 0.086052 max_diff/norm: 0.127358 sqnr: 19.350536 cos_err: 0.007216
-- model.layers.26 rfn_err: 0.097367 max_diff/norm: 0.127782 sqnr: 18.169144 cos_err: 0.009497
-- model.layers.27 rfn_err: 0.110078 max_diff/norm: 0.127504 sqnr: 17.233901 cos_err: 0.012311
-- model.layers.28 rfn_err: 0.141438 max_diff/norm: 0.124777 sqnr: 16.252857 cos_err: 0.016206
-- model.layers.29 rfn_err: 0.155649 max_diff/norm: 0.123986 sqnr: 15.576786 cos_err: 0.019012
-- model.layers.30 rfn_err: 0.171725 max_diff/norm: 0.122313 sqnr: 14.626882 cos_err: 0.023428
-- model.layers.31 rfn_err: 0.181566 max_diff/norm: 0.119922 sqnr: 14.148360 cos_err: 0.026096
-- model.layers.32 rfn_err: 0.198945 max_diff/norm: 0.117497 sqnr: 13.547689 cos_err: 0.029230
-- model.layers.33 rfn_err: 0.209506 max_diff/norm: 0.115155 sqnr: 13.038629 cos_err: 0.032930
-- model.layers.34 rfn_err: 0.216794 max_diff/norm: 0.112269 sqnr: 12.763568 cos_err: 0.034920
-- model.layers.35 rfn_err: 0.223848 max_diff/norm: 0.108764 sqnr: 12.565271 cos_err: 0.036753
-- model.layers.36 rfn_err: 0.237001 max_diff/norm: 0.107344 sqnr: 12.045963 cos_err: 0.041433
-- model.layers.37 rfn_err: 0.240391 max_diff/norm: 0.103187 sqnr: 12.052502 cos_err: 0.041365
-- model.layers.38 rfn_err: 0.249047 max_diff/norm: 0.099769 sqnr: 11.780027 cos_err: 0.045215
-- model.layers.39 rfn_err: 0.251593 max_diff/norm: 0.094690 sqnr: 11.874520 cos_err: 0.043901
-- model.layers.40 rfn_err: 0.256486 max_diff/norm: 0.092512 sqnr: 11.783108 cos_err: 0.044108
-- model.layers.41 rfn_err: 0.261185 max_diff/norm: 0.087722 sqnr: 11.704016 cos_err: 0.045681
-- model.layers.42 rfn_err: 0.269034 max_diff/norm: 0.084542 sqnr: 11.544741 cos_err: 0.048144
-- model.layers.43 rfn_err: 0.271123 max_diff/norm: 0.079522 sqnr: 11.648045 cos_err: 0.046260
-- model.layers.44 rfn_err: 0.281021 max_diff/norm: 0.076279 sqnr: 11.425829 cos_err: 0.049241
-- model.layers.45 rfn_err: 0.287677 max_diff/norm: 0.073810 sqnr: 11.279177 cos_err: 0.050887
-- model.layers.46 rfn_err: 0.288193 max_diff/norm: 0.069342 sqnr: 11.328149 cos_err: 0.050074
-- model.layers.47 rfn_err: 0.296293 max_diff/norm: 0.066391 sqnr: 11.184084 cos_err: 0.052388
-- model.layers.48 rfn_err: 0.299036 max_diff/norm: 0.063632 sqnr: 11.137132 cos_err: 0.052699
-- model.layers.49 rfn_err: 0.301785 max_diff/norm: 0.060063 sqnr: 11.114139 cos_err: 0.053790
-- model.layers.50 rfn_err: 0.304180 max_diff/norm: 0.057226 sqnr: 11.059243 cos_err: 0.054579
-- model.layers.51 rfn_err: 0.309586 max_diff/norm: 0.054422 sqnr: 10.955227 cos_err: 0.056505
-- model.layers.52 rfn_err: 0.311136 max_diff/norm: 0.050588 sqnr: 10.931107 cos_err: 0.057045
-- model.layers.53 rfn_err: 0.313254 max_diff/norm: 0.048196 sqnr: 10.851354 cos_err: 0.058378
-- model.layers.54 rfn_err: 0.315019 max_diff/norm: 0.045797 sqnr: 10.793363 cos_err: 0.058742
-- model.layers.55 rfn_err: 0.319939 max_diff/norm: 0.044467 sqnr: 10.657039 cos_err: 0.060996
-- model.layers.56 rfn_err: 0.322801 max_diff/norm: 0.041824 sqnr: 10.549990 cos_err: 0.061744
-- model.layers.57 rfn_err: 0.322995 max_diff/norm: 0.039723 sqnr: 10.481810 cos_err: 0.061341
-- model.layers.58 rfn_err: 0.318804 max_diff/norm: 0.038624 sqnr: 10.458463 cos_err: 0.059246
-- model.layers.59 rfn_err: 0.316041 max_diff/norm: 0.036968 sqnr: 10.455861 cos_err: 0.056453
-- model.layers.60 rfn_err: 0.305529 max_diff/norm: 0.032550 sqnr: 10.742234 cos_err: 0.051024
-- model.layers.61 rfn_err: 0.282084 max_diff/norm: 0.061164 sqnr: 11.445191 cos_err: 0.046361
-- model.norm rfn_err: 0.312897 max_diff/norm: 0.009568 sqnr: 11.155214 cos_err: 0.050396
-- A perplexity: 8.74921130
-- B perplexity: 8.34981264
-- A label in top-K:
K = 1: 0.5544
K = 2: 0.6738
K = 3: 0.7339
K = 4: 0.7703
K = 5: 0.7952
-- B label in top-K:
K = 1: 0.5598
K = 2: 0.6804
K = 3: 0.7390
K = 4: 0.7749
K = 5: 0.8005
-- Top-K agreement, A vs B:
K = 1: 0.8640
K = 2: 0.6125
K = 3: 0.3773
K = 4: 0.2072
K = 5: 0.1040
-- KL divergence (A, B): 0.14842009
-- KL divergence (B, A): 0.15566614