MiniMax-M2.5-exl3 / model_diff_logs /MiniMaxAI_MiniMax-M2.5-4.0bpw.txt
NeuroSenko's picture
Upload 7 files
9bbe9c0 verified
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
-- model.layers.0 rfn_err: 0.001854 max_diff/norm: 0.000474 sqnr: 56.263819 cos_err: 0.000001
-- model.layers.1 rfn_err: 0.003312 max_diff/norm: 0.001508 sqnr: 53.693782 cos_err: 0.000005
-- model.layers.2 rfn_err: 0.006790 max_diff/norm: 0.002903 sqnr: 47.855051 cos_err: 0.000016
-- model.layers.3 rfn_err: 0.009778 max_diff/norm: 0.003935 sqnr: 43.142093 cos_err: 0.000026
-- model.layers.4 rfn_err: 0.010822 max_diff/norm: 0.004183 sqnr: 41.971633 cos_err: 0.000036
-- model.layers.5 rfn_err: 0.018502 max_diff/norm: 0.333859 sqnr: 39.795376 cos_err: 0.000052
-- model.layers.6 rfn_err: 0.019027 max_diff/norm: 0.328645 sqnr: 38.428198 cos_err: 0.000073
-- model.layers.7 rfn_err: 0.019311 max_diff/norm: 0.315777 sqnr: 37.685598 cos_err: 0.000094
-- model.layers.8 rfn_err: 0.020006 max_diff/norm: 0.301441 sqnr: 36.191342 cos_err: 0.000139
-- model.layers.9 rfn_err: 0.020795 max_diff/norm: 0.285230 sqnr: 34.898715 cos_err: 0.000184
-- model.layers.10 rfn_err: 0.021488 max_diff/norm: 0.271941 sqnr: 34.073830 cos_err: 0.000218
-- model.layers.11 rfn_err: 0.022412 max_diff/norm: 0.264327 sqnr: 33.236031 cos_err: 0.000273
-- model.layers.12 rfn_err: 0.023527 max_diff/norm: 0.257119 sqnr: 32.362971 cos_err: 0.000333
-- model.layers.13 rfn_err: 0.024654 max_diff/norm: 0.251269 sqnr: 31.681959 cos_err: 0.000394
-- model.layers.14 rfn_err: 0.025710 max_diff/norm: 0.246635 sqnr: 31.131241 cos_err: 0.000450
-- model.layers.15 rfn_err: 0.027181 max_diff/norm: 0.243005 sqnr: 30.451275 cos_err: 0.000529
-- model.layers.16 rfn_err: 0.028224 max_diff/norm: 0.235984 sqnr: 30.094617 cos_err: 0.000575
-- model.layers.17 rfn_err: 0.029731 max_diff/norm: 0.231063 sqnr: 29.538305 cos_err: 0.000662
-- model.layers.18 rfn_err: 0.031832 max_diff/norm: 0.227557 sqnr: 28.787331 cos_err: 0.000799
-- model.layers.19 rfn_err: 0.034139 max_diff/norm: 0.223469 sqnr: 28.072172 cos_err: 0.000945
-- model.layers.20 rfn_err: 0.036582 max_diff/norm: 0.215091 sqnr: 27.441542 cos_err: 0.001107
-- model.layers.21 rfn_err: 0.039119 max_diff/norm: 0.207371 sqnr: 26.848416 cos_err: 0.001289
-- model.layers.22 rfn_err: 0.042374 max_diff/norm: 0.199526 sqnr: 26.164466 cos_err: 0.001552
-- model.layers.23 rfn_err: 0.046192 max_diff/norm: 0.195744 sqnr: 25.379355 cos_err: 0.001945
-- model.layers.24 rfn_err: 0.051334 max_diff/norm: 0.188322 sqnr: 24.590973 cos_err: 0.002428
-- model.layers.25 rfn_err: 0.057732 max_diff/norm: 0.184453 sqnr: 23.548251 cos_err: 0.003231
-- model.layers.26 rfn_err: 0.065455 max_diff/norm: 0.183255 sqnr: 22.427084 cos_err: 0.004314
-- model.layers.27 rfn_err: 0.075206 max_diff/norm: 0.179117 sqnr: 21.443197 cos_err: 0.005829
-- model.layers.28 rfn_err: 0.103912 max_diff/norm: 0.172765 sqnr: 20.421422 cos_err: 0.008061
-- model.layers.29 rfn_err: 0.115632 max_diff/norm: 0.168113 sqnr: 19.698096 cos_err: 0.009589
-- model.layers.30 rfn_err: 0.127148 max_diff/norm: 0.161370 sqnr: 18.701677 cos_err: 0.011875
-- model.layers.31 rfn_err: 0.134375 max_diff/norm: 0.154427 sqnr: 18.184652 cos_err: 0.013281
-- model.layers.32 rfn_err: 0.147770 max_diff/norm: 0.147113 sqnr: 17.544938 cos_err: 0.014799
-- model.layers.33 rfn_err: 0.154914 max_diff/norm: 0.142185 sqnr: 17.007279 cos_err: 0.016813
-- model.layers.34 rfn_err: 0.159491 max_diff/norm: 0.134358 sqnr: 16.705121 cos_err: 0.017905
-- model.layers.35 rfn_err: 0.164361 max_diff/norm: 0.126980 sqnr: 16.478361 cos_err: 0.019041
-- model.layers.36 rfn_err: 0.174045 max_diff/norm: 0.124578 sqnr: 15.923690 cos_err: 0.021620
-- model.layers.37 rfn_err: 0.175827 max_diff/norm: 0.115278 sqnr: 15.915138 cos_err: 0.021702
-- model.layers.38 rfn_err: 0.181752 max_diff/norm: 0.110514 sqnr: 15.646665 cos_err: 0.024156
-- model.layers.39 rfn_err: 0.183469 max_diff/norm: 0.103311 sqnr: 15.743259 cos_err: 0.023519
-- model.layers.40 rfn_err: 0.186590 max_diff/norm: 0.098521 sqnr: 15.634178 cos_err: 0.023518
-- model.layers.41 rfn_err: 0.190228 max_diff/norm: 0.091429 sqnr: 15.554746 cos_err: 0.024727
-- model.layers.42 rfn_err: 0.196391 max_diff/norm: 0.086804 sqnr: 15.384511 cos_err: 0.026359
-- model.layers.43 rfn_err: 0.197996 max_diff/norm: 0.080018 sqnr: 15.496179 cos_err: 0.025006
-- model.layers.44 rfn_err: 0.205729 max_diff/norm: 0.076818 sqnr: 15.265893 cos_err: 0.026823
-- model.layers.45 rfn_err: 0.210517 max_diff/norm: 0.074625 sqnr: 15.121604 cos_err: 0.027725
-- model.layers.46 rfn_err: 0.210609 max_diff/norm: 0.070258 sqnr: 15.172807 cos_err: 0.027206
-- model.layers.47 rfn_err: 0.217327 max_diff/norm: 0.067273 sqnr: 15.027985 cos_err: 0.028692
-- model.layers.48 rfn_err: 0.219067 max_diff/norm: 0.064565 sqnr: 14.978627 cos_err: 0.028730
-- model.layers.49 rfn_err: 0.221027 max_diff/norm: 0.061065 sqnr: 14.961713 cos_err: 0.029623
-- model.layers.50 rfn_err: 0.222484 max_diff/norm: 0.058346 sqnr: 14.898379 cos_err: 0.030090
-- model.layers.51 rfn_err: 0.226745 max_diff/norm: 0.055432 sqnr: 14.786190 cos_err: 0.031318
-- model.layers.52 rfn_err: 0.227504 max_diff/norm: 0.051481 sqnr: 14.754768 cos_err: 0.031677
-- model.layers.53 rfn_err: 0.228062 max_diff/norm: 0.048984 sqnr: 14.670084 cos_err: 0.032442
-- model.layers.54 rfn_err: 0.228794 max_diff/norm: 0.046740 sqnr: 14.594321 cos_err: 0.032488
-- model.layers.55 rfn_err: 0.232104 max_diff/norm: 0.045176 sqnr: 14.434930 cos_err: 0.033913
-- model.layers.56 rfn_err: 0.233304 max_diff/norm: 0.042648 sqnr: 14.316571 cos_err: 0.034139
-- model.layers.57 rfn_err: 0.231687 max_diff/norm: 0.040295 sqnr: 14.232150 cos_err: 0.033459
-- model.layers.58 rfn_err: 0.225268 max_diff/norm: 0.038996 sqnr: 14.208016 cos_err: 0.031359
-- model.layers.59 rfn_err: 0.220112 max_diff/norm: 0.037183 sqnr: 14.211387 cos_err: 0.028587
-- model.layers.60 rfn_err: 0.211295 max_diff/norm: 0.032582 sqnr: 14.490349 cos_err: 0.025010
-- model.layers.61 rfn_err: 0.196379 max_diff/norm: 0.072403 sqnr: 15.138763 cos_err: 0.023275
-- model.norm rfn_err: 0.220192 max_diff/norm: 0.009406 sqnr: 14.905477 cos_err: 0.025712
-- A perplexity: 8.43832064
-- B perplexity: 8.34981264
-- A label in top-K:
K = 1: 0.5583
K = 2: 0.6787
K = 3: 0.7376
K = 4: 0.7734
K = 5: 0.7988
-- B label in top-K:
K = 1: 0.5598
K = 2: 0.6804
K = 3: 0.7390
K = 4: 0.7749
K = 5: 0.8005
-- Top-K agreement, A vs B:
K = 1: 0.9118
K = 2: 0.7281
K = 3: 0.5222
K = 4: 0.3439
K = 5: 0.2105
-- KL divergence (A, B): 0.07256054
-- KL divergence (B, A): 0.07650418