MiniMax-M2.5-exl3 / model_diff_logs /MiniMaxAI_MiniMax-M2.5-2.0bpw.txt
NeuroSenko's picture
Upload 7 files
9bbe9c0 verified
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
-- model.layers.0 rfn_err: 0.007858 max_diff/norm: 0.001212 sqnr: 43.922771 cos_err: 0.000015
-- model.layers.1 rfn_err: 0.011489 max_diff/norm: 0.003147 sqnr: 39.937154 cos_err: 0.000051
-- model.layers.2 rfn_err: 0.024680 max_diff/norm: 0.005199 sqnr: 33.424866 cos_err: 0.000133
-- model.layers.3 rfn_err: 0.047014 max_diff/norm: 0.005335 sqnr: 27.375450 cos_err: 0.000201
-- model.layers.4 rfn_err: 0.053175 max_diff/norm: 0.010376 sqnr: 26.392067 cos_err: 0.000264
-- model.layers.5 rfn_err: 0.101607 max_diff/norm: 0.439889 sqnr: 25.275748 cos_err: 0.000366
-- model.layers.6 rfn_err: 0.104424 max_diff/norm: 0.433924 sqnr: 23.747089 cos_err: 0.000484
-- model.layers.7 rfn_err: 0.104557 max_diff/norm: 0.431919 sqnr: 24.505606 cos_err: 0.000609
-- model.layers.8 rfn_err: 0.105434 max_diff/norm: 0.429296 sqnr: 23.243343 cos_err: 0.000926
-- model.layers.9 rfn_err: 0.106367 max_diff/norm: 0.423224 sqnr: 22.128818 cos_err: 0.001310
-- model.layers.10 rfn_err: 0.104368 max_diff/norm: 0.418102 sqnr: 22.921680 cos_err: 0.001477
-- model.layers.11 rfn_err: 0.105033 max_diff/norm: 0.413182 sqnr: 22.373919 cos_err: 0.001895
-- model.layers.12 rfn_err: 0.106147 max_diff/norm: 0.409659 sqnr: 21.870182 cos_err: 0.002353
-- model.layers.13 rfn_err: 0.107418 max_diff/norm: 0.406745 sqnr: 21.460371 cos_err: 0.002797
-- model.layers.14 rfn_err: 0.108870 max_diff/norm: 0.403091 sqnr: 21.103061 cos_err: 0.003244
-- model.layers.15 rfn_err: 0.112406 max_diff/norm: 0.397691 sqnr: 20.229972 cos_err: 0.003911
-- model.layers.16 rfn_err: 0.113939 max_diff/norm: 0.391588 sqnr: 19.953552 cos_err: 0.004338
-- model.layers.17 rfn_err: 0.118850 max_diff/norm: 0.387389 sqnr: 19.038787 cos_err: 0.005121
-- model.layers.18 rfn_err: 0.122696 max_diff/norm: 0.382501 sqnr: 18.479936 cos_err: 0.006249
-- model.layers.19 rfn_err: 0.126383 max_diff/norm: 0.375111 sqnr: 18.069667 cos_err: 0.007387
-- model.layers.20 rfn_err: 0.133546 max_diff/norm: 0.371628 sqnr: 17.313982 cos_err: 0.008723
-- model.layers.21 rfn_err: 0.139186 max_diff/norm: 0.355202 sqnr: 16.785980 cos_err: 0.010051
-- model.layers.22 rfn_err: 0.146212 max_diff/norm: 0.354242 sqnr: 16.204125 cos_err: 0.011832
-- model.layers.23 rfn_err: 0.153624 max_diff/norm: 0.354606 sqnr: 15.559332 cos_err: 0.014324
-- model.layers.24 rfn_err: 0.167379 max_diff/norm: 0.376240 sqnr: 14.823083 cos_err: 0.017401
-- model.layers.25 rfn_err: 0.178808 max_diff/norm: 0.371875 sqnr: 14.015634 cos_err: 0.021692
-- model.layers.26 rfn_err: 0.191962 max_diff/norm: 0.371208 sqnr: 13.135830 cos_err: 0.027272
-- model.layers.27 rfn_err: 0.208002 max_diff/norm: 0.363041 sqnr: 12.251230 cos_err: 0.034009
-- model.layers.28 rfn_err: 0.237604 max_diff/norm: 0.358428 sqnr: 11.404306 cos_err: 0.042369
-- model.layers.29 rfn_err: 0.254413 max_diff/norm: 0.353930 sqnr: 10.800144 cos_err: 0.048954
-- model.layers.30 rfn_err: 0.276157 max_diff/norm: 0.345944 sqnr: 9.982429 cos_err: 0.058998
-- model.layers.31 rfn_err: 0.289523 max_diff/norm: 0.339725 sqnr: 9.545339 cos_err: 0.065212
-- model.layers.32 rfn_err: 0.310718 max_diff/norm: 0.328324 sqnr: 9.050561 cos_err: 0.072135
-- model.layers.33 rfn_err: 0.325859 max_diff/norm: 0.321673 sqnr: 8.594002 cos_err: 0.080117
-- model.layers.34 rfn_err: 0.336873 max_diff/norm: 0.311122 sqnr: 8.352301 cos_err: 0.084569
-- model.layers.35 rfn_err: 0.346817 max_diff/norm: 0.300775 sqnr: 8.178503 cos_err: 0.087901
-- model.layers.36 rfn_err: 0.364598 max_diff/norm: 0.298362 sqnr: 7.717051 cos_err: 0.097876
-- model.layers.37 rfn_err: 0.370055 max_diff/norm: 0.282943 sqnr: 7.747602 cos_err: 0.097190
-- model.layers.38 rfn_err: 0.383731 max_diff/norm: 0.271891 sqnr: 7.462389 cos_err: 0.105123
-- model.layers.39 rfn_err: 0.387421 max_diff/norm: 0.258139 sqnr: 7.546127 cos_err: 0.101936
-- model.layers.40 rfn_err: 0.394746 max_diff/norm: 0.238546 sqnr: 7.460694 cos_err: 0.103462
-- model.layers.41 rfn_err: 0.402364 max_diff/norm: 0.203700 sqnr: 7.381044 cos_err: 0.105978
-- model.layers.42 rfn_err: 0.412670 max_diff/norm: 0.168166 sqnr: 7.233969 cos_err: 0.110305
-- model.layers.43 rfn_err: 0.415765 max_diff/norm: 0.144052 sqnr: 7.316041 cos_err: 0.107760
-- model.layers.44 rfn_err: 0.429482 max_diff/norm: 0.141583 sqnr: 7.099236 cos_err: 0.113699
-- model.layers.45 rfn_err: 0.438810 max_diff/norm: 0.134049 sqnr: 6.963944 cos_err: 0.117313
-- model.layers.46 rfn_err: 0.439827 max_diff/norm: 0.127399 sqnr: 7.015340 cos_err: 0.115621
-- model.layers.47 rfn_err: 0.449468 max_diff/norm: 0.121084 sqnr: 6.893901 cos_err: 0.120122
-- model.layers.48 rfn_err: 0.454131 max_diff/norm: 0.114432 sqnr: 6.845831 cos_err: 0.120953
-- model.layers.49 rfn_err: 0.457335 max_diff/norm: 0.107741 sqnr: 6.835199 cos_err: 0.122351
-- model.layers.50 rfn_err: 0.460975 max_diff/norm: 0.101064 sqnr: 6.794710 cos_err: 0.123616
-- model.layers.51 rfn_err: 0.468401 max_diff/norm: 0.094384 sqnr: 6.697549 cos_err: 0.127102
-- model.layers.52 rfn_err: 0.471488 max_diff/norm: 0.086993 sqnr: 6.672907 cos_err: 0.128079
-- model.layers.53 rfn_err: 0.476036 max_diff/norm: 0.082251 sqnr: 6.596959 cos_err: 0.130608
-- model.layers.54 rfn_err: 0.478482 max_diff/norm: 0.077461 sqnr: 6.566252 cos_err: 0.131370
-- model.layers.55 rfn_err: 0.484951 max_diff/norm: 0.075136 sqnr: 6.461519 cos_err: 0.135141
-- model.layers.56 rfn_err: 0.490010 max_diff/norm: 0.072084 sqnr: 6.370898 cos_err: 0.137315
-- model.layers.57 rfn_err: 0.493214 max_diff/norm: 0.065704 sqnr: 6.307768 cos_err: 0.137680
-- model.layers.58 rfn_err: 0.492374 max_diff/norm: 0.062835 sqnr: 6.277678 cos_err: 0.136408
-- model.layers.59 rfn_err: 0.494073 max_diff/norm: 0.059431 sqnr: 6.242959 cos_err: 0.134536
-- model.layers.60 rfn_err: 0.480946 max_diff/norm: 0.051310 sqnr: 6.505622 cos_err: 0.124897
-- model.layers.61 rfn_err: 0.450072 max_diff/norm: 0.107757 sqnr: 7.071674 cos_err: 0.111895
-- model.norm rfn_err: 0.483829 max_diff/norm: 0.010220 sqnr: 6.910045 cos_err: 0.119958
-- A perplexity: 9.46492433
-- B perplexity: 8.34981264
-- A label in top-K:
K = 1: 0.5363
K = 2: 0.6564
K = 3: 0.7155
K = 4: 0.7527
K = 5: 0.7792
-- B label in top-K:
K = 1: 0.5598
K = 2: 0.6804
K = 3: 0.7390
K = 4: 0.7749
K = 5: 0.8005
-- Top-K agreement, A vs B:
K = 1: 0.7699
K = 2: 0.4340
K = 3: 0.2006
K = 4: 0.0796
K = 5: 0.0289
-- KL divergence (A, B): 0.36735150
-- KL divergence (B, A): 0.42469226