MiniMax-M2.5-exl3 / model_diff_logs /MiniMaxAI_MiniMax-M2.5-7.0bpw.txt
NeuroSenko's picture
Upload 7 files
9bbe9c0 verified
-- model.embed_tokens rfn_err: 0.000000 max_diff/norm: 0.000000 sqnr: 79.869484 cos_err: 0.000000
-- model.layers.0 rfn_err: 0.000560 max_diff/norm: 0.000182 sqnr: 66.250859 cos_err: 0.000000
-- model.layers.1 rfn_err: 0.001196 max_diff/norm: 0.000613 sqnr: 65.494256 cos_err: 0.000001
-- model.layers.2 rfn_err: 0.002691 max_diff/norm: 0.001966 sqnr: 63.802718 cos_err: 0.000003
-- model.layers.3 rfn_err: 0.003877 max_diff/norm: 0.002972 sqnr: 59.584492 cos_err: 0.000005
-- model.layers.4 rfn_err: 0.004591 max_diff/norm: 0.002993 sqnr: 56.390129 cos_err: 0.000007
-- model.layers.5 rfn_err: 0.004672 max_diff/norm: 0.043094 sqnr: 53.401958 cos_err: 0.000011
-- model.layers.6 rfn_err: 0.005053 max_diff/norm: 0.042747 sqnr: 51.403407 cos_err: 0.000017
-- model.layers.7 rfn_err: 0.005484 max_diff/norm: 0.041560 sqnr: 49.283374 cos_err: 0.000024
-- model.layers.8 rfn_err: 0.006207 max_diff/norm: 0.040911 sqnr: 46.516407 cos_err: 0.000038
-- model.layers.9 rfn_err: 0.006919 max_diff/norm: 0.040032 sqnr: 44.392462 cos_err: 0.000051
-- model.layers.10 rfn_err: 0.007554 max_diff/norm: 0.039448 sqnr: 43.250104 cos_err: 0.000064
-- model.layers.11 rfn_err: 0.008283 max_diff/norm: 0.038800 sqnr: 41.952453 cos_err: 0.000080
-- model.layers.12 rfn_err: 0.009074 max_diff/norm: 0.038340 sqnr: 40.670537 cos_err: 0.000099
-- model.layers.13 rfn_err: 0.009857 max_diff/norm: 0.037903 sqnr: 39.760592 cos_err: 0.000120
-- model.layers.14 rfn_err: 0.010539 max_diff/norm: 0.037459 sqnr: 38.997738 cos_err: 0.000137
-- model.layers.15 rfn_err: 0.011476 max_diff/norm: 0.036819 sqnr: 38.084897 cos_err: 0.000162
-- model.layers.16 rfn_err: 0.012086 max_diff/norm: 0.036038 sqnr: 37.657304 cos_err: 0.000175
-- model.layers.17 rfn_err: 0.012987 max_diff/norm: 0.035555 sqnr: 36.976459 cos_err: 0.000204
-- model.layers.18 rfn_err: 0.014156 max_diff/norm: 0.035192 sqnr: 36.152336 cos_err: 0.000246
-- model.layers.19 rfn_err: 0.015402 max_diff/norm: 0.034900 sqnr: 35.448933 cos_err: 0.000290
-- model.layers.20 rfn_err: 0.016710 max_diff/norm: 0.034060 sqnr: 34.786421 cos_err: 0.000338
-- model.layers.21 rfn_err: 0.018065 max_diff/norm: 0.033554 sqnr: 34.188991 cos_err: 0.000401
-- model.layers.22 rfn_err: 0.020125 max_diff/norm: 0.033028 sqnr: 33.411842 cos_err: 0.000499
-- model.layers.23 rfn_err: 0.022564 max_diff/norm: 0.032781 sqnr: 32.553960 cos_err: 0.000653
-- model.layers.24 rfn_err: 0.025218 max_diff/norm: 0.031707 sqnr: 31.774962 cos_err: 0.000839
-- model.layers.25 rfn_err: 0.029645 max_diff/norm: 0.031183 sqnr: 30.675104 cos_err: 0.001193
-- model.layers.26 rfn_err: 0.035183 max_diff/norm: 0.031087 sqnr: 29.389312 cos_err: 0.001664
-- model.layers.27 rfn_err: 0.042841 max_diff/norm: 0.030733 sqnr: 28.315314 cos_err: 0.002444
-- model.layers.28 rfn_err: 0.071600 max_diff/norm: 0.030360 sqnr: 27.148526 cos_err: 0.003679
-- model.layers.29 rfn_err: 0.081257 max_diff/norm: 0.030133 sqnr: 26.339088 cos_err: 0.004428
-- model.layers.30 rfn_err: 0.089474 max_diff/norm: 0.029503 sqnr: 25.159096 cos_err: 0.005543
-- model.layers.31 rfn_err: 0.094679 max_diff/norm: 0.028846 sqnr: 24.468424 cos_err: 0.006236
-- model.layers.32 rfn_err: 0.105001 max_diff/norm: 0.028775 sqnr: 23.675020 cos_err: 0.006935
-- model.layers.33 rfn_err: 0.109669 max_diff/norm: 0.028754 sqnr: 23.043021 cos_err: 0.007975
-- model.layers.34 rfn_err: 0.112336 max_diff/norm: 0.028278 sqnr: 22.645694 cos_err: 0.008555
-- model.layers.35 rfn_err: 0.115750 max_diff/norm: 0.027747 sqnr: 22.345913 cos_err: 0.009223
-- model.layers.36 rfn_err: 0.122733 max_diff/norm: 0.027560 sqnr: 21.706496 cos_err: 0.010586
-- model.layers.37 rfn_err: 0.123626 max_diff/norm: 0.025725 sqnr: 21.635876 cos_err: 0.010734
-- model.layers.38 rfn_err: 0.127712 max_diff/norm: 0.023241 sqnr: 21.348562 cos_err: 0.012312
-- model.layers.39 rfn_err: 0.129427 max_diff/norm: 0.023995 sqnr: 21.426755 cos_err: 0.012149
-- model.layers.40 rfn_err: 0.131556 max_diff/norm: 0.021078 sqnr: 21.274276 cos_err: 0.012101
-- model.layers.41 rfn_err: 0.134928 max_diff/norm: 0.019668 sqnr: 21.173382 cos_err: 0.013130
-- model.layers.42 rfn_err: 0.139890 max_diff/norm: 0.023751 sqnr: 20.989811 cos_err: 0.014258
-- model.layers.43 rfn_err: 0.141563 max_diff/norm: 0.025637 sqnr: 21.096693 cos_err: 0.013388
-- model.layers.44 rfn_err: 0.147833 max_diff/norm: 0.025241 sqnr: 20.853674 cos_err: 0.014561
-- model.layers.45 rfn_err: 0.151443 max_diff/norm: 0.028335 sqnr: 20.701290 cos_err: 0.015050
-- model.layers.46 rfn_err: 0.151276 max_diff/norm: 0.031591 sqnr: 20.743301 cos_err: 0.014719
-- model.layers.47 rfn_err: 0.157034 max_diff/norm: 0.031835 sqnr: 20.584028 cos_err: 0.015708
-- model.layers.48 rfn_err: 0.157965 max_diff/norm: 0.031107 sqnr: 20.529386 cos_err: 0.015626
-- model.layers.49 rfn_err: 0.159416 max_diff/norm: 0.031311 sqnr: 20.515366 cos_err: 0.016340
-- model.layers.50 rfn_err: 0.160044 max_diff/norm: 0.030860 sqnr: 20.441445 cos_err: 0.016615
-- model.layers.51 rfn_err: 0.163510 max_diff/norm: 0.031575 sqnr: 20.315686 cos_err: 0.017451
-- model.layers.52 rfn_err: 0.163440 max_diff/norm: 0.028570 sqnr: 20.268164 cos_err: 0.017664
-- model.layers.53 rfn_err: 0.162917 max_diff/norm: 0.028204 sqnr: 20.162311 cos_err: 0.018130
-- model.layers.54 rfn_err: 0.162807 max_diff/norm: 0.027045 sqnr: 20.049632 cos_err: 0.018021
-- model.layers.55 rfn_err: 0.164703 max_diff/norm: 0.023875 sqnr: 19.840393 cos_err: 0.018919
-- model.layers.56 rfn_err: 0.164629 max_diff/norm: 0.023986 sqnr: 19.686238 cos_err: 0.018869
-- model.layers.57 rfn_err: 0.161328 max_diff/norm: 0.021253 sqnr: 19.574260 cos_err: 0.018143
-- model.layers.58 rfn_err: 0.153104 max_diff/norm: 0.018375 sqnr: 19.530551 cos_err: 0.016282
-- model.layers.59 rfn_err: 0.146190 max_diff/norm: 0.015653 sqnr: 19.507797 cos_err: 0.013920
-- model.layers.60 rfn_err: 0.138407 max_diff/norm: 0.010334 sqnr: 19.770672 cos_err: 0.011484
-- model.layers.61 rfn_err: 0.130370 max_diff/norm: 0.073108 sqnr: 20.301139 cos_err: 0.011050
-- model.norm rfn_err: 0.149805 max_diff/norm: 0.007261 sqnr: 20.115299 cos_err: 0.012592
-- A perplexity: 8.35427106
-- B perplexity: 8.34981264
-- A label in top-K:
K = 1: 0.5595
K = 2: 0.6799
K = 3: 0.7387
K = 4: 0.7748
K = 5: 0.8003
-- B label in top-K:
K = 1: 0.5598
K = 2: 0.6804
K = 3: 0.7390
K = 4: 0.7749
K = 5: 0.8005
-- Top-K agreement, A vs B:
K = 1: 0.9510
K = 2: 0.8380
K = 3: 0.6922
K = 4: 0.5420
K = 5: 0.4046
-- KL divergence (A, B): 0.03484128
-- KL divergence (B, A): 0.03757493