File size: 8,766 Bytes
9bbe9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

 -- model.embed_tokens                         rfn_err: 0.000000   max_diff/norm: 0.000000   sqnr: 79.869484   cos_err: 0.000000
 -- model.layers.0                             rfn_err: 0.000543   max_diff/norm: 0.000184   sqnr: 68.177185   cos_err: 0.000000
 -- model.layers.1                             rfn_err: 0.001513   max_diff/norm: 0.000866   sqnr: 65.447161   cos_err: 0.000001
 -- model.layers.2                             rfn_err: 0.003429   max_diff/norm: 0.002247   sqnr: 59.905907   cos_err: 0.000004
 -- model.layers.3                             rfn_err: 0.004892   max_diff/norm: 0.002883   sqnr: 55.291200   cos_err: 0.000008
 -- model.layers.4                             rfn_err: 0.005726   max_diff/norm: 0.003310   sqnr: 52.228319   cos_err: 0.000011
 -- model.layers.5                             rfn_err: 0.008686   max_diff/norm: 0.188249   sqnr: 49.429878   cos_err: 0.000017
 -- model.layers.6                             rfn_err: 0.009091   max_diff/norm: 0.187352   sqnr: 47.501211   cos_err: 0.000025
 -- model.layers.7                             rfn_err: 0.009506   max_diff/norm: 0.182608   sqnr: 45.382496   cos_err: 0.000034
 -- model.layers.8                             rfn_err: 0.010246   max_diff/norm: 0.180253   sqnr: 43.051331   cos_err: 0.000053
 -- model.layers.9                             rfn_err: 0.010918   max_diff/norm: 0.177052   sqnr: 41.321413   cos_err: 0.000071
 -- model.layers.10                            rfn_err: 0.011519   max_diff/norm: 0.174697   sqnr: 40.338670   cos_err: 0.000086
 -- model.layers.11                            rfn_err: 0.012264   max_diff/norm: 0.172930   sqnr: 39.208071   cos_err: 0.000108
 -- model.layers.12                            rfn_err: 0.013085   max_diff/norm: 0.171060   sqnr: 38.091909   cos_err: 0.000132
 -- model.layers.13                            rfn_err: 0.013903   max_diff/norm: 0.169636   sqnr: 37.261419   cos_err: 0.000158
 -- model.layers.14                            rfn_err: 0.014629   max_diff/norm: 0.168219   sqnr: 36.583158   cos_err: 0.000180
 -- model.layers.15                            rfn_err: 0.015628   max_diff/norm: 0.166202   sqnr: 35.780368   cos_err: 0.000212
 -- model.layers.16                            rfn_err: 0.016242   max_diff/norm: 0.163085   sqnr: 35.441346   cos_err: 0.000228
 -- model.layers.17                            rfn_err: 0.017188   max_diff/norm: 0.161503   sqnr: 34.837762   cos_err: 0.000263
 -- model.layers.18                            rfn_err: 0.018460   max_diff/norm: 0.160362   sqnr: 34.086316   cos_err: 0.000316
 -- model.layers.19                            rfn_err: 0.019807   max_diff/norm: 0.158439   sqnr: 33.417090   cos_err: 0.000372
 -- model.layers.20                            rfn_err: 0.021275   max_diff/norm: 0.156205   sqnr: 32.797049   cos_err: 0.000434
 -- model.layers.21                            rfn_err: 0.022793   max_diff/norm: 0.154088   sqnr: 32.250618   cos_err: 0.000510
 -- model.layers.22                            rfn_err: 0.025017   max_diff/norm: 0.151355   sqnr: 31.522344   cos_err: 0.000629
 -- model.layers.23                            rfn_err: 0.027571   max_diff/norm: 0.149713   sqnr: 30.720477   cos_err: 0.000811
 -- model.layers.24                            rfn_err: 0.030635   max_diff/norm: 0.143742   sqnr: 29.978750   cos_err: 0.001035
 -- model.layers.25                            rfn_err: 0.035260   max_diff/norm: 0.142449   sqnr: 28.921009   cos_err: 0.001450
 -- model.layers.26                            rfn_err: 0.041012   max_diff/norm: 0.142144   sqnr: 27.738527   cos_err: 0.001992
 -- model.layers.27                            rfn_err: 0.048649   max_diff/norm: 0.140497   sqnr: 26.724511   cos_err: 0.002842
 -- model.layers.28                            rfn_err: 0.075249   max_diff/norm: 0.138490   sqnr: 25.626220   cos_err: 0.004184
 -- model.layers.29                            rfn_err: 0.085114   max_diff/norm: 0.137145   sqnr: 24.873034   cos_err: 0.005033
 -- model.layers.30                            rfn_err: 0.093850   max_diff/norm: 0.133826   sqnr: 23.763328   cos_err: 0.006293
 -- model.layers.31                            rfn_err: 0.099443   max_diff/norm: 0.131075   sqnr: 23.143857   cos_err: 0.007078
 -- model.layers.32                            rfn_err: 0.110217   max_diff/norm: 0.127145   sqnr: 22.407360   cos_err: 0.007872
 -- model.layers.33                            rfn_err: 0.115368   max_diff/norm: 0.124526   sqnr: 21.812169   cos_err: 0.009041
 -- model.layers.34                            rfn_err: 0.118347   max_diff/norm: 0.120848   sqnr: 21.452885   cos_err: 0.009680
 -- model.layers.35                            rfn_err: 0.121983   max_diff/norm: 0.117505   sqnr: 21.180904   cos_err: 0.010409
 -- model.layers.36                            rfn_err: 0.129488   max_diff/norm: 0.115913   sqnr: 20.568714   cos_err: 0.011940
 -- model.layers.37                            rfn_err: 0.130546   max_diff/norm: 0.110626   sqnr: 20.523469   cos_err: 0.012070
 -- model.layers.38                            rfn_err: 0.134762   max_diff/norm: 0.107698   sqnr: 20.258577   cos_err: 0.013730
 -- model.layers.39                            rfn_err: 0.136458   max_diff/norm: 0.102443   sqnr: 20.352471   cos_err: 0.013488
 -- model.layers.40                            rfn_err: 0.138824   max_diff/norm: 0.099604   sqnr: 20.215974   cos_err: 0.013448
 -- model.layers.41                            rfn_err: 0.142556   max_diff/norm: 0.093639   sqnr: 20.129098   cos_err: 0.014499
 -- model.layers.42                            rfn_err: 0.147585   max_diff/norm: 0.089614   sqnr: 19.955028   cos_err: 0.015680
 -- model.layers.43                            rfn_err: 0.149174   max_diff/norm: 0.083302   sqnr: 20.070309   cos_err: 0.014730
 -- model.layers.44                            rfn_err: 0.155522   max_diff/norm: 0.079138   sqnr: 19.834189   cos_err: 0.015953
 -- model.layers.45                            rfn_err: 0.159260   max_diff/norm: 0.076222   sqnr: 19.685693   cos_err: 0.016491
 -- model.layers.46                            rfn_err: 0.159097   max_diff/norm: 0.071667   sqnr: 19.735605   cos_err: 0.016122
 -- model.layers.47                            rfn_err: 0.164939   max_diff/norm: 0.067849   sqnr: 19.584990   cos_err: 0.017165
 -- model.layers.48                            rfn_err: 0.165954   max_diff/norm: 0.064549   sqnr: 19.534291   cos_err: 0.017085
 -- model.layers.49                            rfn_err: 0.167305   max_diff/norm: 0.060484   sqnr: 19.525616   cos_err: 0.017809
 -- model.layers.50                            rfn_err: 0.168045   max_diff/norm: 0.057157   sqnr: 19.455932   cos_err: 0.018103
 -- model.layers.51                            rfn_err: 0.171497   max_diff/norm: 0.053969   sqnr: 19.336356   cos_err: 0.018967
 -- model.layers.52                            rfn_err: 0.171437   max_diff/norm: 0.050450   sqnr: 19.299448   cos_err: 0.019189
 -- model.layers.53                            rfn_err: 0.171026   max_diff/norm: 0.047735   sqnr: 19.203575   cos_err: 0.019697
 -- model.layers.54                            rfn_err: 0.170891   max_diff/norm: 0.045281   sqnr: 19.107354   cos_err: 0.019586
 -- model.layers.55                            rfn_err: 0.173012   max_diff/norm: 0.043646   sqnr: 18.910681   cos_err: 0.020545
 -- model.layers.56                            rfn_err: 0.172990   max_diff/norm: 0.041733   sqnr: 18.769338   cos_err: 0.020506
 -- model.layers.57                            rfn_err: 0.169792   max_diff/norm: 0.038496   sqnr: 18.663130   cos_err: 0.019758
 -- model.layers.58                            rfn_err: 0.161728   max_diff/norm: 0.036592   sqnr: 18.628091   cos_err: 0.017834
 -- model.layers.59                            rfn_err: 0.154931   max_diff/norm: 0.034570   sqnr: 18.614537   cos_err: 0.015379
 -- model.layers.60                            rfn_err: 0.146969   max_diff/norm: 0.030656   sqnr: 18.880935   cos_err: 0.012806
 -- model.layers.61                            rfn_err: 0.138028   max_diff/norm: 0.047421   sqnr: 19.416316   cos_err: 0.012273
 -- model.norm                                 rfn_err: 0.158195   max_diff/norm: 0.006963   sqnr: 19.225764   cos_err: 0.013913
 -- A perplexity:  8.35782554
 -- B perplexity:  8.34981264
 -- A label in top-K:
      K = 1: 0.5596
      K = 2: 0.6798
      K = 3: 0.7387
      K = 4: 0.7746
      K = 5: 0.7998
 -- B label in top-K:
      K = 1: 0.5598
      K = 2: 0.6804
      K = 3: 0.7390
      K = 4: 0.7749
      K = 5: 0.8005
 -- Top-K agreement, A vs B:
      K = 1: 0.9449
      K = 2: 0.8209
      K = 3: 0.6651
      K = 4: 0.5071
      K = 5: 0.3670
 -- KL divergence (A, B):  0.04015230
 -- KL divergence (B, A):  0.04071388