File size: 8,766 Bytes
9bbe9c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88

 -- model.embed_tokens                         rfn_err: 0.000000   max_diff/norm: 0.000000   sqnr: 79.869484   cos_err: 0.000000
 -- model.layers.0                             rfn_err: 0.000560   max_diff/norm: 0.000182   sqnr: 66.250859   cos_err: 0.000000
 -- model.layers.1                             rfn_err: 0.001196   max_diff/norm: 0.000613   sqnr: 65.494256   cos_err: 0.000001
 -- model.layers.2                             rfn_err: 0.002691   max_diff/norm: 0.001966   sqnr: 63.802718   cos_err: 0.000003
 -- model.layers.3                             rfn_err: 0.003877   max_diff/norm: 0.002972   sqnr: 59.584492   cos_err: 0.000005
 -- model.layers.4                             rfn_err: 0.004591   max_diff/norm: 0.002993   sqnr: 56.390129   cos_err: 0.000007
 -- model.layers.5                             rfn_err: 0.004672   max_diff/norm: 0.043094   sqnr: 53.401958   cos_err: 0.000011
 -- model.layers.6                             rfn_err: 0.005053   max_diff/norm: 0.042747   sqnr: 51.403407   cos_err: 0.000017
 -- model.layers.7                             rfn_err: 0.005484   max_diff/norm: 0.041560   sqnr: 49.283374   cos_err: 0.000024
 -- model.layers.8                             rfn_err: 0.006207   max_diff/norm: 0.040911   sqnr: 46.516407   cos_err: 0.000038
 -- model.layers.9                             rfn_err: 0.006919   max_diff/norm: 0.040032   sqnr: 44.392462   cos_err: 0.000051
 -- model.layers.10                            rfn_err: 0.007554   max_diff/norm: 0.039448   sqnr: 43.250104   cos_err: 0.000064
 -- model.layers.11                            rfn_err: 0.008283   max_diff/norm: 0.038800   sqnr: 41.952453   cos_err: 0.000080
 -- model.layers.12                            rfn_err: 0.009074   max_diff/norm: 0.038340   sqnr: 40.670537   cos_err: 0.000099
 -- model.layers.13                            rfn_err: 0.009857   max_diff/norm: 0.037903   sqnr: 39.760592   cos_err: 0.000120
 -- model.layers.14                            rfn_err: 0.010539   max_diff/norm: 0.037459   sqnr: 38.997738   cos_err: 0.000137
 -- model.layers.15                            rfn_err: 0.011476   max_diff/norm: 0.036819   sqnr: 38.084897   cos_err: 0.000162
 -- model.layers.16                            rfn_err: 0.012086   max_diff/norm: 0.036038   sqnr: 37.657304   cos_err: 0.000175
 -- model.layers.17                            rfn_err: 0.012987   max_diff/norm: 0.035555   sqnr: 36.976459   cos_err: 0.000204
 -- model.layers.18                            rfn_err: 0.014156   max_diff/norm: 0.035192   sqnr: 36.152336   cos_err: 0.000246
 -- model.layers.19                            rfn_err: 0.015402   max_diff/norm: 0.034900   sqnr: 35.448933   cos_err: 0.000290
 -- model.layers.20                            rfn_err: 0.016710   max_diff/norm: 0.034060   sqnr: 34.786421   cos_err: 0.000338
 -- model.layers.21                            rfn_err: 0.018065   max_diff/norm: 0.033554   sqnr: 34.188991   cos_err: 0.000401
 -- model.layers.22                            rfn_err: 0.020125   max_diff/norm: 0.033028   sqnr: 33.411842   cos_err: 0.000499
 -- model.layers.23                            rfn_err: 0.022564   max_diff/norm: 0.032781   sqnr: 32.553960   cos_err: 0.000653
 -- model.layers.24                            rfn_err: 0.025218   max_diff/norm: 0.031707   sqnr: 31.774962   cos_err: 0.000839
 -- model.layers.25                            rfn_err: 0.029645   max_diff/norm: 0.031183   sqnr: 30.675104   cos_err: 0.001193
 -- model.layers.26                            rfn_err: 0.035183   max_diff/norm: 0.031087   sqnr: 29.389312   cos_err: 0.001664
 -- model.layers.27                            rfn_err: 0.042841   max_diff/norm: 0.030733   sqnr: 28.315314   cos_err: 0.002444
 -- model.layers.28                            rfn_err: 0.071600   max_diff/norm: 0.030360   sqnr: 27.148526   cos_err: 0.003679
 -- model.layers.29                            rfn_err: 0.081257   max_diff/norm: 0.030133   sqnr: 26.339088   cos_err: 0.004428
 -- model.layers.30                            rfn_err: 0.089474   max_diff/norm: 0.029503   sqnr: 25.159096   cos_err: 0.005543
 -- model.layers.31                            rfn_err: 0.094679   max_diff/norm: 0.028846   sqnr: 24.468424   cos_err: 0.006236
 -- model.layers.32                            rfn_err: 0.105001   max_diff/norm: 0.028775   sqnr: 23.675020   cos_err: 0.006935
 -- model.layers.33                            rfn_err: 0.109669   max_diff/norm: 0.028754   sqnr: 23.043021   cos_err: 0.007975
 -- model.layers.34                            rfn_err: 0.112336   max_diff/norm: 0.028278   sqnr: 22.645694   cos_err: 0.008555
 -- model.layers.35                            rfn_err: 0.115750   max_diff/norm: 0.027747   sqnr: 22.345913   cos_err: 0.009223
 -- model.layers.36                            rfn_err: 0.122733   max_diff/norm: 0.027560   sqnr: 21.706496   cos_err: 0.010586
 -- model.layers.37                            rfn_err: 0.123626   max_diff/norm: 0.025725   sqnr: 21.635876   cos_err: 0.010734
 -- model.layers.38                            rfn_err: 0.127712   max_diff/norm: 0.023241   sqnr: 21.348562   cos_err: 0.012312
 -- model.layers.39                            rfn_err: 0.129427   max_diff/norm: 0.023995   sqnr: 21.426755   cos_err: 0.012149
 -- model.layers.40                            rfn_err: 0.131556   max_diff/norm: 0.021078   sqnr: 21.274276   cos_err: 0.012101
 -- model.layers.41                            rfn_err: 0.134928   max_diff/norm: 0.019668   sqnr: 21.173382   cos_err: 0.013130
 -- model.layers.42                            rfn_err: 0.139890   max_diff/norm: 0.023751   sqnr: 20.989811   cos_err: 0.014258
 -- model.layers.43                            rfn_err: 0.141563   max_diff/norm: 0.025637   sqnr: 21.096693   cos_err: 0.013388
 -- model.layers.44                            rfn_err: 0.147833   max_diff/norm: 0.025241   sqnr: 20.853674   cos_err: 0.014561
 -- model.layers.45                            rfn_err: 0.151443   max_diff/norm: 0.028335   sqnr: 20.701290   cos_err: 0.015050
 -- model.layers.46                            rfn_err: 0.151276   max_diff/norm: 0.031591   sqnr: 20.743301   cos_err: 0.014719
 -- model.layers.47                            rfn_err: 0.157034   max_diff/norm: 0.031835   sqnr: 20.584028   cos_err: 0.015708
 -- model.layers.48                            rfn_err: 0.157965   max_diff/norm: 0.031107   sqnr: 20.529386   cos_err: 0.015626
 -- model.layers.49                            rfn_err: 0.159416   max_diff/norm: 0.031311   sqnr: 20.515366   cos_err: 0.016340
 -- model.layers.50                            rfn_err: 0.160044   max_diff/norm: 0.030860   sqnr: 20.441445   cos_err: 0.016615
 -- model.layers.51                            rfn_err: 0.163510   max_diff/norm: 0.031575   sqnr: 20.315686   cos_err: 0.017451
 -- model.layers.52                            rfn_err: 0.163440   max_diff/norm: 0.028570   sqnr: 20.268164   cos_err: 0.017664
 -- model.layers.53                            rfn_err: 0.162917   max_diff/norm: 0.028204   sqnr: 20.162311   cos_err: 0.018130
 -- model.layers.54                            rfn_err: 0.162807   max_diff/norm: 0.027045   sqnr: 20.049632   cos_err: 0.018021
 -- model.layers.55                            rfn_err: 0.164703   max_diff/norm: 0.023875   sqnr: 19.840393   cos_err: 0.018919
 -- model.layers.56                            rfn_err: 0.164629   max_diff/norm: 0.023986   sqnr: 19.686238   cos_err: 0.018869
 -- model.layers.57                            rfn_err: 0.161328   max_diff/norm: 0.021253   sqnr: 19.574260   cos_err: 0.018143
 -- model.layers.58                            rfn_err: 0.153104   max_diff/norm: 0.018375   sqnr: 19.530551   cos_err: 0.016282
 -- model.layers.59                            rfn_err: 0.146190   max_diff/norm: 0.015653   sqnr: 19.507797   cos_err: 0.013920
 -- model.layers.60                            rfn_err: 0.138407   max_diff/norm: 0.010334   sqnr: 19.770672   cos_err: 0.011484
 -- model.layers.61                            rfn_err: 0.130370   max_diff/norm: 0.073108   sqnr: 20.301139   cos_err: 0.011050
 -- model.norm                                 rfn_err: 0.149805   max_diff/norm: 0.007261   sqnr: 20.115299   cos_err: 0.012592
 -- A perplexity:  8.35427106
 -- B perplexity:  8.34981264
 -- A label in top-K:
      K = 1: 0.5595
      K = 2: 0.6799
      K = 3: 0.7387
      K = 4: 0.7748
      K = 5: 0.8003
 -- B label in top-K:
      K = 1: 0.5598
      K = 2: 0.6804
      K = 3: 0.7390
      K = 4: 0.7749
      K = 5: 0.8005
 -- Top-K agreement, A vs B:
      K = 1: 0.9510
      K = 2: 0.8380
      K = 3: 0.6922
      K = 4: 0.5420
      K = 5: 0.4046
 -- KL divergence (A, B):  0.03484128
 -- KL divergence (B, A):  0.03757493