{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8096510404015869, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016193020808031737, "grad_norm": 2590324.25, "learning_rate": 1e-05, "loss": 27.9785, "mean_token_accuracy": 0.10002041235566139, "num_tokens": 1792.0, "step": 1 }, { "epoch": 0.00032386041616063474, "grad_norm": 15321.634765625, "learning_rate": 9.998380829015544e-06, "loss": 12.7953, "mean_token_accuracy": 0.1478351578116417, "num_tokens": 3581.0, "step": 2 }, { "epoch": 0.00048579062424095217, "grad_norm": 8141.26416015625, "learning_rate": 9.99676165803109e-06, "loss": 8.7196, "mean_token_accuracy": 0.3560480624437332, "num_tokens": 5371.0, "step": 3 }, { "epoch": 0.0006477208323212695, "grad_norm": 4501.52099609375, "learning_rate": 9.995142487046633e-06, "loss": 6.0515, "mean_token_accuracy": 0.5588205456733704, "num_tokens": 7164.0, "step": 4 }, { "epoch": 0.0008096510404015869, "grad_norm": 2391.61669921875, "learning_rate": 9.993523316062177e-06, "loss": 4.3382, "mean_token_accuracy": 0.673895925283432, "num_tokens": 8952.0, "step": 5 }, { "epoch": 0.0009715812484819043, "grad_norm": 3996.810546875, "learning_rate": 9.99190414507772e-06, "loss": 3.6032, "mean_token_accuracy": 0.6714812815189362, "num_tokens": 10753.0, "step": 6 }, { "epoch": 0.0011335114565622216, "grad_norm": 1601.2535400390625, "learning_rate": 9.990284974093266e-06, "loss": 2.967, "mean_token_accuracy": 0.7058201134204865, "num_tokens": 12540.0, "step": 7 }, { "epoch": 0.001295441664642539, "grad_norm": 1023.8958740234375, "learning_rate": 9.98866580310881e-06, "loss": 2.5027, "mean_token_accuracy": 0.7814523577690125, "num_tokens": 14331.0, "step": 8 }, { "epoch": 0.0014573718727228565, "grad_norm": 864.677734375, "learning_rate": 9.987046632124353e-06, "loss": 2.2716, "mean_token_accuracy": 0.8148148059844971, "num_tokens": 16118.0, "step": 9 }, { "epoch": 0.0016193020808031738, "grad_norm": 865.9158325195312, "learning_rate": 9.985427461139897e-06, "loss": 2.0112, "mean_token_accuracy": 0.8334065079689026, "num_tokens": 17900.0, "step": 10 }, { "epoch": 0.0017812322888834911, "grad_norm": 959.9893798828125, "learning_rate": 9.983808290155442e-06, "loss": 2.3178, "mean_token_accuracy": 0.8049798905849457, "num_tokens": 19694.0, "step": 11 }, { "epoch": 0.0019431624969638087, "grad_norm": 732.5724487304688, "learning_rate": 9.982189119170985e-06, "loss": 1.9631, "mean_token_accuracy": 0.8315800726413727, "num_tokens": 21479.0, "step": 12 }, { "epoch": 0.002105092705044126, "grad_norm": 734.8389282226562, "learning_rate": 9.980569948186529e-06, "loss": 1.9683, "mean_token_accuracy": 0.8085106313228607, "num_tokens": 23273.0, "step": 13 }, { "epoch": 0.0022670229131244433, "grad_norm": 691.6798706054688, "learning_rate": 9.978950777202073e-06, "loss": 2.0535, "mean_token_accuracy": 0.8219024240970612, "num_tokens": 25065.0, "step": 14 }, { "epoch": 0.002428953121204761, "grad_norm": 534.7596435546875, "learning_rate": 9.977331606217618e-06, "loss": 1.9077, "mean_token_accuracy": 0.837618499994278, "num_tokens": 26848.0, "step": 15 }, { "epoch": 0.002590883329285078, "grad_norm": 492.1035461425781, "learning_rate": 9.975712435233161e-06, "loss": 1.6952, "mean_token_accuracy": 0.8331745564937592, "num_tokens": 28635.0, "step": 16 }, { "epoch": 0.0027528135373653955, "grad_norm": 407.0179138183594, "learning_rate": 9.974093264248705e-06, "loss": 1.612, "mean_token_accuracy": 0.8341322541236877, "num_tokens": 30418.0, "step": 17 }, { "epoch": 0.002914743745445713, "grad_norm": 465.31854248046875, "learning_rate": 9.972474093264249e-06, "loss": 1.804, "mean_token_accuracy": 0.8161197006702423, "num_tokens": 32218.0, "step": 18 }, { "epoch": 0.00307667395352603, "grad_norm": 492.2633361816406, "learning_rate": 9.970854922279794e-06, "loss": 1.8954, "mean_token_accuracy": 0.8170638978481293, "num_tokens": 34020.0, "step": 19 }, { "epoch": 0.0032386041616063476, "grad_norm": 367.78216552734375, "learning_rate": 9.969235751295337e-06, "loss": 1.8947, "mean_token_accuracy": 0.8388837277889252, "num_tokens": 35811.0, "step": 20 }, { "epoch": 0.003400534369686665, "grad_norm": 357.5298156738281, "learning_rate": 9.967616580310881e-06, "loss": 1.7049, "mean_token_accuracy": 0.8480354249477386, "num_tokens": 37592.0, "step": 21 }, { "epoch": 0.0035624645777669823, "grad_norm": 370.7132568359375, "learning_rate": 9.965997409326426e-06, "loss": 1.974, "mean_token_accuracy": 0.8051095008850098, "num_tokens": 39386.0, "step": 22 }, { "epoch": 0.0037243947858473, "grad_norm": 415.8082580566406, "learning_rate": 9.96437823834197e-06, "loss": 1.9174, "mean_token_accuracy": 0.8263655304908752, "num_tokens": 41174.0, "step": 23 }, { "epoch": 0.0038863249939276173, "grad_norm": 400.77783203125, "learning_rate": 9.962759067357514e-06, "loss": 2.4263, "mean_token_accuracy": 0.7937062978744507, "num_tokens": 42972.0, "step": 24 }, { "epoch": 0.004048255202007934, "grad_norm": 316.3494873046875, "learning_rate": 9.961139896373057e-06, "loss": 1.5235, "mean_token_accuracy": 0.8511984050273895, "num_tokens": 44758.0, "step": 25 }, { "epoch": 0.004210185410088252, "grad_norm": 332.2727355957031, "learning_rate": 9.959520725388602e-06, "loss": 1.6433, "mean_token_accuracy": 0.8394160568714142, "num_tokens": 46544.0, "step": 26 }, { "epoch": 0.0043721156181685695, "grad_norm": 324.8083190917969, "learning_rate": 9.957901554404146e-06, "loss": 1.8186, "mean_token_accuracy": 0.8315262198448181, "num_tokens": 48335.0, "step": 27 }, { "epoch": 0.004534045826248887, "grad_norm": 347.6258544921875, "learning_rate": 9.95628238341969e-06, "loss": 1.6425, "mean_token_accuracy": 0.8448051810264587, "num_tokens": 50130.0, "step": 28 }, { "epoch": 0.004695976034329204, "grad_norm": 294.58636474609375, "learning_rate": 9.954663212435233e-06, "loss": 1.5152, "mean_token_accuracy": 0.8535315692424774, "num_tokens": 51915.0, "step": 29 }, { "epoch": 0.004857906242409522, "grad_norm": 372.940185546875, "learning_rate": 9.953044041450778e-06, "loss": 1.9217, "mean_token_accuracy": 0.81324702501297, "num_tokens": 53711.0, "step": 30 }, { "epoch": 0.005019836450489839, "grad_norm": 298.54095458984375, "learning_rate": 9.951424870466322e-06, "loss": 1.6812, "mean_token_accuracy": 0.838798850774765, "num_tokens": 55496.0, "step": 31 }, { "epoch": 0.005181766658570156, "grad_norm": 319.18182373046875, "learning_rate": 9.949805699481866e-06, "loss": 1.6511, "mean_token_accuracy": 0.822818785905838, "num_tokens": 57302.0, "step": 32 }, { "epoch": 0.005343696866650474, "grad_norm": 331.71240234375, "learning_rate": 9.94818652849741e-06, "loss": 1.9189, "mean_token_accuracy": 0.8330687880516052, "num_tokens": 59089.0, "step": 33 }, { "epoch": 0.005505627074730791, "grad_norm": 272.4907531738281, "learning_rate": 9.946567357512955e-06, "loss": 1.4439, "mean_token_accuracy": 0.8442226648330688, "num_tokens": 60877.0, "step": 34 }, { "epoch": 0.005667557282811108, "grad_norm": 219.92578125, "learning_rate": 9.944948186528498e-06, "loss": 1.4685, "mean_token_accuracy": 0.8554767668247223, "num_tokens": 62665.0, "step": 35 }, { "epoch": 0.005829487490891426, "grad_norm": 267.6018981933594, "learning_rate": 9.943329015544042e-06, "loss": 1.628, "mean_token_accuracy": 0.8447502851486206, "num_tokens": 64454.0, "step": 36 }, { "epoch": 0.005991417698971743, "grad_norm": 257.0301818847656, "learning_rate": 9.941709844559585e-06, "loss": 1.6321, "mean_token_accuracy": 0.8432835936546326, "num_tokens": 66234.0, "step": 37 }, { "epoch": 0.00615334790705206, "grad_norm": 255.00665283203125, "learning_rate": 9.94009067357513e-06, "loss": 1.5121, "mean_token_accuracy": 0.8339080214500427, "num_tokens": 68029.0, "step": 38 }, { "epoch": 0.006315278115132378, "grad_norm": 290.82635498046875, "learning_rate": 9.938471502590674e-06, "loss": 1.8195, "mean_token_accuracy": 0.8093774914741516, "num_tokens": 69824.0, "step": 39 }, { "epoch": 0.006477208323212695, "grad_norm": 239.01097106933594, "learning_rate": 9.936852331606218e-06, "loss": 1.49, "mean_token_accuracy": 0.8441810309886932, "num_tokens": 71625.0, "step": 40 }, { "epoch": 0.006639138531293012, "grad_norm": 281.5107421875, "learning_rate": 9.935233160621763e-06, "loss": 1.6659, "mean_token_accuracy": 0.8286873996257782, "num_tokens": 73423.0, "step": 41 }, { "epoch": 0.00680106873937333, "grad_norm": 253.38821411132812, "learning_rate": 9.933613989637307e-06, "loss": 1.6001, "mean_token_accuracy": 0.8374288380146027, "num_tokens": 75218.0, "step": 42 }, { "epoch": 0.006962998947453647, "grad_norm": 260.29718017578125, "learning_rate": 9.93199481865285e-06, "loss": 1.527, "mean_token_accuracy": 0.8415662348270416, "num_tokens": 77014.0, "step": 43 }, { "epoch": 0.0071249291555339645, "grad_norm": 232.60289001464844, "learning_rate": 9.930375647668394e-06, "loss": 1.4487, "mean_token_accuracy": 0.8467153310775757, "num_tokens": 78800.0, "step": 44 }, { "epoch": 0.0072868593636142825, "grad_norm": 232.3682861328125, "learning_rate": 9.928756476683939e-06, "loss": 1.2732, "mean_token_accuracy": 0.865645170211792, "num_tokens": 80587.0, "step": 45 }, { "epoch": 0.0074487895716946, "grad_norm": 226.4539337158203, "learning_rate": 9.927137305699483e-06, "loss": 1.53, "mean_token_accuracy": 0.8327760696411133, "num_tokens": 82380.0, "step": 46 }, { "epoch": 0.007610719779774917, "grad_norm": 223.02755737304688, "learning_rate": 9.925518134715026e-06, "loss": 1.3685, "mean_token_accuracy": 0.8673881590366364, "num_tokens": 84170.0, "step": 47 }, { "epoch": 0.007772649987855235, "grad_norm": 199.50677490234375, "learning_rate": 9.92389896373057e-06, "loss": 1.7148, "mean_token_accuracy": 0.8449667990207672, "num_tokens": 85952.0, "step": 48 }, { "epoch": 0.007934580195935553, "grad_norm": 229.42681884765625, "learning_rate": 9.922279792746115e-06, "loss": 1.5759, "mean_token_accuracy": 0.8278166353702545, "num_tokens": 87742.0, "step": 49 }, { "epoch": 0.008096510404015869, "grad_norm": 218.40028381347656, "learning_rate": 9.920660621761659e-06, "loss": 1.3814, "mean_token_accuracy": 0.8362470865249634, "num_tokens": 89535.0, "step": 50 }, { "epoch": 0.008258440612096187, "grad_norm": 250.39414978027344, "learning_rate": 9.919041450777202e-06, "loss": 1.6344, "mean_token_accuracy": 0.8230555355548859, "num_tokens": 91341.0, "step": 51 }, { "epoch": 0.008420370820176505, "grad_norm": 223.5367889404297, "learning_rate": 9.917422279792746e-06, "loss": 1.651, "mean_token_accuracy": 0.8363562524318695, "num_tokens": 93134.0, "step": 52 }, { "epoch": 0.008582301028256821, "grad_norm": 200.34713745117188, "learning_rate": 9.915803108808291e-06, "loss": 1.5119, "mean_token_accuracy": 0.8532276153564453, "num_tokens": 94932.0, "step": 53 }, { "epoch": 0.008744231236337139, "grad_norm": 188.26747131347656, "learning_rate": 9.914183937823835e-06, "loss": 1.5943, "mean_token_accuracy": 0.8267816007137299, "num_tokens": 96720.0, "step": 54 }, { "epoch": 0.008906161444417457, "grad_norm": 206.5025177001953, "learning_rate": 9.912564766839378e-06, "loss": 1.6023, "mean_token_accuracy": 0.8385719060897827, "num_tokens": 98516.0, "step": 55 }, { "epoch": 0.009068091652497773, "grad_norm": 190.56814575195312, "learning_rate": 9.910945595854922e-06, "loss": 1.5202, "mean_token_accuracy": 0.842502623796463, "num_tokens": 100305.0, "step": 56 }, { "epoch": 0.009230021860578091, "grad_norm": 189.0370330810547, "learning_rate": 9.909326424870467e-06, "loss": 1.4965, "mean_token_accuracy": 0.8248813450336456, "num_tokens": 102091.0, "step": 57 }, { "epoch": 0.009391952068658407, "grad_norm": 188.38626098632812, "learning_rate": 9.90770725388601e-06, "loss": 1.4053, "mean_token_accuracy": 0.8376361727714539, "num_tokens": 103874.0, "step": 58 }, { "epoch": 0.009553882276738725, "grad_norm": 166.5449676513672, "learning_rate": 9.906088082901554e-06, "loss": 1.1694, "mean_token_accuracy": 0.8681569397449493, "num_tokens": 105659.0, "step": 59 }, { "epoch": 0.009715812484819043, "grad_norm": 293.2898254394531, "learning_rate": 9.9044689119171e-06, "loss": 1.6415, "mean_token_accuracy": 0.8272086679935455, "num_tokens": 107453.0, "step": 60 }, { "epoch": 0.00987774269289936, "grad_norm": 192.3681640625, "learning_rate": 9.902849740932643e-06, "loss": 1.5366, "mean_token_accuracy": 0.8456753194332123, "num_tokens": 109237.0, "step": 61 }, { "epoch": 0.010039672900979678, "grad_norm": 203.23016357421875, "learning_rate": 9.901230569948187e-06, "loss": 1.3134, "mean_token_accuracy": 0.8415968716144562, "num_tokens": 111039.0, "step": 62 }, { "epoch": 0.010201603109059995, "grad_norm": 193.1846923828125, "learning_rate": 9.89961139896373e-06, "loss": 1.5576, "mean_token_accuracy": 0.8363747000694275, "num_tokens": 112826.0, "step": 63 }, { "epoch": 0.010363533317140312, "grad_norm": 183.6842498779297, "learning_rate": 9.897992227979276e-06, "loss": 1.2775, "mean_token_accuracy": 0.8560853004455566, "num_tokens": 114616.0, "step": 64 }, { "epoch": 0.01052546352522063, "grad_norm": 166.9985809326172, "learning_rate": 9.89637305699482e-06, "loss": 1.3046, "mean_token_accuracy": 0.8644736707210541, "num_tokens": 116401.0, "step": 65 }, { "epoch": 0.010687393733300948, "grad_norm": 202.93296813964844, "learning_rate": 9.894753886010363e-06, "loss": 1.3966, "mean_token_accuracy": 0.8367460072040558, "num_tokens": 118195.0, "step": 66 }, { "epoch": 0.010849323941381264, "grad_norm": 190.30007934570312, "learning_rate": 9.893134715025907e-06, "loss": 1.2746, "mean_token_accuracy": 0.8372413516044617, "num_tokens": 120002.0, "step": 67 }, { "epoch": 0.011011254149461582, "grad_norm": 195.32217407226562, "learning_rate": 9.891515544041452e-06, "loss": 1.3609, "mean_token_accuracy": 0.850742518901825, "num_tokens": 121796.0, "step": 68 }, { "epoch": 0.0111731843575419, "grad_norm": 188.3912811279297, "learning_rate": 9.889896373056995e-06, "loss": 1.248, "mean_token_accuracy": 0.8554182052612305, "num_tokens": 123585.0, "step": 69 }, { "epoch": 0.011335114565622216, "grad_norm": 194.74266052246094, "learning_rate": 9.888277202072539e-06, "loss": 1.3703, "mean_token_accuracy": 0.8520025014877319, "num_tokens": 125374.0, "step": 70 }, { "epoch": 0.011497044773702534, "grad_norm": 169.6554718017578, "learning_rate": 9.886658031088083e-06, "loss": 1.3225, "mean_token_accuracy": 0.8728920221328735, "num_tokens": 127162.0, "step": 71 }, { "epoch": 0.011658974981782852, "grad_norm": 174.06930541992188, "learning_rate": 9.885038860103628e-06, "loss": 1.2812, "mean_token_accuracy": 0.8392018675804138, "num_tokens": 128954.0, "step": 72 }, { "epoch": 0.011820905189863168, "grad_norm": 195.21556091308594, "learning_rate": 9.883419689119171e-06, "loss": 1.4616, "mean_token_accuracy": 0.8464285731315613, "num_tokens": 130746.0, "step": 73 }, { "epoch": 0.011982835397943486, "grad_norm": 177.80078125, "learning_rate": 9.881800518134715e-06, "loss": 1.2312, "mean_token_accuracy": 0.8633754253387451, "num_tokens": 132535.0, "step": 74 }, { "epoch": 0.012144765606023804, "grad_norm": 176.92259216308594, "learning_rate": 9.880181347150259e-06, "loss": 1.2379, "mean_token_accuracy": 0.8515756130218506, "num_tokens": 134323.0, "step": 75 }, { "epoch": 0.01230669581410412, "grad_norm": 158.85617065429688, "learning_rate": 9.878562176165804e-06, "loss": 1.3788, "mean_token_accuracy": 0.8581402599811554, "num_tokens": 136110.0, "step": 76 }, { "epoch": 0.012468626022184438, "grad_norm": 178.2767333984375, "learning_rate": 9.876943005181348e-06, "loss": 1.4577, "mean_token_accuracy": 0.84085413813591, "num_tokens": 137904.0, "step": 77 }, { "epoch": 0.012630556230264756, "grad_norm": 158.47547912597656, "learning_rate": 9.875323834196891e-06, "loss": 1.3095, "mean_token_accuracy": 0.8543533980846405, "num_tokens": 139690.0, "step": 78 }, { "epoch": 0.012792486438345073, "grad_norm": 203.07867431640625, "learning_rate": 9.873704663212436e-06, "loss": 1.3092, "mean_token_accuracy": 0.8548941910266876, "num_tokens": 141477.0, "step": 79 }, { "epoch": 0.01295441664642539, "grad_norm": 182.68307495117188, "learning_rate": 9.87208549222798e-06, "loss": 1.4291, "mean_token_accuracy": 0.8384424149990082, "num_tokens": 143267.0, "step": 80 }, { "epoch": 0.013116346854505708, "grad_norm": 163.27781677246094, "learning_rate": 9.870466321243524e-06, "loss": 1.6462, "mean_token_accuracy": 0.8403182923793793, "num_tokens": 145067.0, "step": 81 }, { "epoch": 0.013278277062586025, "grad_norm": 132.82940673828125, "learning_rate": 9.868847150259067e-06, "loss": 1.3092, "mean_token_accuracy": 0.8508674502372742, "num_tokens": 146854.0, "step": 82 }, { "epoch": 0.013440207270666343, "grad_norm": 178.1233673095703, "learning_rate": 9.867227979274612e-06, "loss": 1.5437, "mean_token_accuracy": 0.8374050855636597, "num_tokens": 148649.0, "step": 83 }, { "epoch": 0.01360213747874666, "grad_norm": 147.5355682373047, "learning_rate": 9.865608808290156e-06, "loss": 1.3282, "mean_token_accuracy": 0.850056529045105, "num_tokens": 150440.0, "step": 84 }, { "epoch": 0.013764067686826977, "grad_norm": 180.29693603515625, "learning_rate": 9.8639896373057e-06, "loss": 1.5092, "mean_token_accuracy": 0.8365455865859985, "num_tokens": 152246.0, "step": 85 }, { "epoch": 0.013925997894907295, "grad_norm": 186.56275939941406, "learning_rate": 9.862370466321243e-06, "loss": 1.2075, "mean_token_accuracy": 0.8505065739154816, "num_tokens": 154039.0, "step": 86 }, { "epoch": 0.014087928102987613, "grad_norm": 169.25901794433594, "learning_rate": 9.860751295336788e-06, "loss": 1.5819, "mean_token_accuracy": 0.8284255862236023, "num_tokens": 155848.0, "step": 87 }, { "epoch": 0.014249858311067929, "grad_norm": 154.59115600585938, "learning_rate": 9.859132124352332e-06, "loss": 1.3555, "mean_token_accuracy": 0.840780645608902, "num_tokens": 157649.0, "step": 88 }, { "epoch": 0.014411788519148247, "grad_norm": 131.11549377441406, "learning_rate": 9.857512953367876e-06, "loss": 1.1094, "mean_token_accuracy": 0.8674995005130768, "num_tokens": 159444.0, "step": 89 }, { "epoch": 0.014573718727228565, "grad_norm": 140.81982421875, "learning_rate": 9.85589378238342e-06, "loss": 1.242, "mean_token_accuracy": 0.8588644564151764, "num_tokens": 161238.0, "step": 90 }, { "epoch": 0.014735648935308881, "grad_norm": 126.2032470703125, "learning_rate": 9.854274611398965e-06, "loss": 1.1502, "mean_token_accuracy": 0.8686131536960602, "num_tokens": 163024.0, "step": 91 }, { "epoch": 0.0148975791433892, "grad_norm": 111.11841583251953, "learning_rate": 9.852655440414508e-06, "loss": 1.01, "mean_token_accuracy": 0.8772802650928497, "num_tokens": 164805.0, "step": 92 }, { "epoch": 0.015059509351469517, "grad_norm": 139.14724731445312, "learning_rate": 9.851036269430052e-06, "loss": 0.9732, "mean_token_accuracy": 0.8784445524215698, "num_tokens": 166597.0, "step": 93 }, { "epoch": 0.015221439559549833, "grad_norm": 143.22569274902344, "learning_rate": 9.849417098445595e-06, "loss": 1.299, "mean_token_accuracy": 0.8621516525745392, "num_tokens": 168392.0, "step": 94 }, { "epoch": 0.015383369767630151, "grad_norm": 166.4271697998047, "learning_rate": 9.84779792746114e-06, "loss": 1.4071, "mean_token_accuracy": 0.8573804497718811, "num_tokens": 170182.0, "step": 95 }, { "epoch": 0.01554529997571047, "grad_norm": 152.1414031982422, "learning_rate": 9.846178756476684e-06, "loss": 1.3104, "mean_token_accuracy": 0.8578595221042633, "num_tokens": 171975.0, "step": 96 }, { "epoch": 0.015707230183790787, "grad_norm": 157.1289825439453, "learning_rate": 9.844559585492228e-06, "loss": 1.6479, "mean_token_accuracy": 0.8411366045475006, "num_tokens": 173764.0, "step": 97 }, { "epoch": 0.015869160391871105, "grad_norm": 143.55955505371094, "learning_rate": 9.842940414507773e-06, "loss": 1.1203, "mean_token_accuracy": 0.8773466944694519, "num_tokens": 175553.0, "step": 98 }, { "epoch": 0.01603109059995142, "grad_norm": 153.3444061279297, "learning_rate": 9.841321243523317e-06, "loss": 1.144, "mean_token_accuracy": 0.8682743906974792, "num_tokens": 177346.0, "step": 99 }, { "epoch": 0.016193020808031738, "grad_norm": 141.318115234375, "learning_rate": 9.839702072538862e-06, "loss": 1.268, "mean_token_accuracy": 0.850713849067688, "num_tokens": 179146.0, "step": 100 }, { "epoch": 0.016354951016112056, "grad_norm": 226.34141540527344, "learning_rate": 9.838082901554406e-06, "loss": 1.4913, "mean_token_accuracy": 0.8339845538139343, "num_tokens": 180953.0, "step": 101 }, { "epoch": 0.016516881224192374, "grad_norm": 149.2474822998047, "learning_rate": 9.836463730569949e-06, "loss": 1.3925, "mean_token_accuracy": 0.8564562499523163, "num_tokens": 182744.0, "step": 102 }, { "epoch": 0.01667881143227269, "grad_norm": 156.7301025390625, "learning_rate": 9.834844559585494e-06, "loss": 1.265, "mean_token_accuracy": 0.8668636083602905, "num_tokens": 184534.0, "step": 103 }, { "epoch": 0.01684074164035301, "grad_norm": 126.90946197509766, "learning_rate": 9.833225388601038e-06, "loss": 1.4091, "mean_token_accuracy": 0.8362042903900146, "num_tokens": 186327.0, "step": 104 }, { "epoch": 0.017002671848433324, "grad_norm": 142.30873107910156, "learning_rate": 9.831606217616582e-06, "loss": 1.3331, "mean_token_accuracy": 0.8343567252159119, "num_tokens": 188129.0, "step": 105 }, { "epoch": 0.017164602056513642, "grad_norm": 140.3267059326172, "learning_rate": 9.829987046632125e-06, "loss": 1.3125, "mean_token_accuracy": 0.8535894751548767, "num_tokens": 189921.0, "step": 106 }, { "epoch": 0.01732653226459396, "grad_norm": 134.98789978027344, "learning_rate": 9.82836787564767e-06, "loss": 1.2795, "mean_token_accuracy": 0.8522522449493408, "num_tokens": 191716.0, "step": 107 }, { "epoch": 0.017488462472674278, "grad_norm": 145.98971557617188, "learning_rate": 9.826748704663214e-06, "loss": 1.4273, "mean_token_accuracy": 0.8379662036895752, "num_tokens": 193511.0, "step": 108 }, { "epoch": 0.017650392680754596, "grad_norm": 135.8827362060547, "learning_rate": 9.825129533678758e-06, "loss": 1.2056, "mean_token_accuracy": 0.8697141110897064, "num_tokens": 195307.0, "step": 109 }, { "epoch": 0.017812322888834914, "grad_norm": 121.84761810302734, "learning_rate": 9.823510362694301e-06, "loss": 1.1506, "mean_token_accuracy": 0.8648231029510498, "num_tokens": 197093.0, "step": 110 }, { "epoch": 0.01797425309691523, "grad_norm": 148.14280700683594, "learning_rate": 9.821891191709846e-06, "loss": 1.5437, "mean_token_accuracy": 0.84389927983284, "num_tokens": 198893.0, "step": 111 }, { "epoch": 0.018136183304995546, "grad_norm": 131.35768127441406, "learning_rate": 9.82027202072539e-06, "loss": 1.4677, "mean_token_accuracy": 0.835106372833252, "num_tokens": 200690.0, "step": 112 }, { "epoch": 0.018298113513075864, "grad_norm": 129.72560119628906, "learning_rate": 9.818652849740934e-06, "loss": 1.0861, "mean_token_accuracy": 0.8749391734600067, "num_tokens": 202474.0, "step": 113 }, { "epoch": 0.018460043721156182, "grad_norm": 119.53143310546875, "learning_rate": 9.817033678756477e-06, "loss": 1.2916, "mean_token_accuracy": 0.8645299077033997, "num_tokens": 204251.0, "step": 114 }, { "epoch": 0.0186219739292365, "grad_norm": 128.28497314453125, "learning_rate": 9.815414507772023e-06, "loss": 1.4726, "mean_token_accuracy": 0.8503647744655609, "num_tokens": 206044.0, "step": 115 }, { "epoch": 0.018783904137316815, "grad_norm": 147.23959350585938, "learning_rate": 9.813795336787566e-06, "loss": 1.7437, "mean_token_accuracy": 0.8233158588409424, "num_tokens": 207838.0, "step": 116 }, { "epoch": 0.018945834345397133, "grad_norm": 139.5876007080078, "learning_rate": 9.81217616580311e-06, "loss": 1.2578, "mean_token_accuracy": 0.8539618849754333, "num_tokens": 209631.0, "step": 117 }, { "epoch": 0.01910776455347745, "grad_norm": 118.18374633789062, "learning_rate": 9.810556994818653e-06, "loss": 0.9251, "mean_token_accuracy": 0.8750790357589722, "num_tokens": 211419.0, "step": 118 }, { "epoch": 0.01926969476155777, "grad_norm": 163.70236206054688, "learning_rate": 9.808937823834199e-06, "loss": 1.333, "mean_token_accuracy": 0.8524993658065796, "num_tokens": 213209.0, "step": 119 }, { "epoch": 0.019431624969638087, "grad_norm": 178.47259521484375, "learning_rate": 9.807318652849742e-06, "loss": 1.4292, "mean_token_accuracy": 0.842380940914154, "num_tokens": 215011.0, "step": 120 }, { "epoch": 0.019593555177718405, "grad_norm": 102.3499755859375, "learning_rate": 9.805699481865286e-06, "loss": 0.827, "mean_token_accuracy": 0.8908371031284332, "num_tokens": 216789.0, "step": 121 }, { "epoch": 0.01975548538579872, "grad_norm": 157.03076171875, "learning_rate": 9.804080310880831e-06, "loss": 1.6988, "mean_token_accuracy": 0.8290434181690216, "num_tokens": 218590.0, "step": 122 }, { "epoch": 0.019917415593879037, "grad_norm": 133.3473358154297, "learning_rate": 9.802461139896375e-06, "loss": 1.2202, "mean_token_accuracy": 0.8512350916862488, "num_tokens": 220384.0, "step": 123 }, { "epoch": 0.020079345801959355, "grad_norm": 117.95792388916016, "learning_rate": 9.800841968911918e-06, "loss": 1.2215, "mean_token_accuracy": 0.8653438985347748, "num_tokens": 222171.0, "step": 124 }, { "epoch": 0.020241276010039673, "grad_norm": 115.05699920654297, "learning_rate": 9.799222797927462e-06, "loss": 1.2581, "mean_token_accuracy": 0.8516252934932709, "num_tokens": 223959.0, "step": 125 }, { "epoch": 0.02040320621811999, "grad_norm": 124.35389709472656, "learning_rate": 9.797603626943007e-06, "loss": 1.2404, "mean_token_accuracy": 0.8611111044883728, "num_tokens": 225744.0, "step": 126 }, { "epoch": 0.02056513642620031, "grad_norm": 111.64168548583984, "learning_rate": 9.79598445595855e-06, "loss": 1.1267, "mean_token_accuracy": 0.870034396648407, "num_tokens": 227533.0, "step": 127 }, { "epoch": 0.020727066634280623, "grad_norm": 110.30130004882812, "learning_rate": 9.794365284974094e-06, "loss": 1.0383, "mean_token_accuracy": 0.8764550089836121, "num_tokens": 229320.0, "step": 128 }, { "epoch": 0.02088899684236094, "grad_norm": 136.05819702148438, "learning_rate": 9.792746113989638e-06, "loss": 1.342, "mean_token_accuracy": 0.8593847751617432, "num_tokens": 231109.0, "step": 129 }, { "epoch": 0.02105092705044126, "grad_norm": 155.8380126953125, "learning_rate": 9.791126943005183e-06, "loss": 1.3679, "mean_token_accuracy": 0.8501408398151398, "num_tokens": 232913.0, "step": 130 }, { "epoch": 0.021212857258521577, "grad_norm": 112.73963928222656, "learning_rate": 9.789507772020727e-06, "loss": 1.1217, "mean_token_accuracy": 0.87177973985672, "num_tokens": 234698.0, "step": 131 }, { "epoch": 0.021374787466601895, "grad_norm": 128.66358947753906, "learning_rate": 9.78788860103627e-06, "loss": 1.2164, "mean_token_accuracy": 0.8356244564056396, "num_tokens": 236484.0, "step": 132 }, { "epoch": 0.021536717674682213, "grad_norm": 122.71953582763672, "learning_rate": 9.786269430051814e-06, "loss": 1.1271, "mean_token_accuracy": 0.8730820715427399, "num_tokens": 238280.0, "step": 133 }, { "epoch": 0.021698647882762528, "grad_norm": 119.80425262451172, "learning_rate": 9.78465025906736e-06, "loss": 1.1418, "mean_token_accuracy": 0.8698275983333588, "num_tokens": 240077.0, "step": 134 }, { "epoch": 0.021860578090842846, "grad_norm": 123.12356567382812, "learning_rate": 9.783031088082903e-06, "loss": 1.1351, "mean_token_accuracy": 0.8713235259056091, "num_tokens": 241861.0, "step": 135 }, { "epoch": 0.022022508298923164, "grad_norm": 137.73861694335938, "learning_rate": 9.781411917098446e-06, "loss": 1.2338, "mean_token_accuracy": 0.8567132651805878, "num_tokens": 243652.0, "step": 136 }, { "epoch": 0.02218443850700348, "grad_norm": 116.47329711914062, "learning_rate": 9.77979274611399e-06, "loss": 1.1096, "mean_token_accuracy": 0.8680764138698578, "num_tokens": 245437.0, "step": 137 }, { "epoch": 0.0223463687150838, "grad_norm": 125.22412109375, "learning_rate": 9.778173575129535e-06, "loss": 1.3607, "mean_token_accuracy": 0.8363218009471893, "num_tokens": 247224.0, "step": 138 }, { "epoch": 0.022508298923164118, "grad_norm": 129.5410919189453, "learning_rate": 9.776554404145079e-06, "loss": 1.4095, "mean_token_accuracy": 0.8447043597698212, "num_tokens": 249026.0, "step": 139 }, { "epoch": 0.022670229131244432, "grad_norm": 134.6291961669922, "learning_rate": 9.774935233160622e-06, "loss": 1.5326, "mean_token_accuracy": 0.8500875234603882, "num_tokens": 250825.0, "step": 140 }, { "epoch": 0.02283215933932475, "grad_norm": 147.4597930908203, "learning_rate": 9.773316062176168e-06, "loss": 1.2712, "mean_token_accuracy": 0.8485411107540131, "num_tokens": 252627.0, "step": 141 }, { "epoch": 0.022994089547405068, "grad_norm": 132.6239471435547, "learning_rate": 9.771696891191711e-06, "loss": 1.3977, "mean_token_accuracy": 0.8432987034320831, "num_tokens": 254413.0, "step": 142 }, { "epoch": 0.023156019755485386, "grad_norm": 126.1785659790039, "learning_rate": 9.770077720207255e-06, "loss": 1.3923, "mean_token_accuracy": 0.8496575355529785, "num_tokens": 256211.0, "step": 143 }, { "epoch": 0.023317949963565704, "grad_norm": 123.70172882080078, "learning_rate": 9.768458549222798e-06, "loss": 1.4467, "mean_token_accuracy": 0.8444103002548218, "num_tokens": 258012.0, "step": 144 }, { "epoch": 0.023479880171646022, "grad_norm": 109.92925262451172, "learning_rate": 9.766839378238344e-06, "loss": 1.0699, "mean_token_accuracy": 0.8661601543426514, "num_tokens": 259800.0, "step": 145 }, { "epoch": 0.023641810379726336, "grad_norm": 104.02082824707031, "learning_rate": 9.765220207253887e-06, "loss": 1.1055, "mean_token_accuracy": 0.8659451305866241, "num_tokens": 261594.0, "step": 146 }, { "epoch": 0.023803740587806654, "grad_norm": 115.35977172851562, "learning_rate": 9.763601036269431e-06, "loss": 1.1863, "mean_token_accuracy": 0.8485915660858154, "num_tokens": 263390.0, "step": 147 }, { "epoch": 0.023965670795886972, "grad_norm": 98.9872055053711, "learning_rate": 9.761981865284975e-06, "loss": 1.0046, "mean_token_accuracy": 0.8793355226516724, "num_tokens": 265184.0, "step": 148 }, { "epoch": 0.02412760100396729, "grad_norm": 127.40291595458984, "learning_rate": 9.76036269430052e-06, "loss": 1.2655, "mean_token_accuracy": 0.8649792373180389, "num_tokens": 266977.0, "step": 149 }, { "epoch": 0.024289531212047608, "grad_norm": 131.8263702392578, "learning_rate": 9.758743523316063e-06, "loss": 1.3194, "mean_token_accuracy": 0.8503649830818176, "num_tokens": 268763.0, "step": 150 }, { "epoch": 0.024451461420127926, "grad_norm": 116.09439849853516, "learning_rate": 9.757124352331607e-06, "loss": 1.1478, "mean_token_accuracy": 0.8628665804862976, "num_tokens": 270566.0, "step": 151 }, { "epoch": 0.02461339162820824, "grad_norm": 117.9870376586914, "learning_rate": 9.75550518134715e-06, "loss": 1.3406, "mean_token_accuracy": 0.8555416464805603, "num_tokens": 272355.0, "step": 152 }, { "epoch": 0.02477532183628856, "grad_norm": 104.34040069580078, "learning_rate": 9.753886010362696e-06, "loss": 1.0825, "mean_token_accuracy": 0.8654018640518188, "num_tokens": 274134.0, "step": 153 }, { "epoch": 0.024937252044368877, "grad_norm": 109.73971557617188, "learning_rate": 9.75226683937824e-06, "loss": 1.1913, "mean_token_accuracy": 0.8550039529800415, "num_tokens": 275922.0, "step": 154 }, { "epoch": 0.025099182252449195, "grad_norm": 120.62830352783203, "learning_rate": 9.750647668393783e-06, "loss": 1.0767, "mean_token_accuracy": 0.8692438304424286, "num_tokens": 277717.0, "step": 155 }, { "epoch": 0.025261112460529513, "grad_norm": 119.07559967041016, "learning_rate": 9.749028497409327e-06, "loss": 1.3981, "mean_token_accuracy": 0.8505167663097382, "num_tokens": 279510.0, "step": 156 }, { "epoch": 0.02542304266860983, "grad_norm": 111.41227722167969, "learning_rate": 9.747409326424872e-06, "loss": 1.0012, "mean_token_accuracy": 0.8773983418941498, "num_tokens": 281299.0, "step": 157 }, { "epoch": 0.025584972876690145, "grad_norm": 126.73580932617188, "learning_rate": 9.745790155440416e-06, "loss": 1.413, "mean_token_accuracy": 0.837442934513092, "num_tokens": 283092.0, "step": 158 }, { "epoch": 0.025746903084770463, "grad_norm": 126.39917755126953, "learning_rate": 9.744170984455959e-06, "loss": 1.2451, "mean_token_accuracy": 0.8299241065979004, "num_tokens": 284898.0, "step": 159 }, { "epoch": 0.02590883329285078, "grad_norm": 120.39822387695312, "learning_rate": 9.742551813471504e-06, "loss": 1.3836, "mean_token_accuracy": 0.8522522449493408, "num_tokens": 286693.0, "step": 160 }, { "epoch": 0.0260707635009311, "grad_norm": 115.48530578613281, "learning_rate": 9.740932642487048e-06, "loss": 1.073, "mean_token_accuracy": 0.8645526766777039, "num_tokens": 288493.0, "step": 161 }, { "epoch": 0.026232693709011417, "grad_norm": 122.31940460205078, "learning_rate": 9.739313471502592e-06, "loss": 1.3647, "mean_token_accuracy": 0.8451739847660065, "num_tokens": 290283.0, "step": 162 }, { "epoch": 0.026394623917091735, "grad_norm": 92.15382385253906, "learning_rate": 9.737694300518135e-06, "loss": 1.0321, "mean_token_accuracy": 0.8815624713897705, "num_tokens": 292072.0, "step": 163 }, { "epoch": 0.02655655412517205, "grad_norm": 92.47595977783203, "learning_rate": 9.73607512953368e-06, "loss": 1.1039, "mean_token_accuracy": 0.8732142746448517, "num_tokens": 293868.0, "step": 164 }, { "epoch": 0.026718484333252367, "grad_norm": 114.09842681884766, "learning_rate": 9.734455958549224e-06, "loss": 1.2642, "mean_token_accuracy": 0.839160829782486, "num_tokens": 295666.0, "step": 165 }, { "epoch": 0.026880414541332685, "grad_norm": 100.58135986328125, "learning_rate": 9.732836787564768e-06, "loss": 1.2702, "mean_token_accuracy": 0.8551343381404877, "num_tokens": 297461.0, "step": 166 }, { "epoch": 0.027042344749413003, "grad_norm": 117.5189208984375, "learning_rate": 9.731217616580311e-06, "loss": 1.2679, "mean_token_accuracy": 0.8572303652763367, "num_tokens": 299253.0, "step": 167 }, { "epoch": 0.02720427495749332, "grad_norm": 103.39527893066406, "learning_rate": 9.729598445595857e-06, "loss": 1.2223, "mean_token_accuracy": 0.8658588528633118, "num_tokens": 301056.0, "step": 168 }, { "epoch": 0.02736620516557364, "grad_norm": 129.08644104003906, "learning_rate": 9.7279792746114e-06, "loss": 1.3509, "mean_token_accuracy": 0.8439637720584869, "num_tokens": 302850.0, "step": 169 }, { "epoch": 0.027528135373653954, "grad_norm": 114.4383544921875, "learning_rate": 9.726360103626944e-06, "loss": 1.2728, "mean_token_accuracy": 0.8337662220001221, "num_tokens": 304656.0, "step": 170 }, { "epoch": 0.02769006558173427, "grad_norm": 90.71302032470703, "learning_rate": 9.724740932642487e-06, "loss": 1.1357, "mean_token_accuracy": 0.8639854788780212, "num_tokens": 306440.0, "step": 171 }, { "epoch": 0.02785199578981459, "grad_norm": 116.95759582519531, "learning_rate": 9.723121761658033e-06, "loss": 1.0394, "mean_token_accuracy": 0.8832117021083832, "num_tokens": 308226.0, "step": 172 }, { "epoch": 0.028013925997894908, "grad_norm": 150.5602264404297, "learning_rate": 9.721502590673576e-06, "loss": 1.2547, "mean_token_accuracy": 0.8491332530975342, "num_tokens": 310023.0, "step": 173 }, { "epoch": 0.028175856205975226, "grad_norm": 92.75274658203125, "learning_rate": 9.71988341968912e-06, "loss": 1.0546, "mean_token_accuracy": 0.8627997934818268, "num_tokens": 311812.0, "step": 174 }, { "epoch": 0.028337786414055544, "grad_norm": 82.1319808959961, "learning_rate": 9.718264248704663e-06, "loss": 0.7956, "mean_token_accuracy": 0.9042253494262695, "num_tokens": 313596.0, "step": 175 }, { "epoch": 0.028499716622135858, "grad_norm": 119.94003295898438, "learning_rate": 9.716645077720209e-06, "loss": 1.1073, "mean_token_accuracy": 0.8617897927761078, "num_tokens": 315397.0, "step": 176 }, { "epoch": 0.028661646830216176, "grad_norm": 121.94509887695312, "learning_rate": 9.715025906735752e-06, "loss": 0.9215, "mean_token_accuracy": 0.8742299377918243, "num_tokens": 317187.0, "step": 177 }, { "epoch": 0.028823577038296494, "grad_norm": 93.1749267578125, "learning_rate": 9.713406735751296e-06, "loss": 0.8583, "mean_token_accuracy": 0.8875661194324493, "num_tokens": 318974.0, "step": 178 }, { "epoch": 0.028985507246376812, "grad_norm": 97.17940521240234, "learning_rate": 9.711787564766841e-06, "loss": 1.1198, "mean_token_accuracy": 0.8783540725708008, "num_tokens": 320765.0, "step": 179 }, { "epoch": 0.02914743745445713, "grad_norm": 131.29702758789062, "learning_rate": 9.710168393782385e-06, "loss": 1.5697, "mean_token_accuracy": 0.8433793485164642, "num_tokens": 322564.0, "step": 180 }, { "epoch": 0.029309367662537448, "grad_norm": 119.54278564453125, "learning_rate": 9.708549222797928e-06, "loss": 1.5281, "mean_token_accuracy": 0.8380559682846069, "num_tokens": 324359.0, "step": 181 }, { "epoch": 0.029471297870617762, "grad_norm": 97.37931060791016, "learning_rate": 9.706930051813472e-06, "loss": 0.8184, "mean_token_accuracy": 0.8845532238483429, "num_tokens": 326148.0, "step": 182 }, { "epoch": 0.02963322807869808, "grad_norm": 92.8282470703125, "learning_rate": 9.705310880829017e-06, "loss": 1.3343, "mean_token_accuracy": 0.8527897298336029, "num_tokens": 327943.0, "step": 183 }, { "epoch": 0.0297951582867784, "grad_norm": 105.1593017578125, "learning_rate": 9.70369170984456e-06, "loss": 1.1945, "mean_token_accuracy": 0.857710987329483, "num_tokens": 329736.0, "step": 184 }, { "epoch": 0.029957088494858716, "grad_norm": 103.52218627929688, "learning_rate": 9.702072538860104e-06, "loss": 1.1561, "mean_token_accuracy": 0.8648359179496765, "num_tokens": 331528.0, "step": 185 }, { "epoch": 0.030119018702939034, "grad_norm": 106.11717987060547, "learning_rate": 9.700453367875648e-06, "loss": 1.2645, "mean_token_accuracy": 0.8563829660415649, "num_tokens": 333325.0, "step": 186 }, { "epoch": 0.030280948911019352, "grad_norm": 102.07538604736328, "learning_rate": 9.698834196891193e-06, "loss": 1.165, "mean_token_accuracy": 0.8626444339752197, "num_tokens": 335121.0, "step": 187 }, { "epoch": 0.030442879119099667, "grad_norm": 103.69478607177734, "learning_rate": 9.697215025906737e-06, "loss": 1.0421, "mean_token_accuracy": 0.8777984976768494, "num_tokens": 336911.0, "step": 188 }, { "epoch": 0.030604809327179985, "grad_norm": 102.62185668945312, "learning_rate": 9.69559585492228e-06, "loss": 1.1987, "mean_token_accuracy": 0.8626984059810638, "num_tokens": 338707.0, "step": 189 }, { "epoch": 0.030766739535260303, "grad_norm": 86.39649963378906, "learning_rate": 9.693976683937824e-06, "loss": 1.0547, "mean_token_accuracy": 0.8647922873497009, "num_tokens": 340500.0, "step": 190 }, { "epoch": 0.03092866974334062, "grad_norm": 95.78366088867188, "learning_rate": 9.69235751295337e-06, "loss": 1.2656, "mean_token_accuracy": 0.8473006188869476, "num_tokens": 342300.0, "step": 191 }, { "epoch": 0.03109059995142094, "grad_norm": 102.48013305664062, "learning_rate": 9.690738341968913e-06, "loss": 1.1197, "mean_token_accuracy": 0.8623949587345123, "num_tokens": 344081.0, "step": 192 }, { "epoch": 0.03125253015950125, "grad_norm": 93.49293518066406, "learning_rate": 9.689119170984456e-06, "loss": 0.9591, "mean_token_accuracy": 0.879696249961853, "num_tokens": 345867.0, "step": 193 }, { "epoch": 0.031414460367581575, "grad_norm": 79.96947479248047, "learning_rate": 9.6875e-06, "loss": 0.9843, "mean_token_accuracy": 0.894948273897171, "num_tokens": 347655.0, "step": 194 }, { "epoch": 0.03157639057566189, "grad_norm": 70.88719177246094, "learning_rate": 9.685880829015545e-06, "loss": 0.9235, "mean_token_accuracy": 0.879696249961853, "num_tokens": 349441.0, "step": 195 }, { "epoch": 0.03173832078374221, "grad_norm": 94.55944061279297, "learning_rate": 9.684261658031089e-06, "loss": 1.2195, "mean_token_accuracy": 0.8540273606777191, "num_tokens": 351234.0, "step": 196 }, { "epoch": 0.031900250991822525, "grad_norm": 80.9522705078125, "learning_rate": 9.682642487046632e-06, "loss": 0.8758, "mean_token_accuracy": 0.893457680940628, "num_tokens": 353018.0, "step": 197 }, { "epoch": 0.03206218119990284, "grad_norm": 121.10845947265625, "learning_rate": 9.681023316062178e-06, "loss": 1.218, "mean_token_accuracy": 0.8588652610778809, "num_tokens": 354806.0, "step": 198 }, { "epoch": 0.03222411140798316, "grad_norm": 89.33942413330078, "learning_rate": 9.679404145077721e-06, "loss": 1.0018, "mean_token_accuracy": 0.8747301995754242, "num_tokens": 356597.0, "step": 199 }, { "epoch": 0.032386041616063475, "grad_norm": 76.56890106201172, "learning_rate": 9.677784974093265e-06, "loss": 0.9992, "mean_token_accuracy": 0.880248099565506, "num_tokens": 358376.0, "step": 200 }, { "epoch": 0.0325479718241438, "grad_norm": 98.30876922607422, "learning_rate": 9.676165803108809e-06, "loss": 1.3148, "mean_token_accuracy": 0.8574938774108887, "num_tokens": 360168.0, "step": 201 }, { "epoch": 0.03270990203222411, "grad_norm": 92.54721069335938, "learning_rate": 9.674546632124354e-06, "loss": 1.3033, "mean_token_accuracy": 0.8603916168212891, "num_tokens": 361959.0, "step": 202 }, { "epoch": 0.032871832240304426, "grad_norm": 69.83611297607422, "learning_rate": 9.672927461139897e-06, "loss": 0.7332, "mean_token_accuracy": 0.899563193321228, "num_tokens": 363750.0, "step": 203 }, { "epoch": 0.03303376244838475, "grad_norm": 79.83929443359375, "learning_rate": 9.671308290155441e-06, "loss": 0.9168, "mean_token_accuracy": 0.8892720341682434, "num_tokens": 365542.0, "step": 204 }, { "epoch": 0.03319569265646506, "grad_norm": 104.99756622314453, "learning_rate": 9.669689119170985e-06, "loss": 1.2795, "mean_token_accuracy": 0.8637835085391998, "num_tokens": 367333.0, "step": 205 }, { "epoch": 0.03335762286454538, "grad_norm": 78.70339965820312, "learning_rate": 9.66806994818653e-06, "loss": 1.0603, "mean_token_accuracy": 0.8583677411079407, "num_tokens": 369120.0, "step": 206 }, { "epoch": 0.0335195530726257, "grad_norm": 99.41761779785156, "learning_rate": 9.666450777202073e-06, "loss": 1.3328, "mean_token_accuracy": 0.8484255969524384, "num_tokens": 370909.0, "step": 207 }, { "epoch": 0.03368148328070602, "grad_norm": 72.52552795410156, "learning_rate": 9.664831606217617e-06, "loss": 1.0031, "mean_token_accuracy": 0.890016108751297, "num_tokens": 372694.0, "step": 208 }, { "epoch": 0.033843413488786334, "grad_norm": 78.51555633544922, "learning_rate": 9.66321243523316e-06, "loss": 0.8971, "mean_token_accuracy": 0.8875661194324493, "num_tokens": 374481.0, "step": 209 }, { "epoch": 0.03400534369686665, "grad_norm": 92.80706024169922, "learning_rate": 9.661593264248706e-06, "loss": 1.0469, "mean_token_accuracy": 0.8640248775482178, "num_tokens": 376280.0, "step": 210 }, { "epoch": 0.03416727390494697, "grad_norm": 98.85427856445312, "learning_rate": 9.65997409326425e-06, "loss": 0.9937, "mean_token_accuracy": 0.8770212829113007, "num_tokens": 378083.0, "step": 211 }, { "epoch": 0.034329204113027284, "grad_norm": 99.2446060180664, "learning_rate": 9.658354922279793e-06, "loss": 1.0836, "mean_token_accuracy": 0.854392796754837, "num_tokens": 379883.0, "step": 212 }, { "epoch": 0.034491134321107605, "grad_norm": 92.38343811035156, "learning_rate": 9.656735751295337e-06, "loss": 1.0155, "mean_token_accuracy": 0.8636363744735718, "num_tokens": 381681.0, "step": 213 }, { "epoch": 0.03465306452918792, "grad_norm": 87.25627899169922, "learning_rate": 9.655116580310882e-06, "loss": 1.114, "mean_token_accuracy": 0.8650175333023071, "num_tokens": 383480.0, "step": 214 }, { "epoch": 0.034814994737268234, "grad_norm": 78.69329833984375, "learning_rate": 9.653497409326426e-06, "loss": 0.9398, "mean_token_accuracy": 0.8893114328384399, "num_tokens": 385272.0, "step": 215 }, { "epoch": 0.034976924945348556, "grad_norm": 86.92837524414062, "learning_rate": 9.651878238341969e-06, "loss": 0.9516, "mean_token_accuracy": 0.8828863203525543, "num_tokens": 387064.0, "step": 216 }, { "epoch": 0.03513885515342887, "grad_norm": 73.95407104492188, "learning_rate": 9.650259067357514e-06, "loss": 0.9834, "mean_token_accuracy": 0.8853031992912292, "num_tokens": 388855.0, "step": 217 }, { "epoch": 0.03530078536150919, "grad_norm": 80.76441192626953, "learning_rate": 9.648639896373058e-06, "loss": 0.8365, "mean_token_accuracy": 0.8897058963775635, "num_tokens": 390639.0, "step": 218 }, { "epoch": 0.035462715569589506, "grad_norm": 101.61139678955078, "learning_rate": 9.647020725388602e-06, "loss": 1.4082, "mean_token_accuracy": 0.8655229806900024, "num_tokens": 392433.0, "step": 219 }, { "epoch": 0.03562464577766983, "grad_norm": 81.81256103515625, "learning_rate": 9.645401554404145e-06, "loss": 1.2368, "mean_token_accuracy": 0.8521648347377777, "num_tokens": 394222.0, "step": 220 }, { "epoch": 0.03578657598575014, "grad_norm": 85.47928619384766, "learning_rate": 9.64378238341969e-06, "loss": 1.0927, "mean_token_accuracy": 0.8652519881725311, "num_tokens": 396009.0, "step": 221 }, { "epoch": 0.03594850619383046, "grad_norm": 93.98357391357422, "learning_rate": 9.642163212435234e-06, "loss": 1.0849, "mean_token_accuracy": 0.8631469905376434, "num_tokens": 397799.0, "step": 222 }, { "epoch": 0.03611043640191078, "grad_norm": 89.43865966796875, "learning_rate": 9.640544041450778e-06, "loss": 1.1659, "mean_token_accuracy": 0.86446213722229, "num_tokens": 399591.0, "step": 223 }, { "epoch": 0.03627236660999109, "grad_norm": 77.89927673339844, "learning_rate": 9.638924870466321e-06, "loss": 0.7824, "mean_token_accuracy": 0.8854834735393524, "num_tokens": 401391.0, "step": 224 }, { "epoch": 0.036434296818071414, "grad_norm": 84.23726654052734, "learning_rate": 9.637305699481867e-06, "loss": 1.0932, "mean_token_accuracy": 0.8648186326026917, "num_tokens": 403185.0, "step": 225 }, { "epoch": 0.03659622702615173, "grad_norm": 83.42762756347656, "learning_rate": 9.63568652849741e-06, "loss": 1.0781, "mean_token_accuracy": 0.8731481730937958, "num_tokens": 404972.0, "step": 226 }, { "epoch": 0.03675815723423204, "grad_norm": 90.8319320678711, "learning_rate": 9.634067357512954e-06, "loss": 1.2173, "mean_token_accuracy": 0.8509507179260254, "num_tokens": 406764.0, "step": 227 }, { "epoch": 0.036920087442312365, "grad_norm": 71.17721557617188, "learning_rate": 9.632448186528497e-06, "loss": 0.9213, "mean_token_accuracy": 0.8913982510566711, "num_tokens": 408543.0, "step": 228 }, { "epoch": 0.03708201765039268, "grad_norm": 64.10284423828125, "learning_rate": 9.630829015544043e-06, "loss": 0.8855, "mean_token_accuracy": 0.89650759100914, "num_tokens": 410326.0, "step": 229 }, { "epoch": 0.037243947858473, "grad_norm": 81.04692840576172, "learning_rate": 9.629209844559586e-06, "loss": 1.0142, "mean_token_accuracy": 0.875975489616394, "num_tokens": 412119.0, "step": 230 }, { "epoch": 0.037405878066553315, "grad_norm": 84.19200897216797, "learning_rate": 9.62759067357513e-06, "loss": 1.0721, "mean_token_accuracy": 0.868961364030838, "num_tokens": 413913.0, "step": 231 }, { "epoch": 0.03756780827463363, "grad_norm": 92.13495635986328, "learning_rate": 9.625971502590673e-06, "loss": 1.1045, "mean_token_accuracy": 0.8625689744949341, "num_tokens": 415701.0, "step": 232 }, { "epoch": 0.03772973848271395, "grad_norm": 96.21236419677734, "learning_rate": 9.624352331606219e-06, "loss": 1.1076, "mean_token_accuracy": 0.8659087419509888, "num_tokens": 417489.0, "step": 233 }, { "epoch": 0.037891668690794265, "grad_norm": 62.594482421875, "learning_rate": 9.622733160621762e-06, "loss": 0.711, "mean_token_accuracy": 0.9117647111415863, "num_tokens": 419273.0, "step": 234 }, { "epoch": 0.03805359889887459, "grad_norm": 72.2493667602539, "learning_rate": 9.621113989637306e-06, "loss": 0.9782, "mean_token_accuracy": 0.8702192008495331, "num_tokens": 421067.0, "step": 235 }, { "epoch": 0.0382155291069549, "grad_norm": 76.11592864990234, "learning_rate": 9.619494818652851e-06, "loss": 0.9437, "mean_token_accuracy": 0.8894314765930176, "num_tokens": 422858.0, "step": 236 }, { "epoch": 0.03837745931503522, "grad_norm": 72.14783477783203, "learning_rate": 9.617875647668395e-06, "loss": 0.7942, "mean_token_accuracy": 0.8973430097103119, "num_tokens": 424643.0, "step": 237 }, { "epoch": 0.03853938952311554, "grad_norm": 61.82394027709961, "learning_rate": 9.616256476683938e-06, "loss": 0.8059, "mean_token_accuracy": 0.9017053246498108, "num_tokens": 426429.0, "step": 238 }, { "epoch": 0.03870131973119585, "grad_norm": 86.99252319335938, "learning_rate": 9.614637305699482e-06, "loss": 1.1055, "mean_token_accuracy": 0.8616040349006653, "num_tokens": 428215.0, "step": 239 }, { "epoch": 0.03886324993927617, "grad_norm": 83.90782928466797, "learning_rate": 9.613018134715027e-06, "loss": 1.1475, "mean_token_accuracy": 0.8636764287948608, "num_tokens": 430005.0, "step": 240 }, { "epoch": 0.03902518014735649, "grad_norm": 78.6772232055664, "learning_rate": 9.61139896373057e-06, "loss": 0.9035, "mean_token_accuracy": 0.8970588445663452, "num_tokens": 431789.0, "step": 241 }, { "epoch": 0.03918711035543681, "grad_norm": 101.41692352294922, "learning_rate": 9.609779792746114e-06, "loss": 1.1547, "mean_token_accuracy": 0.8578526079654694, "num_tokens": 433589.0, "step": 242 }, { "epoch": 0.039349040563517124, "grad_norm": 83.16380310058594, "learning_rate": 9.608160621761658e-06, "loss": 1.1516, "mean_token_accuracy": 0.8645991683006287, "num_tokens": 435382.0, "step": 243 }, { "epoch": 0.03951097077159744, "grad_norm": 75.57198333740234, "learning_rate": 9.606541450777203e-06, "loss": 1.1315, "mean_token_accuracy": 0.8804621994495392, "num_tokens": 437170.0, "step": 244 }, { "epoch": 0.03967290097967776, "grad_norm": 89.52605438232422, "learning_rate": 9.604922279792747e-06, "loss": 1.1864, "mean_token_accuracy": 0.8767799437046051, "num_tokens": 438966.0, "step": 245 }, { "epoch": 0.039834831187758074, "grad_norm": 78.80630493164062, "learning_rate": 9.60330310880829e-06, "loss": 1.0454, "mean_token_accuracy": 0.8812949657440186, "num_tokens": 440756.0, "step": 246 }, { "epoch": 0.039996761395838396, "grad_norm": 60.73417282104492, "learning_rate": 9.601683937823834e-06, "loss": 0.7913, "mean_token_accuracy": 0.8968297243118286, "num_tokens": 442539.0, "step": 247 }, { "epoch": 0.04015869160391871, "grad_norm": 92.85823059082031, "learning_rate": 9.60006476683938e-06, "loss": 1.3841, "mean_token_accuracy": 0.8435491025447845, "num_tokens": 444339.0, "step": 248 }, { "epoch": 0.04032062181199903, "grad_norm": 82.19344329833984, "learning_rate": 9.598445595854923e-06, "loss": 1.1141, "mean_token_accuracy": 0.8718289136886597, "num_tokens": 446131.0, "step": 249 }, { "epoch": 0.040482552020079346, "grad_norm": 69.2301025390625, "learning_rate": 9.596826424870466e-06, "loss": 1.0458, "mean_token_accuracy": 0.8918404579162598, "num_tokens": 447920.0, "step": 250 }, { "epoch": 0.04064448222815966, "grad_norm": 73.9405517578125, "learning_rate": 9.59520725388601e-06, "loss": 1.0331, "mean_token_accuracy": 0.8873015940189362, "num_tokens": 449716.0, "step": 251 }, { "epoch": 0.04080641243623998, "grad_norm": 79.64219665527344, "learning_rate": 9.593588082901555e-06, "loss": 1.2847, "mean_token_accuracy": 0.8657226860523224, "num_tokens": 451511.0, "step": 252 }, { "epoch": 0.040968342644320296, "grad_norm": 68.41842651367188, "learning_rate": 9.591968911917099e-06, "loss": 1.1943, "mean_token_accuracy": 0.8614130318164825, "num_tokens": 453297.0, "step": 253 }, { "epoch": 0.04113027285240062, "grad_norm": 66.58094787597656, "learning_rate": 9.590349740932642e-06, "loss": 0.9478, "mean_token_accuracy": 0.8794757127761841, "num_tokens": 455083.0, "step": 254 }, { "epoch": 0.04129220306048093, "grad_norm": 78.18601989746094, "learning_rate": 9.588730569948188e-06, "loss": 1.3005, "mean_token_accuracy": 0.8566032946109772, "num_tokens": 456874.0, "step": 255 }, { "epoch": 0.04145413326856125, "grad_norm": 70.24598693847656, "learning_rate": 9.587111398963731e-06, "loss": 0.8876, "mean_token_accuracy": 0.8764172494411469, "num_tokens": 458677.0, "step": 256 }, { "epoch": 0.04161606347664157, "grad_norm": 59.934486389160156, "learning_rate": 9.585492227979275e-06, "loss": 1.0569, "mean_token_accuracy": 0.873127281665802, "num_tokens": 460457.0, "step": 257 }, { "epoch": 0.04177799368472188, "grad_norm": 70.09595489501953, "learning_rate": 9.583873056994819e-06, "loss": 1.1482, "mean_token_accuracy": 0.8628380000591278, "num_tokens": 462253.0, "step": 258 }, { "epoch": 0.041939923892802204, "grad_norm": 82.5488510131836, "learning_rate": 9.582253886010364e-06, "loss": 1.1594, "mean_token_accuracy": 0.8703097105026245, "num_tokens": 464049.0, "step": 259 }, { "epoch": 0.04210185410088252, "grad_norm": 59.300804138183594, "learning_rate": 9.580634715025907e-06, "loss": 0.9928, "mean_token_accuracy": 0.8900361657142639, "num_tokens": 465851.0, "step": 260 }, { "epoch": 0.04226378430896284, "grad_norm": 81.78755950927734, "learning_rate": 9.579015544041451e-06, "loss": 1.4321, "mean_token_accuracy": 0.8499999940395355, "num_tokens": 467643.0, "step": 261 }, { "epoch": 0.042425714517043155, "grad_norm": 77.30547332763672, "learning_rate": 9.577396373056995e-06, "loss": 0.9493, "mean_token_accuracy": 0.8765076100826263, "num_tokens": 469430.0, "step": 262 }, { "epoch": 0.04258764472512347, "grad_norm": 55.55488586425781, "learning_rate": 9.57577720207254e-06, "loss": 0.9443, "mean_token_accuracy": 0.8917297720909119, "num_tokens": 471218.0, "step": 263 }, { "epoch": 0.04274957493320379, "grad_norm": 82.53324890136719, "learning_rate": 9.574158031088083e-06, "loss": 0.969, "mean_token_accuracy": 0.8780970573425293, "num_tokens": 473009.0, "step": 264 }, { "epoch": 0.042911505141284105, "grad_norm": 53.665287017822266, "learning_rate": 9.572538860103627e-06, "loss": 0.8222, "mean_token_accuracy": 0.8975433111190796, "num_tokens": 474786.0, "step": 265 }, { "epoch": 0.043073435349364426, "grad_norm": 69.20823669433594, "learning_rate": 9.57091968911917e-06, "loss": 1.2566, "mean_token_accuracy": 0.879696249961853, "num_tokens": 476572.0, "step": 266 }, { "epoch": 0.04323536555744474, "grad_norm": 72.65019989013672, "learning_rate": 9.569300518134716e-06, "loss": 0.8797, "mean_token_accuracy": 0.8810641765594482, "num_tokens": 478370.0, "step": 267 }, { "epoch": 0.043397295765525055, "grad_norm": 57.39815139770508, "learning_rate": 9.56768134715026e-06, "loss": 0.8247, "mean_token_accuracy": 0.8956836760044098, "num_tokens": 480150.0, "step": 268 }, { "epoch": 0.04355922597360538, "grad_norm": 62.54112243652344, "learning_rate": 9.566062176165803e-06, "loss": 0.7866, "mean_token_accuracy": 0.893990695476532, "num_tokens": 481926.0, "step": 269 }, { "epoch": 0.04372115618168569, "grad_norm": 73.81266784667969, "learning_rate": 9.564443005181347e-06, "loss": 0.9721, "mean_token_accuracy": 0.8785529732704163, "num_tokens": 483702.0, "step": 270 }, { "epoch": 0.04388308638976601, "grad_norm": 78.97330474853516, "learning_rate": 9.562823834196892e-06, "loss": 0.8328, "mean_token_accuracy": 0.8996659815311432, "num_tokens": 485493.0, "step": 271 }, { "epoch": 0.04404501659784633, "grad_norm": 84.11725616455078, "learning_rate": 9.561204663212436e-06, "loss": 1.0514, "mean_token_accuracy": 0.8642781376838684, "num_tokens": 487306.0, "step": 272 }, { "epoch": 0.04420694680592665, "grad_norm": 81.29261779785156, "learning_rate": 9.559585492227979e-06, "loss": 1.2821, "mean_token_accuracy": 0.8477867245674133, "num_tokens": 489100.0, "step": 273 }, { "epoch": 0.04436887701400696, "grad_norm": 62.31085968017578, "learning_rate": 9.557966321243524e-06, "loss": 0.7428, "mean_token_accuracy": 0.8943609595298767, "num_tokens": 490896.0, "step": 274 }, { "epoch": 0.04453080722208728, "grad_norm": 71.10617065429688, "learning_rate": 9.556347150259068e-06, "loss": 0.9778, "mean_token_accuracy": 0.8795271515846252, "num_tokens": 492690.0, "step": 275 }, { "epoch": 0.0446927374301676, "grad_norm": 83.48087310791016, "learning_rate": 9.554727979274612e-06, "loss": 1.159, "mean_token_accuracy": 0.8639097809791565, "num_tokens": 494494.0, "step": 276 }, { "epoch": 0.044854667638247914, "grad_norm": 54.94813919067383, "learning_rate": 9.553108808290155e-06, "loss": 0.702, "mean_token_accuracy": 0.9010069966316223, "num_tokens": 496278.0, "step": 277 }, { "epoch": 0.045016597846328235, "grad_norm": 64.06427001953125, "learning_rate": 9.5514896373057e-06, "loss": 0.9095, "mean_token_accuracy": 0.8793289959430695, "num_tokens": 498062.0, "step": 278 }, { "epoch": 0.04517852805440855, "grad_norm": 65.4741439819336, "learning_rate": 9.549870466321244e-06, "loss": 0.8072, "mean_token_accuracy": 0.8856909573078156, "num_tokens": 499854.0, "step": 279 }, { "epoch": 0.045340458262488864, "grad_norm": 66.4945297241211, "learning_rate": 9.548251295336788e-06, "loss": 0.9339, "mean_token_accuracy": 0.8850524425506592, "num_tokens": 501645.0, "step": 280 }, { "epoch": 0.045502388470569186, "grad_norm": 71.58250427246094, "learning_rate": 9.546632124352331e-06, "loss": 1.0607, "mean_token_accuracy": 0.8801409304141998, "num_tokens": 503439.0, "step": 281 }, { "epoch": 0.0456643186786495, "grad_norm": 60.97646713256836, "learning_rate": 9.545012953367877e-06, "loss": 0.7326, "mean_token_accuracy": 0.8957557082176208, "num_tokens": 505229.0, "step": 282 }, { "epoch": 0.04582624888672982, "grad_norm": 62.338478088378906, "learning_rate": 9.54339378238342e-06, "loss": 0.8764, "mean_token_accuracy": 0.8967047929763794, "num_tokens": 507012.0, "step": 283 }, { "epoch": 0.045988179094810136, "grad_norm": 70.01009368896484, "learning_rate": 9.541774611398964e-06, "loss": 1.0115, "mean_token_accuracy": 0.8655172288417816, "num_tokens": 508814.0, "step": 284 }, { "epoch": 0.04615010930289046, "grad_norm": 67.59593200683594, "learning_rate": 9.540155440414507e-06, "loss": 1.0102, "mean_token_accuracy": 0.8930011093616486, "num_tokens": 510597.0, "step": 285 }, { "epoch": 0.04631203951097077, "grad_norm": 70.6162109375, "learning_rate": 9.538536269430053e-06, "loss": 0.9386, "mean_token_accuracy": 0.8682743906974792, "num_tokens": 512390.0, "step": 286 }, { "epoch": 0.046473969719051086, "grad_norm": 61.420379638671875, "learning_rate": 9.536917098445596e-06, "loss": 0.9338, "mean_token_accuracy": 0.9003976583480835, "num_tokens": 514173.0, "step": 287 }, { "epoch": 0.04663589992713141, "grad_norm": 67.64603424072266, "learning_rate": 9.53529792746114e-06, "loss": 0.8273, "mean_token_accuracy": 0.8959807753562927, "num_tokens": 515954.0, "step": 288 }, { "epoch": 0.04679783013521172, "grad_norm": 67.98467254638672, "learning_rate": 9.533678756476683e-06, "loss": 0.8773, "mean_token_accuracy": 0.8892516791820526, "num_tokens": 517744.0, "step": 289 }, { "epoch": 0.046959760343292044, "grad_norm": 70.8077392578125, "learning_rate": 9.532059585492229e-06, "loss": 0.9294, "mean_token_accuracy": 0.8863345980644226, "num_tokens": 519536.0, "step": 290 }, { "epoch": 0.04712169055137236, "grad_norm": 62.81648254394531, "learning_rate": 9.530440414507774e-06, "loss": 0.8136, "mean_token_accuracy": 0.905844658613205, "num_tokens": 521324.0, "step": 291 }, { "epoch": 0.04728362075945267, "grad_norm": 65.97950744628906, "learning_rate": 9.528821243523318e-06, "loss": 0.9196, "mean_token_accuracy": 0.8746402859687805, "num_tokens": 523115.0, "step": 292 }, { "epoch": 0.047445550967532994, "grad_norm": 66.72357940673828, "learning_rate": 9.527202072538861e-06, "loss": 0.8658, "mean_token_accuracy": 0.9001141786575317, "num_tokens": 524905.0, "step": 293 }, { "epoch": 0.04760748117561331, "grad_norm": 80.37352752685547, "learning_rate": 9.525582901554405e-06, "loss": 0.8444, "mean_token_accuracy": 0.875331699848175, "num_tokens": 526697.0, "step": 294 }, { "epoch": 0.04776941138369363, "grad_norm": 54.132625579833984, "learning_rate": 9.52396373056995e-06, "loss": 0.7312, "mean_token_accuracy": 0.8980975151062012, "num_tokens": 528474.0, "step": 295 }, { "epoch": 0.047931341591773945, "grad_norm": 70.5918960571289, "learning_rate": 9.522344559585494e-06, "loss": 0.8689, "mean_token_accuracy": 0.8865539729595184, "num_tokens": 530259.0, "step": 296 }, { "epoch": 0.048093271799854266, "grad_norm": 72.65715026855469, "learning_rate": 9.520725388601037e-06, "loss": 0.9228, "mean_token_accuracy": 0.8867753744125366, "num_tokens": 532053.0, "step": 297 }, { "epoch": 0.04825520200793458, "grad_norm": 55.18707275390625, "learning_rate": 9.519106217616582e-06, "loss": 0.7461, "mean_token_accuracy": 0.9007679224014282, "num_tokens": 533837.0, "step": 298 }, { "epoch": 0.048417132216014895, "grad_norm": 77.32190704345703, "learning_rate": 9.517487046632126e-06, "loss": 0.9957, "mean_token_accuracy": 0.8728955388069153, "num_tokens": 535630.0, "step": 299 }, { "epoch": 0.048579062424095217, "grad_norm": 60.8797607421875, "learning_rate": 9.51586787564767e-06, "loss": 0.9098, "mean_token_accuracy": 0.8908167481422424, "num_tokens": 537417.0, "step": 300 }, { "epoch": 0.04874099263217553, "grad_norm": 56.40549087524414, "learning_rate": 9.514248704663213e-06, "loss": 0.7058, "mean_token_accuracy": 0.9058971703052521, "num_tokens": 539205.0, "step": 301 }, { "epoch": 0.04890292284025585, "grad_norm": 66.17485809326172, "learning_rate": 9.512629533678758e-06, "loss": 0.8238, "mean_token_accuracy": 0.8811090290546417, "num_tokens": 540994.0, "step": 302 }, { "epoch": 0.04906485304833617, "grad_norm": 59.788265228271484, "learning_rate": 9.511010362694302e-06, "loss": 0.8027, "mean_token_accuracy": 0.8920482099056244, "num_tokens": 542795.0, "step": 303 }, { "epoch": 0.04922678325641648, "grad_norm": 77.38652038574219, "learning_rate": 9.509391191709846e-06, "loss": 1.3083, "mean_token_accuracy": 0.851588249206543, "num_tokens": 544590.0, "step": 304 }, { "epoch": 0.0493887134644968, "grad_norm": 80.82234191894531, "learning_rate": 9.50777202072539e-06, "loss": 1.0641, "mean_token_accuracy": 0.873657613992691, "num_tokens": 546379.0, "step": 305 }, { "epoch": 0.04955064367257712, "grad_norm": 62.7913932800293, "learning_rate": 9.506152849740935e-06, "loss": 0.8148, "mean_token_accuracy": 0.8920454680919647, "num_tokens": 548170.0, "step": 306 }, { "epoch": 0.04971257388065744, "grad_norm": 81.53544616699219, "learning_rate": 9.504533678756478e-06, "loss": 1.0223, "mean_token_accuracy": 0.8778295814990997, "num_tokens": 549969.0, "step": 307 }, { "epoch": 0.04987450408873775, "grad_norm": 64.9419174194336, "learning_rate": 9.502914507772022e-06, "loss": 1.0853, "mean_token_accuracy": 0.878756046295166, "num_tokens": 551774.0, "step": 308 }, { "epoch": 0.050036434296818075, "grad_norm": 63.24518966674805, "learning_rate": 9.501295336787565e-06, "loss": 0.9629, "mean_token_accuracy": 0.8807711601257324, "num_tokens": 553567.0, "step": 309 }, { "epoch": 0.05019836450489839, "grad_norm": 49.60218048095703, "learning_rate": 9.49967616580311e-06, "loss": 0.7824, "mean_token_accuracy": 0.9033337235450745, "num_tokens": 555348.0, "step": 310 }, { "epoch": 0.050360294712978704, "grad_norm": 49.804195404052734, "learning_rate": 9.498056994818654e-06, "loss": 0.8228, "mean_token_accuracy": 0.915304571390152, "num_tokens": 557131.0, "step": 311 }, { "epoch": 0.050522224921059025, "grad_norm": 78.1496810913086, "learning_rate": 9.496437823834198e-06, "loss": 1.0242, "mean_token_accuracy": 0.8697479069232941, "num_tokens": 558926.0, "step": 312 }, { "epoch": 0.05068415512913934, "grad_norm": 62.136505126953125, "learning_rate": 9.494818652849741e-06, "loss": 0.895, "mean_token_accuracy": 0.8791474103927612, "num_tokens": 560711.0, "step": 313 }, { "epoch": 0.05084608533721966, "grad_norm": 61.435508728027344, "learning_rate": 9.493199481865287e-06, "loss": 0.8759, "mean_token_accuracy": 0.8876686692237854, "num_tokens": 562508.0, "step": 314 }, { "epoch": 0.051008015545299976, "grad_norm": 51.740135192871094, "learning_rate": 9.49158031088083e-06, "loss": 0.7385, "mean_token_accuracy": 0.8983990252017975, "num_tokens": 564305.0, "step": 315 }, { "epoch": 0.05116994575338029, "grad_norm": 64.10138702392578, "learning_rate": 9.489961139896374e-06, "loss": 0.8662, "mean_token_accuracy": 0.8905942142009735, "num_tokens": 566091.0, "step": 316 }, { "epoch": 0.05133187596146061, "grad_norm": 81.78770446777344, "learning_rate": 9.488341968911919e-06, "loss": 1.174, "mean_token_accuracy": 0.8632535636425018, "num_tokens": 567888.0, "step": 317 }, { "epoch": 0.051493806169540926, "grad_norm": 69.4294662475586, "learning_rate": 9.486722797927463e-06, "loss": 0.9715, "mean_token_accuracy": 0.8850221633911133, "num_tokens": 569669.0, "step": 318 }, { "epoch": 0.05165573637762125, "grad_norm": 66.24795532226562, "learning_rate": 9.485103626943006e-06, "loss": 1.0518, "mean_token_accuracy": 0.8722862899303436, "num_tokens": 571447.0, "step": 319 }, { "epoch": 0.05181766658570156, "grad_norm": 56.03800582885742, "learning_rate": 9.48348445595855e-06, "loss": 0.885, "mean_token_accuracy": 0.8868373930454254, "num_tokens": 573243.0, "step": 320 }, { "epoch": 0.05197959679378188, "grad_norm": 66.54347229003906, "learning_rate": 9.481865284974095e-06, "loss": 0.9992, "mean_token_accuracy": 0.8723843097686768, "num_tokens": 575037.0, "step": 321 }, { "epoch": 0.0521415270018622, "grad_norm": 58.032493591308594, "learning_rate": 9.480246113989639e-06, "loss": 0.982, "mean_token_accuracy": 0.8778461813926697, "num_tokens": 576820.0, "step": 322 }, { "epoch": 0.05230345720994251, "grad_norm": 67.31257629394531, "learning_rate": 9.478626943005182e-06, "loss": 1.1216, "mean_token_accuracy": 0.8691913485527039, "num_tokens": 578621.0, "step": 323 }, { "epoch": 0.052465387418022834, "grad_norm": 47.67085647583008, "learning_rate": 9.477007772020726e-06, "loss": 0.8201, "mean_token_accuracy": 0.9005842208862305, "num_tokens": 580415.0, "step": 324 }, { "epoch": 0.05262731762610315, "grad_norm": 58.07624816894531, "learning_rate": 9.475388601036271e-06, "loss": 0.8518, "mean_token_accuracy": 0.8872580230236053, "num_tokens": 582202.0, "step": 325 }, { "epoch": 0.05278924783418347, "grad_norm": 56.813499450683594, "learning_rate": 9.473769430051815e-06, "loss": 0.8567, "mean_token_accuracy": 0.884892076253891, "num_tokens": 583992.0, "step": 326 }, { "epoch": 0.052951178042263784, "grad_norm": 59.193092346191406, "learning_rate": 9.472150259067358e-06, "loss": 1.0664, "mean_token_accuracy": 0.875, "num_tokens": 585776.0, "step": 327 }, { "epoch": 0.0531131082503441, "grad_norm": 69.3732681274414, "learning_rate": 9.470531088082902e-06, "loss": 1.0797, "mean_token_accuracy": 0.8646662831306458, "num_tokens": 587568.0, "step": 328 }, { "epoch": 0.05327503845842442, "grad_norm": 65.90499877929688, "learning_rate": 9.468911917098447e-06, "loss": 1.2268, "mean_token_accuracy": 0.8905171155929565, "num_tokens": 589372.0, "step": 329 }, { "epoch": 0.053436968666504735, "grad_norm": 73.06372833251953, "learning_rate": 9.467292746113991e-06, "loss": 1.1115, "mean_token_accuracy": 0.8717085123062134, "num_tokens": 591164.0, "step": 330 }, { "epoch": 0.053598898874585056, "grad_norm": 44.78315353393555, "learning_rate": 9.465673575129534e-06, "loss": 0.7948, "mean_token_accuracy": 0.9079106450080872, "num_tokens": 592958.0, "step": 331 }, { "epoch": 0.05376082908266537, "grad_norm": 50.79363250732422, "learning_rate": 9.464054404145078e-06, "loss": 0.9067, "mean_token_accuracy": 0.8981527090072632, "num_tokens": 594755.0, "step": 332 }, { "epoch": 0.05392275929074569, "grad_norm": 54.239864349365234, "learning_rate": 9.462435233160623e-06, "loss": 0.8366, "mean_token_accuracy": 0.8912129402160645, "num_tokens": 596543.0, "step": 333 }, { "epoch": 0.05408468949882601, "grad_norm": 60.15306091308594, "learning_rate": 9.460816062176167e-06, "loss": 1.1363, "mean_token_accuracy": 0.8676398992538452, "num_tokens": 598327.0, "step": 334 }, { "epoch": 0.05424661970690632, "grad_norm": 53.625343322753906, "learning_rate": 9.45919689119171e-06, "loss": 1.0098, "mean_token_accuracy": 0.8868244886398315, "num_tokens": 600122.0, "step": 335 }, { "epoch": 0.05440854991498664, "grad_norm": 55.842166900634766, "learning_rate": 9.457577720207256e-06, "loss": 1.0202, "mean_token_accuracy": 0.8848241567611694, "num_tokens": 601911.0, "step": 336 }, { "epoch": 0.05457048012306696, "grad_norm": 52.97017288208008, "learning_rate": 9.4559585492228e-06, "loss": 0.844, "mean_token_accuracy": 0.8799320757389069, "num_tokens": 603706.0, "step": 337 }, { "epoch": 0.05473241033114728, "grad_norm": 59.960540771484375, "learning_rate": 9.454339378238343e-06, "loss": 0.8593, "mean_token_accuracy": 0.8954392969608307, "num_tokens": 605499.0, "step": 338 }, { "epoch": 0.05489434053922759, "grad_norm": 59.53228759765625, "learning_rate": 9.452720207253887e-06, "loss": 0.8366, "mean_token_accuracy": 0.893928050994873, "num_tokens": 607294.0, "step": 339 }, { "epoch": 0.05505627074730791, "grad_norm": 55.91241455078125, "learning_rate": 9.451101036269432e-06, "loss": 0.8793, "mean_token_accuracy": 0.8915935754776001, "num_tokens": 609091.0, "step": 340 }, { "epoch": 0.05521820095538823, "grad_norm": 54.15565490722656, "learning_rate": 9.449481865284975e-06, "loss": 0.7921, "mean_token_accuracy": 0.8945436477661133, "num_tokens": 610887.0, "step": 341 }, { "epoch": 0.05538013116346854, "grad_norm": 64.91709899902344, "learning_rate": 9.447862694300519e-06, "loss": 0.8982, "mean_token_accuracy": 0.8832933604717255, "num_tokens": 612673.0, "step": 342 }, { "epoch": 0.055542061371548865, "grad_norm": 52.16558837890625, "learning_rate": 9.446243523316063e-06, "loss": 0.8571, "mean_token_accuracy": 0.893382340669632, "num_tokens": 614457.0, "step": 343 }, { "epoch": 0.05570399157962918, "grad_norm": 59.97270965576172, "learning_rate": 9.444624352331608e-06, "loss": 0.9543, "mean_token_accuracy": 0.8834813833236694, "num_tokens": 616260.0, "step": 344 }, { "epoch": 0.055865921787709494, "grad_norm": 50.6729621887207, "learning_rate": 9.443005181347151e-06, "loss": 0.7991, "mean_token_accuracy": 0.8850965797901154, "num_tokens": 618042.0, "step": 345 }, { "epoch": 0.056027851995789815, "grad_norm": 73.4538345336914, "learning_rate": 9.441386010362695e-06, "loss": 1.3164, "mean_token_accuracy": 0.8522437214851379, "num_tokens": 619845.0, "step": 346 }, { "epoch": 0.05618978220387013, "grad_norm": 54.023887634277344, "learning_rate": 9.439766839378239e-06, "loss": 0.9981, "mean_token_accuracy": 0.8756336271762848, "num_tokens": 621638.0, "step": 347 }, { "epoch": 0.05635171241195045, "grad_norm": 44.938392639160156, "learning_rate": 9.438147668393784e-06, "loss": 0.8564, "mean_token_accuracy": 0.8960838913917542, "num_tokens": 623429.0, "step": 348 }, { "epoch": 0.056513642620030766, "grad_norm": 48.227046966552734, "learning_rate": 9.436528497409328e-06, "loss": 0.8697, "mean_token_accuracy": 0.8854352533817291, "num_tokens": 625229.0, "step": 349 }, { "epoch": 0.05667557282811109, "grad_norm": 55.08008575439453, "learning_rate": 9.434909326424871e-06, "loss": 0.809, "mean_token_accuracy": 0.8933570086956024, "num_tokens": 627022.0, "step": 350 }, { "epoch": 0.0568375030361914, "grad_norm": 36.199302673339844, "learning_rate": 9.433290155440415e-06, "loss": 0.6275, "mean_token_accuracy": 0.923739492893219, "num_tokens": 628810.0, "step": 351 }, { "epoch": 0.056999433244271716, "grad_norm": 60.80564498901367, "learning_rate": 9.43167098445596e-06, "loss": 1.1439, "mean_token_accuracy": 0.8571495413780212, "num_tokens": 630615.0, "step": 352 }, { "epoch": 0.05716136345235204, "grad_norm": 45.7132453918457, "learning_rate": 9.430051813471504e-06, "loss": 0.7907, "mean_token_accuracy": 0.8997512459754944, "num_tokens": 632396.0, "step": 353 }, { "epoch": 0.05732329366043235, "grad_norm": 51.128448486328125, "learning_rate": 9.428432642487047e-06, "loss": 0.8991, "mean_token_accuracy": 0.8830883800983429, "num_tokens": 634173.0, "step": 354 }, { "epoch": 0.05748522386851267, "grad_norm": 51.39332580566406, "learning_rate": 9.426813471502592e-06, "loss": 0.9404, "mean_token_accuracy": 0.88721963763237, "num_tokens": 635960.0, "step": 355 }, { "epoch": 0.05764715407659299, "grad_norm": 60.653045654296875, "learning_rate": 9.425194300518136e-06, "loss": 1.1926, "mean_token_accuracy": 0.873776912689209, "num_tokens": 637765.0, "step": 356 }, { "epoch": 0.0578090842846733, "grad_norm": 57.790435791015625, "learning_rate": 9.42357512953368e-06, "loss": 1.175, "mean_token_accuracy": 0.8596720099449158, "num_tokens": 639562.0, "step": 357 }, { "epoch": 0.057971014492753624, "grad_norm": 52.67477035522461, "learning_rate": 9.421955958549223e-06, "loss": 0.9902, "mean_token_accuracy": 0.8744454383850098, "num_tokens": 641359.0, "step": 358 }, { "epoch": 0.05813294470083394, "grad_norm": 52.826515197753906, "learning_rate": 9.420336787564769e-06, "loss": 0.8961, "mean_token_accuracy": 0.8944833278656006, "num_tokens": 643155.0, "step": 359 }, { "epoch": 0.05829487490891426, "grad_norm": 61.656402587890625, "learning_rate": 9.418717616580312e-06, "loss": 1.0522, "mean_token_accuracy": 0.8801389932632446, "num_tokens": 644952.0, "step": 360 }, { "epoch": 0.058456805116994574, "grad_norm": 55.095741271972656, "learning_rate": 9.417098445595856e-06, "loss": 0.8306, "mean_token_accuracy": 0.8971613943576813, "num_tokens": 646736.0, "step": 361 }, { "epoch": 0.058618735325074896, "grad_norm": 53.96144485473633, "learning_rate": 9.4154792746114e-06, "loss": 1.0498, "mean_token_accuracy": 0.8833284676074982, "num_tokens": 648535.0, "step": 362 }, { "epoch": 0.05878066553315521, "grad_norm": 51.875186920166016, "learning_rate": 9.413860103626945e-06, "loss": 0.9864, "mean_token_accuracy": 0.8789206147193909, "num_tokens": 650319.0, "step": 363 }, { "epoch": 0.058942595741235525, "grad_norm": 53.755615234375, "learning_rate": 9.412240932642488e-06, "loss": 0.8408, "mean_token_accuracy": 0.8697420656681061, "num_tokens": 652115.0, "step": 364 }, { "epoch": 0.059104525949315846, "grad_norm": 56.433921813964844, "learning_rate": 9.410621761658032e-06, "loss": 1.1127, "mean_token_accuracy": 0.8689115643501282, "num_tokens": 653924.0, "step": 365 }, { "epoch": 0.05926645615739616, "grad_norm": 56.5037956237793, "learning_rate": 9.409002590673575e-06, "loss": 0.9033, "mean_token_accuracy": 0.8764738440513611, "num_tokens": 655719.0, "step": 366 }, { "epoch": 0.05942838636547648, "grad_norm": 54.06751251220703, "learning_rate": 9.40738341968912e-06, "loss": 0.7738, "mean_token_accuracy": 0.9025388360023499, "num_tokens": 657508.0, "step": 367 }, { "epoch": 0.0595903165735568, "grad_norm": 50.80967330932617, "learning_rate": 9.405764248704664e-06, "loss": 0.856, "mean_token_accuracy": 0.894341230392456, "num_tokens": 659293.0, "step": 368 }, { "epoch": 0.05975224678163711, "grad_norm": 53.56394577026367, "learning_rate": 9.404145077720208e-06, "loss": 0.8477, "mean_token_accuracy": 0.895600438117981, "num_tokens": 661083.0, "step": 369 }, { "epoch": 0.05991417698971743, "grad_norm": 40.78882598876953, "learning_rate": 9.402525906735751e-06, "loss": 0.634, "mean_token_accuracy": 0.9120330810546875, "num_tokens": 662868.0, "step": 370 }, { "epoch": 0.06007610719779775, "grad_norm": 50.89396286010742, "learning_rate": 9.400906735751297e-06, "loss": 0.8575, "mean_token_accuracy": 0.8937198221683502, "num_tokens": 664653.0, "step": 371 }, { "epoch": 0.06023803740587807, "grad_norm": 60.67742919921875, "learning_rate": 9.39928756476684e-06, "loss": 1.1599, "mean_token_accuracy": 0.8480996787548065, "num_tokens": 666447.0, "step": 372 }, { "epoch": 0.06039996761395838, "grad_norm": 59.49285888671875, "learning_rate": 9.397668393782384e-06, "loss": 1.0575, "mean_token_accuracy": 0.8697862327098846, "num_tokens": 668236.0, "step": 373 }, { "epoch": 0.060561897822038704, "grad_norm": 58.06311798095703, "learning_rate": 9.396049222797929e-06, "loss": 0.9765, "mean_token_accuracy": 0.8803423047065735, "num_tokens": 670030.0, "step": 374 }, { "epoch": 0.06072382803011902, "grad_norm": 45.018775939941406, "learning_rate": 9.394430051813473e-06, "loss": 0.7249, "mean_token_accuracy": 0.9039260447025299, "num_tokens": 671823.0, "step": 375 }, { "epoch": 0.06088575823819933, "grad_norm": 61.07163619995117, "learning_rate": 9.392810880829016e-06, "loss": 1.1467, "mean_token_accuracy": 0.8736453056335449, "num_tokens": 673620.0, "step": 376 }, { "epoch": 0.061047688446279655, "grad_norm": 60.545082092285156, "learning_rate": 9.39119170984456e-06, "loss": 0.8748, "mean_token_accuracy": 0.8868243098258972, "num_tokens": 675416.0, "step": 377 }, { "epoch": 0.06120961865435997, "grad_norm": 50.514041900634766, "learning_rate": 9.389572538860105e-06, "loss": 0.6595, "mean_token_accuracy": 0.9117647111415863, "num_tokens": 677200.0, "step": 378 }, { "epoch": 0.06137154886244029, "grad_norm": 50.44256591796875, "learning_rate": 9.387953367875649e-06, "loss": 1.0815, "mean_token_accuracy": 0.8913865685462952, "num_tokens": 678988.0, "step": 379 }, { "epoch": 0.061533479070520605, "grad_norm": 67.69346618652344, "learning_rate": 9.386334196891192e-06, "loss": 1.2634, "mean_token_accuracy": 0.8442807197570801, "num_tokens": 680783.0, "step": 380 }, { "epoch": 0.06169540927860092, "grad_norm": 56.003379821777344, "learning_rate": 9.384715025906736e-06, "loss": 0.7757, "mean_token_accuracy": 0.8904095888137817, "num_tokens": 682578.0, "step": 381 }, { "epoch": 0.06185733948668124, "grad_norm": 48.92418670654297, "learning_rate": 9.383095854922281e-06, "loss": 0.7025, "mean_token_accuracy": 0.9082009792327881, "num_tokens": 684373.0, "step": 382 }, { "epoch": 0.062019269694761556, "grad_norm": 66.3655014038086, "learning_rate": 9.381476683937825e-06, "loss": 0.999, "mean_token_accuracy": 0.8667808473110199, "num_tokens": 686171.0, "step": 383 }, { "epoch": 0.06218119990284188, "grad_norm": 51.00679397583008, "learning_rate": 9.379857512953368e-06, "loss": 0.7846, "mean_token_accuracy": 0.9018518328666687, "num_tokens": 687958.0, "step": 384 }, { "epoch": 0.06234313011092219, "grad_norm": 60.59869384765625, "learning_rate": 9.378238341968912e-06, "loss": 0.9606, "mean_token_accuracy": 0.8720994889736176, "num_tokens": 689751.0, "step": 385 }, { "epoch": 0.0625050603190025, "grad_norm": 41.44636917114258, "learning_rate": 9.376619170984457e-06, "loss": 0.7942, "mean_token_accuracy": 0.8996130526065826, "num_tokens": 691532.0, "step": 386 }, { "epoch": 0.06266699052708283, "grad_norm": 54.59798812866211, "learning_rate": 9.375000000000001e-06, "loss": 0.8995, "mean_token_accuracy": 0.8741718530654907, "num_tokens": 693322.0, "step": 387 }, { "epoch": 0.06282892073516315, "grad_norm": 47.00348663330078, "learning_rate": 9.373380829015544e-06, "loss": 0.7725, "mean_token_accuracy": 0.88413867354393, "num_tokens": 695110.0, "step": 388 }, { "epoch": 0.06299085094324346, "grad_norm": 54.444454193115234, "learning_rate": 9.371761658031088e-06, "loss": 0.8278, "mean_token_accuracy": 0.8774765431880951, "num_tokens": 696906.0, "step": 389 }, { "epoch": 0.06315278115132378, "grad_norm": 58.430118560791016, "learning_rate": 9.370142487046633e-06, "loss": 1.1883, "mean_token_accuracy": 0.86239293217659, "num_tokens": 698713.0, "step": 390 }, { "epoch": 0.0633147113594041, "grad_norm": 50.70392608642578, "learning_rate": 9.368523316062177e-06, "loss": 0.9776, "mean_token_accuracy": 0.879334956407547, "num_tokens": 700515.0, "step": 391 }, { "epoch": 0.06347664156748442, "grad_norm": 48.93309020996094, "learning_rate": 9.36690414507772e-06, "loss": 0.9226, "mean_token_accuracy": 0.8825734555721283, "num_tokens": 702308.0, "step": 392 }, { "epoch": 0.06363857177556473, "grad_norm": 45.962833404541016, "learning_rate": 9.365284974093266e-06, "loss": 0.7739, "mean_token_accuracy": 0.8962543606758118, "num_tokens": 704100.0, "step": 393 }, { "epoch": 0.06380050198364505, "grad_norm": 41.612205505371094, "learning_rate": 9.36366580310881e-06, "loss": 0.8133, "mean_token_accuracy": 0.8959369957447052, "num_tokens": 705890.0, "step": 394 }, { "epoch": 0.06396243219172537, "grad_norm": 53.03821563720703, "learning_rate": 9.362046632124353e-06, "loss": 1.0155, "mean_token_accuracy": 0.8670751750469208, "num_tokens": 707673.0, "step": 395 }, { "epoch": 0.06412436239980568, "grad_norm": 57.093421936035156, "learning_rate": 9.360427461139897e-06, "loss": 0.8718, "mean_token_accuracy": 0.8892857134342194, "num_tokens": 709465.0, "step": 396 }, { "epoch": 0.064286292607886, "grad_norm": 57.488162994384766, "learning_rate": 9.358808290155442e-06, "loss": 1.1046, "mean_token_accuracy": 0.8744049370288849, "num_tokens": 711264.0, "step": 397 }, { "epoch": 0.06444822281596632, "grad_norm": 49.41898727416992, "learning_rate": 9.357189119170985e-06, "loss": 1.0047, "mean_token_accuracy": 0.8852111101150513, "num_tokens": 713054.0, "step": 398 }, { "epoch": 0.06461015302404663, "grad_norm": 47.096614837646484, "learning_rate": 9.355569948186529e-06, "loss": 0.8205, "mean_token_accuracy": 0.8815559446811676, "num_tokens": 714845.0, "step": 399 }, { "epoch": 0.06477208323212695, "grad_norm": 49.34243392944336, "learning_rate": 9.353950777202073e-06, "loss": 0.83, "mean_token_accuracy": 0.8867270648479462, "num_tokens": 716631.0, "step": 400 }, { "epoch": 0.06493401344020727, "grad_norm": 57.568424224853516, "learning_rate": 9.352331606217618e-06, "loss": 0.9312, "mean_token_accuracy": 0.8923013508319855, "num_tokens": 718439.0, "step": 401 }, { "epoch": 0.0650959436482876, "grad_norm": 43.34575653076172, "learning_rate": 9.350712435233161e-06, "loss": 0.7075, "mean_token_accuracy": 0.8998723030090332, "num_tokens": 720231.0, "step": 402 }, { "epoch": 0.0652578738563679, "grad_norm": 42.3759880065918, "learning_rate": 9.349093264248705e-06, "loss": 0.744, "mean_token_accuracy": 0.9106818735599518, "num_tokens": 722021.0, "step": 403 }, { "epoch": 0.06541980406444822, "grad_norm": 37.84474182128906, "learning_rate": 9.347474093264249e-06, "loss": 0.7185, "mean_token_accuracy": 0.9000000059604645, "num_tokens": 723813.0, "step": 404 }, { "epoch": 0.06558173427252854, "grad_norm": 50.4435920715332, "learning_rate": 9.345854922279794e-06, "loss": 0.9955, "mean_token_accuracy": 0.8763368427753448, "num_tokens": 725608.0, "step": 405 }, { "epoch": 0.06574366448060885, "grad_norm": 40.89650344848633, "learning_rate": 9.344235751295338e-06, "loss": 0.7661, "mean_token_accuracy": 0.8982283174991608, "num_tokens": 727385.0, "step": 406 }, { "epoch": 0.06590559468868917, "grad_norm": 39.34998321533203, "learning_rate": 9.342616580310881e-06, "loss": 0.9652, "mean_token_accuracy": 0.9014598429203033, "num_tokens": 729171.0, "step": 407 }, { "epoch": 0.0660675248967695, "grad_norm": 44.388153076171875, "learning_rate": 9.340997409326425e-06, "loss": 0.7458, "mean_token_accuracy": 0.9034444987773895, "num_tokens": 730972.0, "step": 408 }, { "epoch": 0.06622945510484982, "grad_norm": 47.50635528564453, "learning_rate": 9.33937823834197e-06, "loss": 0.8579, "mean_token_accuracy": 0.8852040767669678, "num_tokens": 732771.0, "step": 409 }, { "epoch": 0.06639138531293012, "grad_norm": 37.823299407958984, "learning_rate": 9.337759067357514e-06, "loss": 0.7391, "mean_token_accuracy": 0.9090404212474823, "num_tokens": 734558.0, "step": 410 }, { "epoch": 0.06655331552101044, "grad_norm": 53.0116081237793, "learning_rate": 9.336139896373057e-06, "loss": 1.03, "mean_token_accuracy": 0.8753654956817627, "num_tokens": 736357.0, "step": 411 }, { "epoch": 0.06671524572909077, "grad_norm": 48.80860900878906, "learning_rate": 9.334520725388602e-06, "loss": 0.8684, "mean_token_accuracy": 0.8845965564250946, "num_tokens": 738155.0, "step": 412 }, { "epoch": 0.06687717593717107, "grad_norm": 40.512264251708984, "learning_rate": 9.332901554404146e-06, "loss": 0.651, "mean_token_accuracy": 0.906927227973938, "num_tokens": 739936.0, "step": 413 }, { "epoch": 0.0670391061452514, "grad_norm": 47.08877182006836, "learning_rate": 9.33128238341969e-06, "loss": 0.7703, "mean_token_accuracy": 0.8892156779766083, "num_tokens": 741734.0, "step": 414 }, { "epoch": 0.06720103635333172, "grad_norm": 48.44396209716797, "learning_rate": 9.329663212435233e-06, "loss": 0.8961, "mean_token_accuracy": 0.8935215473175049, "num_tokens": 743518.0, "step": 415 }, { "epoch": 0.06736296656141204, "grad_norm": 48.781158447265625, "learning_rate": 9.328044041450779e-06, "loss": 0.9113, "mean_token_accuracy": 0.8889174461364746, "num_tokens": 745309.0, "step": 416 }, { "epoch": 0.06752489676949235, "grad_norm": 52.43247985839844, "learning_rate": 9.326424870466322e-06, "loss": 0.9935, "mean_token_accuracy": 0.8707874119281769, "num_tokens": 747099.0, "step": 417 }, { "epoch": 0.06768682697757267, "grad_norm": 45.427188873291016, "learning_rate": 9.324805699481866e-06, "loss": 0.903, "mean_token_accuracy": 0.8879020512104034, "num_tokens": 748887.0, "step": 418 }, { "epoch": 0.06784875718565299, "grad_norm": 41.08964920043945, "learning_rate": 9.32318652849741e-06, "loss": 0.7527, "mean_token_accuracy": 0.8930870294570923, "num_tokens": 750671.0, "step": 419 }, { "epoch": 0.0680106873937333, "grad_norm": 59.60834503173828, "learning_rate": 9.321567357512955e-06, "loss": 1.1115, "mean_token_accuracy": 0.8592792749404907, "num_tokens": 752481.0, "step": 420 }, { "epoch": 0.06817261760181362, "grad_norm": 60.86803436279297, "learning_rate": 9.319948186528498e-06, "loss": 0.928, "mean_token_accuracy": 0.8842214047908783, "num_tokens": 754278.0, "step": 421 }, { "epoch": 0.06833454780989394, "grad_norm": 48.281036376953125, "learning_rate": 9.318329015544042e-06, "loss": 0.9406, "mean_token_accuracy": 0.8768051266670227, "num_tokens": 756066.0, "step": 422 }, { "epoch": 0.06849647801797425, "grad_norm": 39.070858001708984, "learning_rate": 9.316709844559585e-06, "loss": 0.7163, "mean_token_accuracy": 0.9155176877975464, "num_tokens": 757850.0, "step": 423 }, { "epoch": 0.06865840822605457, "grad_norm": 43.524845123291016, "learning_rate": 9.31509067357513e-06, "loss": 0.8807, "mean_token_accuracy": 0.897176593542099, "num_tokens": 759643.0, "step": 424 }, { "epoch": 0.06882033843413489, "grad_norm": 49.84126281738281, "learning_rate": 9.313471502590674e-06, "loss": 0.8868, "mean_token_accuracy": 0.8762077391147614, "num_tokens": 761437.0, "step": 425 }, { "epoch": 0.06898226864221521, "grad_norm": 35.13079071044922, "learning_rate": 9.311852331606218e-06, "loss": 0.805, "mean_token_accuracy": 0.9032507538795471, "num_tokens": 763218.0, "step": 426 }, { "epoch": 0.06914419885029552, "grad_norm": 53.83464431762695, "learning_rate": 9.310233160621761e-06, "loss": 0.8044, "mean_token_accuracy": 0.8874330222606659, "num_tokens": 765014.0, "step": 427 }, { "epoch": 0.06930612905837584, "grad_norm": 51.93909454345703, "learning_rate": 9.308613989637307e-06, "loss": 1.052, "mean_token_accuracy": 0.8821428716182709, "num_tokens": 766806.0, "step": 428 }, { "epoch": 0.06946805926645616, "grad_norm": 41.9996223449707, "learning_rate": 9.30699481865285e-06, "loss": 0.805, "mean_token_accuracy": 0.9021008610725403, "num_tokens": 768594.0, "step": 429 }, { "epoch": 0.06962998947453647, "grad_norm": 40.266807556152344, "learning_rate": 9.305375647668394e-06, "loss": 0.8126, "mean_token_accuracy": 0.893869936466217, "num_tokens": 770380.0, "step": 430 }, { "epoch": 0.06979191968261679, "grad_norm": 46.49650955200195, "learning_rate": 9.303756476683939e-06, "loss": 0.9622, "mean_token_accuracy": 0.8893324136734009, "num_tokens": 772155.0, "step": 431 }, { "epoch": 0.06995384989069711, "grad_norm": 47.63191604614258, "learning_rate": 9.302137305699483e-06, "loss": 0.8953, "mean_token_accuracy": 0.8974014818668365, "num_tokens": 773940.0, "step": 432 }, { "epoch": 0.07011578009877743, "grad_norm": 50.4738655090332, "learning_rate": 9.300518134715026e-06, "loss": 0.8214, "mean_token_accuracy": 0.8861073553562164, "num_tokens": 775739.0, "step": 433 }, { "epoch": 0.07027771030685774, "grad_norm": 44.69540023803711, "learning_rate": 9.29889896373057e-06, "loss": 0.8688, "mean_token_accuracy": 0.9032374024391174, "num_tokens": 777530.0, "step": 434 }, { "epoch": 0.07043964051493806, "grad_norm": 39.313316345214844, "learning_rate": 9.297279792746115e-06, "loss": 0.7543, "mean_token_accuracy": 0.8961609601974487, "num_tokens": 779321.0, "step": 435 }, { "epoch": 0.07060157072301838, "grad_norm": 44.265167236328125, "learning_rate": 9.295660621761659e-06, "loss": 0.8441, "mean_token_accuracy": 0.8937326967716217, "num_tokens": 781115.0, "step": 436 }, { "epoch": 0.07076350093109869, "grad_norm": 51.03866958618164, "learning_rate": 9.294041450777202e-06, "loss": 0.9556, "mean_token_accuracy": 0.8880982398986816, "num_tokens": 782904.0, "step": 437 }, { "epoch": 0.07092543113917901, "grad_norm": 41.49946975708008, "learning_rate": 9.292422279792746e-06, "loss": 0.756, "mean_token_accuracy": 0.8954051733016968, "num_tokens": 784684.0, "step": 438 }, { "epoch": 0.07108736134725933, "grad_norm": 49.79719924926758, "learning_rate": 9.290803108808291e-06, "loss": 0.8486, "mean_token_accuracy": 0.8881118893623352, "num_tokens": 786482.0, "step": 439 }, { "epoch": 0.07124929155533966, "grad_norm": 34.358699798583984, "learning_rate": 9.289183937823835e-06, "loss": 0.7141, "mean_token_accuracy": 0.9086354076862335, "num_tokens": 788268.0, "step": 440 }, { "epoch": 0.07141122176341996, "grad_norm": 39.487125396728516, "learning_rate": 9.287564766839378e-06, "loss": 0.6951, "mean_token_accuracy": 0.8991561830043793, "num_tokens": 790058.0, "step": 441 }, { "epoch": 0.07157315197150028, "grad_norm": 55.616573333740234, "learning_rate": 9.285945595854922e-06, "loss": 0.9965, "mean_token_accuracy": 0.8776397407054901, "num_tokens": 791848.0, "step": 442 }, { "epoch": 0.0717350821795806, "grad_norm": 44.5058479309082, "learning_rate": 9.284326424870467e-06, "loss": 0.8823, "mean_token_accuracy": 0.8960431516170502, "num_tokens": 793639.0, "step": 443 }, { "epoch": 0.07189701238766091, "grad_norm": 40.10417175292969, "learning_rate": 9.282707253886011e-06, "loss": 0.7197, "mean_token_accuracy": 0.9124087691307068, "num_tokens": 795425.0, "step": 444 }, { "epoch": 0.07205894259574123, "grad_norm": 48.371299743652344, "learning_rate": 9.281088082901554e-06, "loss": 0.8842, "mean_token_accuracy": 0.8732331693172455, "num_tokens": 797221.0, "step": 445 }, { "epoch": 0.07222087280382156, "grad_norm": 41.847511291503906, "learning_rate": 9.279468911917098e-06, "loss": 0.6581, "mean_token_accuracy": 0.9116222262382507, "num_tokens": 799016.0, "step": 446 }, { "epoch": 0.07238280301190186, "grad_norm": 50.69091796875, "learning_rate": 9.277849740932643e-06, "loss": 1.1012, "mean_token_accuracy": 0.8838366270065308, "num_tokens": 800811.0, "step": 447 }, { "epoch": 0.07254473321998219, "grad_norm": 37.527957916259766, "learning_rate": 9.276230569948187e-06, "loss": 0.6223, "mean_token_accuracy": 0.9081102907657623, "num_tokens": 802595.0, "step": 448 }, { "epoch": 0.0727066634280625, "grad_norm": 32.14083480834961, "learning_rate": 9.27461139896373e-06, "loss": 0.6744, "mean_token_accuracy": 0.9088995456695557, "num_tokens": 804381.0, "step": 449 }, { "epoch": 0.07286859363614283, "grad_norm": 57.70633316040039, "learning_rate": 9.272992227979276e-06, "loss": 1.0948, "mean_token_accuracy": 0.8817920386791229, "num_tokens": 806170.0, "step": 450 }, { "epoch": 0.07303052384422314, "grad_norm": 47.517555236816406, "learning_rate": 9.27137305699482e-06, "loss": 0.8162, "mean_token_accuracy": 0.8787415027618408, "num_tokens": 807969.0, "step": 451 }, { "epoch": 0.07319245405230346, "grad_norm": 36.685272216796875, "learning_rate": 9.269753886010363e-06, "loss": 0.7813, "mean_token_accuracy": 0.8994913697242737, "num_tokens": 809750.0, "step": 452 }, { "epoch": 0.07335438426038378, "grad_norm": 47.20469665527344, "learning_rate": 9.268134715025907e-06, "loss": 1.0021, "mean_token_accuracy": 0.8812949657440186, "num_tokens": 811540.0, "step": 453 }, { "epoch": 0.07351631446846409, "grad_norm": 33.17751693725586, "learning_rate": 9.266515544041452e-06, "loss": 0.7147, "mean_token_accuracy": 0.9102478623390198, "num_tokens": 813331.0, "step": 454 }, { "epoch": 0.07367824467654441, "grad_norm": 38.700862884521484, "learning_rate": 9.264896373056995e-06, "loss": 0.7663, "mean_token_accuracy": 0.8981804847717285, "num_tokens": 815118.0, "step": 455 }, { "epoch": 0.07384017488462473, "grad_norm": 47.53046798706055, "learning_rate": 9.263277202072539e-06, "loss": 1.0195, "mean_token_accuracy": 0.8719820380210876, "num_tokens": 816910.0, "step": 456 }, { "epoch": 0.07400210509270505, "grad_norm": 37.84265899658203, "learning_rate": 9.261658031088083e-06, "loss": 0.7791, "mean_token_accuracy": 0.89768186211586, "num_tokens": 818705.0, "step": 457 }, { "epoch": 0.07416403530078536, "grad_norm": 46.852474212646484, "learning_rate": 9.260038860103628e-06, "loss": 0.8355, "mean_token_accuracy": 0.8783625066280365, "num_tokens": 820497.0, "step": 458 }, { "epoch": 0.07432596550886568, "grad_norm": 48.34907913208008, "learning_rate": 9.258419689119172e-06, "loss": 0.8409, "mean_token_accuracy": 0.8811188638210297, "num_tokens": 822295.0, "step": 459 }, { "epoch": 0.074487895716946, "grad_norm": 47.38665771484375, "learning_rate": 9.256800518134715e-06, "loss": 0.8666, "mean_token_accuracy": 0.881118893623352, "num_tokens": 824093.0, "step": 460 }, { "epoch": 0.07464982592502631, "grad_norm": 43.687320709228516, "learning_rate": 9.255181347150259e-06, "loss": 0.7259, "mean_token_accuracy": 0.9018790423870087, "num_tokens": 825890.0, "step": 461 }, { "epoch": 0.07481175613310663, "grad_norm": 39.578758239746094, "learning_rate": 9.253562176165804e-06, "loss": 0.6795, "mean_token_accuracy": 0.8974613845348358, "num_tokens": 827675.0, "step": 462 }, { "epoch": 0.07497368634118695, "grad_norm": 46.43309783935547, "learning_rate": 9.251943005181348e-06, "loss": 0.8544, "mean_token_accuracy": 0.8959917724132538, "num_tokens": 829466.0, "step": 463 }, { "epoch": 0.07513561654926726, "grad_norm": 47.50029754638672, "learning_rate": 9.250323834196891e-06, "loss": 0.8675, "mean_token_accuracy": 0.8819444179534912, "num_tokens": 831266.0, "step": 464 }, { "epoch": 0.07529754675734758, "grad_norm": 47.62020492553711, "learning_rate": 9.248704663212435e-06, "loss": 0.8431, "mean_token_accuracy": 0.8851393163204193, "num_tokens": 833057.0, "step": 465 }, { "epoch": 0.0754594769654279, "grad_norm": 51.59745407104492, "learning_rate": 9.24708549222798e-06, "loss": 1.0138, "mean_token_accuracy": 0.8940199613571167, "num_tokens": 834842.0, "step": 466 }, { "epoch": 0.07562140717350822, "grad_norm": 56.6263313293457, "learning_rate": 9.245466321243524e-06, "loss": 1.2147, "mean_token_accuracy": 0.8861211538314819, "num_tokens": 836634.0, "step": 467 }, { "epoch": 0.07578333738158853, "grad_norm": 45.93951416015625, "learning_rate": 9.243847150259067e-06, "loss": 0.8822, "mean_token_accuracy": 0.8855936825275421, "num_tokens": 838417.0, "step": 468 }, { "epoch": 0.07594526758966885, "grad_norm": 61.10692596435547, "learning_rate": 9.242227979274612e-06, "loss": 1.221, "mean_token_accuracy": 0.8627976179122925, "num_tokens": 840213.0, "step": 469 }, { "epoch": 0.07610719779774917, "grad_norm": 45.47400665283203, "learning_rate": 9.240608808290156e-06, "loss": 0.9271, "mean_token_accuracy": 0.8870469331741333, "num_tokens": 841999.0, "step": 470 }, { "epoch": 0.07626912800582948, "grad_norm": 56.72896957397461, "learning_rate": 9.2389896373057e-06, "loss": 0.905, "mean_token_accuracy": 0.8782622218132019, "num_tokens": 843789.0, "step": 471 }, { "epoch": 0.0764310582139098, "grad_norm": 35.1486701965332, "learning_rate": 9.237370466321243e-06, "loss": 0.7213, "mean_token_accuracy": 0.9051044583320618, "num_tokens": 845575.0, "step": 472 }, { "epoch": 0.07659298842199012, "grad_norm": 48.15449142456055, "learning_rate": 9.235751295336789e-06, "loss": 0.9563, "mean_token_accuracy": 0.8828417956829071, "num_tokens": 847368.0, "step": 473 }, { "epoch": 0.07675491863007045, "grad_norm": 29.401920318603516, "learning_rate": 9.234132124352332e-06, "loss": 0.6137, "mean_token_accuracy": 0.9149962067604065, "num_tokens": 849151.0, "step": 474 }, { "epoch": 0.07691684883815075, "grad_norm": 32.057533264160156, "learning_rate": 9.232512953367876e-06, "loss": 0.6129, "mean_token_accuracy": 0.9194042086601257, "num_tokens": 850936.0, "step": 475 }, { "epoch": 0.07707877904623107, "grad_norm": 36.40903854370117, "learning_rate": 9.23089378238342e-06, "loss": 0.6513, "mean_token_accuracy": 0.9207247197628021, "num_tokens": 852725.0, "step": 476 }, { "epoch": 0.0772407092543114, "grad_norm": 36.75739288330078, "learning_rate": 9.229274611398965e-06, "loss": 0.806, "mean_token_accuracy": 0.903900682926178, "num_tokens": 854518.0, "step": 477 }, { "epoch": 0.0774026394623917, "grad_norm": 35.5255012512207, "learning_rate": 9.227655440414508e-06, "loss": 0.8493, "mean_token_accuracy": 0.8892035186290741, "num_tokens": 856310.0, "step": 478 }, { "epoch": 0.07756456967047202, "grad_norm": 43.96951675415039, "learning_rate": 9.226036269430052e-06, "loss": 0.8318, "mean_token_accuracy": 0.8875713050365448, "num_tokens": 858106.0, "step": 479 }, { "epoch": 0.07772649987855235, "grad_norm": 38.01785659790039, "learning_rate": 9.224417098445595e-06, "loss": 0.8201, "mean_token_accuracy": 0.8991561830043793, "num_tokens": 859896.0, "step": 480 }, { "epoch": 0.07788843008663267, "grad_norm": 32.69612121582031, "learning_rate": 9.22279792746114e-06, "loss": 0.69, "mean_token_accuracy": 0.9075932800769806, "num_tokens": 861689.0, "step": 481 }, { "epoch": 0.07805036029471298, "grad_norm": 38.05558776855469, "learning_rate": 9.221178756476684e-06, "loss": 0.8281, "mean_token_accuracy": 0.9009661972522736, "num_tokens": 863474.0, "step": 482 }, { "epoch": 0.0782122905027933, "grad_norm": 50.20949172973633, "learning_rate": 9.219559585492228e-06, "loss": 1.0526, "mean_token_accuracy": 0.8779354095458984, "num_tokens": 865272.0, "step": 483 }, { "epoch": 0.07837422071087362, "grad_norm": 28.173141479492188, "learning_rate": 9.217940414507773e-06, "loss": 0.6259, "mean_token_accuracy": 0.9204521775245667, "num_tokens": 867061.0, "step": 484 }, { "epoch": 0.07853615091895393, "grad_norm": 46.980064392089844, "learning_rate": 9.216321243523317e-06, "loss": 0.9579, "mean_token_accuracy": 0.8888353109359741, "num_tokens": 868861.0, "step": 485 }, { "epoch": 0.07869808112703425, "grad_norm": 42.32093048095703, "learning_rate": 9.214702072538862e-06, "loss": 0.8986, "mean_token_accuracy": 0.8806146681308746, "num_tokens": 870658.0, "step": 486 }, { "epoch": 0.07886001133511457, "grad_norm": 43.08979034423828, "learning_rate": 9.213082901554406e-06, "loss": 0.82, "mean_token_accuracy": 0.8889216184616089, "num_tokens": 872467.0, "step": 487 }, { "epoch": 0.07902194154319488, "grad_norm": 40.27311325073242, "learning_rate": 9.21146373056995e-06, "loss": 0.7868, "mean_token_accuracy": 0.8957039415836334, "num_tokens": 874257.0, "step": 488 }, { "epoch": 0.0791838717512752, "grad_norm": 41.04425048828125, "learning_rate": 9.209844559585493e-06, "loss": 0.9825, "mean_token_accuracy": 0.875, "num_tokens": 876049.0, "step": 489 }, { "epoch": 0.07934580195935552, "grad_norm": 49.12306594848633, "learning_rate": 9.208225388601038e-06, "loss": 1.0049, "mean_token_accuracy": 0.8899396359920502, "num_tokens": 877843.0, "step": 490 }, { "epoch": 0.07950773216743584, "grad_norm": 40.436012268066406, "learning_rate": 9.206606217616582e-06, "loss": 0.8111, "mean_token_accuracy": 0.8898065984249115, "num_tokens": 879637.0, "step": 491 }, { "epoch": 0.07966966237551615, "grad_norm": 38.13745880126953, "learning_rate": 9.204987046632125e-06, "loss": 0.7403, "mean_token_accuracy": 0.9014921188354492, "num_tokens": 881423.0, "step": 492 }, { "epoch": 0.07983159258359647, "grad_norm": 49.362247467041016, "learning_rate": 9.20336787564767e-06, "loss": 0.9241, "mean_token_accuracy": 0.892573893070221, "num_tokens": 883214.0, "step": 493 }, { "epoch": 0.07999352279167679, "grad_norm": 31.338422775268555, "learning_rate": 9.201748704663214e-06, "loss": 0.6561, "mean_token_accuracy": 0.9075321555137634, "num_tokens": 885006.0, "step": 494 }, { "epoch": 0.0801554529997571, "grad_norm": 40.81686019897461, "learning_rate": 9.200129533678758e-06, "loss": 0.8001, "mean_token_accuracy": 0.8969059884548187, "num_tokens": 886809.0, "step": 495 }, { "epoch": 0.08031738320783742, "grad_norm": 40.96434783935547, "learning_rate": 9.198510362694301e-06, "loss": 0.8109, "mean_token_accuracy": 0.8916457891464233, "num_tokens": 888589.0, "step": 496 }, { "epoch": 0.08047931341591774, "grad_norm": 37.90857696533203, "learning_rate": 9.196891191709847e-06, "loss": 0.7132, "mean_token_accuracy": 0.9121031761169434, "num_tokens": 890385.0, "step": 497 }, { "epoch": 0.08064124362399806, "grad_norm": 52.691017150878906, "learning_rate": 9.19527202072539e-06, "loss": 0.9597, "mean_token_accuracy": 0.8910714387893677, "num_tokens": 892173.0, "step": 498 }, { "epoch": 0.08080317383207837, "grad_norm": 49.31082534790039, "learning_rate": 9.193652849740934e-06, "loss": 0.9952, "mean_token_accuracy": 0.8654859662055969, "num_tokens": 893966.0, "step": 499 }, { "epoch": 0.08096510404015869, "grad_norm": 50.570438385009766, "learning_rate": 9.192033678756477e-06, "loss": 0.8574, "mean_token_accuracy": 0.8862743079662323, "num_tokens": 895758.0, "step": 500 }, { "epoch": 0.08112703424823901, "grad_norm": 33.87987518310547, "learning_rate": 9.190414507772023e-06, "loss": 0.6473, "mean_token_accuracy": 0.9067513644695282, "num_tokens": 897538.0, "step": 501 }, { "epoch": 0.08128896445631932, "grad_norm": 35.96215057373047, "learning_rate": 9.188795336787566e-06, "loss": 0.7317, "mean_token_accuracy": 0.9012077450752258, "num_tokens": 899323.0, "step": 502 }, { "epoch": 0.08145089466439964, "grad_norm": 43.420310974121094, "learning_rate": 9.18717616580311e-06, "loss": 0.8135, "mean_token_accuracy": 0.8935688436031342, "num_tokens": 901117.0, "step": 503 }, { "epoch": 0.08161282487247996, "grad_norm": 45.617618560791016, "learning_rate": 9.185556994818653e-06, "loss": 1.0337, "mean_token_accuracy": 0.8881153464317322, "num_tokens": 902906.0, "step": 504 }, { "epoch": 0.08177475508056029, "grad_norm": 45.894161224365234, "learning_rate": 9.183937823834199e-06, "loss": 0.9719, "mean_token_accuracy": 0.8745863139629364, "num_tokens": 904713.0, "step": 505 }, { "epoch": 0.08193668528864059, "grad_norm": 41.16225814819336, "learning_rate": 9.182318652849742e-06, "loss": 0.8051, "mean_token_accuracy": 0.8966099619865417, "num_tokens": 906496.0, "step": 506 }, { "epoch": 0.08209861549672091, "grad_norm": 30.52025032043457, "learning_rate": 9.180699481865286e-06, "loss": 0.6377, "mean_token_accuracy": 0.9081169068813324, "num_tokens": 908291.0, "step": 507 }, { "epoch": 0.08226054570480124, "grad_norm": 36.748321533203125, "learning_rate": 9.17908031088083e-06, "loss": 0.8389, "mean_token_accuracy": 0.9008110463619232, "num_tokens": 910075.0, "step": 508 }, { "epoch": 0.08242247591288154, "grad_norm": 34.352508544921875, "learning_rate": 9.177461139896375e-06, "loss": 0.582, "mean_token_accuracy": 0.9062213599681854, "num_tokens": 911875.0, "step": 509 }, { "epoch": 0.08258440612096186, "grad_norm": 40.05860900878906, "learning_rate": 9.175841968911918e-06, "loss": 0.936, "mean_token_accuracy": 0.8804570436477661, "num_tokens": 913663.0, "step": 510 }, { "epoch": 0.08274633632904219, "grad_norm": 49.722721099853516, "learning_rate": 9.174222797927462e-06, "loss": 0.933, "mean_token_accuracy": 0.8785386979579926, "num_tokens": 915463.0, "step": 511 }, { "epoch": 0.0829082665371225, "grad_norm": 36.01008987426758, "learning_rate": 9.172603626943007e-06, "loss": 0.8856, "mean_token_accuracy": 0.8921568691730499, "num_tokens": 917255.0, "step": 512 }, { "epoch": 0.08307019674520282, "grad_norm": 46.750553131103516, "learning_rate": 9.17098445595855e-06, "loss": 0.861, "mean_token_accuracy": 0.879907101392746, "num_tokens": 919050.0, "step": 513 }, { "epoch": 0.08323212695328314, "grad_norm": 36.83955001831055, "learning_rate": 9.169365284974094e-06, "loss": 0.6834, "mean_token_accuracy": 0.9160980880260468, "num_tokens": 920836.0, "step": 514 }, { "epoch": 0.08339405716136346, "grad_norm": 35.40216064453125, "learning_rate": 9.167746113989638e-06, "loss": 0.806, "mean_token_accuracy": 0.8974076807498932, "num_tokens": 922621.0, "step": 515 }, { "epoch": 0.08355598736944377, "grad_norm": 42.34437942504883, "learning_rate": 9.166126943005183e-06, "loss": 1.0528, "mean_token_accuracy": 0.8676542043685913, "num_tokens": 924420.0, "step": 516 }, { "epoch": 0.08371791757752409, "grad_norm": 34.62520217895508, "learning_rate": 9.164507772020727e-06, "loss": 0.769, "mean_token_accuracy": 0.8966503441333771, "num_tokens": 926203.0, "step": 517 }, { "epoch": 0.08387984778560441, "grad_norm": 45.12850570678711, "learning_rate": 9.16288860103627e-06, "loss": 1.0655, "mean_token_accuracy": 0.8656690716743469, "num_tokens": 928001.0, "step": 518 }, { "epoch": 0.08404177799368472, "grad_norm": 38.85281753540039, "learning_rate": 9.161269430051814e-06, "loss": 0.8694, "mean_token_accuracy": 0.8876811861991882, "num_tokens": 929789.0, "step": 519 }, { "epoch": 0.08420370820176504, "grad_norm": 35.66532897949219, "learning_rate": 9.15965025906736e-06, "loss": 0.818, "mean_token_accuracy": 0.9037270545959473, "num_tokens": 931570.0, "step": 520 }, { "epoch": 0.08436563840984536, "grad_norm": 40.39627456665039, "learning_rate": 9.158031088082903e-06, "loss": 0.8645, "mean_token_accuracy": 0.8856909573078156, "num_tokens": 933362.0, "step": 521 }, { "epoch": 0.08452756861792568, "grad_norm": 46.52644729614258, "learning_rate": 9.156411917098446e-06, "loss": 1.0621, "mean_token_accuracy": 0.8760567903518677, "num_tokens": 935156.0, "step": 522 }, { "epoch": 0.08468949882600599, "grad_norm": 38.701576232910156, "learning_rate": 9.15479274611399e-06, "loss": 1.0335, "mean_token_accuracy": 0.8865248262882233, "num_tokens": 936950.0, "step": 523 }, { "epoch": 0.08485142903408631, "grad_norm": 41.84128952026367, "learning_rate": 9.153173575129535e-06, "loss": 0.9906, "mean_token_accuracy": 0.8804563581943512, "num_tokens": 938746.0, "step": 524 }, { "epoch": 0.08501335924216663, "grad_norm": 36.47675704956055, "learning_rate": 9.151554404145079e-06, "loss": 0.8053, "mean_token_accuracy": 0.8947421312332153, "num_tokens": 940540.0, "step": 525 }, { "epoch": 0.08517528945024694, "grad_norm": 37.19576644897461, "learning_rate": 9.149935233160623e-06, "loss": 0.8118, "mean_token_accuracy": 0.885046124458313, "num_tokens": 942322.0, "step": 526 }, { "epoch": 0.08533721965832726, "grad_norm": 37.5869255065918, "learning_rate": 9.148316062176166e-06, "loss": 0.696, "mean_token_accuracy": 0.9103787243366241, "num_tokens": 944123.0, "step": 527 }, { "epoch": 0.08549914986640758, "grad_norm": 34.7048225402832, "learning_rate": 9.146696891191711e-06, "loss": 0.6704, "mean_token_accuracy": 0.9105429947376251, "num_tokens": 945915.0, "step": 528 }, { "epoch": 0.0856610800744879, "grad_norm": 33.568546295166016, "learning_rate": 9.145077720207255e-06, "loss": 0.701, "mean_token_accuracy": 0.8984127044677734, "num_tokens": 947702.0, "step": 529 }, { "epoch": 0.08582301028256821, "grad_norm": 44.70387649536133, "learning_rate": 9.143458549222799e-06, "loss": 1.0527, "mean_token_accuracy": 0.8779865801334381, "num_tokens": 949501.0, "step": 530 }, { "epoch": 0.08598494049064853, "grad_norm": 41.08858871459961, "learning_rate": 9.141839378238344e-06, "loss": 0.8312, "mean_token_accuracy": 0.8736836612224579, "num_tokens": 951290.0, "step": 531 }, { "epoch": 0.08614687069872885, "grad_norm": 52.84129333496094, "learning_rate": 9.140220207253887e-06, "loss": 0.9695, "mean_token_accuracy": 0.8727866113185883, "num_tokens": 953094.0, "step": 532 }, { "epoch": 0.08630880090680916, "grad_norm": 45.39836502075195, "learning_rate": 9.138601036269431e-06, "loss": 0.9095, "mean_token_accuracy": 0.8755514621734619, "num_tokens": 954902.0, "step": 533 }, { "epoch": 0.08647073111488948, "grad_norm": 43.694156646728516, "learning_rate": 9.136981865284975e-06, "loss": 0.8516, "mean_token_accuracy": 0.8868613243103027, "num_tokens": 956688.0, "step": 534 }, { "epoch": 0.0866326613229698, "grad_norm": 44.96379089355469, "learning_rate": 9.13536269430052e-06, "loss": 0.9963, "mean_token_accuracy": 0.8791326880455017, "num_tokens": 958473.0, "step": 535 }, { "epoch": 0.08679459153105011, "grad_norm": 35.05347442626953, "learning_rate": 9.133743523316063e-06, "loss": 0.7046, "mean_token_accuracy": 0.9055226147174835, "num_tokens": 960260.0, "step": 536 }, { "epoch": 0.08695652173913043, "grad_norm": 40.07206726074219, "learning_rate": 9.132124352331607e-06, "loss": 0.7868, "mean_token_accuracy": 0.8996916711330414, "num_tokens": 962051.0, "step": 537 }, { "epoch": 0.08711845194721075, "grad_norm": 36.64631652832031, "learning_rate": 9.13050518134715e-06, "loss": 0.8252, "mean_token_accuracy": 0.9051094949245453, "num_tokens": 963837.0, "step": 538 }, { "epoch": 0.08728038215529108, "grad_norm": 34.931793212890625, "learning_rate": 9.128886010362696e-06, "loss": 0.6907, "mean_token_accuracy": 0.9032531678676605, "num_tokens": 965628.0, "step": 539 }, { "epoch": 0.08744231236337138, "grad_norm": 36.16720962524414, "learning_rate": 9.12726683937824e-06, "loss": 0.7744, "mean_token_accuracy": 0.8967038989067078, "num_tokens": 967420.0, "step": 540 }, { "epoch": 0.0876042425714517, "grad_norm": 30.684724807739258, "learning_rate": 9.125647668393783e-06, "loss": 0.6305, "mean_token_accuracy": 0.9159165620803833, "num_tokens": 969207.0, "step": 541 }, { "epoch": 0.08776617277953203, "grad_norm": 44.079593658447266, "learning_rate": 9.124028497409327e-06, "loss": 0.7951, "mean_token_accuracy": 0.8889742493629456, "num_tokens": 970989.0, "step": 542 }, { "epoch": 0.08792810298761233, "grad_norm": 44.77708053588867, "learning_rate": 9.122409326424872e-06, "loss": 0.885, "mean_token_accuracy": 0.8802955746650696, "num_tokens": 972793.0, "step": 543 }, { "epoch": 0.08809003319569265, "grad_norm": 39.766258239746094, "learning_rate": 9.120790155440416e-06, "loss": 0.7149, "mean_token_accuracy": 0.903517335653305, "num_tokens": 974584.0, "step": 544 }, { "epoch": 0.08825196340377298, "grad_norm": 46.68134307861328, "learning_rate": 9.11917098445596e-06, "loss": 0.932, "mean_token_accuracy": 0.8750191330909729, "num_tokens": 976376.0, "step": 545 }, { "epoch": 0.0884138936118533, "grad_norm": 46.088077545166016, "learning_rate": 9.117551813471503e-06, "loss": 0.9902, "mean_token_accuracy": 0.8890588581562042, "num_tokens": 978166.0, "step": 546 }, { "epoch": 0.0885758238199336, "grad_norm": 36.8809700012207, "learning_rate": 9.115932642487048e-06, "loss": 0.7741, "mean_token_accuracy": 0.9067831337451935, "num_tokens": 979957.0, "step": 547 }, { "epoch": 0.08873775402801393, "grad_norm": 38.474056243896484, "learning_rate": 9.114313471502592e-06, "loss": 0.8461, "mean_token_accuracy": 0.8904609680175781, "num_tokens": 981752.0, "step": 548 }, { "epoch": 0.08889968423609425, "grad_norm": 47.807437896728516, "learning_rate": 9.112694300518135e-06, "loss": 0.9322, "mean_token_accuracy": 0.8764282763004303, "num_tokens": 983539.0, "step": 549 }, { "epoch": 0.08906161444417456, "grad_norm": 33.97437286376953, "learning_rate": 9.11107512953368e-06, "loss": 0.726, "mean_token_accuracy": 0.9033996760845184, "num_tokens": 985320.0, "step": 550 }, { "epoch": 0.08922354465225488, "grad_norm": 46.7301139831543, "learning_rate": 9.109455958549224e-06, "loss": 1.0919, "mean_token_accuracy": 0.8827870488166809, "num_tokens": 987121.0, "step": 551 }, { "epoch": 0.0893854748603352, "grad_norm": 31.69512367248535, "learning_rate": 9.107836787564768e-06, "loss": 0.6151, "mean_token_accuracy": 0.9129291772842407, "num_tokens": 988909.0, "step": 552 }, { "epoch": 0.08954740506841552, "grad_norm": 35.732723236083984, "learning_rate": 9.106217616580311e-06, "loss": 0.7588, "mean_token_accuracy": 0.9083463847637177, "num_tokens": 990703.0, "step": 553 }, { "epoch": 0.08970933527649583, "grad_norm": 43.09870529174805, "learning_rate": 9.104598445595857e-06, "loss": 0.9546, "mean_token_accuracy": 0.8877540230751038, "num_tokens": 992491.0, "step": 554 }, { "epoch": 0.08987126548457615, "grad_norm": 42.41291046142578, "learning_rate": 9.1029792746114e-06, "loss": 0.9916, "mean_token_accuracy": 0.8805672228336334, "num_tokens": 994279.0, "step": 555 }, { "epoch": 0.09003319569265647, "grad_norm": 34.58682632446289, "learning_rate": 9.101360103626944e-06, "loss": 0.7759, "mean_token_accuracy": 0.9031945466995239, "num_tokens": 996071.0, "step": 556 }, { "epoch": 0.09019512590073678, "grad_norm": 36.18080520629883, "learning_rate": 9.099740932642487e-06, "loss": 0.7549, "mean_token_accuracy": 0.8992753624916077, "num_tokens": 997861.0, "step": 557 }, { "epoch": 0.0903570561088171, "grad_norm": 30.26384735107422, "learning_rate": 9.098121761658033e-06, "loss": 0.6908, "mean_token_accuracy": 0.8936090171337128, "num_tokens": 999646.0, "step": 558 }, { "epoch": 0.09051898631689742, "grad_norm": 43.25270080566406, "learning_rate": 9.096502590673576e-06, "loss": 1.0387, "mean_token_accuracy": 0.8781447112560272, "num_tokens": 1001445.0, "step": 559 }, { "epoch": 0.09068091652497773, "grad_norm": 47.86593246459961, "learning_rate": 9.09488341968912e-06, "loss": 1.1261, "mean_token_accuracy": 0.8596596419811249, "num_tokens": 1003240.0, "step": 560 }, { "epoch": 0.09084284673305805, "grad_norm": 26.668771743774414, "learning_rate": 9.093264248704663e-06, "loss": 0.6807, "mean_token_accuracy": 0.9109138250350952, "num_tokens": 1005021.0, "step": 561 }, { "epoch": 0.09100477694113837, "grad_norm": 36.10108947753906, "learning_rate": 9.091645077720209e-06, "loss": 0.8069, "mean_token_accuracy": 0.8885050415992737, "num_tokens": 1006801.0, "step": 562 }, { "epoch": 0.09116670714921869, "grad_norm": 26.014118194580078, "learning_rate": 9.090025906735752e-06, "loss": 0.5737, "mean_token_accuracy": 0.9196553230285645, "num_tokens": 1008586.0, "step": 563 }, { "epoch": 0.091328637357299, "grad_norm": 36.54709243774414, "learning_rate": 9.088406735751296e-06, "loss": 0.8463, "mean_token_accuracy": 0.905587375164032, "num_tokens": 1010373.0, "step": 564 }, { "epoch": 0.09149056756537932, "grad_norm": 33.99188232421875, "learning_rate": 9.08678756476684e-06, "loss": 0.8784, "mean_token_accuracy": 0.8889986872673035, "num_tokens": 1012155.0, "step": 565 }, { "epoch": 0.09165249777345964, "grad_norm": 36.17535400390625, "learning_rate": 9.085168393782385e-06, "loss": 0.8154, "mean_token_accuracy": 0.8960067927837372, "num_tokens": 1013946.0, "step": 566 }, { "epoch": 0.09181442798153995, "grad_norm": 30.589298248291016, "learning_rate": 9.083549222797928e-06, "loss": 0.6762, "mean_token_accuracy": 0.89882692694664, "num_tokens": 1015735.0, "step": 567 }, { "epoch": 0.09197635818962027, "grad_norm": 31.206890106201172, "learning_rate": 9.081930051813472e-06, "loss": 0.7463, "mean_token_accuracy": 0.8982758522033691, "num_tokens": 1017532.0, "step": 568 }, { "epoch": 0.0921382883977006, "grad_norm": 38.077938079833984, "learning_rate": 9.080310880829017e-06, "loss": 0.8479, "mean_token_accuracy": 0.8916275501251221, "num_tokens": 1019330.0, "step": 569 }, { "epoch": 0.09230021860578091, "grad_norm": 38.95375061035156, "learning_rate": 9.07869170984456e-06, "loss": 0.856, "mean_token_accuracy": 0.8853963315486908, "num_tokens": 1021121.0, "step": 570 }, { "epoch": 0.09246214881386122, "grad_norm": 33.88456344604492, "learning_rate": 9.077072538860104e-06, "loss": 0.7675, "mean_token_accuracy": 0.902599424123764, "num_tokens": 1022911.0, "step": 571 }, { "epoch": 0.09262407902194154, "grad_norm": 36.16845703125, "learning_rate": 9.075453367875648e-06, "loss": 0.7736, "mean_token_accuracy": 0.8960636854171753, "num_tokens": 1024692.0, "step": 572 }, { "epoch": 0.09278600923002187, "grad_norm": 35.536415100097656, "learning_rate": 9.073834196891193e-06, "loss": 0.8409, "mean_token_accuracy": 0.8763091266155243, "num_tokens": 1026479.0, "step": 573 }, { "epoch": 0.09294793943810217, "grad_norm": 45.72947311401367, "learning_rate": 9.072215025906737e-06, "loss": 0.9993, "mean_token_accuracy": 0.8699954450130463, "num_tokens": 1028288.0, "step": 574 }, { "epoch": 0.0931098696461825, "grad_norm": 43.1564826965332, "learning_rate": 9.07059585492228e-06, "loss": 0.8548, "mean_token_accuracy": 0.8852503001689911, "num_tokens": 1030087.0, "step": 575 }, { "epoch": 0.09327179985426282, "grad_norm": 39.70059585571289, "learning_rate": 9.068976683937824e-06, "loss": 1.3281, "mean_token_accuracy": 0.8701861500740051, "num_tokens": 1031884.0, "step": 576 }, { "epoch": 0.09343373006234312, "grad_norm": 27.399688720703125, "learning_rate": 9.06735751295337e-06, "loss": 0.6897, "mean_token_accuracy": 0.914814829826355, "num_tokens": 1033666.0, "step": 577 }, { "epoch": 0.09359566027042344, "grad_norm": 47.35184097290039, "learning_rate": 9.065738341968913e-06, "loss": 0.9021, "mean_token_accuracy": 0.8777962028980255, "num_tokens": 1035456.0, "step": 578 }, { "epoch": 0.09375759047850377, "grad_norm": 39.08265686035156, "learning_rate": 9.064119170984456e-06, "loss": 0.9229, "mean_token_accuracy": 0.8805022239685059, "num_tokens": 1037251.0, "step": 579 }, { "epoch": 0.09391952068658409, "grad_norm": 38.741668701171875, "learning_rate": 9.0625e-06, "loss": 0.8254, "mean_token_accuracy": 0.9007575511932373, "num_tokens": 1039035.0, "step": 580 }, { "epoch": 0.0940814508946644, "grad_norm": 44.4018669128418, "learning_rate": 9.060880829015545e-06, "loss": 0.9365, "mean_token_accuracy": 0.8770685493946075, "num_tokens": 1040823.0, "step": 581 }, { "epoch": 0.09424338110274472, "grad_norm": 33.8218879699707, "learning_rate": 9.059261658031089e-06, "loss": 0.7447, "mean_token_accuracy": 0.8912815153598785, "num_tokens": 1042611.0, "step": 582 }, { "epoch": 0.09440531131082504, "grad_norm": 39.219520568847656, "learning_rate": 9.057642487046633e-06, "loss": 0.7866, "mean_token_accuracy": 0.8989204466342926, "num_tokens": 1044408.0, "step": 583 }, { "epoch": 0.09456724151890535, "grad_norm": 46.69636154174805, "learning_rate": 9.056023316062176e-06, "loss": 0.9863, "mean_token_accuracy": 0.8728821575641632, "num_tokens": 1046212.0, "step": 584 }, { "epoch": 0.09472917172698567, "grad_norm": 46.50071334838867, "learning_rate": 9.054404145077721e-06, "loss": 0.8847, "mean_token_accuracy": 0.9067711234092712, "num_tokens": 1048013.0, "step": 585 }, { "epoch": 0.09489110193506599, "grad_norm": 39.08499526977539, "learning_rate": 9.052784974093265e-06, "loss": 0.8672, "mean_token_accuracy": 0.8857594728469849, "num_tokens": 1049805.0, "step": 586 }, { "epoch": 0.09505303214314631, "grad_norm": 38.8447151184082, "learning_rate": 9.051165803108809e-06, "loss": 0.8807, "mean_token_accuracy": 0.8857826292514801, "num_tokens": 1051587.0, "step": 587 }, { "epoch": 0.09521496235122662, "grad_norm": 34.5479621887207, "learning_rate": 9.049546632124354e-06, "loss": 0.8669, "mean_token_accuracy": 0.8918128609657288, "num_tokens": 1053367.0, "step": 588 }, { "epoch": 0.09537689255930694, "grad_norm": 29.89374542236328, "learning_rate": 9.047927461139897e-06, "loss": 0.6209, "mean_token_accuracy": 0.9084370732307434, "num_tokens": 1055152.0, "step": 589 }, { "epoch": 0.09553882276738726, "grad_norm": 33.414695739746094, "learning_rate": 9.046308290155441e-06, "loss": 0.6534, "mean_token_accuracy": 0.9157062470912933, "num_tokens": 1056949.0, "step": 590 }, { "epoch": 0.09570075297546757, "grad_norm": 39.24887466430664, "learning_rate": 9.044689119170985e-06, "loss": 0.8538, "mean_token_accuracy": 0.8920360505580902, "num_tokens": 1058739.0, "step": 591 }, { "epoch": 0.09586268318354789, "grad_norm": 29.195756912231445, "learning_rate": 9.04306994818653e-06, "loss": 0.7711, "mean_token_accuracy": 0.9147091805934906, "num_tokens": 1060532.0, "step": 592 }, { "epoch": 0.09602461339162821, "grad_norm": 43.67234802246094, "learning_rate": 9.041450777202073e-06, "loss": 0.9277, "mean_token_accuracy": 0.8783040642738342, "num_tokens": 1062332.0, "step": 593 }, { "epoch": 0.09618654359970853, "grad_norm": 39.56077575683594, "learning_rate": 9.039831606217617e-06, "loss": 0.8221, "mean_token_accuracy": 0.8751722574234009, "num_tokens": 1064124.0, "step": 594 }, { "epoch": 0.09634847380778884, "grad_norm": 38.047847747802734, "learning_rate": 9.03821243523316e-06, "loss": 0.7873, "mean_token_accuracy": 0.8981804847717285, "num_tokens": 1065911.0, "step": 595 }, { "epoch": 0.09651040401586916, "grad_norm": 37.424049377441406, "learning_rate": 9.036593264248706e-06, "loss": 0.8616, "mean_token_accuracy": 0.8858951032161713, "num_tokens": 1067703.0, "step": 596 }, { "epoch": 0.09667233422394948, "grad_norm": 39.37641906738281, "learning_rate": 9.03497409326425e-06, "loss": 0.7727, "mean_token_accuracy": 0.9019958078861237, "num_tokens": 1069491.0, "step": 597 }, { "epoch": 0.09683426443202979, "grad_norm": 44.069984436035156, "learning_rate": 9.033354922279793e-06, "loss": 0.9644, "mean_token_accuracy": 0.8804119527339935, "num_tokens": 1071286.0, "step": 598 }, { "epoch": 0.09699619464011011, "grad_norm": 34.022003173828125, "learning_rate": 9.031735751295337e-06, "loss": 0.7213, "mean_token_accuracy": 0.9068345129489899, "num_tokens": 1073077.0, "step": 599 }, { "epoch": 0.09715812484819043, "grad_norm": 44.12709426879883, "learning_rate": 9.030116580310882e-06, "loss": 0.8703, "mean_token_accuracy": 0.876198798418045, "num_tokens": 1074872.0, "step": 600 }, { "epoch": 0.09732005505627074, "grad_norm": 33.81965255737305, "learning_rate": 9.028497409326426e-06, "loss": 0.7184, "mean_token_accuracy": 0.8938003480434418, "num_tokens": 1076657.0, "step": 601 }, { "epoch": 0.09748198526435106, "grad_norm": 40.71698760986328, "learning_rate": 9.02687823834197e-06, "loss": 0.7707, "mean_token_accuracy": 0.8902597427368164, "num_tokens": 1078452.0, "step": 602 }, { "epoch": 0.09764391547243138, "grad_norm": 32.85609436035156, "learning_rate": 9.025259067357513e-06, "loss": 0.6628, "mean_token_accuracy": 0.9024606645107269, "num_tokens": 1080241.0, "step": 603 }, { "epoch": 0.0978058456805117, "grad_norm": 30.258365631103516, "learning_rate": 9.023639896373058e-06, "loss": 0.6489, "mean_token_accuracy": 0.9113465547561646, "num_tokens": 1082024.0, "step": 604 }, { "epoch": 0.09796777588859201, "grad_norm": 45.52676010131836, "learning_rate": 9.022020725388602e-06, "loss": 0.8451, "mean_token_accuracy": 0.8769958019256592, "num_tokens": 1083812.0, "step": 605 }, { "epoch": 0.09812970609667233, "grad_norm": 31.065317153930664, "learning_rate": 9.020401554404145e-06, "loss": 0.6603, "mean_token_accuracy": 0.9060838520526886, "num_tokens": 1085601.0, "step": 606 }, { "epoch": 0.09829163630475266, "grad_norm": 30.893630981445312, "learning_rate": 9.01878238341969e-06, "loss": 0.71, "mean_token_accuracy": 0.9122442603111267, "num_tokens": 1087387.0, "step": 607 }, { "epoch": 0.09845356651283296, "grad_norm": 36.142784118652344, "learning_rate": 9.017163212435234e-06, "loss": 0.7292, "mean_token_accuracy": 0.9053024351596832, "num_tokens": 1089173.0, "step": 608 }, { "epoch": 0.09861549672091328, "grad_norm": 40.01546859741211, "learning_rate": 9.015544041450778e-06, "loss": 0.7704, "mean_token_accuracy": 0.8971709907054901, "num_tokens": 1090957.0, "step": 609 }, { "epoch": 0.0987774269289936, "grad_norm": 33.17009353637695, "learning_rate": 9.013924870466321e-06, "loss": 0.7009, "mean_token_accuracy": 0.9080882370471954, "num_tokens": 1092741.0, "step": 610 }, { "epoch": 0.09893935713707393, "grad_norm": 44.18925857543945, "learning_rate": 9.012305699481867e-06, "loss": 0.8227, "mean_token_accuracy": 0.888827919960022, "num_tokens": 1094523.0, "step": 611 }, { "epoch": 0.09910128734515423, "grad_norm": 45.22432327270508, "learning_rate": 9.01068652849741e-06, "loss": 0.742, "mean_token_accuracy": 0.8964992463588715, "num_tokens": 1096325.0, "step": 612 }, { "epoch": 0.09926321755323456, "grad_norm": 31.28477668762207, "learning_rate": 9.009067357512954e-06, "loss": 0.6511, "mean_token_accuracy": 0.9182723760604858, "num_tokens": 1098119.0, "step": 613 }, { "epoch": 0.09942514776131488, "grad_norm": 41.96350860595703, "learning_rate": 9.007448186528497e-06, "loss": 0.6631, "mean_token_accuracy": 0.9085756838321686, "num_tokens": 1099915.0, "step": 614 }, { "epoch": 0.09958707796939519, "grad_norm": 45.22574996948242, "learning_rate": 9.005829015544043e-06, "loss": 0.7646, "mean_token_accuracy": 0.8899290561676025, "num_tokens": 1101718.0, "step": 615 }, { "epoch": 0.0997490081774755, "grad_norm": 28.762096405029297, "learning_rate": 9.004209844559586e-06, "loss": 0.6382, "mean_token_accuracy": 0.9142857193946838, "num_tokens": 1103510.0, "step": 616 }, { "epoch": 0.09991093838555583, "grad_norm": 34.82968521118164, "learning_rate": 9.00259067357513e-06, "loss": 0.6864, "mean_token_accuracy": 0.9024765491485596, "num_tokens": 1105299.0, "step": 617 }, { "epoch": 0.10007286859363615, "grad_norm": 36.802894592285156, "learning_rate": 9.000971502590673e-06, "loss": 0.8045, "mean_token_accuracy": 0.8917339146137238, "num_tokens": 1107097.0, "step": 618 }, { "epoch": 0.10023479880171646, "grad_norm": 30.758092880249023, "learning_rate": 8.999352331606219e-06, "loss": 0.7274, "mean_token_accuracy": 0.9057396352291107, "num_tokens": 1108885.0, "step": 619 }, { "epoch": 0.10039672900979678, "grad_norm": 36.754512786865234, "learning_rate": 8.997733160621762e-06, "loss": 0.6363, "mean_token_accuracy": 0.9100378751754761, "num_tokens": 1110673.0, "step": 620 }, { "epoch": 0.1005586592178771, "grad_norm": 23.808488845825195, "learning_rate": 8.996113989637306e-06, "loss": 0.6034, "mean_token_accuracy": 0.9239267706871033, "num_tokens": 1112461.0, "step": 621 }, { "epoch": 0.10072058942595741, "grad_norm": 42.57846450805664, "learning_rate": 8.99449481865285e-06, "loss": 0.8523, "mean_token_accuracy": 0.8903903961181641, "num_tokens": 1114265.0, "step": 622 }, { "epoch": 0.10088251963403773, "grad_norm": 35.165733337402344, "learning_rate": 8.992875647668395e-06, "loss": 0.7541, "mean_token_accuracy": 0.8905109763145447, "num_tokens": 1116051.0, "step": 623 }, { "epoch": 0.10104444984211805, "grad_norm": 44.851806640625, "learning_rate": 8.991256476683938e-06, "loss": 0.9609, "mean_token_accuracy": 0.8868994116783142, "num_tokens": 1117846.0, "step": 624 }, { "epoch": 0.10120638005019836, "grad_norm": 30.463428497314453, "learning_rate": 8.989637305699482e-06, "loss": 0.6335, "mean_token_accuracy": 0.9128254354000092, "num_tokens": 1119632.0, "step": 625 }, { "epoch": 0.10136831025827868, "grad_norm": 34.8211669921875, "learning_rate": 8.988018134715027e-06, "loss": 0.7314, "mean_token_accuracy": 0.893382340669632, "num_tokens": 1121416.0, "step": 626 }, { "epoch": 0.101530240466359, "grad_norm": 35.37688446044922, "learning_rate": 8.98639896373057e-06, "loss": 0.6801, "mean_token_accuracy": 0.8952054679393768, "num_tokens": 1123224.0, "step": 627 }, { "epoch": 0.10169217067443932, "grad_norm": 34.03190994262695, "learning_rate": 8.984779792746114e-06, "loss": 0.7145, "mean_token_accuracy": 0.9028058052062988, "num_tokens": 1125014.0, "step": 628 }, { "epoch": 0.10185410088251963, "grad_norm": 41.09244155883789, "learning_rate": 8.983160621761658e-06, "loss": 0.7899, "mean_token_accuracy": 0.8963044285774231, "num_tokens": 1126806.0, "step": 629 }, { "epoch": 0.10201603109059995, "grad_norm": 36.194007873535156, "learning_rate": 8.981541450777203e-06, "loss": 0.8943, "mean_token_accuracy": 0.8953647315502167, "num_tokens": 1128594.0, "step": 630 }, { "epoch": 0.10217796129868027, "grad_norm": 42.55862808227539, "learning_rate": 8.979922279792747e-06, "loss": 0.8817, "mean_token_accuracy": 0.8936170041561127, "num_tokens": 1130388.0, "step": 631 }, { "epoch": 0.10233989150676058, "grad_norm": 30.45798683166504, "learning_rate": 8.97830310880829e-06, "loss": 0.6512, "mean_token_accuracy": 0.9172663688659668, "num_tokens": 1132177.0, "step": 632 }, { "epoch": 0.1025018217148409, "grad_norm": 34.06311798095703, "learning_rate": 8.976683937823834e-06, "loss": 0.8853, "mean_token_accuracy": 0.8865019977092743, "num_tokens": 1133971.0, "step": 633 }, { "epoch": 0.10266375192292122, "grad_norm": 34.95612335205078, "learning_rate": 8.97506476683938e-06, "loss": 0.757, "mean_token_accuracy": 0.8997070789337158, "num_tokens": 1135762.0, "step": 634 }, { "epoch": 0.10282568213100154, "grad_norm": 37.71987533569336, "learning_rate": 8.973445595854923e-06, "loss": 0.8899, "mean_token_accuracy": 0.8888073265552521, "num_tokens": 1137560.0, "step": 635 }, { "epoch": 0.10298761233908185, "grad_norm": 41.074493408203125, "learning_rate": 8.971826424870466e-06, "loss": 0.8972, "mean_token_accuracy": 0.8853802680969238, "num_tokens": 1139351.0, "step": 636 }, { "epoch": 0.10314954254716217, "grad_norm": 36.32420349121094, "learning_rate": 8.97020725388601e-06, "loss": 0.774, "mean_token_accuracy": 0.9025167226791382, "num_tokens": 1141140.0, "step": 637 }, { "epoch": 0.1033114727552425, "grad_norm": 31.13274574279785, "learning_rate": 8.968588082901555e-06, "loss": 0.7184, "mean_token_accuracy": 0.9094537794589996, "num_tokens": 1142928.0, "step": 638 }, { "epoch": 0.1034734029633228, "grad_norm": 36.037418365478516, "learning_rate": 8.966968911917099e-06, "loss": 0.7179, "mean_token_accuracy": 0.892691969871521, "num_tokens": 1144719.0, "step": 639 }, { "epoch": 0.10363533317140312, "grad_norm": 29.357982635498047, "learning_rate": 8.965349740932643e-06, "loss": 0.7073, "mean_token_accuracy": 0.9127601683139801, "num_tokens": 1146507.0, "step": 640 }, { "epoch": 0.10379726337948345, "grad_norm": 36.70446014404297, "learning_rate": 8.963730569948186e-06, "loss": 0.8754, "mean_token_accuracy": 0.8896499276161194, "num_tokens": 1148318.0, "step": 641 }, { "epoch": 0.10395919358756377, "grad_norm": 31.734821319580078, "learning_rate": 8.962111398963731e-06, "loss": 0.7229, "mean_token_accuracy": 0.9053531885147095, "num_tokens": 1150105.0, "step": 642 }, { "epoch": 0.10412112379564407, "grad_norm": 36.4366455078125, "learning_rate": 8.960492227979275e-06, "loss": 0.8446, "mean_token_accuracy": 0.8867652714252472, "num_tokens": 1151906.0, "step": 643 }, { "epoch": 0.1042830540037244, "grad_norm": 33.08853530883789, "learning_rate": 8.958873056994819e-06, "loss": 0.7713, "mean_token_accuracy": 0.8930742740631104, "num_tokens": 1153699.0, "step": 644 }, { "epoch": 0.10444498421180472, "grad_norm": 35.976219177246094, "learning_rate": 8.957253886010364e-06, "loss": 0.8038, "mean_token_accuracy": 0.9029101133346558, "num_tokens": 1155489.0, "step": 645 }, { "epoch": 0.10460691441988502, "grad_norm": 30.516292572021484, "learning_rate": 8.955634715025907e-06, "loss": 0.7688, "mean_token_accuracy": 0.9038292169570923, "num_tokens": 1157271.0, "step": 646 }, { "epoch": 0.10476884462796535, "grad_norm": 35.352481842041016, "learning_rate": 8.954015544041451e-06, "loss": 0.8136, "mean_token_accuracy": 0.9027210772037506, "num_tokens": 1159070.0, "step": 647 }, { "epoch": 0.10493077483604567, "grad_norm": 32.153175354003906, "learning_rate": 8.952396373056995e-06, "loss": 0.7827, "mean_token_accuracy": 0.89800626039505, "num_tokens": 1160866.0, "step": 648 }, { "epoch": 0.10509270504412598, "grad_norm": 36.029296875, "learning_rate": 8.95077720207254e-06, "loss": 0.7273, "mean_token_accuracy": 0.8847948610782623, "num_tokens": 1162665.0, "step": 649 }, { "epoch": 0.1052546352522063, "grad_norm": 29.155750274658203, "learning_rate": 8.949158031088084e-06, "loss": 0.5944, "mean_token_accuracy": 0.9239130616188049, "num_tokens": 1164453.0, "step": 650 }, { "epoch": 0.10541656546028662, "grad_norm": 30.928163528442383, "learning_rate": 8.947538860103627e-06, "loss": 0.6682, "mean_token_accuracy": 0.9058971703052521, "num_tokens": 1166241.0, "step": 651 }, { "epoch": 0.10557849566836694, "grad_norm": 33.00962448120117, "learning_rate": 8.94591968911917e-06, "loss": 0.7461, "mean_token_accuracy": 0.9033761322498322, "num_tokens": 1168031.0, "step": 652 }, { "epoch": 0.10574042587644725, "grad_norm": 38.731590270996094, "learning_rate": 8.944300518134716e-06, "loss": 0.9223, "mean_token_accuracy": 0.8935846984386444, "num_tokens": 1169824.0, "step": 653 }, { "epoch": 0.10590235608452757, "grad_norm": 46.37031936645508, "learning_rate": 8.94268134715026e-06, "loss": 1.1388, "mean_token_accuracy": 0.8711753189563751, "num_tokens": 1171623.0, "step": 654 }, { "epoch": 0.10606428629260789, "grad_norm": 23.43425941467285, "learning_rate": 8.941062176165803e-06, "loss": 0.5895, "mean_token_accuracy": 0.9194042086601257, "num_tokens": 1173408.0, "step": 655 }, { "epoch": 0.1062262165006882, "grad_norm": 43.1533088684082, "learning_rate": 8.939443005181347e-06, "loss": 0.9865, "mean_token_accuracy": 0.8692688047885895, "num_tokens": 1175203.0, "step": 656 }, { "epoch": 0.10638814670876852, "grad_norm": 40.089820861816406, "learning_rate": 8.937823834196892e-06, "loss": 0.7628, "mean_token_accuracy": 0.9004509150981903, "num_tokens": 1177007.0, "step": 657 }, { "epoch": 0.10655007691684884, "grad_norm": 37.989830017089844, "learning_rate": 8.936204663212436e-06, "loss": 0.8494, "mean_token_accuracy": 0.8887249529361725, "num_tokens": 1178797.0, "step": 658 }, { "epoch": 0.10671200712492916, "grad_norm": 35.25375747680664, "learning_rate": 8.93458549222798e-06, "loss": 0.814, "mean_token_accuracy": 0.9076961874961853, "num_tokens": 1180591.0, "step": 659 }, { "epoch": 0.10687393733300947, "grad_norm": 47.49795913696289, "learning_rate": 8.932966321243523e-06, "loss": 0.8879, "mean_token_accuracy": 0.8851646780967712, "num_tokens": 1182390.0, "step": 660 }, { "epoch": 0.10703586754108979, "grad_norm": 29.93039321899414, "learning_rate": 8.931347150259068e-06, "loss": 0.6396, "mean_token_accuracy": 0.9158540070056915, "num_tokens": 1184187.0, "step": 661 }, { "epoch": 0.10719779774917011, "grad_norm": 39.823307037353516, "learning_rate": 8.929727979274612e-06, "loss": 0.966, "mean_token_accuracy": 0.8834685385227203, "num_tokens": 1185980.0, "step": 662 }, { "epoch": 0.10735972795725042, "grad_norm": 36.069766998291016, "learning_rate": 8.928108808290155e-06, "loss": 0.7925, "mean_token_accuracy": 0.8881153464317322, "num_tokens": 1187769.0, "step": 663 }, { "epoch": 0.10752165816533074, "grad_norm": 38.42499923706055, "learning_rate": 8.9264896373057e-06, "loss": 0.7749, "mean_token_accuracy": 0.882578045129776, "num_tokens": 1189562.0, "step": 664 }, { "epoch": 0.10768358837341106, "grad_norm": 43.1353874206543, "learning_rate": 8.924870466321244e-06, "loss": 0.8533, "mean_token_accuracy": 0.8968591690063477, "num_tokens": 1191355.0, "step": 665 }, { "epoch": 0.10784551858149138, "grad_norm": 26.2728214263916, "learning_rate": 8.923251295336788e-06, "loss": 0.5897, "mean_token_accuracy": 0.9280426800251007, "num_tokens": 1193145.0, "step": 666 }, { "epoch": 0.10800744878957169, "grad_norm": 37.30793762207031, "learning_rate": 8.921632124352331e-06, "loss": 0.8028, "mean_token_accuracy": 0.8986742198467255, "num_tokens": 1194933.0, "step": 667 }, { "epoch": 0.10816937899765201, "grad_norm": 38.652015686035156, "learning_rate": 8.920012953367877e-06, "loss": 0.7444, "mean_token_accuracy": 0.8946167230606079, "num_tokens": 1196729.0, "step": 668 }, { "epoch": 0.10833130920573233, "grad_norm": 34.655982971191406, "learning_rate": 8.91839378238342e-06, "loss": 0.8399, "mean_token_accuracy": 0.8971447348594666, "num_tokens": 1198513.0, "step": 669 }, { "epoch": 0.10849323941381264, "grad_norm": 41.78131866455078, "learning_rate": 8.916774611398964e-06, "loss": 0.9931, "mean_token_accuracy": 0.8818943798542023, "num_tokens": 1200323.0, "step": 670 }, { "epoch": 0.10865516962189296, "grad_norm": 35.171539306640625, "learning_rate": 8.915155440414507e-06, "loss": 0.9169, "mean_token_accuracy": 0.8848682343959808, "num_tokens": 1202113.0, "step": 671 }, { "epoch": 0.10881709982997329, "grad_norm": 33.28158950805664, "learning_rate": 8.913536269430053e-06, "loss": 0.8622, "mean_token_accuracy": 0.8812186121940613, "num_tokens": 1203903.0, "step": 672 }, { "epoch": 0.10897903003805359, "grad_norm": 38.598793029785156, "learning_rate": 8.911917098445596e-06, "loss": 0.8656, "mean_token_accuracy": 0.886401355266571, "num_tokens": 1205697.0, "step": 673 }, { "epoch": 0.10914096024613391, "grad_norm": 29.30876350402832, "learning_rate": 8.91029792746114e-06, "loss": 0.7139, "mean_token_accuracy": 0.9107498526573181, "num_tokens": 1207478.0, "step": 674 }, { "epoch": 0.10930289045421424, "grad_norm": 27.58310890197754, "learning_rate": 8.908678756476683e-06, "loss": 0.6573, "mean_token_accuracy": 0.9163265228271484, "num_tokens": 1209277.0, "step": 675 }, { "epoch": 0.10946482066229456, "grad_norm": 29.989320755004883, "learning_rate": 8.907059585492229e-06, "loss": 0.8035, "mean_token_accuracy": 0.9001165926456451, "num_tokens": 1211070.0, "step": 676 }, { "epoch": 0.10962675087037486, "grad_norm": 38.820762634277344, "learning_rate": 8.905440414507774e-06, "loss": 0.888, "mean_token_accuracy": 0.893869936466217, "num_tokens": 1212856.0, "step": 677 }, { "epoch": 0.10978868107845519, "grad_norm": 28.231014251708984, "learning_rate": 8.903821243523318e-06, "loss": 0.6537, "mean_token_accuracy": 0.9022297263145447, "num_tokens": 1214646.0, "step": 678 }, { "epoch": 0.10995061128653551, "grad_norm": 26.858198165893555, "learning_rate": 8.902202072538861e-06, "loss": 0.6189, "mean_token_accuracy": 0.9097744226455688, "num_tokens": 1216424.0, "step": 679 }, { "epoch": 0.11011254149461581, "grad_norm": 35.589088439941406, "learning_rate": 8.900582901554405e-06, "loss": 0.8308, "mean_token_accuracy": 0.8960067927837372, "num_tokens": 1218215.0, "step": 680 }, { "epoch": 0.11027447170269614, "grad_norm": 37.692317962646484, "learning_rate": 8.89896373056995e-06, "loss": 0.9541, "mean_token_accuracy": 0.889007955789566, "num_tokens": 1220007.0, "step": 681 }, { "epoch": 0.11043640191077646, "grad_norm": 32.157203674316406, "learning_rate": 8.897344559585494e-06, "loss": 0.7772, "mean_token_accuracy": 0.8979166448116302, "num_tokens": 1221803.0, "step": 682 }, { "epoch": 0.11059833211885678, "grad_norm": 27.846221923828125, "learning_rate": 8.895725388601037e-06, "loss": 0.7194, "mean_token_accuracy": 0.9058738052845001, "num_tokens": 1223593.0, "step": 683 }, { "epoch": 0.11076026232693709, "grad_norm": 31.302717208862305, "learning_rate": 8.89410621761658e-06, "loss": 0.848, "mean_token_accuracy": 0.8885755240917206, "num_tokens": 1225374.0, "step": 684 }, { "epoch": 0.11092219253501741, "grad_norm": 34.415802001953125, "learning_rate": 8.892487046632126e-06, "loss": 0.8123, "mean_token_accuracy": 0.8927008211612701, "num_tokens": 1227176.0, "step": 685 }, { "epoch": 0.11108412274309773, "grad_norm": 38.12055587768555, "learning_rate": 8.89086787564767e-06, "loss": 0.8681, "mean_token_accuracy": 0.8892121016979218, "num_tokens": 1228968.0, "step": 686 }, { "epoch": 0.11124605295117804, "grad_norm": 39.62919616699219, "learning_rate": 8.889248704663213e-06, "loss": 1.0607, "mean_token_accuracy": 0.8866875171661377, "num_tokens": 1230764.0, "step": 687 }, { "epoch": 0.11140798315925836, "grad_norm": 29.010278701782227, "learning_rate": 8.887629533678757e-06, "loss": 0.7236, "mean_token_accuracy": 0.9052418172359467, "num_tokens": 1232550.0, "step": 688 }, { "epoch": 0.11156991336733868, "grad_norm": 28.291397094726562, "learning_rate": 8.886010362694302e-06, "loss": 0.6895, "mean_token_accuracy": 0.9096193909645081, "num_tokens": 1234339.0, "step": 689 }, { "epoch": 0.11173184357541899, "grad_norm": 34.68824768066406, "learning_rate": 8.884391191709846e-06, "loss": 0.8723, "mean_token_accuracy": 0.9032531678676605, "num_tokens": 1236130.0, "step": 690 }, { "epoch": 0.11189377378349931, "grad_norm": 38.88625717163086, "learning_rate": 8.88277202072539e-06, "loss": 1.1191, "mean_token_accuracy": 0.8669329583644867, "num_tokens": 1237919.0, "step": 691 }, { "epoch": 0.11205570399157963, "grad_norm": 28.062644958496094, "learning_rate": 8.881152849740935e-06, "loss": 0.6811, "mean_token_accuracy": 0.9083270728588104, "num_tokens": 1239704.0, "step": 692 }, { "epoch": 0.11221763419965995, "grad_norm": 26.984647750854492, "learning_rate": 8.879533678756478e-06, "loss": 0.6311, "mean_token_accuracy": 0.9071076214313507, "num_tokens": 1241495.0, "step": 693 }, { "epoch": 0.11237956440774026, "grad_norm": 33.64918518066406, "learning_rate": 8.877914507772022e-06, "loss": 0.9416, "mean_token_accuracy": 0.9012126624584198, "num_tokens": 1243292.0, "step": 694 }, { "epoch": 0.11254149461582058, "grad_norm": 33.74709701538086, "learning_rate": 8.876295336787565e-06, "loss": 0.8171, "mean_token_accuracy": 0.88856241106987, "num_tokens": 1245091.0, "step": 695 }, { "epoch": 0.1127034248239009, "grad_norm": 32.96370315551758, "learning_rate": 8.87467616580311e-06, "loss": 0.7269, "mean_token_accuracy": 0.8838293552398682, "num_tokens": 1246887.0, "step": 696 }, { "epoch": 0.11286535503198121, "grad_norm": 27.40631675720215, "learning_rate": 8.873056994818654e-06, "loss": 0.6132, "mean_token_accuracy": 0.91847363114357, "num_tokens": 1248681.0, "step": 697 }, { "epoch": 0.11302728524006153, "grad_norm": 31.34441375732422, "learning_rate": 8.871437823834198e-06, "loss": 0.8418, "mean_token_accuracy": 0.8975626528263092, "num_tokens": 1250476.0, "step": 698 }, { "epoch": 0.11318921544814185, "grad_norm": 38.183109283447266, "learning_rate": 8.869818652849741e-06, "loss": 0.8731, "mean_token_accuracy": 0.8779591619968414, "num_tokens": 1252273.0, "step": 699 }, { "epoch": 0.11335114565622217, "grad_norm": 33.806419372558594, "learning_rate": 8.868199481865287e-06, "loss": 0.662, "mean_token_accuracy": 0.9022125005722046, "num_tokens": 1254070.0, "step": 700 }, { "epoch": 0.11351307586430248, "grad_norm": 30.135028839111328, "learning_rate": 8.86658031088083e-06, "loss": 0.6622, "mean_token_accuracy": 0.9025118350982666, "num_tokens": 1255867.0, "step": 701 }, { "epoch": 0.1136750060723828, "grad_norm": 29.978445053100586, "learning_rate": 8.864961139896374e-06, "loss": 0.7296, "mean_token_accuracy": 0.9041289985179901, "num_tokens": 1257650.0, "step": 702 }, { "epoch": 0.11383693628046312, "grad_norm": 30.304397583007812, "learning_rate": 8.863341968911917e-06, "loss": 0.7728, "mean_token_accuracy": 0.9004205167293549, "num_tokens": 1259443.0, "step": 703 }, { "epoch": 0.11399886648854343, "grad_norm": 26.6141357421875, "learning_rate": 8.861722797927463e-06, "loss": 0.7837, "mean_token_accuracy": 0.9057525396347046, "num_tokens": 1261231.0, "step": 704 }, { "epoch": 0.11416079669662375, "grad_norm": 45.95237731933594, "learning_rate": 8.860103626943006e-06, "loss": 1.2493, "mean_token_accuracy": 0.8576714396476746, "num_tokens": 1263038.0, "step": 705 }, { "epoch": 0.11432272690470408, "grad_norm": 30.297012329101562, "learning_rate": 8.85848445595855e-06, "loss": 0.8052, "mean_token_accuracy": 0.8910098671913147, "num_tokens": 1264835.0, "step": 706 }, { "epoch": 0.1144846571127844, "grad_norm": 28.939964294433594, "learning_rate": 8.856865284974094e-06, "loss": 0.7305, "mean_token_accuracy": 0.9061901271343231, "num_tokens": 1266625.0, "step": 707 }, { "epoch": 0.1146465873208647, "grad_norm": 32.28270721435547, "learning_rate": 8.855246113989639e-06, "loss": 0.8299, "mean_token_accuracy": 0.8883920013904572, "num_tokens": 1268424.0, "step": 708 }, { "epoch": 0.11480851752894503, "grad_norm": 32.24588394165039, "learning_rate": 8.853626943005182e-06, "loss": 0.8408, "mean_token_accuracy": 0.8886352479457855, "num_tokens": 1270223.0, "step": 709 }, { "epoch": 0.11497044773702535, "grad_norm": 38.89261245727539, "learning_rate": 8.852007772020726e-06, "loss": 0.7924, "mean_token_accuracy": 0.8886702656745911, "num_tokens": 1272013.0, "step": 710 }, { "epoch": 0.11513237794510565, "grad_norm": 30.0701904296875, "learning_rate": 8.850388601036271e-06, "loss": 0.9498, "mean_token_accuracy": 0.8905204236507416, "num_tokens": 1273808.0, "step": 711 }, { "epoch": 0.11529430815318598, "grad_norm": 33.369205474853516, "learning_rate": 8.848769430051815e-06, "loss": 0.8784, "mean_token_accuracy": 0.8914824426174164, "num_tokens": 1275605.0, "step": 712 }, { "epoch": 0.1154562383612663, "grad_norm": 33.0394287109375, "learning_rate": 8.847150259067358e-06, "loss": 0.9072, "mean_token_accuracy": 0.8923611044883728, "num_tokens": 1277405.0, "step": 713 }, { "epoch": 0.1156181685693466, "grad_norm": 21.442638397216797, "learning_rate": 8.845531088082902e-06, "loss": 0.5428, "mean_token_accuracy": 0.9266505837440491, "num_tokens": 1279190.0, "step": 714 }, { "epoch": 0.11578009877742693, "grad_norm": 38.59785842895508, "learning_rate": 8.843911917098447e-06, "loss": 0.9538, "mean_token_accuracy": 0.8735786378383636, "num_tokens": 1280986.0, "step": 715 }, { "epoch": 0.11594202898550725, "grad_norm": 37.1681022644043, "learning_rate": 8.842292746113991e-06, "loss": 0.8548, "mean_token_accuracy": 0.8747104108333588, "num_tokens": 1282786.0, "step": 716 }, { "epoch": 0.11610395919358757, "grad_norm": 30.42218589782715, "learning_rate": 8.840673575129535e-06, "loss": 0.8087, "mean_token_accuracy": 0.8992751240730286, "num_tokens": 1284575.0, "step": 717 }, { "epoch": 0.11626588940166788, "grad_norm": 41.17758560180664, "learning_rate": 8.839054404145078e-06, "loss": 0.995, "mean_token_accuracy": 0.8808045089244843, "num_tokens": 1286372.0, "step": 718 }, { "epoch": 0.1164278196097482, "grad_norm": 28.876922607421875, "learning_rate": 8.837435233160623e-06, "loss": 0.7285, "mean_token_accuracy": 0.8978873491287231, "num_tokens": 1288168.0, "step": 719 }, { "epoch": 0.11658974981782852, "grad_norm": 32.4138069152832, "learning_rate": 8.835816062176167e-06, "loss": 0.8988, "mean_token_accuracy": 0.9073280394077301, "num_tokens": 1289960.0, "step": 720 }, { "epoch": 0.11675168002590883, "grad_norm": 36.10765075683594, "learning_rate": 8.83419689119171e-06, "loss": 0.8341, "mean_token_accuracy": 0.9030250310897827, "num_tokens": 1291760.0, "step": 721 }, { "epoch": 0.11691361023398915, "grad_norm": 32.35591125488281, "learning_rate": 8.832577720207254e-06, "loss": 0.9178, "mean_token_accuracy": 0.8790132105350494, "num_tokens": 1293545.0, "step": 722 }, { "epoch": 0.11707554044206947, "grad_norm": 30.638582229614258, "learning_rate": 8.8309585492228e-06, "loss": 0.6793, "mean_token_accuracy": 0.8979054987430573, "num_tokens": 1295341.0, "step": 723 }, { "epoch": 0.11723747065014979, "grad_norm": 31.42913246154785, "learning_rate": 8.829339378238343e-06, "loss": 0.8654, "mean_token_accuracy": 0.8879020512104034, "num_tokens": 1297129.0, "step": 724 }, { "epoch": 0.1173994008582301, "grad_norm": 34.921226501464844, "learning_rate": 8.827720207253887e-06, "loss": 0.8094, "mean_token_accuracy": 0.895411878824234, "num_tokens": 1298918.0, "step": 725 }, { "epoch": 0.11756133106631042, "grad_norm": 33.83235168457031, "learning_rate": 8.826101036269432e-06, "loss": 0.787, "mean_token_accuracy": 0.8928516805171967, "num_tokens": 1300710.0, "step": 726 }, { "epoch": 0.11772326127439074, "grad_norm": 35.45464324951172, "learning_rate": 8.824481865284975e-06, "loss": 0.9478, "mean_token_accuracy": 0.8833283185958862, "num_tokens": 1302505.0, "step": 727 }, { "epoch": 0.11788519148247105, "grad_norm": 23.681251525878906, "learning_rate": 8.822862694300519e-06, "loss": 0.6374, "mean_token_accuracy": 0.9216992855072021, "num_tokens": 1304298.0, "step": 728 }, { "epoch": 0.11804712169055137, "grad_norm": 28.124034881591797, "learning_rate": 8.821243523316063e-06, "loss": 0.7163, "mean_token_accuracy": 0.9000459313392639, "num_tokens": 1306090.0, "step": 729 }, { "epoch": 0.11820905189863169, "grad_norm": 30.2061824798584, "learning_rate": 8.819624352331608e-06, "loss": 0.7736, "mean_token_accuracy": 0.9000459313392639, "num_tokens": 1307882.0, "step": 730 }, { "epoch": 0.11837098210671201, "grad_norm": 27.444787979125977, "learning_rate": 8.818005181347152e-06, "loss": 0.6383, "mean_token_accuracy": 0.9054268598556519, "num_tokens": 1309669.0, "step": 731 }, { "epoch": 0.11853291231479232, "grad_norm": 30.661623001098633, "learning_rate": 8.816386010362695e-06, "loss": 0.6903, "mean_token_accuracy": 0.9084208011627197, "num_tokens": 1311464.0, "step": 732 }, { "epoch": 0.11869484252287264, "grad_norm": 18.264039993286133, "learning_rate": 8.814766839378239e-06, "loss": 0.5503, "mean_token_accuracy": 0.9261902272701263, "num_tokens": 1313247.0, "step": 733 }, { "epoch": 0.11885677273095296, "grad_norm": 28.01018714904785, "learning_rate": 8.813147668393784e-06, "loss": 0.6969, "mean_token_accuracy": 0.9010291695594788, "num_tokens": 1315042.0, "step": 734 }, { "epoch": 0.11901870293903327, "grad_norm": 25.607126235961914, "learning_rate": 8.811528497409328e-06, "loss": 0.7184, "mean_token_accuracy": 0.9112319052219391, "num_tokens": 1316836.0, "step": 735 }, { "epoch": 0.1191806331471136, "grad_norm": 44.4820556640625, "learning_rate": 8.809909326424871e-06, "loss": 0.9638, "mean_token_accuracy": 0.8848186731338501, "num_tokens": 1318634.0, "step": 736 }, { "epoch": 0.11934256335519391, "grad_norm": 38.70259475708008, "learning_rate": 8.808290155440415e-06, "loss": 0.9395, "mean_token_accuracy": 0.8745207190513611, "num_tokens": 1320430.0, "step": 737 }, { "epoch": 0.11950449356327422, "grad_norm": 39.5324592590332, "learning_rate": 8.80667098445596e-06, "loss": 0.9776, "mean_token_accuracy": 0.8870314955711365, "num_tokens": 1322225.0, "step": 738 }, { "epoch": 0.11966642377135454, "grad_norm": 29.941497802734375, "learning_rate": 8.805051813471504e-06, "loss": 0.6261, "mean_token_accuracy": 0.89882692694664, "num_tokens": 1324014.0, "step": 739 }, { "epoch": 0.11982835397943487, "grad_norm": 38.259830474853516, "learning_rate": 8.803432642487047e-06, "loss": 0.6989, "mean_token_accuracy": 0.9024127423763275, "num_tokens": 1325812.0, "step": 740 }, { "epoch": 0.11999028418751519, "grad_norm": 32.23947525024414, "learning_rate": 8.80181347150259e-06, "loss": 0.7983, "mean_token_accuracy": 0.8921913802623749, "num_tokens": 1327602.0, "step": 741 }, { "epoch": 0.1201522143955955, "grad_norm": 26.683002471923828, "learning_rate": 8.800194300518136e-06, "loss": 0.6095, "mean_token_accuracy": 0.916133850812912, "num_tokens": 1329388.0, "step": 742 }, { "epoch": 0.12031414460367582, "grad_norm": 31.33710289001465, "learning_rate": 8.79857512953368e-06, "loss": 0.7804, "mean_token_accuracy": 0.8904584050178528, "num_tokens": 1331174.0, "step": 743 }, { "epoch": 0.12047607481175614, "grad_norm": 36.16375732421875, "learning_rate": 8.796955958549223e-06, "loss": 0.9147, "mean_token_accuracy": 0.8830054998397827, "num_tokens": 1332968.0, "step": 744 }, { "epoch": 0.12063800501983644, "grad_norm": 58.49775695800781, "learning_rate": 8.795336787564769e-06, "loss": 0.8585, "mean_token_accuracy": 0.8914824426174164, "num_tokens": 1334765.0, "step": 745 }, { "epoch": 0.12079993522791677, "grad_norm": 38.21027374267578, "learning_rate": 8.793717616580312e-06, "loss": 0.8303, "mean_token_accuracy": 0.8922150731086731, "num_tokens": 1336547.0, "step": 746 }, { "epoch": 0.12096186543599709, "grad_norm": 40.312984466552734, "learning_rate": 8.792098445595856e-06, "loss": 1.0097, "mean_token_accuracy": 0.8784005641937256, "num_tokens": 1338347.0, "step": 747 }, { "epoch": 0.12112379564407741, "grad_norm": 33.33052062988281, "learning_rate": 8.7904792746114e-06, "loss": 0.8333, "mean_token_accuracy": 0.8948729038238525, "num_tokens": 1340144.0, "step": 748 }, { "epoch": 0.12128572585215772, "grad_norm": 39.75764846801758, "learning_rate": 8.788860103626945e-06, "loss": 1.027, "mean_token_accuracy": 0.8861840069293976, "num_tokens": 1341937.0, "step": 749 }, { "epoch": 0.12144765606023804, "grad_norm": 27.47093391418457, "learning_rate": 8.787240932642488e-06, "loss": 0.6893, "mean_token_accuracy": 0.8968908786773682, "num_tokens": 1343731.0, "step": 750 }, { "epoch": 0.12160958626831836, "grad_norm": 37.69817352294922, "learning_rate": 8.785621761658032e-06, "loss": 1.0947, "mean_token_accuracy": 0.8614116609096527, "num_tokens": 1345539.0, "step": 751 }, { "epoch": 0.12177151647639867, "grad_norm": 31.21765899658203, "learning_rate": 8.784002590673575e-06, "loss": 0.7884, "mean_token_accuracy": 0.8887874782085419, "num_tokens": 1347320.0, "step": 752 }, { "epoch": 0.12193344668447899, "grad_norm": 48.6146125793457, "learning_rate": 8.78238341968912e-06, "loss": 0.9298, "mean_token_accuracy": 0.8782394230365753, "num_tokens": 1349111.0, "step": 753 }, { "epoch": 0.12209537689255931, "grad_norm": 34.6551513671875, "learning_rate": 8.780764248704664e-06, "loss": 0.9379, "mean_token_accuracy": 0.8879284858703613, "num_tokens": 1350908.0, "step": 754 }, { "epoch": 0.12225730710063963, "grad_norm": 40.05681610107422, "learning_rate": 8.779145077720208e-06, "loss": 0.9149, "mean_token_accuracy": 0.8786833882331848, "num_tokens": 1352708.0, "step": 755 }, { "epoch": 0.12241923730871994, "grad_norm": 36.126956939697266, "learning_rate": 8.777525906735751e-06, "loss": 0.9102, "mean_token_accuracy": 0.8810479640960693, "num_tokens": 1354497.0, "step": 756 }, { "epoch": 0.12258116751680026, "grad_norm": 26.102785110473633, "learning_rate": 8.775906735751297e-06, "loss": 0.697, "mean_token_accuracy": 0.9042248427867889, "num_tokens": 1356289.0, "step": 757 }, { "epoch": 0.12274309772488058, "grad_norm": 21.950023651123047, "learning_rate": 8.77428756476684e-06, "loss": 0.636, "mean_token_accuracy": 0.9039300382137299, "num_tokens": 1358082.0, "step": 758 }, { "epoch": 0.12290502793296089, "grad_norm": 27.387094497680664, "learning_rate": 8.772668393782384e-06, "loss": 0.7485, "mean_token_accuracy": 0.8994036912918091, "num_tokens": 1359872.0, "step": 759 }, { "epoch": 0.12306695814104121, "grad_norm": 23.670595169067383, "learning_rate": 8.771049222797927e-06, "loss": 0.6562, "mean_token_accuracy": 0.920149177312851, "num_tokens": 1361659.0, "step": 760 }, { "epoch": 0.12322888834912153, "grad_norm": 34.474918365478516, "learning_rate": 8.769430051813473e-06, "loss": 0.7429, "mean_token_accuracy": 0.8982540667057037, "num_tokens": 1363465.0, "step": 761 }, { "epoch": 0.12339081855720184, "grad_norm": 42.511199951171875, "learning_rate": 8.767810880829016e-06, "loss": 1.0044, "mean_token_accuracy": 0.8815292119979858, "num_tokens": 1365255.0, "step": 762 }, { "epoch": 0.12355274876528216, "grad_norm": 25.6155948638916, "learning_rate": 8.76619170984456e-06, "loss": 0.6854, "mean_token_accuracy": 0.9093892574310303, "num_tokens": 1367043.0, "step": 763 }, { "epoch": 0.12371467897336248, "grad_norm": 35.98655319213867, "learning_rate": 8.764572538860105e-06, "loss": 1.0008, "mean_token_accuracy": 0.8880626261234283, "num_tokens": 1368841.0, "step": 764 }, { "epoch": 0.1238766091814428, "grad_norm": 34.600765228271484, "learning_rate": 8.762953367875649e-06, "loss": 0.7616, "mean_token_accuracy": 0.886418491601944, "num_tokens": 1370635.0, "step": 765 }, { "epoch": 0.12403853938952311, "grad_norm": 24.08222770690918, "learning_rate": 8.761334196891192e-06, "loss": 0.6396, "mean_token_accuracy": 0.9126984179019928, "num_tokens": 1372422.0, "step": 766 }, { "epoch": 0.12420046959760343, "grad_norm": 25.452463150024414, "learning_rate": 8.759715025906736e-06, "loss": 0.6791, "mean_token_accuracy": 0.9114472568035126, "num_tokens": 1374215.0, "step": 767 }, { "epoch": 0.12436239980568375, "grad_norm": 27.962905883789062, "learning_rate": 8.758095854922281e-06, "loss": 0.7791, "mean_token_accuracy": 0.9067831337451935, "num_tokens": 1376006.0, "step": 768 }, { "epoch": 0.12452433001376406, "grad_norm": 29.30245590209961, "learning_rate": 8.756476683937825e-06, "loss": 0.8984, "mean_token_accuracy": 0.8857174217700958, "num_tokens": 1377789.0, "step": 769 }, { "epoch": 0.12468626022184438, "grad_norm": 18.69622230529785, "learning_rate": 8.754857512953368e-06, "loss": 0.5596, "mean_token_accuracy": 0.9188180863857269, "num_tokens": 1379572.0, "step": 770 }, { "epoch": 0.1248481904299247, "grad_norm": 22.967805862426758, "learning_rate": 8.753238341968912e-06, "loss": 0.691, "mean_token_accuracy": 0.9044117629528046, "num_tokens": 1381356.0, "step": 771 }, { "epoch": 0.125010120638005, "grad_norm": 23.452070236206055, "learning_rate": 8.751619170984457e-06, "loss": 0.6444, "mean_token_accuracy": 0.9067532122135162, "num_tokens": 1383136.0, "step": 772 }, { "epoch": 0.12517205084608535, "grad_norm": 34.1168098449707, "learning_rate": 8.750000000000001e-06, "loss": 0.7558, "mean_token_accuracy": 0.8844388723373413, "num_tokens": 1384925.0, "step": 773 }, { "epoch": 0.12533398105416566, "grad_norm": 30.217897415161133, "learning_rate": 8.748380829015545e-06, "loss": 0.7849, "mean_token_accuracy": 0.9047619104385376, "num_tokens": 1386721.0, "step": 774 }, { "epoch": 0.12549591126224596, "grad_norm": 28.14794921875, "learning_rate": 8.746761658031088e-06, "loss": 0.6556, "mean_token_accuracy": 0.9149396419525146, "num_tokens": 1388515.0, "step": 775 }, { "epoch": 0.1256578414703263, "grad_norm": 30.352880477905273, "learning_rate": 8.745142487046633e-06, "loss": 0.7497, "mean_token_accuracy": 0.8929492235183716, "num_tokens": 1390306.0, "step": 776 }, { "epoch": 0.1258197716784066, "grad_norm": 30.50710105895996, "learning_rate": 8.743523316062177e-06, "loss": 0.7603, "mean_token_accuracy": 0.9083270728588104, "num_tokens": 1392091.0, "step": 777 }, { "epoch": 0.1259817018864869, "grad_norm": 28.93914031982422, "learning_rate": 8.74190414507772e-06, "loss": 0.7015, "mean_token_accuracy": 0.9137163758277893, "num_tokens": 1393881.0, "step": 778 }, { "epoch": 0.12614363209456725, "grad_norm": 32.38669204711914, "learning_rate": 8.740284974093264e-06, "loss": 0.8076, "mean_token_accuracy": 0.8885893523693085, "num_tokens": 1395680.0, "step": 779 }, { "epoch": 0.12630556230264756, "grad_norm": 30.107797622680664, "learning_rate": 8.73866580310881e-06, "loss": 0.6413, "mean_token_accuracy": 0.9154411852359772, "num_tokens": 1397464.0, "step": 780 }, { "epoch": 0.12646749251072786, "grad_norm": 26.293188095092773, "learning_rate": 8.737046632124353e-06, "loss": 0.6918, "mean_token_accuracy": 0.908279538154602, "num_tokens": 1399258.0, "step": 781 }, { "epoch": 0.1266294227188082, "grad_norm": 35.28606033325195, "learning_rate": 8.735427461139897e-06, "loss": 0.9475, "mean_token_accuracy": 0.8830456137657166, "num_tokens": 1401044.0, "step": 782 }, { "epoch": 0.1267913529268885, "grad_norm": 27.882164001464844, "learning_rate": 8.733808290155442e-06, "loss": 0.6812, "mean_token_accuracy": 0.9055748581886292, "num_tokens": 1402842.0, "step": 783 }, { "epoch": 0.12695328313496884, "grad_norm": 25.17303466796875, "learning_rate": 8.732189119170985e-06, "loss": 0.6765, "mean_token_accuracy": 0.9154636561870575, "num_tokens": 1404626.0, "step": 784 }, { "epoch": 0.12711521334304915, "grad_norm": 26.61556053161621, "learning_rate": 8.730569948186529e-06, "loss": 0.6077, "mean_token_accuracy": 0.9111111164093018, "num_tokens": 1406408.0, "step": 785 }, { "epoch": 0.12727714355112946, "grad_norm": 33.89521408081055, "learning_rate": 8.728950777202073e-06, "loss": 0.7766, "mean_token_accuracy": 0.8943609595298767, "num_tokens": 1408204.0, "step": 786 }, { "epoch": 0.1274390737592098, "grad_norm": 28.257461547851562, "learning_rate": 8.727331606217618e-06, "loss": 0.7052, "mean_token_accuracy": 0.9011698365211487, "num_tokens": 1409998.0, "step": 787 }, { "epoch": 0.1276010039672901, "grad_norm": 34.67573547363281, "learning_rate": 8.725712435233162e-06, "loss": 0.8196, "mean_token_accuracy": 0.8919222056865692, "num_tokens": 1411787.0, "step": 788 }, { "epoch": 0.1277629341753704, "grad_norm": 35.834556579589844, "learning_rate": 8.724093264248705e-06, "loss": 0.8941, "mean_token_accuracy": 0.8900547325611115, "num_tokens": 1413580.0, "step": 789 }, { "epoch": 0.12792486438345074, "grad_norm": 38.697296142578125, "learning_rate": 8.722474093264249e-06, "loss": 0.9277, "mean_token_accuracy": 0.8904455006122589, "num_tokens": 1415375.0, "step": 790 }, { "epoch": 0.12808679459153105, "grad_norm": 39.40624237060547, "learning_rate": 8.720854922279794e-06, "loss": 1.0274, "mean_token_accuracy": 0.877743273973465, "num_tokens": 1417165.0, "step": 791 }, { "epoch": 0.12824872479961136, "grad_norm": 28.56023406982422, "learning_rate": 8.719235751295338e-06, "loss": 0.748, "mean_token_accuracy": 0.9094496071338654, "num_tokens": 1418962.0, "step": 792 }, { "epoch": 0.1284106550076917, "grad_norm": 29.009428024291992, "learning_rate": 8.717616580310881e-06, "loss": 0.754, "mean_token_accuracy": 0.8983812630176544, "num_tokens": 1420758.0, "step": 793 }, { "epoch": 0.128572585215772, "grad_norm": 32.4525260925293, "learning_rate": 8.715997409326425e-06, "loss": 0.723, "mean_token_accuracy": 0.9010837972164154, "num_tokens": 1422553.0, "step": 794 }, { "epoch": 0.1287345154238523, "grad_norm": 29.586387634277344, "learning_rate": 8.71437823834197e-06, "loss": 0.7636, "mean_token_accuracy": 0.9029796719551086, "num_tokens": 1424333.0, "step": 795 }, { "epoch": 0.12889644563193264, "grad_norm": 25.813024520874023, "learning_rate": 8.712759067357514e-06, "loss": 0.839, "mean_token_accuracy": 0.8931864202022552, "num_tokens": 1426126.0, "step": 796 }, { "epoch": 0.12905837584001295, "grad_norm": 27.574996948242188, "learning_rate": 8.711139896373057e-06, "loss": 0.6717, "mean_token_accuracy": 0.8982804119586945, "num_tokens": 1427913.0, "step": 797 }, { "epoch": 0.12922030604809326, "grad_norm": 31.408742904663086, "learning_rate": 8.7095207253886e-06, "loss": 0.9606, "mean_token_accuracy": 0.871626079082489, "num_tokens": 1429705.0, "step": 798 }, { "epoch": 0.1293822362561736, "grad_norm": 27.938873291015625, "learning_rate": 8.707901554404146e-06, "loss": 0.6794, "mean_token_accuracy": 0.8991718590259552, "num_tokens": 1431495.0, "step": 799 }, { "epoch": 0.1295441664642539, "grad_norm": 35.222267150878906, "learning_rate": 8.70628238341969e-06, "loss": 0.9042, "mean_token_accuracy": 0.8914916217327118, "num_tokens": 1433283.0, "step": 800 }, { "epoch": 0.12970609667233424, "grad_norm": 27.772075653076172, "learning_rate": 8.704663212435233e-06, "loss": 0.7787, "mean_token_accuracy": 0.8973808586597443, "num_tokens": 1435068.0, "step": 801 }, { "epoch": 0.12986802688041454, "grad_norm": 26.73502540588379, "learning_rate": 8.703044041450779e-06, "loss": 0.7193, "mean_token_accuracy": 0.8992805778980255, "num_tokens": 1436858.0, "step": 802 }, { "epoch": 0.13002995708849485, "grad_norm": 41.384403228759766, "learning_rate": 8.701424870466322e-06, "loss": 1.1225, "mean_token_accuracy": 0.8648176491260529, "num_tokens": 1438651.0, "step": 803 }, { "epoch": 0.1301918872965752, "grad_norm": 29.48305892944336, "learning_rate": 8.699805699481866e-06, "loss": 0.735, "mean_token_accuracy": 0.8992060720920563, "num_tokens": 1440439.0, "step": 804 }, { "epoch": 0.1303538175046555, "grad_norm": 36.73165512084961, "learning_rate": 8.69818652849741e-06, "loss": 0.8788, "mean_token_accuracy": 0.8806320428848267, "num_tokens": 1442228.0, "step": 805 }, { "epoch": 0.1305157477127358, "grad_norm": 24.63804054260254, "learning_rate": 8.696567357512955e-06, "loss": 0.6966, "mean_token_accuracy": 0.9103121757507324, "num_tokens": 1444018.0, "step": 806 }, { "epoch": 0.13067767792081614, "grad_norm": 31.458341598510742, "learning_rate": 8.694948186528498e-06, "loss": 0.8644, "mean_token_accuracy": 0.8865043222904205, "num_tokens": 1445811.0, "step": 807 }, { "epoch": 0.13083960812889645, "grad_norm": 34.828094482421875, "learning_rate": 8.693329015544042e-06, "loss": 0.9492, "mean_token_accuracy": 0.8981518447399139, "num_tokens": 1447607.0, "step": 808 }, { "epoch": 0.13100153833697675, "grad_norm": 34.25908279418945, "learning_rate": 8.691709844559585e-06, "loss": 0.7753, "mean_token_accuracy": 0.8959731459617615, "num_tokens": 1449417.0, "step": 809 }, { "epoch": 0.1311634685450571, "grad_norm": 36.357547760009766, "learning_rate": 8.69009067357513e-06, "loss": 1.0524, "mean_token_accuracy": 0.8840375542640686, "num_tokens": 1451221.0, "step": 810 }, { "epoch": 0.1313253987531374, "grad_norm": 39.3553352355957, "learning_rate": 8.688471502590674e-06, "loss": 1.0092, "mean_token_accuracy": 0.8818948864936829, "num_tokens": 1453005.0, "step": 811 }, { "epoch": 0.1314873289612177, "grad_norm": 27.179302215576172, "learning_rate": 8.686852331606218e-06, "loss": 0.7451, "mean_token_accuracy": 0.8962962925434113, "num_tokens": 1454796.0, "step": 812 }, { "epoch": 0.13164925916929804, "grad_norm": 25.398508071899414, "learning_rate": 8.685233160621761e-06, "loss": 0.7274, "mean_token_accuracy": 0.8970921039581299, "num_tokens": 1456590.0, "step": 813 }, { "epoch": 0.13181118937737835, "grad_norm": 32.33420181274414, "learning_rate": 8.683613989637307e-06, "loss": 0.8641, "mean_token_accuracy": 0.9021242558956146, "num_tokens": 1458389.0, "step": 814 }, { "epoch": 0.13197311958545865, "grad_norm": 27.961353302001953, "learning_rate": 8.68199481865285e-06, "loss": 0.802, "mean_token_accuracy": 0.890135258436203, "num_tokens": 1460174.0, "step": 815 }, { "epoch": 0.132135049793539, "grad_norm": 17.271499633789062, "learning_rate": 8.680375647668394e-06, "loss": 0.5451, "mean_token_accuracy": 0.924217939376831, "num_tokens": 1461963.0, "step": 816 }, { "epoch": 0.1322969800016193, "grad_norm": 29.830921173095703, "learning_rate": 8.678756476683938e-06, "loss": 0.7147, "mean_token_accuracy": 0.9098977446556091, "num_tokens": 1463763.0, "step": 817 }, { "epoch": 0.13245891020969963, "grad_norm": 28.314706802368164, "learning_rate": 8.677137305699483e-06, "loss": 0.7503, "mean_token_accuracy": 0.891791820526123, "num_tokens": 1465552.0, "step": 818 }, { "epoch": 0.13262084041777994, "grad_norm": 29.0943603515625, "learning_rate": 8.675518134715026e-06, "loss": 0.7085, "mean_token_accuracy": 0.9017763137817383, "num_tokens": 1467340.0, "step": 819 }, { "epoch": 0.13278277062586025, "grad_norm": 32.64145278930664, "learning_rate": 8.67389896373057e-06, "loss": 0.9305, "mean_token_accuracy": 0.8762452006340027, "num_tokens": 1469132.0, "step": 820 }, { "epoch": 0.13294470083394058, "grad_norm": 22.297565460205078, "learning_rate": 8.672279792746115e-06, "loss": 0.6083, "mean_token_accuracy": 0.9077786207199097, "num_tokens": 1470915.0, "step": 821 }, { "epoch": 0.1331066310420209, "grad_norm": 23.34813117980957, "learning_rate": 8.670660621761659e-06, "loss": 0.701, "mean_token_accuracy": 0.9093756973743439, "num_tokens": 1472692.0, "step": 822 }, { "epoch": 0.1332685612501012, "grad_norm": 34.98750305175781, "learning_rate": 8.669041450777202e-06, "loss": 0.9331, "mean_token_accuracy": 0.8653196096420288, "num_tokens": 1474494.0, "step": 823 }, { "epoch": 0.13343049145818153, "grad_norm": 29.603885650634766, "learning_rate": 8.667422279792746e-06, "loss": 0.7147, "mean_token_accuracy": 0.9051044583320618, "num_tokens": 1476280.0, "step": 824 }, { "epoch": 0.13359242166626184, "grad_norm": 33.01629638671875, "learning_rate": 8.665803108808291e-06, "loss": 0.756, "mean_token_accuracy": 0.8935846984386444, "num_tokens": 1478073.0, "step": 825 }, { "epoch": 0.13375435187434215, "grad_norm": 32.65421676635742, "learning_rate": 8.664183937823835e-06, "loss": 0.8832, "mean_token_accuracy": 0.8876811861991882, "num_tokens": 1479861.0, "step": 826 }, { "epoch": 0.13391628208242248, "grad_norm": 24.05576515197754, "learning_rate": 8.662564766839378e-06, "loss": 0.6576, "mean_token_accuracy": 0.9151683151721954, "num_tokens": 1481656.0, "step": 827 }, { "epoch": 0.1340782122905028, "grad_norm": 32.75737762451172, "learning_rate": 8.660945595854922e-06, "loss": 0.7644, "mean_token_accuracy": 0.8908450901508331, "num_tokens": 1483452.0, "step": 828 }, { "epoch": 0.1342401424985831, "grad_norm": 27.679901123046875, "learning_rate": 8.659326424870467e-06, "loss": 0.7004, "mean_token_accuracy": 0.8928079307079315, "num_tokens": 1485244.0, "step": 829 }, { "epoch": 0.13440207270666343, "grad_norm": 21.084152221679688, "learning_rate": 8.657707253886011e-06, "loss": 0.5672, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 1487033.0, "step": 830 }, { "epoch": 0.13456400291474374, "grad_norm": 29.352325439453125, "learning_rate": 8.656088082901555e-06, "loss": 0.6099, "mean_token_accuracy": 0.9055672287940979, "num_tokens": 1488821.0, "step": 831 }, { "epoch": 0.13472593312282408, "grad_norm": 35.51498031616211, "learning_rate": 8.654468911917098e-06, "loss": 0.8169, "mean_token_accuracy": 0.893150269985199, "num_tokens": 1490614.0, "step": 832 }, { "epoch": 0.13488786333090438, "grad_norm": 33.8050422668457, "learning_rate": 8.652849740932643e-06, "loss": 0.8596, "mean_token_accuracy": 0.8993945717811584, "num_tokens": 1492402.0, "step": 833 }, { "epoch": 0.1350497935389847, "grad_norm": 36.920982360839844, "learning_rate": 8.651230569948187e-06, "loss": 0.9361, "mean_token_accuracy": 0.8865518867969513, "num_tokens": 1494186.0, "step": 834 }, { "epoch": 0.13521172374706503, "grad_norm": 31.333059310913086, "learning_rate": 8.64961139896373e-06, "loss": 0.6991, "mean_token_accuracy": 0.9097591638565063, "num_tokens": 1495975.0, "step": 835 }, { "epoch": 0.13537365395514533, "grad_norm": 40.11623001098633, "learning_rate": 8.647992227979274e-06, "loss": 1.2166, "mean_token_accuracy": 0.8759398460388184, "num_tokens": 1497753.0, "step": 836 }, { "epoch": 0.13553558416322564, "grad_norm": 25.671533584594727, "learning_rate": 8.64637305699482e-06, "loss": 0.6483, "mean_token_accuracy": 0.9056722819805145, "num_tokens": 1499541.0, "step": 837 }, { "epoch": 0.13569751437130598, "grad_norm": 37.287193298339844, "learning_rate": 8.644753886010363e-06, "loss": 0.8375, "mean_token_accuracy": 0.8869949579238892, "num_tokens": 1501329.0, "step": 838 }, { "epoch": 0.13585944457938628, "grad_norm": 30.371614456176758, "learning_rate": 8.643134715025907e-06, "loss": 0.7868, "mean_token_accuracy": 0.8889376819133759, "num_tokens": 1503111.0, "step": 839 }, { "epoch": 0.1360213747874666, "grad_norm": 24.413530349731445, "learning_rate": 8.641515544041452e-06, "loss": 0.7351, "mean_token_accuracy": 0.9074074029922485, "num_tokens": 1504893.0, "step": 840 }, { "epoch": 0.13618330499554693, "grad_norm": 31.285934448242188, "learning_rate": 8.639896373056996e-06, "loss": 0.726, "mean_token_accuracy": 0.8945697247982025, "num_tokens": 1506680.0, "step": 841 }, { "epoch": 0.13634523520362724, "grad_norm": 32.2459831237793, "learning_rate": 8.638277202072539e-06, "loss": 0.9052, "mean_token_accuracy": 0.8897615373134613, "num_tokens": 1508482.0, "step": 842 }, { "epoch": 0.13650716541170754, "grad_norm": 37.652164459228516, "learning_rate": 8.636658031088083e-06, "loss": 1.0096, "mean_token_accuracy": 0.8858367800712585, "num_tokens": 1510283.0, "step": 843 }, { "epoch": 0.13666909561978788, "grad_norm": 31.184146881103516, "learning_rate": 8.635038860103628e-06, "loss": 0.881, "mean_token_accuracy": 0.8902844190597534, "num_tokens": 1512077.0, "step": 844 }, { "epoch": 0.13683102582786819, "grad_norm": 36.45187759399414, "learning_rate": 8.633419689119172e-06, "loss": 0.9082, "mean_token_accuracy": 0.8785934746265411, "num_tokens": 1513877.0, "step": 845 }, { "epoch": 0.1369929560359485, "grad_norm": 38.46100616455078, "learning_rate": 8.631800518134715e-06, "loss": 0.942, "mean_token_accuracy": 0.8774697184562683, "num_tokens": 1515682.0, "step": 846 }, { "epoch": 0.13715488624402883, "grad_norm": 29.387264251708984, "learning_rate": 8.630181347150259e-06, "loss": 0.7967, "mean_token_accuracy": 0.8912192583084106, "num_tokens": 1517487.0, "step": 847 }, { "epoch": 0.13731681645210914, "grad_norm": 24.65268325805664, "learning_rate": 8.628562176165804e-06, "loss": 0.6277, "mean_token_accuracy": 0.9117217361927032, "num_tokens": 1519271.0, "step": 848 }, { "epoch": 0.13747874666018947, "grad_norm": 29.235944747924805, "learning_rate": 8.626943005181348e-06, "loss": 0.7261, "mean_token_accuracy": 0.8946630656719208, "num_tokens": 1521078.0, "step": 849 }, { "epoch": 0.13764067686826978, "grad_norm": 33.69078826904297, "learning_rate": 8.625323834196891e-06, "loss": 1.0, "mean_token_accuracy": 0.8706004321575165, "num_tokens": 1522875.0, "step": 850 }, { "epoch": 0.1378026070763501, "grad_norm": 29.134723663330078, "learning_rate": 8.623704663212435e-06, "loss": 0.6893, "mean_token_accuracy": 0.9006756246089935, "num_tokens": 1524658.0, "step": 851 }, { "epoch": 0.13796453728443042, "grad_norm": 22.728116989135742, "learning_rate": 8.62208549222798e-06, "loss": 0.6287, "mean_token_accuracy": 0.9011110067367554, "num_tokens": 1526443.0, "step": 852 }, { "epoch": 0.13812646749251073, "grad_norm": 29.707117080688477, "learning_rate": 8.620466321243524e-06, "loss": 0.8523, "mean_token_accuracy": 0.8967532515525818, "num_tokens": 1528227.0, "step": 853 }, { "epoch": 0.13828839770059104, "grad_norm": 17.45728874206543, "learning_rate": 8.618847150259067e-06, "loss": 0.557, "mean_token_accuracy": 0.9195680320262909, "num_tokens": 1530012.0, "step": 854 }, { "epoch": 0.13845032790867137, "grad_norm": 25.25996971130371, "learning_rate": 8.61722797927461e-06, "loss": 0.7139, "mean_token_accuracy": 0.8958131670951843, "num_tokens": 1531812.0, "step": 855 }, { "epoch": 0.13861225811675168, "grad_norm": 23.43341827392578, "learning_rate": 8.615608808290156e-06, "loss": 0.6105, "mean_token_accuracy": 0.8968254029750824, "num_tokens": 1533606.0, "step": 856 }, { "epoch": 0.138774188324832, "grad_norm": 24.679168701171875, "learning_rate": 8.6139896373057e-06, "loss": 0.677, "mean_token_accuracy": 0.9079623520374298, "num_tokens": 1535390.0, "step": 857 }, { "epoch": 0.13893611853291232, "grad_norm": 27.431867599487305, "learning_rate": 8.612370466321243e-06, "loss": 0.6638, "mean_token_accuracy": 0.9074242413043976, "num_tokens": 1537184.0, "step": 858 }, { "epoch": 0.13909804874099263, "grad_norm": 28.906810760498047, "learning_rate": 8.610751295336789e-06, "loss": 0.6913, "mean_token_accuracy": 0.9112429022789001, "num_tokens": 1538977.0, "step": 859 }, { "epoch": 0.13925997894907294, "grad_norm": 37.66145324707031, "learning_rate": 8.609132124352332e-06, "loss": 0.9545, "mean_token_accuracy": 0.8882094025611877, "num_tokens": 1540775.0, "step": 860 }, { "epoch": 0.13942190915715327, "grad_norm": 24.447853088378906, "learning_rate": 8.607512953367876e-06, "loss": 0.6192, "mean_token_accuracy": 0.9074721336364746, "num_tokens": 1542568.0, "step": 861 }, { "epoch": 0.13958383936523358, "grad_norm": 30.687110900878906, "learning_rate": 8.60589378238342e-06, "loss": 0.7871, "mean_token_accuracy": 0.8986208736896515, "num_tokens": 1544366.0, "step": 862 }, { "epoch": 0.1397457695733139, "grad_norm": 32.879188537597656, "learning_rate": 8.604274611398965e-06, "loss": 0.9254, "mean_token_accuracy": 0.8770364820957184, "num_tokens": 1546161.0, "step": 863 }, { "epoch": 0.13990769978139422, "grad_norm": 28.076580047607422, "learning_rate": 8.602655440414508e-06, "loss": 0.7723, "mean_token_accuracy": 0.8982598185539246, "num_tokens": 1547948.0, "step": 864 }, { "epoch": 0.14006962998947453, "grad_norm": 16.121753692626953, "learning_rate": 8.601036269430052e-06, "loss": 0.5469, "mean_token_accuracy": 0.9231182336807251, "num_tokens": 1549733.0, "step": 865 }, { "epoch": 0.14023156019755487, "grad_norm": 28.865304946899414, "learning_rate": 8.599417098445595e-06, "loss": 0.7078, "mean_token_accuracy": 0.8985010981559753, "num_tokens": 1551529.0, "step": 866 }, { "epoch": 0.14039349040563517, "grad_norm": 24.488113403320312, "learning_rate": 8.59779792746114e-06, "loss": 0.6929, "mean_token_accuracy": 0.9079061448574066, "num_tokens": 1553313.0, "step": 867 }, { "epoch": 0.14055542061371548, "grad_norm": 37.51773452758789, "learning_rate": 8.596178756476684e-06, "loss": 0.7625, "mean_token_accuracy": 0.8992283642292023, "num_tokens": 1555113.0, "step": 868 }, { "epoch": 0.14071735082179582, "grad_norm": 42.04418182373047, "learning_rate": 8.594559585492228e-06, "loss": 1.0627, "mean_token_accuracy": 0.8807623982429504, "num_tokens": 1556910.0, "step": 869 }, { "epoch": 0.14087928102987612, "grad_norm": 31.518220901489258, "learning_rate": 8.592940414507773e-06, "loss": 0.7802, "mean_token_accuracy": 0.89775151014328, "num_tokens": 1558696.0, "step": 870 }, { "epoch": 0.14104121123795643, "grad_norm": 29.54262924194336, "learning_rate": 8.591321243523317e-06, "loss": 0.8329, "mean_token_accuracy": 0.9021909236907959, "num_tokens": 1560485.0, "step": 871 }, { "epoch": 0.14120314144603677, "grad_norm": 25.617778778076172, "learning_rate": 8.589702072538862e-06, "loss": 0.6006, "mean_token_accuracy": 0.9239353239536285, "num_tokens": 1562273.0, "step": 872 }, { "epoch": 0.14136507165411707, "grad_norm": 29.190038681030273, "learning_rate": 8.588082901554406e-06, "loss": 0.8591, "mean_token_accuracy": 0.8864051103591919, "num_tokens": 1564066.0, "step": 873 }, { "epoch": 0.14152700186219738, "grad_norm": 27.611671447753906, "learning_rate": 8.58646373056995e-06, "loss": 0.6643, "mean_token_accuracy": 0.8939849734306335, "num_tokens": 1565851.0, "step": 874 }, { "epoch": 0.14168893207027772, "grad_norm": 24.773714065551758, "learning_rate": 8.584844559585493e-06, "loss": 0.6696, "mean_token_accuracy": 0.9076147675514221, "num_tokens": 1567644.0, "step": 875 }, { "epoch": 0.14185086227835803, "grad_norm": 22.59209442138672, "learning_rate": 8.583225388601038e-06, "loss": 0.6446, "mean_token_accuracy": 0.9148893356323242, "num_tokens": 1569438.0, "step": 876 }, { "epoch": 0.14201279248643833, "grad_norm": 25.45064926147461, "learning_rate": 8.581606217616582e-06, "loss": 0.6881, "mean_token_accuracy": 0.9087953269481659, "num_tokens": 1571224.0, "step": 877 }, { "epoch": 0.14217472269451867, "grad_norm": 36.724063873291016, "learning_rate": 8.579987046632125e-06, "loss": 1.0746, "mean_token_accuracy": 0.8841786980628967, "num_tokens": 1573019.0, "step": 878 }, { "epoch": 0.14233665290259898, "grad_norm": 28.60527801513672, "learning_rate": 8.578367875647669e-06, "loss": 0.7566, "mean_token_accuracy": 0.9042792916297913, "num_tokens": 1574811.0, "step": 879 }, { "epoch": 0.1424985831106793, "grad_norm": 31.895648956298828, "learning_rate": 8.576748704663214e-06, "loss": 0.9783, "mean_token_accuracy": 0.8872102200984955, "num_tokens": 1576597.0, "step": 880 }, { "epoch": 0.14266051331875962, "grad_norm": 27.735748291015625, "learning_rate": 8.575129533678758e-06, "loss": 0.7719, "mean_token_accuracy": 0.9045013785362244, "num_tokens": 1578392.0, "step": 881 }, { "epoch": 0.14282244352683993, "grad_norm": 29.527376174926758, "learning_rate": 8.573510362694301e-06, "loss": 0.7771, "mean_token_accuracy": 0.8884373009204865, "num_tokens": 1580173.0, "step": 882 }, { "epoch": 0.14298437373492026, "grad_norm": 30.437082290649414, "learning_rate": 8.571891191709845e-06, "loss": 0.9052, "mean_token_accuracy": 0.8962447047233582, "num_tokens": 1581965.0, "step": 883 }, { "epoch": 0.14314630394300057, "grad_norm": 27.638748168945312, "learning_rate": 8.57027202072539e-06, "loss": 0.7683, "mean_token_accuracy": 0.8946863114833832, "num_tokens": 1583762.0, "step": 884 }, { "epoch": 0.14330823415108088, "grad_norm": 27.933658599853516, "learning_rate": 8.568652849740934e-06, "loss": 0.8711, "mean_token_accuracy": 0.8928240537643433, "num_tokens": 1585553.0, "step": 885 }, { "epoch": 0.1434701643591612, "grad_norm": 38.2045783996582, "learning_rate": 8.567033678756477e-06, "loss": 0.9178, "mean_token_accuracy": 0.8746964335441589, "num_tokens": 1587352.0, "step": 886 }, { "epoch": 0.14363209456724152, "grad_norm": 20.439668655395508, "learning_rate": 8.565414507772023e-06, "loss": 0.5914, "mean_token_accuracy": 0.9223517775535583, "num_tokens": 1589148.0, "step": 887 }, { "epoch": 0.14379402477532183, "grad_norm": 32.88519287109375, "learning_rate": 8.563795336787566e-06, "loss": 0.9117, "mean_token_accuracy": 0.893098384141922, "num_tokens": 1590931.0, "step": 888 }, { "epoch": 0.14395595498340216, "grad_norm": 23.225061416625977, "learning_rate": 8.56217616580311e-06, "loss": 0.6568, "mean_token_accuracy": 0.9044384062290192, "num_tokens": 1592725.0, "step": 889 }, { "epoch": 0.14411788519148247, "grad_norm": 22.49410057067871, "learning_rate": 8.560556994818653e-06, "loss": 0.6638, "mean_token_accuracy": 0.9103226661682129, "num_tokens": 1594514.0, "step": 890 }, { "epoch": 0.14427981539956278, "grad_norm": 26.509963989257812, "learning_rate": 8.558937823834199e-06, "loss": 0.8014, "mean_token_accuracy": 0.8860294222831726, "num_tokens": 1596298.0, "step": 891 }, { "epoch": 0.1444417456076431, "grad_norm": 26.991771697998047, "learning_rate": 8.557318652849742e-06, "loss": 0.7559, "mean_token_accuracy": 0.8896606862545013, "num_tokens": 1598102.0, "step": 892 }, { "epoch": 0.14460367581572342, "grad_norm": 26.024194717407227, "learning_rate": 8.555699481865286e-06, "loss": 0.7394, "mean_token_accuracy": 0.8957327306270599, "num_tokens": 1599898.0, "step": 893 }, { "epoch": 0.14476560602380373, "grad_norm": 30.685579299926758, "learning_rate": 8.55408031088083e-06, "loss": 0.8896, "mean_token_accuracy": 0.8886958956718445, "num_tokens": 1601698.0, "step": 894 }, { "epoch": 0.14492753623188406, "grad_norm": 28.975326538085938, "learning_rate": 8.552461139896375e-06, "loss": 0.8115, "mean_token_accuracy": 0.8850038945674896, "num_tokens": 1603497.0, "step": 895 }, { "epoch": 0.14508946643996437, "grad_norm": 32.98832321166992, "learning_rate": 8.550841968911918e-06, "loss": 0.8911, "mean_token_accuracy": 0.8881553113460541, "num_tokens": 1605295.0, "step": 896 }, { "epoch": 0.1452513966480447, "grad_norm": 26.0952205657959, "learning_rate": 8.549222797927462e-06, "loss": 0.7243, "mean_token_accuracy": 0.8973332643508911, "num_tokens": 1607079.0, "step": 897 }, { "epoch": 0.145413326856125, "grad_norm": 24.03562355041504, "learning_rate": 8.547603626943006e-06, "loss": 0.6666, "mean_token_accuracy": 0.9059873819351196, "num_tokens": 1608867.0, "step": 898 }, { "epoch": 0.14557525706420532, "grad_norm": 29.828283309936523, "learning_rate": 8.54598445595855e-06, "loss": 0.6988, "mean_token_accuracy": 0.8997685015201569, "num_tokens": 1610658.0, "step": 899 }, { "epoch": 0.14573718727228566, "grad_norm": 26.028789520263672, "learning_rate": 8.544365284974094e-06, "loss": 0.6952, "mean_token_accuracy": 0.8958526253700256, "num_tokens": 1612449.0, "step": 900 }, { "epoch": 0.14589911748036596, "grad_norm": 24.521835327148438, "learning_rate": 8.542746113989638e-06, "loss": 0.7482, "mean_token_accuracy": 0.8971920311450958, "num_tokens": 1614243.0, "step": 901 }, { "epoch": 0.14606104768844627, "grad_norm": 23.212135314941406, "learning_rate": 8.541126943005182e-06, "loss": 0.6815, "mean_token_accuracy": 0.9056142568588257, "num_tokens": 1616041.0, "step": 902 }, { "epoch": 0.1462229778965266, "grad_norm": 33.281070709228516, "learning_rate": 8.539507772020727e-06, "loss": 0.8784, "mean_token_accuracy": 0.8838293552398682, "num_tokens": 1617837.0, "step": 903 }, { "epoch": 0.14638490810460691, "grad_norm": 27.358675003051758, "learning_rate": 8.53788860103627e-06, "loss": 0.7836, "mean_token_accuracy": 0.9034389555454254, "num_tokens": 1619629.0, "step": 904 }, { "epoch": 0.14654683831268722, "grad_norm": 27.35979461669922, "learning_rate": 8.536269430051814e-06, "loss": 0.8527, "mean_token_accuracy": 0.8901889026165009, "num_tokens": 1621414.0, "step": 905 }, { "epoch": 0.14670876852076756, "grad_norm": 30.20436668395996, "learning_rate": 8.53465025906736e-06, "loss": 0.7724, "mean_token_accuracy": 0.9110654890537262, "num_tokens": 1623207.0, "step": 906 }, { "epoch": 0.14687069872884786, "grad_norm": 27.33577537536621, "learning_rate": 8.533031088082903e-06, "loss": 0.6996, "mean_token_accuracy": 0.8921325206756592, "num_tokens": 1624997.0, "step": 907 }, { "epoch": 0.14703262893692817, "grad_norm": 27.49772071838379, "learning_rate": 8.531411917098447e-06, "loss": 0.6868, "mean_token_accuracy": 0.9044117629528046, "num_tokens": 1626781.0, "step": 908 }, { "epoch": 0.1471945591450085, "grad_norm": 26.854997634887695, "learning_rate": 8.52979274611399e-06, "loss": 0.6668, "mean_token_accuracy": 0.9193868935108185, "num_tokens": 1628566.0, "step": 909 }, { "epoch": 0.14735648935308882, "grad_norm": 24.952070236206055, "learning_rate": 8.528173575129535e-06, "loss": 0.7393, "mean_token_accuracy": 0.9098546504974365, "num_tokens": 1630366.0, "step": 910 }, { "epoch": 0.14751841956116912, "grad_norm": 28.105072021484375, "learning_rate": 8.526554404145079e-06, "loss": 0.9235, "mean_token_accuracy": 0.88413867354393, "num_tokens": 1632154.0, "step": 911 }, { "epoch": 0.14768034976924946, "grad_norm": 28.145858764648438, "learning_rate": 8.524935233160623e-06, "loss": 0.7908, "mean_token_accuracy": 0.8932623863220215, "num_tokens": 1633947.0, "step": 912 }, { "epoch": 0.14784227997732977, "grad_norm": 20.32073974609375, "learning_rate": 8.523316062176166e-06, "loss": 0.6645, "mean_token_accuracy": 0.9208264350891113, "num_tokens": 1635737.0, "step": 913 }, { "epoch": 0.1480042101854101, "grad_norm": 26.286941528320312, "learning_rate": 8.521696891191711e-06, "loss": 0.6877, "mean_token_accuracy": 0.893796980381012, "num_tokens": 1637522.0, "step": 914 }, { "epoch": 0.1481661403934904, "grad_norm": 28.174116134643555, "learning_rate": 8.520077720207255e-06, "loss": 0.7565, "mean_token_accuracy": 0.8952020108699799, "num_tokens": 1639301.0, "step": 915 }, { "epoch": 0.14832807060157072, "grad_norm": 22.744596481323242, "learning_rate": 8.518458549222799e-06, "loss": 0.5981, "mean_token_accuracy": 0.9134517908096313, "num_tokens": 1641101.0, "step": 916 }, { "epoch": 0.14849000080965105, "grad_norm": 25.569793701171875, "learning_rate": 8.516839378238342e-06, "loss": 0.6127, "mean_token_accuracy": 0.9087750613689423, "num_tokens": 1642897.0, "step": 917 }, { "epoch": 0.14865193101773136, "grad_norm": 27.837949752807617, "learning_rate": 8.515220207253887e-06, "loss": 0.7336, "mean_token_accuracy": 0.9123667478561401, "num_tokens": 1644683.0, "step": 918 }, { "epoch": 0.14881386122581167, "grad_norm": 24.422086715698242, "learning_rate": 8.513601036269431e-06, "loss": 0.7033, "mean_token_accuracy": 0.9062043726444244, "num_tokens": 1646472.0, "step": 919 }, { "epoch": 0.148975791433892, "grad_norm": 23.904264450073242, "learning_rate": 8.511981865284975e-06, "loss": 0.6535, "mean_token_accuracy": 0.9138985574245453, "num_tokens": 1648251.0, "step": 920 }, { "epoch": 0.1491377216419723, "grad_norm": 29.145042419433594, "learning_rate": 8.510362694300518e-06, "loss": 0.8172, "mean_token_accuracy": 0.8869743347167969, "num_tokens": 1650046.0, "step": 921 }, { "epoch": 0.14929965185005262, "grad_norm": 29.6810302734375, "learning_rate": 8.508743523316064e-06, "loss": 0.7228, "mean_token_accuracy": 0.8966501653194427, "num_tokens": 1651848.0, "step": 922 }, { "epoch": 0.14946158205813295, "grad_norm": 29.418107986450195, "learning_rate": 8.507124352331607e-06, "loss": 0.7755, "mean_token_accuracy": 0.8968901038169861, "num_tokens": 1653642.0, "step": 923 }, { "epoch": 0.14962351226621326, "grad_norm": 25.847246170043945, "learning_rate": 8.50550518134715e-06, "loss": 0.7024, "mean_token_accuracy": 0.9053932428359985, "num_tokens": 1655429.0, "step": 924 }, { "epoch": 0.14978544247429357, "grad_norm": 26.78089714050293, "learning_rate": 8.503886010362696e-06, "loss": 0.8157, "mean_token_accuracy": 0.8966714143753052, "num_tokens": 1657222.0, "step": 925 }, { "epoch": 0.1499473726823739, "grad_norm": 25.565675735473633, "learning_rate": 8.50226683937824e-06, "loss": 0.744, "mean_token_accuracy": 0.9042780995368958, "num_tokens": 1659004.0, "step": 926 }, { "epoch": 0.1501093028904542, "grad_norm": 32.19752502441406, "learning_rate": 8.500647668393783e-06, "loss": 0.7824, "mean_token_accuracy": 0.8999948501586914, "num_tokens": 1660795.0, "step": 927 }, { "epoch": 0.15027123309853452, "grad_norm": 30.858156204223633, "learning_rate": 8.499028497409327e-06, "loss": 0.7624, "mean_token_accuracy": 0.9026522934436798, "num_tokens": 1662594.0, "step": 928 }, { "epoch": 0.15043316330661485, "grad_norm": 23.60885238647461, "learning_rate": 8.497409326424872e-06, "loss": 0.6486, "mean_token_accuracy": 0.9205905199050903, "num_tokens": 1664384.0, "step": 929 }, { "epoch": 0.15059509351469516, "grad_norm": 23.985355377197266, "learning_rate": 8.495790155440416e-06, "loss": 0.6405, "mean_token_accuracy": 0.9112013578414917, "num_tokens": 1666166.0, "step": 930 }, { "epoch": 0.1507570237227755, "grad_norm": 31.914487838745117, "learning_rate": 8.49417098445596e-06, "loss": 0.8839, "mean_token_accuracy": 0.9006539285182953, "num_tokens": 1667960.0, "step": 931 }, { "epoch": 0.1509189539308558, "grad_norm": 24.270599365234375, "learning_rate": 8.492551813471503e-06, "loss": 0.6655, "mean_token_accuracy": 0.9157386422157288, "num_tokens": 1669757.0, "step": 932 }, { "epoch": 0.1510808841389361, "grad_norm": 27.624757766723633, "learning_rate": 8.490932642487048e-06, "loss": 0.7161, "mean_token_accuracy": 0.9020979106426239, "num_tokens": 1671542.0, "step": 933 }, { "epoch": 0.15124281434701645, "grad_norm": 19.0823974609375, "learning_rate": 8.489313471502592e-06, "loss": 0.5645, "mean_token_accuracy": 0.9097693562507629, "num_tokens": 1673331.0, "step": 934 }, { "epoch": 0.15140474455509675, "grad_norm": 34.84246063232422, "learning_rate": 8.487694300518135e-06, "loss": 0.9767, "mean_token_accuracy": 0.8904704749584198, "num_tokens": 1675126.0, "step": 935 }, { "epoch": 0.15156667476317706, "grad_norm": 21.22995948791504, "learning_rate": 8.486075129533679e-06, "loss": 0.5987, "mean_token_accuracy": 0.916402131319046, "num_tokens": 1676913.0, "step": 936 }, { "epoch": 0.1517286049712574, "grad_norm": 28.371997833251953, "learning_rate": 8.484455958549224e-06, "loss": 0.7745, "mean_token_accuracy": 0.8929268419742584, "num_tokens": 1678715.0, "step": 937 }, { "epoch": 0.1518905351793377, "grad_norm": 31.93210220336914, "learning_rate": 8.482836787564768e-06, "loss": 0.8506, "mean_token_accuracy": 0.8849896490573883, "num_tokens": 1680505.0, "step": 938 }, { "epoch": 0.152052465387418, "grad_norm": 17.640134811401367, "learning_rate": 8.481217616580311e-06, "loss": 0.5681, "mean_token_accuracy": 0.9218370020389557, "num_tokens": 1682286.0, "step": 939 }, { "epoch": 0.15221439559549835, "grad_norm": 34.61970520019531, "learning_rate": 8.479598445595855e-06, "loss": 0.9972, "mean_token_accuracy": 0.8887709677219391, "num_tokens": 1684086.0, "step": 940 }, { "epoch": 0.15237632580357865, "grad_norm": 22.304615020751953, "learning_rate": 8.4779792746114e-06, "loss": 0.6237, "mean_token_accuracy": 0.9135932624340057, "num_tokens": 1685887.0, "step": 941 }, { "epoch": 0.15253825601165896, "grad_norm": 30.35696029663086, "learning_rate": 8.476360103626944e-06, "loss": 0.8199, "mean_token_accuracy": 0.9139194190502167, "num_tokens": 1687689.0, "step": 942 }, { "epoch": 0.1527001862197393, "grad_norm": 26.467973709106445, "learning_rate": 8.474740932642487e-06, "loss": 0.6961, "mean_token_accuracy": 0.9063776135444641, "num_tokens": 1689488.0, "step": 943 }, { "epoch": 0.1528621164278196, "grad_norm": 34.51942825317383, "learning_rate": 8.473121761658033e-06, "loss": 0.9158, "mean_token_accuracy": 0.8851426243782043, "num_tokens": 1691295.0, "step": 944 }, { "epoch": 0.15302404663589994, "grad_norm": 28.47652244567871, "learning_rate": 8.471502590673576e-06, "loss": 0.8389, "mean_token_accuracy": 0.8971630930900574, "num_tokens": 1693089.0, "step": 945 }, { "epoch": 0.15318597684398025, "grad_norm": 27.88567352294922, "learning_rate": 8.46988341968912e-06, "loss": 0.6497, "mean_token_accuracy": 0.9126865565776825, "num_tokens": 1694875.0, "step": 946 }, { "epoch": 0.15334790705206056, "grad_norm": 25.989086151123047, "learning_rate": 8.468264248704663e-06, "loss": 0.7237, "mean_token_accuracy": 0.9117217361927032, "num_tokens": 1696659.0, "step": 947 }, { "epoch": 0.1535098372601409, "grad_norm": 28.449325561523438, "learning_rate": 8.466645077720209e-06, "loss": 0.8126, "mean_token_accuracy": 0.8941029608249664, "num_tokens": 1698454.0, "step": 948 }, { "epoch": 0.1536717674682212, "grad_norm": 30.008527755737305, "learning_rate": 8.465025906735752e-06, "loss": 0.7406, "mean_token_accuracy": 0.9015873074531555, "num_tokens": 1700250.0, "step": 949 }, { "epoch": 0.1538336976763015, "grad_norm": 25.61617660522461, "learning_rate": 8.463406735751296e-06, "loss": 0.6326, "mean_token_accuracy": 0.9069034159183502, "num_tokens": 1702031.0, "step": 950 }, { "epoch": 0.15399562788438184, "grad_norm": 32.07202911376953, "learning_rate": 8.46178756476684e-06, "loss": 0.8723, "mean_token_accuracy": 0.8929268419742584, "num_tokens": 1703833.0, "step": 951 }, { "epoch": 0.15415755809246215, "grad_norm": 28.24432373046875, "learning_rate": 8.460168393782385e-06, "loss": 0.7789, "mean_token_accuracy": 0.8913426399230957, "num_tokens": 1705631.0, "step": 952 }, { "epoch": 0.15431948830054246, "grad_norm": 26.870410919189453, "learning_rate": 8.458549222797928e-06, "loss": 0.834, "mean_token_accuracy": 0.9130023717880249, "num_tokens": 1707419.0, "step": 953 }, { "epoch": 0.1544814185086228, "grad_norm": 30.178091049194336, "learning_rate": 8.456930051813472e-06, "loss": 0.8246, "mean_token_accuracy": 0.8924460411071777, "num_tokens": 1709210.0, "step": 954 }, { "epoch": 0.1546433487167031, "grad_norm": 30.722068786621094, "learning_rate": 8.455310880829016e-06, "loss": 0.9278, "mean_token_accuracy": 0.89723339676857, "num_tokens": 1711004.0, "step": 955 }, { "epoch": 0.1548052789247834, "grad_norm": 27.30564308166504, "learning_rate": 8.45369170984456e-06, "loss": 0.7107, "mean_token_accuracy": 0.9052592515945435, "num_tokens": 1712800.0, "step": 956 }, { "epoch": 0.15496720913286374, "grad_norm": 21.477548599243164, "learning_rate": 8.452072538860104e-06, "loss": 0.6206, "mean_token_accuracy": 0.9039260447025299, "num_tokens": 1714593.0, "step": 957 }, { "epoch": 0.15512913934094405, "grad_norm": 23.72429084777832, "learning_rate": 8.450453367875648e-06, "loss": 0.6494, "mean_token_accuracy": 0.9007092118263245, "num_tokens": 1716387.0, "step": 958 }, { "epoch": 0.15529106954902436, "grad_norm": 17.59723663330078, "learning_rate": 8.448834196891193e-06, "loss": 0.5793, "mean_token_accuracy": 0.9219769835472107, "num_tokens": 1718168.0, "step": 959 }, { "epoch": 0.1554529997571047, "grad_norm": 25.647672653198242, "learning_rate": 8.447215025906737e-06, "loss": 0.8421, "mean_token_accuracy": 0.8756858706474304, "num_tokens": 1719962.0, "step": 960 }, { "epoch": 0.155614929965185, "grad_norm": 25.639387130737305, "learning_rate": 8.44559585492228e-06, "loss": 0.7233, "mean_token_accuracy": 0.9118537902832031, "num_tokens": 1721746.0, "step": 961 }, { "epoch": 0.15577686017326534, "grad_norm": 34.39677810668945, "learning_rate": 8.443976683937824e-06, "loss": 0.8674, "mean_token_accuracy": 0.8847903907299042, "num_tokens": 1723553.0, "step": 962 }, { "epoch": 0.15593879038134564, "grad_norm": 24.351181030273438, "learning_rate": 8.44235751295337e-06, "loss": 0.7558, "mean_token_accuracy": 0.9055944085121155, "num_tokens": 1725351.0, "step": 963 }, { "epoch": 0.15610072058942595, "grad_norm": 33.06178665161133, "learning_rate": 8.440738341968913e-06, "loss": 0.8693, "mean_token_accuracy": 0.8937968611717224, "num_tokens": 1727145.0, "step": 964 }, { "epoch": 0.1562626507975063, "grad_norm": 24.48506736755371, "learning_rate": 8.439119170984457e-06, "loss": 0.7509, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 1728934.0, "step": 965 }, { "epoch": 0.1564245810055866, "grad_norm": 24.079999923706055, "learning_rate": 8.4375e-06, "loss": 0.6613, "mean_token_accuracy": 0.9053837954998016, "num_tokens": 1730720.0, "step": 966 }, { "epoch": 0.1565865112136669, "grad_norm": 28.945960998535156, "learning_rate": 8.435880829015545e-06, "loss": 0.6909, "mean_token_accuracy": 0.9163067042827606, "num_tokens": 1732506.0, "step": 967 }, { "epoch": 0.15674844142174724, "grad_norm": 29.045324325561523, "learning_rate": 8.434261658031089e-06, "loss": 0.778, "mean_token_accuracy": 0.8924980163574219, "num_tokens": 1734294.0, "step": 968 }, { "epoch": 0.15691037162982754, "grad_norm": 24.221921920776367, "learning_rate": 8.432642487046633e-06, "loss": 0.6623, "mean_token_accuracy": 0.9034090936183929, "num_tokens": 1736082.0, "step": 969 }, { "epoch": 0.15707230183790785, "grad_norm": 26.38144874572754, "learning_rate": 8.431023316062176e-06, "loss": 0.729, "mean_token_accuracy": 0.9141156673431396, "num_tokens": 1737873.0, "step": 970 }, { "epoch": 0.1572342320459882, "grad_norm": 20.605619430541992, "learning_rate": 8.429404145077721e-06, "loss": 0.6105, "mean_token_accuracy": 0.925168365240097, "num_tokens": 1739652.0, "step": 971 }, { "epoch": 0.1573961622540685, "grad_norm": 28.105173110961914, "learning_rate": 8.427784974093265e-06, "loss": 0.7459, "mean_token_accuracy": 0.89723339676857, "num_tokens": 1741446.0, "step": 972 }, { "epoch": 0.1575580924621488, "grad_norm": 29.532089233398438, "learning_rate": 8.426165803108809e-06, "loss": 0.6786, "mean_token_accuracy": 0.9031603336334229, "num_tokens": 1743237.0, "step": 973 }, { "epoch": 0.15772002267022914, "grad_norm": 36.528770446777344, "learning_rate": 8.424546632124352e-06, "loss": 1.1743, "mean_token_accuracy": 0.8727026879787445, "num_tokens": 1745047.0, "step": 974 }, { "epoch": 0.15788195287830945, "grad_norm": 28.37747573852539, "learning_rate": 8.422927461139897e-06, "loss": 0.6049, "mean_token_accuracy": 0.9088670015335083, "num_tokens": 1746844.0, "step": 975 }, { "epoch": 0.15804388308638975, "grad_norm": 33.5258674621582, "learning_rate": 8.421308290155441e-06, "loss": 0.9566, "mean_token_accuracy": 0.880923718214035, "num_tokens": 1748650.0, "step": 976 }, { "epoch": 0.1582058132944701, "grad_norm": 24.976877212524414, "learning_rate": 8.419689119170985e-06, "loss": 0.7283, "mean_token_accuracy": 0.9019651114940643, "num_tokens": 1750447.0, "step": 977 }, { "epoch": 0.1583677435025504, "grad_norm": 29.21996307373047, "learning_rate": 8.41806994818653e-06, "loss": 0.8034, "mean_token_accuracy": 0.8776244223117828, "num_tokens": 1752245.0, "step": 978 }, { "epoch": 0.15852967371063073, "grad_norm": 25.497209548950195, "learning_rate": 8.416450777202074e-06, "loss": 0.6859, "mean_token_accuracy": 0.9057921469211578, "num_tokens": 1754033.0, "step": 979 }, { "epoch": 0.15869160391871104, "grad_norm": 25.13727569580078, "learning_rate": 8.414831606217617e-06, "loss": 0.6612, "mean_token_accuracy": 0.9112793803215027, "num_tokens": 1755827.0, "step": 980 }, { "epoch": 0.15885353412679135, "grad_norm": 32.218650817871094, "learning_rate": 8.41321243523316e-06, "loss": 0.8252, "mean_token_accuracy": 0.9064554274082184, "num_tokens": 1757617.0, "step": 981 }, { "epoch": 0.15901546433487168, "grad_norm": 23.252197265625, "learning_rate": 8.411593264248706e-06, "loss": 0.6495, "mean_token_accuracy": 0.9141464829444885, "num_tokens": 1759397.0, "step": 982 }, { "epoch": 0.159177394542952, "grad_norm": 33.847816467285156, "learning_rate": 8.40997409326425e-06, "loss": 0.8402, "mean_token_accuracy": 0.8996093273162842, "num_tokens": 1761188.0, "step": 983 }, { "epoch": 0.1593393247510323, "grad_norm": 26.308055877685547, "learning_rate": 8.408354922279793e-06, "loss": 0.6681, "mean_token_accuracy": 0.9154391586780548, "num_tokens": 1762984.0, "step": 984 }, { "epoch": 0.15950125495911263, "grad_norm": 23.857769012451172, "learning_rate": 8.406735751295337e-06, "loss": 0.7549, "mean_token_accuracy": 0.9074297845363617, "num_tokens": 1764766.0, "step": 985 }, { "epoch": 0.15966318516719294, "grad_norm": 29.106836318969727, "learning_rate": 8.405116580310882e-06, "loss": 0.8487, "mean_token_accuracy": 0.8929536640644073, "num_tokens": 1766566.0, "step": 986 }, { "epoch": 0.15982511537527325, "grad_norm": 21.778444290161133, "learning_rate": 8.403497409326426e-06, "loss": 0.6758, "mean_token_accuracy": 0.9108070731163025, "num_tokens": 1768347.0, "step": 987 }, { "epoch": 0.15998704558335358, "grad_norm": 25.258974075317383, "learning_rate": 8.40187823834197e-06, "loss": 0.6708, "mean_token_accuracy": 0.9010291695594788, "num_tokens": 1770142.0, "step": 988 }, { "epoch": 0.1601489757914339, "grad_norm": 28.957754135131836, "learning_rate": 8.400259067357513e-06, "loss": 0.7294, "mean_token_accuracy": 0.8993730545043945, "num_tokens": 1771942.0, "step": 989 }, { "epoch": 0.1603109059995142, "grad_norm": 34.045379638671875, "learning_rate": 8.398639896373058e-06, "loss": 0.9298, "mean_token_accuracy": 0.8682598173618317, "num_tokens": 1773734.0, "step": 990 }, { "epoch": 0.16047283620759453, "grad_norm": 29.821964263916016, "learning_rate": 8.397020725388602e-06, "loss": 0.9176, "mean_token_accuracy": 0.8944444358348846, "num_tokens": 1775530.0, "step": 991 }, { "epoch": 0.16063476641567484, "grad_norm": 23.459064483642578, "learning_rate": 8.395401554404145e-06, "loss": 0.6956, "mean_token_accuracy": 0.9061990678310394, "num_tokens": 1777318.0, "step": 992 }, { "epoch": 0.16079669662375518, "grad_norm": 23.6264705657959, "learning_rate": 8.393782383419689e-06, "loss": 0.6858, "mean_token_accuracy": 0.906389594078064, "num_tokens": 1779096.0, "step": 993 }, { "epoch": 0.16095862683183548, "grad_norm": 35.015262603759766, "learning_rate": 8.392163212435234e-06, "loss": 0.8484, "mean_token_accuracy": 0.8844228684902191, "num_tokens": 1780885.0, "step": 994 }, { "epoch": 0.1611205570399158, "grad_norm": 34.203609466552734, "learning_rate": 8.390544041450778e-06, "loss": 0.9118, "mean_token_accuracy": 0.881540060043335, "num_tokens": 1782675.0, "step": 995 }, { "epoch": 0.16128248724799613, "grad_norm": 22.607555389404297, "learning_rate": 8.388924870466321e-06, "loss": 0.7447, "mean_token_accuracy": 0.9079106450080872, "num_tokens": 1784469.0, "step": 996 }, { "epoch": 0.16144441745607643, "grad_norm": 27.346338272094727, "learning_rate": 8.387305699481867e-06, "loss": 0.6785, "mean_token_accuracy": 0.919334203004837, "num_tokens": 1786266.0, "step": 997 }, { "epoch": 0.16160634766415674, "grad_norm": 26.722373962402344, "learning_rate": 8.38568652849741e-06, "loss": 0.7465, "mean_token_accuracy": 0.8961466252803802, "num_tokens": 1788047.0, "step": 998 }, { "epoch": 0.16176827787223708, "grad_norm": 32.32159423828125, "learning_rate": 8.384067357512954e-06, "loss": 0.8692, "mean_token_accuracy": 0.895600438117981, "num_tokens": 1789837.0, "step": 999 }, { "epoch": 0.16193020808031738, "grad_norm": 32.08845520019531, "learning_rate": 8.382448186528497e-06, "loss": 0.891, "mean_token_accuracy": 0.8835561871528625, "num_tokens": 1791641.0, "step": 1000 }, { "epoch": 0.1620921382883977, "grad_norm": 27.835010528564453, "learning_rate": 8.380829015544043e-06, "loss": 0.9151, "mean_token_accuracy": 0.8854427933692932, "num_tokens": 1793441.0, "step": 1001 }, { "epoch": 0.16225406849647803, "grad_norm": 22.59184455871582, "learning_rate": 8.379209844559586e-06, "loss": 0.7484, "mean_token_accuracy": 0.910397082567215, "num_tokens": 1795230.0, "step": 1002 }, { "epoch": 0.16241599870455833, "grad_norm": 16.92840003967285, "learning_rate": 8.37759067357513e-06, "loss": 0.5801, "mean_token_accuracy": 0.9187537133693695, "num_tokens": 1797014.0, "step": 1003 }, { "epoch": 0.16257792891263864, "grad_norm": 27.888029098510742, "learning_rate": 8.375971502590673e-06, "loss": 0.8512, "mean_token_accuracy": 0.9067992568016052, "num_tokens": 1798805.0, "step": 1004 }, { "epoch": 0.16273985912071898, "grad_norm": 29.029632568359375, "learning_rate": 8.374352331606219e-06, "loss": 0.9087, "mean_token_accuracy": 0.8823952972888947, "num_tokens": 1800606.0, "step": 1005 }, { "epoch": 0.16290178932879928, "grad_norm": 16.296430587768555, "learning_rate": 8.372733160621762e-06, "loss": 0.5272, "mean_token_accuracy": 0.9232524335384369, "num_tokens": 1802391.0, "step": 1006 }, { "epoch": 0.1630637195368796, "grad_norm": 30.251874923706055, "learning_rate": 8.371113989637306e-06, "loss": 0.8144, "mean_token_accuracy": 0.8787719011306763, "num_tokens": 1804192.0, "step": 1007 }, { "epoch": 0.16322564974495993, "grad_norm": 26.44344139099121, "learning_rate": 8.36949481865285e-06, "loss": 0.7729, "mean_token_accuracy": 0.9017285704612732, "num_tokens": 1805978.0, "step": 1008 }, { "epoch": 0.16338757995304024, "grad_norm": 28.625341415405273, "learning_rate": 8.367875647668395e-06, "loss": 0.7675, "mean_token_accuracy": 0.8992329835891724, "num_tokens": 1807779.0, "step": 1009 }, { "epoch": 0.16354951016112057, "grad_norm": 26.60886573791504, "learning_rate": 8.366256476683938e-06, "loss": 0.7071, "mean_token_accuracy": 0.9022475481033325, "num_tokens": 1809567.0, "step": 1010 }, { "epoch": 0.16371144036920088, "grad_norm": 21.12505340576172, "learning_rate": 8.364637305699482e-06, "loss": 0.6248, "mean_token_accuracy": 0.9216565489768982, "num_tokens": 1811360.0, "step": 1011 }, { "epoch": 0.16387337057728119, "grad_norm": 33.39212417602539, "learning_rate": 8.363018134715026e-06, "loss": 0.8699, "mean_token_accuracy": 0.8789272010326385, "num_tokens": 1813161.0, "step": 1012 }, { "epoch": 0.16403530078536152, "grad_norm": 28.788116455078125, "learning_rate": 8.36139896373057e-06, "loss": 0.84, "mean_token_accuracy": 0.8966071605682373, "num_tokens": 1814962.0, "step": 1013 }, { "epoch": 0.16419723099344183, "grad_norm": 22.292428970336914, "learning_rate": 8.359779792746114e-06, "loss": 0.7033, "mean_token_accuracy": 0.9087591171264648, "num_tokens": 1816748.0, "step": 1014 }, { "epoch": 0.16435916120152214, "grad_norm": 29.944517135620117, "learning_rate": 8.358160621761658e-06, "loss": 1.0369, "mean_token_accuracy": 0.8854166567325592, "num_tokens": 1818539.0, "step": 1015 }, { "epoch": 0.16452109140960247, "grad_norm": 18.673097610473633, "learning_rate": 8.356541450777203e-06, "loss": 0.6187, "mean_token_accuracy": 0.9097744226455688, "num_tokens": 1820317.0, "step": 1016 }, { "epoch": 0.16468302161768278, "grad_norm": 19.84192657470703, "learning_rate": 8.354922279792747e-06, "loss": 0.6125, "mean_token_accuracy": 0.9169273376464844, "num_tokens": 1822106.0, "step": 1017 }, { "epoch": 0.1648449518257631, "grad_norm": 27.13695526123047, "learning_rate": 8.35330310880829e-06, "loss": 0.7023, "mean_token_accuracy": 0.9027547836303711, "num_tokens": 1823896.0, "step": 1018 }, { "epoch": 0.16500688203384342, "grad_norm": 23.334693908691406, "learning_rate": 8.351683937823834e-06, "loss": 0.6746, "mean_token_accuracy": 0.9060479700565338, "num_tokens": 1825685.0, "step": 1019 }, { "epoch": 0.16516881224192373, "grad_norm": 18.940563201904297, "learning_rate": 8.35006476683938e-06, "loss": 0.5209, "mean_token_accuracy": 0.9340487122535706, "num_tokens": 1827485.0, "step": 1020 }, { "epoch": 0.16533074245000404, "grad_norm": 35.45963668823242, "learning_rate": 8.348445595854923e-06, "loss": 0.9689, "mean_token_accuracy": 0.8938707709312439, "num_tokens": 1829279.0, "step": 1021 }, { "epoch": 0.16549267265808437, "grad_norm": 20.04072380065918, "learning_rate": 8.346826424870467e-06, "loss": 0.5604, "mean_token_accuracy": 0.9194128215312958, "num_tokens": 1831064.0, "step": 1022 }, { "epoch": 0.16565460286616468, "grad_norm": 29.936227798461914, "learning_rate": 8.34520725388601e-06, "loss": 0.8103, "mean_token_accuracy": 0.905802845954895, "num_tokens": 1832860.0, "step": 1023 }, { "epoch": 0.165816533074245, "grad_norm": 24.985944747924805, "learning_rate": 8.343588082901555e-06, "loss": 0.6962, "mean_token_accuracy": 0.9070305228233337, "num_tokens": 1834651.0, "step": 1024 }, { "epoch": 0.16597846328232532, "grad_norm": 28.883068084716797, "learning_rate": 8.341968911917099e-06, "loss": 0.781, "mean_token_accuracy": 0.9091353714466095, "num_tokens": 1836449.0, "step": 1025 }, { "epoch": 0.16614039349040563, "grad_norm": 27.938495635986328, "learning_rate": 8.340349740932643e-06, "loss": 0.7688, "mean_token_accuracy": 0.9004205167293549, "num_tokens": 1838242.0, "step": 1026 }, { "epoch": 0.16630232369848597, "grad_norm": 23.80014419555664, "learning_rate": 8.338730569948186e-06, "loss": 0.6039, "mean_token_accuracy": 0.9061383306980133, "num_tokens": 1840030.0, "step": 1027 }, { "epoch": 0.16646425390656627, "grad_norm": 36.050506591796875, "learning_rate": 8.337111398963731e-06, "loss": 0.9703, "mean_token_accuracy": 0.8878731727600098, "num_tokens": 1841836.0, "step": 1028 }, { "epoch": 0.16662618411464658, "grad_norm": 23.888402938842773, "learning_rate": 8.335492227979275e-06, "loss": 0.6407, "mean_token_accuracy": 0.9055112302303314, "num_tokens": 1843633.0, "step": 1029 }, { "epoch": 0.16678811432272692, "grad_norm": 19.903118133544922, "learning_rate": 8.333873056994819e-06, "loss": 0.608, "mean_token_accuracy": 0.9089947044849396, "num_tokens": 1845420.0, "step": 1030 }, { "epoch": 0.16695004453080722, "grad_norm": 26.717721939086914, "learning_rate": 8.332253886010362e-06, "loss": 0.6391, "mean_token_accuracy": 0.897081196308136, "num_tokens": 1847213.0, "step": 1031 }, { "epoch": 0.16711197473888753, "grad_norm": 29.56040382385254, "learning_rate": 8.330634715025908e-06, "loss": 0.7301, "mean_token_accuracy": 0.900211364030838, "num_tokens": 1849007.0, "step": 1032 }, { "epoch": 0.16727390494696787, "grad_norm": 26.610042572021484, "learning_rate": 8.329015544041451e-06, "loss": 0.7002, "mean_token_accuracy": 0.9204458296298981, "num_tokens": 1850808.0, "step": 1033 }, { "epoch": 0.16743583515504817, "grad_norm": 31.945354461669922, "learning_rate": 8.327396373056995e-06, "loss": 0.7646, "mean_token_accuracy": 0.8885893523693085, "num_tokens": 1852607.0, "step": 1034 }, { "epoch": 0.16759776536312848, "grad_norm": 29.442951202392578, "learning_rate": 8.32577720207254e-06, "loss": 0.6447, "mean_token_accuracy": 0.9102682769298553, "num_tokens": 1854398.0, "step": 1035 }, { "epoch": 0.16775969557120882, "grad_norm": 27.62604522705078, "learning_rate": 8.324158031088084e-06, "loss": 0.8042, "mean_token_accuracy": 0.8967592716217041, "num_tokens": 1856181.0, "step": 1036 }, { "epoch": 0.16792162577928912, "grad_norm": 25.561246871948242, "learning_rate": 8.322538860103627e-06, "loss": 0.6113, "mean_token_accuracy": 0.9116471707820892, "num_tokens": 1857976.0, "step": 1037 }, { "epoch": 0.16808355598736943, "grad_norm": 19.984628677368164, "learning_rate": 8.32091968911917e-06, "loss": 0.5661, "mean_token_accuracy": 0.9274227619171143, "num_tokens": 1859763.0, "step": 1038 }, { "epoch": 0.16824548619544977, "grad_norm": 30.77582359313965, "learning_rate": 8.319300518134716e-06, "loss": 0.8598, "mean_token_accuracy": 0.8869130909442902, "num_tokens": 1861558.0, "step": 1039 }, { "epoch": 0.16840741640353007, "grad_norm": 30.961000442504883, "learning_rate": 8.31768134715026e-06, "loss": 0.8359, "mean_token_accuracy": 0.8897708058357239, "num_tokens": 1863350.0, "step": 1040 }, { "epoch": 0.16856934661161038, "grad_norm": 24.89232063293457, "learning_rate": 8.316062176165803e-06, "loss": 0.658, "mean_token_accuracy": 0.9166927635669708, "num_tokens": 1865139.0, "step": 1041 }, { "epoch": 0.16873127681969072, "grad_norm": 23.86810874938965, "learning_rate": 8.314443005181347e-06, "loss": 0.686, "mean_token_accuracy": 0.9107033312320709, "num_tokens": 1866931.0, "step": 1042 }, { "epoch": 0.16889320702777103, "grad_norm": 31.158031463623047, "learning_rate": 8.312823834196892e-06, "loss": 0.8774, "mean_token_accuracy": 0.8963263034820557, "num_tokens": 1868732.0, "step": 1043 }, { "epoch": 0.16905513723585136, "grad_norm": 28.18429183959961, "learning_rate": 8.311204663212436e-06, "loss": 0.702, "mean_token_accuracy": 0.8967308700084686, "num_tokens": 1870543.0, "step": 1044 }, { "epoch": 0.16921706744393167, "grad_norm": 30.117345809936523, "learning_rate": 8.30958549222798e-06, "loss": 0.7637, "mean_token_accuracy": 0.8870314955711365, "num_tokens": 1872338.0, "step": 1045 }, { "epoch": 0.16937899765201198, "grad_norm": 27.53786277770996, "learning_rate": 8.307966321243523e-06, "loss": 0.7493, "mean_token_accuracy": 0.8953185379505157, "num_tokens": 1874127.0, "step": 1046 }, { "epoch": 0.1695409278600923, "grad_norm": 25.65018653869629, "learning_rate": 8.306347150259068e-06, "loss": 0.8218, "mean_token_accuracy": 0.904900074005127, "num_tokens": 1875923.0, "step": 1047 }, { "epoch": 0.16970285806817262, "grad_norm": 29.309276580810547, "learning_rate": 8.304727979274612e-06, "loss": 0.779, "mean_token_accuracy": 0.8931216895580292, "num_tokens": 1877717.0, "step": 1048 }, { "epoch": 0.16986478827625293, "grad_norm": 29.517623901367188, "learning_rate": 8.303108808290155e-06, "loss": 0.7239, "mean_token_accuracy": 0.9039066433906555, "num_tokens": 1879510.0, "step": 1049 }, { "epoch": 0.17002671848433326, "grad_norm": 25.233959197998047, "learning_rate": 8.301489637305699e-06, "loss": 0.6591, "mean_token_accuracy": 0.9136288166046143, "num_tokens": 1881300.0, "step": 1050 }, { "epoch": 0.17018864869241357, "grad_norm": 27.839366912841797, "learning_rate": 8.299870466321244e-06, "loss": 0.8125, "mean_token_accuracy": 0.8921431005001068, "num_tokens": 1883090.0, "step": 1051 }, { "epoch": 0.17035057890049388, "grad_norm": 28.8481388092041, "learning_rate": 8.298251295336788e-06, "loss": 0.9297, "mean_token_accuracy": 0.8960882127285004, "num_tokens": 1884881.0, "step": 1052 }, { "epoch": 0.1705125091085742, "grad_norm": 31.49526596069336, "learning_rate": 8.296632124352331e-06, "loss": 0.8186, "mean_token_accuracy": 0.9040741920471191, "num_tokens": 1886674.0, "step": 1053 }, { "epoch": 0.17067443931665452, "grad_norm": 27.991830825805664, "learning_rate": 8.295012953367877e-06, "loss": 0.7997, "mean_token_accuracy": 0.9014880955219269, "num_tokens": 1888470.0, "step": 1054 }, { "epoch": 0.17083636952473483, "grad_norm": 29.720895767211914, "learning_rate": 8.29339378238342e-06, "loss": 0.681, "mean_token_accuracy": 0.8990960717201233, "num_tokens": 1890258.0, "step": 1055 }, { "epoch": 0.17099829973281516, "grad_norm": 26.77786636352539, "learning_rate": 8.291774611398964e-06, "loss": 0.7915, "mean_token_accuracy": 0.9031760692596436, "num_tokens": 1892049.0, "step": 1056 }, { "epoch": 0.17116022994089547, "grad_norm": 15.971278190612793, "learning_rate": 8.290155440414507e-06, "loss": 0.5799, "mean_token_accuracy": 0.9282234609127045, "num_tokens": 1893839.0, "step": 1057 }, { "epoch": 0.1713221601489758, "grad_norm": 26.042957305908203, "learning_rate": 8.288536269430053e-06, "loss": 0.6894, "mean_token_accuracy": 0.8944527804851532, "num_tokens": 1895634.0, "step": 1058 }, { "epoch": 0.1714840903570561, "grad_norm": 22.951515197753906, "learning_rate": 8.286917098445596e-06, "loss": 0.6691, "mean_token_accuracy": 0.912091463804245, "num_tokens": 1897430.0, "step": 1059 }, { "epoch": 0.17164602056513642, "grad_norm": 26.006912231445312, "learning_rate": 8.28529792746114e-06, "loss": 0.7017, "mean_token_accuracy": 0.9104984700679779, "num_tokens": 1899210.0, "step": 1060 }, { "epoch": 0.17180795077321676, "grad_norm": 25.629560470581055, "learning_rate": 8.283678756476683e-06, "loss": 0.7552, "mean_token_accuracy": 0.9049533605575562, "num_tokens": 1901006.0, "step": 1061 }, { "epoch": 0.17196988098129706, "grad_norm": 33.309391021728516, "learning_rate": 8.282059585492229e-06, "loss": 0.7553, "mean_token_accuracy": 0.9005594551563263, "num_tokens": 1902811.0, "step": 1062 }, { "epoch": 0.17213181118937737, "grad_norm": 27.263444900512695, "learning_rate": 8.280440414507774e-06, "loss": 0.7335, "mean_token_accuracy": 0.9054373502731323, "num_tokens": 1904608.0, "step": 1063 }, { "epoch": 0.1722937413974577, "grad_norm": 23.66631507873535, "learning_rate": 8.278821243523318e-06, "loss": 0.7291, "mean_token_accuracy": 0.9117647111415863, "num_tokens": 1906392.0, "step": 1064 }, { "epoch": 0.172455671605538, "grad_norm": 26.183629989624023, "learning_rate": 8.277202072538861e-06, "loss": 0.7053, "mean_token_accuracy": 0.8985491693019867, "num_tokens": 1908179.0, "step": 1065 }, { "epoch": 0.17261760181361832, "grad_norm": 25.572322845458984, "learning_rate": 8.275582901554405e-06, "loss": 0.7487, "mean_token_accuracy": 0.904900074005127, "num_tokens": 1909975.0, "step": 1066 }, { "epoch": 0.17277953202169866, "grad_norm": 24.543785095214844, "learning_rate": 8.27396373056995e-06, "loss": 0.7252, "mean_token_accuracy": 0.9144460260868073, "num_tokens": 1911756.0, "step": 1067 }, { "epoch": 0.17294146222977896, "grad_norm": 34.99859619140625, "learning_rate": 8.272344559585494e-06, "loss": 0.74, "mean_token_accuracy": 0.9032531678676605, "num_tokens": 1913547.0, "step": 1068 }, { "epoch": 0.17310339243785927, "grad_norm": 21.34604263305664, "learning_rate": 8.270725388601037e-06, "loss": 0.6489, "mean_token_accuracy": 0.9059401154518127, "num_tokens": 1915346.0, "step": 1069 }, { "epoch": 0.1732653226459396, "grad_norm": 22.849685668945312, "learning_rate": 8.269106217616581e-06, "loss": 0.6233, "mean_token_accuracy": 0.9062369465827942, "num_tokens": 1917135.0, "step": 1070 }, { "epoch": 0.17342725285401991, "grad_norm": 30.736602783203125, "learning_rate": 8.267487046632126e-06, "loss": 0.7909, "mean_token_accuracy": 0.902877688407898, "num_tokens": 1918925.0, "step": 1071 }, { "epoch": 0.17358918306210022, "grad_norm": 38.222381591796875, "learning_rate": 8.26586787564767e-06, "loss": 0.9476, "mean_token_accuracy": 0.8739229142665863, "num_tokens": 1920719.0, "step": 1072 }, { "epoch": 0.17375111327018056, "grad_norm": 30.508625030517578, "learning_rate": 8.264248704663213e-06, "loss": 0.8954, "mean_token_accuracy": 0.8942881524562836, "num_tokens": 1922505.0, "step": 1073 }, { "epoch": 0.17391304347826086, "grad_norm": 27.91756248474121, "learning_rate": 8.262629533678757e-06, "loss": 0.7049, "mean_token_accuracy": 0.9043585360050201, "num_tokens": 1924303.0, "step": 1074 }, { "epoch": 0.1740749736863412, "grad_norm": 29.92466926574707, "learning_rate": 8.261010362694302e-06, "loss": 0.7575, "mean_token_accuracy": 0.8922041058540344, "num_tokens": 1926093.0, "step": 1075 }, { "epoch": 0.1742369038944215, "grad_norm": 21.300933837890625, "learning_rate": 8.259391191709846e-06, "loss": 0.5956, "mean_token_accuracy": 0.9121207296848297, "num_tokens": 1927879.0, "step": 1076 }, { "epoch": 0.17439883410250182, "grad_norm": 24.171720504760742, "learning_rate": 8.25777202072539e-06, "loss": 0.6282, "mean_token_accuracy": 0.9152413010597229, "num_tokens": 1929662.0, "step": 1077 }, { "epoch": 0.17456076431058215, "grad_norm": 30.51509666442871, "learning_rate": 8.256152849740933e-06, "loss": 0.7215, "mean_token_accuracy": 0.9052110016345978, "num_tokens": 1931448.0, "step": 1078 }, { "epoch": 0.17472269451866246, "grad_norm": 25.310985565185547, "learning_rate": 8.254533678756478e-06, "loss": 0.6876, "mean_token_accuracy": 0.9041759967803955, "num_tokens": 1933244.0, "step": 1079 }, { "epoch": 0.17488462472674277, "grad_norm": 29.202320098876953, "learning_rate": 8.252914507772022e-06, "loss": 0.685, "mean_token_accuracy": 0.8978347778320312, "num_tokens": 1935032.0, "step": 1080 }, { "epoch": 0.1750465549348231, "grad_norm": 27.092933654785156, "learning_rate": 8.251295336787565e-06, "loss": 0.6689, "mean_token_accuracy": 0.904411792755127, "num_tokens": 1936816.0, "step": 1081 }, { "epoch": 0.1752084851429034, "grad_norm": 26.014677047729492, "learning_rate": 8.24967616580311e-06, "loss": 0.6719, "mean_token_accuracy": 0.9114806056022644, "num_tokens": 1938610.0, "step": 1082 }, { "epoch": 0.17537041535098372, "grad_norm": 25.77332305908203, "learning_rate": 8.248056994818654e-06, "loss": 0.6362, "mean_token_accuracy": 0.9077786207199097, "num_tokens": 1940393.0, "step": 1083 }, { "epoch": 0.17553234555906405, "grad_norm": 32.63511657714844, "learning_rate": 8.246437823834198e-06, "loss": 0.7955, "mean_token_accuracy": 0.8903985619544983, "num_tokens": 1942187.0, "step": 1084 }, { "epoch": 0.17569427576714436, "grad_norm": 28.944568634033203, "learning_rate": 8.244818652849741e-06, "loss": 0.7032, "mean_token_accuracy": 0.8952281475067139, "num_tokens": 1943983.0, "step": 1085 }, { "epoch": 0.17585620597522467, "grad_norm": 29.824087142944336, "learning_rate": 8.243199481865287e-06, "loss": 0.8283, "mean_token_accuracy": 0.8979211151599884, "num_tokens": 1945769.0, "step": 1086 }, { "epoch": 0.176018136183305, "grad_norm": 27.063369750976562, "learning_rate": 8.24158031088083e-06, "loss": 0.741, "mean_token_accuracy": 0.9059762358665466, "num_tokens": 1947558.0, "step": 1087 }, { "epoch": 0.1761800663913853, "grad_norm": 33.166141510009766, "learning_rate": 8.239961139896374e-06, "loss": 0.7747, "mean_token_accuracy": 0.8994308412075043, "num_tokens": 1949358.0, "step": 1088 }, { "epoch": 0.17634199659946562, "grad_norm": 25.398836135864258, "learning_rate": 8.238341968911918e-06, "loss": 0.7109, "mean_token_accuracy": 0.9055555462837219, "num_tokens": 1951145.0, "step": 1089 }, { "epoch": 0.17650392680754595, "grad_norm": 22.27993392944336, "learning_rate": 8.236722797927463e-06, "loss": 0.6545, "mean_token_accuracy": 0.9088341891765594, "num_tokens": 1952931.0, "step": 1090 }, { "epoch": 0.17666585701562626, "grad_norm": 21.60903549194336, "learning_rate": 8.235103626943006e-06, "loss": 0.6121, "mean_token_accuracy": 0.9133562743663788, "num_tokens": 1954720.0, "step": 1091 }, { "epoch": 0.1768277872237066, "grad_norm": 31.3790283203125, "learning_rate": 8.23348445595855e-06, "loss": 0.9464, "mean_token_accuracy": 0.8852774798870087, "num_tokens": 1956511.0, "step": 1092 }, { "epoch": 0.1769897174317869, "grad_norm": 31.63407325744629, "learning_rate": 8.231865284974094e-06, "loss": 0.7967, "mean_token_accuracy": 0.9042253494262695, "num_tokens": 1958305.0, "step": 1093 }, { "epoch": 0.1771516476398672, "grad_norm": 22.23463249206543, "learning_rate": 8.230246113989639e-06, "loss": 0.5778, "mean_token_accuracy": 0.9158298671245575, "num_tokens": 1960099.0, "step": 1094 }, { "epoch": 0.17731357784794755, "grad_norm": 32.00529098510742, "learning_rate": 8.228626943005182e-06, "loss": 0.7984, "mean_token_accuracy": 0.8856922388076782, "num_tokens": 1961890.0, "step": 1095 }, { "epoch": 0.17747550805602785, "grad_norm": 27.947002410888672, "learning_rate": 8.227007772020726e-06, "loss": 0.7759, "mean_token_accuracy": 0.890306144952774, "num_tokens": 1963693.0, "step": 1096 }, { "epoch": 0.17763743826410816, "grad_norm": 25.10782241821289, "learning_rate": 8.22538860103627e-06, "loss": 0.72, "mean_token_accuracy": 0.9003545939922333, "num_tokens": 1965486.0, "step": 1097 }, { "epoch": 0.1777993684721885, "grad_norm": 29.25543212890625, "learning_rate": 8.223769430051815e-06, "loss": 0.8409, "mean_token_accuracy": 0.8910053074359894, "num_tokens": 1967273.0, "step": 1098 }, { "epoch": 0.1779612986802688, "grad_norm": 28.227872848510742, "learning_rate": 8.222150259067359e-06, "loss": 0.7281, "mean_token_accuracy": 0.9027804732322693, "num_tokens": 1969072.0, "step": 1099 }, { "epoch": 0.1781232288883491, "grad_norm": 29.319116592407227, "learning_rate": 8.220531088082902e-06, "loss": 0.8797, "mean_token_accuracy": 0.8921104967594147, "num_tokens": 1970872.0, "step": 1100 }, { "epoch": 0.17828515909642945, "grad_norm": 31.41961669921875, "learning_rate": 8.218911917098447e-06, "loss": 0.8696, "mean_token_accuracy": 0.8847214579582214, "num_tokens": 1972679.0, "step": 1101 }, { "epoch": 0.17844708930450975, "grad_norm": 28.50559425354004, "learning_rate": 8.217292746113991e-06, "loss": 0.8072, "mean_token_accuracy": 0.8995442986488342, "num_tokens": 1974469.0, "step": 1102 }, { "epoch": 0.17860901951259006, "grad_norm": 16.01573371887207, "learning_rate": 8.215673575129535e-06, "loss": 0.524, "mean_token_accuracy": 0.9239532649517059, "num_tokens": 1976244.0, "step": 1103 }, { "epoch": 0.1787709497206704, "grad_norm": 27.766342163085938, "learning_rate": 8.214054404145078e-06, "loss": 0.8365, "mean_token_accuracy": 0.8985978960990906, "num_tokens": 1978032.0, "step": 1104 }, { "epoch": 0.1789328799287507, "grad_norm": 28.34012222290039, "learning_rate": 8.212435233160623e-06, "loss": 0.8526, "mean_token_accuracy": 0.8863832950592041, "num_tokens": 1979816.0, "step": 1105 }, { "epoch": 0.17909481013683104, "grad_norm": 29.48569107055664, "learning_rate": 8.210816062176167e-06, "loss": 0.8623, "mean_token_accuracy": 0.891687273979187, "num_tokens": 1981604.0, "step": 1106 }, { "epoch": 0.17925674034491135, "grad_norm": 25.87701988220215, "learning_rate": 8.20919689119171e-06, "loss": 0.9785, "mean_token_accuracy": 0.8857634961605072, "num_tokens": 1983393.0, "step": 1107 }, { "epoch": 0.17941867055299165, "grad_norm": 29.637012481689453, "learning_rate": 8.207577720207254e-06, "loss": 0.7529, "mean_token_accuracy": 0.8990444839000702, "num_tokens": 1985182.0, "step": 1108 }, { "epoch": 0.179580600761072, "grad_norm": 26.58379554748535, "learning_rate": 8.2059585492228e-06, "loss": 0.7437, "mean_token_accuracy": 0.9144144356250763, "num_tokens": 1986986.0, "step": 1109 }, { "epoch": 0.1797425309691523, "grad_norm": 17.786781311035156, "learning_rate": 8.204339378238343e-06, "loss": 0.5478, "mean_token_accuracy": 0.9175507426261902, "num_tokens": 1988777.0, "step": 1110 }, { "epoch": 0.1799044611772326, "grad_norm": 26.482398986816406, "learning_rate": 8.202720207253887e-06, "loss": 0.6847, "mean_token_accuracy": 0.9134517908096313, "num_tokens": 1990577.0, "step": 1111 }, { "epoch": 0.18006639138531294, "grad_norm": 22.108814239501953, "learning_rate": 8.20110103626943e-06, "loss": 0.6283, "mean_token_accuracy": 0.9064748287200928, "num_tokens": 1992367.0, "step": 1112 }, { "epoch": 0.18022832159339325, "grad_norm": 29.971628189086914, "learning_rate": 8.199481865284976e-06, "loss": 0.8744, "mean_token_accuracy": 0.9059009552001953, "num_tokens": 1994155.0, "step": 1113 }, { "epoch": 0.18039025180147356, "grad_norm": 28.439516067504883, "learning_rate": 8.197862694300519e-06, "loss": 0.8624, "mean_token_accuracy": 0.8909341096878052, "num_tokens": 1995942.0, "step": 1114 }, { "epoch": 0.1805521820095539, "grad_norm": 39.925140380859375, "learning_rate": 8.196243523316063e-06, "loss": 1.1399, "mean_token_accuracy": 0.8619047701358795, "num_tokens": 1997751.0, "step": 1115 }, { "epoch": 0.1807141122176342, "grad_norm": 28.248197555541992, "learning_rate": 8.194624352331606e-06, "loss": 0.8036, "mean_token_accuracy": 0.88520547747612, "num_tokens": 1999559.0, "step": 1116 }, { "epoch": 0.1808760424257145, "grad_norm": 24.39079475402832, "learning_rate": 8.193005181347152e-06, "loss": 0.6632, "mean_token_accuracy": 0.9162560105323792, "num_tokens": 2001356.0, "step": 1117 }, { "epoch": 0.18103797263379484, "grad_norm": 24.221967697143555, "learning_rate": 8.191386010362695e-06, "loss": 0.6952, "mean_token_accuracy": 0.9056521952152252, "num_tokens": 2003156.0, "step": 1118 }, { "epoch": 0.18119990284187515, "grad_norm": 26.90912437438965, "learning_rate": 8.189766839378239e-06, "loss": 0.7613, "mean_token_accuracy": 0.8972624838352203, "num_tokens": 2004941.0, "step": 1119 }, { "epoch": 0.18136183304995546, "grad_norm": 21.62910270690918, "learning_rate": 8.188147668393784e-06, "loss": 0.6974, "mean_token_accuracy": 0.9056650102138519, "num_tokens": 2006738.0, "step": 1120 }, { "epoch": 0.1815237632580358, "grad_norm": 27.202850341796875, "learning_rate": 8.186528497409328e-06, "loss": 0.7229, "mean_token_accuracy": 0.8865191042423248, "num_tokens": 2008532.0, "step": 1121 }, { "epoch": 0.1816856934661161, "grad_norm": 34.75956344604492, "learning_rate": 8.184909326424871e-06, "loss": 0.9544, "mean_token_accuracy": 0.877687931060791, "num_tokens": 2010329.0, "step": 1122 }, { "epoch": 0.18184762367419643, "grad_norm": 18.468286514282227, "learning_rate": 8.183290155440415e-06, "loss": 0.6412, "mean_token_accuracy": 0.9209109842777252, "num_tokens": 2012119.0, "step": 1123 }, { "epoch": 0.18200955388227674, "grad_norm": 29.60638999938965, "learning_rate": 8.18167098445596e-06, "loss": 1.0246, "mean_token_accuracy": 0.8882653117179871, "num_tokens": 2013918.0, "step": 1124 }, { "epoch": 0.18217148409035705, "grad_norm": 21.280981063842773, "learning_rate": 8.180051813471504e-06, "loss": 0.7142, "mean_token_accuracy": 0.9124059975147247, "num_tokens": 2015703.0, "step": 1125 }, { "epoch": 0.18233341429843739, "grad_norm": 19.67123031616211, "learning_rate": 8.178432642487047e-06, "loss": 0.6123, "mean_token_accuracy": 0.9148550927639008, "num_tokens": 2017497.0, "step": 1126 }, { "epoch": 0.1824953445065177, "grad_norm": 23.823774337768555, "learning_rate": 8.176813471502591e-06, "loss": 0.7127, "mean_token_accuracy": 0.8936701118946075, "num_tokens": 2019282.0, "step": 1127 }, { "epoch": 0.182657274714598, "grad_norm": 24.950214385986328, "learning_rate": 8.175194300518136e-06, "loss": 0.6825, "mean_token_accuracy": 0.91737300157547, "num_tokens": 2021070.0, "step": 1128 }, { "epoch": 0.18281920492267834, "grad_norm": 23.3730525970459, "learning_rate": 8.17357512953368e-06, "loss": 0.6849, "mean_token_accuracy": 0.9131805896759033, "num_tokens": 2022858.0, "step": 1129 }, { "epoch": 0.18298113513075864, "grad_norm": 28.625581741333008, "learning_rate": 8.171955958549223e-06, "loss": 0.7212, "mean_token_accuracy": 0.8903346657752991, "num_tokens": 2024653.0, "step": 1130 }, { "epoch": 0.18314306533883895, "grad_norm": 17.436969757080078, "learning_rate": 8.170336787564767e-06, "loss": 0.5895, "mean_token_accuracy": 0.924433171749115, "num_tokens": 2026430.0, "step": 1131 }, { "epoch": 0.18330499554691929, "grad_norm": 27.786771774291992, "learning_rate": 8.168717616580312e-06, "loss": 0.7282, "mean_token_accuracy": 0.8920877575874329, "num_tokens": 2028228.0, "step": 1132 }, { "epoch": 0.1834669257549996, "grad_norm": 24.240581512451172, "learning_rate": 8.167098445595856e-06, "loss": 0.7558, "mean_token_accuracy": 0.9045454561710358, "num_tokens": 2030023.0, "step": 1133 }, { "epoch": 0.1836288559630799, "grad_norm": 22.884143829345703, "learning_rate": 8.1654792746114e-06, "loss": 0.662, "mean_token_accuracy": 0.9100719392299652, "num_tokens": 2031813.0, "step": 1134 }, { "epoch": 0.18379078617116024, "grad_norm": 21.736400604248047, "learning_rate": 8.163860103626943e-06, "loss": 0.6893, "mean_token_accuracy": 0.9108347296714783, "num_tokens": 2033594.0, "step": 1135 }, { "epoch": 0.18395271637924054, "grad_norm": 17.320335388183594, "learning_rate": 8.162240932642488e-06, "loss": 0.5911, "mean_token_accuracy": 0.9178501665592194, "num_tokens": 2035374.0, "step": 1136 }, { "epoch": 0.18411464658732085, "grad_norm": 22.587438583374023, "learning_rate": 8.160621761658032e-06, "loss": 0.7026, "mean_token_accuracy": 0.9135279059410095, "num_tokens": 2037175.0, "step": 1137 }, { "epoch": 0.1842765767954012, "grad_norm": 26.97759246826172, "learning_rate": 8.159002590673575e-06, "loss": 0.738, "mean_token_accuracy": 0.9051197171211243, "num_tokens": 2038972.0, "step": 1138 }, { "epoch": 0.1844385070034815, "grad_norm": 19.594486236572266, "learning_rate": 8.15738341968912e-06, "loss": 0.6572, "mean_token_accuracy": 0.9057179093360901, "num_tokens": 2040760.0, "step": 1139 }, { "epoch": 0.18460043721156183, "grad_norm": 25.995878219604492, "learning_rate": 8.155764248704664e-06, "loss": 0.7645, "mean_token_accuracy": 0.9039260447025299, "num_tokens": 2042553.0, "step": 1140 }, { "epoch": 0.18476236741964214, "grad_norm": 22.171667098999023, "learning_rate": 8.154145077720208e-06, "loss": 0.6972, "mean_token_accuracy": 0.908413290977478, "num_tokens": 2044337.0, "step": 1141 }, { "epoch": 0.18492429762772244, "grad_norm": 22.2275333404541, "learning_rate": 8.152525906735751e-06, "loss": 0.6204, "mean_token_accuracy": 0.9176007807254791, "num_tokens": 2046128.0, "step": 1142 }, { "epoch": 0.18508622783580278, "grad_norm": 27.31783676147461, "learning_rate": 8.150906735751297e-06, "loss": 0.8123, "mean_token_accuracy": 0.8987409472465515, "num_tokens": 2047914.0, "step": 1143 }, { "epoch": 0.1852481580438831, "grad_norm": 30.983585357666016, "learning_rate": 8.14928756476684e-06, "loss": 0.8746, "mean_token_accuracy": 0.896200567483902, "num_tokens": 2049712.0, "step": 1144 }, { "epoch": 0.1854100882519634, "grad_norm": 25.47585678100586, "learning_rate": 8.147668393782384e-06, "loss": 0.7732, "mean_token_accuracy": 0.8934580087661743, "num_tokens": 2051505.0, "step": 1145 }, { "epoch": 0.18557201846004373, "grad_norm": 21.676660537719727, "learning_rate": 8.146049222797928e-06, "loss": 0.5962, "mean_token_accuracy": 0.917391300201416, "num_tokens": 2053295.0, "step": 1146 }, { "epoch": 0.18573394866812404, "grad_norm": 23.765411376953125, "learning_rate": 8.144430051813473e-06, "loss": 0.5917, "mean_token_accuracy": 0.8993055522441864, "num_tokens": 2055095.0, "step": 1147 }, { "epoch": 0.18589587887620435, "grad_norm": 21.997718811035156, "learning_rate": 8.142810880829016e-06, "loss": 0.6556, "mean_token_accuracy": 0.9146656692028046, "num_tokens": 2056888.0, "step": 1148 }, { "epoch": 0.18605780908428468, "grad_norm": 24.783058166503906, "learning_rate": 8.14119170984456e-06, "loss": 0.8632, "mean_token_accuracy": 0.8886054456233978, "num_tokens": 2058687.0, "step": 1149 }, { "epoch": 0.186219739292365, "grad_norm": 36.05255126953125, "learning_rate": 8.139572538860104e-06, "loss": 1.1645, "mean_token_accuracy": 0.8693121671676636, "num_tokens": 2060481.0, "step": 1150 }, { "epoch": 0.1863816695004453, "grad_norm": 27.821964263916016, "learning_rate": 8.137953367875649e-06, "loss": 0.8362, "mean_token_accuracy": 0.8832116723060608, "num_tokens": 2062267.0, "step": 1151 }, { "epoch": 0.18654359970852563, "grad_norm": 22.230712890625, "learning_rate": 8.136334196891192e-06, "loss": 0.7296, "mean_token_accuracy": 0.9081102907657623, "num_tokens": 2064051.0, "step": 1152 }, { "epoch": 0.18670552991660594, "grad_norm": 24.18702507019043, "learning_rate": 8.134715025906736e-06, "loss": 0.736, "mean_token_accuracy": 0.9047702252864838, "num_tokens": 2065846.0, "step": 1153 }, { "epoch": 0.18686746012468625, "grad_norm": 25.95163345336914, "learning_rate": 8.13309585492228e-06, "loss": 0.9258, "mean_token_accuracy": 0.8866084218025208, "num_tokens": 2067649.0, "step": 1154 }, { "epoch": 0.18702939033276658, "grad_norm": 20.390827178955078, "learning_rate": 8.131476683937825e-06, "loss": 0.7482, "mean_token_accuracy": 0.9115451872348785, "num_tokens": 2069432.0, "step": 1155 }, { "epoch": 0.1871913205408469, "grad_norm": 18.583084106445312, "learning_rate": 8.129857512953369e-06, "loss": 0.5579, "mean_token_accuracy": 0.9198294281959534, "num_tokens": 2071218.0, "step": 1156 }, { "epoch": 0.18735325074892722, "grad_norm": 28.328243255615234, "learning_rate": 8.128238341968912e-06, "loss": 0.7341, "mean_token_accuracy": 0.897201418876648, "num_tokens": 2073013.0, "step": 1157 }, { "epoch": 0.18751518095700753, "grad_norm": 25.317455291748047, "learning_rate": 8.126619170984457e-06, "loss": 0.779, "mean_token_accuracy": 0.8967473804950714, "num_tokens": 2074806.0, "step": 1158 }, { "epoch": 0.18767711116508784, "grad_norm": 24.809972763061523, "learning_rate": 8.125000000000001e-06, "loss": 0.7398, "mean_token_accuracy": 0.8940375447273254, "num_tokens": 2076610.0, "step": 1159 }, { "epoch": 0.18783904137316818, "grad_norm": 20.725688934326172, "learning_rate": 8.123380829015545e-06, "loss": 0.6963, "mean_token_accuracy": 0.9077828824520111, "num_tokens": 2078393.0, "step": 1160 }, { "epoch": 0.18800097158124848, "grad_norm": 19.541519165039062, "learning_rate": 8.121761658031088e-06, "loss": 0.6043, "mean_token_accuracy": 0.9143867790699005, "num_tokens": 2080173.0, "step": 1161 }, { "epoch": 0.1881629017893288, "grad_norm": 21.553709030151367, "learning_rate": 8.120142487046633e-06, "loss": 0.6597, "mean_token_accuracy": 0.9043911099433899, "num_tokens": 2081957.0, "step": 1162 }, { "epoch": 0.18832483199740913, "grad_norm": 25.712669372558594, "learning_rate": 8.118523316062177e-06, "loss": 0.7646, "mean_token_accuracy": 0.8952664136886597, "num_tokens": 2083746.0, "step": 1163 }, { "epoch": 0.18848676220548943, "grad_norm": 15.06070613861084, "learning_rate": 8.11690414507772e-06, "loss": 0.6219, "mean_token_accuracy": 0.9184591770172119, "num_tokens": 2085528.0, "step": 1164 }, { "epoch": 0.18864869241356974, "grad_norm": 33.2811164855957, "learning_rate": 8.115284974093264e-06, "loss": 1.1145, "mean_token_accuracy": 0.8748107850551605, "num_tokens": 2087331.0, "step": 1165 }, { "epoch": 0.18881062262165008, "grad_norm": 28.879222869873047, "learning_rate": 8.11366580310881e-06, "loss": 0.9252, "mean_token_accuracy": 0.8837940394878387, "num_tokens": 2089135.0, "step": 1166 }, { "epoch": 0.18897255282973038, "grad_norm": 18.189077377319336, "learning_rate": 8.112046632124353e-06, "loss": 0.6675, "mean_token_accuracy": 0.9051094651222229, "num_tokens": 2090921.0, "step": 1167 }, { "epoch": 0.1891344830378107, "grad_norm": 25.741928100585938, "learning_rate": 8.110427461139897e-06, "loss": 0.7372, "mean_token_accuracy": 0.9044477939605713, "num_tokens": 2092716.0, "step": 1168 }, { "epoch": 0.18929641324589103, "grad_norm": 24.907745361328125, "learning_rate": 8.10880829015544e-06, "loss": 0.7784, "mean_token_accuracy": 0.9029007852077484, "num_tokens": 2094506.0, "step": 1169 }, { "epoch": 0.18945834345397133, "grad_norm": 26.88646697998047, "learning_rate": 8.107189119170986e-06, "loss": 0.7936, "mean_token_accuracy": 0.9039436280727386, "num_tokens": 2096299.0, "step": 1170 }, { "epoch": 0.18962027366205167, "grad_norm": 16.537071228027344, "learning_rate": 8.105569948186529e-06, "loss": 0.5142, "mean_token_accuracy": 0.9260563254356384, "num_tokens": 2098095.0, "step": 1171 }, { "epoch": 0.18978220387013198, "grad_norm": 24.197826385498047, "learning_rate": 8.103950777202073e-06, "loss": 0.7423, "mean_token_accuracy": 0.8968545496463776, "num_tokens": 2099887.0, "step": 1172 }, { "epoch": 0.18994413407821228, "grad_norm": 24.82110595703125, "learning_rate": 8.102331606217616e-06, "loss": 0.6624, "mean_token_accuracy": 0.9094041585922241, "num_tokens": 2101674.0, "step": 1173 }, { "epoch": 0.19010606428629262, "grad_norm": 23.7404842376709, "learning_rate": 8.100712435233162e-06, "loss": 0.8088, "mean_token_accuracy": 0.9000329375267029, "num_tokens": 2103456.0, "step": 1174 }, { "epoch": 0.19026799449437293, "grad_norm": 28.095535278320312, "learning_rate": 8.099093264248705e-06, "loss": 0.7779, "mean_token_accuracy": 0.8889317512512207, "num_tokens": 2105256.0, "step": 1175 }, { "epoch": 0.19042992470245323, "grad_norm": 24.71026039123535, "learning_rate": 8.097474093264249e-06, "loss": 0.7809, "mean_token_accuracy": 0.9141661822795868, "num_tokens": 2107048.0, "step": 1176 }, { "epoch": 0.19059185491053357, "grad_norm": 26.66474723815918, "learning_rate": 8.095854922279794e-06, "loss": 0.8989, "mean_token_accuracy": 0.8950007855892181, "num_tokens": 2108836.0, "step": 1177 }, { "epoch": 0.19075378511861388, "grad_norm": 34.18571090698242, "learning_rate": 8.094235751295338e-06, "loss": 1.0227, "mean_token_accuracy": 0.8766702711582184, "num_tokens": 2110646.0, "step": 1178 }, { "epoch": 0.19091571532669419, "grad_norm": 21.079694747924805, "learning_rate": 8.092616580310881e-06, "loss": 0.5885, "mean_token_accuracy": 0.909546822309494, "num_tokens": 2112434.0, "step": 1179 }, { "epoch": 0.19107764553477452, "grad_norm": 22.957441329956055, "learning_rate": 8.090997409326425e-06, "loss": 0.6918, "mean_token_accuracy": 0.9119808673858643, "num_tokens": 2114228.0, "step": 1180 }, { "epoch": 0.19123957574285483, "grad_norm": 29.247337341308594, "learning_rate": 8.08937823834197e-06, "loss": 0.8224, "mean_token_accuracy": 0.8916361033916473, "num_tokens": 2116026.0, "step": 1181 }, { "epoch": 0.19140150595093514, "grad_norm": 23.098365783691406, "learning_rate": 8.087759067357514e-06, "loss": 0.6655, "mean_token_accuracy": 0.9030748903751373, "num_tokens": 2117806.0, "step": 1182 }, { "epoch": 0.19156343615901547, "grad_norm": 25.975364685058594, "learning_rate": 8.086139896373057e-06, "loss": 0.7532, "mean_token_accuracy": 0.911080926656723, "num_tokens": 2119600.0, "step": 1183 }, { "epoch": 0.19172536636709578, "grad_norm": 18.41541290283203, "learning_rate": 8.084520725388601e-06, "loss": 0.6056, "mean_token_accuracy": 0.9174720048904419, "num_tokens": 2121391.0, "step": 1184 }, { "epoch": 0.1918872965751761, "grad_norm": 30.505582809448242, "learning_rate": 8.082901554404146e-06, "loss": 0.9433, "mean_token_accuracy": 0.8689639568328857, "num_tokens": 2123201.0, "step": 1185 }, { "epoch": 0.19204922678325642, "grad_norm": 21.6943416595459, "learning_rate": 8.08128238341969e-06, "loss": 0.658, "mean_token_accuracy": 0.906897246837616, "num_tokens": 2124991.0, "step": 1186 }, { "epoch": 0.19221115699133673, "grad_norm": 20.60589027404785, "learning_rate": 8.079663212435233e-06, "loss": 0.5987, "mean_token_accuracy": 0.9109354317188263, "num_tokens": 2126772.0, "step": 1187 }, { "epoch": 0.19237308719941706, "grad_norm": 28.147605895996094, "learning_rate": 8.078044041450777e-06, "loss": 0.7814, "mean_token_accuracy": 0.8932117521762848, "num_tokens": 2128565.0, "step": 1188 }, { "epoch": 0.19253501740749737, "grad_norm": 23.695560455322266, "learning_rate": 8.076424870466322e-06, "loss": 0.7022, "mean_token_accuracy": 0.9094942808151245, "num_tokens": 2130353.0, "step": 1189 }, { "epoch": 0.19269694761557768, "grad_norm": 21.533119201660156, "learning_rate": 8.074805699481866e-06, "loss": 0.6648, "mean_token_accuracy": 0.8966437876224518, "num_tokens": 2132136.0, "step": 1190 }, { "epoch": 0.19285887782365801, "grad_norm": 27.630273818969727, "learning_rate": 8.07318652849741e-06, "loss": 0.7796, "mean_token_accuracy": 0.901408463716507, "num_tokens": 2133932.0, "step": 1191 }, { "epoch": 0.19302080803173832, "grad_norm": 26.547698974609375, "learning_rate": 8.071567357512955e-06, "loss": 0.758, "mean_token_accuracy": 0.8882890045642853, "num_tokens": 2135721.0, "step": 1192 }, { "epoch": 0.19318273823981863, "grad_norm": 27.16016960144043, "learning_rate": 8.069948186528498e-06, "loss": 0.7538, "mean_token_accuracy": 0.9013746976852417, "num_tokens": 2137507.0, "step": 1193 }, { "epoch": 0.19334466844789897, "grad_norm": 25.121185302734375, "learning_rate": 8.068329015544042e-06, "loss": 0.745, "mean_token_accuracy": 0.9100438058376312, "num_tokens": 2139296.0, "step": 1194 }, { "epoch": 0.19350659865597927, "grad_norm": 29.57895278930664, "learning_rate": 8.066709844559585e-06, "loss": 0.7415, "mean_token_accuracy": 0.8931781053543091, "num_tokens": 2141105.0, "step": 1195 }, { "epoch": 0.19366852886405958, "grad_norm": 22.85805320739746, "learning_rate": 8.06509067357513e-06, "loss": 0.5281, "mean_token_accuracy": 0.9187187254428864, "num_tokens": 2142900.0, "step": 1196 }, { "epoch": 0.19383045907213992, "grad_norm": 26.71902847290039, "learning_rate": 8.063471502590674e-06, "loss": 0.6898, "mean_token_accuracy": 0.9058204889297485, "num_tokens": 2144698.0, "step": 1197 }, { "epoch": 0.19399238928022022, "grad_norm": 23.67783546447754, "learning_rate": 8.061852331606218e-06, "loss": 0.6248, "mean_token_accuracy": 0.9172877967357635, "num_tokens": 2146488.0, "step": 1198 }, { "epoch": 0.19415431948830053, "grad_norm": 35.40565490722656, "learning_rate": 8.060233160621762e-06, "loss": 0.8489, "mean_token_accuracy": 0.8943355679512024, "num_tokens": 2148283.0, "step": 1199 }, { "epoch": 0.19431624969638087, "grad_norm": 30.098793029785156, "learning_rate": 8.058613989637307e-06, "loss": 0.7829, "mean_token_accuracy": 0.8916083872318268, "num_tokens": 2150081.0, "step": 1200 }, { "epoch": 0.19447817990446117, "grad_norm": 26.206090927124023, "learning_rate": 8.05699481865285e-06, "loss": 0.6646, "mean_token_accuracy": 0.9059523940086365, "num_tokens": 2151880.0, "step": 1201 }, { "epoch": 0.19464011011254148, "grad_norm": 36.07347106933594, "learning_rate": 8.055375647668394e-06, "loss": 0.966, "mean_token_accuracy": 0.87706458568573, "num_tokens": 2153684.0, "step": 1202 }, { "epoch": 0.19480204032062182, "grad_norm": 25.063003540039062, "learning_rate": 8.053756476683938e-06, "loss": 0.6934, "mean_token_accuracy": 0.9081102907657623, "num_tokens": 2155468.0, "step": 1203 }, { "epoch": 0.19496397052870212, "grad_norm": 28.45714569091797, "learning_rate": 8.052137305699483e-06, "loss": 0.6877, "mean_token_accuracy": 0.8988896012306213, "num_tokens": 2157257.0, "step": 1204 }, { "epoch": 0.19512590073678246, "grad_norm": 30.118324279785156, "learning_rate": 8.050518134715026e-06, "loss": 0.7811, "mean_token_accuracy": 0.8998035788536072, "num_tokens": 2159058.0, "step": 1205 }, { "epoch": 0.19528783094486277, "grad_norm": 15.193723678588867, "learning_rate": 8.04889896373057e-06, "loss": 0.5468, "mean_token_accuracy": 0.929380863904953, "num_tokens": 2160839.0, "step": 1206 }, { "epoch": 0.19544976115294307, "grad_norm": 26.35215950012207, "learning_rate": 8.047279792746114e-06, "loss": 0.8037, "mean_token_accuracy": 0.9021909236907959, "num_tokens": 2162637.0, "step": 1207 }, { "epoch": 0.1956116913610234, "grad_norm": 26.451229095458984, "learning_rate": 8.045660621761659e-06, "loss": 0.7258, "mean_token_accuracy": 0.9034482836723328, "num_tokens": 2164439.0, "step": 1208 }, { "epoch": 0.19577362156910372, "grad_norm": 25.747333526611328, "learning_rate": 8.044041450777202e-06, "loss": 0.7482, "mean_token_accuracy": 0.8984929025173187, "num_tokens": 2166227.0, "step": 1209 }, { "epoch": 0.19593555177718402, "grad_norm": 23.462419509887695, "learning_rate": 8.042422279792746e-06, "loss": 0.6088, "mean_token_accuracy": 0.9098878800868988, "num_tokens": 2168028.0, "step": 1210 }, { "epoch": 0.19609748198526436, "grad_norm": 16.312673568725586, "learning_rate": 8.040803108808291e-06, "loss": 0.5246, "mean_token_accuracy": 0.9318181872367859, "num_tokens": 2169804.0, "step": 1211 }, { "epoch": 0.19625941219334467, "grad_norm": 26.479970932006836, "learning_rate": 8.039183937823835e-06, "loss": 0.708, "mean_token_accuracy": 0.901408463716507, "num_tokens": 2171600.0, "step": 1212 }, { "epoch": 0.19642134240142498, "grad_norm": 30.41205406188965, "learning_rate": 8.037564766839379e-06, "loss": 0.767, "mean_token_accuracy": 0.8999904990196228, "num_tokens": 2173403.0, "step": 1213 }, { "epoch": 0.1965832726095053, "grad_norm": 25.26466178894043, "learning_rate": 8.035945595854922e-06, "loss": 0.7768, "mean_token_accuracy": 0.907131016254425, "num_tokens": 2175193.0, "step": 1214 }, { "epoch": 0.19674520281758562, "grad_norm": 21.494007110595703, "learning_rate": 8.034326424870467e-06, "loss": 0.6372, "mean_token_accuracy": 0.9097758233547211, "num_tokens": 2176982.0, "step": 1215 }, { "epoch": 0.19690713302566593, "grad_norm": 25.302734375, "learning_rate": 8.032707253886011e-06, "loss": 0.5933, "mean_token_accuracy": 0.9087297320365906, "num_tokens": 2178767.0, "step": 1216 }, { "epoch": 0.19706906323374626, "grad_norm": 24.172443389892578, "learning_rate": 8.031088082901555e-06, "loss": 0.6975, "mean_token_accuracy": 0.9043652415275574, "num_tokens": 2180551.0, "step": 1217 }, { "epoch": 0.19723099344182657, "grad_norm": 36.29187774658203, "learning_rate": 8.029468911917098e-06, "loss": 0.906, "mean_token_accuracy": 0.8779591619968414, "num_tokens": 2182348.0, "step": 1218 }, { "epoch": 0.1973929236499069, "grad_norm": 23.735004425048828, "learning_rate": 8.027849740932643e-06, "loss": 0.7442, "mean_token_accuracy": 0.8912636637687683, "num_tokens": 2184145.0, "step": 1219 }, { "epoch": 0.1975548538579872, "grad_norm": 30.058345794677734, "learning_rate": 8.026230569948187e-06, "loss": 0.9144, "mean_token_accuracy": 0.8915935754776001, "num_tokens": 2185942.0, "step": 1220 }, { "epoch": 0.19771678406606752, "grad_norm": 23.397602081298828, "learning_rate": 8.02461139896373e-06, "loss": 0.722, "mean_token_accuracy": 0.9078834652900696, "num_tokens": 2187736.0, "step": 1221 }, { "epoch": 0.19787871427414785, "grad_norm": 27.18838882446289, "learning_rate": 8.022992227979274e-06, "loss": 0.7102, "mean_token_accuracy": 0.901430606842041, "num_tokens": 2189522.0, "step": 1222 }, { "epoch": 0.19804064448222816, "grad_norm": 23.468795776367188, "learning_rate": 8.02137305699482e-06, "loss": 0.7404, "mean_token_accuracy": 0.9081010818481445, "num_tokens": 2191317.0, "step": 1223 }, { "epoch": 0.19820257469030847, "grad_norm": 31.06011199951172, "learning_rate": 8.019753886010363e-06, "loss": 0.7648, "mean_token_accuracy": 0.9059420526027679, "num_tokens": 2193117.0, "step": 1224 }, { "epoch": 0.1983645048983888, "grad_norm": 22.679725646972656, "learning_rate": 8.018134715025907e-06, "loss": 0.7271, "mean_token_accuracy": 0.9179934859275818, "num_tokens": 2194909.0, "step": 1225 }, { "epoch": 0.1985264351064691, "grad_norm": 23.59375, "learning_rate": 8.01651554404145e-06, "loss": 0.7013, "mean_token_accuracy": 0.8968297243118286, "num_tokens": 2196692.0, "step": 1226 }, { "epoch": 0.19868836531454942, "grad_norm": 23.235851287841797, "learning_rate": 8.014896373056996e-06, "loss": 0.8048, "mean_token_accuracy": 0.9032624065876007, "num_tokens": 2198495.0, "step": 1227 }, { "epoch": 0.19885029552262976, "grad_norm": 27.12327003479004, "learning_rate": 8.01327720207254e-06, "loss": 0.6802, "mean_token_accuracy": 0.9052418172359467, "num_tokens": 2200281.0, "step": 1228 }, { "epoch": 0.19901222573071006, "grad_norm": 20.638362884521484, "learning_rate": 8.011658031088083e-06, "loss": 0.6389, "mean_token_accuracy": 0.9024864137172699, "num_tokens": 2202071.0, "step": 1229 }, { "epoch": 0.19917415593879037, "grad_norm": 22.3620662689209, "learning_rate": 8.010038860103628e-06, "loss": 0.6141, "mean_token_accuracy": 0.9149899184703827, "num_tokens": 2203865.0, "step": 1230 }, { "epoch": 0.1993360861468707, "grad_norm": 26.976482391357422, "learning_rate": 8.008419689119172e-06, "loss": 0.7196, "mean_token_accuracy": 0.8991561830043793, "num_tokens": 2205655.0, "step": 1231 }, { "epoch": 0.199498016354951, "grad_norm": 29.312589645385742, "learning_rate": 8.006800518134715e-06, "loss": 0.9631, "mean_token_accuracy": 0.8712643682956696, "num_tokens": 2207462.0, "step": 1232 }, { "epoch": 0.19965994656303132, "grad_norm": 25.790000915527344, "learning_rate": 8.005181347150259e-06, "loss": 0.6958, "mean_token_accuracy": 0.9038500487804413, "num_tokens": 2209255.0, "step": 1233 }, { "epoch": 0.19982187677111166, "grad_norm": 26.386384963989258, "learning_rate": 8.003562176165804e-06, "loss": 0.7477, "mean_token_accuracy": 0.9032407402992249, "num_tokens": 2211046.0, "step": 1234 }, { "epoch": 0.19998380697919196, "grad_norm": 31.159879684448242, "learning_rate": 8.001943005181348e-06, "loss": 0.8568, "mean_token_accuracy": 0.8943355679512024, "num_tokens": 2212841.0, "step": 1235 }, { "epoch": 0.2001457371872723, "grad_norm": 31.24150848388672, "learning_rate": 8.000323834196891e-06, "loss": 0.8831, "mean_token_accuracy": 0.8791281580924988, "num_tokens": 2214633.0, "step": 1236 }, { "epoch": 0.2003076673953526, "grad_norm": 16.259525299072266, "learning_rate": 7.998704663212435e-06, "loss": 0.5422, "mean_token_accuracy": 0.9250216782093048, "num_tokens": 2216425.0, "step": 1237 }, { "epoch": 0.20046959760343291, "grad_norm": 26.028379440307617, "learning_rate": 7.99708549222798e-06, "loss": 0.7836, "mean_token_accuracy": 0.8952554762363434, "num_tokens": 2218214.0, "step": 1238 }, { "epoch": 0.20063152781151325, "grad_norm": 30.29619026184082, "learning_rate": 7.995466321243524e-06, "loss": 0.7502, "mean_token_accuracy": 0.9125567078590393, "num_tokens": 2220011.0, "step": 1239 }, { "epoch": 0.20079345801959356, "grad_norm": 21.31903076171875, "learning_rate": 7.993847150259067e-06, "loss": 0.6764, "mean_token_accuracy": 0.9077857732772827, "num_tokens": 2221794.0, "step": 1240 }, { "epoch": 0.20095538822767386, "grad_norm": 23.54694938659668, "learning_rate": 7.992227979274611e-06, "loss": 0.6945, "mean_token_accuracy": 0.9064182341098785, "num_tokens": 2223584.0, "step": 1241 }, { "epoch": 0.2011173184357542, "grad_norm": 31.1170597076416, "learning_rate": 7.990608808290156e-06, "loss": 1.0261, "mean_token_accuracy": 0.8809839189052582, "num_tokens": 2225387.0, "step": 1242 }, { "epoch": 0.2012792486438345, "grad_norm": 24.913646697998047, "learning_rate": 7.9889896373057e-06, "loss": 0.6126, "mean_token_accuracy": 0.9072401821613312, "num_tokens": 2227179.0, "step": 1243 }, { "epoch": 0.20144117885191481, "grad_norm": 27.373817443847656, "learning_rate": 7.987370466321243e-06, "loss": 0.8113, "mean_token_accuracy": 0.9032531678676605, "num_tokens": 2228970.0, "step": 1244 }, { "epoch": 0.20160310905999515, "grad_norm": 16.3076171875, "learning_rate": 7.985751295336787e-06, "loss": 0.5798, "mean_token_accuracy": 0.925621896982193, "num_tokens": 2230751.0, "step": 1245 }, { "epoch": 0.20176503926807546, "grad_norm": 21.297365188598633, "learning_rate": 7.984132124352332e-06, "loss": 0.6674, "mean_token_accuracy": 0.9205517172813416, "num_tokens": 2232540.0, "step": 1246 }, { "epoch": 0.20192696947615577, "grad_norm": 26.522666931152344, "learning_rate": 7.982512953367876e-06, "loss": 0.6942, "mean_token_accuracy": 0.9116956293582916, "num_tokens": 2234335.0, "step": 1247 }, { "epoch": 0.2020888996842361, "grad_norm": 29.628372192382812, "learning_rate": 7.98089378238342e-06, "loss": 0.8793, "mean_token_accuracy": 0.8961202502250671, "num_tokens": 2236126.0, "step": 1248 }, { "epoch": 0.2022508298923164, "grad_norm": 22.819133758544922, "learning_rate": 7.979274611398965e-06, "loss": 0.7544, "mean_token_accuracy": 0.9080882370471954, "num_tokens": 2237910.0, "step": 1249 }, { "epoch": 0.20241276010039672, "grad_norm": 25.56781768798828, "learning_rate": 7.977655440414508e-06, "loss": 0.6363, "mean_token_accuracy": 0.9072797000408173, "num_tokens": 2239702.0, "step": 1250 }, { "epoch": 0.20257469030847705, "grad_norm": 27.90135955810547, "learning_rate": 7.976036269430052e-06, "loss": 0.8012, "mean_token_accuracy": 0.9027777910232544, "num_tokens": 2241481.0, "step": 1251 }, { "epoch": 0.20273662051655736, "grad_norm": 26.237934112548828, "learning_rate": 7.974417098445595e-06, "loss": 0.8411, "mean_token_accuracy": 0.9003292620182037, "num_tokens": 2243274.0, "step": 1252 }, { "epoch": 0.2028985507246377, "grad_norm": 29.386486053466797, "learning_rate": 7.97279792746114e-06, "loss": 0.8633, "mean_token_accuracy": 0.8825460076332092, "num_tokens": 2245076.0, "step": 1253 }, { "epoch": 0.203060480932718, "grad_norm": 19.233985900878906, "learning_rate": 7.971178756476684e-06, "loss": 0.6225, "mean_token_accuracy": 0.921707957983017, "num_tokens": 2246882.0, "step": 1254 }, { "epoch": 0.2032224111407983, "grad_norm": 29.620250701904297, "learning_rate": 7.969559585492228e-06, "loss": 0.7555, "mean_token_accuracy": 0.8967578411102295, "num_tokens": 2248675.0, "step": 1255 }, { "epoch": 0.20338434134887864, "grad_norm": 28.40350341796875, "learning_rate": 7.967940414507773e-06, "loss": 0.6629, "mean_token_accuracy": 0.9082458913326263, "num_tokens": 2250470.0, "step": 1256 }, { "epoch": 0.20354627155695895, "grad_norm": 33.82870864868164, "learning_rate": 7.966321243523317e-06, "loss": 1.0483, "mean_token_accuracy": 0.8770425021648407, "num_tokens": 2252271.0, "step": 1257 }, { "epoch": 0.20370820176503926, "grad_norm": 21.204627990722656, "learning_rate": 7.964702072538862e-06, "loss": 0.6308, "mean_token_accuracy": 0.9131805896759033, "num_tokens": 2254059.0, "step": 1258 }, { "epoch": 0.2038701319731196, "grad_norm": 19.29145622253418, "learning_rate": 7.963082901554406e-06, "loss": 0.5522, "mean_token_accuracy": 0.9252963960170746, "num_tokens": 2255852.0, "step": 1259 }, { "epoch": 0.2040320621811999, "grad_norm": 15.525376319885254, "learning_rate": 7.96146373056995e-06, "loss": 0.5858, "mean_token_accuracy": 0.9204832017421722, "num_tokens": 2257640.0, "step": 1260 }, { "epoch": 0.2041939923892802, "grad_norm": 23.608671188354492, "learning_rate": 7.959844559585493e-06, "loss": 0.5815, "mean_token_accuracy": 0.9208798110485077, "num_tokens": 2259443.0, "step": 1261 }, { "epoch": 0.20435592259736055, "grad_norm": 29.058277130126953, "learning_rate": 7.958225388601038e-06, "loss": 0.9118, "mean_token_accuracy": 0.8783470988273621, "num_tokens": 2261243.0, "step": 1262 }, { "epoch": 0.20451785280544085, "grad_norm": 29.889188766479492, "learning_rate": 7.956606217616582e-06, "loss": 0.8227, "mean_token_accuracy": 0.886211633682251, "num_tokens": 2263036.0, "step": 1263 }, { "epoch": 0.20467978301352116, "grad_norm": 22.74662971496582, "learning_rate": 7.954987046632125e-06, "loss": 0.734, "mean_token_accuracy": 0.905089259147644, "num_tokens": 2264822.0, "step": 1264 }, { "epoch": 0.2048417132216015, "grad_norm": 24.45282745361328, "learning_rate": 7.953367875647669e-06, "loss": 0.6978, "mean_token_accuracy": 0.9092390239238739, "num_tokens": 2266610.0, "step": 1265 }, { "epoch": 0.2050036434296818, "grad_norm": 22.241226196289062, "learning_rate": 7.951748704663214e-06, "loss": 0.6192, "mean_token_accuracy": 0.9075387418270111, "num_tokens": 2268403.0, "step": 1266 }, { "epoch": 0.2051655736377621, "grad_norm": 21.609891891479492, "learning_rate": 7.950129533678758e-06, "loss": 0.6528, "mean_token_accuracy": 0.9169946312904358, "num_tokens": 2270193.0, "step": 1267 }, { "epoch": 0.20532750384584245, "grad_norm": 27.25162696838379, "learning_rate": 7.948510362694301e-06, "loss": 0.7711, "mean_token_accuracy": 0.9016619026660919, "num_tokens": 2271980.0, "step": 1268 }, { "epoch": 0.20548943405392275, "grad_norm": 22.569896697998047, "learning_rate": 7.946891191709845e-06, "loss": 0.6697, "mean_token_accuracy": 0.9053504168987274, "num_tokens": 2273776.0, "step": 1269 }, { "epoch": 0.2056513642620031, "grad_norm": 18.745363235473633, "learning_rate": 7.94527202072539e-06, "loss": 0.5899, "mean_token_accuracy": 0.9148510098457336, "num_tokens": 2275558.0, "step": 1270 }, { "epoch": 0.2058132944700834, "grad_norm": 24.38509750366211, "learning_rate": 7.943652849740934e-06, "loss": 0.694, "mean_token_accuracy": 0.9044477939605713, "num_tokens": 2277353.0, "step": 1271 }, { "epoch": 0.2059752246781637, "grad_norm": 26.728092193603516, "learning_rate": 7.942033678756477e-06, "loss": 0.7285, "mean_token_accuracy": 0.9018024206161499, "num_tokens": 2279150.0, "step": 1272 }, { "epoch": 0.20613715488624404, "grad_norm": 29.9021053314209, "learning_rate": 7.940414507772021e-06, "loss": 0.7536, "mean_token_accuracy": 0.8949199318885803, "num_tokens": 2280948.0, "step": 1273 }, { "epoch": 0.20629908509432435, "grad_norm": 24.022016525268555, "learning_rate": 7.938795336787566e-06, "loss": 0.7013, "mean_token_accuracy": 0.8990746736526489, "num_tokens": 2282757.0, "step": 1274 }, { "epoch": 0.20646101530240465, "grad_norm": 20.628925323486328, "learning_rate": 7.93717616580311e-06, "loss": 0.6829, "mean_token_accuracy": 0.9062825739383698, "num_tokens": 2284546.0, "step": 1275 }, { "epoch": 0.206622945510485, "grad_norm": 22.150596618652344, "learning_rate": 7.935556994818653e-06, "loss": 0.6206, "mean_token_accuracy": 0.9142856895923615, "num_tokens": 2286338.0, "step": 1276 }, { "epoch": 0.2067848757185653, "grad_norm": 22.98813819885254, "learning_rate": 7.933937823834199e-06, "loss": 0.7472, "mean_token_accuracy": 0.9059523940086365, "num_tokens": 2288137.0, "step": 1277 }, { "epoch": 0.2069468059266456, "grad_norm": 25.81568717956543, "learning_rate": 7.932318652849742e-06, "loss": 0.762, "mean_token_accuracy": 0.8968901038169861, "num_tokens": 2289931.0, "step": 1278 }, { "epoch": 0.20710873613472594, "grad_norm": 24.10573959350586, "learning_rate": 7.930699481865286e-06, "loss": 0.6725, "mean_token_accuracy": 0.9096696376800537, "num_tokens": 2291731.0, "step": 1279 }, { "epoch": 0.20727066634280625, "grad_norm": 22.798585891723633, "learning_rate": 7.92908031088083e-06, "loss": 0.7465, "mean_token_accuracy": 0.8974413573741913, "num_tokens": 2293517.0, "step": 1280 }, { "epoch": 0.20743259655088656, "grad_norm": 25.903820037841797, "learning_rate": 7.927461139896375e-06, "loss": 0.728, "mean_token_accuracy": 0.9001071751117706, "num_tokens": 2295309.0, "step": 1281 }, { "epoch": 0.2075945267589669, "grad_norm": 27.341861724853516, "learning_rate": 7.925841968911918e-06, "loss": 0.7181, "mean_token_accuracy": 0.9090731143951416, "num_tokens": 2297107.0, "step": 1282 }, { "epoch": 0.2077564569670472, "grad_norm": 15.652795791625977, "learning_rate": 7.924222797927462e-06, "loss": 0.5975, "mean_token_accuracy": 0.9236221313476562, "num_tokens": 2298894.0, "step": 1283 }, { "epoch": 0.20791838717512753, "grad_norm": 36.49123764038086, "learning_rate": 7.922603626943006e-06, "loss": 0.6371, "mean_token_accuracy": 0.9106518626213074, "num_tokens": 2300697.0, "step": 1284 }, { "epoch": 0.20808031738320784, "grad_norm": 22.572795867919922, "learning_rate": 7.920984455958551e-06, "loss": 0.6637, "mean_token_accuracy": 0.9148550927639008, "num_tokens": 2302479.0, "step": 1285 }, { "epoch": 0.20824224759128815, "grad_norm": 26.504154205322266, "learning_rate": 7.919365284974094e-06, "loss": 0.6554, "mean_token_accuracy": 0.904900074005127, "num_tokens": 2304264.0, "step": 1286 }, { "epoch": 0.20840417779936848, "grad_norm": 23.916187286376953, "learning_rate": 7.917746113989638e-06, "loss": 0.6363, "mean_token_accuracy": 0.9089095592498779, "num_tokens": 2306061.0, "step": 1287 }, { "epoch": 0.2085661080074488, "grad_norm": 25.13608741760254, "learning_rate": 7.916126943005182e-06, "loss": 0.6777, "mean_token_accuracy": 0.905089259147644, "num_tokens": 2307847.0, "step": 1288 }, { "epoch": 0.2087280382155291, "grad_norm": 20.095993041992188, "learning_rate": 7.914507772020727e-06, "loss": 0.6085, "mean_token_accuracy": 0.913159966468811, "num_tokens": 2309635.0, "step": 1289 }, { "epoch": 0.20888996842360943, "grad_norm": 25.800636291503906, "learning_rate": 7.91288860103627e-06, "loss": 0.687, "mean_token_accuracy": 0.9041857421398163, "num_tokens": 2311429.0, "step": 1290 }, { "epoch": 0.20905189863168974, "grad_norm": 21.290725708007812, "learning_rate": 7.911269430051814e-06, "loss": 0.6412, "mean_token_accuracy": 0.9139508605003357, "num_tokens": 2313220.0, "step": 1291 }, { "epoch": 0.20921382883977005, "grad_norm": 31.271337509155273, "learning_rate": 7.909650259067358e-06, "loss": 1.0309, "mean_token_accuracy": 0.8817271590232849, "num_tokens": 2315026.0, "step": 1292 }, { "epoch": 0.20937575904785038, "grad_norm": 25.913667678833008, "learning_rate": 7.908031088082903e-06, "loss": 0.7225, "mean_token_accuracy": 0.9037662148475647, "num_tokens": 2316819.0, "step": 1293 }, { "epoch": 0.2095376892559307, "grad_norm": 33.95661544799805, "learning_rate": 7.906411917098447e-06, "loss": 0.926, "mean_token_accuracy": 0.8770685493946075, "num_tokens": 2318607.0, "step": 1294 }, { "epoch": 0.209699619464011, "grad_norm": 22.09990692138672, "learning_rate": 7.90479274611399e-06, "loss": 0.6087, "mean_token_accuracy": 0.908687949180603, "num_tokens": 2320404.0, "step": 1295 }, { "epoch": 0.20986154967209134, "grad_norm": 26.62694549560547, "learning_rate": 7.903173575129535e-06, "loss": 0.7369, "mean_token_accuracy": 0.9004559218883514, "num_tokens": 2322197.0, "step": 1296 }, { "epoch": 0.21002347988017164, "grad_norm": 31.434181213378906, "learning_rate": 7.901554404145079e-06, "loss": 0.9451, "mean_token_accuracy": 0.8788608312606812, "num_tokens": 2323981.0, "step": 1297 }, { "epoch": 0.21018541008825195, "grad_norm": 27.5379581451416, "learning_rate": 7.899935233160623e-06, "loss": 0.7717, "mean_token_accuracy": 0.8913753032684326, "num_tokens": 2325786.0, "step": 1298 }, { "epoch": 0.21034734029633229, "grad_norm": 15.636994361877441, "learning_rate": 7.898316062176166e-06, "loss": 0.6176, "mean_token_accuracy": 0.9166475236415863, "num_tokens": 2327562.0, "step": 1299 }, { "epoch": 0.2105092705044126, "grad_norm": 19.72308921813965, "learning_rate": 7.896696891191711e-06, "loss": 0.6203, "mean_token_accuracy": 0.9030748903751373, "num_tokens": 2329353.0, "step": 1300 }, { "epoch": 0.21067120071249293, "grad_norm": 18.04261016845703, "learning_rate": 7.895077720207255e-06, "loss": 0.5889, "mean_token_accuracy": 0.9189277589321136, "num_tokens": 2331136.0, "step": 1301 }, { "epoch": 0.21083313092057324, "grad_norm": 23.98662757873535, "learning_rate": 7.893458549222799e-06, "loss": 0.7044, "mean_token_accuracy": 0.8980670273303986, "num_tokens": 2332942.0, "step": 1302 }, { "epoch": 0.21099506112865354, "grad_norm": 18.525197982788086, "learning_rate": 7.891839378238342e-06, "loss": 0.5254, "mean_token_accuracy": 0.9305733144283295, "num_tokens": 2334728.0, "step": 1303 }, { "epoch": 0.21115699133673388, "grad_norm": 16.44396209716797, "learning_rate": 7.890220207253888e-06, "loss": 0.5434, "mean_token_accuracy": 0.9205682873725891, "num_tokens": 2336517.0, "step": 1304 }, { "epoch": 0.2113189215448142, "grad_norm": 19.87498664855957, "learning_rate": 7.888601036269431e-06, "loss": 0.5981, "mean_token_accuracy": 0.9141328632831573, "num_tokens": 2338321.0, "step": 1305 }, { "epoch": 0.2114808517528945, "grad_norm": 22.984481811523438, "learning_rate": 7.886981865284975e-06, "loss": 0.6893, "mean_token_accuracy": 0.9144723415374756, "num_tokens": 2340113.0, "step": 1306 }, { "epoch": 0.21164278196097483, "grad_norm": 28.53995704650879, "learning_rate": 7.885362694300518e-06, "loss": 0.8072, "mean_token_accuracy": 0.8944272994995117, "num_tokens": 2341897.0, "step": 1307 }, { "epoch": 0.21180471216905514, "grad_norm": 13.00845718383789, "learning_rate": 7.883743523316064e-06, "loss": 0.5373, "mean_token_accuracy": 0.9289297759532928, "num_tokens": 2343690.0, "step": 1308 }, { "epoch": 0.21196664237713544, "grad_norm": 20.79195213317871, "learning_rate": 7.882124352331607e-06, "loss": 0.641, "mean_token_accuracy": 0.9079380929470062, "num_tokens": 2345472.0, "step": 1309 }, { "epoch": 0.21212857258521578, "grad_norm": 27.601594924926758, "learning_rate": 7.88050518134715e-06, "loss": 0.7372, "mean_token_accuracy": 0.89560467004776, "num_tokens": 2347271.0, "step": 1310 }, { "epoch": 0.2122905027932961, "grad_norm": 21.75929832458496, "learning_rate": 7.878886010362694e-06, "loss": 0.6011, "mean_token_accuracy": 0.9186292290687561, "num_tokens": 2349065.0, "step": 1311 }, { "epoch": 0.2124524330013764, "grad_norm": 24.202491760253906, "learning_rate": 7.87726683937824e-06, "loss": 0.6438, "mean_token_accuracy": 0.9069488048553467, "num_tokens": 2350854.0, "step": 1312 }, { "epoch": 0.21261436320945673, "grad_norm": 26.516664505004883, "learning_rate": 7.875647668393783e-06, "loss": 0.7615, "mean_token_accuracy": 0.8975427448749542, "num_tokens": 2352640.0, "step": 1313 }, { "epoch": 0.21277629341753704, "grad_norm": 29.516916275024414, "learning_rate": 7.874028497409327e-06, "loss": 0.7527, "mean_token_accuracy": 0.9042788743972778, "num_tokens": 2354434.0, "step": 1314 }, { "epoch": 0.21293822362561735, "grad_norm": 25.39187240600586, "learning_rate": 7.872409326424872e-06, "loss": 0.7375, "mean_token_accuracy": 0.8921815752983093, "num_tokens": 2356225.0, "step": 1315 }, { "epoch": 0.21310015383369768, "grad_norm": 27.50579833984375, "learning_rate": 7.870790155440416e-06, "loss": 0.7729, "mean_token_accuracy": 0.9000816643238068, "num_tokens": 2358017.0, "step": 1316 }, { "epoch": 0.213262084041778, "grad_norm": 26.496875762939453, "learning_rate": 7.86917098445596e-06, "loss": 0.6924, "mean_token_accuracy": 0.8958283066749573, "num_tokens": 2359817.0, "step": 1317 }, { "epoch": 0.21342401424985832, "grad_norm": 25.31161117553711, "learning_rate": 7.867551813471503e-06, "loss": 0.7377, "mean_token_accuracy": 0.8931970596313477, "num_tokens": 2361601.0, "step": 1318 }, { "epoch": 0.21358594445793863, "grad_norm": 25.23062515258789, "learning_rate": 7.865932642487048e-06, "loss": 0.7168, "mean_token_accuracy": 0.9007633626461029, "num_tokens": 2363375.0, "step": 1319 }, { "epoch": 0.21374787466601894, "grad_norm": 21.320762634277344, "learning_rate": 7.864313471502592e-06, "loss": 0.6952, "mean_token_accuracy": 0.9121415317058563, "num_tokens": 2365172.0, "step": 1320 }, { "epoch": 0.21390980487409927, "grad_norm": 26.12787628173828, "learning_rate": 7.862694300518135e-06, "loss": 0.7382, "mean_token_accuracy": 0.9006058275699615, "num_tokens": 2366956.0, "step": 1321 }, { "epoch": 0.21407173508217958, "grad_norm": 23.60651206970215, "learning_rate": 7.861075129533679e-06, "loss": 0.6749, "mean_token_accuracy": 0.9182727932929993, "num_tokens": 2368762.0, "step": 1322 }, { "epoch": 0.2142336652902599, "grad_norm": 24.133113861083984, "learning_rate": 7.859455958549224e-06, "loss": 0.6514, "mean_token_accuracy": 0.9066033959388733, "num_tokens": 2370563.0, "step": 1323 }, { "epoch": 0.21439559549834022, "grad_norm": 23.532142639160156, "learning_rate": 7.857836787564768e-06, "loss": 0.6872, "mean_token_accuracy": 0.8986014127731323, "num_tokens": 2372361.0, "step": 1324 }, { "epoch": 0.21455752570642053, "grad_norm": 21.116100311279297, "learning_rate": 7.856217616580311e-06, "loss": 0.608, "mean_token_accuracy": 0.9245029091835022, "num_tokens": 2374151.0, "step": 1325 }, { "epoch": 0.21471945591450084, "grad_norm": 28.5745792388916, "learning_rate": 7.854598445595855e-06, "loss": 0.756, "mean_token_accuracy": 0.8998612463474274, "num_tokens": 2375942.0, "step": 1326 }, { "epoch": 0.21488138612258117, "grad_norm": 19.671836853027344, "learning_rate": 7.8529792746114e-06, "loss": 0.6606, "mean_token_accuracy": 0.8993055522441864, "num_tokens": 2377730.0, "step": 1327 }, { "epoch": 0.21504331633066148, "grad_norm": 26.114513397216797, "learning_rate": 7.851360103626944e-06, "loss": 0.7025, "mean_token_accuracy": 0.9104059636592865, "num_tokens": 2379521.0, "step": 1328 }, { "epoch": 0.2152052465387418, "grad_norm": 23.38545799255371, "learning_rate": 7.849740932642487e-06, "loss": 0.6904, "mean_token_accuracy": 0.903900682926178, "num_tokens": 2381314.0, "step": 1329 }, { "epoch": 0.21536717674682213, "grad_norm": 23.690956115722656, "learning_rate": 7.848121761658031e-06, "loss": 0.6481, "mean_token_accuracy": 0.9098140597343445, "num_tokens": 2383115.0, "step": 1330 }, { "epoch": 0.21552910695490243, "grad_norm": 25.65019416809082, "learning_rate": 7.846502590673576e-06, "loss": 0.8145, "mean_token_accuracy": 0.8888489007949829, "num_tokens": 2384906.0, "step": 1331 }, { "epoch": 0.21569103716298277, "grad_norm": 24.10000991821289, "learning_rate": 7.84488341968912e-06, "loss": 0.7075, "mean_token_accuracy": 0.9091245234012604, "num_tokens": 2386703.0, "step": 1332 }, { "epoch": 0.21585296737106308, "grad_norm": 21.570556640625, "learning_rate": 7.843264248704663e-06, "loss": 0.7281, "mean_token_accuracy": 0.9097693562507629, "num_tokens": 2388492.0, "step": 1333 }, { "epoch": 0.21601489757914338, "grad_norm": 20.557296752929688, "learning_rate": 7.841645077720209e-06, "loss": 0.6124, "mean_token_accuracy": 0.906470000743866, "num_tokens": 2390282.0, "step": 1334 }, { "epoch": 0.21617682778722372, "grad_norm": 31.200876235961914, "learning_rate": 7.840025906735752e-06, "loss": 1.0413, "mean_token_accuracy": 0.8734805285930634, "num_tokens": 2392078.0, "step": 1335 }, { "epoch": 0.21633875799530403, "grad_norm": 23.863723754882812, "learning_rate": 7.838406735751296e-06, "loss": 0.781, "mean_token_accuracy": 0.894610196352005, "num_tokens": 2393865.0, "step": 1336 }, { "epoch": 0.21650068820338433, "grad_norm": 24.848957061767578, "learning_rate": 7.83678756476684e-06, "loss": 0.6987, "mean_token_accuracy": 0.9015037715435028, "num_tokens": 2395650.0, "step": 1337 }, { "epoch": 0.21666261841146467, "grad_norm": 32.46807098388672, "learning_rate": 7.835168393782385e-06, "loss": 0.8752, "mean_token_accuracy": 0.8708485960960388, "num_tokens": 2397448.0, "step": 1338 }, { "epoch": 0.21682454861954498, "grad_norm": 28.673524856567383, "learning_rate": 7.833549222797928e-06, "loss": 0.8233, "mean_token_accuracy": 0.8889916837215424, "num_tokens": 2399239.0, "step": 1339 }, { "epoch": 0.21698647882762528, "grad_norm": 20.588809967041016, "learning_rate": 7.831930051813472e-06, "loss": 0.5904, "mean_token_accuracy": 0.9190376698970795, "num_tokens": 2401035.0, "step": 1340 }, { "epoch": 0.21714840903570562, "grad_norm": 22.57328987121582, "learning_rate": 7.830310880829016e-06, "loss": 0.7865, "mean_token_accuracy": 0.9100719392299652, "num_tokens": 2402825.0, "step": 1341 }, { "epoch": 0.21731033924378593, "grad_norm": 27.874149322509766, "learning_rate": 7.828691709844561e-06, "loss": 0.7786, "mean_token_accuracy": 0.8975524306297302, "num_tokens": 2404620.0, "step": 1342 }, { "epoch": 0.21747226945186623, "grad_norm": 18.996997833251953, "learning_rate": 7.827072538860104e-06, "loss": 0.7108, "mean_token_accuracy": 0.9154095649719238, "num_tokens": 2406404.0, "step": 1343 }, { "epoch": 0.21763419965994657, "grad_norm": 29.685951232910156, "learning_rate": 7.825453367875648e-06, "loss": 1.1648, "mean_token_accuracy": 0.8810355961322784, "num_tokens": 2408209.0, "step": 1344 }, { "epoch": 0.21779612986802688, "grad_norm": 22.576805114746094, "learning_rate": 7.823834196891192e-06, "loss": 0.659, "mean_token_accuracy": 0.913159966468811, "num_tokens": 2409997.0, "step": 1345 }, { "epoch": 0.21795806007610719, "grad_norm": 25.6392765045166, "learning_rate": 7.822215025906737e-06, "loss": 0.7045, "mean_token_accuracy": 0.9051474332809448, "num_tokens": 2411792.0, "step": 1346 }, { "epoch": 0.21811999028418752, "grad_norm": 24.15930938720703, "learning_rate": 7.82059585492228e-06, "loss": 0.6591, "mean_token_accuracy": 0.9027804732322693, "num_tokens": 2413591.0, "step": 1347 }, { "epoch": 0.21828192049226783, "grad_norm": 23.67548370361328, "learning_rate": 7.818976683937824e-06, "loss": 0.6812, "mean_token_accuracy": 0.8874224722385406, "num_tokens": 2415387.0, "step": 1348 }, { "epoch": 0.21844385070034816, "grad_norm": 18.736164093017578, "learning_rate": 7.817357512953368e-06, "loss": 0.591, "mean_token_accuracy": 0.920550525188446, "num_tokens": 2417176.0, "step": 1349 }, { "epoch": 0.21860578090842847, "grad_norm": 18.090635299682617, "learning_rate": 7.815738341968913e-06, "loss": 0.5364, "mean_token_accuracy": 0.9181873500347137, "num_tokens": 2418957.0, "step": 1350 }, { "epoch": 0.21876771111650878, "grad_norm": 22.60944938659668, "learning_rate": 7.814119170984457e-06, "loss": 0.6928, "mean_token_accuracy": 0.9181602001190186, "num_tokens": 2420738.0, "step": 1351 }, { "epoch": 0.2189296413245891, "grad_norm": 23.433828353881836, "learning_rate": 7.8125e-06, "loss": 0.7531, "mean_token_accuracy": 0.9080882370471954, "num_tokens": 2422522.0, "step": 1352 }, { "epoch": 0.21909157153266942, "grad_norm": 21.873239517211914, "learning_rate": 7.810880829015545e-06, "loss": 0.7351, "mean_token_accuracy": 0.9010319709777832, "num_tokens": 2424316.0, "step": 1353 }, { "epoch": 0.21925350174074973, "grad_norm": 24.422439575195312, "learning_rate": 7.809261658031089e-06, "loss": 0.6623, "mean_token_accuracy": 0.9116041362285614, "num_tokens": 2426112.0, "step": 1354 }, { "epoch": 0.21941543194883006, "grad_norm": 19.918426513671875, "learning_rate": 7.807642487046633e-06, "loss": 0.6226, "mean_token_accuracy": 0.9134254455566406, "num_tokens": 2427901.0, "step": 1355 }, { "epoch": 0.21957736215691037, "grad_norm": 28.144012451171875, "learning_rate": 7.806023316062176e-06, "loss": 0.8787, "mean_token_accuracy": 0.8800454437732697, "num_tokens": 2429701.0, "step": 1356 }, { "epoch": 0.21973929236499068, "grad_norm": 24.157426834106445, "learning_rate": 7.804404145077721e-06, "loss": 0.7693, "mean_token_accuracy": 0.9004964828491211, "num_tokens": 2431494.0, "step": 1357 }, { "epoch": 0.21990122257307101, "grad_norm": 19.312145233154297, "learning_rate": 7.802784974093265e-06, "loss": 0.5653, "mean_token_accuracy": 0.9266602396965027, "num_tokens": 2433279.0, "step": 1358 }, { "epoch": 0.22006315278115132, "grad_norm": 20.21554946899414, "learning_rate": 7.801165803108809e-06, "loss": 0.6578, "mean_token_accuracy": 0.924761027097702, "num_tokens": 2435070.0, "step": 1359 }, { "epoch": 0.22022508298923163, "grad_norm": 24.232200622558594, "learning_rate": 7.799546632124352e-06, "loss": 0.799, "mean_token_accuracy": 0.8945572972297668, "num_tokens": 2436857.0, "step": 1360 }, { "epoch": 0.22038701319731197, "grad_norm": 30.001169204711914, "learning_rate": 7.797927461139898e-06, "loss": 1.0244, "mean_token_accuracy": 0.8915297389030457, "num_tokens": 2438655.0, "step": 1361 }, { "epoch": 0.22054894340539227, "grad_norm": 28.108449935913086, "learning_rate": 7.796308290155441e-06, "loss": 0.8801, "mean_token_accuracy": 0.8862321972846985, "num_tokens": 2440459.0, "step": 1362 }, { "epoch": 0.22071087361347258, "grad_norm": 26.04998207092285, "learning_rate": 7.794689119170985e-06, "loss": 0.8182, "mean_token_accuracy": 0.8954448401927948, "num_tokens": 2442258.0, "step": 1363 }, { "epoch": 0.22087280382155292, "grad_norm": 21.462345123291016, "learning_rate": 7.793069948186528e-06, "loss": 0.6494, "mean_token_accuracy": 0.9084370732307434, "num_tokens": 2444043.0, "step": 1364 }, { "epoch": 0.22103473402963322, "grad_norm": 18.6356258392334, "learning_rate": 7.791450777202074e-06, "loss": 0.5697, "mean_token_accuracy": 0.9255318939685822, "num_tokens": 2445837.0, "step": 1365 }, { "epoch": 0.22119666423771356, "grad_norm": 24.375577926635742, "learning_rate": 7.789831606217617e-06, "loss": 0.8147, "mean_token_accuracy": 0.8933209776878357, "num_tokens": 2447628.0, "step": 1366 }, { "epoch": 0.22135859444579387, "grad_norm": 20.464832305908203, "learning_rate": 7.78821243523316e-06, "loss": 0.6247, "mean_token_accuracy": 0.9079841077327728, "num_tokens": 2449422.0, "step": 1367 }, { "epoch": 0.22152052465387417, "grad_norm": 19.878673553466797, "learning_rate": 7.786593264248704e-06, "loss": 0.6297, "mean_token_accuracy": 0.9013539850711823, "num_tokens": 2451207.0, "step": 1368 }, { "epoch": 0.2216824548619545, "grad_norm": 17.676183700561523, "learning_rate": 7.78497409326425e-06, "loss": 0.5698, "mean_token_accuracy": 0.9192405343055725, "num_tokens": 2452991.0, "step": 1369 }, { "epoch": 0.22184438507003482, "grad_norm": 22.46243667602539, "learning_rate": 7.783354922279793e-06, "loss": 0.736, "mean_token_accuracy": 0.9061065912246704, "num_tokens": 2454780.0, "step": 1370 }, { "epoch": 0.22200631527811512, "grad_norm": 19.763105392456055, "learning_rate": 7.781735751295337e-06, "loss": 0.6251, "mean_token_accuracy": 0.9067688584327698, "num_tokens": 2456572.0, "step": 1371 }, { "epoch": 0.22216824548619546, "grad_norm": 27.176179885864258, "learning_rate": 7.780116580310882e-06, "loss": 0.8215, "mean_token_accuracy": 0.9007352888584137, "num_tokens": 2458356.0, "step": 1372 }, { "epoch": 0.22233017569427577, "grad_norm": 19.885753631591797, "learning_rate": 7.778497409326426e-06, "loss": 0.7022, "mean_token_accuracy": 0.9202898740768433, "num_tokens": 2460144.0, "step": 1373 }, { "epoch": 0.22249210590235607, "grad_norm": 25.12118911743164, "learning_rate": 7.77687823834197e-06, "loss": 0.7783, "mean_token_accuracy": 0.8936170041561127, "num_tokens": 2461938.0, "step": 1374 }, { "epoch": 0.2226540361104364, "grad_norm": 24.92267417907715, "learning_rate": 7.775259067357513e-06, "loss": 0.6884, "mean_token_accuracy": 0.8918404579162598, "num_tokens": 2463727.0, "step": 1375 }, { "epoch": 0.22281596631851672, "grad_norm": 20.28502082824707, "learning_rate": 7.773639896373058e-06, "loss": 0.6384, "mean_token_accuracy": 0.919762909412384, "num_tokens": 2465514.0, "step": 1376 }, { "epoch": 0.22297789652659702, "grad_norm": 29.23663330078125, "learning_rate": 7.772020725388602e-06, "loss": 0.7601, "mean_token_accuracy": 0.8926453292369843, "num_tokens": 2467313.0, "step": 1377 }, { "epoch": 0.22313982673467736, "grad_norm": 25.073808670043945, "learning_rate": 7.770401554404145e-06, "loss": 0.7095, "mean_token_accuracy": 0.8994308412075043, "num_tokens": 2469113.0, "step": 1378 }, { "epoch": 0.22330175694275767, "grad_norm": 27.79206085205078, "learning_rate": 7.768782383419689e-06, "loss": 0.8204, "mean_token_accuracy": 0.8808779120445251, "num_tokens": 2470909.0, "step": 1379 }, { "epoch": 0.22346368715083798, "grad_norm": 25.242015838623047, "learning_rate": 7.767163212435234e-06, "loss": 0.7251, "mean_token_accuracy": 0.9044065773487091, "num_tokens": 2472693.0, "step": 1380 }, { "epoch": 0.2236256173589183, "grad_norm": 24.53001594543457, "learning_rate": 7.765544041450778e-06, "loss": 0.5962, "mean_token_accuracy": 0.9142078757286072, "num_tokens": 2474484.0, "step": 1381 }, { "epoch": 0.22378754756699862, "grad_norm": 20.68842887878418, "learning_rate": 7.763924870466321e-06, "loss": 0.5838, "mean_token_accuracy": 0.9108880758285522, "num_tokens": 2476265.0, "step": 1382 }, { "epoch": 0.22394947777507895, "grad_norm": 23.500314712524414, "learning_rate": 7.762305699481865e-06, "loss": 0.6658, "mean_token_accuracy": 0.9125318229198456, "num_tokens": 2478052.0, "step": 1383 }, { "epoch": 0.22411140798315926, "grad_norm": 28.060489654541016, "learning_rate": 7.76068652849741e-06, "loss": 0.8354, "mean_token_accuracy": 0.9050595164299011, "num_tokens": 2479848.0, "step": 1384 }, { "epoch": 0.22427333819123957, "grad_norm": 24.152925491333008, "learning_rate": 7.759067357512954e-06, "loss": 0.758, "mean_token_accuracy": 0.9088743031024933, "num_tokens": 2481635.0, "step": 1385 }, { "epoch": 0.2244352683993199, "grad_norm": 24.893659591674805, "learning_rate": 7.757448186528497e-06, "loss": 0.714, "mean_token_accuracy": 0.9049295783042908, "num_tokens": 2483431.0, "step": 1386 }, { "epoch": 0.2245971986074002, "grad_norm": 24.605436325073242, "learning_rate": 7.755829015544041e-06, "loss": 0.6974, "mean_token_accuracy": 0.9062369465827942, "num_tokens": 2485220.0, "step": 1387 }, { "epoch": 0.22475912881548052, "grad_norm": 16.942914962768555, "learning_rate": 7.754209844559586e-06, "loss": 0.535, "mean_token_accuracy": 0.9213924705982208, "num_tokens": 2487012.0, "step": 1388 }, { "epoch": 0.22492105902356085, "grad_norm": 29.950647354125977, "learning_rate": 7.75259067357513e-06, "loss": 0.7726, "mean_token_accuracy": 0.8884143531322479, "num_tokens": 2488802.0, "step": 1389 }, { "epoch": 0.22508298923164116, "grad_norm": 22.724166870117188, "learning_rate": 7.750971502590674e-06, "loss": 0.6355, "mean_token_accuracy": 0.9154545664787292, "num_tokens": 2490596.0, "step": 1390 }, { "epoch": 0.22524491943972147, "grad_norm": 23.553930282592773, "learning_rate": 7.749352331606219e-06, "loss": 0.6989, "mean_token_accuracy": 0.9071327447891235, "num_tokens": 2492399.0, "step": 1391 }, { "epoch": 0.2254068496478018, "grad_norm": 15.872113227844238, "learning_rate": 7.747733160621762e-06, "loss": 0.5489, "mean_token_accuracy": 0.9289695024490356, "num_tokens": 2494192.0, "step": 1392 }, { "epoch": 0.2255687798558821, "grad_norm": 24.228591918945312, "learning_rate": 7.746113989637306e-06, "loss": 0.8398, "mean_token_accuracy": 0.8924717307090759, "num_tokens": 2495983.0, "step": 1393 }, { "epoch": 0.22573071006396242, "grad_norm": 19.374597549438477, "learning_rate": 7.74449481865285e-06, "loss": 0.666, "mean_token_accuracy": 0.9094203114509583, "num_tokens": 2497771.0, "step": 1394 }, { "epoch": 0.22589264027204276, "grad_norm": 24.548786163330078, "learning_rate": 7.742875647668395e-06, "loss": 0.733, "mean_token_accuracy": 0.8859421610832214, "num_tokens": 2499555.0, "step": 1395 }, { "epoch": 0.22605457048012306, "grad_norm": 23.346195220947266, "learning_rate": 7.741256476683938e-06, "loss": 0.7169, "mean_token_accuracy": 0.8956018388271332, "num_tokens": 2501346.0, "step": 1396 }, { "epoch": 0.2262165006882034, "grad_norm": 33.626216888427734, "learning_rate": 7.739637305699482e-06, "loss": 0.9383, "mean_token_accuracy": 0.8953339755535126, "num_tokens": 2503154.0, "step": 1397 }, { "epoch": 0.2263784308962837, "grad_norm": 18.985727310180664, "learning_rate": 7.738018134715026e-06, "loss": 0.5546, "mean_token_accuracy": 0.9245029091835022, "num_tokens": 2504944.0, "step": 1398 }, { "epoch": 0.226540361104364, "grad_norm": 25.161592483520508, "learning_rate": 7.736398963730571e-06, "loss": 0.8582, "mean_token_accuracy": 0.9025547206401825, "num_tokens": 2506733.0, "step": 1399 }, { "epoch": 0.22670229131244435, "grad_norm": 19.465166091918945, "learning_rate": 7.734779792746114e-06, "loss": 0.5997, "mean_token_accuracy": 0.9239541888237, "num_tokens": 2508521.0, "step": 1400 }, { "epoch": 0.22686422152052466, "grad_norm": 21.66914176940918, "learning_rate": 7.733160621761658e-06, "loss": 0.7542, "mean_token_accuracy": 0.9044117629528046, "num_tokens": 2510305.0, "step": 1401 }, { "epoch": 0.22702615172860496, "grad_norm": 28.106422424316406, "learning_rate": 7.731541450777202e-06, "loss": 0.7842, "mean_token_accuracy": 0.8937198221683502, "num_tokens": 2512090.0, "step": 1402 }, { "epoch": 0.2271880819366853, "grad_norm": 14.6756010055542, "learning_rate": 7.729922279792747e-06, "loss": 0.5455, "mean_token_accuracy": 0.9160583913326263, "num_tokens": 2513876.0, "step": 1403 }, { "epoch": 0.2273500121447656, "grad_norm": 25.356327056884766, "learning_rate": 7.72830310880829e-06, "loss": 0.6846, "mean_token_accuracy": 0.902165025472641, "num_tokens": 2515685.0, "step": 1404 }, { "epoch": 0.22751194235284591, "grad_norm": 21.3913516998291, "learning_rate": 7.726683937823834e-06, "loss": 0.6885, "mean_token_accuracy": 0.9084541201591492, "num_tokens": 2517470.0, "step": 1405 }, { "epoch": 0.22767387256092625, "grad_norm": 22.52349853515625, "learning_rate": 7.725064766839378e-06, "loss": 0.6411, "mean_token_accuracy": 0.9082632958889008, "num_tokens": 2519265.0, "step": 1406 }, { "epoch": 0.22783580276900656, "grad_norm": 20.94371223449707, "learning_rate": 7.723445595854923e-06, "loss": 0.6858, "mean_token_accuracy": 0.9111787378787994, "num_tokens": 2521046.0, "step": 1407 }, { "epoch": 0.22799773297708686, "grad_norm": 19.096118927001953, "learning_rate": 7.721826424870467e-06, "loss": 0.6268, "mean_token_accuracy": 0.9067419171333313, "num_tokens": 2522836.0, "step": 1408 }, { "epoch": 0.2281596631851672, "grad_norm": 21.31864356994629, "learning_rate": 7.72020725388601e-06, "loss": 0.6323, "mean_token_accuracy": 0.9032630920410156, "num_tokens": 2524627.0, "step": 1409 }, { "epoch": 0.2283215933932475, "grad_norm": 13.813851356506348, "learning_rate": 7.718588082901555e-06, "loss": 0.4722, "mean_token_accuracy": 0.9407802522182465, "num_tokens": 2526427.0, "step": 1410 }, { "epoch": 0.22848352360132781, "grad_norm": 21.170127868652344, "learning_rate": 7.716968911917099e-06, "loss": 0.6638, "mean_token_accuracy": 0.9124966859817505, "num_tokens": 2528213.0, "step": 1411 }, { "epoch": 0.22864545380940815, "grad_norm": 27.441308975219727, "learning_rate": 7.715349740932643e-06, "loss": 0.7244, "mean_token_accuracy": 0.8875938653945923, "num_tokens": 2530002.0, "step": 1412 }, { "epoch": 0.22880738401748846, "grad_norm": 23.334585189819336, "learning_rate": 7.713730569948186e-06, "loss": 0.6673, "mean_token_accuracy": 0.9046095311641693, "num_tokens": 2531797.0, "step": 1413 }, { "epoch": 0.2289693142255688, "grad_norm": 18.853336334228516, "learning_rate": 7.712111398963732e-06, "loss": 0.5559, "mean_token_accuracy": 0.9182054400444031, "num_tokens": 2533578.0, "step": 1414 }, { "epoch": 0.2291312444336491, "grad_norm": 23.185396194458008, "learning_rate": 7.710492227979275e-06, "loss": 0.626, "mean_token_accuracy": 0.9049992561340332, "num_tokens": 2535374.0, "step": 1415 }, { "epoch": 0.2292931746417294, "grad_norm": 14.400371551513672, "learning_rate": 7.708873056994819e-06, "loss": 0.5139, "mean_token_accuracy": 0.9228314161300659, "num_tokens": 2537157.0, "step": 1416 }, { "epoch": 0.22945510484980974, "grad_norm": 21.116069793701172, "learning_rate": 7.707253886010362e-06, "loss": 0.6285, "mean_token_accuracy": 0.9159381687641144, "num_tokens": 2538943.0, "step": 1417 }, { "epoch": 0.22961703505789005, "grad_norm": 22.72123146057129, "learning_rate": 7.705634715025908e-06, "loss": 0.6637, "mean_token_accuracy": 0.89585942029953, "num_tokens": 2540732.0, "step": 1418 }, { "epoch": 0.22977896526597036, "grad_norm": 23.89801025390625, "learning_rate": 7.704015544041451e-06, "loss": 0.7743, "mean_token_accuracy": 0.8941701948642731, "num_tokens": 2542528.0, "step": 1419 }, { "epoch": 0.2299408954740507, "grad_norm": 28.52039909362793, "learning_rate": 7.702396373056995e-06, "loss": 0.8128, "mean_token_accuracy": 0.8997378349304199, "num_tokens": 2544319.0, "step": 1420 }, { "epoch": 0.230102825682131, "grad_norm": 24.92635726928711, "learning_rate": 7.700777202072538e-06, "loss": 0.6067, "mean_token_accuracy": 0.9124040901660919, "num_tokens": 2546105.0, "step": 1421 }, { "epoch": 0.2302647558902113, "grad_norm": 27.74716567993164, "learning_rate": 7.699158031088084e-06, "loss": 0.6653, "mean_token_accuracy": 0.9103453755378723, "num_tokens": 2547896.0, "step": 1422 }, { "epoch": 0.23042668609829164, "grad_norm": 29.32065200805664, "learning_rate": 7.697538860103627e-06, "loss": 0.8924, "mean_token_accuracy": 0.8966663181781769, "num_tokens": 2549679.0, "step": 1423 }, { "epoch": 0.23058861630637195, "grad_norm": 23.518152236938477, "learning_rate": 7.69591968911917e-06, "loss": 0.678, "mean_token_accuracy": 0.9049295783042908, "num_tokens": 2551475.0, "step": 1424 }, { "epoch": 0.23075054651445226, "grad_norm": 20.571474075317383, "learning_rate": 7.694300518134716e-06, "loss": 0.6019, "mean_token_accuracy": 0.9113828539848328, "num_tokens": 2553269.0, "step": 1425 }, { "epoch": 0.2309124767225326, "grad_norm": 19.52507781982422, "learning_rate": 7.69268134715026e-06, "loss": 0.5923, "mean_token_accuracy": 0.9212669432163239, "num_tokens": 2555047.0, "step": 1426 }, { "epoch": 0.2310744069306129, "grad_norm": 22.284969329833984, "learning_rate": 7.691062176165803e-06, "loss": 0.7425, "mean_token_accuracy": 0.9127790331840515, "num_tokens": 2556834.0, "step": 1427 }, { "epoch": 0.2312363371386932, "grad_norm": 24.386430740356445, "learning_rate": 7.689443005181347e-06, "loss": 0.7547, "mean_token_accuracy": 0.8994667828083038, "num_tokens": 2558624.0, "step": 1428 }, { "epoch": 0.23139826734677355, "grad_norm": 22.268789291381836, "learning_rate": 7.687823834196892e-06, "loss": 0.5542, "mean_token_accuracy": 0.9182723760604858, "num_tokens": 2560418.0, "step": 1429 }, { "epoch": 0.23156019755485385, "grad_norm": 20.271615982055664, "learning_rate": 7.686204663212436e-06, "loss": 0.6307, "mean_token_accuracy": 0.9018568992614746, "num_tokens": 2562195.0, "step": 1430 }, { "epoch": 0.2317221277629342, "grad_norm": 33.752899169921875, "learning_rate": 7.68458549222798e-06, "loss": 0.8091, "mean_token_accuracy": 0.8899519145488739, "num_tokens": 2563997.0, "step": 1431 }, { "epoch": 0.2318840579710145, "grad_norm": 27.174192428588867, "learning_rate": 7.682966321243523e-06, "loss": 0.8071, "mean_token_accuracy": 0.8941729366779327, "num_tokens": 2565782.0, "step": 1432 }, { "epoch": 0.2320459881790948, "grad_norm": 24.73139190673828, "learning_rate": 7.681347150259068e-06, "loss": 0.7051, "mean_token_accuracy": 0.9002998471260071, "num_tokens": 2567577.0, "step": 1433 }, { "epoch": 0.23220791838717514, "grad_norm": 24.544334411621094, "learning_rate": 7.679727979274612e-06, "loss": 0.6896, "mean_token_accuracy": 0.9112384617328644, "num_tokens": 2569382.0, "step": 1434 }, { "epoch": 0.23236984859525545, "grad_norm": 13.470335960388184, "learning_rate": 7.678108808290155e-06, "loss": 0.5007, "mean_token_accuracy": 0.924838125705719, "num_tokens": 2571173.0, "step": 1435 }, { "epoch": 0.23253177880333575, "grad_norm": 25.5701904296875, "learning_rate": 7.676489637305699e-06, "loss": 0.6852, "mean_token_accuracy": 0.9034899771213531, "num_tokens": 2572965.0, "step": 1436 }, { "epoch": 0.2326937090114161, "grad_norm": 30.022069931030273, "learning_rate": 7.674870466321244e-06, "loss": 0.7794, "mean_token_accuracy": 0.8899728953838348, "num_tokens": 2574777.0, "step": 1437 }, { "epoch": 0.2328556392194964, "grad_norm": 19.45071792602539, "learning_rate": 7.673251295336788e-06, "loss": 0.5666, "mean_token_accuracy": 0.9142857193946838, "num_tokens": 2576569.0, "step": 1438 }, { "epoch": 0.2330175694275767, "grad_norm": 18.2053165435791, "learning_rate": 7.671632124352331e-06, "loss": 0.5819, "mean_token_accuracy": 0.9208124577999115, "num_tokens": 2578358.0, "step": 1439 }, { "epoch": 0.23317949963565704, "grad_norm": 23.79119873046875, "learning_rate": 7.670012953367875e-06, "loss": 0.7058, "mean_token_accuracy": 0.9053932428359985, "num_tokens": 2580145.0, "step": 1440 }, { "epoch": 0.23334142984373735, "grad_norm": 27.382652282714844, "learning_rate": 7.66839378238342e-06, "loss": 0.7591, "mean_token_accuracy": 0.9009722173213959, "num_tokens": 2581951.0, "step": 1441 }, { "epoch": 0.23350336005181765, "grad_norm": 28.890094757080078, "learning_rate": 7.666774611398964e-06, "loss": 0.845, "mean_token_accuracy": 0.8961202502250671, "num_tokens": 2583742.0, "step": 1442 }, { "epoch": 0.233665290259898, "grad_norm": 32.617794036865234, "learning_rate": 7.665155440414507e-06, "loss": 0.8926, "mean_token_accuracy": 0.8866921663284302, "num_tokens": 2585545.0, "step": 1443 }, { "epoch": 0.2338272204679783, "grad_norm": 31.685365676879883, "learning_rate": 7.663536269430053e-06, "loss": 0.8971, "mean_token_accuracy": 0.8888835310935974, "num_tokens": 2587345.0, "step": 1444 }, { "epoch": 0.23398915067605863, "grad_norm": 20.850082397460938, "learning_rate": 7.661917098445596e-06, "loss": 0.6985, "mean_token_accuracy": 0.8996683359146118, "num_tokens": 2589126.0, "step": 1445 }, { "epoch": 0.23415108088413894, "grad_norm": 25.50518226623535, "learning_rate": 7.66029792746114e-06, "loss": 0.6996, "mean_token_accuracy": 0.9110672175884247, "num_tokens": 2590919.0, "step": 1446 }, { "epoch": 0.23431301109221925, "grad_norm": 25.487224578857422, "learning_rate": 7.658678756476684e-06, "loss": 0.6788, "mean_token_accuracy": 0.9054646492004395, "num_tokens": 2592716.0, "step": 1447 }, { "epoch": 0.23447494130029958, "grad_norm": 26.57265281677246, "learning_rate": 7.657059585492229e-06, "loss": 0.7482, "mean_token_accuracy": 0.907892107963562, "num_tokens": 2594511.0, "step": 1448 }, { "epoch": 0.2346368715083799, "grad_norm": 20.960603713989258, "learning_rate": 7.655440414507772e-06, "loss": 0.6549, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 2596297.0, "step": 1449 }, { "epoch": 0.2347988017164602, "grad_norm": 29.576522827148438, "learning_rate": 7.653821243523318e-06, "loss": 0.7623, "mean_token_accuracy": 0.9039513766765594, "num_tokens": 2598090.0, "step": 1450 }, { "epoch": 0.23496073192454053, "grad_norm": 20.15174102783203, "learning_rate": 7.652202072538861e-06, "loss": 0.6108, "mean_token_accuracy": 0.9192526638507843, "num_tokens": 2599875.0, "step": 1451 }, { "epoch": 0.23512266213262084, "grad_norm": 23.13926887512207, "learning_rate": 7.650582901554405e-06, "loss": 0.8055, "mean_token_accuracy": 0.9043422639369965, "num_tokens": 2601667.0, "step": 1452 }, { "epoch": 0.23528459234070115, "grad_norm": 29.10275650024414, "learning_rate": 7.64896373056995e-06, "loss": 0.9394, "mean_token_accuracy": 0.887701541185379, "num_tokens": 2603455.0, "step": 1453 }, { "epoch": 0.23544652254878148, "grad_norm": 23.72844123840332, "learning_rate": 7.647344559585494e-06, "loss": 0.7338, "mean_token_accuracy": 0.9113768041133881, "num_tokens": 2605249.0, "step": 1454 }, { "epoch": 0.2356084527568618, "grad_norm": 29.564889907836914, "learning_rate": 7.645725388601037e-06, "loss": 0.7709, "mean_token_accuracy": 0.8921815752983093, "num_tokens": 2607040.0, "step": 1455 }, { "epoch": 0.2357703829649421, "grad_norm": 27.300100326538086, "learning_rate": 7.644106217616581e-06, "loss": 0.8275, "mean_token_accuracy": 0.8922254145145416, "num_tokens": 2608831.0, "step": 1456 }, { "epoch": 0.23593231317302243, "grad_norm": 22.000505447387695, "learning_rate": 7.642487046632126e-06, "loss": 0.6718, "mean_token_accuracy": 0.9094942808151245, "num_tokens": 2610619.0, "step": 1457 }, { "epoch": 0.23609424338110274, "grad_norm": 26.95020866394043, "learning_rate": 7.64086787564767e-06, "loss": 0.7362, "mean_token_accuracy": 0.9104224443435669, "num_tokens": 2612410.0, "step": 1458 }, { "epoch": 0.23625617358918305, "grad_norm": 24.554372787475586, "learning_rate": 7.639248704663213e-06, "loss": 0.68, "mean_token_accuracy": 0.9136690497398376, "num_tokens": 2614200.0, "step": 1459 }, { "epoch": 0.23641810379726338, "grad_norm": 23.83368492126465, "learning_rate": 7.637629533678757e-06, "loss": 0.7534, "mean_token_accuracy": 0.8920835256576538, "num_tokens": 2615999.0, "step": 1460 }, { "epoch": 0.2365800340053437, "grad_norm": 22.855274200439453, "learning_rate": 7.636010362694302e-06, "loss": 0.7516, "mean_token_accuracy": 0.9004510045051575, "num_tokens": 2617792.0, "step": 1461 }, { "epoch": 0.23674196421342403, "grad_norm": 25.45528221130371, "learning_rate": 7.634391191709846e-06, "loss": 0.7649, "mean_token_accuracy": 0.8968993723392487, "num_tokens": 2619585.0, "step": 1462 }, { "epoch": 0.23690389442150434, "grad_norm": 24.026029586791992, "learning_rate": 7.63277202072539e-06, "loss": 0.7509, "mean_token_accuracy": 0.8956202864646912, "num_tokens": 2621385.0, "step": 1463 }, { "epoch": 0.23706582462958464, "grad_norm": 15.273926734924316, "learning_rate": 7.631152849740933e-06, "loss": 0.5584, "mean_token_accuracy": 0.9177290201187134, "num_tokens": 2623176.0, "step": 1464 }, { "epoch": 0.23722775483766498, "grad_norm": 21.250120162963867, "learning_rate": 7.629533678756478e-06, "loss": 0.6154, "mean_token_accuracy": 0.9167449176311493, "num_tokens": 2624965.0, "step": 1465 }, { "epoch": 0.23738968504574529, "grad_norm": 18.954471588134766, "learning_rate": 7.627914507772022e-06, "loss": 0.6021, "mean_token_accuracy": 0.9067012369632721, "num_tokens": 2626758.0, "step": 1466 }, { "epoch": 0.2375516152538256, "grad_norm": 16.815805435180664, "learning_rate": 7.6262953367875655e-06, "loss": 0.6039, "mean_token_accuracy": 0.9197114408016205, "num_tokens": 2628556.0, "step": 1467 }, { "epoch": 0.23771354546190593, "grad_norm": 23.18590545654297, "learning_rate": 7.62467616580311e-06, "loss": 0.6901, "mean_token_accuracy": 0.890966385602951, "num_tokens": 2630344.0, "step": 1468 }, { "epoch": 0.23787547566998624, "grad_norm": 20.75792694091797, "learning_rate": 7.6230569948186535e-06, "loss": 0.6454, "mean_token_accuracy": 0.9175784289836884, "num_tokens": 2632134.0, "step": 1469 }, { "epoch": 0.23803740587806654, "grad_norm": 23.482240676879883, "learning_rate": 7.621437823834198e-06, "loss": 0.6573, "mean_token_accuracy": 0.9166666865348816, "num_tokens": 2633922.0, "step": 1470 }, { "epoch": 0.23819933608614688, "grad_norm": 17.71479034423828, "learning_rate": 7.619818652849742e-06, "loss": 0.5873, "mean_token_accuracy": 0.9216595590114594, "num_tokens": 2635715.0, "step": 1471 }, { "epoch": 0.2383612662942272, "grad_norm": 20.87181282043457, "learning_rate": 7.618199481865286e-06, "loss": 0.6385, "mean_token_accuracy": 0.9134100675582886, "num_tokens": 2637492.0, "step": 1472 }, { "epoch": 0.2385231965023075, "grad_norm": 20.5545597076416, "learning_rate": 7.61658031088083e-06, "loss": 0.677, "mean_token_accuracy": 0.911565750837326, "num_tokens": 2639295.0, "step": 1473 }, { "epoch": 0.23868512671038783, "grad_norm": 29.069311141967773, "learning_rate": 7.614961139896374e-06, "loss": 0.7604, "mean_token_accuracy": 0.901127815246582, "num_tokens": 2641080.0, "step": 1474 }, { "epoch": 0.23884705691846814, "grad_norm": 20.82093048095703, "learning_rate": 7.6133419689119184e-06, "loss": 0.6569, "mean_token_accuracy": 0.9105582237243652, "num_tokens": 2642871.0, "step": 1475 }, { "epoch": 0.23900898712654844, "grad_norm": 18.608184814453125, "learning_rate": 7.611722797927462e-06, "loss": 0.5745, "mean_token_accuracy": 0.9069086015224457, "num_tokens": 2644662.0, "step": 1476 }, { "epoch": 0.23917091733462878, "grad_norm": 25.548845291137695, "learning_rate": 7.6101036269430065e-06, "loss": 0.7505, "mean_token_accuracy": 0.8976739048957825, "num_tokens": 2646448.0, "step": 1477 }, { "epoch": 0.2393328475427091, "grad_norm": 24.109821319580078, "learning_rate": 7.60848445595855e-06, "loss": 0.7261, "mean_token_accuracy": 0.9069280624389648, "num_tokens": 2648250.0, "step": 1478 }, { "epoch": 0.23949477775078942, "grad_norm": 17.441059112548828, "learning_rate": 7.6068652849740945e-06, "loss": 0.5261, "mean_token_accuracy": 0.9265454113483429, "num_tokens": 2650048.0, "step": 1479 }, { "epoch": 0.23965670795886973, "grad_norm": 20.806262969970703, "learning_rate": 7.605246113989638e-06, "loss": 0.6008, "mean_token_accuracy": 0.9220424890518188, "num_tokens": 2651843.0, "step": 1480 }, { "epoch": 0.23981863816695004, "grad_norm": 29.643178939819336, "learning_rate": 7.6036269430051825e-06, "loss": 0.89, "mean_token_accuracy": 0.8886658549308777, "num_tokens": 2653655.0, "step": 1481 }, { "epoch": 0.23998056837503037, "grad_norm": 28.434528350830078, "learning_rate": 7.602007772020726e-06, "loss": 0.8315, "mean_token_accuracy": 0.8941605985164642, "num_tokens": 2655441.0, "step": 1482 }, { "epoch": 0.24014249858311068, "grad_norm": 26.569814682006836, "learning_rate": 7.6003886010362705e-06, "loss": 0.8511, "mean_token_accuracy": 0.8942823112010956, "num_tokens": 2657237.0, "step": 1483 }, { "epoch": 0.240304428791191, "grad_norm": 27.218181610107422, "learning_rate": 7.598769430051814e-06, "loss": 0.7874, "mean_token_accuracy": 0.8952403366565704, "num_tokens": 2659026.0, "step": 1484 }, { "epoch": 0.24046635899927132, "grad_norm": 18.8972110748291, "learning_rate": 7.5971502590673586e-06, "loss": 0.6046, "mean_token_accuracy": 0.9118140041828156, "num_tokens": 2660810.0, "step": 1485 }, { "epoch": 0.24062828920735163, "grad_norm": 22.67765235900879, "learning_rate": 7.595531088082902e-06, "loss": 0.5834, "mean_token_accuracy": 0.911750465631485, "num_tokens": 2662595.0, "step": 1486 }, { "epoch": 0.24079021941543194, "grad_norm": 16.718015670776367, "learning_rate": 7.593911917098447e-06, "loss": 0.5339, "mean_token_accuracy": 0.9213612377643585, "num_tokens": 2664374.0, "step": 1487 }, { "epoch": 0.24095214962351227, "grad_norm": 17.105724334716797, "learning_rate": 7.59229274611399e-06, "loss": 0.5609, "mean_token_accuracy": 0.9253042638301849, "num_tokens": 2666167.0, "step": 1488 }, { "epoch": 0.24111407983159258, "grad_norm": 19.515966415405273, "learning_rate": 7.590673575129535e-06, "loss": 0.5879, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 2667953.0, "step": 1489 }, { "epoch": 0.2412760100396729, "grad_norm": 21.645244598388672, "learning_rate": 7.589054404145079e-06, "loss": 0.6582, "mean_token_accuracy": 0.9079285264015198, "num_tokens": 2669747.0, "step": 1490 }, { "epoch": 0.24143794024775322, "grad_norm": 30.58285140991211, "learning_rate": 7.587435233160623e-06, "loss": 0.8108, "mean_token_accuracy": 0.8922710418701172, "num_tokens": 2671537.0, "step": 1491 }, { "epoch": 0.24159987045583353, "grad_norm": 22.627206802368164, "learning_rate": 7.585816062176167e-06, "loss": 0.65, "mean_token_accuracy": 0.9050639867782593, "num_tokens": 2673323.0, "step": 1492 }, { "epoch": 0.24176180066391384, "grad_norm": 24.889347076416016, "learning_rate": 7.584196891191711e-06, "loss": 0.6735, "mean_token_accuracy": 0.9036940634250641, "num_tokens": 2675115.0, "step": 1493 }, { "epoch": 0.24192373087199417, "grad_norm": 21.869319915771484, "learning_rate": 7.582577720207255e-06, "loss": 0.6485, "mean_token_accuracy": 0.9047642946243286, "num_tokens": 2676909.0, "step": 1494 }, { "epoch": 0.24208566108007448, "grad_norm": 19.944337844848633, "learning_rate": 7.580958549222799e-06, "loss": 0.6158, "mean_token_accuracy": 0.9129863977432251, "num_tokens": 2678697.0, "step": 1495 }, { "epoch": 0.24224759128815482, "grad_norm": 18.30863380432129, "learning_rate": 7.579339378238343e-06, "loss": 0.5913, "mean_token_accuracy": 0.9129549264907837, "num_tokens": 2680484.0, "step": 1496 }, { "epoch": 0.24240952149623513, "grad_norm": 23.380239486694336, "learning_rate": 7.577720207253887e-06, "loss": 0.6162, "mean_token_accuracy": 0.9076152145862579, "num_tokens": 2682267.0, "step": 1497 }, { "epoch": 0.24257145170431543, "grad_norm": 26.253238677978516, "learning_rate": 7.576101036269431e-06, "loss": 0.8074, "mean_token_accuracy": 0.8954682946205139, "num_tokens": 2684056.0, "step": 1498 }, { "epoch": 0.24273338191239577, "grad_norm": 16.309751510620117, "learning_rate": 7.574481865284975e-06, "loss": 0.5567, "mean_token_accuracy": 0.9154388010501862, "num_tokens": 2685828.0, "step": 1499 }, { "epoch": 0.24289531212047608, "grad_norm": 25.659961700439453, "learning_rate": 7.572862694300519e-06, "loss": 0.7175, "mean_token_accuracy": 0.9036984443664551, "num_tokens": 2687610.0, "step": 1500 }, { "epoch": 0.24305724232855638, "grad_norm": 21.450206756591797, "learning_rate": 7.571243523316063e-06, "loss": 0.584, "mean_token_accuracy": 0.912291944026947, "num_tokens": 2689407.0, "step": 1501 }, { "epoch": 0.24321917253663672, "grad_norm": 22.383747100830078, "learning_rate": 7.569624352331607e-06, "loss": 0.6825, "mean_token_accuracy": 0.9118537902832031, "num_tokens": 2691191.0, "step": 1502 }, { "epoch": 0.24338110274471703, "grad_norm": 24.435007095336914, "learning_rate": 7.568005181347151e-06, "loss": 0.6746, "mean_token_accuracy": 0.9078405201435089, "num_tokens": 2692987.0, "step": 1503 }, { "epoch": 0.24354303295279733, "grad_norm": 22.43130874633789, "learning_rate": 7.566386010362695e-06, "loss": 0.5701, "mean_token_accuracy": 0.9187643826007843, "num_tokens": 2694782.0, "step": 1504 }, { "epoch": 0.24370496316087767, "grad_norm": 23.468494415283203, "learning_rate": 7.564766839378239e-06, "loss": 0.6053, "mean_token_accuracy": 0.9134188592433929, "num_tokens": 2696582.0, "step": 1505 }, { "epoch": 0.24386689336895798, "grad_norm": 27.195268630981445, "learning_rate": 7.563147668393783e-06, "loss": 0.7489, "mean_token_accuracy": 0.8979646265506744, "num_tokens": 2698368.0, "step": 1506 }, { "epoch": 0.24402882357703828, "grad_norm": 27.289867401123047, "learning_rate": 7.561528497409327e-06, "loss": 0.82, "mean_token_accuracy": 0.9015287756919861, "num_tokens": 2700171.0, "step": 1507 }, { "epoch": 0.24419075378511862, "grad_norm": 30.325847625732422, "learning_rate": 7.559909326424871e-06, "loss": 0.8562, "mean_token_accuracy": 0.8940750360488892, "num_tokens": 2701957.0, "step": 1508 }, { "epoch": 0.24435268399319893, "grad_norm": 22.628604888916016, "learning_rate": 7.558290155440416e-06, "loss": 0.8047, "mean_token_accuracy": 0.9064748287200928, "num_tokens": 2703747.0, "step": 1509 }, { "epoch": 0.24451461420127926, "grad_norm": 22.936546325683594, "learning_rate": 7.556670984455959e-06, "loss": 0.6127, "mean_token_accuracy": 0.9130063951015472, "num_tokens": 2705533.0, "step": 1510 }, { "epoch": 0.24467654440935957, "grad_norm": 25.915376663208008, "learning_rate": 7.555051813471504e-06, "loss": 0.687, "mean_token_accuracy": 0.904751181602478, "num_tokens": 2707328.0, "step": 1511 }, { "epoch": 0.24483847461743988, "grad_norm": 23.123489379882812, "learning_rate": 7.553432642487047e-06, "loss": 0.6126, "mean_token_accuracy": 0.91131791472435, "num_tokens": 2709122.0, "step": 1512 }, { "epoch": 0.2450004048255202, "grad_norm": 18.64593505859375, "learning_rate": 7.551813471502592e-06, "loss": 0.6223, "mean_token_accuracy": 0.9169561862945557, "num_tokens": 2710912.0, "step": 1513 }, { "epoch": 0.24516233503360052, "grad_norm": 28.47271728515625, "learning_rate": 7.550194300518135e-06, "loss": 0.9546, "mean_token_accuracy": 0.8945723176002502, "num_tokens": 2712718.0, "step": 1514 }, { "epoch": 0.24532426524168083, "grad_norm": 29.037677764892578, "learning_rate": 7.54857512953368e-06, "loss": 0.8005, "mean_token_accuracy": 0.8883761167526245, "num_tokens": 2714507.0, "step": 1515 }, { "epoch": 0.24548619544976116, "grad_norm": 23.622699737548828, "learning_rate": 7.546955958549223e-06, "loss": 0.7693, "mean_token_accuracy": 0.9115781486034393, "num_tokens": 2716301.0, "step": 1516 }, { "epoch": 0.24564812565784147, "grad_norm": 18.688987731933594, "learning_rate": 7.545336787564768e-06, "loss": 0.5294, "mean_token_accuracy": 0.9199221730232239, "num_tokens": 2718089.0, "step": 1517 }, { "epoch": 0.24581005586592178, "grad_norm": 20.6220703125, "learning_rate": 7.543717616580311e-06, "loss": 0.6489, "mean_token_accuracy": 0.9208124577999115, "num_tokens": 2719878.0, "step": 1518 }, { "epoch": 0.2459719860740021, "grad_norm": 26.859193801879883, "learning_rate": 7.542098445595856e-06, "loss": 0.7166, "mean_token_accuracy": 0.896783173084259, "num_tokens": 2721671.0, "step": 1519 }, { "epoch": 0.24613391628208242, "grad_norm": 25.553131103515625, "learning_rate": 7.5404792746113994e-06, "loss": 0.8986, "mean_token_accuracy": 0.8910236060619354, "num_tokens": 2723458.0, "step": 1520 }, { "epoch": 0.24629584649016273, "grad_norm": 23.847808837890625, "learning_rate": 7.538860103626944e-06, "loss": 0.6409, "mean_token_accuracy": 0.9069312810897827, "num_tokens": 2725249.0, "step": 1521 }, { "epoch": 0.24645777669824306, "grad_norm": 33.21686935424805, "learning_rate": 7.5372409326424875e-06, "loss": 0.9335, "mean_token_accuracy": 0.8881908357143402, "num_tokens": 2727038.0, "step": 1522 }, { "epoch": 0.24661970690632337, "grad_norm": 26.16434097290039, "learning_rate": 7.535621761658032e-06, "loss": 0.8076, "mean_token_accuracy": 0.8986344635486603, "num_tokens": 2728826.0, "step": 1523 }, { "epoch": 0.24678163711440368, "grad_norm": 21.566883087158203, "learning_rate": 7.5340025906735755e-06, "loss": 0.6333, "mean_token_accuracy": 0.915032148361206, "num_tokens": 2730609.0, "step": 1524 }, { "epoch": 0.24694356732248401, "grad_norm": 16.49151039123535, "learning_rate": 7.53238341968912e-06, "loss": 0.5304, "mean_token_accuracy": 0.9265749752521515, "num_tokens": 2732406.0, "step": 1525 }, { "epoch": 0.24710549753056432, "grad_norm": 24.9566650390625, "learning_rate": 7.5307642487046635e-06, "loss": 0.7011, "mean_token_accuracy": 0.9080615937709808, "num_tokens": 2734200.0, "step": 1526 }, { "epoch": 0.24726742773864466, "grad_norm": 24.29615592956543, "learning_rate": 7.529145077720208e-06, "loss": 0.6075, "mean_token_accuracy": 0.9031609296798706, "num_tokens": 2735989.0, "step": 1527 }, { "epoch": 0.24742935794672496, "grad_norm": 24.336101531982422, "learning_rate": 7.527525906735752e-06, "loss": 0.8013, "mean_token_accuracy": 0.8999948799610138, "num_tokens": 2737781.0, "step": 1528 }, { "epoch": 0.24759128815480527, "grad_norm": 23.452478408813477, "learning_rate": 7.525906735751296e-06, "loss": 0.6558, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 2739575.0, "step": 1529 }, { "epoch": 0.2477532183628856, "grad_norm": 18.691591262817383, "learning_rate": 7.52428756476684e-06, "loss": 0.5776, "mean_token_accuracy": 0.9154922664165497, "num_tokens": 2741370.0, "step": 1530 }, { "epoch": 0.24791514857096592, "grad_norm": 23.151039123535156, "learning_rate": 7.522668393782384e-06, "loss": 0.6995, "mean_token_accuracy": 0.9080419540405273, "num_tokens": 2743165.0, "step": 1531 }, { "epoch": 0.24807707877904622, "grad_norm": 26.80721092224121, "learning_rate": 7.5210492227979284e-06, "loss": 0.7665, "mean_token_accuracy": 0.8945767283439636, "num_tokens": 2744952.0, "step": 1532 }, { "epoch": 0.24823900898712656, "grad_norm": 28.548725128173828, "learning_rate": 7.519430051813472e-06, "loss": 0.707, "mean_token_accuracy": 0.89616858959198, "num_tokens": 2746753.0, "step": 1533 }, { "epoch": 0.24840093919520687, "grad_norm": 25.250219345092773, "learning_rate": 7.5178108808290165e-06, "loss": 0.7029, "mean_token_accuracy": 0.8999183177947998, "num_tokens": 2748545.0, "step": 1534 }, { "epoch": 0.24856286940328717, "grad_norm": 19.515411376953125, "learning_rate": 7.51619170984456e-06, "loss": 0.6769, "mean_token_accuracy": 0.9077614545822144, "num_tokens": 2750328.0, "step": 1535 }, { "epoch": 0.2487247996113675, "grad_norm": 21.264278411865234, "learning_rate": 7.5145725388601045e-06, "loss": 0.646, "mean_token_accuracy": 0.9023066759109497, "num_tokens": 2752106.0, "step": 1536 }, { "epoch": 0.24888672981944782, "grad_norm": 22.406787872314453, "learning_rate": 7.512953367875648e-06, "loss": 0.6026, "mean_token_accuracy": 0.9196296334266663, "num_tokens": 2753903.0, "step": 1537 }, { "epoch": 0.24904866002752812, "grad_norm": 23.076675415039062, "learning_rate": 7.5113341968911925e-06, "loss": 0.6263, "mean_token_accuracy": 0.9081918001174927, "num_tokens": 2755698.0, "step": 1538 }, { "epoch": 0.24921059023560846, "grad_norm": 27.663761138916016, "learning_rate": 7.509715025906736e-06, "loss": 0.7635, "mean_token_accuracy": 0.8968591690063477, "num_tokens": 2757491.0, "step": 1539 }, { "epoch": 0.24937252044368877, "grad_norm": 26.995481491088867, "learning_rate": 7.5080958549222805e-06, "loss": 0.8452, "mean_token_accuracy": 0.8955065310001373, "num_tokens": 2759281.0, "step": 1540 }, { "epoch": 0.24953445065176907, "grad_norm": 27.979995727539062, "learning_rate": 7.506476683937824e-06, "loss": 0.6811, "mean_token_accuracy": 0.9012860655784607, "num_tokens": 2761077.0, "step": 1541 }, { "epoch": 0.2496963808598494, "grad_norm": 24.379438400268555, "learning_rate": 7.5048575129533686e-06, "loss": 0.6127, "mean_token_accuracy": 0.9179728329181671, "num_tokens": 2762866.0, "step": 1542 }, { "epoch": 0.24985831106792972, "grad_norm": 25.636383056640625, "learning_rate": 7.503238341968912e-06, "loss": 0.7259, "mean_token_accuracy": 0.9112319052219391, "num_tokens": 2764660.0, "step": 1543 }, { "epoch": 0.25002024127601, "grad_norm": 20.341447830200195, "learning_rate": 7.501619170984457e-06, "loss": 0.5708, "mean_token_accuracy": 0.908687949180603, "num_tokens": 2766445.0, "step": 1544 }, { "epoch": 0.25018217148409033, "grad_norm": 20.21607780456543, "learning_rate": 7.500000000000001e-06, "loss": 0.5866, "mean_token_accuracy": 0.9089393615722656, "num_tokens": 2768232.0, "step": 1545 }, { "epoch": 0.2503441016921707, "grad_norm": 22.839038848876953, "learning_rate": 7.498380829015545e-06, "loss": 0.6486, "mean_token_accuracy": 0.9091029465198517, "num_tokens": 2770019.0, "step": 1546 }, { "epoch": 0.250506031900251, "grad_norm": 31.88857650756836, "learning_rate": 7.496761658031089e-06, "loss": 0.8796, "mean_token_accuracy": 0.8856574296951294, "num_tokens": 2771811.0, "step": 1547 }, { "epoch": 0.2506679621083313, "grad_norm": 23.820371627807617, "learning_rate": 7.495142487046633e-06, "loss": 0.7752, "mean_token_accuracy": 0.9087075293064117, "num_tokens": 2773607.0, "step": 1548 }, { "epoch": 0.2508298923164116, "grad_norm": 32.36301803588867, "learning_rate": 7.493523316062177e-06, "loss": 0.8627, "mean_token_accuracy": 0.8912698328495026, "num_tokens": 2775403.0, "step": 1549 }, { "epoch": 0.2509918225244919, "grad_norm": 30.207796096801758, "learning_rate": 7.491904145077721e-06, "loss": 0.8015, "mean_token_accuracy": 0.8877179026603699, "num_tokens": 2777200.0, "step": 1550 }, { "epoch": 0.2511537527325723, "grad_norm": 29.130632400512695, "learning_rate": 7.490284974093265e-06, "loss": 0.8696, "mean_token_accuracy": 0.8989678025245667, "num_tokens": 2778989.0, "step": 1551 }, { "epoch": 0.2513156829406526, "grad_norm": 29.353410720825195, "learning_rate": 7.488665803108809e-06, "loss": 0.8974, "mean_token_accuracy": 0.8890840411186218, "num_tokens": 2780771.0, "step": 1552 }, { "epoch": 0.2514776131487329, "grad_norm": 29.334426879882812, "learning_rate": 7.487046632124353e-06, "loss": 0.7337, "mean_token_accuracy": 0.9125295579433441, "num_tokens": 2782568.0, "step": 1553 }, { "epoch": 0.2516395433568132, "grad_norm": 17.115327835083008, "learning_rate": 7.485427461139897e-06, "loss": 0.5462, "mean_token_accuracy": 0.9213072657585144, "num_tokens": 2784359.0, "step": 1554 }, { "epoch": 0.2518014735648935, "grad_norm": 26.857919692993164, "learning_rate": 7.483808290155441e-06, "loss": 0.609, "mean_token_accuracy": 0.9131987988948822, "num_tokens": 2786148.0, "step": 1555 }, { "epoch": 0.2519634037729738, "grad_norm": 17.469654083251953, "learning_rate": 7.482189119170985e-06, "loss": 0.5203, "mean_token_accuracy": 0.9192526638507843, "num_tokens": 2787933.0, "step": 1556 }, { "epoch": 0.2521253339810542, "grad_norm": 29.414236068725586, "learning_rate": 7.480569948186529e-06, "loss": 0.8304, "mean_token_accuracy": 0.8974026143550873, "num_tokens": 2789728.0, "step": 1557 }, { "epoch": 0.2522872641891345, "grad_norm": 17.6724796295166, "learning_rate": 7.478950777202073e-06, "loss": 0.5933, "mean_token_accuracy": 0.925000011920929, "num_tokens": 2791519.0, "step": 1558 }, { "epoch": 0.2524491943972148, "grad_norm": 26.13325309753418, "learning_rate": 7.477331606217617e-06, "loss": 0.8515, "mean_token_accuracy": 0.9007092118263245, "num_tokens": 2793313.0, "step": 1559 }, { "epoch": 0.2526111246052951, "grad_norm": 27.62114906311035, "learning_rate": 7.475712435233161e-06, "loss": 0.7712, "mean_token_accuracy": 0.8849717974662781, "num_tokens": 2795103.0, "step": 1560 }, { "epoch": 0.2527730548133754, "grad_norm": 22.445478439331055, "learning_rate": 7.474093264248705e-06, "loss": 0.6687, "mean_token_accuracy": 0.9126871824264526, "num_tokens": 2796891.0, "step": 1561 }, { "epoch": 0.2529349850214557, "grad_norm": 26.90172576904297, "learning_rate": 7.472474093264249e-06, "loss": 0.8351, "mean_token_accuracy": 0.8858237564563751, "num_tokens": 2798683.0, "step": 1562 }, { "epoch": 0.2530969152295361, "grad_norm": 16.617937088012695, "learning_rate": 7.470854922279793e-06, "loss": 0.6143, "mean_token_accuracy": 0.9143631160259247, "num_tokens": 2800464.0, "step": 1563 }, { "epoch": 0.2532588454376164, "grad_norm": 18.556692123413086, "learning_rate": 7.469235751295338e-06, "loss": 0.5806, "mean_token_accuracy": 0.9084370732307434, "num_tokens": 2802249.0, "step": 1564 }, { "epoch": 0.2534207756456967, "grad_norm": 20.311861038208008, "learning_rate": 7.467616580310881e-06, "loss": 0.5975, "mean_token_accuracy": 0.9232409298419952, "num_tokens": 2804035.0, "step": 1565 }, { "epoch": 0.253582705853777, "grad_norm": 24.73240852355957, "learning_rate": 7.465997409326426e-06, "loss": 0.6951, "mean_token_accuracy": 0.898207426071167, "num_tokens": 2805821.0, "step": 1566 }, { "epoch": 0.2537446360618573, "grad_norm": 25.900110244750977, "learning_rate": 7.464378238341969e-06, "loss": 0.6718, "mean_token_accuracy": 0.9127525687217712, "num_tokens": 2807608.0, "step": 1567 }, { "epoch": 0.2539065662699377, "grad_norm": 20.611909866333008, "learning_rate": 7.462759067357514e-06, "loss": 0.6174, "mean_token_accuracy": 0.9211897850036621, "num_tokens": 2809396.0, "step": 1568 }, { "epoch": 0.254068496478018, "grad_norm": 22.453723907470703, "learning_rate": 7.461139896373057e-06, "loss": 0.6729, "mean_token_accuracy": 0.9144104421138763, "num_tokens": 2811178.0, "step": 1569 }, { "epoch": 0.2542304266860983, "grad_norm": 33.576324462890625, "learning_rate": 7.459520725388602e-06, "loss": 0.9475, "mean_token_accuracy": 0.8982540667057037, "num_tokens": 2812984.0, "step": 1570 }, { "epoch": 0.2543923568941786, "grad_norm": 19.841344833374023, "learning_rate": 7.457901554404145e-06, "loss": 0.677, "mean_token_accuracy": 0.9145643413066864, "num_tokens": 2814777.0, "step": 1571 }, { "epoch": 0.2545542871022589, "grad_norm": 22.187772750854492, "learning_rate": 7.45628238341969e-06, "loss": 0.5655, "mean_token_accuracy": 0.9186760783195496, "num_tokens": 2816571.0, "step": 1572 }, { "epoch": 0.2547162173103392, "grad_norm": 18.278175354003906, "learning_rate": 7.454663212435233e-06, "loss": 0.5916, "mean_token_accuracy": 0.9174720048904419, "num_tokens": 2818362.0, "step": 1573 }, { "epoch": 0.2548781475184196, "grad_norm": 17.979129791259766, "learning_rate": 7.453044041450778e-06, "loss": 0.5493, "mean_token_accuracy": 0.9128985702991486, "num_tokens": 2820162.0, "step": 1574 }, { "epoch": 0.2550400777264999, "grad_norm": 19.199766159057617, "learning_rate": 7.4514248704663214e-06, "loss": 0.5867, "mean_token_accuracy": 0.9079736173152924, "num_tokens": 2821957.0, "step": 1575 }, { "epoch": 0.2552020079345802, "grad_norm": 19.891965866088867, "learning_rate": 7.449805699481866e-06, "loss": 0.6775, "mean_token_accuracy": 0.9185689091682434, "num_tokens": 2823739.0, "step": 1576 }, { "epoch": 0.2553639381426605, "grad_norm": 32.130130767822266, "learning_rate": 7.4481865284974095e-06, "loss": 0.9009, "mean_token_accuracy": 0.8872180283069611, "num_tokens": 2825543.0, "step": 1577 }, { "epoch": 0.2555258683507408, "grad_norm": 14.756477355957031, "learning_rate": 7.446567357512954e-06, "loss": 0.5659, "mean_token_accuracy": 0.9263225495815277, "num_tokens": 2827340.0, "step": 1578 }, { "epoch": 0.2556877985588211, "grad_norm": 19.128576278686523, "learning_rate": 7.4449481865284975e-06, "loss": 0.6239, "mean_token_accuracy": 0.9171972870826721, "num_tokens": 2829130.0, "step": 1579 }, { "epoch": 0.2558497287669015, "grad_norm": 18.862079620361328, "learning_rate": 7.443329015544042e-06, "loss": 0.6894, "mean_token_accuracy": 0.9112637341022491, "num_tokens": 2830912.0, "step": 1580 }, { "epoch": 0.2560116589749818, "grad_norm": 26.105390548706055, "learning_rate": 7.4417098445595855e-06, "loss": 0.6622, "mean_token_accuracy": 0.9000000059604645, "num_tokens": 2832714.0, "step": 1581 }, { "epoch": 0.2561735891830621, "grad_norm": 22.801639556884766, "learning_rate": 7.44009067357513e-06, "loss": 0.7076, "mean_token_accuracy": 0.9014279842376709, "num_tokens": 2834500.0, "step": 1582 }, { "epoch": 0.2563355193911424, "grad_norm": 28.046396255493164, "learning_rate": 7.438471502590674e-06, "loss": 0.8387, "mean_token_accuracy": 0.898670494556427, "num_tokens": 2836289.0, "step": 1583 }, { "epoch": 0.2564974495992227, "grad_norm": 15.66650390625, "learning_rate": 7.436852331606218e-06, "loss": 0.5118, "mean_token_accuracy": 0.9282343983650208, "num_tokens": 2838079.0, "step": 1584 }, { "epoch": 0.2566593798073031, "grad_norm": 23.62172508239746, "learning_rate": 7.435233160621762e-06, "loss": 0.7315, "mean_token_accuracy": 0.9111754298210144, "num_tokens": 2839873.0, "step": 1585 }, { "epoch": 0.2568213100153834, "grad_norm": 27.808300018310547, "learning_rate": 7.433613989637306e-06, "loss": 0.8329, "mean_token_accuracy": 0.8953336775302887, "num_tokens": 2841662.0, "step": 1586 }, { "epoch": 0.2569832402234637, "grad_norm": 18.4217472076416, "learning_rate": 7.4319948186528504e-06, "loss": 0.6233, "mean_token_accuracy": 0.9146904051303864, "num_tokens": 2843444.0, "step": 1587 }, { "epoch": 0.257145170431544, "grad_norm": 21.971416473388672, "learning_rate": 7.430375647668394e-06, "loss": 0.7316, "mean_token_accuracy": 0.9037989974021912, "num_tokens": 2845236.0, "step": 1588 }, { "epoch": 0.2573071006396243, "grad_norm": 21.468717575073242, "learning_rate": 7.4287564766839385e-06, "loss": 0.6737, "mean_token_accuracy": 0.9122180640697479, "num_tokens": 2847021.0, "step": 1589 }, { "epoch": 0.2574690308477046, "grad_norm": 18.815839767456055, "learning_rate": 7.427137305699482e-06, "loss": 0.5851, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 2848815.0, "step": 1590 }, { "epoch": 0.257630961055785, "grad_norm": 22.126934051513672, "learning_rate": 7.4255181347150265e-06, "loss": 0.6571, "mean_token_accuracy": 0.9103114902973175, "num_tokens": 2850607.0, "step": 1591 }, { "epoch": 0.2577928912638653, "grad_norm": 24.832630157470703, "learning_rate": 7.42389896373057e-06, "loss": 0.744, "mean_token_accuracy": 0.8955419659614563, "num_tokens": 2852398.0, "step": 1592 }, { "epoch": 0.2579548214719456, "grad_norm": 24.52344512939453, "learning_rate": 7.4222797927461145e-06, "loss": 0.648, "mean_token_accuracy": 0.9103802740573883, "num_tokens": 2854189.0, "step": 1593 }, { "epoch": 0.2581167516800259, "grad_norm": 14.720680236816406, "learning_rate": 7.420660621761658e-06, "loss": 0.4965, "mean_token_accuracy": 0.9260977506637573, "num_tokens": 2855985.0, "step": 1594 }, { "epoch": 0.2582786818881062, "grad_norm": 25.270793914794922, "learning_rate": 7.4190414507772025e-06, "loss": 0.7717, "mean_token_accuracy": 0.9138705134391785, "num_tokens": 2857764.0, "step": 1595 }, { "epoch": 0.2584406120961865, "grad_norm": 20.666845321655273, "learning_rate": 7.417422279792746e-06, "loss": 0.7059, "mean_token_accuracy": 0.9033996760845184, "num_tokens": 2859545.0, "step": 1596 }, { "epoch": 0.2586025423042669, "grad_norm": 24.13327407836914, "learning_rate": 7.4158031088082906e-06, "loss": 0.7184, "mean_token_accuracy": 0.8925729393959045, "num_tokens": 2861345.0, "step": 1597 }, { "epoch": 0.2587644725123472, "grad_norm": 19.873493194580078, "learning_rate": 7.414183937823834e-06, "loss": 0.5498, "mean_token_accuracy": 0.917929470539093, "num_tokens": 2863137.0, "step": 1598 }, { "epoch": 0.2589264027204275, "grad_norm": 19.287906646728516, "learning_rate": 7.412564766839379e-06, "loss": 0.635, "mean_token_accuracy": 0.9122960269451141, "num_tokens": 2864924.0, "step": 1599 }, { "epoch": 0.2590883329285078, "grad_norm": 25.044971466064453, "learning_rate": 7.410945595854922e-06, "loss": 0.7811, "mean_token_accuracy": 0.9067248702049255, "num_tokens": 2866712.0, "step": 1600 }, { "epoch": 0.2592502631365881, "grad_norm": 24.437191009521484, "learning_rate": 7.409326424870467e-06, "loss": 0.7451, "mean_token_accuracy": 0.8893246352672577, "num_tokens": 2868495.0, "step": 1601 }, { "epoch": 0.2594121933446685, "grad_norm": 21.227680206298828, "learning_rate": 7.407707253886011e-06, "loss": 0.6862, "mean_token_accuracy": 0.9082276821136475, "num_tokens": 2870291.0, "step": 1602 }, { "epoch": 0.2595741235527488, "grad_norm": 29.17960548400879, "learning_rate": 7.406088082901555e-06, "loss": 1.0729, "mean_token_accuracy": 0.8758048117160797, "num_tokens": 2872085.0, "step": 1603 }, { "epoch": 0.2597360537608291, "grad_norm": 22.167009353637695, "learning_rate": 7.404468911917099e-06, "loss": 0.7462, "mean_token_accuracy": 0.8975377082824707, "num_tokens": 2873880.0, "step": 1604 }, { "epoch": 0.2598979839689094, "grad_norm": 33.898197174072266, "learning_rate": 7.402849740932643e-06, "loss": 0.8595, "mean_token_accuracy": 0.8941076397895813, "num_tokens": 2875675.0, "step": 1605 }, { "epoch": 0.2600599141769897, "grad_norm": 22.588411331176758, "learning_rate": 7.401230569948187e-06, "loss": 0.6975, "mean_token_accuracy": 0.9095667600631714, "num_tokens": 2877465.0, "step": 1606 }, { "epoch": 0.26022184438507, "grad_norm": 25.015331268310547, "learning_rate": 7.399611398963731e-06, "loss": 0.8261, "mean_token_accuracy": 0.9037989974021912, "num_tokens": 2879257.0, "step": 1607 }, { "epoch": 0.2603837745931504, "grad_norm": 25.3990535736084, "learning_rate": 7.397992227979275e-06, "loss": 0.7607, "mean_token_accuracy": 0.8942881524562836, "num_tokens": 2881043.0, "step": 1608 }, { "epoch": 0.2605457048012307, "grad_norm": 22.725894927978516, "learning_rate": 7.396373056994819e-06, "loss": 0.7696, "mean_token_accuracy": 0.9011540710926056, "num_tokens": 2882838.0, "step": 1609 }, { "epoch": 0.260707635009311, "grad_norm": 24.916545867919922, "learning_rate": 7.394753886010363e-06, "loss": 0.6697, "mean_token_accuracy": 0.9055270254611969, "num_tokens": 2884636.0, "step": 1610 }, { "epoch": 0.2608695652173913, "grad_norm": 18.58320426940918, "learning_rate": 7.393134715025907e-06, "loss": 0.5916, "mean_token_accuracy": 0.9201717376708984, "num_tokens": 2886436.0, "step": 1611 }, { "epoch": 0.2610314954254716, "grad_norm": 21.788318634033203, "learning_rate": 7.391515544041451e-06, "loss": 0.6117, "mean_token_accuracy": 0.9121415317058563, "num_tokens": 2888233.0, "step": 1612 }, { "epoch": 0.2611934256335519, "grad_norm": 24.78152847290039, "learning_rate": 7.389896373056995e-06, "loss": 0.9246, "mean_token_accuracy": 0.9063853025436401, "num_tokens": 2890043.0, "step": 1613 }, { "epoch": 0.2613553558416323, "grad_norm": 19.024290084838867, "learning_rate": 7.388277202072539e-06, "loss": 0.6388, "mean_token_accuracy": 0.9004613161087036, "num_tokens": 2891825.0, "step": 1614 }, { "epoch": 0.2615172860497126, "grad_norm": 19.56563949584961, "learning_rate": 7.386658031088083e-06, "loss": 0.6168, "mean_token_accuracy": 0.9070870876312256, "num_tokens": 2893617.0, "step": 1615 }, { "epoch": 0.2616792162577929, "grad_norm": 16.62118911743164, "learning_rate": 7.385038860103627e-06, "loss": 0.6309, "mean_token_accuracy": 0.9163140058517456, "num_tokens": 2895404.0, "step": 1616 }, { "epoch": 0.2618411464658732, "grad_norm": 19.283964157104492, "learning_rate": 7.383419689119171e-06, "loss": 0.579, "mean_token_accuracy": 0.91215580701828, "num_tokens": 2897200.0, "step": 1617 }, { "epoch": 0.2620030766739535, "grad_norm": 24.186052322387695, "learning_rate": 7.381800518134715e-06, "loss": 0.7242, "mean_token_accuracy": 0.8960174918174744, "num_tokens": 2898991.0, "step": 1618 }, { "epoch": 0.26216500688203387, "grad_norm": 18.09012794494629, "learning_rate": 7.380181347150259e-06, "loss": 0.6185, "mean_token_accuracy": 0.9062369465827942, "num_tokens": 2900780.0, "step": 1619 }, { "epoch": 0.2623269370901142, "grad_norm": 21.075101852416992, "learning_rate": 7.378562176165803e-06, "loss": 0.6515, "mean_token_accuracy": 0.9239674508571625, "num_tokens": 2902569.0, "step": 1620 }, { "epoch": 0.2624888672981945, "grad_norm": 20.784603118896484, "learning_rate": 7.376943005181348e-06, "loss": 0.6022, "mean_token_accuracy": 0.9206287264823914, "num_tokens": 2904358.0, "step": 1621 }, { "epoch": 0.2626507975062748, "grad_norm": 26.173858642578125, "learning_rate": 7.375323834196891e-06, "loss": 0.7976, "mean_token_accuracy": 0.8859971463680267, "num_tokens": 2906150.0, "step": 1622 }, { "epoch": 0.2628127277143551, "grad_norm": 26.334566116333008, "learning_rate": 7.373704663212436e-06, "loss": 0.7366, "mean_token_accuracy": 0.9070670008659363, "num_tokens": 2907942.0, "step": 1623 }, { "epoch": 0.2629746579224354, "grad_norm": 20.980764389038086, "learning_rate": 7.372085492227979e-06, "loss": 0.618, "mean_token_accuracy": 0.9102904498577118, "num_tokens": 2909734.0, "step": 1624 }, { "epoch": 0.26313658813051577, "grad_norm": 18.446298599243164, "learning_rate": 7.370466321243524e-06, "loss": 0.6578, "mean_token_accuracy": 0.9192083179950714, "num_tokens": 2911518.0, "step": 1625 }, { "epoch": 0.2632985183385961, "grad_norm": 19.16767692565918, "learning_rate": 7.368847150259067e-06, "loss": 0.6321, "mean_token_accuracy": 0.901695728302002, "num_tokens": 2913304.0, "step": 1626 }, { "epoch": 0.2634604485466764, "grad_norm": 21.383743286132812, "learning_rate": 7.367227979274612e-06, "loss": 0.6772, "mean_token_accuracy": 0.9109818339347839, "num_tokens": 2915086.0, "step": 1627 }, { "epoch": 0.2636223787547567, "grad_norm": 24.10162353515625, "learning_rate": 7.365608808290155e-06, "loss": 0.833, "mean_token_accuracy": 0.9025388360023499, "num_tokens": 2916875.0, "step": 1628 }, { "epoch": 0.263784308962837, "grad_norm": 19.051870346069336, "learning_rate": 7.3639896373057e-06, "loss": 0.624, "mean_token_accuracy": 0.9239382445812225, "num_tokens": 2918675.0, "step": 1629 }, { "epoch": 0.2639462391709173, "grad_norm": 21.700407028198242, "learning_rate": 7.362370466321243e-06, "loss": 0.6047, "mean_token_accuracy": 0.9085765480995178, "num_tokens": 2920471.0, "step": 1630 }, { "epoch": 0.26410816937899767, "grad_norm": 25.165332794189453, "learning_rate": 7.360751295336788e-06, "loss": 0.7159, "mean_token_accuracy": 0.9000000059604645, "num_tokens": 2922273.0, "step": 1631 }, { "epoch": 0.264270099587078, "grad_norm": 30.421682357788086, "learning_rate": 7.3591321243523314e-06, "loss": 0.9077, "mean_token_accuracy": 0.8911159336566925, "num_tokens": 2924070.0, "step": 1632 }, { "epoch": 0.2644320297951583, "grad_norm": 20.775630950927734, "learning_rate": 7.357512953367876e-06, "loss": 0.6237, "mean_token_accuracy": 0.9061861932277679, "num_tokens": 2925869.0, "step": 1633 }, { "epoch": 0.2645939600032386, "grad_norm": 15.506750106811523, "learning_rate": 7.3558937823834195e-06, "loss": 0.5065, "mean_token_accuracy": 0.9258370995521545, "num_tokens": 2927664.0, "step": 1634 }, { "epoch": 0.2647558902113189, "grad_norm": 20.23809814453125, "learning_rate": 7.354274611398964e-06, "loss": 0.606, "mean_token_accuracy": 0.9184607565402985, "num_tokens": 2929458.0, "step": 1635 }, { "epoch": 0.26491782041939926, "grad_norm": 25.841655731201172, "learning_rate": 7.3526554404145075e-06, "loss": 0.7959, "mean_token_accuracy": 0.9018123745918274, "num_tokens": 2931244.0, "step": 1636 }, { "epoch": 0.26507975062747957, "grad_norm": 27.68209457397461, "learning_rate": 7.351036269430052e-06, "loss": 0.746, "mean_token_accuracy": 0.9044131338596344, "num_tokens": 2933048.0, "step": 1637 }, { "epoch": 0.2652416808355599, "grad_norm": 18.19292640686035, "learning_rate": 7.3494170984455955e-06, "loss": 0.5807, "mean_token_accuracy": 0.9142778217792511, "num_tokens": 2934829.0, "step": 1638 }, { "epoch": 0.2654036110436402, "grad_norm": 21.33133888244629, "learning_rate": 7.34779792746114e-06, "loss": 0.6917, "mean_token_accuracy": 0.9137547016143799, "num_tokens": 2936619.0, "step": 1639 }, { "epoch": 0.2655655412517205, "grad_norm": 24.50990867614746, "learning_rate": 7.346178756476684e-06, "loss": 0.6993, "mean_token_accuracy": 0.8989050984382629, "num_tokens": 2938408.0, "step": 1640 }, { "epoch": 0.2657274714598008, "grad_norm": 26.69629669189453, "learning_rate": 7.344559585492228e-06, "loss": 0.7969, "mean_token_accuracy": 0.8951023519039154, "num_tokens": 2940216.0, "step": 1641 }, { "epoch": 0.26588940166788116, "grad_norm": 28.35828971862793, "learning_rate": 7.342940414507773e-06, "loss": 0.8344, "mean_token_accuracy": 0.8862600028514862, "num_tokens": 2942009.0, "step": 1642 }, { "epoch": 0.26605133187596147, "grad_norm": 26.645549774169922, "learning_rate": 7.341321243523317e-06, "loss": 0.7001, "mean_token_accuracy": 0.897817462682724, "num_tokens": 2943805.0, "step": 1643 }, { "epoch": 0.2662132620840418, "grad_norm": 20.410158157348633, "learning_rate": 7.339702072538861e-06, "loss": 0.6523, "mean_token_accuracy": 0.9127601683139801, "num_tokens": 2945593.0, "step": 1644 }, { "epoch": 0.2663751922921221, "grad_norm": 24.349576950073242, "learning_rate": 7.338082901554405e-06, "loss": 0.7482, "mean_token_accuracy": 0.8949275612831116, "num_tokens": 2947381.0, "step": 1645 }, { "epoch": 0.2665371225002024, "grad_norm": 25.9034423828125, "learning_rate": 7.336463730569949e-06, "loss": 0.7316, "mean_token_accuracy": 0.9089070856571198, "num_tokens": 2949178.0, "step": 1646 }, { "epoch": 0.26669905270828276, "grad_norm": 18.9997501373291, "learning_rate": 7.334844559585494e-06, "loss": 0.5377, "mean_token_accuracy": 0.9262779355049133, "num_tokens": 2950975.0, "step": 1647 }, { "epoch": 0.26686098291636307, "grad_norm": 26.151365280151367, "learning_rate": 7.333225388601037e-06, "loss": 0.7331, "mean_token_accuracy": 0.8992022573947906, "num_tokens": 2952766.0, "step": 1648 }, { "epoch": 0.2670229131244434, "grad_norm": 21.182880401611328, "learning_rate": 7.331606217616582e-06, "loss": 0.6852, "mean_token_accuracy": 0.9083566069602966, "num_tokens": 2954551.0, "step": 1649 }, { "epoch": 0.2671848433325237, "grad_norm": 22.718381881713867, "learning_rate": 7.329987046632125e-06, "loss": 0.6221, "mean_token_accuracy": 0.9123508334159851, "num_tokens": 2956337.0, "step": 1650 }, { "epoch": 0.267346773540604, "grad_norm": 12.982319831848145, "learning_rate": 7.32836787564767e-06, "loss": 0.5756, "mean_token_accuracy": 0.9285956621170044, "num_tokens": 2958115.0, "step": 1651 }, { "epoch": 0.2675087037486843, "grad_norm": 20.37946319580078, "learning_rate": 7.326748704663213e-06, "loss": 0.6445, "mean_token_accuracy": 0.9067419171333313, "num_tokens": 2959905.0, "step": 1652 }, { "epoch": 0.26767063395676466, "grad_norm": 20.764528274536133, "learning_rate": 7.325129533678758e-06, "loss": 0.6242, "mean_token_accuracy": 0.9079882204532623, "num_tokens": 2961700.0, "step": 1653 }, { "epoch": 0.26783256416484497, "grad_norm": 23.021770477294922, "learning_rate": 7.3235103626943014e-06, "loss": 0.666, "mean_token_accuracy": 0.9037662148475647, "num_tokens": 2963493.0, "step": 1654 }, { "epoch": 0.2679944943729253, "grad_norm": 21.963050842285156, "learning_rate": 7.321891191709846e-06, "loss": 0.6467, "mean_token_accuracy": 0.9121578335762024, "num_tokens": 2965278.0, "step": 1655 }, { "epoch": 0.2681564245810056, "grad_norm": 22.99561309814453, "learning_rate": 7.3202720207253895e-06, "loss": 0.6609, "mean_token_accuracy": 0.904386967420578, "num_tokens": 2967072.0, "step": 1656 }, { "epoch": 0.2683183547890859, "grad_norm": 24.727148056030273, "learning_rate": 7.318652849740934e-06, "loss": 0.7579, "mean_token_accuracy": 0.9037568271160126, "num_tokens": 2968864.0, "step": 1657 }, { "epoch": 0.2684802849971662, "grad_norm": 20.770639419555664, "learning_rate": 7.3170336787564775e-06, "loss": 0.655, "mean_token_accuracy": 0.9138047397136688, "num_tokens": 2970654.0, "step": 1658 }, { "epoch": 0.26864221520524656, "grad_norm": 25.987524032592773, "learning_rate": 7.315414507772022e-06, "loss": 0.707, "mean_token_accuracy": 0.9010467231273651, "num_tokens": 2972439.0, "step": 1659 }, { "epoch": 0.26880414541332687, "grad_norm": 22.091861724853516, "learning_rate": 7.3137953367875655e-06, "loss": 0.6424, "mean_token_accuracy": 0.913192093372345, "num_tokens": 2974227.0, "step": 1660 }, { "epoch": 0.2689660756214072, "grad_norm": 21.8024845123291, "learning_rate": 7.31217616580311e-06, "loss": 0.6333, "mean_token_accuracy": 0.9130645990371704, "num_tokens": 2976026.0, "step": 1661 }, { "epoch": 0.2691280058294875, "grad_norm": 16.567657470703125, "learning_rate": 7.3105569948186535e-06, "loss": 0.5409, "mean_token_accuracy": 0.9172661900520325, "num_tokens": 2977816.0, "step": 1662 }, { "epoch": 0.2692899360375678, "grad_norm": 25.517282485961914, "learning_rate": 7.308937823834198e-06, "loss": 0.5815, "mean_token_accuracy": 0.9107862710952759, "num_tokens": 2979608.0, "step": 1663 }, { "epoch": 0.26945186624564815, "grad_norm": 13.7872896194458, "learning_rate": 7.3073186528497416e-06, "loss": 0.5357, "mean_token_accuracy": 0.9232734441757202, "num_tokens": 2981381.0, "step": 1664 }, { "epoch": 0.26961379645372846, "grad_norm": 17.210819244384766, "learning_rate": 7.305699481865286e-06, "loss": 0.5341, "mean_token_accuracy": 0.9283071458339691, "num_tokens": 2983172.0, "step": 1665 }, { "epoch": 0.26977572666180877, "grad_norm": 20.563995361328125, "learning_rate": 7.3040803108808304e-06, "loss": 0.6303, "mean_token_accuracy": 0.9100719392299652, "num_tokens": 2984962.0, "step": 1666 }, { "epoch": 0.2699376568698891, "grad_norm": 20.487215042114258, "learning_rate": 7.302461139896374e-06, "loss": 0.601, "mean_token_accuracy": 0.9241819083690643, "num_tokens": 2986764.0, "step": 1667 }, { "epoch": 0.2700995870779694, "grad_norm": 19.923784255981445, "learning_rate": 7.3008419689119185e-06, "loss": 0.6009, "mean_token_accuracy": 0.9166505932807922, "num_tokens": 2988564.0, "step": 1668 }, { "epoch": 0.2702615172860497, "grad_norm": 28.150285720825195, "learning_rate": 7.299222797927462e-06, "loss": 0.7712, "mean_token_accuracy": 0.8988725543022156, "num_tokens": 2990362.0, "step": 1669 }, { "epoch": 0.27042344749413005, "grad_norm": 25.16350746154785, "learning_rate": 7.2976036269430065e-06, "loss": 0.6881, "mean_token_accuracy": 0.9053639471530914, "num_tokens": 2992149.0, "step": 1670 }, { "epoch": 0.27058537770221036, "grad_norm": 31.642532348632812, "learning_rate": 7.29598445595855e-06, "loss": 0.8571, "mean_token_accuracy": 0.8931206166744232, "num_tokens": 2993950.0, "step": 1671 }, { "epoch": 0.27074730791029067, "grad_norm": 29.09742546081543, "learning_rate": 7.2943652849740945e-06, "loss": 0.7577, "mean_token_accuracy": 0.9048761129379272, "num_tokens": 2995745.0, "step": 1672 }, { "epoch": 0.270909238118371, "grad_norm": 29.280715942382812, "learning_rate": 7.292746113989638e-06, "loss": 0.705, "mean_token_accuracy": 0.8885361850261688, "num_tokens": 2997553.0, "step": 1673 }, { "epoch": 0.2710711683264513, "grad_norm": 22.281274795532227, "learning_rate": 7.2911269430051825e-06, "loss": 0.6363, "mean_token_accuracy": 0.9023892879486084, "num_tokens": 2999352.0, "step": 1674 }, { "epoch": 0.2712330985345316, "grad_norm": 27.497831344604492, "learning_rate": 7.289507772020726e-06, "loss": 0.8469, "mean_token_accuracy": 0.9021909236907959, "num_tokens": 3001141.0, "step": 1675 }, { "epoch": 0.27139502874261195, "grad_norm": 27.839120864868164, "learning_rate": 7.2878886010362706e-06, "loss": 0.8249, "mean_token_accuracy": 0.9010110795497894, "num_tokens": 3002946.0, "step": 1676 }, { "epoch": 0.27155695895069226, "grad_norm": 22.24791145324707, "learning_rate": 7.286269430051814e-06, "loss": 0.6675, "mean_token_accuracy": 0.9094203114509583, "num_tokens": 3004734.0, "step": 1677 }, { "epoch": 0.27171888915877257, "grad_norm": 26.267562866210938, "learning_rate": 7.284650259067359e-06, "loss": 0.6961, "mean_token_accuracy": 0.9125371277332306, "num_tokens": 3006542.0, "step": 1678 }, { "epoch": 0.2718808193668529, "grad_norm": 22.462438583374023, "learning_rate": 7.283031088082902e-06, "loss": 0.6491, "mean_token_accuracy": 0.9124087691307068, "num_tokens": 3008328.0, "step": 1679 }, { "epoch": 0.2720427495749332, "grad_norm": 25.66066551208496, "learning_rate": 7.281411917098447e-06, "loss": 0.6398, "mean_token_accuracy": 0.9113828539848328, "num_tokens": 3010122.0, "step": 1680 }, { "epoch": 0.27220467978301355, "grad_norm": 26.425907135009766, "learning_rate": 7.27979274611399e-06, "loss": 0.6993, "mean_token_accuracy": 0.9058688282966614, "num_tokens": 3011920.0, "step": 1681 }, { "epoch": 0.27236660999109386, "grad_norm": 21.59770965576172, "learning_rate": 7.278173575129535e-06, "loss": 0.8758, "mean_token_accuracy": 0.894313633441925, "num_tokens": 3013708.0, "step": 1682 }, { "epoch": 0.27252854019917416, "grad_norm": 26.245925903320312, "learning_rate": 7.276554404145078e-06, "loss": 0.8479, "mean_token_accuracy": 0.8905109763145447, "num_tokens": 3015494.0, "step": 1683 }, { "epoch": 0.27269047040725447, "grad_norm": 28.3322811126709, "learning_rate": 7.274935233160623e-06, "loss": 0.85, "mean_token_accuracy": 0.8906124830245972, "num_tokens": 3017289.0, "step": 1684 }, { "epoch": 0.2728524006153348, "grad_norm": 20.236648559570312, "learning_rate": 7.273316062176167e-06, "loss": 0.6314, "mean_token_accuracy": 0.9073867499828339, "num_tokens": 3019082.0, "step": 1685 }, { "epoch": 0.2730143308234151, "grad_norm": 22.558427810668945, "learning_rate": 7.271696891191711e-06, "loss": 0.7295, "mean_token_accuracy": 0.901033878326416, "num_tokens": 3020877.0, "step": 1686 }, { "epoch": 0.27317626103149545, "grad_norm": 20.01369857788086, "learning_rate": 7.270077720207255e-06, "loss": 0.6531, "mean_token_accuracy": 0.9148764908313751, "num_tokens": 3022671.0, "step": 1687 }, { "epoch": 0.27333819123957576, "grad_norm": 19.840303421020508, "learning_rate": 7.268458549222799e-06, "loss": 0.6389, "mean_token_accuracy": 0.9163931012153625, "num_tokens": 3024458.0, "step": 1688 }, { "epoch": 0.27350012144765606, "grad_norm": 24.691333770751953, "learning_rate": 7.266839378238343e-06, "loss": 0.717, "mean_token_accuracy": 0.8982333838939667, "num_tokens": 3026245.0, "step": 1689 }, { "epoch": 0.27366205165573637, "grad_norm": 22.79033660888672, "learning_rate": 7.265220207253887e-06, "loss": 0.6267, "mean_token_accuracy": 0.9070870876312256, "num_tokens": 3028037.0, "step": 1690 }, { "epoch": 0.2738239818638167, "grad_norm": 15.833498001098633, "learning_rate": 7.263601036269431e-06, "loss": 0.5777, "mean_token_accuracy": 0.9146759510040283, "num_tokens": 3029818.0, "step": 1691 }, { "epoch": 0.273985912071897, "grad_norm": 17.392908096313477, "learning_rate": 7.261981865284975e-06, "loss": 0.6534, "mean_token_accuracy": 0.9181021451950073, "num_tokens": 3031611.0, "step": 1692 }, { "epoch": 0.27414784227997735, "grad_norm": 24.336748123168945, "learning_rate": 7.260362694300519e-06, "loss": 0.7693, "mean_token_accuracy": 0.8917735517024994, "num_tokens": 3033400.0, "step": 1693 }, { "epoch": 0.27430977248805766, "grad_norm": 19.489585876464844, "learning_rate": 7.258743523316063e-06, "loss": 0.67, "mean_token_accuracy": 0.9124087393283844, "num_tokens": 3035186.0, "step": 1694 }, { "epoch": 0.27447170269613796, "grad_norm": 26.379592895507812, "learning_rate": 7.257124352331607e-06, "loss": 0.7216, "mean_token_accuracy": 0.8939060866832733, "num_tokens": 3036981.0, "step": 1695 }, { "epoch": 0.27463363290421827, "grad_norm": 18.416854858398438, "learning_rate": 7.255505181347151e-06, "loss": 0.548, "mean_token_accuracy": 0.9272640645503998, "num_tokens": 3038768.0, "step": 1696 }, { "epoch": 0.2747955631122986, "grad_norm": 21.19996452331543, "learning_rate": 7.253886010362695e-06, "loss": 0.6504, "mean_token_accuracy": 0.9184423983097076, "num_tokens": 3040561.0, "step": 1697 }, { "epoch": 0.27495749332037894, "grad_norm": 17.445430755615234, "learning_rate": 7.252266839378239e-06, "loss": 0.599, "mean_token_accuracy": 0.9213643670082092, "num_tokens": 3042353.0, "step": 1698 }, { "epoch": 0.27511942352845925, "grad_norm": 16.73468589782715, "learning_rate": 7.250647668393783e-06, "loss": 0.5413, "mean_token_accuracy": 0.927003413438797, "num_tokens": 3044139.0, "step": 1699 }, { "epoch": 0.27528135373653956, "grad_norm": 24.797021865844727, "learning_rate": 7.249028497409327e-06, "loss": 0.6211, "mean_token_accuracy": 0.9191596508026123, "num_tokens": 3045947.0, "step": 1700 }, { "epoch": 0.27544328394461987, "grad_norm": 21.3189754486084, "learning_rate": 7.247409326424871e-06, "loss": 0.6931, "mean_token_accuracy": 0.9075706899166107, "num_tokens": 3047740.0, "step": 1701 }, { "epoch": 0.2756052141527002, "grad_norm": 17.231767654418945, "learning_rate": 7.245790155440415e-06, "loss": 0.6018, "mean_token_accuracy": 0.9214285910129547, "num_tokens": 3049532.0, "step": 1702 }, { "epoch": 0.2757671443607805, "grad_norm": 27.29370880126953, "learning_rate": 7.244170984455959e-06, "loss": 0.8351, "mean_token_accuracy": 0.8919772505760193, "num_tokens": 3051322.0, "step": 1703 }, { "epoch": 0.27592907456886084, "grad_norm": 25.817386627197266, "learning_rate": 7.242551813471504e-06, "loss": 0.8354, "mean_token_accuracy": 0.8861111104488373, "num_tokens": 3053113.0, "step": 1704 }, { "epoch": 0.27609100477694115, "grad_norm": 17.878170013427734, "learning_rate": 7.240932642487047e-06, "loss": 0.6253, "mean_token_accuracy": 0.9097945094108582, "num_tokens": 3054903.0, "step": 1705 }, { "epoch": 0.27625293498502146, "grad_norm": 25.780912399291992, "learning_rate": 7.239313471502592e-06, "loss": 0.7586, "mean_token_accuracy": 0.8893822431564331, "num_tokens": 3056703.0, "step": 1706 }, { "epoch": 0.27641486519310177, "grad_norm": 35.73030471801758, "learning_rate": 7.237694300518135e-06, "loss": 1.0544, "mean_token_accuracy": 0.8657047748565674, "num_tokens": 3058510.0, "step": 1707 }, { "epoch": 0.2765767954011821, "grad_norm": 22.286853790283203, "learning_rate": 7.23607512953368e-06, "loss": 0.7221, "mean_token_accuracy": 0.9059281051158905, "num_tokens": 3060298.0, "step": 1708 }, { "epoch": 0.2767387256092624, "grad_norm": 31.68348503112793, "learning_rate": 7.234455958549223e-06, "loss": 1.0219, "mean_token_accuracy": 0.8848258852958679, "num_tokens": 3062094.0, "step": 1709 }, { "epoch": 0.27690065581734274, "grad_norm": 34.010719299316406, "learning_rate": 7.232836787564768e-06, "loss": 0.7757, "mean_token_accuracy": 0.8824006617069244, "num_tokens": 3063887.0, "step": 1710 }, { "epoch": 0.27706258602542305, "grad_norm": 21.52349281311035, "learning_rate": 7.2312176165803114e-06, "loss": 0.6982, "mean_token_accuracy": 0.9139194190502167, "num_tokens": 3065689.0, "step": 1711 }, { "epoch": 0.27722451623350336, "grad_norm": 37.346290588378906, "learning_rate": 7.229598445595856e-06, "loss": 1.0875, "mean_token_accuracy": 0.8657718300819397, "num_tokens": 3067499.0, "step": 1712 }, { "epoch": 0.27738644644158367, "grad_norm": 27.85877799987793, "learning_rate": 7.2279792746113995e-06, "loss": 0.8146, "mean_token_accuracy": 0.8947355449199677, "num_tokens": 3069296.0, "step": 1713 }, { "epoch": 0.277548376649664, "grad_norm": 23.893590927124023, "learning_rate": 7.226360103626944e-06, "loss": 0.6507, "mean_token_accuracy": 0.89560467004776, "num_tokens": 3071095.0, "step": 1714 }, { "epoch": 0.27771030685774434, "grad_norm": 20.309354782104492, "learning_rate": 7.2247409326424875e-06, "loss": 0.6597, "mean_token_accuracy": 0.9127551019191742, "num_tokens": 3072894.0, "step": 1715 }, { "epoch": 0.27787223706582465, "grad_norm": 26.50943946838379, "learning_rate": 7.223121761658032e-06, "loss": 0.735, "mean_token_accuracy": 0.9046299159526825, "num_tokens": 3074689.0, "step": 1716 }, { "epoch": 0.27803416727390495, "grad_norm": 20.479528427124023, "learning_rate": 7.2215025906735755e-06, "loss": 0.5773, "mean_token_accuracy": 0.9187424778938293, "num_tokens": 3076484.0, "step": 1717 }, { "epoch": 0.27819609748198526, "grad_norm": 16.82716178894043, "learning_rate": 7.21988341968912e-06, "loss": 0.6007, "mean_token_accuracy": 0.9288500547409058, "num_tokens": 3078277.0, "step": 1718 }, { "epoch": 0.27835802769006557, "grad_norm": 19.977067947387695, "learning_rate": 7.2182642487046635e-06, "loss": 0.6873, "mean_token_accuracy": 0.9223257005214691, "num_tokens": 3080071.0, "step": 1719 }, { "epoch": 0.2785199578981459, "grad_norm": 22.197608947753906, "learning_rate": 7.216645077720208e-06, "loss": 0.7036, "mean_token_accuracy": 0.8984741866588593, "num_tokens": 3081857.0, "step": 1720 }, { "epoch": 0.27868188810622624, "grad_norm": 24.387502670288086, "learning_rate": 7.2150259067357516e-06, "loss": 0.6268, "mean_token_accuracy": 0.8981804847717285, "num_tokens": 3083644.0, "step": 1721 }, { "epoch": 0.27884381831430655, "grad_norm": 20.709949493408203, "learning_rate": 7.213406735751296e-06, "loss": 0.7047, "mean_token_accuracy": 0.9100719392299652, "num_tokens": 3085434.0, "step": 1722 }, { "epoch": 0.27900574852238685, "grad_norm": 20.250320434570312, "learning_rate": 7.2117875647668404e-06, "loss": 0.6484, "mean_token_accuracy": 0.9085853397846222, "num_tokens": 3087230.0, "step": 1723 }, { "epoch": 0.27916767873046716, "grad_norm": 21.463279724121094, "learning_rate": 7.210168393782384e-06, "loss": 0.7792, "mean_token_accuracy": 0.8989899158477783, "num_tokens": 3089018.0, "step": 1724 }, { "epoch": 0.27932960893854747, "grad_norm": 23.75426483154297, "learning_rate": 7.2085492227979285e-06, "loss": 0.7252, "mean_token_accuracy": 0.9062369465827942, "num_tokens": 3090807.0, "step": 1725 }, { "epoch": 0.2794915391466278, "grad_norm": 17.86284828186035, "learning_rate": 7.206930051813472e-06, "loss": 0.5551, "mean_token_accuracy": 0.920451283454895, "num_tokens": 3092608.0, "step": 1726 }, { "epoch": 0.27965346935470814, "grad_norm": 21.230356216430664, "learning_rate": 7.2053108808290165e-06, "loss": 0.6512, "mean_token_accuracy": 0.9052238762378693, "num_tokens": 3094394.0, "step": 1727 }, { "epoch": 0.27981539956278845, "grad_norm": 27.15345573425293, "learning_rate": 7.20369170984456e-06, "loss": 0.7591, "mean_token_accuracy": 0.8992336392402649, "num_tokens": 3096184.0, "step": 1728 }, { "epoch": 0.27997732977086875, "grad_norm": 18.63810920715332, "learning_rate": 7.2020725388601045e-06, "loss": 0.6006, "mean_token_accuracy": 0.9218443036079407, "num_tokens": 3097978.0, "step": 1729 }, { "epoch": 0.28013925997894906, "grad_norm": 19.0826416015625, "learning_rate": 7.200453367875648e-06, "loss": 0.6346, "mean_token_accuracy": 0.9006539285182953, "num_tokens": 3099772.0, "step": 1730 }, { "epoch": 0.28030119018702937, "grad_norm": 22.469587326049805, "learning_rate": 7.1988341968911925e-06, "loss": 0.7041, "mean_token_accuracy": 0.9026418626308441, "num_tokens": 3101570.0, "step": 1731 }, { "epoch": 0.28046312039510973, "grad_norm": 18.425203323364258, "learning_rate": 7.197215025906736e-06, "loss": 0.5702, "mean_token_accuracy": 0.9202331602573395, "num_tokens": 3103358.0, "step": 1732 }, { "epoch": 0.28062505060319004, "grad_norm": 17.37875747680664, "learning_rate": 7.1955958549222806e-06, "loss": 0.5344, "mean_token_accuracy": 0.924378901720047, "num_tokens": 3105148.0, "step": 1733 }, { "epoch": 0.28078698081127035, "grad_norm": 28.775325775146484, "learning_rate": 7.193976683937824e-06, "loss": 0.8964, "mean_token_accuracy": 0.8898792564868927, "num_tokens": 3106949.0, "step": 1734 }, { "epoch": 0.28094891101935066, "grad_norm": 26.76213264465332, "learning_rate": 7.192357512953369e-06, "loss": 0.6796, "mean_token_accuracy": 0.9092342555522919, "num_tokens": 3108746.0, "step": 1735 }, { "epoch": 0.28111084122743096, "grad_norm": 21.826457977294922, "learning_rate": 7.190738341968912e-06, "loss": 0.6157, "mean_token_accuracy": 0.9163931012153625, "num_tokens": 3110533.0, "step": 1736 }, { "epoch": 0.28127277143551127, "grad_norm": 19.561569213867188, "learning_rate": 7.189119170984457e-06, "loss": 0.6039, "mean_token_accuracy": 0.921950489282608, "num_tokens": 3112327.0, "step": 1737 }, { "epoch": 0.28143470164359163, "grad_norm": 16.025388717651367, "learning_rate": 7.1875e-06, "loss": 0.5484, "mean_token_accuracy": 0.9285677969455719, "num_tokens": 3114119.0, "step": 1738 }, { "epoch": 0.28159663185167194, "grad_norm": 21.772972106933594, "learning_rate": 7.185880829015545e-06, "loss": 0.5805, "mean_token_accuracy": 0.9229403436183929, "num_tokens": 3115891.0, "step": 1739 }, { "epoch": 0.28175856205975225, "grad_norm": 31.67912483215332, "learning_rate": 7.184261658031088e-06, "loss": 0.8604, "mean_token_accuracy": 0.8773398995399475, "num_tokens": 3117688.0, "step": 1740 }, { "epoch": 0.28192049226783256, "grad_norm": 24.92591094970703, "learning_rate": 7.182642487046633e-06, "loss": 0.6503, "mean_token_accuracy": 0.9142156839370728, "num_tokens": 3119480.0, "step": 1741 }, { "epoch": 0.28208242247591286, "grad_norm": 16.902267456054688, "learning_rate": 7.181023316062177e-06, "loss": 0.4875, "mean_token_accuracy": 0.9216987490653992, "num_tokens": 3121272.0, "step": 1742 }, { "epoch": 0.28224435268399317, "grad_norm": 18.500478744506836, "learning_rate": 7.179404145077721e-06, "loss": 0.6429, "mean_token_accuracy": 0.9199725091457367, "num_tokens": 3123059.0, "step": 1743 }, { "epoch": 0.28240628289207353, "grad_norm": 25.750652313232422, "learning_rate": 7.177784974093265e-06, "loss": 0.7089, "mean_token_accuracy": 0.8999382853507996, "num_tokens": 3124850.0, "step": 1744 }, { "epoch": 0.28256821310015384, "grad_norm": 21.59621238708496, "learning_rate": 7.176165803108809e-06, "loss": 0.6082, "mean_token_accuracy": 0.9203707277774811, "num_tokens": 3126651.0, "step": 1745 }, { "epoch": 0.28273014330823415, "grad_norm": 14.908434867858887, "learning_rate": 7.174546632124353e-06, "loss": 0.5229, "mean_token_accuracy": 0.9211378395557404, "num_tokens": 3128442.0, "step": 1746 }, { "epoch": 0.28289207351631446, "grad_norm": 8.731510162353516, "learning_rate": 7.172927461139897e-06, "loss": 0.4402, "mean_token_accuracy": 0.9343991577625275, "num_tokens": 3130228.0, "step": 1747 }, { "epoch": 0.28305400372439476, "grad_norm": 22.36594581604004, "learning_rate": 7.171308290155441e-06, "loss": 0.5988, "mean_token_accuracy": 0.9206026494503021, "num_tokens": 3132017.0, "step": 1748 }, { "epoch": 0.28321593393247513, "grad_norm": 26.78276252746582, "learning_rate": 7.169689119170985e-06, "loss": 0.6731, "mean_token_accuracy": 0.8984403610229492, "num_tokens": 3133805.0, "step": 1749 }, { "epoch": 0.28337786414055544, "grad_norm": 22.339689254760742, "learning_rate": 7.168069948186529e-06, "loss": 0.6043, "mean_token_accuracy": 0.9100719392299652, "num_tokens": 3135595.0, "step": 1750 }, { "epoch": 0.28353979434863574, "grad_norm": 24.507793426513672, "learning_rate": 7.166450777202073e-06, "loss": 0.6374, "mean_token_accuracy": 0.9116646647453308, "num_tokens": 3137390.0, "step": 1751 }, { "epoch": 0.28370172455671605, "grad_norm": 31.657018661499023, "learning_rate": 7.164831606217617e-06, "loss": 0.8256, "mean_token_accuracy": 0.8889145851135254, "num_tokens": 3139181.0, "step": 1752 }, { "epoch": 0.28386365476479636, "grad_norm": 25.023855209350586, "learning_rate": 7.163212435233161e-06, "loss": 0.6248, "mean_token_accuracy": 0.920797735452652, "num_tokens": 3140958.0, "step": 1753 }, { "epoch": 0.28402558497287667, "grad_norm": 25.699691772460938, "learning_rate": 7.161593264248705e-06, "loss": 0.7191, "mean_token_accuracy": 0.9119867086410522, "num_tokens": 3142754.0, "step": 1754 }, { "epoch": 0.28418751518095703, "grad_norm": 23.910308837890625, "learning_rate": 7.159974093264249e-06, "loss": 0.7273, "mean_token_accuracy": 0.9109405279159546, "num_tokens": 3144547.0, "step": 1755 }, { "epoch": 0.28434944538903734, "grad_norm": 27.815820693969727, "learning_rate": 7.158354922279793e-06, "loss": 0.8439, "mean_token_accuracy": 0.9046299159526825, "num_tokens": 3146342.0, "step": 1756 }, { "epoch": 0.28451137559711764, "grad_norm": 13.852578163146973, "learning_rate": 7.156735751295337e-06, "loss": 0.4965, "mean_token_accuracy": 0.9316923022270203, "num_tokens": 3148132.0, "step": 1757 }, { "epoch": 0.28467330580519795, "grad_norm": 27.006715774536133, "learning_rate": 7.155116580310881e-06, "loss": 0.8091, "mean_token_accuracy": 0.8957136273384094, "num_tokens": 3149922.0, "step": 1758 }, { "epoch": 0.28483523601327826, "grad_norm": 27.09081268310547, "learning_rate": 7.153497409326425e-06, "loss": 0.6912, "mean_token_accuracy": 0.9068683385848999, "num_tokens": 3151724.0, "step": 1759 }, { "epoch": 0.2849971662213586, "grad_norm": 18.78533935546875, "learning_rate": 7.151878238341969e-06, "loss": 0.5705, "mean_token_accuracy": 0.9096638560295105, "num_tokens": 3153512.0, "step": 1760 }, { "epoch": 0.28515909642943893, "grad_norm": 21.3704833984375, "learning_rate": 7.150259067357514e-06, "loss": 0.6402, "mean_token_accuracy": 0.9099134206771851, "num_tokens": 3155301.0, "step": 1761 }, { "epoch": 0.28532102663751924, "grad_norm": 26.896732330322266, "learning_rate": 7.148639896373057e-06, "loss": 0.7646, "mean_token_accuracy": 0.8941701948642731, "num_tokens": 3157097.0, "step": 1762 }, { "epoch": 0.28548295684559954, "grad_norm": 21.581382751464844, "learning_rate": 7.147020725388602e-06, "loss": 0.6961, "mean_token_accuracy": 0.9101685881614685, "num_tokens": 3158898.0, "step": 1763 }, { "epoch": 0.28564488705367985, "grad_norm": 23.770286560058594, "learning_rate": 7.145401554404145e-06, "loss": 0.6248, "mean_token_accuracy": 0.9125587046146393, "num_tokens": 3160684.0, "step": 1764 }, { "epoch": 0.28580681726176016, "grad_norm": 24.074739456176758, "learning_rate": 7.14378238341969e-06, "loss": 0.5971, "mean_token_accuracy": 0.9010489284992218, "num_tokens": 3162479.0, "step": 1765 }, { "epoch": 0.2859687474698405, "grad_norm": 29.391033172607422, "learning_rate": 7.1421632124352334e-06, "loss": 0.8131, "mean_token_accuracy": 0.8920139968395233, "num_tokens": 3164278.0, "step": 1766 }, { "epoch": 0.28613067767792083, "grad_norm": 20.008272171020508, "learning_rate": 7.140544041450778e-06, "loss": 0.606, "mean_token_accuracy": 0.9132780730724335, "num_tokens": 3166067.0, "step": 1767 }, { "epoch": 0.28629260788600114, "grad_norm": 29.234251022338867, "learning_rate": 7.1389248704663215e-06, "loss": 0.6728, "mean_token_accuracy": 0.8939980864524841, "num_tokens": 3167861.0, "step": 1768 }, { "epoch": 0.28645453809408145, "grad_norm": 27.4943904876709, "learning_rate": 7.137305699481866e-06, "loss": 0.756, "mean_token_accuracy": 0.8960067927837372, "num_tokens": 3169652.0, "step": 1769 }, { "epoch": 0.28661646830216175, "grad_norm": 23.386791229248047, "learning_rate": 7.1356865284974095e-06, "loss": 0.7078, "mean_token_accuracy": 0.9003976583480835, "num_tokens": 3171435.0, "step": 1770 }, { "epoch": 0.28677839851024206, "grad_norm": 25.822498321533203, "learning_rate": 7.134067357512954e-06, "loss": 0.9502, "mean_token_accuracy": 0.8897051215171814, "num_tokens": 3173228.0, "step": 1771 }, { "epoch": 0.2869403287183224, "grad_norm": 33.0545654296875, "learning_rate": 7.1324481865284975e-06, "loss": 0.8666, "mean_token_accuracy": 0.8997684121131897, "num_tokens": 3175028.0, "step": 1772 }, { "epoch": 0.28710225892640273, "grad_norm": 16.903104782104492, "learning_rate": 7.130829015544042e-06, "loss": 0.6484, "mean_token_accuracy": 0.9195243418216705, "num_tokens": 3176801.0, "step": 1773 }, { "epoch": 0.28726418913448304, "grad_norm": 28.1411190032959, "learning_rate": 7.1292098445595855e-06, "loss": 0.827, "mean_token_accuracy": 0.8897902071475983, "num_tokens": 3178585.0, "step": 1774 }, { "epoch": 0.28742611934256335, "grad_norm": 22.95647430419922, "learning_rate": 7.12759067357513e-06, "loss": 0.6941, "mean_token_accuracy": 0.9054373502731323, "num_tokens": 3180382.0, "step": 1775 }, { "epoch": 0.28758804955064365, "grad_norm": 17.32350730895996, "learning_rate": 7.1259715025906736e-06, "loss": 0.5548, "mean_token_accuracy": 0.927619993686676, "num_tokens": 3182184.0, "step": 1776 }, { "epoch": 0.287749979758724, "grad_norm": 33.343257904052734, "learning_rate": 7.124352331606218e-06, "loss": 0.8738, "mean_token_accuracy": 0.9024864137172699, "num_tokens": 3183983.0, "step": 1777 }, { "epoch": 0.2879119099668043, "grad_norm": 23.17249870300293, "learning_rate": 7.1227331606217624e-06, "loss": 0.7261, "mean_token_accuracy": 0.9051577150821686, "num_tokens": 3185769.0, "step": 1778 }, { "epoch": 0.28807384017488463, "grad_norm": 24.464021682739258, "learning_rate": 7.121113989637306e-06, "loss": 0.8161, "mean_token_accuracy": 0.9078470766544342, "num_tokens": 3187563.0, "step": 1779 }, { "epoch": 0.28823577038296494, "grad_norm": 25.535337448120117, "learning_rate": 7.1194948186528505e-06, "loss": 0.7252, "mean_token_accuracy": 0.9045390188694, "num_tokens": 3189366.0, "step": 1780 }, { "epoch": 0.28839770059104525, "grad_norm": 27.85541534423828, "learning_rate": 7.117875647668394e-06, "loss": 0.7757, "mean_token_accuracy": 0.9077968001365662, "num_tokens": 3191160.0, "step": 1781 }, { "epoch": 0.28855963079912555, "grad_norm": 19.054771423339844, "learning_rate": 7.1162564766839385e-06, "loss": 0.5553, "mean_token_accuracy": 0.9195241630077362, "num_tokens": 3192958.0, "step": 1782 }, { "epoch": 0.2887215610072059, "grad_norm": 25.0560302734375, "learning_rate": 7.114637305699482e-06, "loss": 0.7264, "mean_token_accuracy": 0.8999121785163879, "num_tokens": 3194740.0, "step": 1783 }, { "epoch": 0.2888834912152862, "grad_norm": 24.85127067565918, "learning_rate": 7.1130181347150265e-06, "loss": 0.6725, "mean_token_accuracy": 0.8858367800712585, "num_tokens": 3196541.0, "step": 1784 }, { "epoch": 0.28904542142336653, "grad_norm": 22.763168334960938, "learning_rate": 7.11139896373057e-06, "loss": 0.6293, "mean_token_accuracy": 0.9073200225830078, "num_tokens": 3198323.0, "step": 1785 }, { "epoch": 0.28920735163144684, "grad_norm": 18.682348251342773, "learning_rate": 7.1097797927461145e-06, "loss": 0.5815, "mean_token_accuracy": 0.9149402678012848, "num_tokens": 3200128.0, "step": 1786 }, { "epoch": 0.28936928183952715, "grad_norm": 26.31068992614746, "learning_rate": 7.108160621761658e-06, "loss": 0.6941, "mean_token_accuracy": 0.9076961874961853, "num_tokens": 3201922.0, "step": 1787 }, { "epoch": 0.28953121204760746, "grad_norm": 23.188451766967773, "learning_rate": 7.1065414507772026e-06, "loss": 0.7718, "mean_token_accuracy": 0.8951772749423981, "num_tokens": 3203711.0, "step": 1788 }, { "epoch": 0.2896931422556878, "grad_norm": 21.997140884399414, "learning_rate": 7.104922279792746e-06, "loss": 0.6518, "mean_token_accuracy": 0.9054292142391205, "num_tokens": 3205509.0, "step": 1789 }, { "epoch": 0.2898550724637681, "grad_norm": 20.585386276245117, "learning_rate": 7.103303108808291e-06, "loss": 0.6402, "mean_token_accuracy": 0.9077979624271393, "num_tokens": 3207293.0, "step": 1790 }, { "epoch": 0.29001700267184843, "grad_norm": 21.726736068725586, "learning_rate": 7.101683937823834e-06, "loss": 0.7439, "mean_token_accuracy": 0.903900682926178, "num_tokens": 3209076.0, "step": 1791 }, { "epoch": 0.29017893287992874, "grad_norm": 21.304414749145508, "learning_rate": 7.100064766839379e-06, "loss": 0.6034, "mean_token_accuracy": 0.9222372174263, "num_tokens": 3210871.0, "step": 1792 }, { "epoch": 0.29034086308800905, "grad_norm": 21.047353744506836, "learning_rate": 7.098445595854922e-06, "loss": 0.5971, "mean_token_accuracy": 0.9229517877101898, "num_tokens": 3212668.0, "step": 1793 }, { "epoch": 0.2905027932960894, "grad_norm": 22.387699127197266, "learning_rate": 7.096826424870467e-06, "loss": 0.6792, "mean_token_accuracy": 0.9159420430660248, "num_tokens": 3214453.0, "step": 1794 }, { "epoch": 0.2906647235041697, "grad_norm": 16.854310989379883, "learning_rate": 7.09520725388601e-06, "loss": 0.583, "mean_token_accuracy": 0.9225809872150421, "num_tokens": 3216249.0, "step": 1795 }, { "epoch": 0.29082665371225, "grad_norm": 17.550506591796875, "learning_rate": 7.093588082901555e-06, "loss": 0.5446, "mean_token_accuracy": 0.9181357622146606, "num_tokens": 3218042.0, "step": 1796 }, { "epoch": 0.29098858392033033, "grad_norm": 25.162944793701172, "learning_rate": 7.091968911917099e-06, "loss": 0.708, "mean_token_accuracy": 0.9017778038978577, "num_tokens": 3219839.0, "step": 1797 }, { "epoch": 0.29115051412841064, "grad_norm": 18.8907527923584, "learning_rate": 7.090349740932643e-06, "loss": 0.5515, "mean_token_accuracy": 0.9176688194274902, "num_tokens": 3221630.0, "step": 1798 }, { "epoch": 0.29131244433649095, "grad_norm": 24.114839553833008, "learning_rate": 7.088730569948187e-06, "loss": 0.6788, "mean_token_accuracy": 0.9061359763145447, "num_tokens": 3223419.0, "step": 1799 }, { "epoch": 0.2914743745445713, "grad_norm": 21.52777671813965, "learning_rate": 7.087111398963731e-06, "loss": 0.6067, "mean_token_accuracy": 0.9120011925697327, "num_tokens": 3225215.0, "step": 1800 }, { "epoch": 0.2916363047526516, "grad_norm": 16.458261489868164, "learning_rate": 7.085492227979275e-06, "loss": 0.6179, "mean_token_accuracy": 0.92044797539711, "num_tokens": 3227003.0, "step": 1801 }, { "epoch": 0.29179823496073193, "grad_norm": 24.96923065185547, "learning_rate": 7.083873056994819e-06, "loss": 0.656, "mean_token_accuracy": 0.8987536430358887, "num_tokens": 3228792.0, "step": 1802 }, { "epoch": 0.29196016516881224, "grad_norm": 18.641719818115234, "learning_rate": 7.082253886010363e-06, "loss": 0.6011, "mean_token_accuracy": 0.9200254082679749, "num_tokens": 3230579.0, "step": 1803 }, { "epoch": 0.29212209537689254, "grad_norm": 23.91495704650879, "learning_rate": 7.080634715025907e-06, "loss": 0.8618, "mean_token_accuracy": 0.9035947918891907, "num_tokens": 3232371.0, "step": 1804 }, { "epoch": 0.29228402558497285, "grad_norm": 25.538455963134766, "learning_rate": 7.079015544041451e-06, "loss": 0.7299, "mean_token_accuracy": 0.9019423723220825, "num_tokens": 3234148.0, "step": 1805 }, { "epoch": 0.2924459557930532, "grad_norm": 21.477170944213867, "learning_rate": 7.077396373056995e-06, "loss": 0.6279, "mean_token_accuracy": 0.9062043726444244, "num_tokens": 3235937.0, "step": 1806 }, { "epoch": 0.2926078860011335, "grad_norm": 30.938232421875, "learning_rate": 7.075777202072539e-06, "loss": 0.8269, "mean_token_accuracy": 0.9032630920410156, "num_tokens": 3237728.0, "step": 1807 }, { "epoch": 0.29276981620921383, "grad_norm": 24.801923751831055, "learning_rate": 7.074158031088083e-06, "loss": 0.7228, "mean_token_accuracy": 0.8962906301021576, "num_tokens": 3239510.0, "step": 1808 }, { "epoch": 0.29293174641729414, "grad_norm": 25.62843132019043, "learning_rate": 7.072538860103627e-06, "loss": 0.7495, "mean_token_accuracy": 0.9031884074211121, "num_tokens": 3241310.0, "step": 1809 }, { "epoch": 0.29309367662537444, "grad_norm": 24.041162490844727, "learning_rate": 7.070919689119171e-06, "loss": 0.6757, "mean_token_accuracy": 0.9052418172359467, "num_tokens": 3243096.0, "step": 1810 }, { "epoch": 0.2932556068334548, "grad_norm": 23.216135025024414, "learning_rate": 7.069300518134715e-06, "loss": 0.5936, "mean_token_accuracy": 0.9085957109928131, "num_tokens": 3244891.0, "step": 1811 }, { "epoch": 0.2934175370415351, "grad_norm": 29.72441291809082, "learning_rate": 7.067681347150259e-06, "loss": 0.7815, "mean_token_accuracy": 0.8975876569747925, "num_tokens": 3246686.0, "step": 1812 }, { "epoch": 0.2935794672496154, "grad_norm": 26.464025497436523, "learning_rate": 7.066062176165803e-06, "loss": 0.6181, "mean_token_accuracy": 0.9070670008659363, "num_tokens": 3248478.0, "step": 1813 }, { "epoch": 0.29374139745769573, "grad_norm": 20.82701873779297, "learning_rate": 7.064443005181347e-06, "loss": 0.6096, "mean_token_accuracy": 0.9132352769374847, "num_tokens": 3250266.0, "step": 1814 }, { "epoch": 0.29390332766577604, "grad_norm": 27.885255813598633, "learning_rate": 7.062823834196891e-06, "loss": 0.6806, "mean_token_accuracy": 0.902446061372757, "num_tokens": 3252067.0, "step": 1815 }, { "epoch": 0.29406525787385634, "grad_norm": 24.843610763549805, "learning_rate": 7.061204663212436e-06, "loss": 0.6487, "mean_token_accuracy": 0.9192118346691132, "num_tokens": 3253864.0, "step": 1816 }, { "epoch": 0.2942271880819367, "grad_norm": 25.647825241088867, "learning_rate": 7.059585492227979e-06, "loss": 0.7726, "mean_token_accuracy": 0.9078470766544342, "num_tokens": 3255658.0, "step": 1817 }, { "epoch": 0.294389118290017, "grad_norm": 19.608985900878906, "learning_rate": 7.057966321243524e-06, "loss": 0.5449, "mean_token_accuracy": 0.9183256030082703, "num_tokens": 3257439.0, "step": 1818 }, { "epoch": 0.2945510484980973, "grad_norm": 22.01833724975586, "learning_rate": 7.056347150259067e-06, "loss": 0.6027, "mean_token_accuracy": 0.9080341756343842, "num_tokens": 3259234.0, "step": 1819 }, { "epoch": 0.29471297870617763, "grad_norm": 23.9237060546875, "learning_rate": 7.054727979274612e-06, "loss": 0.659, "mean_token_accuracy": 0.9042107164859772, "num_tokens": 3261017.0, "step": 1820 }, { "epoch": 0.29487490891425794, "grad_norm": 25.75096321105957, "learning_rate": 7.053108808290155e-06, "loss": 0.8119, "mean_token_accuracy": 0.9050742387771606, "num_tokens": 3262810.0, "step": 1821 }, { "epoch": 0.29503683912233825, "grad_norm": 22.666175842285156, "learning_rate": 7.0514896373057e-06, "loss": 0.5853, "mean_token_accuracy": 0.9119242131710052, "num_tokens": 3264595.0, "step": 1822 }, { "epoch": 0.2951987693304186, "grad_norm": 24.539077758789062, "learning_rate": 7.0498704663212434e-06, "loss": 0.6422, "mean_token_accuracy": 0.9094203114509583, "num_tokens": 3266383.0, "step": 1823 }, { "epoch": 0.2953606995384989, "grad_norm": 14.791534423828125, "learning_rate": 7.048251295336788e-06, "loss": 0.5063, "mean_token_accuracy": 0.9255244135856628, "num_tokens": 3268177.0, "step": 1824 }, { "epoch": 0.2955226297465792, "grad_norm": 20.986412048339844, "learning_rate": 7.0466321243523315e-06, "loss": 0.641, "mean_token_accuracy": 0.9122835099697113, "num_tokens": 3269963.0, "step": 1825 }, { "epoch": 0.29568455995465953, "grad_norm": 20.484474182128906, "learning_rate": 7.045012953367876e-06, "loss": 0.5988, "mean_token_accuracy": 0.9147412180900574, "num_tokens": 3271745.0, "step": 1826 }, { "epoch": 0.29584649016273984, "grad_norm": 21.436328887939453, "learning_rate": 7.0433937823834195e-06, "loss": 0.6073, "mean_token_accuracy": 0.9102211594581604, "num_tokens": 3273524.0, "step": 1827 }, { "epoch": 0.2960084203708202, "grad_norm": 20.828622817993164, "learning_rate": 7.041774611398964e-06, "loss": 0.7566, "mean_token_accuracy": 0.9098861515522003, "num_tokens": 3275314.0, "step": 1828 }, { "epoch": 0.2961703505789005, "grad_norm": 25.984052658081055, "learning_rate": 7.0401554404145075e-06, "loss": 0.839, "mean_token_accuracy": 0.8964285552501678, "num_tokens": 3277106.0, "step": 1829 }, { "epoch": 0.2963322807869808, "grad_norm": 17.318105697631836, "learning_rate": 7.038536269430052e-06, "loss": 0.55, "mean_token_accuracy": 0.9267389476299286, "num_tokens": 3278891.0, "step": 1830 }, { "epoch": 0.2964942109950611, "grad_norm": 21.48662567138672, "learning_rate": 7.0369170984455956e-06, "loss": 0.5909, "mean_token_accuracy": 0.9152255654335022, "num_tokens": 3280676.0, "step": 1831 }, { "epoch": 0.29665614120314143, "grad_norm": 29.47977638244629, "learning_rate": 7.03529792746114e-06, "loss": 0.827, "mean_token_accuracy": 0.8855248391628265, "num_tokens": 3282467.0, "step": 1832 }, { "epoch": 0.29681807141122174, "grad_norm": 22.44864273071289, "learning_rate": 7.0336787564766836e-06, "loss": 0.6171, "mean_token_accuracy": 0.9077857732772827, "num_tokens": 3284250.0, "step": 1833 }, { "epoch": 0.2969800016193021, "grad_norm": 23.180383682250977, "learning_rate": 7.032059585492228e-06, "loss": 0.7767, "mean_token_accuracy": 0.9025468528270721, "num_tokens": 3286037.0, "step": 1834 }, { "epoch": 0.2971419318273824, "grad_norm": 19.097293853759766, "learning_rate": 7.030440414507773e-06, "loss": 0.6403, "mean_token_accuracy": 0.9049040675163269, "num_tokens": 3287823.0, "step": 1835 }, { "epoch": 0.2973038620354627, "grad_norm": 18.602781295776367, "learning_rate": 7.028821243523317e-06, "loss": 0.6654, "mean_token_accuracy": 0.9163228571414948, "num_tokens": 3289610.0, "step": 1836 }, { "epoch": 0.297465792243543, "grad_norm": 20.606163024902344, "learning_rate": 7.027202072538861e-06, "loss": 0.5904, "mean_token_accuracy": 0.9134344756603241, "num_tokens": 3291399.0, "step": 1837 }, { "epoch": 0.29762772245162333, "grad_norm": 29.514991760253906, "learning_rate": 7.025582901554405e-06, "loss": 0.8008, "mean_token_accuracy": 0.9006385803222656, "num_tokens": 3293191.0, "step": 1838 }, { "epoch": 0.29778965265970364, "grad_norm": 24.596303939819336, "learning_rate": 7.023963730569949e-06, "loss": 0.7256, "mean_token_accuracy": 0.9115384519100189, "num_tokens": 3294986.0, "step": 1839 }, { "epoch": 0.297951582867784, "grad_norm": 15.325115203857422, "learning_rate": 7.022344559585493e-06, "loss": 0.5578, "mean_token_accuracy": 0.9207678437232971, "num_tokens": 3296763.0, "step": 1840 }, { "epoch": 0.2981135130758643, "grad_norm": 25.706947326660156, "learning_rate": 7.020725388601037e-06, "loss": 0.7381, "mean_token_accuracy": 0.8916953504085541, "num_tokens": 3298552.0, "step": 1841 }, { "epoch": 0.2982754432839446, "grad_norm": 12.791678428649902, "learning_rate": 7.019106217616582e-06, "loss": 0.5406, "mean_token_accuracy": 0.9204900860786438, "num_tokens": 3300341.0, "step": 1842 }, { "epoch": 0.2984373734920249, "grad_norm": 27.136098861694336, "learning_rate": 7.017487046632125e-06, "loss": 0.8081, "mean_token_accuracy": 0.8882094025611877, "num_tokens": 3302139.0, "step": 1843 }, { "epoch": 0.29859930370010523, "grad_norm": 16.981441497802734, "learning_rate": 7.01586787564767e-06, "loss": 0.5522, "mean_token_accuracy": 0.9280426800251007, "num_tokens": 3303929.0, "step": 1844 }, { "epoch": 0.2987612339081856, "grad_norm": 23.834144592285156, "learning_rate": 7.0142487046632134e-06, "loss": 0.8907, "mean_token_accuracy": 0.8959557414054871, "num_tokens": 3305727.0, "step": 1845 }, { "epoch": 0.2989231641162659, "grad_norm": 17.492734909057617, "learning_rate": 7.012629533678758e-06, "loss": 0.534, "mean_token_accuracy": 0.9230892956256866, "num_tokens": 3307512.0, "step": 1846 }, { "epoch": 0.2990850943243462, "grad_norm": 24.705543518066406, "learning_rate": 7.0110103626943015e-06, "loss": 0.7642, "mean_token_accuracy": 0.894191324710846, "num_tokens": 3309298.0, "step": 1847 }, { "epoch": 0.2992470245324265, "grad_norm": 17.670211791992188, "learning_rate": 7.009391191709846e-06, "loss": 0.5444, "mean_token_accuracy": 0.920273095369339, "num_tokens": 3311086.0, "step": 1848 }, { "epoch": 0.2994089547405068, "grad_norm": 22.412593841552734, "learning_rate": 7.0077720207253895e-06, "loss": 0.7882, "mean_token_accuracy": 0.9022997617721558, "num_tokens": 3312875.0, "step": 1849 }, { "epoch": 0.29957088494858714, "grad_norm": 21.903331756591797, "learning_rate": 7.006152849740934e-06, "loss": 0.7485, "mean_token_accuracy": 0.9199481308460236, "num_tokens": 3314662.0, "step": 1850 }, { "epoch": 0.2997328151566675, "grad_norm": 22.382152557373047, "learning_rate": 7.0045336787564775e-06, "loss": 0.7417, "mean_token_accuracy": 0.9107106626033783, "num_tokens": 3316455.0, "step": 1851 }, { "epoch": 0.2998947453647478, "grad_norm": 16.95178985595703, "learning_rate": 7.002914507772022e-06, "loss": 0.5694, "mean_token_accuracy": 0.9163933396339417, "num_tokens": 3318242.0, "step": 1852 }, { "epoch": 0.3000566755728281, "grad_norm": 21.244850158691406, "learning_rate": 7.0012953367875655e-06, "loss": 0.6569, "mean_token_accuracy": 0.9016563296318054, "num_tokens": 3320039.0, "step": 1853 }, { "epoch": 0.3002186057809084, "grad_norm": 30.909950256347656, "learning_rate": 6.99967616580311e-06, "loss": 0.9497, "mean_token_accuracy": 0.8684979379177094, "num_tokens": 3321840.0, "step": 1854 }, { "epoch": 0.30038053598898873, "grad_norm": 22.09109878540039, "learning_rate": 6.9980569948186536e-06, "loss": 0.6898, "mean_token_accuracy": 0.9082706868648529, "num_tokens": 3323625.0, "step": 1855 }, { "epoch": 0.30054246619706904, "grad_norm": 21.22285270690918, "learning_rate": 6.996437823834198e-06, "loss": 0.6648, "mean_token_accuracy": 0.9114378988742828, "num_tokens": 3325408.0, "step": 1856 }, { "epoch": 0.3007043964051494, "grad_norm": 16.059595108032227, "learning_rate": 6.994818652849742e-06, "loss": 0.6143, "mean_token_accuracy": 0.9188898801803589, "num_tokens": 3327203.0, "step": 1857 }, { "epoch": 0.3008663266132297, "grad_norm": 24.401905059814453, "learning_rate": 6.993199481865286e-06, "loss": 0.6713, "mean_token_accuracy": 0.9079736173152924, "num_tokens": 3328998.0, "step": 1858 }, { "epoch": 0.30102825682131, "grad_norm": 15.001967430114746, "learning_rate": 6.99158031088083e-06, "loss": 0.5801, "mean_token_accuracy": 0.9214604198932648, "num_tokens": 3330778.0, "step": 1859 }, { "epoch": 0.3011901870293903, "grad_norm": 17.746395111083984, "learning_rate": 6.989961139896374e-06, "loss": 0.7012, "mean_token_accuracy": 0.9071033895015717, "num_tokens": 3332559.0, "step": 1860 }, { "epoch": 0.30135211723747063, "grad_norm": 18.552631378173828, "learning_rate": 6.9883419689119185e-06, "loss": 0.5791, "mean_token_accuracy": 0.9260731339454651, "num_tokens": 3334355.0, "step": 1861 }, { "epoch": 0.301514047445551, "grad_norm": 15.442876815795898, "learning_rate": 6.986722797927462e-06, "loss": 0.5363, "mean_token_accuracy": 0.9165207743644714, "num_tokens": 3336154.0, "step": 1862 }, { "epoch": 0.3016759776536313, "grad_norm": 18.62020492553711, "learning_rate": 6.9851036269430065e-06, "loss": 0.5473, "mean_token_accuracy": 0.9289631247520447, "num_tokens": 3337948.0, "step": 1863 }, { "epoch": 0.3018379078617116, "grad_norm": 26.801982879638672, "learning_rate": 6.98348445595855e-06, "loss": 0.8929, "mean_token_accuracy": 0.9023066759109497, "num_tokens": 3339737.0, "step": 1864 }, { "epoch": 0.3019998380697919, "grad_norm": 23.987979888916016, "learning_rate": 6.9818652849740945e-06, "loss": 0.6722, "mean_token_accuracy": 0.9144199192523956, "num_tokens": 3341529.0, "step": 1865 }, { "epoch": 0.3021617682778722, "grad_norm": 25.172924041748047, "learning_rate": 6.980246113989638e-06, "loss": 0.7177, "mean_token_accuracy": 0.9084325432777405, "num_tokens": 3343325.0, "step": 1866 }, { "epoch": 0.30232369848595253, "grad_norm": 15.044196128845215, "learning_rate": 6.9786269430051826e-06, "loss": 0.5094, "mean_token_accuracy": 0.9206465184688568, "num_tokens": 3345114.0, "step": 1867 }, { "epoch": 0.3024856286940329, "grad_norm": 21.617923736572266, "learning_rate": 6.977007772020726e-06, "loss": 0.6935, "mean_token_accuracy": 0.8904095888137817, "num_tokens": 3346909.0, "step": 1868 }, { "epoch": 0.3026475589021132, "grad_norm": 23.95100212097168, "learning_rate": 6.975388601036271e-06, "loss": 0.9092, "mean_token_accuracy": 0.8981540501117706, "num_tokens": 3348696.0, "step": 1869 }, { "epoch": 0.3028094891101935, "grad_norm": 19.04384422302246, "learning_rate": 6.973769430051814e-06, "loss": 0.5468, "mean_token_accuracy": 0.9258203208446503, "num_tokens": 3350491.0, "step": 1870 }, { "epoch": 0.3029714193182738, "grad_norm": 16.7266845703125, "learning_rate": 6.972150259067359e-06, "loss": 0.5911, "mean_token_accuracy": 0.9291283786296844, "num_tokens": 3352271.0, "step": 1871 }, { "epoch": 0.3031333495263541, "grad_norm": 18.93291473388672, "learning_rate": 6.970531088082902e-06, "loss": 0.6019, "mean_token_accuracy": 0.9159350991249084, "num_tokens": 3354068.0, "step": 1872 }, { "epoch": 0.3032952797344345, "grad_norm": 18.34059715270996, "learning_rate": 6.968911917098447e-06, "loss": 0.5352, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 3355854.0, "step": 1873 }, { "epoch": 0.3034572099425148, "grad_norm": 18.49833869934082, "learning_rate": 6.96729274611399e-06, "loss": 0.6321, "mean_token_accuracy": 0.9137681126594543, "num_tokens": 3357644.0, "step": 1874 }, { "epoch": 0.3036191401505951, "grad_norm": 25.430721282958984, "learning_rate": 6.965673575129535e-06, "loss": 0.8121, "mean_token_accuracy": 0.8993876874446869, "num_tokens": 3359434.0, "step": 1875 }, { "epoch": 0.3037810703586754, "grad_norm": 27.58957862854004, "learning_rate": 6.964054404145078e-06, "loss": 0.7445, "mean_token_accuracy": 0.8917474746704102, "num_tokens": 3361223.0, "step": 1876 }, { "epoch": 0.3039430005667557, "grad_norm": 23.025615692138672, "learning_rate": 6.962435233160623e-06, "loss": 0.6832, "mean_token_accuracy": 0.9156656563282013, "num_tokens": 3363018.0, "step": 1877 }, { "epoch": 0.304104930774836, "grad_norm": 19.629365921020508, "learning_rate": 6.960816062176166e-06, "loss": 0.7063, "mean_token_accuracy": 0.9144199192523956, "num_tokens": 3364810.0, "step": 1878 }, { "epoch": 0.3042668609829164, "grad_norm": 19.332963943481445, "learning_rate": 6.959196891191711e-06, "loss": 0.5813, "mean_token_accuracy": 0.9159740209579468, "num_tokens": 3366596.0, "step": 1879 }, { "epoch": 0.3044287911909967, "grad_norm": 20.373083114624023, "learning_rate": 6.957577720207255e-06, "loss": 0.6618, "mean_token_accuracy": 0.9054433107376099, "num_tokens": 3368383.0, "step": 1880 }, { "epoch": 0.304590721399077, "grad_norm": 13.793593406677246, "learning_rate": 6.955958549222799e-06, "loss": 0.4985, "mean_token_accuracy": 0.9342925548553467, "num_tokens": 3370169.0, "step": 1881 }, { "epoch": 0.3047526516071573, "grad_norm": 15.495427131652832, "learning_rate": 6.954339378238343e-06, "loss": 0.5459, "mean_token_accuracy": 0.9253092110157013, "num_tokens": 3371962.0, "step": 1882 }, { "epoch": 0.3049145818152376, "grad_norm": 26.103240966796875, "learning_rate": 6.952720207253887e-06, "loss": 0.8723, "mean_token_accuracy": 0.9024867117404938, "num_tokens": 3373751.0, "step": 1883 }, { "epoch": 0.3050765120233179, "grad_norm": 22.593032836914062, "learning_rate": 6.951101036269431e-06, "loss": 0.652, "mean_token_accuracy": 0.9052592515945435, "num_tokens": 3375547.0, "step": 1884 }, { "epoch": 0.3052384422313983, "grad_norm": 19.726655960083008, "learning_rate": 6.949481865284975e-06, "loss": 0.5785, "mean_token_accuracy": 0.9178866446018219, "num_tokens": 3377351.0, "step": 1885 }, { "epoch": 0.3054003724394786, "grad_norm": 30.640186309814453, "learning_rate": 6.947862694300519e-06, "loss": 0.8496, "mean_token_accuracy": 0.8836050927639008, "num_tokens": 3379145.0, "step": 1886 }, { "epoch": 0.3055623026475589, "grad_norm": 21.694368362426758, "learning_rate": 6.946243523316063e-06, "loss": 0.6258, "mean_token_accuracy": 0.913159966468811, "num_tokens": 3380933.0, "step": 1887 }, { "epoch": 0.3057242328556392, "grad_norm": 24.265811920166016, "learning_rate": 6.944624352331607e-06, "loss": 0.6526, "mean_token_accuracy": 0.9049978852272034, "num_tokens": 3382719.0, "step": 1888 }, { "epoch": 0.3058861630637195, "grad_norm": 25.403676986694336, "learning_rate": 6.943005181347151e-06, "loss": 0.6758, "mean_token_accuracy": 0.897176593542099, "num_tokens": 3384512.0, "step": 1889 }, { "epoch": 0.3060480932717999, "grad_norm": 28.32275390625, "learning_rate": 6.941386010362695e-06, "loss": 0.7246, "mean_token_accuracy": 0.8904704749584198, "num_tokens": 3386307.0, "step": 1890 }, { "epoch": 0.3062100234798802, "grad_norm": 13.344834327697754, "learning_rate": 6.939766839378239e-06, "loss": 0.4958, "mean_token_accuracy": 0.9352190792560577, "num_tokens": 3388096.0, "step": 1891 }, { "epoch": 0.3063719536879605, "grad_norm": 19.27501678466797, "learning_rate": 6.938147668393783e-06, "loss": 0.642, "mean_token_accuracy": 0.9164286553859711, "num_tokens": 3389883.0, "step": 1892 }, { "epoch": 0.3065338838960408, "grad_norm": 24.48785400390625, "learning_rate": 6.936528497409327e-06, "loss": 0.7565, "mean_token_accuracy": 0.8967473804950714, "num_tokens": 3391676.0, "step": 1893 }, { "epoch": 0.3066958141041211, "grad_norm": 17.873153686523438, "learning_rate": 6.934909326424871e-06, "loss": 0.5646, "mean_token_accuracy": 0.918720155954361, "num_tokens": 3393458.0, "step": 1894 }, { "epoch": 0.3068577443122014, "grad_norm": 20.680383682250977, "learning_rate": 6.933290155440415e-06, "loss": 0.7507, "mean_token_accuracy": 0.9086743593215942, "num_tokens": 3395244.0, "step": 1895 }, { "epoch": 0.3070196745202818, "grad_norm": 23.652812957763672, "learning_rate": 6.931670984455959e-06, "loss": 0.8392, "mean_token_accuracy": 0.9061359763145447, "num_tokens": 3397033.0, "step": 1896 }, { "epoch": 0.3071816047283621, "grad_norm": 26.407339096069336, "learning_rate": 6.930051813471503e-06, "loss": 0.6942, "mean_token_accuracy": 0.9091712236404419, "num_tokens": 3398810.0, "step": 1897 }, { "epoch": 0.3073435349364424, "grad_norm": 21.341291427612305, "learning_rate": 6.928432642487047e-06, "loss": 0.594, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 3400604.0, "step": 1898 }, { "epoch": 0.3075054651445227, "grad_norm": 15.613903999328613, "learning_rate": 6.926813471502592e-06, "loss": 0.5954, "mean_token_accuracy": 0.9220707416534424, "num_tokens": 3402398.0, "step": 1899 }, { "epoch": 0.307667395352603, "grad_norm": 25.65532875061035, "learning_rate": 6.925194300518135e-06, "loss": 0.6464, "mean_token_accuracy": 0.9087617993354797, "num_tokens": 3404195.0, "step": 1900 }, { "epoch": 0.3078293255606833, "grad_norm": 27.368011474609375, "learning_rate": 6.92357512953368e-06, "loss": 0.7721, "mean_token_accuracy": 0.8879284858703613, "num_tokens": 3405992.0, "step": 1901 }, { "epoch": 0.3079912557687637, "grad_norm": 23.894681930541992, "learning_rate": 6.9219559585492234e-06, "loss": 0.8441, "mean_token_accuracy": 0.9103172123432159, "num_tokens": 3407793.0, "step": 1902 }, { "epoch": 0.308153185976844, "grad_norm": 26.8140811920166, "learning_rate": 6.920336787564768e-06, "loss": 0.7239, "mean_token_accuracy": 0.903620183467865, "num_tokens": 3409595.0, "step": 1903 }, { "epoch": 0.3083151161849243, "grad_norm": 21.525659561157227, "learning_rate": 6.9187176165803115e-06, "loss": 0.6203, "mean_token_accuracy": 0.9226754009723663, "num_tokens": 3411379.0, "step": 1904 }, { "epoch": 0.3084770463930046, "grad_norm": 17.20766830444336, "learning_rate": 6.917098445595856e-06, "loss": 0.5542, "mean_token_accuracy": 0.9230810403823853, "num_tokens": 3413165.0, "step": 1905 }, { "epoch": 0.3086389766010849, "grad_norm": 24.366456985473633, "learning_rate": 6.9154792746113995e-06, "loss": 0.6657, "mean_token_accuracy": 0.9044228792190552, "num_tokens": 3414968.0, "step": 1906 }, { "epoch": 0.3088009068091653, "grad_norm": 31.981435775756836, "learning_rate": 6.913860103626944e-06, "loss": 0.8968, "mean_token_accuracy": 0.8914006948471069, "num_tokens": 3416765.0, "step": 1907 }, { "epoch": 0.3089628370172456, "grad_norm": 22.964876174926758, "learning_rate": 6.9122409326424875e-06, "loss": 0.6266, "mean_token_accuracy": 0.9024867117404938, "num_tokens": 3418554.0, "step": 1908 }, { "epoch": 0.3091247672253259, "grad_norm": 23.065261840820312, "learning_rate": 6.910621761658032e-06, "loss": 0.7399, "mean_token_accuracy": 0.9148924648761749, "num_tokens": 3420336.0, "step": 1909 }, { "epoch": 0.3092866974334062, "grad_norm": 21.18498992919922, "learning_rate": 6.9090025906735755e-06, "loss": 0.5604, "mean_token_accuracy": 0.9206026494503021, "num_tokens": 3422125.0, "step": 1910 }, { "epoch": 0.3094486276414865, "grad_norm": 20.91034507751465, "learning_rate": 6.90738341968912e-06, "loss": 0.5784, "mean_token_accuracy": 0.9143702983856201, "num_tokens": 3423917.0, "step": 1911 }, { "epoch": 0.3096105578495668, "grad_norm": 21.986989974975586, "learning_rate": 6.9057642487046636e-06, "loss": 0.7474, "mean_token_accuracy": 0.9142578542232513, "num_tokens": 3425697.0, "step": 1912 }, { "epoch": 0.3097724880576472, "grad_norm": 25.399215698242188, "learning_rate": 6.904145077720208e-06, "loss": 0.7128, "mean_token_accuracy": 0.9093185067176819, "num_tokens": 3427496.0, "step": 1913 }, { "epoch": 0.3099344182657275, "grad_norm": 19.495424270629883, "learning_rate": 6.902525906735752e-06, "loss": 0.5762, "mean_token_accuracy": 0.9114588499069214, "num_tokens": 3429291.0, "step": 1914 }, { "epoch": 0.3100963484738078, "grad_norm": 23.71278953552246, "learning_rate": 6.900906735751296e-06, "loss": 0.6681, "mean_token_accuracy": 0.9090617895126343, "num_tokens": 3431088.0, "step": 1915 }, { "epoch": 0.3102582786818881, "grad_norm": 15.605432510375977, "learning_rate": 6.89928756476684e-06, "loss": 0.5833, "mean_token_accuracy": 0.9214173555374146, "num_tokens": 3432867.0, "step": 1916 }, { "epoch": 0.3104202088899684, "grad_norm": 19.36934471130371, "learning_rate": 6.897668393782384e-06, "loss": 0.613, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 3434652.0, "step": 1917 }, { "epoch": 0.3105821390980487, "grad_norm": 24.261260986328125, "learning_rate": 6.8960492227979285e-06, "loss": 0.72, "mean_token_accuracy": 0.8996146023273468, "num_tokens": 3436443.0, "step": 1918 }, { "epoch": 0.3107440693061291, "grad_norm": 18.746381759643555, "learning_rate": 6.894430051813472e-06, "loss": 0.5813, "mean_token_accuracy": 0.9184607565402985, "num_tokens": 3438237.0, "step": 1919 }, { "epoch": 0.3109059995142094, "grad_norm": 25.644567489624023, "learning_rate": 6.8928108808290165e-06, "loss": 0.8296, "mean_token_accuracy": 0.8938788175582886, "num_tokens": 3440039.0, "step": 1920 }, { "epoch": 0.3110679297222897, "grad_norm": 18.823265075683594, "learning_rate": 6.89119170984456e-06, "loss": 0.6231, "mean_token_accuracy": 0.9175146520137787, "num_tokens": 3441830.0, "step": 1921 }, { "epoch": 0.31122985993037, "grad_norm": 34.07335662841797, "learning_rate": 6.8895725388601046e-06, "loss": 0.9359, "mean_token_accuracy": 0.8788793087005615, "num_tokens": 3443631.0, "step": 1922 }, { "epoch": 0.3113917901384503, "grad_norm": 24.817089080810547, "learning_rate": 6.887953367875648e-06, "loss": 0.7112, "mean_token_accuracy": 0.9011238813400269, "num_tokens": 3445426.0, "step": 1923 }, { "epoch": 0.31155372034653067, "grad_norm": 17.930213928222656, "learning_rate": 6.886334196891193e-06, "loss": 0.5673, "mean_token_accuracy": 0.924413800239563, "num_tokens": 3447217.0, "step": 1924 }, { "epoch": 0.311715650554611, "grad_norm": 21.13175392150879, "learning_rate": 6.884715025906736e-06, "loss": 0.5363, "mean_token_accuracy": 0.9273166060447693, "num_tokens": 3449017.0, "step": 1925 }, { "epoch": 0.3118775807626913, "grad_norm": 22.420001983642578, "learning_rate": 6.883095854922281e-06, "loss": 0.6276, "mean_token_accuracy": 0.9131964445114136, "num_tokens": 3450805.0, "step": 1926 }, { "epoch": 0.3120395109707716, "grad_norm": 18.214536666870117, "learning_rate": 6.881476683937824e-06, "loss": 0.5256, "mean_token_accuracy": 0.9271321892738342, "num_tokens": 3452591.0, "step": 1927 }, { "epoch": 0.3122014411788519, "grad_norm": 22.271533966064453, "learning_rate": 6.879857512953369e-06, "loss": 0.5691, "mean_token_accuracy": 0.9136956036090851, "num_tokens": 3454392.0, "step": 1928 }, { "epoch": 0.3123633713869322, "grad_norm": 30.941083908081055, "learning_rate": 6.878238341968912e-06, "loss": 1.1504, "mean_token_accuracy": 0.8910346925258636, "num_tokens": 3456186.0, "step": 1929 }, { "epoch": 0.3125253015950126, "grad_norm": 19.465234756469727, "learning_rate": 6.876619170984457e-06, "loss": 0.5998, "mean_token_accuracy": 0.9128319621086121, "num_tokens": 3457971.0, "step": 1930 }, { "epoch": 0.3126872318030929, "grad_norm": 16.59850311279297, "learning_rate": 6.875e-06, "loss": 0.5629, "mean_token_accuracy": 0.919047623872757, "num_tokens": 3459755.0, "step": 1931 }, { "epoch": 0.3128491620111732, "grad_norm": 20.765493392944336, "learning_rate": 6.873380829015545e-06, "loss": 0.6289, "mean_token_accuracy": 0.9211459457874298, "num_tokens": 3461546.0, "step": 1932 }, { "epoch": 0.3130110922192535, "grad_norm": 26.060544967651367, "learning_rate": 6.871761658031088e-06, "loss": 0.6832, "mean_token_accuracy": 0.9129863977432251, "num_tokens": 3463334.0, "step": 1933 }, { "epoch": 0.3131730224273338, "grad_norm": 19.259361267089844, "learning_rate": 6.870142487046633e-06, "loss": 0.5682, "mean_token_accuracy": 0.9171325266361237, "num_tokens": 3465124.0, "step": 1934 }, { "epoch": 0.3133349526354141, "grad_norm": 32.98956298828125, "learning_rate": 6.868523316062176e-06, "loss": 0.8951, "mean_token_accuracy": 0.8826494514942169, "num_tokens": 3466917.0, "step": 1935 }, { "epoch": 0.3134968828434945, "grad_norm": 25.334579467773438, "learning_rate": 6.866904145077721e-06, "loss": 0.8699, "mean_token_accuracy": 0.906528502702713, "num_tokens": 3468697.0, "step": 1936 }, { "epoch": 0.3136588130515748, "grad_norm": 21.44000244140625, "learning_rate": 6.865284974093265e-06, "loss": 0.6089, "mean_token_accuracy": 0.9114553928375244, "num_tokens": 3470501.0, "step": 1937 }, { "epoch": 0.3138207432596551, "grad_norm": 22.911813735961914, "learning_rate": 6.863665803108809e-06, "loss": 0.6441, "mean_token_accuracy": 0.9065613150596619, "num_tokens": 3472302.0, "step": 1938 }, { "epoch": 0.3139826734677354, "grad_norm": 25.368274688720703, "learning_rate": 6.862046632124353e-06, "loss": 0.8079, "mean_token_accuracy": 0.8980347812175751, "num_tokens": 3474096.0, "step": 1939 }, { "epoch": 0.3141446036758157, "grad_norm": 24.771181106567383, "learning_rate": 6.860427461139897e-06, "loss": 0.6943, "mean_token_accuracy": 0.9033122956752777, "num_tokens": 3475877.0, "step": 1940 }, { "epoch": 0.31430653388389607, "grad_norm": 19.39136505126953, "learning_rate": 6.858808290155441e-06, "loss": 0.5568, "mean_token_accuracy": 0.9175740778446198, "num_tokens": 3477656.0, "step": 1941 }, { "epoch": 0.3144684640919764, "grad_norm": 24.30678367614746, "learning_rate": 6.857189119170985e-06, "loss": 0.6816, "mean_token_accuracy": 0.9056722819805145, "num_tokens": 3479444.0, "step": 1942 }, { "epoch": 0.3146303943000567, "grad_norm": 15.824822425842285, "learning_rate": 6.855569948186529e-06, "loss": 0.5442, "mean_token_accuracy": 0.9248643219470978, "num_tokens": 3481222.0, "step": 1943 }, { "epoch": 0.314792324508137, "grad_norm": 29.671920776367188, "learning_rate": 6.853950777202073e-06, "loss": 0.8447, "mean_token_accuracy": 0.8927496075630188, "num_tokens": 3483014.0, "step": 1944 }, { "epoch": 0.3149542547162173, "grad_norm": 21.041963577270508, "learning_rate": 6.852331606217617e-06, "loss": 0.5611, "mean_token_accuracy": 0.9127962291240692, "num_tokens": 3484801.0, "step": 1945 }, { "epoch": 0.3151161849242976, "grad_norm": 28.425615310668945, "learning_rate": 6.850712435233161e-06, "loss": 0.7455, "mean_token_accuracy": 0.8961202502250671, "num_tokens": 3486592.0, "step": 1946 }, { "epoch": 0.31527811513237797, "grad_norm": 24.378219604492188, "learning_rate": 6.849093264248705e-06, "loss": 0.6984, "mean_token_accuracy": 0.8991971015930176, "num_tokens": 3488382.0, "step": 1947 }, { "epoch": 0.3154400453404583, "grad_norm": 30.380659103393555, "learning_rate": 6.847474093264249e-06, "loss": 0.9292, "mean_token_accuracy": 0.8788124322891235, "num_tokens": 3490166.0, "step": 1948 }, { "epoch": 0.3156019755485386, "grad_norm": 17.325603485107422, "learning_rate": 6.845854922279793e-06, "loss": 0.5398, "mean_token_accuracy": 0.9238302707672119, "num_tokens": 3491954.0, "step": 1949 }, { "epoch": 0.3157639057566189, "grad_norm": 14.562023162841797, "learning_rate": 6.844235751295337e-06, "loss": 0.557, "mean_token_accuracy": 0.9157631993293762, "num_tokens": 3493739.0, "step": 1950 }, { "epoch": 0.3159258359646992, "grad_norm": 19.599218368530273, "learning_rate": 6.842616580310881e-06, "loss": 0.5676, "mean_token_accuracy": 0.9235645532608032, "num_tokens": 3495538.0, "step": 1951 }, { "epoch": 0.3160877661727795, "grad_norm": 17.00807762145996, "learning_rate": 6.840997409326425e-06, "loss": 0.5828, "mean_token_accuracy": 0.9196504652500153, "num_tokens": 3497324.0, "step": 1952 }, { "epoch": 0.31624969638085987, "grad_norm": 21.016956329345703, "learning_rate": 6.839378238341969e-06, "loss": 0.6203, "mean_token_accuracy": 0.925253301858902, "num_tokens": 3499117.0, "step": 1953 }, { "epoch": 0.3164116265889402, "grad_norm": 19.136619567871094, "learning_rate": 6.837759067357513e-06, "loss": 0.6435, "mean_token_accuracy": 0.9066585004329681, "num_tokens": 3500909.0, "step": 1954 }, { "epoch": 0.3165735567970205, "grad_norm": 18.964651107788086, "learning_rate": 6.836139896373057e-06, "loss": 0.7001, "mean_token_accuracy": 0.9048619270324707, "num_tokens": 3502704.0, "step": 1955 }, { "epoch": 0.3167354870051008, "grad_norm": 25.121936798095703, "learning_rate": 6.834520725388602e-06, "loss": 0.6829, "mean_token_accuracy": 0.905919760465622, "num_tokens": 3504502.0, "step": 1956 }, { "epoch": 0.3168974172131811, "grad_norm": 23.669727325439453, "learning_rate": 6.8329015544041454e-06, "loss": 0.5991, "mean_token_accuracy": 0.9120011925697327, "num_tokens": 3506298.0, "step": 1957 }, { "epoch": 0.31705934742126146, "grad_norm": 21.615951538085938, "learning_rate": 6.83128238341969e-06, "loss": 0.6623, "mean_token_accuracy": 0.9109025001525879, "num_tokens": 3508080.0, "step": 1958 }, { "epoch": 0.31722127762934177, "grad_norm": 21.237550735473633, "learning_rate": 6.8296632124352335e-06, "loss": 0.6103, "mean_token_accuracy": 0.9176004827022552, "num_tokens": 3509871.0, "step": 1959 }, { "epoch": 0.3173832078374221, "grad_norm": 24.932605743408203, "learning_rate": 6.828044041450778e-06, "loss": 0.7593, "mean_token_accuracy": 0.8964575529098511, "num_tokens": 3511663.0, "step": 1960 }, { "epoch": 0.3175451380455024, "grad_norm": 21.199174880981445, "learning_rate": 6.8264248704663215e-06, "loss": 0.6658, "mean_token_accuracy": 0.9043275713920593, "num_tokens": 3513456.0, "step": 1961 }, { "epoch": 0.3177070682535827, "grad_norm": 22.532611846923828, "learning_rate": 6.824805699481866e-06, "loss": 0.7125, "mean_token_accuracy": 0.9090404212474823, "num_tokens": 3515243.0, "step": 1962 }, { "epoch": 0.317868998461663, "grad_norm": 17.341398239135742, "learning_rate": 6.8231865284974095e-06, "loss": 0.5549, "mean_token_accuracy": 0.9179342985153198, "num_tokens": 3517047.0, "step": 1963 }, { "epoch": 0.31803092866974336, "grad_norm": 25.42489242553711, "learning_rate": 6.821567357512954e-06, "loss": 0.8635, "mean_token_accuracy": 0.8838652670383453, "num_tokens": 3518844.0, "step": 1964 }, { "epoch": 0.31819285887782367, "grad_norm": 17.559890747070312, "learning_rate": 6.8199481865284975e-06, "loss": 0.5887, "mean_token_accuracy": 0.9091269969940186, "num_tokens": 3520631.0, "step": 1965 }, { "epoch": 0.318354789085904, "grad_norm": 21.703004837036133, "learning_rate": 6.818329015544042e-06, "loss": 0.6578, "mean_token_accuracy": 0.926225483417511, "num_tokens": 3522414.0, "step": 1966 }, { "epoch": 0.3185167192939843, "grad_norm": 25.73619270324707, "learning_rate": 6.8167098445595856e-06, "loss": 0.7461, "mean_token_accuracy": 0.8907374143600464, "num_tokens": 3524201.0, "step": 1967 }, { "epoch": 0.3186786495020646, "grad_norm": 21.41761589050293, "learning_rate": 6.81509067357513e-06, "loss": 0.6666, "mean_token_accuracy": 0.9065134227275848, "num_tokens": 3525993.0, "step": 1968 }, { "epoch": 0.3188405797101449, "grad_norm": 16.376726150512695, "learning_rate": 6.813471502590674e-06, "loss": 0.5905, "mean_token_accuracy": 0.9169117510318756, "num_tokens": 3527781.0, "step": 1969 }, { "epoch": 0.31900250991822526, "grad_norm": 28.338106155395508, "learning_rate": 6.811852331606218e-06, "loss": 0.8056, "mean_token_accuracy": 0.8929156363010406, "num_tokens": 3529573.0, "step": 1970 }, { "epoch": 0.31916444012630557, "grad_norm": 19.332244873046875, "learning_rate": 6.810233160621762e-06, "loss": 0.5974, "mean_token_accuracy": 0.9200184643268585, "num_tokens": 3531372.0, "step": 1971 }, { "epoch": 0.3193263703343859, "grad_norm": 20.007776260375977, "learning_rate": 6.808613989637306e-06, "loss": 0.6484, "mean_token_accuracy": 0.910084992647171, "num_tokens": 3533162.0, "step": 1972 }, { "epoch": 0.3194883005424662, "grad_norm": 13.949797630310059, "learning_rate": 6.80699481865285e-06, "loss": 0.4969, "mean_token_accuracy": 0.9313608109951019, "num_tokens": 3534951.0, "step": 1973 }, { "epoch": 0.3196502307505465, "grad_norm": 22.358320236206055, "learning_rate": 6.805375647668394e-06, "loss": 0.6605, "mean_token_accuracy": 0.9104166626930237, "num_tokens": 3536742.0, "step": 1974 }, { "epoch": 0.31981216095862686, "grad_norm": 28.46381950378418, "learning_rate": 6.8037564766839385e-06, "loss": 0.8002, "mean_token_accuracy": 0.8937085866928101, "num_tokens": 3538545.0, "step": 1975 }, { "epoch": 0.31997409116670716, "grad_norm": 19.395126342773438, "learning_rate": 6.802137305699482e-06, "loss": 0.6045, "mean_token_accuracy": 0.9068088531494141, "num_tokens": 3540336.0, "step": 1976 }, { "epoch": 0.32013602137478747, "grad_norm": 25.95215606689453, "learning_rate": 6.8005181347150265e-06, "loss": 0.8335, "mean_token_accuracy": 0.8960882127285004, "num_tokens": 3542127.0, "step": 1977 }, { "epoch": 0.3202979515828678, "grad_norm": 18.66096305847168, "learning_rate": 6.79889896373057e-06, "loss": 0.5855, "mean_token_accuracy": 0.9215896725654602, "num_tokens": 3543919.0, "step": 1978 }, { "epoch": 0.3204598817909481, "grad_norm": 25.1312313079834, "learning_rate": 6.7972797927461146e-06, "loss": 0.719, "mean_token_accuracy": 0.906470000743866, "num_tokens": 3545709.0, "step": 1979 }, { "epoch": 0.3206218119990284, "grad_norm": 18.188488006591797, "learning_rate": 6.795660621761658e-06, "loss": 0.5693, "mean_token_accuracy": 0.9198676943778992, "num_tokens": 3547495.0, "step": 1980 }, { "epoch": 0.32078374220710876, "grad_norm": 18.340505599975586, "learning_rate": 6.794041450777203e-06, "loss": 0.5744, "mean_token_accuracy": 0.9162003695964813, "num_tokens": 3549281.0, "step": 1981 }, { "epoch": 0.32094567241518906, "grad_norm": 25.959964752197266, "learning_rate": 6.792422279792746e-06, "loss": 0.7136, "mean_token_accuracy": 0.9021967649459839, "num_tokens": 3551069.0, "step": 1982 }, { "epoch": 0.3211076026232694, "grad_norm": 26.811525344848633, "learning_rate": 6.790803108808291e-06, "loss": 0.7483, "mean_token_accuracy": 0.8895547986030579, "num_tokens": 3552863.0, "step": 1983 }, { "epoch": 0.3212695328313497, "grad_norm": 26.007314682006836, "learning_rate": 6.789183937823834e-06, "loss": 0.6691, "mean_token_accuracy": 0.9056684076786041, "num_tokens": 3554649.0, "step": 1984 }, { "epoch": 0.32143146303943, "grad_norm": 31.120197296142578, "learning_rate": 6.787564766839379e-06, "loss": 1.0163, "mean_token_accuracy": 0.8775861859321594, "num_tokens": 3556446.0, "step": 1985 }, { "epoch": 0.32159339324751035, "grad_norm": 18.893774032592773, "learning_rate": 6.785945595854922e-06, "loss": 0.5843, "mean_token_accuracy": 0.9166885316371918, "num_tokens": 3558234.0, "step": 1986 }, { "epoch": 0.32175532345559066, "grad_norm": 33.02800750732422, "learning_rate": 6.784326424870467e-06, "loss": 0.8639, "mean_token_accuracy": 0.886833906173706, "num_tokens": 3560037.0, "step": 1987 }, { "epoch": 0.32191725366367097, "grad_norm": 21.673070907592773, "learning_rate": 6.78270725388601e-06, "loss": 0.6506, "mean_token_accuracy": 0.9175745248794556, "num_tokens": 3561828.0, "step": 1988 }, { "epoch": 0.3220791838717513, "grad_norm": 21.844501495361328, "learning_rate": 6.781088082901555e-06, "loss": 0.6004, "mean_token_accuracy": 0.9182330667972565, "num_tokens": 3563632.0, "step": 1989 }, { "epoch": 0.3222411140798316, "grad_norm": 13.191789627075195, "learning_rate": 6.779468911917098e-06, "loss": 0.4856, "mean_token_accuracy": 0.9226222932338715, "num_tokens": 3565415.0, "step": 1990 }, { "epoch": 0.3224030442879119, "grad_norm": 28.812782287597656, "learning_rate": 6.777849740932643e-06, "loss": 0.7094, "mean_token_accuracy": 0.8956834375858307, "num_tokens": 3567205.0, "step": 1991 }, { "epoch": 0.32256497449599225, "grad_norm": 9.942994117736816, "learning_rate": 6.776230569948186e-06, "loss": 0.448, "mean_token_accuracy": 0.9384453892707825, "num_tokens": 3568993.0, "step": 1992 }, { "epoch": 0.32272690470407256, "grad_norm": 21.88311195373535, "learning_rate": 6.774611398963731e-06, "loss": 0.6143, "mean_token_accuracy": 0.9302331507205963, "num_tokens": 3570792.0, "step": 1993 }, { "epoch": 0.32288883491215287, "grad_norm": 24.2215576171875, "learning_rate": 6.772992227979275e-06, "loss": 0.6833, "mean_token_accuracy": 0.9200254082679749, "num_tokens": 3572579.0, "step": 1994 }, { "epoch": 0.3230507651202332, "grad_norm": 20.88166046142578, "learning_rate": 6.771373056994819e-06, "loss": 0.6339, "mean_token_accuracy": 0.9164022207260132, "num_tokens": 3574366.0, "step": 1995 }, { "epoch": 0.3232126953283135, "grad_norm": 22.844417572021484, "learning_rate": 6.769753886010363e-06, "loss": 0.6878, "mean_token_accuracy": 0.9099134206771851, "num_tokens": 3576155.0, "step": 1996 }, { "epoch": 0.3233746255363938, "grad_norm": 21.03485870361328, "learning_rate": 6.768134715025907e-06, "loss": 0.6386, "mean_token_accuracy": 0.9197080433368683, "num_tokens": 3577941.0, "step": 1997 }, { "epoch": 0.32353655574447415, "grad_norm": 27.215187072753906, "learning_rate": 6.766515544041451e-06, "loss": 0.8277, "mean_token_accuracy": 0.9036587774753571, "num_tokens": 3579731.0, "step": 1998 }, { "epoch": 0.32369848595255446, "grad_norm": 22.64470672607422, "learning_rate": 6.764896373056995e-06, "loss": 0.6064, "mean_token_accuracy": 0.9181357622146606, "num_tokens": 3581524.0, "step": 1999 }, { "epoch": 0.32386041616063477, "grad_norm": 22.80707550048828, "learning_rate": 6.763277202072539e-06, "loss": 0.6502, "mean_token_accuracy": 0.9168103933334351, "num_tokens": 3583325.0, "step": 2000 }, { "epoch": 0.3240223463687151, "grad_norm": 26.383079528808594, "learning_rate": 6.761658031088083e-06, "loss": 0.6784, "mean_token_accuracy": 0.9136288166046143, "num_tokens": 3585115.0, "step": 2001 }, { "epoch": 0.3241842765767954, "grad_norm": 25.72802734375, "learning_rate": 6.760038860103627e-06, "loss": 0.626, "mean_token_accuracy": 0.9035947918891907, "num_tokens": 3586907.0, "step": 2002 }, { "epoch": 0.32434620678487575, "grad_norm": 24.603004455566406, "learning_rate": 6.758419689119171e-06, "loss": 0.7181, "mean_token_accuracy": 0.9002188444137573, "num_tokens": 3588709.0, "step": 2003 }, { "epoch": 0.32450813699295605, "grad_norm": 20.02543830871582, "learning_rate": 6.756800518134715e-06, "loss": 0.5884, "mean_token_accuracy": 0.9172143638134003, "num_tokens": 3590497.0, "step": 2004 }, { "epoch": 0.32467006720103636, "grad_norm": 23.4384822845459, "learning_rate": 6.755181347150259e-06, "loss": 0.6238, "mean_token_accuracy": 0.9175007045269012, "num_tokens": 3592275.0, "step": 2005 }, { "epoch": 0.32483199740911667, "grad_norm": 28.369173049926758, "learning_rate": 6.753562176165803e-06, "loss": 0.6712, "mean_token_accuracy": 0.902777761220932, "num_tokens": 3594075.0, "step": 2006 }, { "epoch": 0.324993927617197, "grad_norm": 20.84299087524414, "learning_rate": 6.751943005181347e-06, "loss": 0.6576, "mean_token_accuracy": 0.9120141863822937, "num_tokens": 3595872.0, "step": 2007 }, { "epoch": 0.3251558578252773, "grad_norm": 27.899316787719727, "learning_rate": 6.750323834196891e-06, "loss": 0.8756, "mean_token_accuracy": 0.9035714268684387, "num_tokens": 3597664.0, "step": 2008 }, { "epoch": 0.32531778803335765, "grad_norm": 28.193729400634766, "learning_rate": 6.748704663212435e-06, "loss": 0.6159, "mean_token_accuracy": 0.9147864580154419, "num_tokens": 3599458.0, "step": 2009 }, { "epoch": 0.32547971824143795, "grad_norm": 21.17216682434082, "learning_rate": 6.747085492227979e-06, "loss": 0.6979, "mean_token_accuracy": 0.9143323600292206, "num_tokens": 3601250.0, "step": 2010 }, { "epoch": 0.32564164844951826, "grad_norm": 20.555675506591797, "learning_rate": 6.745466321243524e-06, "loss": 0.6367, "mean_token_accuracy": 0.9260968863964081, "num_tokens": 3603046.0, "step": 2011 }, { "epoch": 0.32580357865759857, "grad_norm": 25.259862899780273, "learning_rate": 6.743847150259067e-06, "loss": 0.7043, "mean_token_accuracy": 0.9092986583709717, "num_tokens": 3604834.0, "step": 2012 }, { "epoch": 0.3259655088656789, "grad_norm": 24.507728576660156, "learning_rate": 6.742227979274612e-06, "loss": 0.7654, "mean_token_accuracy": 0.8953706622123718, "num_tokens": 3606623.0, "step": 2013 }, { "epoch": 0.3261274390737592, "grad_norm": 24.899192810058594, "learning_rate": 6.7406088082901554e-06, "loss": 0.841, "mean_token_accuracy": 0.9067947864532471, "num_tokens": 3608403.0, "step": 2014 }, { "epoch": 0.32628936928183955, "grad_norm": 15.977191925048828, "learning_rate": 6.7389896373057e-06, "loss": 0.5683, "mean_token_accuracy": 0.9286187887191772, "num_tokens": 3610195.0, "step": 2015 }, { "epoch": 0.32645129948991986, "grad_norm": 17.4147891998291, "learning_rate": 6.7373704663212435e-06, "loss": 0.597, "mean_token_accuracy": 0.9253731369972229, "num_tokens": 3611975.0, "step": 2016 }, { "epoch": 0.32661322969800016, "grad_norm": 24.433765411376953, "learning_rate": 6.735751295336788e-06, "loss": 0.8006, "mean_token_accuracy": 0.8985491693019867, "num_tokens": 3613762.0, "step": 2017 }, { "epoch": 0.32677515990608047, "grad_norm": 25.415340423583984, "learning_rate": 6.7341321243523315e-06, "loss": 0.7494, "mean_token_accuracy": 0.8961718380451202, "num_tokens": 3615549.0, "step": 2018 }, { "epoch": 0.3269370901141608, "grad_norm": 28.617385864257812, "learning_rate": 6.732512953367876e-06, "loss": 0.7518, "mean_token_accuracy": 0.8899040818214417, "num_tokens": 3617350.0, "step": 2019 }, { "epoch": 0.32709902032224114, "grad_norm": 24.829530715942383, "learning_rate": 6.7308937823834195e-06, "loss": 0.8476, "mean_token_accuracy": 0.8891937732696533, "num_tokens": 3619132.0, "step": 2020 }, { "epoch": 0.32726095053032145, "grad_norm": 26.703296661376953, "learning_rate": 6.729274611398964e-06, "loss": 0.7448, "mean_token_accuracy": 0.905139833688736, "num_tokens": 3620926.0, "step": 2021 }, { "epoch": 0.32742288073840176, "grad_norm": 31.412715911865234, "learning_rate": 6.7276554404145076e-06, "loss": 0.8586, "mean_token_accuracy": 0.8864319622516632, "num_tokens": 3622728.0, "step": 2022 }, { "epoch": 0.32758481094648206, "grad_norm": 16.676950454711914, "learning_rate": 6.726036269430052e-06, "loss": 0.5005, "mean_token_accuracy": 0.9283071458339691, "num_tokens": 3624519.0, "step": 2023 }, { "epoch": 0.32774674115456237, "grad_norm": 26.585298538208008, "learning_rate": 6.724417098445596e-06, "loss": 0.7084, "mean_token_accuracy": 0.8922753632068634, "num_tokens": 3626319.0, "step": 2024 }, { "epoch": 0.3279086713626427, "grad_norm": 16.874208450317383, "learning_rate": 6.72279792746114e-06, "loss": 0.6067, "mean_token_accuracy": 0.9233440160751343, "num_tokens": 3628118.0, "step": 2025 }, { "epoch": 0.32807060157072304, "grad_norm": 23.043498992919922, "learning_rate": 6.721178756476684e-06, "loss": 0.7078, "mean_token_accuracy": 0.9078603386878967, "num_tokens": 3629901.0, "step": 2026 }, { "epoch": 0.32823253177880335, "grad_norm": 25.120412826538086, "learning_rate": 6.719559585492228e-06, "loss": 0.9259, "mean_token_accuracy": 0.896541953086853, "num_tokens": 3631704.0, "step": 2027 }, { "epoch": 0.32839446198688366, "grad_norm": 20.261638641357422, "learning_rate": 6.717940414507773e-06, "loss": 0.5755, "mean_token_accuracy": 0.9192405343055725, "num_tokens": 3633488.0, "step": 2028 }, { "epoch": 0.32855639219496396, "grad_norm": 27.570838928222656, "learning_rate": 6.716321243523317e-06, "loss": 0.76, "mean_token_accuracy": 0.8926311731338501, "num_tokens": 3635280.0, "step": 2029 }, { "epoch": 0.32871832240304427, "grad_norm": 17.54373550415039, "learning_rate": 6.714702072538861e-06, "loss": 0.5781, "mean_token_accuracy": 0.9218370020389557, "num_tokens": 3637061.0, "step": 2030 }, { "epoch": 0.3288802526111246, "grad_norm": 22.089427947998047, "learning_rate": 6.713082901554405e-06, "loss": 0.7107, "mean_token_accuracy": 0.9107623100280762, "num_tokens": 3638843.0, "step": 2031 }, { "epoch": 0.32904218281920494, "grad_norm": 27.783069610595703, "learning_rate": 6.711463730569949e-06, "loss": 0.8794, "mean_token_accuracy": 0.8858237564563751, "num_tokens": 3640644.0, "step": 2032 }, { "epoch": 0.32920411302728525, "grad_norm": 24.617048263549805, "learning_rate": 6.709844559585493e-06, "loss": 0.7112, "mean_token_accuracy": 0.8977023065090179, "num_tokens": 3642439.0, "step": 2033 }, { "epoch": 0.32936604323536556, "grad_norm": 16.71503257751465, "learning_rate": 6.708225388601037e-06, "loss": 0.5286, "mean_token_accuracy": 0.9290726780891418, "num_tokens": 3644219.0, "step": 2034 }, { "epoch": 0.32952797344344587, "grad_norm": 23.588836669921875, "learning_rate": 6.706606217616581e-06, "loss": 0.6035, "mean_token_accuracy": 0.9159002006053925, "num_tokens": 3646017.0, "step": 2035 }, { "epoch": 0.3296899036515262, "grad_norm": 18.560108184814453, "learning_rate": 6.7049870466321254e-06, "loss": 0.613, "mean_token_accuracy": 0.9233440160751343, "num_tokens": 3647816.0, "step": 2036 }, { "epoch": 0.32985183385960654, "grad_norm": 26.22181510925293, "learning_rate": 6.70336787564767e-06, "loss": 0.8454, "mean_token_accuracy": 0.8948275744915009, "num_tokens": 3649613.0, "step": 2037 }, { "epoch": 0.33001376406768684, "grad_norm": 18.475557327270508, "learning_rate": 6.7017487046632135e-06, "loss": 0.554, "mean_token_accuracy": 0.9198883175849915, "num_tokens": 3651412.0, "step": 2038 }, { "epoch": 0.33017569427576715, "grad_norm": 18.607297897338867, "learning_rate": 6.700129533678758e-06, "loss": 0.5602, "mean_token_accuracy": 0.9221243560314178, "num_tokens": 3653206.0, "step": 2039 }, { "epoch": 0.33033762448384746, "grad_norm": 20.52931022644043, "learning_rate": 6.6985103626943015e-06, "loss": 0.7029, "mean_token_accuracy": 0.9153079688549042, "num_tokens": 3655000.0, "step": 2040 }, { "epoch": 0.33049955469192777, "grad_norm": 25.807109832763672, "learning_rate": 6.696891191709846e-06, "loss": 0.7485, "mean_token_accuracy": 0.893869936466217, "num_tokens": 3656786.0, "step": 2041 }, { "epoch": 0.3306614849000081, "grad_norm": 22.523910522460938, "learning_rate": 6.6952720207253895e-06, "loss": 0.7192, "mean_token_accuracy": 0.9076103568077087, "num_tokens": 3658579.0, "step": 2042 }, { "epoch": 0.33082341510808844, "grad_norm": 31.554567337036133, "learning_rate": 6.693652849740934e-06, "loss": 0.6749, "mean_token_accuracy": 0.9000000059604645, "num_tokens": 3660371.0, "step": 2043 }, { "epoch": 0.33098534531616874, "grad_norm": 19.515472412109375, "learning_rate": 6.6920336787564775e-06, "loss": 0.6109, "mean_token_accuracy": 0.9102682769298553, "num_tokens": 3662162.0, "step": 2044 }, { "epoch": 0.33114727552424905, "grad_norm": 24.487789154052734, "learning_rate": 6.690414507772022e-06, "loss": 0.8155, "mean_token_accuracy": 0.8914916217327118, "num_tokens": 3663950.0, "step": 2045 }, { "epoch": 0.33130920573232936, "grad_norm": 18.697772979736328, "learning_rate": 6.6887953367875656e-06, "loss": 0.5862, "mean_token_accuracy": 0.925220400094986, "num_tokens": 3665743.0, "step": 2046 }, { "epoch": 0.33147113594040967, "grad_norm": 13.087461471557617, "learning_rate": 6.68717616580311e-06, "loss": 0.5326, "mean_token_accuracy": 0.927003413438797, "num_tokens": 3667529.0, "step": 2047 }, { "epoch": 0.33163306614849, "grad_norm": 24.593027114868164, "learning_rate": 6.685556994818654e-06, "loss": 0.6975, "mean_token_accuracy": 0.911445826292038, "num_tokens": 3669324.0, "step": 2048 }, { "epoch": 0.33179499635657034, "grad_norm": 20.102643966674805, "learning_rate": 6.683937823834198e-06, "loss": 0.5943, "mean_token_accuracy": 0.925000011920929, "num_tokens": 3671115.0, "step": 2049 }, { "epoch": 0.33195692656465065, "grad_norm": 25.548480987548828, "learning_rate": 6.682318652849742e-06, "loss": 0.7083, "mean_token_accuracy": 0.89896559715271, "num_tokens": 3672914.0, "step": 2050 }, { "epoch": 0.33211885677273095, "grad_norm": 26.462007522583008, "learning_rate": 6.680699481865286e-06, "loss": 0.8313, "mean_token_accuracy": 0.8949579894542694, "num_tokens": 3674702.0, "step": 2051 }, { "epoch": 0.33228078698081126, "grad_norm": 18.874984741210938, "learning_rate": 6.67908031088083e-06, "loss": 0.6242, "mean_token_accuracy": 0.91366907954216, "num_tokens": 3676492.0, "step": 2052 }, { "epoch": 0.33244271718889157, "grad_norm": 26.781715393066406, "learning_rate": 6.677461139896374e-06, "loss": 0.8172, "mean_token_accuracy": 0.8842191100120544, "num_tokens": 3678297.0, "step": 2053 }, { "epoch": 0.33260464739697193, "grad_norm": 21.541614532470703, "learning_rate": 6.675841968911918e-06, "loss": 0.6176, "mean_token_accuracy": 0.9157900214195251, "num_tokens": 3680082.0, "step": 2054 }, { "epoch": 0.33276657760505224, "grad_norm": 15.959024429321289, "learning_rate": 6.674222797927462e-06, "loss": 0.5471, "mean_token_accuracy": 0.9268921315670013, "num_tokens": 3681867.0, "step": 2055 }, { "epoch": 0.33292850781313255, "grad_norm": 18.403867721557617, "learning_rate": 6.6726036269430065e-06, "loss": 0.6644, "mean_token_accuracy": 0.9185185432434082, "num_tokens": 3683649.0, "step": 2056 }, { "epoch": 0.33309043802121285, "grad_norm": 17.219331741333008, "learning_rate": 6.67098445595855e-06, "loss": 0.6022, "mean_token_accuracy": 0.9277708232402802, "num_tokens": 3685438.0, "step": 2057 }, { "epoch": 0.33325236822929316, "grad_norm": 25.002901077270508, "learning_rate": 6.6693652849740946e-06, "loss": 0.7181, "mean_token_accuracy": 0.9075932800769806, "num_tokens": 3687231.0, "step": 2058 }, { "epoch": 0.33341429843737347, "grad_norm": 17.4008846282959, "learning_rate": 6.667746113989638e-06, "loss": 0.5938, "mean_token_accuracy": 0.9212316572666168, "num_tokens": 3689023.0, "step": 2059 }, { "epoch": 0.33357622864545383, "grad_norm": 20.410114288330078, "learning_rate": 6.666126943005183e-06, "loss": 0.6496, "mean_token_accuracy": 0.9204900860786438, "num_tokens": 3690812.0, "step": 2060 }, { "epoch": 0.33373815885353414, "grad_norm": 22.921175003051758, "learning_rate": 6.664507772020726e-06, "loss": 0.6911, "mean_token_accuracy": 0.9034899771213531, "num_tokens": 3692604.0, "step": 2061 }, { "epoch": 0.33390008906161445, "grad_norm": 19.56877326965332, "learning_rate": 6.662888601036271e-06, "loss": 0.6567, "mean_token_accuracy": 0.9231182336807251, "num_tokens": 3694389.0, "step": 2062 }, { "epoch": 0.33406201926969475, "grad_norm": 25.663497924804688, "learning_rate": 6.661269430051814e-06, "loss": 0.7977, "mean_token_accuracy": 0.9108623564243317, "num_tokens": 3696179.0, "step": 2063 }, { "epoch": 0.33422394947777506, "grad_norm": 21.264484405517578, "learning_rate": 6.659650259067359e-06, "loss": 0.5346, "mean_token_accuracy": 0.9215896725654602, "num_tokens": 3697971.0, "step": 2064 }, { "epoch": 0.33438587968585537, "grad_norm": 21.311599731445312, "learning_rate": 6.658031088082902e-06, "loss": 0.5876, "mean_token_accuracy": 0.9121031761169434, "num_tokens": 3699767.0, "step": 2065 }, { "epoch": 0.33454780989393573, "grad_norm": 28.692983627319336, "learning_rate": 6.656411917098447e-06, "loss": 0.7161, "mean_token_accuracy": 0.9006994962692261, "num_tokens": 3701570.0, "step": 2066 }, { "epoch": 0.33470974010201604, "grad_norm": 23.254192352294922, "learning_rate": 6.65479274611399e-06, "loss": 0.7947, "mean_token_accuracy": 0.8967473804950714, "num_tokens": 3703363.0, "step": 2067 }, { "epoch": 0.33487167031009635, "grad_norm": 26.036184310913086, "learning_rate": 6.653173575129535e-06, "loss": 0.7015, "mean_token_accuracy": 0.9035947918891907, "num_tokens": 3705155.0, "step": 2068 }, { "epoch": 0.33503360051817666, "grad_norm": 22.794485092163086, "learning_rate": 6.651554404145078e-06, "loss": 0.7085, "mean_token_accuracy": 0.9043233096599579, "num_tokens": 3706940.0, "step": 2069 }, { "epoch": 0.33519553072625696, "grad_norm": 21.834421157836914, "learning_rate": 6.649935233160623e-06, "loss": 0.6725, "mean_token_accuracy": 0.9057921469211578, "num_tokens": 3708728.0, "step": 2070 }, { "epoch": 0.3353574609343373, "grad_norm": 25.597959518432617, "learning_rate": 6.648316062176166e-06, "loss": 0.713, "mean_token_accuracy": 0.9058879911899567, "num_tokens": 3710514.0, "step": 2071 }, { "epoch": 0.33551939114241763, "grad_norm": 15.884485244750977, "learning_rate": 6.646696891191711e-06, "loss": 0.5761, "mean_token_accuracy": 0.9224588871002197, "num_tokens": 3712297.0, "step": 2072 }, { "epoch": 0.33568132135049794, "grad_norm": 21.887197494506836, "learning_rate": 6.645077720207254e-06, "loss": 0.7276, "mean_token_accuracy": 0.8964285850524902, "num_tokens": 3714089.0, "step": 2073 }, { "epoch": 0.33584325155857825, "grad_norm": 16.8392276763916, "learning_rate": 6.643458549222799e-06, "loss": 0.6167, "mean_token_accuracy": 0.9265734255313873, "num_tokens": 3715887.0, "step": 2074 }, { "epoch": 0.33600518176665856, "grad_norm": 18.458192825317383, "learning_rate": 6.641839378238343e-06, "loss": 0.656, "mean_token_accuracy": 0.9099322259426117, "num_tokens": 3717676.0, "step": 2075 }, { "epoch": 0.33616711197473886, "grad_norm": 21.561832427978516, "learning_rate": 6.640220207253887e-06, "loss": 0.5217, "mean_token_accuracy": 0.9246582388877869, "num_tokens": 3719466.0, "step": 2076 }, { "epoch": 0.3363290421828192, "grad_norm": 16.147790908813477, "learning_rate": 6.638601036269431e-06, "loss": 0.5606, "mean_token_accuracy": 0.9244604408740997, "num_tokens": 3721256.0, "step": 2077 }, { "epoch": 0.33649097239089953, "grad_norm": 27.431909561157227, "learning_rate": 6.636981865284975e-06, "loss": 0.7874, "mean_token_accuracy": 0.9035714268684387, "num_tokens": 3723048.0, "step": 2078 }, { "epoch": 0.33665290259897984, "grad_norm": 23.539583206176758, "learning_rate": 6.635362694300519e-06, "loss": 0.636, "mean_token_accuracy": 0.9092437028884888, "num_tokens": 3724836.0, "step": 2079 }, { "epoch": 0.33681483280706015, "grad_norm": 30.017309188842773, "learning_rate": 6.633743523316063e-06, "loss": 0.7121, "mean_token_accuracy": 0.8976097106933594, "num_tokens": 3726641.0, "step": 2080 }, { "epoch": 0.33697676301514046, "grad_norm": 15.944644927978516, "learning_rate": 6.632124352331607e-06, "loss": 0.6127, "mean_token_accuracy": 0.9175926148891449, "num_tokens": 3728420.0, "step": 2081 }, { "epoch": 0.33713869322322076, "grad_norm": 16.257055282592773, "learning_rate": 6.630505181347151e-06, "loss": 0.5104, "mean_token_accuracy": 0.9328828752040863, "num_tokens": 3730215.0, "step": 2082 }, { "epoch": 0.3373006234313011, "grad_norm": 23.728219985961914, "learning_rate": 6.628886010362695e-06, "loss": 0.598, "mean_token_accuracy": 0.909731537103653, "num_tokens": 3732016.0, "step": 2083 }, { "epoch": 0.33746255363938144, "grad_norm": 24.099153518676758, "learning_rate": 6.627266839378239e-06, "loss": 0.6714, "mean_token_accuracy": 0.9021464586257935, "num_tokens": 3733804.0, "step": 2084 }, { "epoch": 0.33762448384746174, "grad_norm": 24.35784912109375, "learning_rate": 6.625647668393783e-06, "loss": 0.6357, "mean_token_accuracy": 0.9089610874652863, "num_tokens": 3735591.0, "step": 2085 }, { "epoch": 0.33778641405554205, "grad_norm": 25.602277755737305, "learning_rate": 6.624028497409327e-06, "loss": 0.6798, "mean_token_accuracy": 0.9007353186607361, "num_tokens": 3737375.0, "step": 2086 }, { "epoch": 0.33794834426362236, "grad_norm": 26.737590789794922, "learning_rate": 6.622409326424871e-06, "loss": 0.6938, "mean_token_accuracy": 0.900648832321167, "num_tokens": 3739168.0, "step": 2087 }, { "epoch": 0.3381102744717027, "grad_norm": 15.450510025024414, "learning_rate": 6.620790155440415e-06, "loss": 0.5095, "mean_token_accuracy": 0.9195210933685303, "num_tokens": 3740953.0, "step": 2088 }, { "epoch": 0.33827220467978303, "grad_norm": 25.912057876586914, "learning_rate": 6.619170984455959e-06, "loss": 0.7243, "mean_token_accuracy": 0.9024069905281067, "num_tokens": 3742741.0, "step": 2089 }, { "epoch": 0.33843413488786334, "grad_norm": 19.65656852722168, "learning_rate": 6.617551813471503e-06, "loss": 0.7414, "mean_token_accuracy": 0.9061065912246704, "num_tokens": 3744530.0, "step": 2090 }, { "epoch": 0.33859606509594364, "grad_norm": 25.34670066833496, "learning_rate": 6.615932642487047e-06, "loss": 0.6934, "mean_token_accuracy": 0.8944444358348846, "num_tokens": 3746317.0, "step": 2091 }, { "epoch": 0.33875799530402395, "grad_norm": 18.848058700561523, "learning_rate": 6.614313471502591e-06, "loss": 0.5913, "mean_token_accuracy": 0.9214125573635101, "num_tokens": 3748109.0, "step": 2092 }, { "epoch": 0.33891992551210426, "grad_norm": 26.715635299682617, "learning_rate": 6.6126943005181354e-06, "loss": 0.7968, "mean_token_accuracy": 0.9032630920410156, "num_tokens": 3749900.0, "step": 2093 }, { "epoch": 0.3390818557201846, "grad_norm": 21.10277557373047, "learning_rate": 6.61107512953368e-06, "loss": 0.6999, "mean_token_accuracy": 0.9136128425598145, "num_tokens": 3751690.0, "step": 2094 }, { "epoch": 0.33924378592826493, "grad_norm": 20.28023338317871, "learning_rate": 6.6094559585492235e-06, "loss": 0.585, "mean_token_accuracy": 0.9159272611141205, "num_tokens": 3753487.0, "step": 2095 }, { "epoch": 0.33940571613634524, "grad_norm": 24.7639102935791, "learning_rate": 6.607836787564768e-06, "loss": 0.6634, "mean_token_accuracy": 0.901890754699707, "num_tokens": 3755275.0, "step": 2096 }, { "epoch": 0.33956764634442554, "grad_norm": 27.058752059936523, "learning_rate": 6.6062176165803115e-06, "loss": 0.7063, "mean_token_accuracy": 0.896896243095398, "num_tokens": 3757078.0, "step": 2097 }, { "epoch": 0.33972957655250585, "grad_norm": 19.066463470458984, "learning_rate": 6.604598445595856e-06, "loss": 0.5668, "mean_token_accuracy": 0.9209109842777252, "num_tokens": 3758868.0, "step": 2098 }, { "epoch": 0.3398915067605862, "grad_norm": 25.428539276123047, "learning_rate": 6.6029792746113995e-06, "loss": 0.6257, "mean_token_accuracy": 0.9128378331661224, "num_tokens": 3760668.0, "step": 2099 }, { "epoch": 0.3400534369686665, "grad_norm": 20.141963958740234, "learning_rate": 6.601360103626944e-06, "loss": 0.6578, "mean_token_accuracy": 0.9193500280380249, "num_tokens": 3762464.0, "step": 2100 }, { "epoch": 0.34021536717674683, "grad_norm": 24.149559020996094, "learning_rate": 6.5997409326424875e-06, "loss": 0.6452, "mean_token_accuracy": 0.9061971306800842, "num_tokens": 3764264.0, "step": 2101 }, { "epoch": 0.34037729738482714, "grad_norm": 20.161081314086914, "learning_rate": 6.598121761658032e-06, "loss": 0.6499, "mean_token_accuracy": 0.9059343636035919, "num_tokens": 3766052.0, "step": 2102 }, { "epoch": 0.34053922759290745, "grad_norm": 25.36886215209961, "learning_rate": 6.5965025906735756e-06, "loss": 0.6747, "mean_token_accuracy": 0.9120689630508423, "num_tokens": 3767849.0, "step": 2103 }, { "epoch": 0.34070115780098775, "grad_norm": 25.93685531616211, "learning_rate": 6.59488341968912e-06, "loss": 0.6519, "mean_token_accuracy": 0.9087276458740234, "num_tokens": 3769635.0, "step": 2104 }, { "epoch": 0.3408630880090681, "grad_norm": 32.673309326171875, "learning_rate": 6.593264248704664e-06, "loss": 0.8867, "mean_token_accuracy": 0.8947907388210297, "num_tokens": 3771423.0, "step": 2105 }, { "epoch": 0.3410250182171484, "grad_norm": 24.944774627685547, "learning_rate": 6.591645077720208e-06, "loss": 0.6735, "mean_token_accuracy": 0.8978102207183838, "num_tokens": 3773209.0, "step": 2106 }, { "epoch": 0.34118694842522873, "grad_norm": 24.814537048339844, "learning_rate": 6.590025906735752e-06, "loss": 0.6365, "mean_token_accuracy": 0.9089886546134949, "num_tokens": 3775007.0, "step": 2107 }, { "epoch": 0.34134887863330904, "grad_norm": 16.94260597229004, "learning_rate": 6.588406735751296e-06, "loss": 0.5809, "mean_token_accuracy": 0.9143631160259247, "num_tokens": 3776788.0, "step": 2108 }, { "epoch": 0.34151080884138935, "grad_norm": 18.645910263061523, "learning_rate": 6.58678756476684e-06, "loss": 0.6291, "mean_token_accuracy": 0.9120418429374695, "num_tokens": 3778584.0, "step": 2109 }, { "epoch": 0.34167273904946965, "grad_norm": 22.338497161865234, "learning_rate": 6.585168393782384e-06, "loss": 0.5211, "mean_token_accuracy": 0.9230088293552399, "num_tokens": 3780369.0, "step": 2110 }, { "epoch": 0.34183466925755, "grad_norm": 27.6822509765625, "learning_rate": 6.583549222797928e-06, "loss": 0.7036, "mean_token_accuracy": 0.9130434989929199, "num_tokens": 3782157.0, "step": 2111 }, { "epoch": 0.3419965994656303, "grad_norm": 17.167701721191406, "learning_rate": 6.581930051813472e-06, "loss": 0.5243, "mean_token_accuracy": 0.9244965612888336, "num_tokens": 3783947.0, "step": 2112 }, { "epoch": 0.34215852967371063, "grad_norm": 18.025650024414062, "learning_rate": 6.5803108808290166e-06, "loss": 0.5194, "mean_token_accuracy": 0.9195785224437714, "num_tokens": 3785732.0, "step": 2113 }, { "epoch": 0.34232045988179094, "grad_norm": 24.95975112915039, "learning_rate": 6.57869170984456e-06, "loss": 0.6442, "mean_token_accuracy": 0.9143518507480621, "num_tokens": 3787523.0, "step": 2114 }, { "epoch": 0.34248239008987125, "grad_norm": 19.770832061767578, "learning_rate": 6.577072538860105e-06, "loss": 0.553, "mean_token_accuracy": 0.9280538260936737, "num_tokens": 3789313.0, "step": 2115 }, { "epoch": 0.3426443202979516, "grad_norm": 31.38514518737793, "learning_rate": 6.575453367875648e-06, "loss": 0.78, "mean_token_accuracy": 0.8878780007362366, "num_tokens": 3791102.0, "step": 2116 }, { "epoch": 0.3428062505060319, "grad_norm": 29.965070724487305, "learning_rate": 6.573834196891193e-06, "loss": 0.6884, "mean_token_accuracy": 0.8893055617809296, "num_tokens": 3792894.0, "step": 2117 }, { "epoch": 0.3429681807141122, "grad_norm": 24.24552345275879, "learning_rate": 6.572215025906736e-06, "loss": 0.7219, "mean_token_accuracy": 0.8999391794204712, "num_tokens": 3794675.0, "step": 2118 }, { "epoch": 0.34313011092219253, "grad_norm": 18.665618896484375, "learning_rate": 6.570595854922281e-06, "loss": 0.5663, "mean_token_accuracy": 0.9210539758205414, "num_tokens": 3796465.0, "step": 2119 }, { "epoch": 0.34329204113027284, "grad_norm": 24.936668395996094, "learning_rate": 6.568976683937824e-06, "loss": 0.6943, "mean_token_accuracy": 0.9131924510002136, "num_tokens": 3798266.0, "step": 2120 }, { "epoch": 0.34345397133835315, "grad_norm": 27.47084617614746, "learning_rate": 6.567357512953369e-06, "loss": 0.8474, "mean_token_accuracy": 0.8929530084133148, "num_tokens": 3800067.0, "step": 2121 }, { "epoch": 0.3436159015464335, "grad_norm": 24.947195053100586, "learning_rate": 6.565738341968912e-06, "loss": 0.5777, "mean_token_accuracy": 0.9059751033782959, "num_tokens": 3801864.0, "step": 2122 }, { "epoch": 0.3437778317545138, "grad_norm": 21.40584373474121, "learning_rate": 6.564119170984457e-06, "loss": 0.6048, "mean_token_accuracy": 0.9168370068073273, "num_tokens": 3803663.0, "step": 2123 }, { "epoch": 0.3439397619625941, "grad_norm": 20.94043731689453, "learning_rate": 6.5625e-06, "loss": 0.6664, "mean_token_accuracy": 0.9150566458702087, "num_tokens": 3805434.0, "step": 2124 }, { "epoch": 0.34410169217067443, "grad_norm": 26.31580352783203, "learning_rate": 6.560880829015545e-06, "loss": 0.6515, "mean_token_accuracy": 0.90427565574646, "num_tokens": 3807228.0, "step": 2125 }, { "epoch": 0.34426362237875474, "grad_norm": 21.0064754486084, "learning_rate": 6.559261658031088e-06, "loss": 0.6781, "mean_token_accuracy": 0.913382351398468, "num_tokens": 3809017.0, "step": 2126 }, { "epoch": 0.34442555258683505, "grad_norm": 30.202377319335938, "learning_rate": 6.557642487046633e-06, "loss": 0.7601, "mean_token_accuracy": 0.9010576605796814, "num_tokens": 3810822.0, "step": 2127 }, { "epoch": 0.3445874827949154, "grad_norm": 23.299419403076172, "learning_rate": 6.556023316062176e-06, "loss": 0.5992, "mean_token_accuracy": 0.9135643839836121, "num_tokens": 3812613.0, "step": 2128 }, { "epoch": 0.3447494130029957, "grad_norm": 21.97918701171875, "learning_rate": 6.554404145077721e-06, "loss": 0.5576, "mean_token_accuracy": 0.919034868478775, "num_tokens": 3814409.0, "step": 2129 }, { "epoch": 0.344911343211076, "grad_norm": 25.32720947265625, "learning_rate": 6.552784974093264e-06, "loss": 0.743, "mean_token_accuracy": 0.9148935973644257, "num_tokens": 3816203.0, "step": 2130 }, { "epoch": 0.34507327341915633, "grad_norm": 26.97684669494629, "learning_rate": 6.551165803108809e-06, "loss": 0.7316, "mean_token_accuracy": 0.9024765491485596, "num_tokens": 3817992.0, "step": 2131 }, { "epoch": 0.34523520362723664, "grad_norm": 26.395828247070312, "learning_rate": 6.549546632124353e-06, "loss": 0.6384, "mean_token_accuracy": 0.9059554934501648, "num_tokens": 3819791.0, "step": 2132 }, { "epoch": 0.345397133835317, "grad_norm": 26.689434051513672, "learning_rate": 6.547927461139897e-06, "loss": 0.6337, "mean_token_accuracy": 0.9123508334159851, "num_tokens": 3821577.0, "step": 2133 }, { "epoch": 0.3455590640433973, "grad_norm": 16.77696990966797, "learning_rate": 6.546308290155441e-06, "loss": 0.5896, "mean_token_accuracy": 0.9193372428417206, "num_tokens": 3823360.0, "step": 2134 }, { "epoch": 0.3457209942514776, "grad_norm": 21.114429473876953, "learning_rate": 6.544689119170985e-06, "loss": 0.6127, "mean_token_accuracy": 0.9181883335113525, "num_tokens": 3825152.0, "step": 2135 }, { "epoch": 0.3458829244595579, "grad_norm": 24.115955352783203, "learning_rate": 6.543069948186529e-06, "loss": 0.5741, "mean_token_accuracy": 0.9202331602573395, "num_tokens": 3826940.0, "step": 2136 }, { "epoch": 0.34604485466763824, "grad_norm": 26.51895523071289, "learning_rate": 6.541450777202073e-06, "loss": 0.7621, "mean_token_accuracy": 0.8995098173618317, "num_tokens": 3828732.0, "step": 2137 }, { "epoch": 0.34620678487571854, "grad_norm": 29.941570281982422, "learning_rate": 6.539831606217617e-06, "loss": 0.9387, "mean_token_accuracy": 0.8849684298038483, "num_tokens": 3830531.0, "step": 2138 }, { "epoch": 0.3463687150837989, "grad_norm": 17.610179901123047, "learning_rate": 6.538212435233161e-06, "loss": 0.5196, "mean_token_accuracy": 0.919669508934021, "num_tokens": 3832317.0, "step": 2139 }, { "epoch": 0.3465306452918792, "grad_norm": 21.39354705810547, "learning_rate": 6.536593264248705e-06, "loss": 0.6063, "mean_token_accuracy": 0.9195118546485901, "num_tokens": 3834125.0, "step": 2140 }, { "epoch": 0.3466925754999595, "grad_norm": 18.09645652770996, "learning_rate": 6.534974093264249e-06, "loss": 0.5855, "mean_token_accuracy": 0.911347508430481, "num_tokens": 3835919.0, "step": 2141 }, { "epoch": 0.34685450570803983, "grad_norm": 21.196632385253906, "learning_rate": 6.533354922279793e-06, "loss": 0.5496, "mean_token_accuracy": 0.917339563369751, "num_tokens": 3837709.0, "step": 2142 }, { "epoch": 0.34701643591612014, "grad_norm": 30.876632690429688, "learning_rate": 6.531735751295337e-06, "loss": 0.7879, "mean_token_accuracy": 0.8922254145145416, "num_tokens": 3839500.0, "step": 2143 }, { "epoch": 0.34717836612420044, "grad_norm": 22.115489959716797, "learning_rate": 6.530116580310881e-06, "loss": 0.6259, "mean_token_accuracy": 0.9021967649459839, "num_tokens": 3841288.0, "step": 2144 }, { "epoch": 0.3473402963322808, "grad_norm": 22.1717586517334, "learning_rate": 6.528497409326425e-06, "loss": 0.5928, "mean_token_accuracy": 0.9206287264823914, "num_tokens": 3843077.0, "step": 2145 }, { "epoch": 0.3475022265403611, "grad_norm": 15.205970764160156, "learning_rate": 6.526878238341969e-06, "loss": 0.6142, "mean_token_accuracy": 0.9209504723548889, "num_tokens": 3844867.0, "step": 2146 }, { "epoch": 0.3476641567484414, "grad_norm": 14.341293334960938, "learning_rate": 6.525259067357513e-06, "loss": 0.5297, "mean_token_accuracy": 0.930633544921875, "num_tokens": 3846653.0, "step": 2147 }, { "epoch": 0.34782608695652173, "grad_norm": 32.03675079345703, "learning_rate": 6.5236398963730574e-06, "loss": 0.8825, "mean_token_accuracy": 0.8916361033916473, "num_tokens": 3848451.0, "step": 2148 }, { "epoch": 0.34798801716460204, "grad_norm": 23.610078811645508, "learning_rate": 6.522020725388601e-06, "loss": 0.7015, "mean_token_accuracy": 0.9120039641857147, "num_tokens": 3850247.0, "step": 2149 }, { "epoch": 0.3481499473726824, "grad_norm": 24.427082061767578, "learning_rate": 6.5204015544041455e-06, "loss": 0.6618, "mean_token_accuracy": 0.9093185067176819, "num_tokens": 3852046.0, "step": 2150 }, { "epoch": 0.3483118775807627, "grad_norm": 28.468671798706055, "learning_rate": 6.51878238341969e-06, "loss": 0.618, "mean_token_accuracy": 0.9067831337451935, "num_tokens": 3853837.0, "step": 2151 }, { "epoch": 0.348473807788843, "grad_norm": 19.28902816772461, "learning_rate": 6.5171632124352335e-06, "loss": 0.6626, "mean_token_accuracy": 0.9191673398017883, "num_tokens": 3855621.0, "step": 2152 }, { "epoch": 0.3486357379969233, "grad_norm": 19.29517936706543, "learning_rate": 6.515544041450778e-06, "loss": 0.5749, "mean_token_accuracy": 0.9207557141780853, "num_tokens": 3857411.0, "step": 2153 }, { "epoch": 0.34879766820500363, "grad_norm": 21.733097076416016, "learning_rate": 6.5139248704663215e-06, "loss": 0.6338, "mean_token_accuracy": 0.9153417944908142, "num_tokens": 3859207.0, "step": 2154 }, { "epoch": 0.34895959841308394, "grad_norm": 16.114097595214844, "learning_rate": 6.512305699481866e-06, "loss": 0.4715, "mean_token_accuracy": 0.9319444596767426, "num_tokens": 3860998.0, "step": 2155 }, { "epoch": 0.3491215286211643, "grad_norm": 22.43841552734375, "learning_rate": 6.5106865284974095e-06, "loss": 0.6581, "mean_token_accuracy": 0.9116838276386261, "num_tokens": 3862793.0, "step": 2156 }, { "epoch": 0.3492834588292446, "grad_norm": 14.085238456726074, "learning_rate": 6.509067357512954e-06, "loss": 0.4634, "mean_token_accuracy": 0.9301860332489014, "num_tokens": 3864577.0, "step": 2157 }, { "epoch": 0.3494453890373249, "grad_norm": 29.33224105834961, "learning_rate": 6.5074481865284976e-06, "loss": 0.7893, "mean_token_accuracy": 0.8738393187522888, "num_tokens": 3866366.0, "step": 2158 }, { "epoch": 0.3496073192454052, "grad_norm": 21.894073486328125, "learning_rate": 6.505829015544042e-06, "loss": 0.6197, "mean_token_accuracy": 0.9097852110862732, "num_tokens": 3868155.0, "step": 2159 }, { "epoch": 0.34976924945348553, "grad_norm": 25.372777938842773, "learning_rate": 6.504209844559586e-06, "loss": 0.6815, "mean_token_accuracy": 0.9064182341098785, "num_tokens": 3869945.0, "step": 2160 }, { "epoch": 0.34993117966156584, "grad_norm": 31.39737892150879, "learning_rate": 6.50259067357513e-06, "loss": 0.7751, "mean_token_accuracy": 0.9008542001247406, "num_tokens": 3871749.0, "step": 2161 }, { "epoch": 0.3500931098696462, "grad_norm": 25.322107315063477, "learning_rate": 6.500971502590674e-06, "loss": 0.7988, "mean_token_accuracy": 0.8953647315502167, "num_tokens": 3873537.0, "step": 2162 }, { "epoch": 0.3502550400777265, "grad_norm": 23.296398162841797, "learning_rate": 6.499352331606218e-06, "loss": 0.6366, "mean_token_accuracy": 0.9176002144813538, "num_tokens": 3875328.0, "step": 2163 }, { "epoch": 0.3504169702858068, "grad_norm": 28.476030349731445, "learning_rate": 6.497733160621762e-06, "loss": 0.8945, "mean_token_accuracy": 0.902803361415863, "num_tokens": 3877124.0, "step": 2164 }, { "epoch": 0.3505789004938871, "grad_norm": 26.659847259521484, "learning_rate": 6.496113989637306e-06, "loss": 0.6791, "mean_token_accuracy": 0.901033878326416, "num_tokens": 3878919.0, "step": 2165 }, { "epoch": 0.35074083070196743, "grad_norm": 23.27151870727539, "learning_rate": 6.49449481865285e-06, "loss": 0.7501, "mean_token_accuracy": 0.8963005542755127, "num_tokens": 3880720.0, "step": 2166 }, { "epoch": 0.3509027609100478, "grad_norm": 21.743480682373047, "learning_rate": 6.492875647668394e-06, "loss": 0.6307, "mean_token_accuracy": 0.9157631993293762, "num_tokens": 3882505.0, "step": 2167 }, { "epoch": 0.3510646911181281, "grad_norm": 33.06528091430664, "learning_rate": 6.491256476683938e-06, "loss": 0.7314, "mean_token_accuracy": 0.8974468111991882, "num_tokens": 3884308.0, "step": 2168 }, { "epoch": 0.3512266213262084, "grad_norm": 21.615249633789062, "learning_rate": 6.489637305699482e-06, "loss": 0.5949, "mean_token_accuracy": 0.9198540151119232, "num_tokens": 3886107.0, "step": 2169 }, { "epoch": 0.3513885515342887, "grad_norm": 23.438983917236328, "learning_rate": 6.4880181347150266e-06, "loss": 0.6351, "mean_token_accuracy": 0.9104995429515839, "num_tokens": 3887898.0, "step": 2170 }, { "epoch": 0.351550481742369, "grad_norm": 24.982452392578125, "learning_rate": 6.48639896373057e-06, "loss": 0.5721, "mean_token_accuracy": 0.9085317552089691, "num_tokens": 3889694.0, "step": 2171 }, { "epoch": 0.35171241195044933, "grad_norm": 22.238332748413086, "learning_rate": 6.484779792746115e-06, "loss": 0.6205, "mean_token_accuracy": 0.9106506407260895, "num_tokens": 3891474.0, "step": 2172 }, { "epoch": 0.3518743421585297, "grad_norm": 19.65492057800293, "learning_rate": 6.483160621761658e-06, "loss": 0.5746, "mean_token_accuracy": 0.9171359837055206, "num_tokens": 3893263.0, "step": 2173 }, { "epoch": 0.35203627236661, "grad_norm": 28.740571975708008, "learning_rate": 6.481541450777203e-06, "loss": 0.6848, "mean_token_accuracy": 0.8981566727161407, "num_tokens": 3895070.0, "step": 2174 }, { "epoch": 0.3521982025746903, "grad_norm": 20.463491439819336, "learning_rate": 6.479922279792746e-06, "loss": 0.5577, "mean_token_accuracy": 0.9199904799461365, "num_tokens": 3896857.0, "step": 2175 }, { "epoch": 0.3523601327827706, "grad_norm": 28.632444381713867, "learning_rate": 6.478303108808291e-06, "loss": 0.6384, "mean_token_accuracy": 0.9181406795978546, "num_tokens": 3898648.0, "step": 2176 }, { "epoch": 0.3525220629908509, "grad_norm": 31.122787475585938, "learning_rate": 6.476683937823834e-06, "loss": 0.8832, "mean_token_accuracy": 0.8806885182857513, "num_tokens": 3900445.0, "step": 2177 }, { "epoch": 0.35268399319893123, "grad_norm": 39.111812591552734, "learning_rate": 6.475064766839379e-06, "loss": 1.1488, "mean_token_accuracy": 0.8923469483852386, "num_tokens": 3902244.0, "step": 2178 }, { "epoch": 0.3528459234070116, "grad_norm": 26.414106369018555, "learning_rate": 6.473445595854922e-06, "loss": 0.7062, "mean_token_accuracy": 0.9079371392726898, "num_tokens": 3904038.0, "step": 2179 }, { "epoch": 0.3530078536150919, "grad_norm": 16.560407638549805, "learning_rate": 6.471826424870467e-06, "loss": 0.5431, "mean_token_accuracy": 0.923355907201767, "num_tokens": 3905823.0, "step": 2180 }, { "epoch": 0.3531697838231722, "grad_norm": 30.784868240356445, "learning_rate": 6.47020725388601e-06, "loss": 0.7834, "mean_token_accuracy": 0.8981938660144806, "num_tokens": 3907620.0, "step": 2181 }, { "epoch": 0.3533317140312525, "grad_norm": 19.335500717163086, "learning_rate": 6.468588082901555e-06, "loss": 0.6107, "mean_token_accuracy": 0.9160934388637543, "num_tokens": 3909406.0, "step": 2182 }, { "epoch": 0.3534936442393328, "grad_norm": 21.249637603759766, "learning_rate": 6.466968911917098e-06, "loss": 0.5645, "mean_token_accuracy": 0.9219701290130615, "num_tokens": 3911200.0, "step": 2183 }, { "epoch": 0.3536555744474132, "grad_norm": 28.769323348999023, "learning_rate": 6.465349740932643e-06, "loss": 0.6755, "mean_token_accuracy": 0.8975040316581726, "num_tokens": 3912985.0, "step": 2184 }, { "epoch": 0.3538175046554935, "grad_norm": 19.6943416595459, "learning_rate": 6.463730569948186e-06, "loss": 0.6119, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 3914783.0, "step": 2185 }, { "epoch": 0.3539794348635738, "grad_norm": 19.53598403930664, "learning_rate": 6.462111398963731e-06, "loss": 0.5575, "mean_token_accuracy": 0.9122383296489716, "num_tokens": 3916568.0, "step": 2186 }, { "epoch": 0.3541413650716541, "grad_norm": 26.573444366455078, "learning_rate": 6.460492227979274e-06, "loss": 0.8213, "mean_token_accuracy": 0.9058031737804413, "num_tokens": 3918357.0, "step": 2187 }, { "epoch": 0.3543032952797344, "grad_norm": 24.73661994934082, "learning_rate": 6.458873056994819e-06, "loss": 0.7473, "mean_token_accuracy": 0.9100933074951172, "num_tokens": 3920144.0, "step": 2188 }, { "epoch": 0.35446522548781473, "grad_norm": 24.69605255126953, "learning_rate": 6.457253886010363e-06, "loss": 0.782, "mean_token_accuracy": 0.8998858034610748, "num_tokens": 3921946.0, "step": 2189 }, { "epoch": 0.3546271556958951, "grad_norm": 17.774948120117188, "learning_rate": 6.455634715025907e-06, "loss": 0.5751, "mean_token_accuracy": 0.9215146005153656, "num_tokens": 3923738.0, "step": 2190 }, { "epoch": 0.3547890859039754, "grad_norm": 19.63977813720703, "learning_rate": 6.454015544041451e-06, "loss": 0.5557, "mean_token_accuracy": 0.9250841736793518, "num_tokens": 3925517.0, "step": 2191 }, { "epoch": 0.3549510161120557, "grad_norm": 21.64784812927246, "learning_rate": 6.452396373056995e-06, "loss": 0.5961, "mean_token_accuracy": 0.9041826725006104, "num_tokens": 3927303.0, "step": 2192 }, { "epoch": 0.355112946320136, "grad_norm": 25.56424331665039, "learning_rate": 6.450777202072539e-06, "loss": 0.7396, "mean_token_accuracy": 0.9089855253696442, "num_tokens": 3929103.0, "step": 2193 }, { "epoch": 0.3552748765282163, "grad_norm": 25.165279388427734, "learning_rate": 6.449158031088083e-06, "loss": 0.6583, "mean_token_accuracy": 0.901846170425415, "num_tokens": 3930890.0, "step": 2194 }, { "epoch": 0.35543680673629663, "grad_norm": 20.730913162231445, "learning_rate": 6.447538860103627e-06, "loss": 0.5908, "mean_token_accuracy": 0.9091269969940186, "num_tokens": 3932677.0, "step": 2195 }, { "epoch": 0.355598736944377, "grad_norm": 20.076528549194336, "learning_rate": 6.445919689119171e-06, "loss": 0.6286, "mean_token_accuracy": 0.9171116650104523, "num_tokens": 3934475.0, "step": 2196 }, { "epoch": 0.3557606671524573, "grad_norm": 22.78238868713379, "learning_rate": 6.444300518134715e-06, "loss": 0.5913, "mean_token_accuracy": 0.9063973128795624, "num_tokens": 3936265.0, "step": 2197 }, { "epoch": 0.3559225973605376, "grad_norm": 29.081985473632812, "learning_rate": 6.442681347150259e-06, "loss": 0.7617, "mean_token_accuracy": 0.8993506729602814, "num_tokens": 3938085.0, "step": 2198 }, { "epoch": 0.3560845275686179, "grad_norm": 24.60321807861328, "learning_rate": 6.441062176165803e-06, "loss": 0.7889, "mean_token_accuracy": 0.8968862593173981, "num_tokens": 3939876.0, "step": 2199 }, { "epoch": 0.3562464577766982, "grad_norm": 20.562898635864258, "learning_rate": 6.439443005181347e-06, "loss": 0.6586, "mean_token_accuracy": 0.9092437028884888, "num_tokens": 3941664.0, "step": 2200 }, { "epoch": 0.3564083879847786, "grad_norm": 24.693836212158203, "learning_rate": 6.437823834196891e-06, "loss": 0.6961, "mean_token_accuracy": 0.9002995491027832, "num_tokens": 3943447.0, "step": 2201 }, { "epoch": 0.3565703181928589, "grad_norm": 17.60163688659668, "learning_rate": 6.436204663212435e-06, "loss": 0.6268, "mean_token_accuracy": 0.9113799929618835, "num_tokens": 3945230.0, "step": 2202 }, { "epoch": 0.3567322484009392, "grad_norm": 17.729053497314453, "learning_rate": 6.434585492227979e-06, "loss": 0.6026, "mean_token_accuracy": 0.9117646813392639, "num_tokens": 3947014.0, "step": 2203 }, { "epoch": 0.3568941786090195, "grad_norm": 17.950485229492188, "learning_rate": 6.432966321243523e-06, "loss": 0.6298, "mean_token_accuracy": 0.9147887229919434, "num_tokens": 3948808.0, "step": 2204 }, { "epoch": 0.3570561088170998, "grad_norm": 20.892547607421875, "learning_rate": 6.4313471502590674e-06, "loss": 0.6421, "mean_token_accuracy": 0.9107877314090729, "num_tokens": 3950599.0, "step": 2205 }, { "epoch": 0.3572180390251801, "grad_norm": 28.4982852935791, "learning_rate": 6.429727979274611e-06, "loss": 0.7078, "mean_token_accuracy": 0.8985221683979034, "num_tokens": 3952396.0, "step": 2206 }, { "epoch": 0.3573799692332605, "grad_norm": 19.08439826965332, "learning_rate": 6.4281088082901555e-06, "loss": 0.6201, "mean_token_accuracy": 0.9081169068813324, "num_tokens": 3954180.0, "step": 2207 }, { "epoch": 0.3575418994413408, "grad_norm": 18.782014846801758, "learning_rate": 6.4264896373057e-06, "loss": 0.6324, "mean_token_accuracy": 0.9076087176799774, "num_tokens": 3955974.0, "step": 2208 }, { "epoch": 0.3577038296494211, "grad_norm": 17.24158477783203, "learning_rate": 6.4248704663212435e-06, "loss": 0.5923, "mean_token_accuracy": 0.9196909368038177, "num_tokens": 3957760.0, "step": 2209 }, { "epoch": 0.3578657598575014, "grad_norm": 19.374765396118164, "learning_rate": 6.423251295336788e-06, "loss": 0.5686, "mean_token_accuracy": 0.9154887795448303, "num_tokens": 3959556.0, "step": 2210 }, { "epoch": 0.3580276900655817, "grad_norm": 17.251399993896484, "learning_rate": 6.4216321243523315e-06, "loss": 0.5782, "mean_token_accuracy": 0.9157369434833527, "num_tokens": 3961342.0, "step": 2211 }, { "epoch": 0.3581896202736621, "grad_norm": 18.58675193786621, "learning_rate": 6.420012953367876e-06, "loss": 0.623, "mean_token_accuracy": 0.9122377634048462, "num_tokens": 3963127.0, "step": 2212 }, { "epoch": 0.3583515504817424, "grad_norm": 23.050987243652344, "learning_rate": 6.4183937823834196e-06, "loss": 0.6375, "mean_token_accuracy": 0.9127601683139801, "num_tokens": 3964915.0, "step": 2213 }, { "epoch": 0.3585134806898227, "grad_norm": 16.583791732788086, "learning_rate": 6.416774611398964e-06, "loss": 0.5465, "mean_token_accuracy": 0.919117659330368, "num_tokens": 3966699.0, "step": 2214 }, { "epoch": 0.358675410897903, "grad_norm": 18.57876968383789, "learning_rate": 6.415155440414508e-06, "loss": 0.5859, "mean_token_accuracy": 0.9094203114509583, "num_tokens": 3968487.0, "step": 2215 }, { "epoch": 0.3588373411059833, "grad_norm": 20.355701446533203, "learning_rate": 6.413536269430052e-06, "loss": 0.5909, "mean_token_accuracy": 0.9210199117660522, "num_tokens": 3970277.0, "step": 2216 }, { "epoch": 0.3589992713140636, "grad_norm": 20.74054527282715, "learning_rate": 6.411917098445596e-06, "loss": 0.6268, "mean_token_accuracy": 0.9163228571414948, "num_tokens": 3972064.0, "step": 2217 }, { "epoch": 0.359161201522144, "grad_norm": 19.709436416625977, "learning_rate": 6.41029792746114e-06, "loss": 0.6573, "mean_token_accuracy": 0.9106077551841736, "num_tokens": 3973855.0, "step": 2218 }, { "epoch": 0.3593231317302243, "grad_norm": 27.718469619750977, "learning_rate": 6.408678756476684e-06, "loss": 0.6875, "mean_token_accuracy": 0.9007360637187958, "num_tokens": 3975647.0, "step": 2219 }, { "epoch": 0.3594850619383046, "grad_norm": 21.05332374572754, "learning_rate": 6.407059585492228e-06, "loss": 0.6253, "mean_token_accuracy": 0.9156555533409119, "num_tokens": 3977432.0, "step": 2220 }, { "epoch": 0.3596469921463849, "grad_norm": 23.723247528076172, "learning_rate": 6.405440414507773e-06, "loss": 0.6236, "mean_token_accuracy": 0.9197080135345459, "num_tokens": 3979218.0, "step": 2221 }, { "epoch": 0.3598089223544652, "grad_norm": 22.432613372802734, "learning_rate": 6.403821243523317e-06, "loss": 0.6312, "mean_token_accuracy": 0.9116883277893066, "num_tokens": 3981002.0, "step": 2222 }, { "epoch": 0.3599708525625455, "grad_norm": 22.70639991760254, "learning_rate": 6.402202072538861e-06, "loss": 0.6073, "mean_token_accuracy": 0.9172360301017761, "num_tokens": 3982792.0, "step": 2223 }, { "epoch": 0.3601327827706259, "grad_norm": 20.778104782104492, "learning_rate": 6.400582901554405e-06, "loss": 0.618, "mean_token_accuracy": 0.9178501665592194, "num_tokens": 3984572.0, "step": 2224 }, { "epoch": 0.3602947129787062, "grad_norm": 17.046594619750977, "learning_rate": 6.398963730569949e-06, "loss": 0.524, "mean_token_accuracy": 0.9230892956256866, "num_tokens": 3986357.0, "step": 2225 }, { "epoch": 0.3604566431867865, "grad_norm": 24.413124084472656, "learning_rate": 6.397344559585493e-06, "loss": 0.681, "mean_token_accuracy": 0.9015937745571136, "num_tokens": 3988163.0, "step": 2226 }, { "epoch": 0.3606185733948668, "grad_norm": 23.731372833251953, "learning_rate": 6.3957253886010374e-06, "loss": 0.6578, "mean_token_accuracy": 0.9119867086410522, "num_tokens": 3989959.0, "step": 2227 }, { "epoch": 0.3607805036029471, "grad_norm": 26.18189239501953, "learning_rate": 6.394106217616581e-06, "loss": 0.6074, "mean_token_accuracy": 0.9135818481445312, "num_tokens": 3991748.0, "step": 2228 }, { "epoch": 0.3609424338110275, "grad_norm": 21.4954891204834, "learning_rate": 6.3924870466321255e-06, "loss": 0.571, "mean_token_accuracy": 0.9179131388664246, "num_tokens": 3993539.0, "step": 2229 }, { "epoch": 0.3611043640191078, "grad_norm": 25.930837631225586, "learning_rate": 6.390867875647669e-06, "loss": 0.743, "mean_token_accuracy": 0.9045799672603607, "num_tokens": 3995334.0, "step": 2230 }, { "epoch": 0.3612662942271881, "grad_norm": 29.53824806213379, "learning_rate": 6.3892487046632135e-06, "loss": 0.6814, "mean_token_accuracy": 0.9051370322704315, "num_tokens": 3997129.0, "step": 2231 }, { "epoch": 0.3614282244352684, "grad_norm": 25.533300399780273, "learning_rate": 6.387629533678757e-06, "loss": 0.6064, "mean_token_accuracy": 0.9019704461097717, "num_tokens": 3998926.0, "step": 2232 }, { "epoch": 0.3615901546433487, "grad_norm": 27.277572631835938, "learning_rate": 6.3860103626943015e-06, "loss": 0.7397, "mean_token_accuracy": 0.8994092047214508, "num_tokens": 4000716.0, "step": 2233 }, { "epoch": 0.361752084851429, "grad_norm": 24.49983787536621, "learning_rate": 6.384391191709846e-06, "loss": 0.6236, "mean_token_accuracy": 0.9057773351669312, "num_tokens": 4002504.0, "step": 2234 }, { "epoch": 0.3619140150595094, "grad_norm": 25.49953269958496, "learning_rate": 6.3827720207253895e-06, "loss": 0.6405, "mean_token_accuracy": 0.9103453755378723, "num_tokens": 4004295.0, "step": 2235 }, { "epoch": 0.3620759452675897, "grad_norm": 29.299388885498047, "learning_rate": 6.381152849740934e-06, "loss": 0.8793, "mean_token_accuracy": 0.9023735225200653, "num_tokens": 4006084.0, "step": 2236 }, { "epoch": 0.36223787547567, "grad_norm": 22.609750747680664, "learning_rate": 6.3795336787564776e-06, "loss": 0.6354, "mean_token_accuracy": 0.9089886546134949, "num_tokens": 4007882.0, "step": 2237 }, { "epoch": 0.3623998056837503, "grad_norm": 21.020383834838867, "learning_rate": 6.377914507772022e-06, "loss": 0.5655, "mean_token_accuracy": 0.9249706864356995, "num_tokens": 4009674.0, "step": 2238 }, { "epoch": 0.3625617358918306, "grad_norm": 22.250280380249023, "learning_rate": 6.376295336787566e-06, "loss": 0.5649, "mean_token_accuracy": 0.9104297459125519, "num_tokens": 4011465.0, "step": 2239 }, { "epoch": 0.3627236660999109, "grad_norm": 26.73594856262207, "learning_rate": 6.37467616580311e-06, "loss": 0.7128, "mean_token_accuracy": 0.8925926089286804, "num_tokens": 4013256.0, "step": 2240 }, { "epoch": 0.3628855963079913, "grad_norm": 22.481779098510742, "learning_rate": 6.373056994818654e-06, "loss": 0.6856, "mean_token_accuracy": 0.9039260447025299, "num_tokens": 4015049.0, "step": 2241 }, { "epoch": 0.3630475265160716, "grad_norm": 23.815908432006836, "learning_rate": 6.371437823834198e-06, "loss": 0.7403, "mean_token_accuracy": 0.9049826860427856, "num_tokens": 4016835.0, "step": 2242 }, { "epoch": 0.3632094567241519, "grad_norm": 15.922857284545898, "learning_rate": 6.369818652849742e-06, "loss": 0.5015, "mean_token_accuracy": 0.9239558577537537, "num_tokens": 4018623.0, "step": 2243 }, { "epoch": 0.3633713869322322, "grad_norm": 17.92227554321289, "learning_rate": 6.368199481865286e-06, "loss": 0.5072, "mean_token_accuracy": 0.9259096682071686, "num_tokens": 4020405.0, "step": 2244 }, { "epoch": 0.3635333171403125, "grad_norm": 24.845996856689453, "learning_rate": 6.36658031088083e-06, "loss": 0.7001, "mean_token_accuracy": 0.897082507610321, "num_tokens": 4022199.0, "step": 2245 }, { "epoch": 0.36369524734839287, "grad_norm": 30.46639633178711, "learning_rate": 6.364961139896374e-06, "loss": 0.8517, "mean_token_accuracy": 0.8785386979579926, "num_tokens": 4023999.0, "step": 2246 }, { "epoch": 0.3638571775564732, "grad_norm": 28.35569190979004, "learning_rate": 6.363341968911918e-06, "loss": 0.6992, "mean_token_accuracy": 0.9060223400592804, "num_tokens": 4025798.0, "step": 2247 }, { "epoch": 0.3640191077645535, "grad_norm": 19.480100631713867, "learning_rate": 6.361722797927462e-06, "loss": 0.6185, "mean_token_accuracy": 0.9233989119529724, "num_tokens": 4027584.0, "step": 2248 }, { "epoch": 0.3641810379726338, "grad_norm": 27.425073623657227, "learning_rate": 6.360103626943006e-06, "loss": 0.745, "mean_token_accuracy": 0.8896499276161194, "num_tokens": 4029386.0, "step": 2249 }, { "epoch": 0.3643429681807141, "grad_norm": 23.494970321655273, "learning_rate": 6.35848445595855e-06, "loss": 0.6428, "mean_token_accuracy": 0.9052592515945435, "num_tokens": 4031182.0, "step": 2250 }, { "epoch": 0.3645048983887944, "grad_norm": 24.000337600708008, "learning_rate": 6.356865284974094e-06, "loss": 0.6155, "mean_token_accuracy": 0.9052910208702087, "num_tokens": 4032969.0, "step": 2251 }, { "epoch": 0.36466682859687477, "grad_norm": 26.413236618041992, "learning_rate": 6.355246113989638e-06, "loss": 0.638, "mean_token_accuracy": 0.9183934032917023, "num_tokens": 4034764.0, "step": 2252 }, { "epoch": 0.3648287588049551, "grad_norm": 28.493099212646484, "learning_rate": 6.353626943005183e-06, "loss": 0.7176, "mean_token_accuracy": 0.9046049118041992, "num_tokens": 4036559.0, "step": 2253 }, { "epoch": 0.3649906890130354, "grad_norm": 28.178659439086914, "learning_rate": 6.352007772020726e-06, "loss": 0.7841, "mean_token_accuracy": 0.9089753031730652, "num_tokens": 4038357.0, "step": 2254 }, { "epoch": 0.3651526192211157, "grad_norm": 28.810771942138672, "learning_rate": 6.350388601036271e-06, "loss": 0.6868, "mean_token_accuracy": 0.9027248620986938, "num_tokens": 4040157.0, "step": 2255 }, { "epoch": 0.365314549429196, "grad_norm": 18.55508041381836, "learning_rate": 6.348769430051814e-06, "loss": 0.5721, "mean_token_accuracy": 0.9272255897521973, "num_tokens": 4041944.0, "step": 2256 }, { "epoch": 0.3654764796372763, "grad_norm": 15.460658073425293, "learning_rate": 6.347150259067359e-06, "loss": 0.4797, "mean_token_accuracy": 0.9333333373069763, "num_tokens": 4043726.0, "step": 2257 }, { "epoch": 0.36563840984535667, "grad_norm": 29.21817398071289, "learning_rate": 6.345531088082902e-06, "loss": 0.7512, "mean_token_accuracy": 0.8981158137321472, "num_tokens": 4045513.0, "step": 2258 }, { "epoch": 0.365800340053437, "grad_norm": 30.518543243408203, "learning_rate": 6.343911917098447e-06, "loss": 0.7653, "mean_token_accuracy": 0.8950586915016174, "num_tokens": 4047311.0, "step": 2259 }, { "epoch": 0.3659622702615173, "grad_norm": 21.996871948242188, "learning_rate": 6.34229274611399e-06, "loss": 0.5766, "mean_token_accuracy": 0.9106077551841736, "num_tokens": 4049102.0, "step": 2260 }, { "epoch": 0.3661242004695976, "grad_norm": 23.002965927124023, "learning_rate": 6.340673575129535e-06, "loss": 0.6653, "mean_token_accuracy": 0.9161014258861542, "num_tokens": 4050887.0, "step": 2261 }, { "epoch": 0.3662861306776779, "grad_norm": 21.776159286499023, "learning_rate": 6.339054404145078e-06, "loss": 0.6339, "mean_token_accuracy": 0.9082373082637787, "num_tokens": 4052673.0, "step": 2262 }, { "epoch": 0.36644806088575826, "grad_norm": 18.76465606689453, "learning_rate": 6.337435233160623e-06, "loss": 0.5934, "mean_token_accuracy": 0.922358363866806, "num_tokens": 4054468.0, "step": 2263 }, { "epoch": 0.36660999109383857, "grad_norm": 30.67425537109375, "learning_rate": 6.335816062176166e-06, "loss": 0.8946, "mean_token_accuracy": 0.8983929753303528, "num_tokens": 4056273.0, "step": 2264 }, { "epoch": 0.3667719213019189, "grad_norm": 23.984634399414062, "learning_rate": 6.334196891191711e-06, "loss": 0.5597, "mean_token_accuracy": 0.9116408228874207, "num_tokens": 4058079.0, "step": 2265 }, { "epoch": 0.3669338515099992, "grad_norm": 25.59101676940918, "learning_rate": 6.332577720207254e-06, "loss": 0.65, "mean_token_accuracy": 0.9103172123432159, "num_tokens": 4059880.0, "step": 2266 }, { "epoch": 0.3670957817180795, "grad_norm": 25.86172866821289, "learning_rate": 6.330958549222799e-06, "loss": 0.6583, "mean_token_accuracy": 0.913968563079834, "num_tokens": 4061671.0, "step": 2267 }, { "epoch": 0.3672577119261598, "grad_norm": 22.27694320678711, "learning_rate": 6.329339378238342e-06, "loss": 0.5915, "mean_token_accuracy": 0.9078137874603271, "num_tokens": 4063476.0, "step": 2268 }, { "epoch": 0.36741964213424017, "grad_norm": 23.27398681640625, "learning_rate": 6.327720207253887e-06, "loss": 0.6593, "mean_token_accuracy": 0.905844658613205, "num_tokens": 4065264.0, "step": 2269 }, { "epoch": 0.3675815723423205, "grad_norm": 23.89768409729004, "learning_rate": 6.326101036269431e-06, "loss": 0.8353, "mean_token_accuracy": 0.8995938897132874, "num_tokens": 4067046.0, "step": 2270 }, { "epoch": 0.3677435025504008, "grad_norm": 22.042600631713867, "learning_rate": 6.324481865284975e-06, "loss": 0.5969, "mean_token_accuracy": 0.9139508605003357, "num_tokens": 4068837.0, "step": 2271 }, { "epoch": 0.3679054327584811, "grad_norm": 14.952237129211426, "learning_rate": 6.322862694300519e-06, "loss": 0.5491, "mean_token_accuracy": 0.9237650036811829, "num_tokens": 4070638.0, "step": 2272 }, { "epoch": 0.3680673629665614, "grad_norm": 22.978012084960938, "learning_rate": 6.321243523316063e-06, "loss": 0.6401, "mean_token_accuracy": 0.9067129492759705, "num_tokens": 4072429.0, "step": 2273 }, { "epoch": 0.3682292931746417, "grad_norm": 25.961809158325195, "learning_rate": 6.319624352331607e-06, "loss": 0.8068, "mean_token_accuracy": 0.8986742198467255, "num_tokens": 4074217.0, "step": 2274 }, { "epoch": 0.36839122338272207, "grad_norm": 27.614992141723633, "learning_rate": 6.318005181347151e-06, "loss": 0.7049, "mean_token_accuracy": 0.8922211229801178, "num_tokens": 4076015.0, "step": 2275 }, { "epoch": 0.3685531535908024, "grad_norm": 20.41364097595215, "learning_rate": 6.316386010362695e-06, "loss": 0.65, "mean_token_accuracy": 0.9083916246891022, "num_tokens": 4077800.0, "step": 2276 }, { "epoch": 0.3687150837988827, "grad_norm": 32.38789749145508, "learning_rate": 6.314766839378239e-06, "loss": 0.8597, "mean_token_accuracy": 0.9021503329277039, "num_tokens": 4079597.0, "step": 2277 }, { "epoch": 0.368877014006963, "grad_norm": 23.38788414001465, "learning_rate": 6.313147668393783e-06, "loss": 0.7145, "mean_token_accuracy": 0.9045549929141998, "num_tokens": 4081392.0, "step": 2278 }, { "epoch": 0.3690389442150433, "grad_norm": 23.370681762695312, "learning_rate": 6.311528497409327e-06, "loss": 0.6662, "mean_token_accuracy": 0.9151688516139984, "num_tokens": 4083175.0, "step": 2279 }, { "epoch": 0.36920087442312366, "grad_norm": 21.638273239135742, "learning_rate": 6.309909326424871e-06, "loss": 0.6185, "mean_token_accuracy": 0.9138127863407135, "num_tokens": 4084965.0, "step": 2280 }, { "epoch": 0.36936280463120397, "grad_norm": 22.466955184936523, "learning_rate": 6.308290155440415e-06, "loss": 0.6432, "mean_token_accuracy": 0.9143323600292206, "num_tokens": 4086757.0, "step": 2281 }, { "epoch": 0.3695247348392843, "grad_norm": 24.6545352935791, "learning_rate": 6.306670984455959e-06, "loss": 0.6682, "mean_token_accuracy": 0.9001071751117706, "num_tokens": 4088549.0, "step": 2282 }, { "epoch": 0.3696866650473646, "grad_norm": 15.135995864868164, "learning_rate": 6.305051813471503e-06, "loss": 0.5243, "mean_token_accuracy": 0.9280020892620087, "num_tokens": 4090339.0, "step": 2283 }, { "epoch": 0.3698485952554449, "grad_norm": 31.99400520324707, "learning_rate": 6.3034326424870474e-06, "loss": 0.9475, "mean_token_accuracy": 0.8988343775272369, "num_tokens": 4092138.0, "step": 2284 }, { "epoch": 0.3700105254635252, "grad_norm": 22.13029670715332, "learning_rate": 6.301813471502591e-06, "loss": 0.6305, "mean_token_accuracy": 0.9074974656105042, "num_tokens": 4093931.0, "step": 2285 }, { "epoch": 0.37017245567160556, "grad_norm": 19.34627342224121, "learning_rate": 6.3001943005181355e-06, "loss": 0.579, "mean_token_accuracy": 0.9171972870826721, "num_tokens": 4095721.0, "step": 2286 }, { "epoch": 0.37033438587968587, "grad_norm": 24.71289825439453, "learning_rate": 6.298575129533679e-06, "loss": 0.7779, "mean_token_accuracy": 0.8978580832481384, "num_tokens": 4097507.0, "step": 2287 }, { "epoch": 0.3704963160877662, "grad_norm": 28.232088088989258, "learning_rate": 6.2969559585492235e-06, "loss": 0.8462, "mean_token_accuracy": 0.9048126935958862, "num_tokens": 4099303.0, "step": 2288 }, { "epoch": 0.3706582462958465, "grad_norm": 25.628108978271484, "learning_rate": 6.295336787564768e-06, "loss": 0.7317, "mean_token_accuracy": 0.892824113368988, "num_tokens": 4101094.0, "step": 2289 }, { "epoch": 0.3708201765039268, "grad_norm": 19.739723205566406, "learning_rate": 6.2937176165803115e-06, "loss": 0.5759, "mean_token_accuracy": 0.9203381836414337, "num_tokens": 4102882.0, "step": 2290 }, { "epoch": 0.3709821067120071, "grad_norm": 17.226787567138672, "learning_rate": 6.292098445595856e-06, "loss": 0.5839, "mean_token_accuracy": 0.9204832017421722, "num_tokens": 4104670.0, "step": 2291 }, { "epoch": 0.37114403692008746, "grad_norm": 22.36802101135254, "learning_rate": 6.2904792746113995e-06, "loss": 0.6395, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 4106459.0, "step": 2292 }, { "epoch": 0.37130596712816777, "grad_norm": 15.932029724121094, "learning_rate": 6.288860103626944e-06, "loss": 0.4912, "mean_token_accuracy": 0.9266433119773865, "num_tokens": 4108256.0, "step": 2293 }, { "epoch": 0.3714678973362481, "grad_norm": 21.709257125854492, "learning_rate": 6.2872409326424876e-06, "loss": 0.7146, "mean_token_accuracy": 0.9003114402294159, "num_tokens": 4110029.0, "step": 2294 }, { "epoch": 0.3716298275443284, "grad_norm": 35.92877960205078, "learning_rate": 6.285621761658032e-06, "loss": 0.7824, "mean_token_accuracy": 0.8998287618160248, "num_tokens": 4111839.0, "step": 2295 }, { "epoch": 0.3717917577524087, "grad_norm": 18.95516014099121, "learning_rate": 6.284002590673576e-06, "loss": 0.5684, "mean_token_accuracy": 0.9136690497398376, "num_tokens": 4113629.0, "step": 2296 }, { "epoch": 0.37195368796048905, "grad_norm": 21.056535720825195, "learning_rate": 6.28238341968912e-06, "loss": 0.61, "mean_token_accuracy": 0.9245029091835022, "num_tokens": 4115419.0, "step": 2297 }, { "epoch": 0.37211561816856936, "grad_norm": 26.51069450378418, "learning_rate": 6.280764248704664e-06, "loss": 0.632, "mean_token_accuracy": 0.9092245101928711, "num_tokens": 4117217.0, "step": 2298 }, { "epoch": 0.37227754837664967, "grad_norm": 19.568313598632812, "learning_rate": 6.279145077720208e-06, "loss": 0.545, "mean_token_accuracy": 0.9250357449054718, "num_tokens": 4119009.0, "step": 2299 }, { "epoch": 0.37243947858473, "grad_norm": 23.084980010986328, "learning_rate": 6.277525906735752e-06, "loss": 0.615, "mean_token_accuracy": 0.9119922816753387, "num_tokens": 4120805.0, "step": 2300 }, { "epoch": 0.3726014087928103, "grad_norm": 17.704103469848633, "learning_rate": 6.275906735751296e-06, "loss": 0.6366, "mean_token_accuracy": 0.9284208714962006, "num_tokens": 4122582.0, "step": 2301 }, { "epoch": 0.3727633390008906, "grad_norm": 26.454050064086914, "learning_rate": 6.27428756476684e-06, "loss": 0.7545, "mean_token_accuracy": 0.9072712361812592, "num_tokens": 4124383.0, "step": 2302 }, { "epoch": 0.37292526920897096, "grad_norm": 20.5181827545166, "learning_rate": 6.272668393782384e-06, "loss": 0.5578, "mean_token_accuracy": 0.918178141117096, "num_tokens": 4126176.0, "step": 2303 }, { "epoch": 0.37308719941705126, "grad_norm": 28.848773956298828, "learning_rate": 6.271049222797928e-06, "loss": 0.9202, "mean_token_accuracy": 0.8857142925262451, "num_tokens": 4127968.0, "step": 2304 }, { "epoch": 0.37324912962513157, "grad_norm": 18.847166061401367, "learning_rate": 6.269430051813472e-06, "loss": 0.5958, "mean_token_accuracy": 0.9130389094352722, "num_tokens": 4129756.0, "step": 2305 }, { "epoch": 0.3734110598332119, "grad_norm": 30.33755874633789, "learning_rate": 6.267810880829016e-06, "loss": 0.6247, "mean_token_accuracy": 0.9020931124687195, "num_tokens": 4131554.0, "step": 2306 }, { "epoch": 0.3735729900412922, "grad_norm": 15.333966255187988, "learning_rate": 6.26619170984456e-06, "loss": 0.5607, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 4133340.0, "step": 2307 }, { "epoch": 0.3737349202493725, "grad_norm": 17.22572898864746, "learning_rate": 6.264572538860105e-06, "loss": 0.6413, "mean_token_accuracy": 0.9240403473377228, "num_tokens": 4135128.0, "step": 2308 }, { "epoch": 0.37389685045745286, "grad_norm": 23.395233154296875, "learning_rate": 6.262953367875648e-06, "loss": 0.6831, "mean_token_accuracy": 0.9148142039775848, "num_tokens": 4136909.0, "step": 2309 }, { "epoch": 0.37405878066553316, "grad_norm": 25.794370651245117, "learning_rate": 6.261334196891193e-06, "loss": 0.6187, "mean_token_accuracy": 0.9094164371490479, "num_tokens": 4138696.0, "step": 2310 }, { "epoch": 0.37422071087361347, "grad_norm": 21.045114517211914, "learning_rate": 6.259715025906736e-06, "loss": 0.6175, "mean_token_accuracy": 0.919584333896637, "num_tokens": 4140482.0, "step": 2311 }, { "epoch": 0.3743826410816938, "grad_norm": 24.307363510131836, "learning_rate": 6.258095854922281e-06, "loss": 0.6822, "mean_token_accuracy": 0.9070360660552979, "num_tokens": 4142274.0, "step": 2312 }, { "epoch": 0.3745445712897741, "grad_norm": 28.865230560302734, "learning_rate": 6.256476683937824e-06, "loss": 0.7225, "mean_token_accuracy": 0.8979166448116302, "num_tokens": 4144070.0, "step": 2313 }, { "epoch": 0.37470650149785445, "grad_norm": 22.208402633666992, "learning_rate": 6.254857512953369e-06, "loss": 0.6102, "mean_token_accuracy": 0.9111519455909729, "num_tokens": 4145862.0, "step": 2314 }, { "epoch": 0.37486843170593476, "grad_norm": 20.481834411621094, "learning_rate": 6.253238341968912e-06, "loss": 0.5823, "mean_token_accuracy": 0.9198294281959534, "num_tokens": 4147648.0, "step": 2315 }, { "epoch": 0.37503036191401506, "grad_norm": 17.121299743652344, "learning_rate": 6.251619170984457e-06, "loss": 0.5352, "mean_token_accuracy": 0.9275209903717041, "num_tokens": 4149436.0, "step": 2316 }, { "epoch": 0.37519229212209537, "grad_norm": 21.6956729888916, "learning_rate": 6.25e-06, "loss": 0.559, "mean_token_accuracy": 0.9205268919467926, "num_tokens": 4151225.0, "step": 2317 }, { "epoch": 0.3753542223301757, "grad_norm": 20.43297576904297, "learning_rate": 6.248380829015545e-06, "loss": 0.635, "mean_token_accuracy": 0.908904105424881, "num_tokens": 4153023.0, "step": 2318 }, { "epoch": 0.375516152538256, "grad_norm": 23.611289978027344, "learning_rate": 6.246761658031088e-06, "loss": 0.5564, "mean_token_accuracy": 0.9193286001682281, "num_tokens": 4154820.0, "step": 2319 }, { "epoch": 0.37567808274633635, "grad_norm": 31.101444244384766, "learning_rate": 6.245142487046633e-06, "loss": 0.7569, "mean_token_accuracy": 0.8932262659072876, "num_tokens": 4156613.0, "step": 2320 }, { "epoch": 0.37584001295441666, "grad_norm": 27.796873092651367, "learning_rate": 6.243523316062176e-06, "loss": 0.7647, "mean_token_accuracy": 0.9142303168773651, "num_tokens": 4158405.0, "step": 2321 }, { "epoch": 0.37600194316249697, "grad_norm": 19.307470321655273, "learning_rate": 6.241904145077721e-06, "loss": 0.5623, "mean_token_accuracy": 0.9273574352264404, "num_tokens": 4160205.0, "step": 2322 }, { "epoch": 0.3761638733705773, "grad_norm": 32.03840255737305, "learning_rate": 6.240284974093264e-06, "loss": 0.8969, "mean_token_accuracy": 0.88413867354393, "num_tokens": 4161993.0, "step": 2323 }, { "epoch": 0.3763258035786576, "grad_norm": 34.219757080078125, "learning_rate": 6.238665803108809e-06, "loss": 0.8241, "mean_token_accuracy": 0.882758617401123, "num_tokens": 4163795.0, "step": 2324 }, { "epoch": 0.37648773378673794, "grad_norm": 23.71923065185547, "learning_rate": 6.237046632124352e-06, "loss": 0.5972, "mean_token_accuracy": 0.9079736173152924, "num_tokens": 4165578.0, "step": 2325 }, { "epoch": 0.37664966399481825, "grad_norm": 37.05076599121094, "learning_rate": 6.235427461139897e-06, "loss": 1.143, "mean_token_accuracy": 0.8762303590774536, "num_tokens": 4167371.0, "step": 2326 }, { "epoch": 0.37681159420289856, "grad_norm": 19.135000228881836, "learning_rate": 6.233808290155441e-06, "loss": 0.5389, "mean_token_accuracy": 0.9110513627529144, "num_tokens": 4169153.0, "step": 2327 }, { "epoch": 0.37697352441097887, "grad_norm": 18.642662048339844, "learning_rate": 6.232189119170985e-06, "loss": 0.6031, "mean_token_accuracy": 0.9231814444065094, "num_tokens": 4170939.0, "step": 2328 }, { "epoch": 0.3771354546190592, "grad_norm": 28.72759437561035, "learning_rate": 6.230569948186529e-06, "loss": 0.8576, "mean_token_accuracy": 0.8905594348907471, "num_tokens": 4172734.0, "step": 2329 }, { "epoch": 0.3772973848271395, "grad_norm": 28.877880096435547, "learning_rate": 6.228950777202073e-06, "loss": 0.6739, "mean_token_accuracy": 0.9017039239406586, "num_tokens": 4174531.0, "step": 2330 }, { "epoch": 0.37745931503521984, "grad_norm": 29.932432174682617, "learning_rate": 6.227331606217617e-06, "loss": 0.6826, "mean_token_accuracy": 0.9043845534324646, "num_tokens": 4176326.0, "step": 2331 }, { "epoch": 0.37762124524330015, "grad_norm": 25.98511505126953, "learning_rate": 6.225712435233161e-06, "loss": 0.6355, "mean_token_accuracy": 0.9164335429668427, "num_tokens": 4178111.0, "step": 2332 }, { "epoch": 0.37778317545138046, "grad_norm": 25.36676597595215, "learning_rate": 6.224093264248705e-06, "loss": 0.6286, "mean_token_accuracy": 0.916067510843277, "num_tokens": 4179909.0, "step": 2333 }, { "epoch": 0.37794510565946077, "grad_norm": 26.015789031982422, "learning_rate": 6.222474093264249e-06, "loss": 0.8692, "mean_token_accuracy": 0.8996115028858185, "num_tokens": 4181699.0, "step": 2334 }, { "epoch": 0.3781070358675411, "grad_norm": 23.07358741760254, "learning_rate": 6.220854922279793e-06, "loss": 0.5629, "mean_token_accuracy": 0.9124854505062103, "num_tokens": 4183486.0, "step": 2335 }, { "epoch": 0.3782689660756214, "grad_norm": 20.87764549255371, "learning_rate": 6.219235751295337e-06, "loss": 0.6071, "mean_token_accuracy": 0.9129863977432251, "num_tokens": 4185274.0, "step": 2336 }, { "epoch": 0.37843089628370175, "grad_norm": 21.269840240478516, "learning_rate": 6.217616580310881e-06, "loss": 0.5893, "mean_token_accuracy": 0.9165872633457184, "num_tokens": 4187061.0, "step": 2337 }, { "epoch": 0.37859282649178205, "grad_norm": 21.05706787109375, "learning_rate": 6.215997409326425e-06, "loss": 0.5562, "mean_token_accuracy": 0.9277969002723694, "num_tokens": 4188850.0, "step": 2338 }, { "epoch": 0.37875475669986236, "grad_norm": 20.515602111816406, "learning_rate": 6.2143782383419694e-06, "loss": 0.6027, "mean_token_accuracy": 0.9243930280208588, "num_tokens": 4190640.0, "step": 2339 }, { "epoch": 0.37891668690794267, "grad_norm": 24.510225296020508, "learning_rate": 6.212759067357513e-06, "loss": 0.7594, "mean_token_accuracy": 0.8986153602600098, "num_tokens": 4192437.0, "step": 2340 }, { "epoch": 0.379078617116023, "grad_norm": 19.635196685791016, "learning_rate": 6.2111398963730575e-06, "loss": 0.6407, "mean_token_accuracy": 0.9179104566574097, "num_tokens": 4194217.0, "step": 2341 }, { "epoch": 0.37924054732410334, "grad_norm": 21.90341567993164, "learning_rate": 6.209520725388601e-06, "loss": 0.5974, "mean_token_accuracy": 0.9139773845672607, "num_tokens": 4196008.0, "step": 2342 }, { "epoch": 0.37940247753218365, "grad_norm": 18.383474349975586, "learning_rate": 6.2079015544041455e-06, "loss": 0.5168, "mean_token_accuracy": 0.9220010936260223, "num_tokens": 4197789.0, "step": 2343 }, { "epoch": 0.37956440774026395, "grad_norm": 24.39859390258789, "learning_rate": 6.206282383419689e-06, "loss": 0.6109, "mean_token_accuracy": 0.9136646091938019, "num_tokens": 4199579.0, "step": 2344 }, { "epoch": 0.37972633794834426, "grad_norm": 28.421485900878906, "learning_rate": 6.2046632124352335e-06, "loss": 0.7756, "mean_token_accuracy": 0.9034113883972168, "num_tokens": 4201372.0, "step": 2345 }, { "epoch": 0.37988826815642457, "grad_norm": 22.188867568969727, "learning_rate": 6.203044041450778e-06, "loss": 0.5955, "mean_token_accuracy": 0.9139243066310883, "num_tokens": 4203163.0, "step": 2346 }, { "epoch": 0.3800501983645049, "grad_norm": 26.85664176940918, "learning_rate": 6.2014248704663215e-06, "loss": 0.6117, "mean_token_accuracy": 0.9104297459125519, "num_tokens": 4204954.0, "step": 2347 }, { "epoch": 0.38021212857258524, "grad_norm": 21.927711486816406, "learning_rate": 6.199805699481866e-06, "loss": 0.5582, "mean_token_accuracy": 0.9123508334159851, "num_tokens": 4206740.0, "step": 2348 }, { "epoch": 0.38037405878066555, "grad_norm": 33.48126983642578, "learning_rate": 6.1981865284974096e-06, "loss": 0.9131, "mean_token_accuracy": 0.8973762691020966, "num_tokens": 4208554.0, "step": 2349 }, { "epoch": 0.38053598898874585, "grad_norm": 31.373567581176758, "learning_rate": 6.196567357512954e-06, "loss": 0.7561, "mean_token_accuracy": 0.9085317552089691, "num_tokens": 4210350.0, "step": 2350 }, { "epoch": 0.38069791919682616, "grad_norm": 22.877470016479492, "learning_rate": 6.194948186528498e-06, "loss": 0.5837, "mean_token_accuracy": 0.9247430562973022, "num_tokens": 4212141.0, "step": 2351 }, { "epoch": 0.38085984940490647, "grad_norm": 29.325273513793945, "learning_rate": 6.193329015544042e-06, "loss": 0.6223, "mean_token_accuracy": 0.9041364789009094, "num_tokens": 4213935.0, "step": 2352 }, { "epoch": 0.3810217796129868, "grad_norm": 31.117767333984375, "learning_rate": 6.191709844559586e-06, "loss": 0.7902, "mean_token_accuracy": 0.8759579062461853, "num_tokens": 4215737.0, "step": 2353 }, { "epoch": 0.38118370982106714, "grad_norm": 26.284822463989258, "learning_rate": 6.19009067357513e-06, "loss": 0.654, "mean_token_accuracy": 0.9106282591819763, "num_tokens": 4217540.0, "step": 2354 }, { "epoch": 0.38134564002914745, "grad_norm": 22.111026763916016, "learning_rate": 6.188471502590674e-06, "loss": 0.5835, "mean_token_accuracy": 0.9267310798168182, "num_tokens": 4219325.0, "step": 2355 }, { "epoch": 0.38150757023722776, "grad_norm": 27.846500396728516, "learning_rate": 6.186852331606218e-06, "loss": 0.7235, "mean_token_accuracy": 0.9067532122135162, "num_tokens": 4221105.0, "step": 2356 }, { "epoch": 0.38166950044530806, "grad_norm": 25.400686264038086, "learning_rate": 6.185233160621762e-06, "loss": 0.7524, "mean_token_accuracy": 0.9107513427734375, "num_tokens": 4222895.0, "step": 2357 }, { "epoch": 0.38183143065338837, "grad_norm": 18.665996551513672, "learning_rate": 6.183613989637306e-06, "loss": 0.6434, "mean_token_accuracy": 0.9037568271160126, "num_tokens": 4224687.0, "step": 2358 }, { "epoch": 0.38199336086146873, "grad_norm": 14.849250793457031, "learning_rate": 6.18199481865285e-06, "loss": 0.4834, "mean_token_accuracy": 0.9334261417388916, "num_tokens": 4226470.0, "step": 2359 }, { "epoch": 0.38215529106954904, "grad_norm": 21.723913192749023, "learning_rate": 6.180375647668394e-06, "loss": 0.5461, "mean_token_accuracy": 0.9247430562973022, "num_tokens": 4228261.0, "step": 2360 }, { "epoch": 0.38231722127762935, "grad_norm": 24.744943618774414, "learning_rate": 6.178756476683938e-06, "loss": 0.6468, "mean_token_accuracy": 0.911750465631485, "num_tokens": 4230046.0, "step": 2361 }, { "epoch": 0.38247915148570966, "grad_norm": 27.28764533996582, "learning_rate": 6.177137305699482e-06, "loss": 0.6535, "mean_token_accuracy": 0.9223533868789673, "num_tokens": 4231844.0, "step": 2362 }, { "epoch": 0.38264108169378996, "grad_norm": 26.96405792236328, "learning_rate": 6.175518134715026e-06, "loss": 0.6673, "mean_token_accuracy": 0.8986698091030121, "num_tokens": 4233633.0, "step": 2363 }, { "epoch": 0.38280301190187027, "grad_norm": 22.760257720947266, "learning_rate": 6.17389896373057e-06, "loss": 0.6277, "mean_token_accuracy": 0.9163931012153625, "num_tokens": 4235420.0, "step": 2364 }, { "epoch": 0.38296494210995063, "grad_norm": 29.48963165283203, "learning_rate": 6.172279792746115e-06, "loss": 0.756, "mean_token_accuracy": 0.896064817905426, "num_tokens": 4237211.0, "step": 2365 }, { "epoch": 0.38312687231803094, "grad_norm": 21.912670135498047, "learning_rate": 6.170660621761658e-06, "loss": 0.5291, "mean_token_accuracy": 0.9205766022205353, "num_tokens": 4239000.0, "step": 2366 }, { "epoch": 0.38328880252611125, "grad_norm": 19.380645751953125, "learning_rate": 6.169041450777203e-06, "loss": 0.5056, "mean_token_accuracy": 0.9288247227668762, "num_tokens": 4240793.0, "step": 2367 }, { "epoch": 0.38345073273419156, "grad_norm": 19.986095428466797, "learning_rate": 6.167422279792746e-06, "loss": 0.5959, "mean_token_accuracy": 0.920409768819809, "num_tokens": 4242581.0, "step": 2368 }, { "epoch": 0.38361266294227186, "grad_norm": 15.863972663879395, "learning_rate": 6.165803108808291e-06, "loss": 0.5452, "mean_token_accuracy": 0.9266417622566223, "num_tokens": 4244365.0, "step": 2369 }, { "epoch": 0.3837745931503522, "grad_norm": 19.17365837097168, "learning_rate": 6.164183937823834e-06, "loss": 0.5289, "mean_token_accuracy": 0.9221530854701996, "num_tokens": 4246148.0, "step": 2370 }, { "epoch": 0.38393652335843254, "grad_norm": 28.560333251953125, "learning_rate": 6.162564766839379e-06, "loss": 0.7032, "mean_token_accuracy": 0.9154411554336548, "num_tokens": 4247932.0, "step": 2371 }, { "epoch": 0.38409845356651284, "grad_norm": 28.11639976501465, "learning_rate": 6.160945595854922e-06, "loss": 0.8123, "mean_token_accuracy": 0.9031755924224854, "num_tokens": 4249733.0, "step": 2372 }, { "epoch": 0.38426038377459315, "grad_norm": 24.67559242248535, "learning_rate": 6.159326424870467e-06, "loss": 0.6375, "mean_token_accuracy": 0.9055226147174835, "num_tokens": 4251520.0, "step": 2373 }, { "epoch": 0.38442231398267346, "grad_norm": 31.49180793762207, "learning_rate": 6.15770725388601e-06, "loss": 0.7372, "mean_token_accuracy": 0.9009661972522736, "num_tokens": 4253314.0, "step": 2374 }, { "epoch": 0.38458424419075377, "grad_norm": 24.148473739624023, "learning_rate": 6.156088082901555e-06, "loss": 0.5422, "mean_token_accuracy": 0.9177156090736389, "num_tokens": 4255104.0, "step": 2375 }, { "epoch": 0.38474617439883413, "grad_norm": 31.747270584106445, "learning_rate": 6.154468911917098e-06, "loss": 0.8418, "mean_token_accuracy": 0.9109589159488678, "num_tokens": 4256908.0, "step": 2376 }, { "epoch": 0.38490810460691444, "grad_norm": 25.216537475585938, "learning_rate": 6.152849740932643e-06, "loss": 0.5637, "mean_token_accuracy": 0.9107142984867096, "num_tokens": 4258700.0, "step": 2377 }, { "epoch": 0.38507003481499474, "grad_norm": 14.640902519226074, "learning_rate": 6.151230569948186e-06, "loss": 0.5105, "mean_token_accuracy": 0.9241215884685516, "num_tokens": 4260489.0, "step": 2378 }, { "epoch": 0.38523196502307505, "grad_norm": 34.02447509765625, "learning_rate": 6.149611398963731e-06, "loss": 0.9235, "mean_token_accuracy": 0.8783153593540192, "num_tokens": 4262288.0, "step": 2379 }, { "epoch": 0.38539389523115536, "grad_norm": 15.521620750427246, "learning_rate": 6.147992227979274e-06, "loss": 0.5066, "mean_token_accuracy": 0.9316252768039703, "num_tokens": 4264078.0, "step": 2380 }, { "epoch": 0.38555582543923567, "grad_norm": 26.370441436767578, "learning_rate": 6.146373056994819e-06, "loss": 0.6768, "mean_token_accuracy": 0.8977884352207184, "num_tokens": 4265864.0, "step": 2381 }, { "epoch": 0.38571775564731603, "grad_norm": 34.29618453979492, "learning_rate": 6.144753886010362e-06, "loss": 0.8395, "mean_token_accuracy": 0.8916840553283691, "num_tokens": 4267653.0, "step": 2382 }, { "epoch": 0.38587968585539634, "grad_norm": 25.426679611206055, "learning_rate": 6.143134715025907e-06, "loss": 0.6618, "mean_token_accuracy": 0.8966927230358124, "num_tokens": 4269446.0, "step": 2383 }, { "epoch": 0.38604161606347664, "grad_norm": 22.19244956970215, "learning_rate": 6.141515544041451e-06, "loss": 0.513, "mean_token_accuracy": 0.9310924410820007, "num_tokens": 4271234.0, "step": 2384 }, { "epoch": 0.38620354627155695, "grad_norm": 23.652576446533203, "learning_rate": 6.139896373056995e-06, "loss": 0.5381, "mean_token_accuracy": 0.9228723645210266, "num_tokens": 4273019.0, "step": 2385 }, { "epoch": 0.38636547647963726, "grad_norm": 18.39960289001465, "learning_rate": 6.138277202072539e-06, "loss": 0.5066, "mean_token_accuracy": 0.9268921315670013, "num_tokens": 4274804.0, "step": 2386 }, { "epoch": 0.38652740668771757, "grad_norm": 20.631511688232422, "learning_rate": 6.136658031088083e-06, "loss": 0.643, "mean_token_accuracy": 0.9215146005153656, "num_tokens": 4276596.0, "step": 2387 }, { "epoch": 0.38668933689579793, "grad_norm": 24.41177749633789, "learning_rate": 6.135038860103627e-06, "loss": 0.5838, "mean_token_accuracy": 0.9176002144813538, "num_tokens": 4278387.0, "step": 2388 }, { "epoch": 0.38685126710387824, "grad_norm": 41.67818069458008, "learning_rate": 6.133419689119171e-06, "loss": 0.9614, "mean_token_accuracy": 0.8757992386817932, "num_tokens": 4280173.0, "step": 2389 }, { "epoch": 0.38701319731195855, "grad_norm": 21.251855850219727, "learning_rate": 6.131800518134715e-06, "loss": 0.5454, "mean_token_accuracy": 0.9172651469707489, "num_tokens": 4281976.0, "step": 2390 }, { "epoch": 0.38717512752003885, "grad_norm": 29.321077346801758, "learning_rate": 6.130181347150259e-06, "loss": 0.8673, "mean_token_accuracy": 0.8996478021144867, "num_tokens": 4283768.0, "step": 2391 }, { "epoch": 0.38733705772811916, "grad_norm": 30.430627822875977, "learning_rate": 6.128562176165803e-06, "loss": 0.7073, "mean_token_accuracy": 0.9084491729736328, "num_tokens": 4285552.0, "step": 2392 }, { "epoch": 0.3874989879361995, "grad_norm": 33.49610900878906, "learning_rate": 6.126943005181347e-06, "loss": 0.7672, "mean_token_accuracy": 0.8874330222606659, "num_tokens": 4287348.0, "step": 2393 }, { "epoch": 0.38766091814427983, "grad_norm": 24.73077964782715, "learning_rate": 6.125323834196891e-06, "loss": 0.6351, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 4289146.0, "step": 2394 }, { "epoch": 0.38782284835236014, "grad_norm": 33.180877685546875, "learning_rate": 6.123704663212435e-06, "loss": 0.7255, "mean_token_accuracy": 0.9076961874961853, "num_tokens": 4290940.0, "step": 2395 }, { "epoch": 0.38798477856044045, "grad_norm": 35.44948196411133, "learning_rate": 6.1220854922279794e-06, "loss": 0.9667, "mean_token_accuracy": 0.8878780007362366, "num_tokens": 4292729.0, "step": 2396 }, { "epoch": 0.38814670876852075, "grad_norm": 29.867279052734375, "learning_rate": 6.120466321243523e-06, "loss": 0.6441, "mean_token_accuracy": 0.9082009792327881, "num_tokens": 4294524.0, "step": 2397 }, { "epoch": 0.38830863897660106, "grad_norm": 17.9906005859375, "learning_rate": 6.1188471502590675e-06, "loss": 0.6241, "mean_token_accuracy": 0.9147235453128815, "num_tokens": 4296294.0, "step": 2398 }, { "epoch": 0.3884705691846814, "grad_norm": 29.192676544189453, "learning_rate": 6.117227979274611e-06, "loss": 0.6341, "mean_token_accuracy": 0.8953312635421753, "num_tokens": 4298083.0, "step": 2399 }, { "epoch": 0.38863249939276173, "grad_norm": 22.05814552307129, "learning_rate": 6.1156088082901555e-06, "loss": 0.5492, "mean_token_accuracy": 0.9166885912418365, "num_tokens": 4299871.0, "step": 2400 }, { "epoch": 0.38879442960084204, "grad_norm": 23.100486755371094, "learning_rate": 6.113989637305699e-06, "loss": 0.5409, "mean_token_accuracy": 0.9197114408016205, "num_tokens": 4301669.0, "step": 2401 }, { "epoch": 0.38895635980892235, "grad_norm": 26.80182647705078, "learning_rate": 6.1123704663212435e-06, "loss": 0.6037, "mean_token_accuracy": 0.9133472442626953, "num_tokens": 4303458.0, "step": 2402 }, { "epoch": 0.38911829001700265, "grad_norm": 31.55597686767578, "learning_rate": 6.110751295336788e-06, "loss": 0.9219, "mean_token_accuracy": 0.8886907696723938, "num_tokens": 4305264.0, "step": 2403 }, { "epoch": 0.38928022022508296, "grad_norm": 31.774499893188477, "learning_rate": 6.1091321243523316e-06, "loss": 0.8408, "mean_token_accuracy": 0.8809821903705597, "num_tokens": 4307053.0, "step": 2404 }, { "epoch": 0.3894421504331633, "grad_norm": 27.53371238708496, "learning_rate": 6.107512953367876e-06, "loss": 0.6792, "mean_token_accuracy": 0.9090012311935425, "num_tokens": 4308839.0, "step": 2405 }, { "epoch": 0.38960408064124363, "grad_norm": 21.76186180114746, "learning_rate": 6.10589378238342e-06, "loss": 0.5684, "mean_token_accuracy": 0.925220400094986, "num_tokens": 4310632.0, "step": 2406 }, { "epoch": 0.38976601084932394, "grad_norm": 16.794729232788086, "learning_rate": 6.104274611398964e-06, "loss": 0.5307, "mean_token_accuracy": 0.9231884181499481, "num_tokens": 4312417.0, "step": 2407 }, { "epoch": 0.38992794105740425, "grad_norm": 19.790908813476562, "learning_rate": 6.102655440414508e-06, "loss": 0.6876, "mean_token_accuracy": 0.9195210933685303, "num_tokens": 4314202.0, "step": 2408 }, { "epoch": 0.39008987126548456, "grad_norm": 27.3967227935791, "learning_rate": 6.101036269430052e-06, "loss": 0.6574, "mean_token_accuracy": 0.9037568271160126, "num_tokens": 4315994.0, "step": 2409 }, { "epoch": 0.3902518014735649, "grad_norm": 27.468692779541016, "learning_rate": 6.099417098445596e-06, "loss": 0.6894, "mean_token_accuracy": 0.9087953269481659, "num_tokens": 4317780.0, "step": 2410 }, { "epoch": 0.3904137316816452, "grad_norm": 13.325836181640625, "learning_rate": 6.09779792746114e-06, "loss": 0.5389, "mean_token_accuracy": 0.9294127523899078, "num_tokens": 4319561.0, "step": 2411 }, { "epoch": 0.39057566188972553, "grad_norm": 23.503740310668945, "learning_rate": 6.096178756476684e-06, "loss": 0.6462, "mean_token_accuracy": 0.9142034649848938, "num_tokens": 4321363.0, "step": 2412 }, { "epoch": 0.39073759209780584, "grad_norm": 23.477584838867188, "learning_rate": 6.094559585492228e-06, "loss": 0.7222, "mean_token_accuracy": 0.9085765480995178, "num_tokens": 4323159.0, "step": 2413 }, { "epoch": 0.39089952230588615, "grad_norm": 27.98770523071289, "learning_rate": 6.092940414507773e-06, "loss": 0.6692, "mean_token_accuracy": 0.9034899771213531, "num_tokens": 4324951.0, "step": 2414 }, { "epoch": 0.39106145251396646, "grad_norm": 23.318613052368164, "learning_rate": 6.091321243523317e-06, "loss": 0.6385, "mean_token_accuracy": 0.9146631062030792, "num_tokens": 4326756.0, "step": 2415 }, { "epoch": 0.3912233827220468, "grad_norm": 20.08234977722168, "learning_rate": 6.089702072538861e-06, "loss": 0.6044, "mean_token_accuracy": 0.9100168347358704, "num_tokens": 4328535.0, "step": 2416 }, { "epoch": 0.3913853129301271, "grad_norm": 24.074081420898438, "learning_rate": 6.088082901554405e-06, "loss": 0.7044, "mean_token_accuracy": 0.9077968001365662, "num_tokens": 4330329.0, "step": 2417 }, { "epoch": 0.39154724313820743, "grad_norm": 29.092697143554688, "learning_rate": 6.0864637305699494e-06, "loss": 0.8586, "mean_token_accuracy": 0.88978111743927, "num_tokens": 4332122.0, "step": 2418 }, { "epoch": 0.39170917334628774, "grad_norm": 20.12398338317871, "learning_rate": 6.084844559585493e-06, "loss": 0.5945, "mean_token_accuracy": 0.9107279777526855, "num_tokens": 4333914.0, "step": 2419 }, { "epoch": 0.39187110355436805, "grad_norm": 23.617050170898438, "learning_rate": 6.0832253886010375e-06, "loss": 0.584, "mean_token_accuracy": 0.9223276674747467, "num_tokens": 4335709.0, "step": 2420 }, { "epoch": 0.39203303376244836, "grad_norm": 27.276884078979492, "learning_rate": 6.081606217616581e-06, "loss": 0.8358, "mean_token_accuracy": 0.8941934108734131, "num_tokens": 4337501.0, "step": 2421 }, { "epoch": 0.3921949639705287, "grad_norm": 24.665903091430664, "learning_rate": 6.0799870466321255e-06, "loss": 0.6515, "mean_token_accuracy": 0.9157529175281525, "num_tokens": 4339297.0, "step": 2422 }, { "epoch": 0.39235689417860903, "grad_norm": 20.519485473632812, "learning_rate": 6.078367875647669e-06, "loss": 0.5719, "mean_token_accuracy": 0.9202898740768433, "num_tokens": 4341085.0, "step": 2423 }, { "epoch": 0.39251882438668934, "grad_norm": 23.69145393371582, "learning_rate": 6.0767487046632135e-06, "loss": 0.727, "mean_token_accuracy": 0.9065680205821991, "num_tokens": 4342876.0, "step": 2424 }, { "epoch": 0.39268075459476964, "grad_norm": 25.522798538208008, "learning_rate": 6.075129533678757e-06, "loss": 0.6424, "mean_token_accuracy": 0.9090236127376556, "num_tokens": 4344663.0, "step": 2425 }, { "epoch": 0.39284268480284995, "grad_norm": 17.07276153564453, "learning_rate": 6.0735103626943015e-06, "loss": 0.5262, "mean_token_accuracy": 0.9265037775039673, "num_tokens": 4346448.0, "step": 2426 }, { "epoch": 0.3930046150109303, "grad_norm": 13.80805492401123, "learning_rate": 6.071891191709845e-06, "loss": 0.4956, "mean_token_accuracy": 0.9304717183113098, "num_tokens": 4348248.0, "step": 2427 }, { "epoch": 0.3931665452190106, "grad_norm": 18.124977111816406, "learning_rate": 6.0702720207253896e-06, "loss": 0.6996, "mean_token_accuracy": 0.9284613132476807, "num_tokens": 4350039.0, "step": 2428 }, { "epoch": 0.39332847542709093, "grad_norm": 22.377042770385742, "learning_rate": 6.068652849740934e-06, "loss": 0.5824, "mean_token_accuracy": 0.9244702756404877, "num_tokens": 4351827.0, "step": 2429 }, { "epoch": 0.39349040563517124, "grad_norm": 18.47957420349121, "learning_rate": 6.067033678756478e-06, "loss": 0.5664, "mean_token_accuracy": 0.9189484119415283, "num_tokens": 4353623.0, "step": 2430 }, { "epoch": 0.39365233584325154, "grad_norm": 17.989002227783203, "learning_rate": 6.065414507772022e-06, "loss": 0.54, "mean_token_accuracy": 0.9215906858444214, "num_tokens": 4355415.0, "step": 2431 }, { "epoch": 0.39381426605133185, "grad_norm": 30.640718460083008, "learning_rate": 6.063795336787566e-06, "loss": 0.7541, "mean_token_accuracy": 0.8983498811721802, "num_tokens": 4357222.0, "step": 2432 }, { "epoch": 0.3939761962594122, "grad_norm": 21.26382064819336, "learning_rate": 6.06217616580311e-06, "loss": 0.5673, "mean_token_accuracy": 0.9168076515197754, "num_tokens": 4359022.0, "step": 2433 }, { "epoch": 0.3941381264674925, "grad_norm": 17.125774383544922, "learning_rate": 6.060556994818654e-06, "loss": 0.5826, "mean_token_accuracy": 0.9148924648761749, "num_tokens": 4360804.0, "step": 2434 }, { "epoch": 0.39430005667557283, "grad_norm": 19.612777709960938, "learning_rate": 6.058937823834198e-06, "loss": 0.6028, "mean_token_accuracy": 0.9131964445114136, "num_tokens": 4362592.0, "step": 2435 }, { "epoch": 0.39446198688365314, "grad_norm": 19.06717300415039, "learning_rate": 6.057318652849742e-06, "loss": 0.5443, "mean_token_accuracy": 0.9174337685108185, "num_tokens": 4364371.0, "step": 2436 }, { "epoch": 0.39462391709173344, "grad_norm": 17.325023651123047, "learning_rate": 6.055699481865286e-06, "loss": 0.6022, "mean_token_accuracy": 0.9102470278739929, "num_tokens": 4366150.0, "step": 2437 }, { "epoch": 0.3947858472998138, "grad_norm": 27.475378036499023, "learning_rate": 6.05408031088083e-06, "loss": 0.8074, "mean_token_accuracy": 0.897081196308136, "num_tokens": 4367943.0, "step": 2438 }, { "epoch": 0.3949477775078941, "grad_norm": 16.568401336669922, "learning_rate": 6.052461139896374e-06, "loss": 0.5083, "mean_token_accuracy": 0.9233269393444061, "num_tokens": 4369729.0, "step": 2439 }, { "epoch": 0.3951097077159744, "grad_norm": 24.641258239746094, "learning_rate": 6.050841968911918e-06, "loss": 0.63, "mean_token_accuracy": 0.9069159030914307, "num_tokens": 4371531.0, "step": 2440 }, { "epoch": 0.39527163792405473, "grad_norm": 21.62548065185547, "learning_rate": 6.049222797927462e-06, "loss": 0.6217, "mean_token_accuracy": 0.9185331463813782, "num_tokens": 4373326.0, "step": 2441 }, { "epoch": 0.39543356813213504, "grad_norm": 17.75751304626465, "learning_rate": 6.047603626943006e-06, "loss": 0.4965, "mean_token_accuracy": 0.9304511249065399, "num_tokens": 4375111.0, "step": 2442 }, { "epoch": 0.39559549834021535, "grad_norm": 23.21023178100586, "learning_rate": 6.04598445595855e-06, "loss": 0.6059, "mean_token_accuracy": 0.9090500473976135, "num_tokens": 4376898.0, "step": 2443 }, { "epoch": 0.3957574285482957, "grad_norm": 33.61475372314453, "learning_rate": 6.044365284974094e-06, "loss": 0.7537, "mean_token_accuracy": 0.9015286862850189, "num_tokens": 4378694.0, "step": 2444 }, { "epoch": 0.395919358756376, "grad_norm": 26.48158073425293, "learning_rate": 6.042746113989638e-06, "loss": 0.6273, "mean_token_accuracy": 0.9106450378894806, "num_tokens": 4380486.0, "step": 2445 }, { "epoch": 0.3960812889644563, "grad_norm": 19.514726638793945, "learning_rate": 6.041126943005182e-06, "loss": 0.565, "mean_token_accuracy": 0.9172885715961456, "num_tokens": 4382264.0, "step": 2446 }, { "epoch": 0.39624321917253663, "grad_norm": 22.42393684387207, "learning_rate": 6.039507772020726e-06, "loss": 0.6092, "mean_token_accuracy": 0.9151678681373596, "num_tokens": 4384059.0, "step": 2447 }, { "epoch": 0.39640514938061694, "grad_norm": 33.834651947021484, "learning_rate": 6.037888601036271e-06, "loss": 0.964, "mean_token_accuracy": 0.8863383233547211, "num_tokens": 4385852.0, "step": 2448 }, { "epoch": 0.39656707958869725, "grad_norm": 25.878007888793945, "learning_rate": 6.036269430051814e-06, "loss": 0.6084, "mean_token_accuracy": 0.9095016121864319, "num_tokens": 4387651.0, "step": 2449 }, { "epoch": 0.3967290097967776, "grad_norm": 33.61030197143555, "learning_rate": 6.034650259067359e-06, "loss": 0.8716, "mean_token_accuracy": 0.8814473152160645, "num_tokens": 4389450.0, "step": 2450 }, { "epoch": 0.3968909400048579, "grad_norm": 24.599184036254883, "learning_rate": 6.033031088082902e-06, "loss": 0.5835, "mean_token_accuracy": 0.9265101552009583, "num_tokens": 4391248.0, "step": 2451 }, { "epoch": 0.3970528702129382, "grad_norm": 36.088565826416016, "learning_rate": 6.031411917098447e-06, "loss": 0.6652, "mean_token_accuracy": 0.8985527157783508, "num_tokens": 4393056.0, "step": 2452 }, { "epoch": 0.39721480042101853, "grad_norm": 23.43155860900879, "learning_rate": 6.02979274611399e-06, "loss": 0.763, "mean_token_accuracy": 0.9103802740573883, "num_tokens": 4394847.0, "step": 2453 }, { "epoch": 0.39737673062909884, "grad_norm": 27.536840438842773, "learning_rate": 6.028173575129535e-06, "loss": 0.6048, "mean_token_accuracy": 0.9161564707756042, "num_tokens": 4396646.0, "step": 2454 }, { "epoch": 0.3975386608371792, "grad_norm": 27.58855438232422, "learning_rate": 6.026554404145078e-06, "loss": 0.7231, "mean_token_accuracy": 0.9101727306842804, "num_tokens": 4398437.0, "step": 2455 }, { "epoch": 0.3977005910452595, "grad_norm": 29.029890060424805, "learning_rate": 6.024935233160623e-06, "loss": 0.8758, "mean_token_accuracy": 0.9041210412979126, "num_tokens": 4400230.0, "step": 2456 }, { "epoch": 0.3978625212533398, "grad_norm": 29.050634384155273, "learning_rate": 6.023316062176166e-06, "loss": 0.7283, "mean_token_accuracy": 0.9099523723125458, "num_tokens": 4402020.0, "step": 2457 }, { "epoch": 0.3980244514614201, "grad_norm": 27.5640811920166, "learning_rate": 6.021696891191711e-06, "loss": 0.6544, "mean_token_accuracy": 0.9092592597007751, "num_tokens": 4403807.0, "step": 2458 }, { "epoch": 0.39818638166950043, "grad_norm": 15.775254249572754, "learning_rate": 6.020077720207254e-06, "loss": 0.5347, "mean_token_accuracy": 0.9245341718196869, "num_tokens": 4405597.0, "step": 2459 }, { "epoch": 0.39834831187758074, "grad_norm": 31.133451461791992, "learning_rate": 6.018458549222799e-06, "loss": 0.7803, "mean_token_accuracy": 0.8978846967220306, "num_tokens": 4407393.0, "step": 2460 }, { "epoch": 0.3985102420856611, "grad_norm": 23.698078155517578, "learning_rate": 6.016839378238342e-06, "loss": 0.6928, "mean_token_accuracy": 0.9162943363189697, "num_tokens": 4409192.0, "step": 2461 }, { "epoch": 0.3986721722937414, "grad_norm": 22.433738708496094, "learning_rate": 6.015220207253887e-06, "loss": 0.5842, "mean_token_accuracy": 0.9181863963603973, "num_tokens": 4410985.0, "step": 2462 }, { "epoch": 0.3988341025018217, "grad_norm": 27.3464412689209, "learning_rate": 6.0136010362694304e-06, "loss": 0.7313, "mean_token_accuracy": 0.8985449969768524, "num_tokens": 4412772.0, "step": 2463 }, { "epoch": 0.398996032709902, "grad_norm": 15.620551109313965, "learning_rate": 6.011981865284975e-06, "loss": 0.5383, "mean_token_accuracy": 0.9259096682071686, "num_tokens": 4414554.0, "step": 2464 }, { "epoch": 0.39915796291798233, "grad_norm": 30.190011978149414, "learning_rate": 6.0103626943005185e-06, "loss": 0.7134, "mean_token_accuracy": 0.9055112302303314, "num_tokens": 4416351.0, "step": 2465 }, { "epoch": 0.39931989312606264, "grad_norm": 21.78976821899414, "learning_rate": 6.008743523316063e-06, "loss": 0.638, "mean_token_accuracy": 0.9120462834835052, "num_tokens": 4418135.0, "step": 2466 }, { "epoch": 0.399481823334143, "grad_norm": 23.980592727661133, "learning_rate": 6.007124352331607e-06, "loss": 0.6598, "mean_token_accuracy": 0.9081102907657623, "num_tokens": 4419919.0, "step": 2467 }, { "epoch": 0.3996437535422233, "grad_norm": 29.508264541625977, "learning_rate": 6.005505181347151e-06, "loss": 0.6362, "mean_token_accuracy": 0.9135153293609619, "num_tokens": 4421719.0, "step": 2468 }, { "epoch": 0.3998056837503036, "grad_norm": 20.48666763305664, "learning_rate": 6.003886010362695e-06, "loss": 0.6011, "mean_token_accuracy": 0.9116883277893066, "num_tokens": 4423503.0, "step": 2469 }, { "epoch": 0.3999676139583839, "grad_norm": 22.19963836669922, "learning_rate": 6.002266839378239e-06, "loss": 0.5748, "mean_token_accuracy": 0.9145896732807159, "num_tokens": 4425296.0, "step": 2470 }, { "epoch": 0.40012954416646423, "grad_norm": 18.056947708129883, "learning_rate": 6.000647668393783e-06, "loss": 0.5402, "mean_token_accuracy": 0.9283071458339691, "num_tokens": 4427087.0, "step": 2471 }, { "epoch": 0.4002914743745446, "grad_norm": 24.245954513549805, "learning_rate": 5.999028497409327e-06, "loss": 0.6585, "mean_token_accuracy": 0.9065713882446289, "num_tokens": 4428878.0, "step": 2472 }, { "epoch": 0.4004534045826249, "grad_norm": 30.069059371948242, "learning_rate": 5.997409326424871e-06, "loss": 0.7756, "mean_token_accuracy": 0.9020979106426239, "num_tokens": 4430676.0, "step": 2473 }, { "epoch": 0.4006153347907052, "grad_norm": 16.61443328857422, "learning_rate": 5.995790155440415e-06, "loss": 0.5294, "mean_token_accuracy": 0.929848313331604, "num_tokens": 4432473.0, "step": 2474 }, { "epoch": 0.4007772649987855, "grad_norm": 23.960824966430664, "learning_rate": 5.9941709844559594e-06, "loss": 0.7774, "mean_token_accuracy": 0.9051418602466583, "num_tokens": 4434270.0, "step": 2475 }, { "epoch": 0.40093919520686583, "grad_norm": 23.24922752380371, "learning_rate": 5.992551813471503e-06, "loss": 0.7104, "mean_token_accuracy": 0.9059259295463562, "num_tokens": 4436067.0, "step": 2476 }, { "epoch": 0.40110112541494614, "grad_norm": 25.58210563659668, "learning_rate": 5.9909326424870475e-06, "loss": 0.7281, "mean_token_accuracy": 0.9025649130344391, "num_tokens": 4437856.0, "step": 2477 }, { "epoch": 0.4012630556230265, "grad_norm": 27.10450553894043, "learning_rate": 5.989313471502591e-06, "loss": 0.721, "mean_token_accuracy": 0.9046140313148499, "num_tokens": 4439653.0, "step": 2478 }, { "epoch": 0.4014249858311068, "grad_norm": 23.10318374633789, "learning_rate": 5.9876943005181355e-06, "loss": 0.6368, "mean_token_accuracy": 0.9073809385299683, "num_tokens": 4441455.0, "step": 2479 }, { "epoch": 0.4015869160391871, "grad_norm": 22.148962020874023, "learning_rate": 5.986075129533679e-06, "loss": 0.6759, "mean_token_accuracy": 0.9035920202732086, "num_tokens": 4443247.0, "step": 2480 }, { "epoch": 0.4017488462472674, "grad_norm": 22.3599796295166, "learning_rate": 5.9844559585492235e-06, "loss": 0.6438, "mean_token_accuracy": 0.9138582646846771, "num_tokens": 4445037.0, "step": 2481 }, { "epoch": 0.40191077645534773, "grad_norm": 25.71211814880371, "learning_rate": 5.982836787564767e-06, "loss": 0.7741, "mean_token_accuracy": 0.9142748713493347, "num_tokens": 4446830.0, "step": 2482 }, { "epoch": 0.40207270666342804, "grad_norm": 16.857341766357422, "learning_rate": 5.9812176165803116e-06, "loss": 0.6919, "mean_token_accuracy": 0.9236375987529755, "num_tokens": 4448617.0, "step": 2483 }, { "epoch": 0.4022346368715084, "grad_norm": 25.236000061035156, "learning_rate": 5.979598445595855e-06, "loss": 0.7004, "mean_token_accuracy": 0.9066280722618103, "num_tokens": 4450406.0, "step": 2484 }, { "epoch": 0.4023965670795887, "grad_norm": 22.98699378967285, "learning_rate": 5.9779792746114e-06, "loss": 0.5689, "mean_token_accuracy": 0.926809549331665, "num_tokens": 4452204.0, "step": 2485 }, { "epoch": 0.402558497287669, "grad_norm": 20.611967086791992, "learning_rate": 5.976360103626944e-06, "loss": 0.58, "mean_token_accuracy": 0.9121921360492706, "num_tokens": 4454001.0, "step": 2486 }, { "epoch": 0.4027204274957493, "grad_norm": 25.8209228515625, "learning_rate": 5.974740932642488e-06, "loss": 0.662, "mean_token_accuracy": 0.9118140041828156, "num_tokens": 4455785.0, "step": 2487 }, { "epoch": 0.40288235770382963, "grad_norm": 19.91486167907715, "learning_rate": 5.973121761658032e-06, "loss": 0.5329, "mean_token_accuracy": 0.9110361337661743, "num_tokens": 4457576.0, "step": 2488 }, { "epoch": 0.40304428791191, "grad_norm": 19.50437355041504, "learning_rate": 5.971502590673576e-06, "loss": 0.622, "mean_token_accuracy": 0.9277893602848053, "num_tokens": 4459365.0, "step": 2489 }, { "epoch": 0.4032062181199903, "grad_norm": 22.586467742919922, "learning_rate": 5.96988341968912e-06, "loss": 0.6572, "mean_token_accuracy": 0.923235297203064, "num_tokens": 4461163.0, "step": 2490 }, { "epoch": 0.4033681483280706, "grad_norm": 22.85340690612793, "learning_rate": 5.968264248704664e-06, "loss": 0.6753, "mean_token_accuracy": 0.9136938452720642, "num_tokens": 4462954.0, "step": 2491 }, { "epoch": 0.4035300785361509, "grad_norm": 27.774314880371094, "learning_rate": 5.966645077720208e-06, "loss": 0.7819, "mean_token_accuracy": 0.8940350711345673, "num_tokens": 4464768.0, "step": 2492 }, { "epoch": 0.4036920087442312, "grad_norm": 23.874713897705078, "learning_rate": 5.965025906735752e-06, "loss": 0.6301, "mean_token_accuracy": 0.9095029234886169, "num_tokens": 4466567.0, "step": 2493 }, { "epoch": 0.40385393895231153, "grad_norm": 24.43355941772461, "learning_rate": 5.963406735751296e-06, "loss": 0.8181, "mean_token_accuracy": 0.9071428775787354, "num_tokens": 4468359.0, "step": 2494 }, { "epoch": 0.4040158691603919, "grad_norm": 25.318588256835938, "learning_rate": 5.96178756476684e-06, "loss": 0.8076, "mean_token_accuracy": 0.89723339676857, "num_tokens": 4470153.0, "step": 2495 }, { "epoch": 0.4041777993684722, "grad_norm": 28.589645385742188, "learning_rate": 5.960168393782384e-06, "loss": 0.8128, "mean_token_accuracy": 0.9009498357772827, "num_tokens": 4471957.0, "step": 2496 }, { "epoch": 0.4043397295765525, "grad_norm": 25.1281681060791, "learning_rate": 5.958549222797928e-06, "loss": 0.6572, "mean_token_accuracy": 0.9164653122425079, "num_tokens": 4473768.0, "step": 2497 }, { "epoch": 0.4045016597846328, "grad_norm": 21.27931785583496, "learning_rate": 5.956930051813472e-06, "loss": 0.6308, "mean_token_accuracy": 0.9097070395946503, "num_tokens": 4475557.0, "step": 2498 }, { "epoch": 0.4046635899927131, "grad_norm": 17.604877471923828, "learning_rate": 5.955310880829016e-06, "loss": 0.6041, "mean_token_accuracy": 0.9176002144813538, "num_tokens": 4477348.0, "step": 2499 }, { "epoch": 0.40482552020079343, "grad_norm": 21.642637252807617, "learning_rate": 5.95369170984456e-06, "loss": 0.6268, "mean_token_accuracy": 0.9219181835651398, "num_tokens": 4479129.0, "step": 2500 }, { "epoch": 0.4049874504088738, "grad_norm": 26.70907211303711, "learning_rate": 5.952072538860104e-06, "loss": 0.8711, "mean_token_accuracy": 0.8918569087982178, "num_tokens": 4480919.0, "step": 2501 }, { "epoch": 0.4051493806169541, "grad_norm": 24.135648727416992, "learning_rate": 5.950453367875648e-06, "loss": 0.6191, "mean_token_accuracy": 0.9158806204795837, "num_tokens": 4482705.0, "step": 2502 }, { "epoch": 0.4053113108250344, "grad_norm": 21.401960372924805, "learning_rate": 5.948834196891193e-06, "loss": 0.6786, "mean_token_accuracy": 0.9153079688549042, "num_tokens": 4484499.0, "step": 2503 }, { "epoch": 0.4054732410331147, "grad_norm": 17.22169303894043, "learning_rate": 5.947215025906736e-06, "loss": 0.5788, "mean_token_accuracy": 0.9126032292842865, "num_tokens": 4486285.0, "step": 2504 }, { "epoch": 0.405635171241195, "grad_norm": 22.238935470581055, "learning_rate": 5.945595854922281e-06, "loss": 0.5978, "mean_token_accuracy": 0.9169968664646149, "num_tokens": 4488074.0, "step": 2505 }, { "epoch": 0.4057971014492754, "grad_norm": 20.01361656188965, "learning_rate": 5.943976683937824e-06, "loss": 0.5587, "mean_token_accuracy": 0.9195588231086731, "num_tokens": 4489872.0, "step": 2506 }, { "epoch": 0.4059590316573557, "grad_norm": 21.853452682495117, "learning_rate": 5.942357512953369e-06, "loss": 0.5928, "mean_token_accuracy": 0.9114184975624084, "num_tokens": 4491666.0, "step": 2507 }, { "epoch": 0.406120961865436, "grad_norm": 14.27613353729248, "learning_rate": 5.940738341968912e-06, "loss": 0.5134, "mean_token_accuracy": 0.9321029782295227, "num_tokens": 4493443.0, "step": 2508 }, { "epoch": 0.4062828920735163, "grad_norm": 20.791099548339844, "learning_rate": 5.939119170984457e-06, "loss": 0.6977, "mean_token_accuracy": 0.9173325896263123, "num_tokens": 4495245.0, "step": 2509 }, { "epoch": 0.4064448222815966, "grad_norm": 17.984691619873047, "learning_rate": 5.9375e-06, "loss": 0.5653, "mean_token_accuracy": 0.9258756041526794, "num_tokens": 4497039.0, "step": 2510 }, { "epoch": 0.4066067524896769, "grad_norm": 26.65800666809082, "learning_rate": 5.935880829015545e-06, "loss": 0.6621, "mean_token_accuracy": 0.9114285707473755, "num_tokens": 4498841.0, "step": 2511 }, { "epoch": 0.4067686826977573, "grad_norm": 24.68552017211914, "learning_rate": 5.934261658031088e-06, "loss": 0.7004, "mean_token_accuracy": 0.9046815931797028, "num_tokens": 4500635.0, "step": 2512 }, { "epoch": 0.4069306129058376, "grad_norm": 19.272958755493164, "learning_rate": 5.932642487046633e-06, "loss": 0.5992, "mean_token_accuracy": 0.9239901900291443, "num_tokens": 4502424.0, "step": 2513 }, { "epoch": 0.4070925431139179, "grad_norm": 16.699970245361328, "learning_rate": 5.931023316062176e-06, "loss": 0.5159, "mean_token_accuracy": 0.9236669540405273, "num_tokens": 4504211.0, "step": 2514 }, { "epoch": 0.4072544733219982, "grad_norm": 27.650733947753906, "learning_rate": 5.929404145077721e-06, "loss": 0.7357, "mean_token_accuracy": 0.896987795829773, "num_tokens": 4506014.0, "step": 2515 }, { "epoch": 0.4074164035300785, "grad_norm": 25.33139991760254, "learning_rate": 5.927784974093264e-06, "loss": 0.6255, "mean_token_accuracy": 0.9077353179454803, "num_tokens": 4507819.0, "step": 2516 }, { "epoch": 0.4075783337381588, "grad_norm": 20.26680564880371, "learning_rate": 5.926165803108809e-06, "loss": 0.5968, "mean_token_accuracy": 0.9172661602497101, "num_tokens": 4509609.0, "step": 2517 }, { "epoch": 0.4077402639462392, "grad_norm": 23.118213653564453, "learning_rate": 5.9245466321243524e-06, "loss": 0.6447, "mean_token_accuracy": 0.9091197550296783, "num_tokens": 4511396.0, "step": 2518 }, { "epoch": 0.4079021941543195, "grad_norm": 24.033912658691406, "learning_rate": 5.922927461139897e-06, "loss": 0.6026, "mean_token_accuracy": 0.9103405475616455, "num_tokens": 4513198.0, "step": 2519 }, { "epoch": 0.4080641243623998, "grad_norm": 26.5091495513916, "learning_rate": 5.9213082901554405e-06, "loss": 0.7549, "mean_token_accuracy": 0.9003831446170807, "num_tokens": 4514990.0, "step": 2520 }, { "epoch": 0.4082260545704801, "grad_norm": 25.187744140625, "learning_rate": 5.919689119170985e-06, "loss": 0.5909, "mean_token_accuracy": 0.9122793972492218, "num_tokens": 4516786.0, "step": 2521 }, { "epoch": 0.4083879847785604, "grad_norm": 18.701908111572266, "learning_rate": 5.918069948186529e-06, "loss": 0.5936, "mean_token_accuracy": 0.9224390387535095, "num_tokens": 4518582.0, "step": 2522 }, { "epoch": 0.4085499149866408, "grad_norm": 29.930334091186523, "learning_rate": 5.916450777202073e-06, "loss": 0.8413, "mean_token_accuracy": 0.9078470766544342, "num_tokens": 4520376.0, "step": 2523 }, { "epoch": 0.4087118451947211, "grad_norm": 28.683332443237305, "learning_rate": 5.914831606217617e-06, "loss": 0.8563, "mean_token_accuracy": 0.8981540501117706, "num_tokens": 4522163.0, "step": 2524 }, { "epoch": 0.4088737754028014, "grad_norm": 22.226905822753906, "learning_rate": 5.913212435233161e-06, "loss": 0.5779, "mean_token_accuracy": 0.9238445162773132, "num_tokens": 4523951.0, "step": 2525 }, { "epoch": 0.4090357056108817, "grad_norm": 21.099422454833984, "learning_rate": 5.911593264248705e-06, "loss": 0.6355, "mean_token_accuracy": 0.9116721749305725, "num_tokens": 4525746.0, "step": 2526 }, { "epoch": 0.409197635818962, "grad_norm": 26.98863983154297, "learning_rate": 5.909974093264249e-06, "loss": 0.8536, "mean_token_accuracy": 0.9043259620666504, "num_tokens": 4527540.0, "step": 2527 }, { "epoch": 0.4093595660270423, "grad_norm": 21.997154235839844, "learning_rate": 5.908354922279793e-06, "loss": 0.5245, "mean_token_accuracy": 0.9259183704853058, "num_tokens": 4529349.0, "step": 2528 }, { "epoch": 0.4095214962351227, "grad_norm": 19.95395851135254, "learning_rate": 5.906735751295337e-06, "loss": 0.573, "mean_token_accuracy": 0.91847363114357, "num_tokens": 4531143.0, "step": 2529 }, { "epoch": 0.409683426443203, "grad_norm": 31.12894630432129, "learning_rate": 5.9051165803108814e-06, "loss": 0.7347, "mean_token_accuracy": 0.9041050970554352, "num_tokens": 4532947.0, "step": 2530 }, { "epoch": 0.4098453566512833, "grad_norm": 20.292251586914062, "learning_rate": 5.903497409326425e-06, "loss": 0.5308, "mean_token_accuracy": 0.9309569299221039, "num_tokens": 4534734.0, "step": 2531 }, { "epoch": 0.4100072868593636, "grad_norm": 19.76032257080078, "learning_rate": 5.9018782383419695e-06, "loss": 0.5684, "mean_token_accuracy": 0.9219315946102142, "num_tokens": 4536528.0, "step": 2532 }, { "epoch": 0.4101692170674439, "grad_norm": 22.101547241210938, "learning_rate": 5.900259067357513e-06, "loss": 0.6854, "mean_token_accuracy": 0.9174394011497498, "num_tokens": 4538316.0, "step": 2533 }, { "epoch": 0.4103311472755242, "grad_norm": 22.76506805419922, "learning_rate": 5.8986398963730575e-06, "loss": 0.6059, "mean_token_accuracy": 0.9249706864356995, "num_tokens": 4540108.0, "step": 2534 }, { "epoch": 0.4104930774836046, "grad_norm": 22.418729782104492, "learning_rate": 5.897020725388601e-06, "loss": 0.6385, "mean_token_accuracy": 0.9114282727241516, "num_tokens": 4541891.0, "step": 2535 }, { "epoch": 0.4106550076916849, "grad_norm": 28.73288345336914, "learning_rate": 5.8954015544041455e-06, "loss": 0.7069, "mean_token_accuracy": 0.9032674133777618, "num_tokens": 4543690.0, "step": 2536 }, { "epoch": 0.4108169378997652, "grad_norm": 31.85521125793457, "learning_rate": 5.893782383419689e-06, "loss": 0.7699, "mean_token_accuracy": 0.9028123617172241, "num_tokens": 4545480.0, "step": 2537 }, { "epoch": 0.4109788681078455, "grad_norm": 24.487714767456055, "learning_rate": 5.8921632124352335e-06, "loss": 0.5962, "mean_token_accuracy": 0.9132784605026245, "num_tokens": 4547271.0, "step": 2538 }, { "epoch": 0.4111407983159258, "grad_norm": 27.29911994934082, "learning_rate": 5.890544041450777e-06, "loss": 0.6679, "mean_token_accuracy": 0.9045549929141998, "num_tokens": 4549066.0, "step": 2539 }, { "epoch": 0.4113027285240062, "grad_norm": 20.49338150024414, "learning_rate": 5.8889248704663216e-06, "loss": 0.5816, "mean_token_accuracy": 0.9243475496768951, "num_tokens": 4550856.0, "step": 2540 }, { "epoch": 0.4114646587320865, "grad_norm": 16.157089233398438, "learning_rate": 5.887305699481866e-06, "loss": 0.492, "mean_token_accuracy": 0.9228169620037079, "num_tokens": 4552640.0, "step": 2541 }, { "epoch": 0.4116265889401668, "grad_norm": 27.82122039794922, "learning_rate": 5.88568652849741e-06, "loss": 0.7263, "mean_token_accuracy": 0.8979591727256775, "num_tokens": 4554446.0, "step": 2542 }, { "epoch": 0.4117885191482471, "grad_norm": 29.573129653930664, "learning_rate": 5.884067357512954e-06, "loss": 0.7553, "mean_token_accuracy": 0.895780086517334, "num_tokens": 4556246.0, "step": 2543 }, { "epoch": 0.4119504493563274, "grad_norm": 21.327186584472656, "learning_rate": 5.882448186528498e-06, "loss": 0.5487, "mean_token_accuracy": 0.9154887795448303, "num_tokens": 4558042.0, "step": 2544 }, { "epoch": 0.4121123795644077, "grad_norm": 24.890348434448242, "learning_rate": 5.880829015544042e-06, "loss": 0.726, "mean_token_accuracy": 0.8984703123569489, "num_tokens": 4559832.0, "step": 2545 }, { "epoch": 0.4122743097724881, "grad_norm": 20.519004821777344, "learning_rate": 5.879209844559586e-06, "loss": 0.544, "mean_token_accuracy": 0.919334203004837, "num_tokens": 4561629.0, "step": 2546 }, { "epoch": 0.4124362399805684, "grad_norm": 20.222881317138672, "learning_rate": 5.87759067357513e-06, "loss": 0.5477, "mean_token_accuracy": 0.9250216782093048, "num_tokens": 4563421.0, "step": 2547 }, { "epoch": 0.4125981701886487, "grad_norm": 34.028263092041016, "learning_rate": 5.875971502590674e-06, "loss": 0.6945, "mean_token_accuracy": 0.9096132516860962, "num_tokens": 4565221.0, "step": 2548 }, { "epoch": 0.412760100396729, "grad_norm": 18.328582763671875, "learning_rate": 5.874352331606218e-06, "loss": 0.5435, "mean_token_accuracy": 0.9250421226024628, "num_tokens": 4567013.0, "step": 2549 }, { "epoch": 0.4129220306048093, "grad_norm": 23.221229553222656, "learning_rate": 5.872733160621762e-06, "loss": 0.5967, "mean_token_accuracy": 0.9072261154651642, "num_tokens": 4568803.0, "step": 2550 }, { "epoch": 0.4130839608128897, "grad_norm": 22.880868911743164, "learning_rate": 5.871113989637306e-06, "loss": 0.594, "mean_token_accuracy": 0.920152485370636, "num_tokens": 4570590.0, "step": 2551 }, { "epoch": 0.41324589102097, "grad_norm": 18.815189361572266, "learning_rate": 5.86949481865285e-06, "loss": 0.6619, "mean_token_accuracy": 0.922222226858139, "num_tokens": 4572372.0, "step": 2552 }, { "epoch": 0.4134078212290503, "grad_norm": 31.15639305114746, "learning_rate": 5.867875647668394e-06, "loss": 0.8807, "mean_token_accuracy": 0.8942348957061768, "num_tokens": 4574158.0, "step": 2553 }, { "epoch": 0.4135697514371306, "grad_norm": 16.219648361206055, "learning_rate": 5.866256476683938e-06, "loss": 0.4892, "mean_token_accuracy": 0.9253065884113312, "num_tokens": 4575951.0, "step": 2554 }, { "epoch": 0.4137316816452109, "grad_norm": 25.33356285095215, "learning_rate": 5.864637305699482e-06, "loss": 0.6299, "mean_token_accuracy": 0.917548805475235, "num_tokens": 4577742.0, "step": 2555 }, { "epoch": 0.4138936118532912, "grad_norm": 22.65717887878418, "learning_rate": 5.863018134715026e-06, "loss": 0.6747, "mean_token_accuracy": 0.9111111164093018, "num_tokens": 4579524.0, "step": 2556 }, { "epoch": 0.4140555420613716, "grad_norm": 22.198528289794922, "learning_rate": 5.86139896373057e-06, "loss": 0.6564, "mean_token_accuracy": 0.911347508430481, "num_tokens": 4581318.0, "step": 2557 }, { "epoch": 0.4142174722694519, "grad_norm": 26.533159255981445, "learning_rate": 5.859779792746114e-06, "loss": 0.6745, "mean_token_accuracy": 0.9003607630729675, "num_tokens": 4583119.0, "step": 2558 }, { "epoch": 0.4143794024775322, "grad_norm": 24.138341903686523, "learning_rate": 5.858160621761658e-06, "loss": 0.6372, "mean_token_accuracy": 0.9162554144859314, "num_tokens": 4584916.0, "step": 2559 }, { "epoch": 0.4145413326856125, "grad_norm": 25.73813819885254, "learning_rate": 5.856541450777203e-06, "loss": 0.632, "mean_token_accuracy": 0.9055944085121155, "num_tokens": 4586714.0, "step": 2560 }, { "epoch": 0.4147032628936928, "grad_norm": 37.1443977355957, "learning_rate": 5.854922279792746e-06, "loss": 1.1386, "mean_token_accuracy": 0.8717955052852631, "num_tokens": 4588507.0, "step": 2561 }, { "epoch": 0.4148651931017731, "grad_norm": 25.781566619873047, "learning_rate": 5.853303108808291e-06, "loss": 0.6735, "mean_token_accuracy": 0.9101727306842804, "num_tokens": 4590298.0, "step": 2562 }, { "epoch": 0.4150271233098535, "grad_norm": 17.320585250854492, "learning_rate": 5.851683937823834e-06, "loss": 0.5214, "mean_token_accuracy": 0.9257739782333374, "num_tokens": 4592092.0, "step": 2563 }, { "epoch": 0.4151890535179338, "grad_norm": 25.535747528076172, "learning_rate": 5.850064766839379e-06, "loss": 0.6595, "mean_token_accuracy": 0.9130645990371704, "num_tokens": 4593891.0, "step": 2564 }, { "epoch": 0.4153509837260141, "grad_norm": 24.325716018676758, "learning_rate": 5.848445595854922e-06, "loss": 0.6276, "mean_token_accuracy": 0.9164724051952362, "num_tokens": 4595690.0, "step": 2565 }, { "epoch": 0.4155129139340944, "grad_norm": 27.941818237304688, "learning_rate": 5.846826424870467e-06, "loss": 0.7436, "mean_token_accuracy": 0.8991561830043793, "num_tokens": 4597480.0, "step": 2566 }, { "epoch": 0.4156748441421747, "grad_norm": 22.496826171875, "learning_rate": 5.84520725388601e-06, "loss": 0.5944, "mean_token_accuracy": 0.9260774850845337, "num_tokens": 4599276.0, "step": 2567 }, { "epoch": 0.41583677435025507, "grad_norm": 27.053525924682617, "learning_rate": 5.843588082901555e-06, "loss": 0.7122, "mean_token_accuracy": 0.9051958322525024, "num_tokens": 4601062.0, "step": 2568 }, { "epoch": 0.4159987045583354, "grad_norm": 22.22924041748047, "learning_rate": 5.841968911917098e-06, "loss": 0.6819, "mean_token_accuracy": 0.9153555035591125, "num_tokens": 4602846.0, "step": 2569 }, { "epoch": 0.4161606347664157, "grad_norm": 33.67448425292969, "learning_rate": 5.840349740932643e-06, "loss": 0.7568, "mean_token_accuracy": 0.8908644616603851, "num_tokens": 4604642.0, "step": 2570 }, { "epoch": 0.416322564974496, "grad_norm": 24.905147552490234, "learning_rate": 5.838730569948186e-06, "loss": 0.6279, "mean_token_accuracy": 0.9090061187744141, "num_tokens": 4606428.0, "step": 2571 }, { "epoch": 0.4164844951825763, "grad_norm": 24.318157196044922, "learning_rate": 5.837111398963731e-06, "loss": 0.6429, "mean_token_accuracy": 0.9092437028884888, "num_tokens": 4608216.0, "step": 2572 }, { "epoch": 0.4166464253906566, "grad_norm": 24.842966079711914, "learning_rate": 5.835492227979274e-06, "loss": 0.5731, "mean_token_accuracy": 0.9167156219482422, "num_tokens": 4610014.0, "step": 2573 }, { "epoch": 0.41680835559873697, "grad_norm": 25.95463752746582, "learning_rate": 5.833873056994819e-06, "loss": 0.905, "mean_token_accuracy": 0.8942229747772217, "num_tokens": 4611798.0, "step": 2574 }, { "epoch": 0.4169702858068173, "grad_norm": 23.649118423461914, "learning_rate": 5.8322538860103624e-06, "loss": 0.5664, "mean_token_accuracy": 0.9157062470912933, "num_tokens": 4613595.0, "step": 2575 }, { "epoch": 0.4171322160148976, "grad_norm": 30.974651336669922, "learning_rate": 5.830634715025907e-06, "loss": 0.7434, "mean_token_accuracy": 0.8999438583850861, "num_tokens": 4615387.0, "step": 2576 }, { "epoch": 0.4172941462229779, "grad_norm": 19.067163467407227, "learning_rate": 5.8290155440414505e-06, "loss": 0.5929, "mean_token_accuracy": 0.920550525188446, "num_tokens": 4617176.0, "step": 2577 }, { "epoch": 0.4174560764310582, "grad_norm": 26.5705623626709, "learning_rate": 5.827396373056995e-06, "loss": 0.6725, "mean_token_accuracy": 0.901753157377243, "num_tokens": 4618973.0, "step": 2578 }, { "epoch": 0.4176180066391385, "grad_norm": 30.52427101135254, "learning_rate": 5.825777202072539e-06, "loss": 0.7626, "mean_token_accuracy": 0.906272828578949, "num_tokens": 4620762.0, "step": 2579 }, { "epoch": 0.41777993684721887, "grad_norm": 31.000513076782227, "learning_rate": 5.824158031088083e-06, "loss": 0.7346, "mean_token_accuracy": 0.9050009846687317, "num_tokens": 4622557.0, "step": 2580 }, { "epoch": 0.4179418670552992, "grad_norm": 30.3168888092041, "learning_rate": 5.822538860103627e-06, "loss": 0.7187, "mean_token_accuracy": 0.9071656465530396, "num_tokens": 4624359.0, "step": 2581 }, { "epoch": 0.4181037972633795, "grad_norm": 27.74933433532715, "learning_rate": 5.820919689119171e-06, "loss": 0.7209, "mean_token_accuracy": 0.9094203114509583, "num_tokens": 4626147.0, "step": 2582 }, { "epoch": 0.4182657274714598, "grad_norm": 26.21828269958496, "learning_rate": 5.819300518134715e-06, "loss": 0.6781, "mean_token_accuracy": 0.9160181879997253, "num_tokens": 4627945.0, "step": 2583 }, { "epoch": 0.4184276576795401, "grad_norm": 27.86294937133789, "learning_rate": 5.817681347150259e-06, "loss": 0.6502, "mean_token_accuracy": 0.9113768041133881, "num_tokens": 4629739.0, "step": 2584 }, { "epoch": 0.41858958788762046, "grad_norm": 24.900253295898438, "learning_rate": 5.8160621761658034e-06, "loss": 0.6338, "mean_token_accuracy": 0.9100041687488556, "num_tokens": 4631528.0, "step": 2585 }, { "epoch": 0.41875151809570077, "grad_norm": 25.233060836791992, "learning_rate": 5.814443005181347e-06, "loss": 0.6313, "mean_token_accuracy": 0.9164092838764191, "num_tokens": 4633328.0, "step": 2586 }, { "epoch": 0.4189134483037811, "grad_norm": 19.95783042907715, "learning_rate": 5.8128238341968915e-06, "loss": 0.6106, "mean_token_accuracy": 0.9197080135345459, "num_tokens": 4635114.0, "step": 2587 }, { "epoch": 0.4190753785118614, "grad_norm": 21.245548248291016, "learning_rate": 5.811204663212435e-06, "loss": 0.5853, "mean_token_accuracy": 0.916770339012146, "num_tokens": 4636904.0, "step": 2588 }, { "epoch": 0.4192373087199417, "grad_norm": 21.612083435058594, "learning_rate": 5.8095854922279795e-06, "loss": 0.6156, "mean_token_accuracy": 0.9161564707756042, "num_tokens": 4638703.0, "step": 2589 }, { "epoch": 0.419399238928022, "grad_norm": 19.873619079589844, "learning_rate": 5.807966321243523e-06, "loss": 0.4921, "mean_token_accuracy": 0.9268577098846436, "num_tokens": 4640502.0, "step": 2590 }, { "epoch": 0.41956116913610236, "grad_norm": 29.5637149810791, "learning_rate": 5.8063471502590675e-06, "loss": 0.7449, "mean_token_accuracy": 0.8982490599155426, "num_tokens": 4642289.0, "step": 2591 }, { "epoch": 0.41972309934418267, "grad_norm": 27.144763946533203, "learning_rate": 5.804727979274611e-06, "loss": 0.6497, "mean_token_accuracy": 0.908977746963501, "num_tokens": 4644085.0, "step": 2592 }, { "epoch": 0.419885029552263, "grad_norm": 16.945966720581055, "learning_rate": 5.8031088082901555e-06, "loss": 0.5778, "mean_token_accuracy": 0.9172511100769043, "num_tokens": 4645863.0, "step": 2593 }, { "epoch": 0.4200469597603433, "grad_norm": 28.04581069946289, "learning_rate": 5.801489637305699e-06, "loss": 0.7202, "mean_token_accuracy": 0.9052250683307648, "num_tokens": 4647660.0, "step": 2594 }, { "epoch": 0.4202088899684236, "grad_norm": 22.998987197875977, "learning_rate": 5.7998704663212436e-06, "loss": 0.6566, "mean_token_accuracy": 0.9103288650512695, "num_tokens": 4649451.0, "step": 2595 }, { "epoch": 0.4203708201765039, "grad_norm": 24.98158836364746, "learning_rate": 5.798251295336787e-06, "loss": 0.6474, "mean_token_accuracy": 0.9150382578372955, "num_tokens": 4651245.0, "step": 2596 }, { "epoch": 0.42053275038458426, "grad_norm": 23.99442481994629, "learning_rate": 5.796632124352332e-06, "loss": 0.6018, "mean_token_accuracy": 0.9081889390945435, "num_tokens": 4653039.0, "step": 2597 }, { "epoch": 0.42069468059266457, "grad_norm": 22.756681442260742, "learning_rate": 5.795012953367876e-06, "loss": 0.62, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 4654833.0, "step": 2598 }, { "epoch": 0.4208566108007449, "grad_norm": 32.483375549316406, "learning_rate": 5.79339378238342e-06, "loss": 0.8112, "mean_token_accuracy": 0.9015027582645416, "num_tokens": 4656629.0, "step": 2599 }, { "epoch": 0.4210185410088252, "grad_norm": 22.955522537231445, "learning_rate": 5.791774611398964e-06, "loss": 0.6299, "mean_token_accuracy": 0.9139266312122345, "num_tokens": 4658408.0, "step": 2600 }, { "epoch": 0.4211804712169055, "grad_norm": 28.10015106201172, "learning_rate": 5.790155440414508e-06, "loss": 0.7678, "mean_token_accuracy": 0.9017808437347412, "num_tokens": 4660216.0, "step": 2601 }, { "epoch": 0.42134240142498586, "grad_norm": 29.612756729125977, "learning_rate": 5.788536269430052e-06, "loss": 0.6319, "mean_token_accuracy": 0.9055380523204803, "num_tokens": 4662024.0, "step": 2602 }, { "epoch": 0.42150433163306616, "grad_norm": 27.647302627563477, "learning_rate": 5.786917098445596e-06, "loss": 0.6712, "mean_token_accuracy": 0.901695728302002, "num_tokens": 4663810.0, "step": 2603 }, { "epoch": 0.4216662618411465, "grad_norm": 20.27496910095215, "learning_rate": 5.78529792746114e-06, "loss": 0.6259, "mean_token_accuracy": 0.9195804297924042, "num_tokens": 4665608.0, "step": 2604 }, { "epoch": 0.4218281920492268, "grad_norm": 23.218393325805664, "learning_rate": 5.783678756476684e-06, "loss": 0.625, "mean_token_accuracy": 0.9081760048866272, "num_tokens": 4667403.0, "step": 2605 }, { "epoch": 0.4219901222573071, "grad_norm": 25.345605850219727, "learning_rate": 5.782059585492228e-06, "loss": 0.7572, "mean_token_accuracy": 0.8941427767276764, "num_tokens": 4669196.0, "step": 2606 }, { "epoch": 0.4221520524653874, "grad_norm": 20.053924560546875, "learning_rate": 5.780440414507773e-06, "loss": 0.5484, "mean_token_accuracy": 0.9227321743965149, "num_tokens": 4670993.0, "step": 2607 }, { "epoch": 0.42231398267346776, "grad_norm": 26.87257194519043, "learning_rate": 5.778821243523317e-06, "loss": 0.7186, "mean_token_accuracy": 0.8981329500675201, "num_tokens": 4672789.0, "step": 2608 }, { "epoch": 0.42247591288154807, "grad_norm": 12.919868469238281, "learning_rate": 5.7772020725388614e-06, "loss": 0.5004, "mean_token_accuracy": 0.9254679083824158, "num_tokens": 4674569.0, "step": 2609 }, { "epoch": 0.4226378430896284, "grad_norm": 23.54468536376953, "learning_rate": 5.775582901554405e-06, "loss": 0.6156, "mean_token_accuracy": 0.9104477763175964, "num_tokens": 4676349.0, "step": 2610 }, { "epoch": 0.4227997732977087, "grad_norm": 24.3798828125, "learning_rate": 5.7739637305699495e-06, "loss": 0.7064, "mean_token_accuracy": 0.9140931963920593, "num_tokens": 4678141.0, "step": 2611 }, { "epoch": 0.422961703505789, "grad_norm": 12.141080856323242, "learning_rate": 5.772344559585493e-06, "loss": 0.4844, "mean_token_accuracy": 0.9399600327014923, "num_tokens": 4679936.0, "step": 2612 }, { "epoch": 0.4231236337138693, "grad_norm": 18.956920623779297, "learning_rate": 5.7707253886010375e-06, "loss": 0.5844, "mean_token_accuracy": 0.9241921901702881, "num_tokens": 4681726.0, "step": 2613 }, { "epoch": 0.42328556392194966, "grad_norm": 27.292633056640625, "learning_rate": 5.769106217616581e-06, "loss": 0.8231, "mean_token_accuracy": 0.9077271521091461, "num_tokens": 4683520.0, "step": 2614 }, { "epoch": 0.42344749413002997, "grad_norm": 21.6551456451416, "learning_rate": 5.7674870466321255e-06, "loss": 0.6106, "mean_token_accuracy": 0.921777218580246, "num_tokens": 4685326.0, "step": 2615 }, { "epoch": 0.4236094243381103, "grad_norm": 20.61473846435547, "learning_rate": 5.765867875647669e-06, "loss": 0.652, "mean_token_accuracy": 0.920273095369339, "num_tokens": 4687114.0, "step": 2616 }, { "epoch": 0.4237713545461906, "grad_norm": 26.408039093017578, "learning_rate": 5.7642487046632135e-06, "loss": 0.618, "mean_token_accuracy": 0.9165661931037903, "num_tokens": 4688914.0, "step": 2617 }, { "epoch": 0.4239332847542709, "grad_norm": 32.399925231933594, "learning_rate": 5.762629533678757e-06, "loss": 0.7132, "mean_token_accuracy": 0.9081632792949677, "num_tokens": 4690720.0, "step": 2618 }, { "epoch": 0.42409521496235125, "grad_norm": 15.553979873657227, "learning_rate": 5.7610103626943016e-06, "loss": 0.5308, "mean_token_accuracy": 0.93031245470047, "num_tokens": 4692505.0, "step": 2619 }, { "epoch": 0.42425714517043156, "grad_norm": 24.6832275390625, "learning_rate": 5.759391191709845e-06, "loss": 0.7021, "mean_token_accuracy": 0.9002940058708191, "num_tokens": 4694298.0, "step": 2620 }, { "epoch": 0.42441907537851187, "grad_norm": 34.95283126831055, "learning_rate": 5.75777202072539e-06, "loss": 0.9311, "mean_token_accuracy": 0.8843482434749603, "num_tokens": 4696096.0, "step": 2621 }, { "epoch": 0.4245810055865922, "grad_norm": 30.824203491210938, "learning_rate": 5.756152849740933e-06, "loss": 0.7446, "mean_token_accuracy": 0.8984484672546387, "num_tokens": 4697891.0, "step": 2622 }, { "epoch": 0.4247429357946725, "grad_norm": 31.794389724731445, "learning_rate": 5.754533678756478e-06, "loss": 0.7985, "mean_token_accuracy": 0.8897947072982788, "num_tokens": 4699685.0, "step": 2623 }, { "epoch": 0.4249048660027528, "grad_norm": 27.11253547668457, "learning_rate": 5.752914507772022e-06, "loss": 0.7059, "mean_token_accuracy": 0.8947382271289825, "num_tokens": 4701473.0, "step": 2624 }, { "epoch": 0.42506679621083315, "grad_norm": 23.858304977416992, "learning_rate": 5.751295336787566e-06, "loss": 0.6784, "mean_token_accuracy": 0.902512788772583, "num_tokens": 4703262.0, "step": 2625 }, { "epoch": 0.42522872641891346, "grad_norm": 23.854270935058594, "learning_rate": 5.74967616580311e-06, "loss": 0.6209, "mean_token_accuracy": 0.9186400771141052, "num_tokens": 4705057.0, "step": 2626 }, { "epoch": 0.42539065662699377, "grad_norm": 21.63140106201172, "learning_rate": 5.748056994818654e-06, "loss": 0.5647, "mean_token_accuracy": 0.9125266671180725, "num_tokens": 4706843.0, "step": 2627 }, { "epoch": 0.4255525868350741, "grad_norm": 36.0816650390625, "learning_rate": 5.746437823834198e-06, "loss": 1.0929, "mean_token_accuracy": 0.8799603283405304, "num_tokens": 4708646.0, "step": 2628 }, { "epoch": 0.4257145170431544, "grad_norm": 26.497888565063477, "learning_rate": 5.744818652849742e-06, "loss": 0.6863, "mean_token_accuracy": 0.9111787378787994, "num_tokens": 4710440.0, "step": 2629 }, { "epoch": 0.4258764472512347, "grad_norm": 18.768795013427734, "learning_rate": 5.743199481865286e-06, "loss": 0.6027, "mean_token_accuracy": 0.9124966859817505, "num_tokens": 4712226.0, "step": 2630 }, { "epoch": 0.42603837745931505, "grad_norm": 17.265892028808594, "learning_rate": 5.74158031088083e-06, "loss": 0.5499, "mean_token_accuracy": 0.9233953356742859, "num_tokens": 4714011.0, "step": 2631 }, { "epoch": 0.42620030766739536, "grad_norm": 19.992643356323242, "learning_rate": 5.739961139896374e-06, "loss": 0.6266, "mean_token_accuracy": 0.9145896732807159, "num_tokens": 4715804.0, "step": 2632 }, { "epoch": 0.42636223787547567, "grad_norm": 29.299583435058594, "learning_rate": 5.738341968911918e-06, "loss": 0.7797, "mean_token_accuracy": 0.8843642771244049, "num_tokens": 4717608.0, "step": 2633 }, { "epoch": 0.426524168083556, "grad_norm": 31.09081268310547, "learning_rate": 5.736722797927462e-06, "loss": 0.7851, "mean_token_accuracy": 0.9000816643238068, "num_tokens": 4719400.0, "step": 2634 }, { "epoch": 0.4266860982916363, "grad_norm": 15.340228080749512, "learning_rate": 5.735103626943006e-06, "loss": 0.5312, "mean_token_accuracy": 0.9304481148719788, "num_tokens": 4721185.0, "step": 2635 }, { "epoch": 0.42684802849971665, "grad_norm": 16.546255111694336, "learning_rate": 5.73348445595855e-06, "loss": 0.5452, "mean_token_accuracy": 0.9294085204601288, "num_tokens": 4722966.0, "step": 2636 }, { "epoch": 0.42700995870779695, "grad_norm": 23.766008377075195, "learning_rate": 5.731865284974094e-06, "loss": 0.6068, "mean_token_accuracy": 0.9143702983856201, "num_tokens": 4724758.0, "step": 2637 }, { "epoch": 0.42717188891587726, "grad_norm": 24.576339721679688, "learning_rate": 5.730246113989638e-06, "loss": 0.625, "mean_token_accuracy": 0.9040851294994354, "num_tokens": 4726552.0, "step": 2638 }, { "epoch": 0.42733381912395757, "grad_norm": 29.197771072387695, "learning_rate": 5.728626943005182e-06, "loss": 0.7505, "mean_token_accuracy": 0.8945578336715698, "num_tokens": 4728358.0, "step": 2639 }, { "epoch": 0.4274957493320379, "grad_norm": 25.663074493408203, "learning_rate": 5.727007772020726e-06, "loss": 0.6315, "mean_token_accuracy": 0.9103405475616455, "num_tokens": 4730160.0, "step": 2640 }, { "epoch": 0.4276576795401182, "grad_norm": 25.778417587280273, "learning_rate": 5.72538860103627e-06, "loss": 0.6107, "mean_token_accuracy": 0.9197080135345459, "num_tokens": 4731946.0, "step": 2641 }, { "epoch": 0.42781960974819855, "grad_norm": 22.788490295410156, "learning_rate": 5.723769430051814e-06, "loss": 0.6079, "mean_token_accuracy": 0.910306453704834, "num_tokens": 4733748.0, "step": 2642 }, { "epoch": 0.42798153995627886, "grad_norm": 21.9490909576416, "learning_rate": 5.722150259067359e-06, "loss": 0.613, "mean_token_accuracy": 0.9189872145652771, "num_tokens": 4735533.0, "step": 2643 }, { "epoch": 0.42814347016435916, "grad_norm": 25.787572860717773, "learning_rate": 5.720531088082902e-06, "loss": 0.7368, "mean_token_accuracy": 0.9083473086357117, "num_tokens": 4737338.0, "step": 2644 }, { "epoch": 0.42830540037243947, "grad_norm": 26.58540153503418, "learning_rate": 5.718911917098447e-06, "loss": 0.6601, "mean_token_accuracy": 0.9166666865348816, "num_tokens": 4739138.0, "step": 2645 }, { "epoch": 0.4284673305805198, "grad_norm": 23.15785026550293, "learning_rate": 5.71729274611399e-06, "loss": 0.6202, "mean_token_accuracy": 0.9148764908313751, "num_tokens": 4740932.0, "step": 2646 }, { "epoch": 0.4286292607886001, "grad_norm": 34.17253112792969, "learning_rate": 5.715673575129535e-06, "loss": 0.8711, "mean_token_accuracy": 0.8918783962726593, "num_tokens": 4742741.0, "step": 2647 }, { "epoch": 0.42879119099668045, "grad_norm": 29.94915008544922, "learning_rate": 5.714054404145078e-06, "loss": 0.8706, "mean_token_accuracy": 0.887706845998764, "num_tokens": 4744529.0, "step": 2648 }, { "epoch": 0.42895312120476076, "grad_norm": 17.295637130737305, "learning_rate": 5.712435233160623e-06, "loss": 0.5666, "mean_token_accuracy": 0.9282300472259521, "num_tokens": 4746320.0, "step": 2649 }, { "epoch": 0.42911505141284106, "grad_norm": 18.61489486694336, "learning_rate": 5.710816062176166e-06, "loss": 0.6005, "mean_token_accuracy": 0.9180035591125488, "num_tokens": 4748100.0, "step": 2650 }, { "epoch": 0.42927698162092137, "grad_norm": 22.395387649536133, "learning_rate": 5.709196891191711e-06, "loss": 0.682, "mean_token_accuracy": 0.9140350818634033, "num_tokens": 4749880.0, "step": 2651 }, { "epoch": 0.4294389118290017, "grad_norm": 27.54579734802246, "learning_rate": 5.707577720207254e-06, "loss": 0.7183, "mean_token_accuracy": 0.9027763307094574, "num_tokens": 4751669.0, "step": 2652 }, { "epoch": 0.42960084203708204, "grad_norm": 25.062528610229492, "learning_rate": 5.705958549222799e-06, "loss": 0.5874, "mean_token_accuracy": 0.9062761068344116, "num_tokens": 4753458.0, "step": 2653 }, { "epoch": 0.42976277224516235, "grad_norm": 23.13579559326172, "learning_rate": 5.7043393782383424e-06, "loss": 0.6558, "mean_token_accuracy": 0.9072912335395813, "num_tokens": 4755249.0, "step": 2654 }, { "epoch": 0.42992470245324266, "grad_norm": 26.464677810668945, "learning_rate": 5.702720207253887e-06, "loss": 0.68, "mean_token_accuracy": 0.8968591690063477, "num_tokens": 4757042.0, "step": 2655 }, { "epoch": 0.43008663266132297, "grad_norm": 22.059852600097656, "learning_rate": 5.7011010362694305e-06, "loss": 0.6176, "mean_token_accuracy": 0.9116941690444946, "num_tokens": 4758837.0, "step": 2656 }, { "epoch": 0.4302485628694033, "grad_norm": 29.53142738342285, "learning_rate": 5.699481865284975e-06, "loss": 0.7051, "mean_token_accuracy": 0.9084957540035248, "num_tokens": 4760633.0, "step": 2657 }, { "epoch": 0.4304104930774836, "grad_norm": 20.650423049926758, "learning_rate": 5.6978626943005185e-06, "loss": 0.559, "mean_token_accuracy": 0.9237982928752899, "num_tokens": 4762421.0, "step": 2658 }, { "epoch": 0.43057242328556394, "grad_norm": 19.217164993286133, "learning_rate": 5.696243523316063e-06, "loss": 0.564, "mean_token_accuracy": 0.9206821024417877, "num_tokens": 4764210.0, "step": 2659 }, { "epoch": 0.43073435349364425, "grad_norm": 22.50171661376953, "learning_rate": 5.6946243523316065e-06, "loss": 0.585, "mean_token_accuracy": 0.9168067276477814, "num_tokens": 4765998.0, "step": 2660 }, { "epoch": 0.43089628370172456, "grad_norm": 21.96280288696289, "learning_rate": 5.693005181347151e-06, "loss": 0.6544, "mean_token_accuracy": 0.9148834943771362, "num_tokens": 4767793.0, "step": 2661 }, { "epoch": 0.43105821390980487, "grad_norm": 13.62598705291748, "learning_rate": 5.691386010362695e-06, "loss": 0.4766, "mean_token_accuracy": 0.9291044771671295, "num_tokens": 4769573.0, "step": 2662 }, { "epoch": 0.4312201441178852, "grad_norm": 20.593812942504883, "learning_rate": 5.689766839378239e-06, "loss": 0.6067, "mean_token_accuracy": 0.9189277589321136, "num_tokens": 4771356.0, "step": 2663 }, { "epoch": 0.43138207432596554, "grad_norm": 27.30518341064453, "learning_rate": 5.688147668393783e-06, "loss": 0.6109, "mean_token_accuracy": 0.9087708294391632, "num_tokens": 4773153.0, "step": 2664 }, { "epoch": 0.43154400453404584, "grad_norm": 24.983089447021484, "learning_rate": 5.686528497409327e-06, "loss": 0.6171, "mean_token_accuracy": 0.9202521741390228, "num_tokens": 4774941.0, "step": 2665 }, { "epoch": 0.43170593474212615, "grad_norm": 21.732349395751953, "learning_rate": 5.6849093264248714e-06, "loss": 0.5606, "mean_token_accuracy": 0.9137163758277893, "num_tokens": 4776731.0, "step": 2666 }, { "epoch": 0.43186786495020646, "grad_norm": 22.275178909301758, "learning_rate": 5.683290155440415e-06, "loss": 0.6409, "mean_token_accuracy": 0.9053651690483093, "num_tokens": 4778515.0, "step": 2667 }, { "epoch": 0.43202979515828677, "grad_norm": 17.078319549560547, "learning_rate": 5.6816709844559595e-06, "loss": 0.5715, "mean_token_accuracy": 0.9252963960170746, "num_tokens": 4780308.0, "step": 2668 }, { "epoch": 0.4321917253663671, "grad_norm": 27.664405822753906, "learning_rate": 5.680051813471503e-06, "loss": 0.6458, "mean_token_accuracy": 0.8983109295368195, "num_tokens": 4782095.0, "step": 2669 }, { "epoch": 0.43235365557444744, "grad_norm": 28.662857055664062, "learning_rate": 5.6784326424870475e-06, "loss": 0.8077, "mean_token_accuracy": 0.8996916711330414, "num_tokens": 4783886.0, "step": 2670 }, { "epoch": 0.43251558578252775, "grad_norm": 28.25349235534668, "learning_rate": 5.676813471502591e-06, "loss": 0.7125, "mean_token_accuracy": 0.9098878800868988, "num_tokens": 4785687.0, "step": 2671 }, { "epoch": 0.43267751599060805, "grad_norm": 19.321746826171875, "learning_rate": 5.6751943005181355e-06, "loss": 0.559, "mean_token_accuracy": 0.919584333896637, "num_tokens": 4787473.0, "step": 2672 }, { "epoch": 0.43283944619868836, "grad_norm": 21.610492706298828, "learning_rate": 5.673575129533679e-06, "loss": 0.539, "mean_token_accuracy": 0.9187147319316864, "num_tokens": 4789267.0, "step": 2673 }, { "epoch": 0.43300137640676867, "grad_norm": 27.249958038330078, "learning_rate": 5.6719559585492236e-06, "loss": 0.581, "mean_token_accuracy": 0.92306187748909, "num_tokens": 4791065.0, "step": 2674 }, { "epoch": 0.433163306614849, "grad_norm": 21.888282775878906, "learning_rate": 5.670336787564767e-06, "loss": 0.5607, "mean_token_accuracy": 0.9181873500347137, "num_tokens": 4792858.0, "step": 2675 }, { "epoch": 0.43332523682292934, "grad_norm": 27.923105239868164, "learning_rate": 5.668717616580312e-06, "loss": 0.6431, "mean_token_accuracy": 0.9148550927639008, "num_tokens": 4794652.0, "step": 2676 }, { "epoch": 0.43348716703100965, "grad_norm": 19.418935775756836, "learning_rate": 5.667098445595855e-06, "loss": 0.5988, "mean_token_accuracy": 0.9209782183170319, "num_tokens": 4796430.0, "step": 2677 }, { "epoch": 0.43364909723908995, "grad_norm": 25.08905792236328, "learning_rate": 5.6654792746114e-06, "loss": 0.5355, "mean_token_accuracy": 0.9261982440948486, "num_tokens": 4798213.0, "step": 2678 }, { "epoch": 0.43381102744717026, "grad_norm": 26.60637664794922, "learning_rate": 5.663860103626943e-06, "loss": 0.6992, "mean_token_accuracy": 0.9145744144916534, "num_tokens": 4800005.0, "step": 2679 }, { "epoch": 0.43397295765525057, "grad_norm": 18.8463077545166, "learning_rate": 5.662240932642488e-06, "loss": 0.518, "mean_token_accuracy": 0.9267310798168182, "num_tokens": 4801790.0, "step": 2680 }, { "epoch": 0.43413488786333093, "grad_norm": 29.289709091186523, "learning_rate": 5.660621761658032e-06, "loss": 0.671, "mean_token_accuracy": 0.9097487032413483, "num_tokens": 4803577.0, "step": 2681 }, { "epoch": 0.43429681807141124, "grad_norm": 21.096981048583984, "learning_rate": 5.659002590673576e-06, "loss": 0.5209, "mean_token_accuracy": 0.9295774698257446, "num_tokens": 4805373.0, "step": 2682 }, { "epoch": 0.43445874827949155, "grad_norm": 32.044883728027344, "learning_rate": 5.65738341968912e-06, "loss": 0.7071, "mean_token_accuracy": 0.9010662138462067, "num_tokens": 4807180.0, "step": 2683 }, { "epoch": 0.43462067848757185, "grad_norm": 25.138193130493164, "learning_rate": 5.655764248704664e-06, "loss": 0.663, "mean_token_accuracy": 0.9160937964916229, "num_tokens": 4808966.0, "step": 2684 }, { "epoch": 0.43478260869565216, "grad_norm": 19.392642974853516, "learning_rate": 5.654145077720208e-06, "loss": 0.508, "mean_token_accuracy": 0.9236772358417511, "num_tokens": 4810753.0, "step": 2685 }, { "epoch": 0.43494453890373247, "grad_norm": 21.204151153564453, "learning_rate": 5.652525906735752e-06, "loss": 0.5527, "mean_token_accuracy": 0.9176872968673706, "num_tokens": 4812545.0, "step": 2686 }, { "epoch": 0.43510646911181283, "grad_norm": 26.812856674194336, "learning_rate": 5.650906735751296e-06, "loss": 0.7648, "mean_token_accuracy": 0.9075387418270111, "num_tokens": 4814338.0, "step": 2687 }, { "epoch": 0.43526839931989314, "grad_norm": 33.402442932128906, "learning_rate": 5.64928756476684e-06, "loss": 0.7583, "mean_token_accuracy": 0.8943662047386169, "num_tokens": 4816134.0, "step": 2688 }, { "epoch": 0.43543032952797345, "grad_norm": 20.40118980407715, "learning_rate": 5.647668393782384e-06, "loss": 0.5795, "mean_token_accuracy": 0.9147412180900574, "num_tokens": 4817916.0, "step": 2689 }, { "epoch": 0.43559225973605376, "grad_norm": 26.93141746520996, "learning_rate": 5.646049222797928e-06, "loss": 0.6172, "mean_token_accuracy": 0.9041759967803955, "num_tokens": 4819712.0, "step": 2690 }, { "epoch": 0.43575418994413406, "grad_norm": 28.92831039428711, "learning_rate": 5.644430051813472e-06, "loss": 0.6386, "mean_token_accuracy": 0.8897416591644287, "num_tokens": 4821505.0, "step": 2691 }, { "epoch": 0.43591612015221437, "grad_norm": 22.93592643737793, "learning_rate": 5.642810880829016e-06, "loss": 0.5556, "mean_token_accuracy": 0.9180963933467865, "num_tokens": 4823297.0, "step": 2692 }, { "epoch": 0.43607805036029473, "grad_norm": 12.93975830078125, "learning_rate": 5.64119170984456e-06, "loss": 0.4883, "mean_token_accuracy": 0.9337658882141113, "num_tokens": 4825081.0, "step": 2693 }, { "epoch": 0.43623998056837504, "grad_norm": 32.457054138183594, "learning_rate": 5.639572538860104e-06, "loss": 0.7436, "mean_token_accuracy": 0.8971927165985107, "num_tokens": 4826875.0, "step": 2694 }, { "epoch": 0.43640191077645535, "grad_norm": 19.667940139770508, "learning_rate": 5.637953367875648e-06, "loss": 0.5652, "mean_token_accuracy": 0.9194128215312958, "num_tokens": 4828660.0, "step": 2695 }, { "epoch": 0.43656384098453566, "grad_norm": 22.991769790649414, "learning_rate": 5.636334196891192e-06, "loss": 0.5946, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 4830447.0, "step": 2696 }, { "epoch": 0.43672577119261596, "grad_norm": 25.189119338989258, "learning_rate": 5.634715025906736e-06, "loss": 0.6778, "mean_token_accuracy": 0.9027777910232544, "num_tokens": 4832238.0, "step": 2697 }, { "epoch": 0.4368877014006963, "grad_norm": 22.71463966369629, "learning_rate": 5.63309585492228e-06, "loss": 0.5778, "mean_token_accuracy": 0.9167017042636871, "num_tokens": 4834026.0, "step": 2698 }, { "epoch": 0.43704963160877663, "grad_norm": 26.27680778503418, "learning_rate": 5.631476683937824e-06, "loss": 0.5936, "mean_token_accuracy": 0.9130810499191284, "num_tokens": 4835827.0, "step": 2699 }, { "epoch": 0.43721156181685694, "grad_norm": 23.116701126098633, "learning_rate": 5.629857512953369e-06, "loss": 0.6003, "mean_token_accuracy": 0.9161287248134613, "num_tokens": 4837625.0, "step": 2700 }, { "epoch": 0.43737349202493725, "grad_norm": 18.830284118652344, "learning_rate": 5.628238341968912e-06, "loss": 0.5403, "mean_token_accuracy": 0.9265037775039673, "num_tokens": 4839410.0, "step": 2701 }, { "epoch": 0.43753542223301756, "grad_norm": 20.59864616394043, "learning_rate": 5.626619170984457e-06, "loss": 0.5544, "mean_token_accuracy": 0.9170937538146973, "num_tokens": 4841200.0, "step": 2702 }, { "epoch": 0.43769735244109786, "grad_norm": 24.57659339904785, "learning_rate": 5.625e-06, "loss": 0.5824, "mean_token_accuracy": 0.9157004952430725, "num_tokens": 4842985.0, "step": 2703 }, { "epoch": 0.4378592826491782, "grad_norm": 26.68417739868164, "learning_rate": 5.623380829015545e-06, "loss": 0.746, "mean_token_accuracy": 0.9068345129489899, "num_tokens": 4844776.0, "step": 2704 }, { "epoch": 0.43802121285725854, "grad_norm": 17.324947357177734, "learning_rate": 5.621761658031088e-06, "loss": 0.5752, "mean_token_accuracy": 0.9229723215103149, "num_tokens": 4846562.0, "step": 2705 }, { "epoch": 0.43818314306533884, "grad_norm": 23.368986129760742, "learning_rate": 5.620142487046633e-06, "loss": 0.5833, "mean_token_accuracy": 0.9187802076339722, "num_tokens": 4848356.0, "step": 2706 }, { "epoch": 0.43834507327341915, "grad_norm": 26.392820358276367, "learning_rate": 5.618523316062176e-06, "loss": 0.66, "mean_token_accuracy": 0.9090151488780975, "num_tokens": 4850141.0, "step": 2707 }, { "epoch": 0.43850700348149946, "grad_norm": 19.447568893432617, "learning_rate": 5.616904145077721e-06, "loss": 0.5647, "mean_token_accuracy": 0.9331349432468414, "num_tokens": 4851937.0, "step": 2708 }, { "epoch": 0.43866893368957977, "grad_norm": 31.07088279724121, "learning_rate": 5.6152849740932644e-06, "loss": 0.7856, "mean_token_accuracy": 0.905844658613205, "num_tokens": 4853725.0, "step": 2709 }, { "epoch": 0.43883086389766013, "grad_norm": 27.62972640991211, "learning_rate": 5.613665803108809e-06, "loss": 0.6222, "mean_token_accuracy": 0.9139773845672607, "num_tokens": 4855516.0, "step": 2710 }, { "epoch": 0.43899279410574044, "grad_norm": 23.208091735839844, "learning_rate": 5.6120466321243525e-06, "loss": 0.5503, "mean_token_accuracy": 0.9099584519863129, "num_tokens": 4857304.0, "step": 2711 }, { "epoch": 0.43915472431382074, "grad_norm": 17.770944595336914, "learning_rate": 5.610427461139897e-06, "loss": 0.5036, "mean_token_accuracy": 0.9229517877101898, "num_tokens": 4859101.0, "step": 2712 }, { "epoch": 0.43931665452190105, "grad_norm": 27.526386260986328, "learning_rate": 5.6088082901554405e-06, "loss": 0.8291, "mean_token_accuracy": 0.8963922262191772, "num_tokens": 4860892.0, "step": 2713 }, { "epoch": 0.43947858472998136, "grad_norm": 30.08072853088379, "learning_rate": 5.607189119170985e-06, "loss": 0.7403, "mean_token_accuracy": 0.9060161411762238, "num_tokens": 4862690.0, "step": 2714 }, { "epoch": 0.4396405149380617, "grad_norm": 23.267772674560547, "learning_rate": 5.6055699481865285e-06, "loss": 0.6049, "mean_token_accuracy": 0.9203906953334808, "num_tokens": 4864478.0, "step": 2715 }, { "epoch": 0.43980244514614203, "grad_norm": 21.03306770324707, "learning_rate": 5.603950777202073e-06, "loss": 0.6328, "mean_token_accuracy": 0.9191176295280457, "num_tokens": 4866262.0, "step": 2716 }, { "epoch": 0.43996437535422234, "grad_norm": 28.422283172607422, "learning_rate": 5.6023316062176165e-06, "loss": 0.5873, "mean_token_accuracy": 0.9212790131568909, "num_tokens": 4868052.0, "step": 2717 }, { "epoch": 0.44012630556230264, "grad_norm": 24.285396575927734, "learning_rate": 5.600712435233161e-06, "loss": 0.5968, "mean_token_accuracy": 0.9147909283638, "num_tokens": 4869847.0, "step": 2718 }, { "epoch": 0.44028823577038295, "grad_norm": 27.072790145874023, "learning_rate": 5.599093264248705e-06, "loss": 0.6352, "mean_token_accuracy": 0.9072340428829193, "num_tokens": 4871650.0, "step": 2719 }, { "epoch": 0.44045016597846326, "grad_norm": 26.667236328125, "learning_rate": 5.597474093264249e-06, "loss": 0.6668, "mean_token_accuracy": 0.9160805642604828, "num_tokens": 4873436.0, "step": 2720 }, { "epoch": 0.4406120961865436, "grad_norm": 34.54020690917969, "learning_rate": 5.5958549222797934e-06, "loss": 0.6538, "mean_token_accuracy": 0.898889034986496, "num_tokens": 4875215.0, "step": 2721 }, { "epoch": 0.44077402639462393, "grad_norm": 23.151290893554688, "learning_rate": 5.594235751295337e-06, "loss": 0.5613, "mean_token_accuracy": 0.9230088293552399, "num_tokens": 4877000.0, "step": 2722 }, { "epoch": 0.44093595660270424, "grad_norm": 33.38759994506836, "learning_rate": 5.5926165803108815e-06, "loss": 0.67, "mean_token_accuracy": 0.9074074029922485, "num_tokens": 4878800.0, "step": 2723 }, { "epoch": 0.44109788681078455, "grad_norm": 26.60508918762207, "learning_rate": 5.590997409326425e-06, "loss": 0.5924, "mean_token_accuracy": 0.9129863977432251, "num_tokens": 4880588.0, "step": 2724 }, { "epoch": 0.44125981701886485, "grad_norm": 28.57872772216797, "learning_rate": 5.5893782383419695e-06, "loss": 0.7296, "mean_token_accuracy": 0.9103121757507324, "num_tokens": 4882378.0, "step": 2725 }, { "epoch": 0.44142174722694516, "grad_norm": 21.66533851623535, "learning_rate": 5.587759067357513e-06, "loss": 0.4925, "mean_token_accuracy": 0.9250536262989044, "num_tokens": 4884183.0, "step": 2726 }, { "epoch": 0.4415836774350255, "grad_norm": 25.022130966186523, "learning_rate": 5.5861398963730575e-06, "loss": 0.5758, "mean_token_accuracy": 0.925781637430191, "num_tokens": 4885978.0, "step": 2727 }, { "epoch": 0.44174560764310583, "grad_norm": 25.449390411376953, "learning_rate": 5.584520725388601e-06, "loss": 0.6625, "mean_token_accuracy": 0.9050511717796326, "num_tokens": 4887764.0, "step": 2728 }, { "epoch": 0.44190753785118614, "grad_norm": 26.575929641723633, "learning_rate": 5.5829015544041455e-06, "loss": 0.705, "mean_token_accuracy": 0.9071428775787354, "num_tokens": 4889556.0, "step": 2729 }, { "epoch": 0.44206946805926645, "grad_norm": 28.210323333740234, "learning_rate": 5.581282383419689e-06, "loss": 0.7162, "mean_token_accuracy": 0.9019704461097717, "num_tokens": 4891353.0, "step": 2730 }, { "epoch": 0.44223139826734675, "grad_norm": 25.475828170776367, "learning_rate": 5.5796632124352336e-06, "loss": 0.6002, "mean_token_accuracy": 0.9227039813995361, "num_tokens": 4893136.0, "step": 2731 }, { "epoch": 0.4423933284754271, "grad_norm": 28.722536087036133, "learning_rate": 5.578044041450777e-06, "loss": 0.6277, "mean_token_accuracy": 0.9136646091938019, "num_tokens": 4894926.0, "step": 2732 }, { "epoch": 0.4425552586835074, "grad_norm": 25.91667938232422, "learning_rate": 5.576424870466322e-06, "loss": 0.6486, "mean_token_accuracy": 0.9045088589191437, "num_tokens": 4896711.0, "step": 2733 }, { "epoch": 0.44271718889158773, "grad_norm": 23.53471565246582, "learning_rate": 5.574805699481865e-06, "loss": 0.6322, "mean_token_accuracy": 0.9170056283473969, "num_tokens": 4898500.0, "step": 2734 }, { "epoch": 0.44287911909966804, "grad_norm": 25.055017471313477, "learning_rate": 5.57318652849741e-06, "loss": 0.6123, "mean_token_accuracy": 0.9089095592498779, "num_tokens": 4900297.0, "step": 2735 }, { "epoch": 0.44304104930774835, "grad_norm": 26.53508186340332, "learning_rate": 5.571567357512954e-06, "loss": 0.5755, "mean_token_accuracy": 0.9150429666042328, "num_tokens": 4902092.0, "step": 2736 }, { "epoch": 0.44320297951582865, "grad_norm": 27.02204704284668, "learning_rate": 5.569948186528498e-06, "loss": 0.5914, "mean_token_accuracy": 0.9140793681144714, "num_tokens": 4903883.0, "step": 2737 }, { "epoch": 0.443364909723909, "grad_norm": 28.95026206970215, "learning_rate": 5.568329015544042e-06, "loss": 0.6372, "mean_token_accuracy": 0.9164723455905914, "num_tokens": 4905682.0, "step": 2738 }, { "epoch": 0.4435268399319893, "grad_norm": 24.438188552856445, "learning_rate": 5.566709844559586e-06, "loss": 0.5999, "mean_token_accuracy": 0.9187915623188019, "num_tokens": 4907465.0, "step": 2739 }, { "epoch": 0.44368877014006963, "grad_norm": 16.558094024658203, "learning_rate": 5.56509067357513e-06, "loss": 0.4899, "mean_token_accuracy": 0.925567239522934, "num_tokens": 4909246.0, "step": 2740 }, { "epoch": 0.44385070034814994, "grad_norm": 35.34246826171875, "learning_rate": 5.563471502590674e-06, "loss": 1.0793, "mean_token_accuracy": 0.8841721713542938, "num_tokens": 4911043.0, "step": 2741 }, { "epoch": 0.44401263055623025, "grad_norm": 23.990554809570312, "learning_rate": 5.561852331606218e-06, "loss": 0.5784, "mean_token_accuracy": 0.9163069427013397, "num_tokens": 4912829.0, "step": 2742 }, { "epoch": 0.44417456076431056, "grad_norm": 20.967924118041992, "learning_rate": 5.560233160621762e-06, "loss": 0.6003, "mean_token_accuracy": 0.920550525188446, "num_tokens": 4914618.0, "step": 2743 }, { "epoch": 0.4443364909723909, "grad_norm": 23.729198455810547, "learning_rate": 5.558613989637306e-06, "loss": 0.543, "mean_token_accuracy": 0.9198492169380188, "num_tokens": 4916416.0, "step": 2744 }, { "epoch": 0.4444984211804712, "grad_norm": 25.297008514404297, "learning_rate": 5.55699481865285e-06, "loss": 0.5947, "mean_token_accuracy": 0.9157738089561462, "num_tokens": 4918212.0, "step": 2745 }, { "epoch": 0.44466035138855153, "grad_norm": 16.53269386291504, "learning_rate": 5.555375647668394e-06, "loss": 0.5077, "mean_token_accuracy": 0.929892361164093, "num_tokens": 4920010.0, "step": 2746 }, { "epoch": 0.44482228159663184, "grad_norm": 22.38106918334961, "learning_rate": 5.553756476683938e-06, "loss": 0.6166, "mean_token_accuracy": 0.9132690131664276, "num_tokens": 4921799.0, "step": 2747 }, { "epoch": 0.44498421180471215, "grad_norm": 18.34514617919922, "learning_rate": 5.552137305699482e-06, "loss": 0.513, "mean_token_accuracy": 0.9303542673587799, "num_tokens": 4923584.0, "step": 2748 }, { "epoch": 0.4451461420127925, "grad_norm": 17.57979965209961, "learning_rate": 5.550518134715026e-06, "loss": 0.5468, "mean_token_accuracy": 0.9276595711708069, "num_tokens": 4925372.0, "step": 2749 }, { "epoch": 0.4453080722208728, "grad_norm": 28.670560836791992, "learning_rate": 5.54889896373057e-06, "loss": 0.725, "mean_token_accuracy": 0.9060908854007721, "num_tokens": 4927170.0, "step": 2750 }, { "epoch": 0.4454700024289531, "grad_norm": 29.89539909362793, "learning_rate": 5.547279792746114e-06, "loss": 0.773, "mean_token_accuracy": 0.9009331166744232, "num_tokens": 4928966.0, "step": 2751 }, { "epoch": 0.44563193263703343, "grad_norm": 23.308883666992188, "learning_rate": 5.545660621761658e-06, "loss": 0.6299, "mean_token_accuracy": 0.9170055389404297, "num_tokens": 4930755.0, "step": 2752 }, { "epoch": 0.44579386284511374, "grad_norm": 17.085216522216797, "learning_rate": 5.544041450777202e-06, "loss": 0.5019, "mean_token_accuracy": 0.925621896982193, "num_tokens": 4932536.0, "step": 2753 }, { "epoch": 0.44595579305319405, "grad_norm": 24.243133544921875, "learning_rate": 5.542422279792746e-06, "loss": 0.5517, "mean_token_accuracy": 0.9160574078559875, "num_tokens": 4934321.0, "step": 2754 }, { "epoch": 0.4461177232612744, "grad_norm": 22.24656867980957, "learning_rate": 5.540803108808291e-06, "loss": 0.5991, "mean_token_accuracy": 0.915139764547348, "num_tokens": 4936117.0, "step": 2755 }, { "epoch": 0.4462796534693547, "grad_norm": 22.625072479248047, "learning_rate": 5.539183937823834e-06, "loss": 0.6627, "mean_token_accuracy": 0.9067419171333313, "num_tokens": 4937907.0, "step": 2756 }, { "epoch": 0.446441583677435, "grad_norm": 16.466907501220703, "learning_rate": 5.537564766839379e-06, "loss": 0.5189, "mean_token_accuracy": 0.9284502267837524, "num_tokens": 4939698.0, "step": 2757 }, { "epoch": 0.44660351388551534, "grad_norm": 29.324201583862305, "learning_rate": 5.535945595854922e-06, "loss": 0.702, "mean_token_accuracy": 0.9055489599704742, "num_tokens": 4941494.0, "step": 2758 }, { "epoch": 0.44676544409359564, "grad_norm": 28.939008712768555, "learning_rate": 5.534326424870467e-06, "loss": 0.6695, "mean_token_accuracy": 0.8974235355854034, "num_tokens": 4943279.0, "step": 2759 }, { "epoch": 0.44692737430167595, "grad_norm": 29.352474212646484, "learning_rate": 5.53270725388601e-06, "loss": 0.9059, "mean_token_accuracy": 0.8951492607593536, "num_tokens": 4945065.0, "step": 2760 }, { "epoch": 0.4470893045097563, "grad_norm": 29.990007400512695, "learning_rate": 5.531088082901555e-06, "loss": 0.8233, "mean_token_accuracy": 0.8965224027633667, "num_tokens": 4946868.0, "step": 2761 }, { "epoch": 0.4472512347178366, "grad_norm": 32.23251724243164, "learning_rate": 5.529468911917098e-06, "loss": 0.8037, "mean_token_accuracy": 0.8844271302223206, "num_tokens": 4948666.0, "step": 2762 }, { "epoch": 0.44741316492591693, "grad_norm": 27.487356185913086, "learning_rate": 5.527849740932643e-06, "loss": 0.7334, "mean_token_accuracy": 0.9192523658275604, "num_tokens": 4950463.0, "step": 2763 }, { "epoch": 0.44757509513399724, "grad_norm": 27.68252182006836, "learning_rate": 5.526230569948186e-06, "loss": 0.612, "mean_token_accuracy": 0.9084639251232147, "num_tokens": 4952248.0, "step": 2764 }, { "epoch": 0.44773702534207754, "grad_norm": 15.705184936523438, "learning_rate": 5.524611398963731e-06, "loss": 0.4943, "mean_token_accuracy": 0.9345941543579102, "num_tokens": 4954036.0, "step": 2765 }, { "epoch": 0.4478989555501579, "grad_norm": 21.034793853759766, "learning_rate": 5.5229922279792744e-06, "loss": 0.6366, "mean_token_accuracy": 0.9187709391117096, "num_tokens": 4955833.0, "step": 2766 }, { "epoch": 0.4480608857582382, "grad_norm": 15.66590404510498, "learning_rate": 5.521373056994819e-06, "loss": 0.5162, "mean_token_accuracy": 0.927371621131897, "num_tokens": 4957619.0, "step": 2767 }, { "epoch": 0.4482228159663185, "grad_norm": 18.492515563964844, "learning_rate": 5.5197538860103625e-06, "loss": 0.5607, "mean_token_accuracy": 0.9262706935405731, "num_tokens": 4959416.0, "step": 2768 }, { "epoch": 0.44838474617439883, "grad_norm": 18.984676361083984, "learning_rate": 5.518134715025907e-06, "loss": 0.5309, "mean_token_accuracy": 0.925253301858902, "num_tokens": 4961209.0, "step": 2769 }, { "epoch": 0.44854667638247914, "grad_norm": 27.814537048339844, "learning_rate": 5.5165155440414505e-06, "loss": 0.8334, "mean_token_accuracy": 0.8949275612831116, "num_tokens": 4962997.0, "step": 2770 }, { "epoch": 0.44870860659055944, "grad_norm": 19.05231475830078, "learning_rate": 5.514896373056995e-06, "loss": 0.5369, "mean_token_accuracy": 0.920273095369339, "num_tokens": 4964785.0, "step": 2771 }, { "epoch": 0.4488705367986398, "grad_norm": 25.37510871887207, "learning_rate": 5.5132772020725385e-06, "loss": 0.6252, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 4966583.0, "step": 2772 }, { "epoch": 0.4490324670067201, "grad_norm": 25.628129959106445, "learning_rate": 5.511658031088083e-06, "loss": 0.5766, "mean_token_accuracy": 0.9076103568077087, "num_tokens": 4968376.0, "step": 2773 }, { "epoch": 0.4491943972148004, "grad_norm": 29.199174880981445, "learning_rate": 5.510038860103627e-06, "loss": 0.7002, "mean_token_accuracy": 0.8978102207183838, "num_tokens": 4970162.0, "step": 2774 }, { "epoch": 0.44935632742288073, "grad_norm": 23.482973098754883, "learning_rate": 5.508419689119171e-06, "loss": 0.6748, "mean_token_accuracy": 0.9113682210445404, "num_tokens": 4971956.0, "step": 2775 }, { "epoch": 0.44951825763096104, "grad_norm": 19.054073333740234, "learning_rate": 5.5068005181347154e-06, "loss": 0.5559, "mean_token_accuracy": 0.9225352108478546, "num_tokens": 4973752.0, "step": 2776 }, { "epoch": 0.4496801878390414, "grad_norm": 26.444379806518555, "learning_rate": 5.505181347150259e-06, "loss": 0.7227, "mean_token_accuracy": 0.9127170443534851, "num_tokens": 4975550.0, "step": 2777 }, { "epoch": 0.4498421180471217, "grad_norm": 19.205793380737305, "learning_rate": 5.5035621761658035e-06, "loss": 0.515, "mean_token_accuracy": 0.9288500547409058, "num_tokens": 4977343.0, "step": 2778 }, { "epoch": 0.450004048255202, "grad_norm": 15.34057903289795, "learning_rate": 5.501943005181347e-06, "loss": 0.5155, "mean_token_accuracy": 0.9248889684677124, "num_tokens": 4979135.0, "step": 2779 }, { "epoch": 0.4501659784632823, "grad_norm": 27.74591064453125, "learning_rate": 5.5003238341968915e-06, "loss": 0.6879, "mean_token_accuracy": 0.9089954197406769, "num_tokens": 4980943.0, "step": 2780 }, { "epoch": 0.45032790867136263, "grad_norm": 21.331445693969727, "learning_rate": 5.498704663212435e-06, "loss": 0.5074, "mean_token_accuracy": 0.9219819009304047, "num_tokens": 4982737.0, "step": 2781 }, { "epoch": 0.45048983887944294, "grad_norm": 17.33263397216797, "learning_rate": 5.4970854922279795e-06, "loss": 0.5332, "mean_token_accuracy": 0.9224945604801178, "num_tokens": 4984520.0, "step": 2782 }, { "epoch": 0.4506517690875233, "grad_norm": 20.298730850219727, "learning_rate": 5.495466321243523e-06, "loss": 0.5184, "mean_token_accuracy": 0.9211378395557404, "num_tokens": 4986311.0, "step": 2783 }, { "epoch": 0.4508136992956036, "grad_norm": 21.53404426574707, "learning_rate": 5.4938471502590675e-06, "loss": 0.6246, "mean_token_accuracy": 0.924717366695404, "num_tokens": 4988102.0, "step": 2784 }, { "epoch": 0.4509756295036839, "grad_norm": 29.463748931884766, "learning_rate": 5.492227979274611e-06, "loss": 0.761, "mean_token_accuracy": 0.9069536328315735, "num_tokens": 4989915.0, "step": 2785 }, { "epoch": 0.4511375597117642, "grad_norm": 32.4338264465332, "learning_rate": 5.4906088082901556e-06, "loss": 0.8733, "mean_token_accuracy": 0.8913461565971375, "num_tokens": 4991701.0, "step": 2786 }, { "epoch": 0.45129948991984453, "grad_norm": 20.215728759765625, "learning_rate": 5.488989637305699e-06, "loss": 0.5587, "mean_token_accuracy": 0.917548805475235, "num_tokens": 4993492.0, "step": 2787 }, { "epoch": 0.45146142012792484, "grad_norm": 19.93372344970703, "learning_rate": 5.487370466321244e-06, "loss": 0.5925, "mean_token_accuracy": 0.9151683151721954, "num_tokens": 4995287.0, "step": 2788 }, { "epoch": 0.4516233503360052, "grad_norm": 26.035869598388672, "learning_rate": 5.485751295336787e-06, "loss": 0.7338, "mean_token_accuracy": 0.9004205167293549, "num_tokens": 4997080.0, "step": 2789 }, { "epoch": 0.4517852805440855, "grad_norm": 25.034929275512695, "learning_rate": 5.484132124352332e-06, "loss": 0.7171, "mean_token_accuracy": 0.9066450893878937, "num_tokens": 4998871.0, "step": 2790 }, { "epoch": 0.4519472107521658, "grad_norm": 30.935081481933594, "learning_rate": 5.482512953367875e-06, "loss": 0.7372, "mean_token_accuracy": 0.8970410823822021, "num_tokens": 5000665.0, "step": 2791 }, { "epoch": 0.4521091409602461, "grad_norm": 26.603248596191406, "learning_rate": 5.48089378238342e-06, "loss": 0.644, "mean_token_accuracy": 0.9118581116199493, "num_tokens": 5002461.0, "step": 2792 }, { "epoch": 0.45227107116832643, "grad_norm": 17.50041961669922, "learning_rate": 5.479274611398964e-06, "loss": 0.4993, "mean_token_accuracy": 0.9292457401752472, "num_tokens": 5004256.0, "step": 2793 }, { "epoch": 0.4524330013764068, "grad_norm": 30.962717056274414, "learning_rate": 5.477655440414508e-06, "loss": 0.8089, "mean_token_accuracy": 0.8976273834705353, "num_tokens": 5006051.0, "step": 2794 }, { "epoch": 0.4525949315844871, "grad_norm": 14.432768821716309, "learning_rate": 5.476036269430052e-06, "loss": 0.5568, "mean_token_accuracy": 0.9181868433952332, "num_tokens": 5007832.0, "step": 2795 }, { "epoch": 0.4527568617925674, "grad_norm": 28.438678741455078, "learning_rate": 5.474417098445596e-06, "loss": 0.6945, "mean_token_accuracy": 0.9127098321914673, "num_tokens": 5009618.0, "step": 2796 }, { "epoch": 0.4529187920006477, "grad_norm": 14.729020118713379, "learning_rate": 5.47279792746114e-06, "loss": 0.4991, "mean_token_accuracy": 0.9290726780891418, "num_tokens": 5011398.0, "step": 2797 }, { "epoch": 0.453080722208728, "grad_norm": 22.016693115234375, "learning_rate": 5.471178756476684e-06, "loss": 0.5833, "mean_token_accuracy": 0.9120418429374695, "num_tokens": 5013194.0, "step": 2798 }, { "epoch": 0.45324265241680833, "grad_norm": 21.031702041625977, "learning_rate": 5.469559585492228e-06, "loss": 0.5417, "mean_token_accuracy": 0.9257246553897858, "num_tokens": 5014988.0, "step": 2799 }, { "epoch": 0.4534045826248887, "grad_norm": 17.902660369873047, "learning_rate": 5.4679404145077734e-06, "loss": 0.6153, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 5016777.0, "step": 2800 }, { "epoch": 0.453566512832969, "grad_norm": 12.702520370483398, "learning_rate": 5.466321243523317e-06, "loss": 0.4598, "mean_token_accuracy": 0.9298729598522186, "num_tokens": 5018574.0, "step": 2801 }, { "epoch": 0.4537284430410493, "grad_norm": 19.156356811523438, "learning_rate": 5.4647020725388615e-06, "loss": 0.5916, "mean_token_accuracy": 0.925709456205368, "num_tokens": 5020369.0, "step": 2802 }, { "epoch": 0.4538903732491296, "grad_norm": 28.117366790771484, "learning_rate": 5.463082901554405e-06, "loss": 0.7326, "mean_token_accuracy": 0.9012333154678345, "num_tokens": 5022163.0, "step": 2803 }, { "epoch": 0.4540523034572099, "grad_norm": 17.92461395263672, "learning_rate": 5.4614637305699495e-06, "loss": 0.6019, "mean_token_accuracy": 0.9154166281223297, "num_tokens": 5023949.0, "step": 2804 }, { "epoch": 0.45421423366529023, "grad_norm": 25.747995376586914, "learning_rate": 5.459844559585493e-06, "loss": 0.5747, "mean_token_accuracy": 0.922680139541626, "num_tokens": 5025745.0, "step": 2805 }, { "epoch": 0.4543761638733706, "grad_norm": 22.67438316345215, "learning_rate": 5.4582253886010375e-06, "loss": 0.6578, "mean_token_accuracy": 0.9175146520137787, "num_tokens": 5027536.0, "step": 2806 }, { "epoch": 0.4545380940814509, "grad_norm": 28.96368980407715, "learning_rate": 5.456606217616581e-06, "loss": 0.7984, "mean_token_accuracy": 0.8960694372653961, "num_tokens": 5029336.0, "step": 2807 }, { "epoch": 0.4547000242895312, "grad_norm": 36.49106979370117, "learning_rate": 5.4549870466321255e-06, "loss": 0.9024, "mean_token_accuracy": 0.8745629489421844, "num_tokens": 5031135.0, "step": 2808 }, { "epoch": 0.4548619544976115, "grad_norm": 24.831716537475586, "learning_rate": 5.453367875647669e-06, "loss": 0.6366, "mean_token_accuracy": 0.9065735042095184, "num_tokens": 5032925.0, "step": 2809 }, { "epoch": 0.45502388470569183, "grad_norm": 25.967815399169922, "learning_rate": 5.4517487046632136e-06, "loss": 0.6781, "mean_token_accuracy": 0.9176878929138184, "num_tokens": 5034717.0, "step": 2810 }, { "epoch": 0.4551858149137722, "grad_norm": 23.92116928100586, "learning_rate": 5.450129533678757e-06, "loss": 0.6478, "mean_token_accuracy": 0.9033152163028717, "num_tokens": 5036507.0, "step": 2811 }, { "epoch": 0.4553477451218525, "grad_norm": 16.223268508911133, "learning_rate": 5.448510362694302e-06, "loss": 0.522, "mean_token_accuracy": 0.9236077964305878, "num_tokens": 5038293.0, "step": 2812 }, { "epoch": 0.4555096753299328, "grad_norm": 21.59657096862793, "learning_rate": 5.446891191709845e-06, "loss": 0.5989, "mean_token_accuracy": 0.9093892574310303, "num_tokens": 5040081.0, "step": 2813 }, { "epoch": 0.4556716055380131, "grad_norm": 18.043031692504883, "learning_rate": 5.44527202072539e-06, "loss": 0.5386, "mean_token_accuracy": 0.9277969002723694, "num_tokens": 5041870.0, "step": 2814 }, { "epoch": 0.4558335357460934, "grad_norm": 21.975814819335938, "learning_rate": 5.443652849740933e-06, "loss": 0.5381, "mean_token_accuracy": 0.9263225495815277, "num_tokens": 5043667.0, "step": 2815 }, { "epoch": 0.45599546595417373, "grad_norm": 26.610212326049805, "learning_rate": 5.442033678756478e-06, "loss": 0.6974, "mean_token_accuracy": 0.9009661972522736, "num_tokens": 5045461.0, "step": 2816 }, { "epoch": 0.4561573961622541, "grad_norm": 16.82908058166504, "learning_rate": 5.440414507772021e-06, "loss": 0.5227, "mean_token_accuracy": 0.916636049747467, "num_tokens": 5047249.0, "step": 2817 }, { "epoch": 0.4563193263703344, "grad_norm": 24.216522216796875, "learning_rate": 5.438795336787566e-06, "loss": 0.6182, "mean_token_accuracy": 0.9120439887046814, "num_tokens": 5049044.0, "step": 2818 }, { "epoch": 0.4564812565784147, "grad_norm": 25.455324172973633, "learning_rate": 5.43717616580311e-06, "loss": 0.594, "mean_token_accuracy": 0.9079841077327728, "num_tokens": 5050838.0, "step": 2819 }, { "epoch": 0.456643186786495, "grad_norm": 23.583614349365234, "learning_rate": 5.435556994818654e-06, "loss": 0.6547, "mean_token_accuracy": 0.9151848256587982, "num_tokens": 5052633.0, "step": 2820 }, { "epoch": 0.4568051169945753, "grad_norm": 24.683320999145508, "learning_rate": 5.433937823834198e-06, "loss": 0.7528, "mean_token_accuracy": 0.8986204266548157, "num_tokens": 5054432.0, "step": 2821 }, { "epoch": 0.45696704720265563, "grad_norm": 24.798282623291016, "learning_rate": 5.432318652849742e-06, "loss": 0.6165, "mean_token_accuracy": 0.9121578335762024, "num_tokens": 5056217.0, "step": 2822 }, { "epoch": 0.457128977410736, "grad_norm": 32.945404052734375, "learning_rate": 5.430699481865286e-06, "loss": 0.8815, "mean_token_accuracy": 0.882340282201767, "num_tokens": 5058018.0, "step": 2823 }, { "epoch": 0.4572909076188163, "grad_norm": 13.521900177001953, "learning_rate": 5.42908031088083e-06, "loss": 0.4774, "mean_token_accuracy": 0.9336929321289062, "num_tokens": 5059801.0, "step": 2824 }, { "epoch": 0.4574528378268966, "grad_norm": 22.05010414123535, "learning_rate": 5.427461139896374e-06, "loss": 0.571, "mean_token_accuracy": 0.9081010818481445, "num_tokens": 5061596.0, "step": 2825 }, { "epoch": 0.4576147680349769, "grad_norm": 25.248748779296875, "learning_rate": 5.425841968911918e-06, "loss": 0.6441, "mean_token_accuracy": 0.9033879339694977, "num_tokens": 5063388.0, "step": 2826 }, { "epoch": 0.4577766982430572, "grad_norm": 23.87033462524414, "learning_rate": 5.424222797927462e-06, "loss": 0.6315, "mean_token_accuracy": 0.913968563079834, "num_tokens": 5065179.0, "step": 2827 }, { "epoch": 0.4579386284511376, "grad_norm": 26.319520950317383, "learning_rate": 5.422603626943006e-06, "loss": 0.662, "mean_token_accuracy": 0.9100414216518402, "num_tokens": 5066969.0, "step": 2828 }, { "epoch": 0.4581005586592179, "grad_norm": 13.703821182250977, "learning_rate": 5.42098445595855e-06, "loss": 0.4779, "mean_token_accuracy": 0.9357960820198059, "num_tokens": 5068762.0, "step": 2829 }, { "epoch": 0.4582624888672982, "grad_norm": 20.925565719604492, "learning_rate": 5.419365284974094e-06, "loss": 0.6277, "mean_token_accuracy": 0.9188898801803589, "num_tokens": 5070557.0, "step": 2830 }, { "epoch": 0.4584244190753785, "grad_norm": 25.780550003051758, "learning_rate": 5.417746113989638e-06, "loss": 0.7226, "mean_token_accuracy": 0.91847363114357, "num_tokens": 5072351.0, "step": 2831 }, { "epoch": 0.4585863492834588, "grad_norm": 27.270395278930664, "learning_rate": 5.416126943005182e-06, "loss": 0.8813, "mean_token_accuracy": 0.8855840265750885, "num_tokens": 5074151.0, "step": 2832 }, { "epoch": 0.4587482794915391, "grad_norm": 28.422182083129883, "learning_rate": 5.414507772020726e-06, "loss": 0.6886, "mean_token_accuracy": 0.9022475481033325, "num_tokens": 5075939.0, "step": 2833 }, { "epoch": 0.4589102096996195, "grad_norm": 22.179424285888672, "learning_rate": 5.41288860103627e-06, "loss": 0.5997, "mean_token_accuracy": 0.9132466912269592, "num_tokens": 5077716.0, "step": 2834 }, { "epoch": 0.4590721399076998, "grad_norm": 17.141340255737305, "learning_rate": 5.411269430051814e-06, "loss": 0.5413, "mean_token_accuracy": 0.9219367802143097, "num_tokens": 5079498.0, "step": 2835 }, { "epoch": 0.4592340701157801, "grad_norm": 21.801841735839844, "learning_rate": 5.409650259067358e-06, "loss": 0.6031, "mean_token_accuracy": 0.9222591519355774, "num_tokens": 5081279.0, "step": 2836 }, { "epoch": 0.4593960003238604, "grad_norm": 24.38442039489746, "learning_rate": 5.408031088082902e-06, "loss": 0.6615, "mean_token_accuracy": 0.9234082102775574, "num_tokens": 5083077.0, "step": 2837 }, { "epoch": 0.4595579305319407, "grad_norm": 12.665369987487793, "learning_rate": 5.406411917098447e-06, "loss": 0.5244, "mean_token_accuracy": 0.9268797039985657, "num_tokens": 5084862.0, "step": 2838 }, { "epoch": 0.459719860740021, "grad_norm": 20.18532371520996, "learning_rate": 5.40479274611399e-06, "loss": 0.56, "mean_token_accuracy": 0.9214285612106323, "num_tokens": 5086654.0, "step": 2839 }, { "epoch": 0.4598817909481014, "grad_norm": 20.738492965698242, "learning_rate": 5.403173575129535e-06, "loss": 0.5513, "mean_token_accuracy": 0.919047623872757, "num_tokens": 5088438.0, "step": 2840 }, { "epoch": 0.4600437211561817, "grad_norm": 21.79051971435547, "learning_rate": 5.401554404145078e-06, "loss": 0.6259, "mean_token_accuracy": 0.9157062470912933, "num_tokens": 5090235.0, "step": 2841 }, { "epoch": 0.460205651364262, "grad_norm": 30.287353515625, "learning_rate": 5.399935233160623e-06, "loss": 0.7546, "mean_token_accuracy": 0.9126865565776825, "num_tokens": 5092021.0, "step": 2842 }, { "epoch": 0.4603675815723423, "grad_norm": 21.694766998291016, "learning_rate": 5.398316062176166e-06, "loss": 0.6355, "mean_token_accuracy": 0.9133446216583252, "num_tokens": 5093799.0, "step": 2843 }, { "epoch": 0.4605295117804226, "grad_norm": 30.974061965942383, "learning_rate": 5.396696891191711e-06, "loss": 0.7749, "mean_token_accuracy": 0.897817462682724, "num_tokens": 5095595.0, "step": 2844 }, { "epoch": 0.460691441988503, "grad_norm": 21.503873825073242, "learning_rate": 5.3950777202072544e-06, "loss": 0.5842, "mean_token_accuracy": 0.9208074510097504, "num_tokens": 5097385.0, "step": 2845 }, { "epoch": 0.4608533721965833, "grad_norm": 23.618362426757812, "learning_rate": 5.393458549222799e-06, "loss": 0.5762, "mean_token_accuracy": 0.9154929518699646, "num_tokens": 5099181.0, "step": 2846 }, { "epoch": 0.4610153024046636, "grad_norm": 16.37499237060547, "learning_rate": 5.3918393782383425e-06, "loss": 0.5803, "mean_token_accuracy": 0.9311820566654205, "num_tokens": 5100969.0, "step": 2847 }, { "epoch": 0.4611772326127439, "grad_norm": 22.36064910888672, "learning_rate": 5.390220207253887e-06, "loss": 0.6145, "mean_token_accuracy": 0.9070101678371429, "num_tokens": 5102750.0, "step": 2848 }, { "epoch": 0.4613391628208242, "grad_norm": 30.884986877441406, "learning_rate": 5.3886010362694305e-06, "loss": 0.8304, "mean_token_accuracy": 0.9010878205299377, "num_tokens": 5104544.0, "step": 2849 }, { "epoch": 0.4615010930289045, "grad_norm": 21.944713592529297, "learning_rate": 5.386981865284975e-06, "loss": 0.6046, "mean_token_accuracy": 0.925709456205368, "num_tokens": 5106339.0, "step": 2850 }, { "epoch": 0.4616630232369849, "grad_norm": 23.72605323791504, "learning_rate": 5.3853626943005185e-06, "loss": 0.6855, "mean_token_accuracy": 0.9118026793003082, "num_tokens": 5108135.0, "step": 2851 }, { "epoch": 0.4618249534450652, "grad_norm": 27.90321922302246, "learning_rate": 5.383743523316063e-06, "loss": 0.6652, "mean_token_accuracy": 0.9058675467967987, "num_tokens": 5109932.0, "step": 2852 }, { "epoch": 0.4619868836531455, "grad_norm": 23.34131622314453, "learning_rate": 5.3821243523316065e-06, "loss": 0.6354, "mean_token_accuracy": 0.9053634703159332, "num_tokens": 5111729.0, "step": 2853 }, { "epoch": 0.4621488138612258, "grad_norm": 24.6957950592041, "learning_rate": 5.380505181347151e-06, "loss": 0.69, "mean_token_accuracy": 0.9039099514484406, "num_tokens": 5113521.0, "step": 2854 }, { "epoch": 0.4623107440693061, "grad_norm": 13.516589164733887, "learning_rate": 5.3788860103626946e-06, "loss": 0.5154, "mean_token_accuracy": 0.9321138858795166, "num_tokens": 5115313.0, "step": 2855 }, { "epoch": 0.4624726742773864, "grad_norm": 18.10955810546875, "learning_rate": 5.377266839378239e-06, "loss": 0.5519, "mean_token_accuracy": 0.9263710677623749, "num_tokens": 5117097.0, "step": 2856 }, { "epoch": 0.4626346044854668, "grad_norm": 19.24322509765625, "learning_rate": 5.3756476683937834e-06, "loss": 0.566, "mean_token_accuracy": 0.921171635389328, "num_tokens": 5118888.0, "step": 2857 }, { "epoch": 0.4627965346935471, "grad_norm": 16.20059585571289, "learning_rate": 5.374028497409327e-06, "loss": 0.5109, "mean_token_accuracy": 0.9195095896720886, "num_tokens": 5120674.0, "step": 2858 }, { "epoch": 0.4629584649016274, "grad_norm": 23.226848602294922, "learning_rate": 5.3724093264248715e-06, "loss": 0.69, "mean_token_accuracy": 0.9109677076339722, "num_tokens": 5122465.0, "step": 2859 }, { "epoch": 0.4631203951097077, "grad_norm": 25.412384033203125, "learning_rate": 5.370790155440415e-06, "loss": 0.6229, "mean_token_accuracy": 0.9193286001682281, "num_tokens": 5124262.0, "step": 2860 }, { "epoch": 0.463282325317788, "grad_norm": 23.752702713012695, "learning_rate": 5.3691709844559595e-06, "loss": 0.6796, "mean_token_accuracy": 0.9160798192024231, "num_tokens": 5126048.0, "step": 2861 }, { "epoch": 0.4634442555258684, "grad_norm": 20.44335174560547, "learning_rate": 5.367551813471503e-06, "loss": 0.5734, "mean_token_accuracy": 0.9229323267936707, "num_tokens": 5127833.0, "step": 2862 }, { "epoch": 0.4636061857339487, "grad_norm": 18.631301879882812, "learning_rate": 5.3659326424870475e-06, "loss": 0.5039, "mean_token_accuracy": 0.9239130616188049, "num_tokens": 5129621.0, "step": 2863 }, { "epoch": 0.463768115942029, "grad_norm": 18.931396484375, "learning_rate": 5.364313471502591e-06, "loss": 0.4985, "mean_token_accuracy": 0.9220321774482727, "num_tokens": 5131415.0, "step": 2864 }, { "epoch": 0.4639300461501093, "grad_norm": 8.640852928161621, "learning_rate": 5.3626943005181356e-06, "loss": 0.45, "mean_token_accuracy": 0.9319812953472137, "num_tokens": 5133192.0, "step": 2865 }, { "epoch": 0.4640919763581896, "grad_norm": 19.540283203125, "learning_rate": 5.361075129533679e-06, "loss": 0.5046, "mean_token_accuracy": 0.9296531975269318, "num_tokens": 5134974.0, "step": 2866 }, { "epoch": 0.4642539065662699, "grad_norm": 19.483684539794922, "learning_rate": 5.359455958549224e-06, "loss": 0.5599, "mean_token_accuracy": 0.9189277589321136, "num_tokens": 5136757.0, "step": 2867 }, { "epoch": 0.4644158367743503, "grad_norm": 19.582748413085938, "learning_rate": 5.357836787564767e-06, "loss": 0.5479, "mean_token_accuracy": 0.925253301858902, "num_tokens": 5138550.0, "step": 2868 }, { "epoch": 0.4645777669824306, "grad_norm": 25.55223846435547, "learning_rate": 5.356217616580312e-06, "loss": 0.6178, "mean_token_accuracy": 0.9044606685638428, "num_tokens": 5140334.0, "step": 2869 }, { "epoch": 0.4647396971905109, "grad_norm": 27.396039962768555, "learning_rate": 5.354598445595855e-06, "loss": 0.5731, "mean_token_accuracy": 0.9157095551490784, "num_tokens": 5142119.0, "step": 2870 }, { "epoch": 0.4649016273985912, "grad_norm": 17.082460403442383, "learning_rate": 5.3529792746114e-06, "loss": 0.4912, "mean_token_accuracy": 0.9304637312889099, "num_tokens": 5143918.0, "step": 2871 }, { "epoch": 0.4650635576066715, "grad_norm": 27.788162231445312, "learning_rate": 5.351360103626943e-06, "loss": 0.5862, "mean_token_accuracy": 0.9222605228424072, "num_tokens": 5145713.0, "step": 2872 }, { "epoch": 0.4652254878147518, "grad_norm": 28.16967010498047, "learning_rate": 5.349740932642488e-06, "loss": 0.6885, "mean_token_accuracy": 0.9074721336364746, "num_tokens": 5147506.0, "step": 2873 }, { "epoch": 0.4653874180228322, "grad_norm": 19.386356353759766, "learning_rate": 5.348121761658031e-06, "loss": 0.5515, "mean_token_accuracy": 0.9280538260936737, "num_tokens": 5149296.0, "step": 2874 }, { "epoch": 0.4655493482309125, "grad_norm": 27.669445037841797, "learning_rate": 5.346502590673576e-06, "loss": 0.6598, "mean_token_accuracy": 0.9144883453845978, "num_tokens": 5151089.0, "step": 2875 }, { "epoch": 0.4657112784389928, "grad_norm": 25.646547317504883, "learning_rate": 5.34488341968912e-06, "loss": 0.6391, "mean_token_accuracy": 0.9154929518699646, "num_tokens": 5152885.0, "step": 2876 }, { "epoch": 0.4658732086470731, "grad_norm": 12.353198051452637, "learning_rate": 5.343264248704664e-06, "loss": 0.4645, "mean_token_accuracy": 0.9358339011669159, "num_tokens": 5154662.0, "step": 2877 }, { "epoch": 0.4660351388551534, "grad_norm": 28.414976119995117, "learning_rate": 5.341645077720208e-06, "loss": 0.7167, "mean_token_accuracy": 0.9062483906745911, "num_tokens": 5156452.0, "step": 2878 }, { "epoch": 0.46619706906323377, "grad_norm": 21.641206741333008, "learning_rate": 5.340025906735752e-06, "loss": 0.5588, "mean_token_accuracy": 0.9247687458992004, "num_tokens": 5158243.0, "step": 2879 }, { "epoch": 0.4663589992713141, "grad_norm": 27.501628875732422, "learning_rate": 5.338406735751296e-06, "loss": 0.6976, "mean_token_accuracy": 0.9107498526573181, "num_tokens": 5160035.0, "step": 2880 }, { "epoch": 0.4665209294793944, "grad_norm": 30.56386375427246, "learning_rate": 5.33678756476684e-06, "loss": 0.6775, "mean_token_accuracy": 0.9067992568016052, "num_tokens": 5161826.0, "step": 2881 }, { "epoch": 0.4666828596874747, "grad_norm": 16.881032943725586, "learning_rate": 5.335168393782384e-06, "loss": 0.4897, "mean_token_accuracy": 0.9225217700004578, "num_tokens": 5163609.0, "step": 2882 }, { "epoch": 0.466844789895555, "grad_norm": 21.027667999267578, "learning_rate": 5.333549222797928e-06, "loss": 0.5789, "mean_token_accuracy": 0.9174043536186218, "num_tokens": 5165399.0, "step": 2883 }, { "epoch": 0.4670067201036353, "grad_norm": 27.815034866333008, "learning_rate": 5.331930051813472e-06, "loss": 0.6248, "mean_token_accuracy": 0.9103453755378723, "num_tokens": 5167190.0, "step": 2884 }, { "epoch": 0.46716865031171567, "grad_norm": 22.24852752685547, "learning_rate": 5.330310880829016e-06, "loss": 0.548, "mean_token_accuracy": 0.9185613691806793, "num_tokens": 5168984.0, "step": 2885 }, { "epoch": 0.467330580519796, "grad_norm": 31.455965042114258, "learning_rate": 5.32869170984456e-06, "loss": 0.7856, "mean_token_accuracy": 0.8947044312953949, "num_tokens": 5170781.0, "step": 2886 }, { "epoch": 0.4674925107278763, "grad_norm": 24.850784301757812, "learning_rate": 5.327072538860104e-06, "loss": 0.6134, "mean_token_accuracy": 0.9040032625198364, "num_tokens": 5172573.0, "step": 2887 }, { "epoch": 0.4676544409359566, "grad_norm": 29.294872283935547, "learning_rate": 5.325453367875648e-06, "loss": 0.6597, "mean_token_accuracy": 0.9121934175491333, "num_tokens": 5174370.0, "step": 2888 }, { "epoch": 0.4678163711440369, "grad_norm": 24.771955490112305, "learning_rate": 5.323834196891192e-06, "loss": 0.584, "mean_token_accuracy": 0.9157004952430725, "num_tokens": 5176155.0, "step": 2889 }, { "epoch": 0.46797830135211727, "grad_norm": 22.884859085083008, "learning_rate": 5.322215025906736e-06, "loss": 0.5046, "mean_token_accuracy": 0.9274798929691315, "num_tokens": 5177943.0, "step": 2890 }, { "epoch": 0.4681402315601976, "grad_norm": 25.530378341674805, "learning_rate": 5.32059585492228e-06, "loss": 0.5515, "mean_token_accuracy": 0.9188898801803589, "num_tokens": 5179738.0, "step": 2891 }, { "epoch": 0.4683021617682779, "grad_norm": 36.57127380371094, "learning_rate": 5.318976683937824e-06, "loss": 0.8807, "mean_token_accuracy": 0.8946167230606079, "num_tokens": 5181534.0, "step": 2892 }, { "epoch": 0.4684640919763582, "grad_norm": 24.721677780151367, "learning_rate": 5.317357512953368e-06, "loss": 0.6147, "mean_token_accuracy": 0.9181610941886902, "num_tokens": 5183327.0, "step": 2893 }, { "epoch": 0.4686260221844385, "grad_norm": 29.362308502197266, "learning_rate": 5.315738341968912e-06, "loss": 0.6518, "mean_token_accuracy": 0.9158130884170532, "num_tokens": 5185124.0, "step": 2894 }, { "epoch": 0.4687879523925188, "grad_norm": 18.696104049682617, "learning_rate": 5.314119170984457e-06, "loss": 0.5216, "mean_token_accuracy": 0.92356076836586, "num_tokens": 5186910.0, "step": 2895 }, { "epoch": 0.46894988260059917, "grad_norm": 18.09763526916504, "learning_rate": 5.3125e-06, "loss": 0.5734, "mean_token_accuracy": 0.9239495694637299, "num_tokens": 5188698.0, "step": 2896 }, { "epoch": 0.4691118128086795, "grad_norm": 20.345279693603516, "learning_rate": 5.310880829015545e-06, "loss": 0.5513, "mean_token_accuracy": 0.928205132484436, "num_tokens": 5190488.0, "step": 2897 }, { "epoch": 0.4692737430167598, "grad_norm": 21.41593360900879, "learning_rate": 5.309261658031088e-06, "loss": 0.5923, "mean_token_accuracy": 0.9211202263832092, "num_tokens": 5192279.0, "step": 2898 }, { "epoch": 0.4694356732248401, "grad_norm": 22.710124969482422, "learning_rate": 5.307642487046633e-06, "loss": 0.5929, "mean_token_accuracy": 0.9117647111415863, "num_tokens": 5194063.0, "step": 2899 }, { "epoch": 0.4695976034329204, "grad_norm": 26.75022315979004, "learning_rate": 5.3060233160621764e-06, "loss": 0.6559, "mean_token_accuracy": 0.9060872197151184, "num_tokens": 5195863.0, "step": 2900 }, { "epoch": 0.4697595336410007, "grad_norm": 18.813129425048828, "learning_rate": 5.304404145077721e-06, "loss": 0.7102, "mean_token_accuracy": 0.9177893698215485, "num_tokens": 5197655.0, "step": 2901 }, { "epoch": 0.46992146384908107, "grad_norm": 25.961584091186523, "learning_rate": 5.3027849740932645e-06, "loss": 0.5489, "mean_token_accuracy": 0.9199818968772888, "num_tokens": 5199465.0, "step": 2902 }, { "epoch": 0.4700833940571614, "grad_norm": 26.278345108032227, "learning_rate": 5.301165803108809e-06, "loss": 0.5734, "mean_token_accuracy": 0.9147329926490784, "num_tokens": 5201258.0, "step": 2903 }, { "epoch": 0.4702453242652417, "grad_norm": 22.301340103149414, "learning_rate": 5.2995466321243525e-06, "loss": 0.5778, "mean_token_accuracy": 0.9258565604686737, "num_tokens": 5203053.0, "step": 2904 }, { "epoch": 0.470407254473322, "grad_norm": 24.686975479125977, "learning_rate": 5.297927461139897e-06, "loss": 0.6746, "mean_token_accuracy": 0.9020900130271912, "num_tokens": 5204841.0, "step": 2905 }, { "epoch": 0.4705691846814023, "grad_norm": 23.020736694335938, "learning_rate": 5.2963082901554405e-06, "loss": 0.5858, "mean_token_accuracy": 0.9146870970726013, "num_tokens": 5206633.0, "step": 2906 }, { "epoch": 0.47073111488948266, "grad_norm": 19.296911239624023, "learning_rate": 5.294689119170985e-06, "loss": 0.5249, "mean_token_accuracy": 0.9206710755825043, "num_tokens": 5208423.0, "step": 2907 }, { "epoch": 0.47089304509756297, "grad_norm": 31.81449317932129, "learning_rate": 5.2930699481865285e-06, "loss": 0.6815, "mean_token_accuracy": 0.9136128425598145, "num_tokens": 5210213.0, "step": 2908 }, { "epoch": 0.4710549753056433, "grad_norm": 17.322669982910156, "learning_rate": 5.291450777202073e-06, "loss": 0.5352, "mean_token_accuracy": 0.9223621189594269, "num_tokens": 5212008.0, "step": 2909 }, { "epoch": 0.4712169055137236, "grad_norm": 29.945405960083008, "learning_rate": 5.2898316062176166e-06, "loss": 0.6949, "mean_token_accuracy": 0.9190655052661896, "num_tokens": 5213814.0, "step": 2910 }, { "epoch": 0.4713788357218039, "grad_norm": 28.96602439880371, "learning_rate": 5.288212435233161e-06, "loss": 0.7812, "mean_token_accuracy": 0.8920530378818512, "num_tokens": 5215605.0, "step": 2911 }, { "epoch": 0.4715407659298842, "grad_norm": 31.174510955810547, "learning_rate": 5.286593264248705e-06, "loss": 0.7049, "mean_token_accuracy": 0.9063625335693359, "num_tokens": 5217417.0, "step": 2912 }, { "epoch": 0.47170269613796456, "grad_norm": 28.651702880859375, "learning_rate": 5.284974093264249e-06, "loss": 0.6401, "mean_token_accuracy": 0.9039416313171387, "num_tokens": 5219210.0, "step": 2913 }, { "epoch": 0.47186462634604487, "grad_norm": 27.769548416137695, "learning_rate": 5.2833549222797935e-06, "loss": 0.5983, "mean_token_accuracy": 0.9201631844043732, "num_tokens": 5220997.0, "step": 2914 }, { "epoch": 0.4720265565541252, "grad_norm": 27.756732940673828, "learning_rate": 5.281735751295337e-06, "loss": 0.6688, "mean_token_accuracy": 0.9071381092071533, "num_tokens": 5222789.0, "step": 2915 }, { "epoch": 0.4721884867622055, "grad_norm": 25.227214813232422, "learning_rate": 5.2801165803108815e-06, "loss": 0.6198, "mean_token_accuracy": 0.91366907954216, "num_tokens": 5224579.0, "step": 2916 }, { "epoch": 0.4723504169702858, "grad_norm": 28.95039939880371, "learning_rate": 5.278497409326425e-06, "loss": 0.6966, "mean_token_accuracy": 0.9052895903587341, "num_tokens": 5226376.0, "step": 2917 }, { "epoch": 0.4725123471783661, "grad_norm": 18.444847106933594, "learning_rate": 5.2768782383419695e-06, "loss": 0.5126, "mean_token_accuracy": 0.9286729693412781, "num_tokens": 5228169.0, "step": 2918 }, { "epoch": 0.47267427738644646, "grad_norm": 32.053131103515625, "learning_rate": 5.275259067357513e-06, "loss": 0.6099, "mean_token_accuracy": 0.9108525216579437, "num_tokens": 5229950.0, "step": 2919 }, { "epoch": 0.47283620759452677, "grad_norm": 30.479393005371094, "learning_rate": 5.2736398963730575e-06, "loss": 0.7284, "mean_token_accuracy": 0.9009911119937897, "num_tokens": 5231744.0, "step": 2920 }, { "epoch": 0.4729981378026071, "grad_norm": 29.874521255493164, "learning_rate": 5.272020725388601e-06, "loss": 0.6863, "mean_token_accuracy": 0.9142303168773651, "num_tokens": 5233536.0, "step": 2921 }, { "epoch": 0.4731600680106874, "grad_norm": 19.59438705444336, "learning_rate": 5.2704015544041456e-06, "loss": 0.5935, "mean_token_accuracy": 0.9199579954147339, "num_tokens": 5235324.0, "step": 2922 }, { "epoch": 0.4733219982187677, "grad_norm": 30.61916160583496, "learning_rate": 5.268782383419689e-06, "loss": 0.7661, "mean_token_accuracy": 0.8919772505760193, "num_tokens": 5237114.0, "step": 2923 }, { "epoch": 0.47348392842684806, "grad_norm": 22.354694366455078, "learning_rate": 5.267163212435234e-06, "loss": 0.6044, "mean_token_accuracy": 0.9122854769229889, "num_tokens": 5238911.0, "step": 2924 }, { "epoch": 0.47364585863492836, "grad_norm": 18.398378372192383, "learning_rate": 5.265544041450777e-06, "loss": 0.5121, "mean_token_accuracy": 0.9285568594932556, "num_tokens": 5240703.0, "step": 2925 }, { "epoch": 0.47380778884300867, "grad_norm": 18.617631912231445, "learning_rate": 5.263924870466322e-06, "loss": 0.4792, "mean_token_accuracy": 0.9296690821647644, "num_tokens": 5242485.0, "step": 2926 }, { "epoch": 0.473969719051089, "grad_norm": 30.68703269958496, "learning_rate": 5.262305699481865e-06, "loss": 0.7908, "mean_token_accuracy": 0.9034347832202911, "num_tokens": 5244276.0, "step": 2927 }, { "epoch": 0.4741316492591693, "grad_norm": 24.17237663269043, "learning_rate": 5.26068652849741e-06, "loss": 0.5989, "mean_token_accuracy": 0.9156014919281006, "num_tokens": 5246061.0, "step": 2928 }, { "epoch": 0.4742935794672496, "grad_norm": 28.16291046142578, "learning_rate": 5.259067357512953e-06, "loss": 0.5671, "mean_token_accuracy": 0.9071002006530762, "num_tokens": 5247842.0, "step": 2929 }, { "epoch": 0.47445550967532996, "grad_norm": 20.256938934326172, "learning_rate": 5.257448186528498e-06, "loss": 0.5971, "mean_token_accuracy": 0.9247859120368958, "num_tokens": 5249631.0, "step": 2930 }, { "epoch": 0.47461743988341026, "grad_norm": 31.084367752075195, "learning_rate": 5.255829015544041e-06, "loss": 0.6111, "mean_token_accuracy": 0.8967473804950714, "num_tokens": 5251424.0, "step": 2931 }, { "epoch": 0.47477937009149057, "grad_norm": 29.46352767944336, "learning_rate": 5.254209844559586e-06, "loss": 0.7257, "mean_token_accuracy": 0.9000969231128693, "num_tokens": 5253216.0, "step": 2932 }, { "epoch": 0.4749413002995709, "grad_norm": 27.638547897338867, "learning_rate": 5.25259067357513e-06, "loss": 0.6544, "mean_token_accuracy": 0.9124966859817505, "num_tokens": 5255002.0, "step": 2933 }, { "epoch": 0.4751032305076512, "grad_norm": 24.843135833740234, "learning_rate": 5.250971502590674e-06, "loss": 0.8274, "mean_token_accuracy": 0.9029503166675568, "num_tokens": 5256792.0, "step": 2934 }, { "epoch": 0.4752651607157315, "grad_norm": 24.82257080078125, "learning_rate": 5.249352331606218e-06, "loss": 0.5774, "mean_token_accuracy": 0.9170937538146973, "num_tokens": 5258582.0, "step": 2935 }, { "epoch": 0.47542709092381186, "grad_norm": 26.72260856628418, "learning_rate": 5.247733160621762e-06, "loss": 0.6017, "mean_token_accuracy": 0.915909081697464, "num_tokens": 5260366.0, "step": 2936 }, { "epoch": 0.47558902113189216, "grad_norm": 19.526792526245117, "learning_rate": 5.246113989637306e-06, "loss": 0.5785, "mean_token_accuracy": 0.9261710345745087, "num_tokens": 5262149.0, "step": 2937 }, { "epoch": 0.47575095133997247, "grad_norm": 27.608476638793945, "learning_rate": 5.24449481865285e-06, "loss": 0.7315, "mean_token_accuracy": 0.9204832017421722, "num_tokens": 5263937.0, "step": 2938 }, { "epoch": 0.4759128815480528, "grad_norm": 20.302509307861328, "learning_rate": 5.242875647668394e-06, "loss": 0.6104, "mean_token_accuracy": 0.9283144772052765, "num_tokens": 5265728.0, "step": 2939 }, { "epoch": 0.4760748117561331, "grad_norm": 27.7907772064209, "learning_rate": 5.241256476683938e-06, "loss": 0.6075, "mean_token_accuracy": 0.9052895903587341, "num_tokens": 5267525.0, "step": 2940 }, { "epoch": 0.47623674196421345, "grad_norm": 24.1446590423584, "learning_rate": 5.239637305699482e-06, "loss": 0.6232, "mean_token_accuracy": 0.9198294281959534, "num_tokens": 5269311.0, "step": 2941 }, { "epoch": 0.47639867217229376, "grad_norm": 26.522228240966797, "learning_rate": 5.238018134715026e-06, "loss": 0.6945, "mean_token_accuracy": 0.9090448617935181, "num_tokens": 5271118.0, "step": 2942 }, { "epoch": 0.47656060238037407, "grad_norm": 17.190567016601562, "learning_rate": 5.23639896373057e-06, "loss": 0.5789, "mean_token_accuracy": 0.9256495237350464, "num_tokens": 5272899.0, "step": 2943 }, { "epoch": 0.4767225325884544, "grad_norm": 24.518218994140625, "learning_rate": 5.234779792746114e-06, "loss": 0.5842, "mean_token_accuracy": 0.9070110321044922, "num_tokens": 5274701.0, "step": 2944 }, { "epoch": 0.4768844627965347, "grad_norm": 27.35640525817871, "learning_rate": 5.233160621761658e-06, "loss": 0.5894, "mean_token_accuracy": 0.9179335236549377, "num_tokens": 5276494.0, "step": 2945 }, { "epoch": 0.477046393004615, "grad_norm": 16.56838607788086, "learning_rate": 5.231541450777202e-06, "loss": 0.5281, "mean_token_accuracy": 0.9310924410820007, "num_tokens": 5278282.0, "step": 2946 }, { "epoch": 0.47720832321269535, "grad_norm": 21.148740768432617, "learning_rate": 5.229922279792746e-06, "loss": 0.5593, "mean_token_accuracy": 0.9239130616188049, "num_tokens": 5280070.0, "step": 2947 }, { "epoch": 0.47737025342077566, "grad_norm": 28.640382766723633, "learning_rate": 5.22830310880829e-06, "loss": 0.6467, "mean_token_accuracy": 0.9025510251522064, "num_tokens": 5281869.0, "step": 2948 }, { "epoch": 0.47753218362885597, "grad_norm": 25.223848342895508, "learning_rate": 5.226683937823834e-06, "loss": 0.6037, "mean_token_accuracy": 0.9097852110862732, "num_tokens": 5283658.0, "step": 2949 }, { "epoch": 0.4776941138369363, "grad_norm": 19.175058364868164, "learning_rate": 5.225064766839378e-06, "loss": 0.5493, "mean_token_accuracy": 0.930431067943573, "num_tokens": 5285444.0, "step": 2950 }, { "epoch": 0.4778560440450166, "grad_norm": 25.689964294433594, "learning_rate": 5.223445595854922e-06, "loss": 0.5523, "mean_token_accuracy": 0.9234882295131683, "num_tokens": 5287243.0, "step": 2951 }, { "epoch": 0.4780179742530969, "grad_norm": 27.593448638916016, "learning_rate": 5.221826424870467e-06, "loss": 0.8076, "mean_token_accuracy": 0.9061359763145447, "num_tokens": 5289032.0, "step": 2952 }, { "epoch": 0.47817990446117725, "grad_norm": 19.62053680419922, "learning_rate": 5.22020725388601e-06, "loss": 0.5584, "mean_token_accuracy": 0.925144374370575, "num_tokens": 5290825.0, "step": 2953 }, { "epoch": 0.47834183466925756, "grad_norm": 14.325468063354492, "learning_rate": 5.218588082901555e-06, "loss": 0.5025, "mean_token_accuracy": 0.9305888414382935, "num_tokens": 5292611.0, "step": 2954 }, { "epoch": 0.47850376487733787, "grad_norm": 28.998231887817383, "learning_rate": 5.216968911917098e-06, "loss": 0.7067, "mean_token_accuracy": 0.9110942184925079, "num_tokens": 5294404.0, "step": 2955 }, { "epoch": 0.4786656950854182, "grad_norm": 22.359689712524414, "learning_rate": 5.215349740932643e-06, "loss": 0.5845, "mean_token_accuracy": 0.9170056283473969, "num_tokens": 5296193.0, "step": 2956 }, { "epoch": 0.4788276252934985, "grad_norm": 29.983604431152344, "learning_rate": 5.2137305699481864e-06, "loss": 0.6961, "mean_token_accuracy": 0.8951023519039154, "num_tokens": 5298001.0, "step": 2957 }, { "epoch": 0.47898955550157885, "grad_norm": 25.94442367553711, "learning_rate": 5.212111398963731e-06, "loss": 0.5792, "mean_token_accuracy": 0.916137307882309, "num_tokens": 5299811.0, "step": 2958 }, { "epoch": 0.47915148570965915, "grad_norm": 18.060644149780273, "learning_rate": 5.2104922279792745e-06, "loss": 0.5792, "mean_token_accuracy": 0.9217325150966644, "num_tokens": 5301604.0, "step": 2959 }, { "epoch": 0.47931341591773946, "grad_norm": 27.37018394470215, "learning_rate": 5.208873056994819e-06, "loss": 0.7947, "mean_token_accuracy": 0.9093098342418671, "num_tokens": 5303391.0, "step": 2960 }, { "epoch": 0.47947534612581977, "grad_norm": 32.403934478759766, "learning_rate": 5.2072538860103625e-06, "loss": 0.8079, "mean_token_accuracy": 0.89775151014328, "num_tokens": 5305177.0, "step": 2961 }, { "epoch": 0.4796372763339001, "grad_norm": 26.418136596679688, "learning_rate": 5.205634715025907e-06, "loss": 0.5591, "mean_token_accuracy": 0.9303635060787201, "num_tokens": 5306963.0, "step": 2962 }, { "epoch": 0.4797992065419804, "grad_norm": 16.83273696899414, "learning_rate": 5.2040155440414505e-06, "loss": 0.5487, "mean_token_accuracy": 0.9293177723884583, "num_tokens": 5308744.0, "step": 2963 }, { "epoch": 0.47996113675006075, "grad_norm": 21.232816696166992, "learning_rate": 5.202396373056995e-06, "loss": 0.5468, "mean_token_accuracy": 0.9186103940010071, "num_tokens": 5310526.0, "step": 2964 }, { "epoch": 0.48012306695814105, "grad_norm": 21.434364318847656, "learning_rate": 5.2007772020725386e-06, "loss": 0.5708, "mean_token_accuracy": 0.9103163778781891, "num_tokens": 5312306.0, "step": 2965 }, { "epoch": 0.48028499716622136, "grad_norm": 27.445018768310547, "learning_rate": 5.199158031088083e-06, "loss": 0.7364, "mean_token_accuracy": 0.9068088531494141, "num_tokens": 5314097.0, "step": 2966 }, { "epoch": 0.48044692737430167, "grad_norm": 21.907724380493164, "learning_rate": 5.197538860103627e-06, "loss": 0.6708, "mean_token_accuracy": 0.9169968664646149, "num_tokens": 5315886.0, "step": 2967 }, { "epoch": 0.480608857582382, "grad_norm": 21.375431060791016, "learning_rate": 5.195919689119171e-06, "loss": 0.5612, "mean_token_accuracy": 0.9162167906761169, "num_tokens": 5317673.0, "step": 2968 }, { "epoch": 0.4807707877904623, "grad_norm": 29.330354690551758, "learning_rate": 5.1943005181347155e-06, "loss": 0.7018, "mean_token_accuracy": 0.9070242643356323, "num_tokens": 5319465.0, "step": 2969 }, { "epoch": 0.48093271799854265, "grad_norm": 28.010086059570312, "learning_rate": 5.192681347150259e-06, "loss": 0.7313, "mean_token_accuracy": 0.903486430644989, "num_tokens": 5321246.0, "step": 2970 }, { "epoch": 0.48109464820662295, "grad_norm": 31.358449935913086, "learning_rate": 5.1910621761658035e-06, "loss": 0.6857, "mean_token_accuracy": 0.9015277624130249, "num_tokens": 5323052.0, "step": 2971 }, { "epoch": 0.48125657841470326, "grad_norm": 25.900650024414062, "learning_rate": 5.189443005181347e-06, "loss": 0.747, "mean_token_accuracy": 0.9117058515548706, "num_tokens": 5324836.0, "step": 2972 }, { "epoch": 0.48141850862278357, "grad_norm": 18.731250762939453, "learning_rate": 5.1878238341968915e-06, "loss": 0.5436, "mean_token_accuracy": 0.9287525415420532, "num_tokens": 5326629.0, "step": 2973 }, { "epoch": 0.4815804388308639, "grad_norm": 30.721891403198242, "learning_rate": 5.186204663212435e-06, "loss": 0.7351, "mean_token_accuracy": 0.9066331386566162, "num_tokens": 5328430.0, "step": 2974 }, { "epoch": 0.48174236903894424, "grad_norm": 18.47876739501953, "learning_rate": 5.1845854922279795e-06, "loss": 0.5239, "mean_token_accuracy": 0.9295634925365448, "num_tokens": 5330226.0, "step": 2975 }, { "epoch": 0.48190429924702455, "grad_norm": 17.191940307617188, "learning_rate": 5.182966321243523e-06, "loss": 0.5795, "mean_token_accuracy": 0.9239878058433533, "num_tokens": 5332014.0, "step": 2976 }, { "epoch": 0.48206622945510486, "grad_norm": 30.276260375976562, "learning_rate": 5.1813471502590676e-06, "loss": 0.8157, "mean_token_accuracy": 0.9014598727226257, "num_tokens": 5333800.0, "step": 2977 }, { "epoch": 0.48222815966318516, "grad_norm": 20.260787963867188, "learning_rate": 5.179727979274611e-06, "loss": 0.5451, "mean_token_accuracy": 0.9211378395557404, "num_tokens": 5335591.0, "step": 2978 }, { "epoch": 0.48239008987126547, "grad_norm": 21.172622680664062, "learning_rate": 5.178108808290156e-06, "loss": 0.6007, "mean_token_accuracy": 0.9207678437232971, "num_tokens": 5337368.0, "step": 2979 }, { "epoch": 0.4825520200793458, "grad_norm": 25.751380920410156, "learning_rate": 5.176489637305699e-06, "loss": 0.6691, "mean_token_accuracy": 0.9152900874614716, "num_tokens": 5339162.0, "step": 2980 }, { "epoch": 0.48271395028742614, "grad_norm": 16.326499938964844, "learning_rate": 5.174870466321244e-06, "loss": 0.5208, "mean_token_accuracy": 0.9212839901447296, "num_tokens": 5340954.0, "step": 2981 }, { "epoch": 0.48287588049550645, "grad_norm": 22.657331466674805, "learning_rate": 5.173251295336787e-06, "loss": 0.7048, "mean_token_accuracy": 0.9103802740573883, "num_tokens": 5342745.0, "step": 2982 }, { "epoch": 0.48303781070358676, "grad_norm": 27.320241928100586, "learning_rate": 5.171632124352332e-06, "loss": 0.6268, "mean_token_accuracy": 0.9119634330272675, "num_tokens": 5344541.0, "step": 2983 }, { "epoch": 0.48319974091166706, "grad_norm": 26.14234161376953, "learning_rate": 5.170012953367875e-06, "loss": 0.6794, "mean_token_accuracy": 0.9091990888118744, "num_tokens": 5346328.0, "step": 2984 }, { "epoch": 0.48336167111974737, "grad_norm": 35.31435775756836, "learning_rate": 5.16839378238342e-06, "loss": 0.5644, "mean_token_accuracy": 0.9124755561351776, "num_tokens": 5348126.0, "step": 2985 }, { "epoch": 0.4835236013278277, "grad_norm": 22.177021026611328, "learning_rate": 5.166774611398963e-06, "loss": 0.5356, "mean_token_accuracy": 0.924717366695404, "num_tokens": 5349917.0, "step": 2986 }, { "epoch": 0.48368553153590804, "grad_norm": 20.260520935058594, "learning_rate": 5.165155440414508e-06, "loss": 0.5815, "mean_token_accuracy": 0.9114106595516205, "num_tokens": 5351700.0, "step": 2987 }, { "epoch": 0.48384746174398835, "grad_norm": 22.355167388916016, "learning_rate": 5.163536269430052e-06, "loss": 0.5893, "mean_token_accuracy": 0.9194128215312958, "num_tokens": 5353485.0, "step": 2988 }, { "epoch": 0.48400939195206866, "grad_norm": 23.14637565612793, "learning_rate": 5.161917098445596e-06, "loss": 0.6222, "mean_token_accuracy": 0.9128788113594055, "num_tokens": 5355284.0, "step": 2989 }, { "epoch": 0.48417132216014896, "grad_norm": 21.406024932861328, "learning_rate": 5.16029792746114e-06, "loss": 0.5813, "mean_token_accuracy": 0.9181225001811981, "num_tokens": 5357065.0, "step": 2990 }, { "epoch": 0.4843332523682293, "grad_norm": 16.63289451599121, "learning_rate": 5.158678756476684e-06, "loss": 0.4801, "mean_token_accuracy": 0.9243197441101074, "num_tokens": 5358868.0, "step": 2991 }, { "epoch": 0.48449518257630964, "grad_norm": 22.046833038330078, "learning_rate": 5.157059585492228e-06, "loss": 0.624, "mean_token_accuracy": 0.9187424778938293, "num_tokens": 5360663.0, "step": 2992 }, { "epoch": 0.48465711278438994, "grad_norm": 26.663820266723633, "learning_rate": 5.155440414507773e-06, "loss": 0.6302, "mean_token_accuracy": 0.9023735225200653, "num_tokens": 5362452.0, "step": 2993 }, { "epoch": 0.48481904299247025, "grad_norm": 24.68050193786621, "learning_rate": 5.153821243523317e-06, "loss": 0.7155, "mean_token_accuracy": 0.9097970426082611, "num_tokens": 5364242.0, "step": 2994 }, { "epoch": 0.48498097320055056, "grad_norm": 27.26254653930664, "learning_rate": 5.1522020725388615e-06, "loss": 0.7125, "mean_token_accuracy": 0.8963338732719421, "num_tokens": 5366032.0, "step": 2995 }, { "epoch": 0.48514290340863087, "grad_norm": 21.771852493286133, "learning_rate": 5.150582901554405e-06, "loss": 0.5501, "mean_token_accuracy": 0.9178571403026581, "num_tokens": 5367824.0, "step": 2996 }, { "epoch": 0.4853048336167112, "grad_norm": 18.900741577148438, "learning_rate": 5.1489637305699495e-06, "loss": 0.5003, "mean_token_accuracy": 0.9257739782333374, "num_tokens": 5369618.0, "step": 2997 }, { "epoch": 0.48546676382479154, "grad_norm": 37.06663131713867, "learning_rate": 5.147344559585493e-06, "loss": 0.7315, "mean_token_accuracy": 0.9115897119045258, "num_tokens": 5371401.0, "step": 2998 }, { "epoch": 0.48562869403287184, "grad_norm": 27.482330322265625, "learning_rate": 5.1457253886010375e-06, "loss": 0.7695, "mean_token_accuracy": 0.9165938198566437, "num_tokens": 5373200.0, "step": 2999 }, { "epoch": 0.48579062424095215, "grad_norm": 22.539199829101562, "learning_rate": 5.144106217616581e-06, "loss": 0.5962, "mean_token_accuracy": 0.9197974801063538, "num_tokens": 5374986.0, "step": 3000 }, { "epoch": 0.48595255444903246, "grad_norm": 24.7188777923584, "learning_rate": 5.1424870466321256e-06, "loss": 0.5699, "mean_token_accuracy": 0.9130434989929199, "num_tokens": 5376774.0, "step": 3001 }, { "epoch": 0.48611448465711277, "grad_norm": 27.121692657470703, "learning_rate": 5.140867875647669e-06, "loss": 0.7608, "mean_token_accuracy": 0.9027452766895294, "num_tokens": 5378582.0, "step": 3002 }, { "epoch": 0.48627641486519313, "grad_norm": 32.31459426879883, "learning_rate": 5.139248704663214e-06, "loss": 0.8394, "mean_token_accuracy": 0.9036190807819366, "num_tokens": 5380364.0, "step": 3003 }, { "epoch": 0.48643834507327344, "grad_norm": 18.34467124938965, "learning_rate": 5.137629533678757e-06, "loss": 0.5393, "mean_token_accuracy": 0.9326991438865662, "num_tokens": 5382157.0, "step": 3004 }, { "epoch": 0.48660027528135374, "grad_norm": 31.747346878051758, "learning_rate": 5.136010362694302e-06, "loss": 0.8055, "mean_token_accuracy": 0.9042553305625916, "num_tokens": 5383951.0, "step": 3005 }, { "epoch": 0.48676220548943405, "grad_norm": 35.95388412475586, "learning_rate": 5.134391191709845e-06, "loss": 0.7617, "mean_token_accuracy": 0.8956834673881531, "num_tokens": 5385741.0, "step": 3006 }, { "epoch": 0.48692413569751436, "grad_norm": 33.44746398925781, "learning_rate": 5.13277202072539e-06, "loss": 0.8513, "mean_token_accuracy": 0.9012691080570221, "num_tokens": 5387527.0, "step": 3007 }, { "epoch": 0.48708606590559467, "grad_norm": 25.80466079711914, "learning_rate": 5.131152849740933e-06, "loss": 0.6592, "mean_token_accuracy": 0.8974113762378693, "num_tokens": 5389321.0, "step": 3008 }, { "epoch": 0.48724799611367503, "grad_norm": 35.45637512207031, "learning_rate": 5.129533678756478e-06, "loss": 1.035, "mean_token_accuracy": 0.8858107924461365, "num_tokens": 5391121.0, "step": 3009 }, { "epoch": 0.48740992632175534, "grad_norm": 25.505640029907227, "learning_rate": 5.127914507772021e-06, "loss": 0.7404, "mean_token_accuracy": 0.9088345766067505, "num_tokens": 5392906.0, "step": 3010 }, { "epoch": 0.48757185652983565, "grad_norm": 24.818864822387695, "learning_rate": 5.126295336787566e-06, "loss": 0.6682, "mean_token_accuracy": 0.909489631652832, "num_tokens": 5394683.0, "step": 3011 }, { "epoch": 0.48773378673791595, "grad_norm": 28.48109245300293, "learning_rate": 5.124676165803109e-06, "loss": 0.6925, "mean_token_accuracy": 0.9065458476543427, "num_tokens": 5396485.0, "step": 3012 }, { "epoch": 0.48789571694599626, "grad_norm": 36.29601287841797, "learning_rate": 5.123056994818654e-06, "loss": 0.7834, "mean_token_accuracy": 0.8908576667308807, "num_tokens": 5398281.0, "step": 3013 }, { "epoch": 0.48805764715407657, "grad_norm": 22.847387313842773, "learning_rate": 5.121437823834198e-06, "loss": 0.6694, "mean_token_accuracy": 0.9162943363189697, "num_tokens": 5400080.0, "step": 3014 }, { "epoch": 0.48821957736215693, "grad_norm": 24.827085494995117, "learning_rate": 5.119818652849742e-06, "loss": 0.5996, "mean_token_accuracy": 0.9145347476005554, "num_tokens": 5401871.0, "step": 3015 }, { "epoch": 0.48838150757023724, "grad_norm": 27.44051742553711, "learning_rate": 5.118199481865286e-06, "loss": 0.8433, "mean_token_accuracy": 0.8899087309837341, "num_tokens": 5403664.0, "step": 3016 }, { "epoch": 0.48854343777831755, "grad_norm": 20.68699836730957, "learning_rate": 5.11658031088083e-06, "loss": 0.5723, "mean_token_accuracy": 0.9229323267936707, "num_tokens": 5405449.0, "step": 3017 }, { "epoch": 0.48870536798639785, "grad_norm": 21.14838409423828, "learning_rate": 5.114961139896374e-06, "loss": 0.6086, "mean_token_accuracy": 0.9212149381637573, "num_tokens": 5407240.0, "step": 3018 }, { "epoch": 0.48886729819447816, "grad_norm": 24.232927322387695, "learning_rate": 5.113341968911918e-06, "loss": 0.6271, "mean_token_accuracy": 0.9087995290756226, "num_tokens": 5409027.0, "step": 3019 }, { "epoch": 0.4890292284025585, "grad_norm": 24.3666934967041, "learning_rate": 5.111722797927462e-06, "loss": 0.57, "mean_token_accuracy": 0.9190065264701843, "num_tokens": 5410835.0, "step": 3020 }, { "epoch": 0.48919115861063883, "grad_norm": 31.462709426879883, "learning_rate": 5.110103626943006e-06, "loss": 0.6625, "mean_token_accuracy": 0.8957125246524811, "num_tokens": 5412625.0, "step": 3021 }, { "epoch": 0.48935308881871914, "grad_norm": 39.11698913574219, "learning_rate": 5.10848445595855e-06, "loss": 0.7605, "mean_token_accuracy": 0.895560085773468, "num_tokens": 5414424.0, "step": 3022 }, { "epoch": 0.48951501902679945, "grad_norm": 19.440439224243164, "learning_rate": 5.106865284974094e-06, "loss": 0.6222, "mean_token_accuracy": 0.9205517172813416, "num_tokens": 5416213.0, "step": 3023 }, { "epoch": 0.48967694923487975, "grad_norm": 23.849119186401367, "learning_rate": 5.105246113989638e-06, "loss": 0.565, "mean_token_accuracy": 0.9218370020389557, "num_tokens": 5418006.0, "step": 3024 }, { "epoch": 0.48983887944296006, "grad_norm": 30.319480895996094, "learning_rate": 5.103626943005182e-06, "loss": 0.764, "mean_token_accuracy": 0.8976001739501953, "num_tokens": 5419811.0, "step": 3025 }, { "epoch": 0.4900008096510404, "grad_norm": 25.434173583984375, "learning_rate": 5.102007772020726e-06, "loss": 0.5891, "mean_token_accuracy": 0.9194001257419586, "num_tokens": 5421608.0, "step": 3026 }, { "epoch": 0.49016273985912073, "grad_norm": 18.40189552307129, "learning_rate": 5.10038860103627e-06, "loss": 0.5062, "mean_token_accuracy": 0.923776239156723, "num_tokens": 5423393.0, "step": 3027 }, { "epoch": 0.49032467006720104, "grad_norm": 22.239166259765625, "learning_rate": 5.098769430051814e-06, "loss": 0.6115, "mean_token_accuracy": 0.9151596128940582, "num_tokens": 5425176.0, "step": 3028 }, { "epoch": 0.49048660027528135, "grad_norm": 28.200517654418945, "learning_rate": 5.097150259067358e-06, "loss": 0.6546, "mean_token_accuracy": 0.9147758781909943, "num_tokens": 5426970.0, "step": 3029 }, { "epoch": 0.49064853048336166, "grad_norm": 11.054727554321289, "learning_rate": 5.095531088082902e-06, "loss": 0.4395, "mean_token_accuracy": 0.940364271402359, "num_tokens": 5428768.0, "step": 3030 }, { "epoch": 0.49081046069144196, "grad_norm": 19.688766479492188, "learning_rate": 5.093911917098446e-06, "loss": 0.5935, "mean_token_accuracy": 0.9120773077011108, "num_tokens": 5430553.0, "step": 3031 }, { "epoch": 0.4909723908995223, "grad_norm": 23.41379737854004, "learning_rate": 5.09229274611399e-06, "loss": 0.6742, "mean_token_accuracy": 0.9201680421829224, "num_tokens": 5432341.0, "step": 3032 }, { "epoch": 0.49113432110760263, "grad_norm": 13.503064155578613, "learning_rate": 5.090673575129535e-06, "loss": 0.4857, "mean_token_accuracy": 0.9277893602848053, "num_tokens": 5434130.0, "step": 3033 }, { "epoch": 0.49129625131568294, "grad_norm": 21.570785522460938, "learning_rate": 5.089054404145078e-06, "loss": 0.5267, "mean_token_accuracy": 0.9215146005153656, "num_tokens": 5435922.0, "step": 3034 }, { "epoch": 0.49145818152376325, "grad_norm": 19.1263427734375, "learning_rate": 5.087435233160623e-06, "loss": 0.539, "mean_token_accuracy": 0.9208633005619049, "num_tokens": 5437712.0, "step": 3035 }, { "epoch": 0.49162011173184356, "grad_norm": 22.503528594970703, "learning_rate": 5.0858160621761664e-06, "loss": 0.5902, "mean_token_accuracy": 0.9201492369174957, "num_tokens": 5439498.0, "step": 3036 }, { "epoch": 0.4917820419399239, "grad_norm": 19.080575942993164, "learning_rate": 5.084196891191711e-06, "loss": 0.5725, "mean_token_accuracy": 0.9224673211574554, "num_tokens": 5441281.0, "step": 3037 }, { "epoch": 0.4919439721480042, "grad_norm": 22.11385154724121, "learning_rate": 5.0825777202072545e-06, "loss": 0.5479, "mean_token_accuracy": 0.9240615367889404, "num_tokens": 5443070.0, "step": 3038 }, { "epoch": 0.49210590235608453, "grad_norm": 21.53031349182129, "learning_rate": 5.080958549222799e-06, "loss": 0.5758, "mean_token_accuracy": 0.9276065528392792, "num_tokens": 5444872.0, "step": 3039 }, { "epoch": 0.49226783256416484, "grad_norm": 38.85481643676758, "learning_rate": 5.0793393782383425e-06, "loss": 0.8277, "mean_token_accuracy": 0.8947316110134125, "num_tokens": 5446668.0, "step": 3040 }, { "epoch": 0.49242976277224515, "grad_norm": 21.9334774017334, "learning_rate": 5.077720207253887e-06, "loss": 0.5597, "mean_token_accuracy": 0.9217071831226349, "num_tokens": 5448461.0, "step": 3041 }, { "epoch": 0.49259169298032546, "grad_norm": 25.68813133239746, "learning_rate": 5.0761010362694305e-06, "loss": 0.5999, "mean_token_accuracy": 0.9102693796157837, "num_tokens": 5450240.0, "step": 3042 }, { "epoch": 0.4927536231884058, "grad_norm": 26.759706497192383, "learning_rate": 5.074481865284975e-06, "loss": 0.5864, "mean_token_accuracy": 0.9143833816051483, "num_tokens": 5452032.0, "step": 3043 }, { "epoch": 0.49291555339648613, "grad_norm": 28.56093406677246, "learning_rate": 5.0728626943005186e-06, "loss": 0.7838, "mean_token_accuracy": 0.90159872174263, "num_tokens": 5453818.0, "step": 3044 }, { "epoch": 0.49307748360456644, "grad_norm": 27.25768280029297, "learning_rate": 5.071243523316063e-06, "loss": 0.6151, "mean_token_accuracy": 0.9106077551841736, "num_tokens": 5455609.0, "step": 3045 }, { "epoch": 0.49323941381264674, "grad_norm": 22.179088592529297, "learning_rate": 5.069624352331607e-06, "loss": 0.5843, "mean_token_accuracy": 0.9246068596839905, "num_tokens": 5457400.0, "step": 3046 }, { "epoch": 0.49340134402072705, "grad_norm": 29.503969192504883, "learning_rate": 5.068005181347151e-06, "loss": 0.6232, "mean_token_accuracy": 0.9218687415122986, "num_tokens": 5459195.0, "step": 3047 }, { "epoch": 0.49356327422880736, "grad_norm": 31.340065002441406, "learning_rate": 5.066386010362695e-06, "loss": 0.7433, "mean_token_accuracy": 0.9051197171211243, "num_tokens": 5460992.0, "step": 3048 }, { "epoch": 0.4937252044368877, "grad_norm": 19.16147232055664, "learning_rate": 5.064766839378239e-06, "loss": 0.505, "mean_token_accuracy": 0.9289807379245758, "num_tokens": 5462785.0, "step": 3049 }, { "epoch": 0.49388713464496803, "grad_norm": 25.503454208374023, "learning_rate": 5.063147668393783e-06, "loss": 0.5647, "mean_token_accuracy": 0.9213643670082092, "num_tokens": 5464577.0, "step": 3050 }, { "epoch": 0.49404906485304834, "grad_norm": 29.64555549621582, "learning_rate": 5.061528497409327e-06, "loss": 0.6814, "mean_token_accuracy": 0.9025370180606842, "num_tokens": 5466376.0, "step": 3051 }, { "epoch": 0.49421099506112864, "grad_norm": 26.07890510559082, "learning_rate": 5.0599093264248715e-06, "loss": 0.5372, "mean_token_accuracy": 0.9245370626449585, "num_tokens": 5468167.0, "step": 3052 }, { "epoch": 0.49437292526920895, "grad_norm": 29.486907958984375, "learning_rate": 5.058290155440415e-06, "loss": 0.7235, "mean_token_accuracy": 0.8994667828083038, "num_tokens": 5469957.0, "step": 3053 }, { "epoch": 0.4945348554772893, "grad_norm": 34.593841552734375, "learning_rate": 5.0566709844559595e-06, "loss": 0.7031, "mean_token_accuracy": 0.9030612111091614, "num_tokens": 5471756.0, "step": 3054 }, { "epoch": 0.4946967856853696, "grad_norm": 36.80933380126953, "learning_rate": 5.055051813471503e-06, "loss": 0.7874, "mean_token_accuracy": 0.8978835940361023, "num_tokens": 5473543.0, "step": 3055 }, { "epoch": 0.49485871589344993, "grad_norm": 20.747751235961914, "learning_rate": 5.0534326424870476e-06, "loss": 0.5315, "mean_token_accuracy": 0.9206026494503021, "num_tokens": 5475332.0, "step": 3056 }, { "epoch": 0.49502064610153024, "grad_norm": 33.02836227416992, "learning_rate": 5.051813471502591e-06, "loss": 0.6668, "mean_token_accuracy": 0.9016009867191315, "num_tokens": 5477129.0, "step": 3057 }, { "epoch": 0.49518257630961054, "grad_norm": 29.7998104095459, "learning_rate": 5.050194300518136e-06, "loss": 0.6947, "mean_token_accuracy": 0.9015302062034607, "num_tokens": 5478925.0, "step": 3058 }, { "epoch": 0.49534450651769085, "grad_norm": 29.436275482177734, "learning_rate": 5.048575129533679e-06, "loss": 0.6536, "mean_token_accuracy": 0.9068117141723633, "num_tokens": 5480715.0, "step": 3059 }, { "epoch": 0.4955064367257712, "grad_norm": 20.1005859375, "learning_rate": 5.046955958549224e-06, "loss": 0.5619, "mean_token_accuracy": 0.9200661182403564, "num_tokens": 5482501.0, "step": 3060 }, { "epoch": 0.4956683669338515, "grad_norm": 44.073246002197266, "learning_rate": 5.045336787564767e-06, "loss": 0.9933, "mean_token_accuracy": 0.883424699306488, "num_tokens": 5484304.0, "step": 3061 }, { "epoch": 0.49583029714193183, "grad_norm": 22.542896270751953, "learning_rate": 5.043717616580312e-06, "loss": 0.4989, "mean_token_accuracy": 0.9309523701667786, "num_tokens": 5486091.0, "step": 3062 }, { "epoch": 0.49599222735001214, "grad_norm": 14.202231407165527, "learning_rate": 5.042098445595855e-06, "loss": 0.4977, "mean_token_accuracy": 0.929380863904953, "num_tokens": 5487872.0, "step": 3063 }, { "epoch": 0.49615415755809245, "grad_norm": 29.298137664794922, "learning_rate": 5.0404792746114e-06, "loss": 0.7579, "mean_token_accuracy": 0.9045239984989166, "num_tokens": 5489668.0, "step": 3064 }, { "epoch": 0.49631608776617275, "grad_norm": 24.303762435913086, "learning_rate": 5.038860103626943e-06, "loss": 0.5539, "mean_token_accuracy": 0.9151596128940582, "num_tokens": 5491451.0, "step": 3065 }, { "epoch": 0.4964780179742531, "grad_norm": 24.276966094970703, "learning_rate": 5.037240932642488e-06, "loss": 0.5449, "mean_token_accuracy": 0.9299584329128265, "num_tokens": 5493249.0, "step": 3066 }, { "epoch": 0.4966399481823334, "grad_norm": 28.88763427734375, "learning_rate": 5.035621761658031e-06, "loss": 0.6596, "mean_token_accuracy": 0.9113465547561646, "num_tokens": 5495032.0, "step": 3067 }, { "epoch": 0.49680187839041373, "grad_norm": 14.301376342773438, "learning_rate": 5.034002590673576e-06, "loss": 0.4716, "mean_token_accuracy": 0.9395537674427032, "num_tokens": 5496825.0, "step": 3068 }, { "epoch": 0.49696380859849404, "grad_norm": 35.044189453125, "learning_rate": 5.032383419689119e-06, "loss": 0.8414, "mean_token_accuracy": 0.8989678025245667, "num_tokens": 5498614.0, "step": 3069 }, { "epoch": 0.49712573880657435, "grad_norm": 26.74432945251465, "learning_rate": 5.030764248704664e-06, "loss": 0.588, "mean_token_accuracy": 0.9208264350891113, "num_tokens": 5500404.0, "step": 3070 }, { "epoch": 0.4972876690146547, "grad_norm": 25.613605499267578, "learning_rate": 5.029145077720208e-06, "loss": 0.6502, "mean_token_accuracy": 0.9144460260868073, "num_tokens": 5502185.0, "step": 3071 }, { "epoch": 0.497449599222735, "grad_norm": 27.34073829650879, "learning_rate": 5.027525906735752e-06, "loss": 0.6938, "mean_token_accuracy": 0.9103453755378723, "num_tokens": 5503976.0, "step": 3072 }, { "epoch": 0.4976115294308153, "grad_norm": 17.895008087158203, "learning_rate": 5.025906735751296e-06, "loss": 0.5042, "mean_token_accuracy": 0.9291643500328064, "num_tokens": 5505770.0, "step": 3073 }, { "epoch": 0.49777345963889563, "grad_norm": 21.76251983642578, "learning_rate": 5.02428756476684e-06, "loss": 0.5487, "mean_token_accuracy": 0.9258969724178314, "num_tokens": 5507564.0, "step": 3074 }, { "epoch": 0.49793538984697594, "grad_norm": 30.428112030029297, "learning_rate": 5.022668393782384e-06, "loss": 0.8168, "mean_token_accuracy": 0.9073173403739929, "num_tokens": 5509357.0, "step": 3075 }, { "epoch": 0.49809732005505625, "grad_norm": 32.509490966796875, "learning_rate": 5.021049222797928e-06, "loss": 0.7768, "mean_token_accuracy": 0.9009523689746857, "num_tokens": 5511159.0, "step": 3076 }, { "epoch": 0.4982592502631366, "grad_norm": 22.4287166595459, "learning_rate": 5.019430051813472e-06, "loss": 0.5846, "mean_token_accuracy": 0.9184782803058624, "num_tokens": 5512953.0, "step": 3077 }, { "epoch": 0.4984211804712169, "grad_norm": 31.916921615600586, "learning_rate": 5.017810880829016e-06, "loss": 0.8953, "mean_token_accuracy": 0.896999180316925, "num_tokens": 5514746.0, "step": 3078 }, { "epoch": 0.4985831106792972, "grad_norm": 24.6771297454834, "learning_rate": 5.01619170984456e-06, "loss": 0.6828, "mean_token_accuracy": 0.9120295643806458, "num_tokens": 5516544.0, "step": 3079 }, { "epoch": 0.49874504088737753, "grad_norm": 24.779296875, "learning_rate": 5.014572538860104e-06, "loss": 0.615, "mean_token_accuracy": 0.9171842634677887, "num_tokens": 5518334.0, "step": 3080 }, { "epoch": 0.49890697109545784, "grad_norm": 13.718116760253906, "learning_rate": 5.012953367875648e-06, "loss": 0.4877, "mean_token_accuracy": 0.9293532371520996, "num_tokens": 5520115.0, "step": 3081 }, { "epoch": 0.49906890130353815, "grad_norm": 23.878095626831055, "learning_rate": 5.011334196891192e-06, "loss": 0.5253, "mean_token_accuracy": 0.9238230586051941, "num_tokens": 5521913.0, "step": 3082 }, { "epoch": 0.4992308315116185, "grad_norm": 23.816272735595703, "learning_rate": 5.009715025906736e-06, "loss": 0.589, "mean_token_accuracy": 0.9171972870826721, "num_tokens": 5523703.0, "step": 3083 }, { "epoch": 0.4993927617196988, "grad_norm": 25.58169174194336, "learning_rate": 5.00809585492228e-06, "loss": 0.6082, "mean_token_accuracy": 0.9080895781517029, "num_tokens": 5525509.0, "step": 3084 }, { "epoch": 0.4995546919277791, "grad_norm": 26.280405044555664, "learning_rate": 5.006476683937824e-06, "loss": 0.6946, "mean_token_accuracy": 0.9057921469211578, "num_tokens": 5527297.0, "step": 3085 }, { "epoch": 0.49971662213585943, "grad_norm": 25.21198272705078, "learning_rate": 5.004857512953368e-06, "loss": 0.7727, "mean_token_accuracy": 0.9059889316558838, "num_tokens": 5529086.0, "step": 3086 }, { "epoch": 0.49987855234393974, "grad_norm": 22.442089080810547, "learning_rate": 5.003238341968912e-06, "loss": 0.5538, "mean_token_accuracy": 0.9139516949653625, "num_tokens": 5530877.0, "step": 3087 }, { "epoch": 0.50004048255202, "grad_norm": 21.99388885498047, "learning_rate": 5.001619170984456e-06, "loss": 0.6024, "mean_token_accuracy": 0.9236485958099365, "num_tokens": 5532664.0, "step": 3088 }, { "epoch": 0.5002024127601004, "grad_norm": 23.198711395263672, "learning_rate": 5e-06, "loss": 0.5244, "mean_token_accuracy": 0.927532434463501, "num_tokens": 5534452.0, "step": 3089 }, { "epoch": 0.5003643429681807, "grad_norm": 34.14920425415039, "learning_rate": 4.998380829015545e-06, "loss": 0.8785, "mean_token_accuracy": 0.8876847326755524, "num_tokens": 5536249.0, "step": 3090 }, { "epoch": 0.5005262731762611, "grad_norm": 30.41759490966797, "learning_rate": 4.9967616580310884e-06, "loss": 0.6984, "mean_token_accuracy": 0.8971927165985107, "num_tokens": 5538043.0, "step": 3091 }, { "epoch": 0.5006882033843414, "grad_norm": 33.016761779785156, "learning_rate": 4.995142487046633e-06, "loss": 0.7599, "mean_token_accuracy": 0.906521737575531, "num_tokens": 5539833.0, "step": 3092 }, { "epoch": 0.5008501335924217, "grad_norm": 18.510164260864258, "learning_rate": 4.9935233160621765e-06, "loss": 0.5385, "mean_token_accuracy": 0.9204118847846985, "num_tokens": 5541622.0, "step": 3093 }, { "epoch": 0.501012063800502, "grad_norm": 22.790176391601562, "learning_rate": 4.991904145077721e-06, "loss": 0.5684, "mean_token_accuracy": 0.920228511095047, "num_tokens": 5543409.0, "step": 3094 }, { "epoch": 0.5011739940085823, "grad_norm": 19.51079750061035, "learning_rate": 4.9902849740932645e-06, "loss": 0.4969, "mean_token_accuracy": 0.928893506526947, "num_tokens": 5545202.0, "step": 3095 }, { "epoch": 0.5013359242166626, "grad_norm": 31.858049392700195, "learning_rate": 4.988665803108809e-06, "loss": 0.6233, "mean_token_accuracy": 0.9172256290912628, "num_tokens": 5547004.0, "step": 3096 }, { "epoch": 0.5014978544247429, "grad_norm": 23.543933868408203, "learning_rate": 4.9870466321243525e-06, "loss": 0.6227, "mean_token_accuracy": 0.9173052906990051, "num_tokens": 5548794.0, "step": 3097 }, { "epoch": 0.5016597846328232, "grad_norm": 23.4957332611084, "learning_rate": 4.985427461139897e-06, "loss": 0.6142, "mean_token_accuracy": 0.9063536524772644, "num_tokens": 5550584.0, "step": 3098 }, { "epoch": 0.5018217148409035, "grad_norm": 33.95772171020508, "learning_rate": 4.9838082901554405e-06, "loss": 0.8427, "mean_token_accuracy": 0.8994092047214508, "num_tokens": 5552374.0, "step": 3099 }, { "epoch": 0.5019836450489839, "grad_norm": 25.336759567260742, "learning_rate": 4.982189119170985e-06, "loss": 0.6396, "mean_token_accuracy": 0.9056650102138519, "num_tokens": 5554171.0, "step": 3100 }, { "epoch": 0.5021455752570642, "grad_norm": 27.51428985595703, "learning_rate": 4.9805699481865286e-06, "loss": 0.6303, "mean_token_accuracy": 0.9198970198631287, "num_tokens": 5555970.0, "step": 3101 }, { "epoch": 0.5023075054651446, "grad_norm": 24.61184310913086, "learning_rate": 4.978950777202073e-06, "loss": 0.6404, "mean_token_accuracy": 0.919047623872757, "num_tokens": 5557754.0, "step": 3102 }, { "epoch": 0.5024694356732249, "grad_norm": 30.319303512573242, "learning_rate": 4.977331606217617e-06, "loss": 0.7975, "mean_token_accuracy": 0.8855316936969757, "num_tokens": 5559554.0, "step": 3103 }, { "epoch": 0.5026313658813052, "grad_norm": 24.716384887695312, "learning_rate": 4.975712435233161e-06, "loss": 0.6098, "mean_token_accuracy": 0.9092400968074799, "num_tokens": 5561332.0, "step": 3104 }, { "epoch": 0.5027932960893855, "grad_norm": 23.923810958862305, "learning_rate": 4.974093264248705e-06, "loss": 0.5533, "mean_token_accuracy": 0.9146403074264526, "num_tokens": 5563125.0, "step": 3105 }, { "epoch": 0.5029552262974658, "grad_norm": 28.690021514892578, "learning_rate": 4.972474093264249e-06, "loss": 0.7893, "mean_token_accuracy": 0.9021420180797577, "num_tokens": 5564923.0, "step": 3106 }, { "epoch": 0.5031171565055461, "grad_norm": 23.697677612304688, "learning_rate": 4.970854922279793e-06, "loss": 0.6551, "mean_token_accuracy": 0.9196009635925293, "num_tokens": 5566721.0, "step": 3107 }, { "epoch": 0.5032790867136264, "grad_norm": 24.035978317260742, "learning_rate": 4.969235751295337e-06, "loss": 0.6393, "mean_token_accuracy": 0.908129870891571, "num_tokens": 5568515.0, "step": 3108 }, { "epoch": 0.5034410169217067, "grad_norm": 17.372512817382812, "learning_rate": 4.9676165803108815e-06, "loss": 0.5353, "mean_token_accuracy": 0.9220321774482727, "num_tokens": 5570309.0, "step": 3109 }, { "epoch": 0.503602947129787, "grad_norm": 31.005531311035156, "learning_rate": 4.965997409326425e-06, "loss": 0.7193, "mean_token_accuracy": 0.9067992568016052, "num_tokens": 5572100.0, "step": 3110 }, { "epoch": 0.5037648773378673, "grad_norm": 22.811525344848633, "learning_rate": 4.9643782383419695e-06, "loss": 0.5433, "mean_token_accuracy": 0.9227941036224365, "num_tokens": 5573884.0, "step": 3111 }, { "epoch": 0.5039268075459477, "grad_norm": 33.46623229980469, "learning_rate": 4.962759067357513e-06, "loss": 0.6856, "mean_token_accuracy": 0.9057226777076721, "num_tokens": 5575682.0, "step": 3112 }, { "epoch": 0.504088737754028, "grad_norm": 22.027273178100586, "learning_rate": 4.9611398963730576e-06, "loss": 0.5656, "mean_token_accuracy": 0.9185907244682312, "num_tokens": 5577477.0, "step": 3113 }, { "epoch": 0.5042506679621084, "grad_norm": 27.57921600341797, "learning_rate": 4.959520725388601e-06, "loss": 0.6476, "mean_token_accuracy": 0.9070360660552979, "num_tokens": 5579269.0, "step": 3114 }, { "epoch": 0.5044125981701887, "grad_norm": 27.70969581604004, "learning_rate": 4.957901554404146e-06, "loss": 0.7025, "mean_token_accuracy": 0.9250127077102661, "num_tokens": 5581062.0, "step": 3115 }, { "epoch": 0.504574528378269, "grad_norm": 32.048744201660156, "learning_rate": 4.956282383419689e-06, "loss": 0.7537, "mean_token_accuracy": 0.9077905118465424, "num_tokens": 5582867.0, "step": 3116 }, { "epoch": 0.5047364585863493, "grad_norm": 30.043624877929688, "learning_rate": 4.954663212435234e-06, "loss": 0.7991, "mean_token_accuracy": 0.8915310800075531, "num_tokens": 5584656.0, "step": 3117 }, { "epoch": 0.5048983887944296, "grad_norm": 16.69049072265625, "learning_rate": 4.953044041450777e-06, "loss": 0.491, "mean_token_accuracy": 0.9252786040306091, "num_tokens": 5586449.0, "step": 3118 }, { "epoch": 0.5050603190025099, "grad_norm": 24.523834228515625, "learning_rate": 4.951424870466322e-06, "loss": 0.749, "mean_token_accuracy": 0.8919464945793152, "num_tokens": 5588239.0, "step": 3119 }, { "epoch": 0.5052222492105902, "grad_norm": 18.17458152770996, "learning_rate": 4.949805699481865e-06, "loss": 0.629, "mean_token_accuracy": 0.9185140430927277, "num_tokens": 5590021.0, "step": 3120 }, { "epoch": 0.5053841794186705, "grad_norm": 27.78891372680664, "learning_rate": 4.94818652849741e-06, "loss": 0.6926, "mean_token_accuracy": 0.9081889390945435, "num_tokens": 5591815.0, "step": 3121 }, { "epoch": 0.5055461096267508, "grad_norm": 26.635297775268555, "learning_rate": 4.946567357512953e-06, "loss": 0.6864, "mean_token_accuracy": 0.9131884276866913, "num_tokens": 5593615.0, "step": 3122 }, { "epoch": 0.5057080398348311, "grad_norm": 15.540252685546875, "learning_rate": 4.944948186528498e-06, "loss": 0.5022, "mean_token_accuracy": 0.926991730928421, "num_tokens": 5595401.0, "step": 3123 }, { "epoch": 0.5058699700429115, "grad_norm": 25.944604873657227, "learning_rate": 4.943329015544041e-06, "loss": 0.544, "mean_token_accuracy": 0.9194128215312958, "num_tokens": 5597186.0, "step": 3124 }, { "epoch": 0.5060319002509919, "grad_norm": 27.062040328979492, "learning_rate": 4.941709844559586e-06, "loss": 0.6089, "mean_token_accuracy": 0.9166885912418365, "num_tokens": 5598974.0, "step": 3125 }, { "epoch": 0.5061938304590722, "grad_norm": 35.009090423583984, "learning_rate": 4.940090673575129e-06, "loss": 0.8405, "mean_token_accuracy": 0.8907828330993652, "num_tokens": 5600762.0, "step": 3126 }, { "epoch": 0.5063557606671525, "grad_norm": 23.119449615478516, "learning_rate": 4.938471502590674e-06, "loss": 0.589, "mean_token_accuracy": 0.9234496355056763, "num_tokens": 5602547.0, "step": 3127 }, { "epoch": 0.5065176908752328, "grad_norm": 31.09634780883789, "learning_rate": 4.936852331606218e-06, "loss": 0.685, "mean_token_accuracy": 0.8980016112327576, "num_tokens": 5604333.0, "step": 3128 }, { "epoch": 0.5066796210833131, "grad_norm": 23.628664016723633, "learning_rate": 4.935233160621762e-06, "loss": 0.6576, "mean_token_accuracy": 0.8974365592002869, "num_tokens": 5606127.0, "step": 3129 }, { "epoch": 0.5068415512913934, "grad_norm": 23.543800354003906, "learning_rate": 4.933613989637306e-06, "loss": 0.6196, "mean_token_accuracy": 0.9214285612106323, "num_tokens": 5607919.0, "step": 3130 }, { "epoch": 0.5070034814994737, "grad_norm": 18.900848388671875, "learning_rate": 4.93199481865285e-06, "loss": 0.5513, "mean_token_accuracy": 0.9285568594932556, "num_tokens": 5609711.0, "step": 3131 }, { "epoch": 0.507165411707554, "grad_norm": 23.889690399169922, "learning_rate": 4.930375647668394e-06, "loss": 0.6157, "mean_token_accuracy": 0.9176711738109589, "num_tokens": 5611513.0, "step": 3132 }, { "epoch": 0.5073273419156343, "grad_norm": 25.092618942260742, "learning_rate": 4.928756476683938e-06, "loss": 0.6514, "mean_token_accuracy": 0.9155176877975464, "num_tokens": 5613297.0, "step": 3133 }, { "epoch": 0.5074892721237146, "grad_norm": 22.767011642456055, "learning_rate": 4.927137305699482e-06, "loss": 0.6873, "mean_token_accuracy": 0.9263209402561188, "num_tokens": 5615095.0, "step": 3134 }, { "epoch": 0.507651202331795, "grad_norm": 36.97871398925781, "learning_rate": 4.925518134715026e-06, "loss": 0.7892, "mean_token_accuracy": 0.9035714268684387, "num_tokens": 5616887.0, "step": 3135 }, { "epoch": 0.5078131325398754, "grad_norm": 31.4343318939209, "learning_rate": 4.92389896373057e-06, "loss": 0.9033, "mean_token_accuracy": 0.8829307556152344, "num_tokens": 5618672.0, "step": 3136 }, { "epoch": 0.5079750627479557, "grad_norm": 24.045391082763672, "learning_rate": 4.922279792746114e-06, "loss": 0.5625, "mean_token_accuracy": 0.9164723455905914, "num_tokens": 5620471.0, "step": 3137 }, { "epoch": 0.508136992956036, "grad_norm": 23.62331771850586, "learning_rate": 4.920660621761658e-06, "loss": 0.5726, "mean_token_accuracy": 0.9205517172813416, "num_tokens": 5622260.0, "step": 3138 }, { "epoch": 0.5082989231641163, "grad_norm": 23.12000274658203, "learning_rate": 4.919041450777203e-06, "loss": 0.6057, "mean_token_accuracy": 0.9199735522270203, "num_tokens": 5624047.0, "step": 3139 }, { "epoch": 0.5084608533721966, "grad_norm": 32.7731819152832, "learning_rate": 4.917422279792747e-06, "loss": 0.8042, "mean_token_accuracy": 0.8896402716636658, "num_tokens": 5625848.0, "step": 3140 }, { "epoch": 0.5086227835802769, "grad_norm": 25.463199615478516, "learning_rate": 4.915803108808291e-06, "loss": 0.5909, "mean_token_accuracy": 0.9107498526573181, "num_tokens": 5627629.0, "step": 3141 }, { "epoch": 0.5087847137883572, "grad_norm": 29.587570190429688, "learning_rate": 4.914183937823835e-06, "loss": 0.6845, "mean_token_accuracy": 0.9059281051158905, "num_tokens": 5629417.0, "step": 3142 }, { "epoch": 0.5089466439964375, "grad_norm": 24.350513458251953, "learning_rate": 4.912564766839379e-06, "loss": 0.5863, "mean_token_accuracy": 0.9228169620037079, "num_tokens": 5631201.0, "step": 3143 }, { "epoch": 0.5091085742045178, "grad_norm": 18.100515365600586, "learning_rate": 4.910945595854923e-06, "loss": 0.5784, "mean_token_accuracy": 0.9295460283756256, "num_tokens": 5632997.0, "step": 3144 }, { "epoch": 0.5092705044125981, "grad_norm": 28.804550170898438, "learning_rate": 4.909326424870467e-06, "loss": 0.8288, "mean_token_accuracy": 0.8866026103496552, "num_tokens": 5634791.0, "step": 3145 }, { "epoch": 0.5094324346206784, "grad_norm": 29.25710678100586, "learning_rate": 4.907707253886011e-06, "loss": 0.6732, "mean_token_accuracy": 0.907142847776413, "num_tokens": 5636583.0, "step": 3146 }, { "epoch": 0.5095943648287588, "grad_norm": 31.203798294067383, "learning_rate": 4.906088082901555e-06, "loss": 0.7966, "mean_token_accuracy": 0.901867538690567, "num_tokens": 5638380.0, "step": 3147 }, { "epoch": 0.5097562950368392, "grad_norm": 27.93453598022461, "learning_rate": 4.904468911917099e-06, "loss": 0.6924, "mean_token_accuracy": 0.9079633057117462, "num_tokens": 5640176.0, "step": 3148 }, { "epoch": 0.5099182252449195, "grad_norm": 27.137514114379883, "learning_rate": 4.902849740932643e-06, "loss": 0.6446, "mean_token_accuracy": 0.9014925360679626, "num_tokens": 5641962.0, "step": 3149 }, { "epoch": 0.5100801554529998, "grad_norm": 27.561450958251953, "learning_rate": 4.901230569948187e-06, "loss": 0.5859, "mean_token_accuracy": 0.908223420381546, "num_tokens": 5643757.0, "step": 3150 }, { "epoch": 0.5102420856610801, "grad_norm": 31.550500869750977, "learning_rate": 4.899611398963731e-06, "loss": 0.6486, "mean_token_accuracy": 0.9077979624271393, "num_tokens": 5645541.0, "step": 3151 }, { "epoch": 0.5104040158691604, "grad_norm": 36.09164047241211, "learning_rate": 4.897992227979275e-06, "loss": 0.7804, "mean_token_accuracy": 0.8914532661437988, "num_tokens": 5647337.0, "step": 3152 }, { "epoch": 0.5105659460772407, "grad_norm": 19.480131149291992, "learning_rate": 4.896373056994819e-06, "loss": 0.5214, "mean_token_accuracy": 0.9304347932338715, "num_tokens": 5649122.0, "step": 3153 }, { "epoch": 0.510727876285321, "grad_norm": 16.59333610534668, "learning_rate": 4.894753886010363e-06, "loss": 0.5097, "mean_token_accuracy": 0.9314528703689575, "num_tokens": 5650911.0, "step": 3154 }, { "epoch": 0.5108898064934013, "grad_norm": 26.649478912353516, "learning_rate": 4.893134715025907e-06, "loss": 0.6972, "mean_token_accuracy": 0.9250216782093048, "num_tokens": 5652703.0, "step": 3155 }, { "epoch": 0.5110517367014816, "grad_norm": 21.956741333007812, "learning_rate": 4.891515544041451e-06, "loss": 0.5115, "mean_token_accuracy": 0.9214300215244293, "num_tokens": 5654496.0, "step": 3156 }, { "epoch": 0.5112136669095619, "grad_norm": 30.248903274536133, "learning_rate": 4.889896373056995e-06, "loss": 0.6262, "mean_token_accuracy": 0.9186813235282898, "num_tokens": 5656291.0, "step": 3157 }, { "epoch": 0.5113755971176422, "grad_norm": 18.268016815185547, "learning_rate": 4.8882772020725394e-06, "loss": 0.5434, "mean_token_accuracy": 0.9188909530639648, "num_tokens": 5658074.0, "step": 3158 }, { "epoch": 0.5115375273257227, "grad_norm": 24.37735939025879, "learning_rate": 4.886658031088084e-06, "loss": 0.6935, "mean_token_accuracy": 0.9012155830860138, "num_tokens": 5659870.0, "step": 3159 }, { "epoch": 0.511699457533803, "grad_norm": 21.751869201660156, "learning_rate": 4.8850388601036275e-06, "loss": 0.6167, "mean_token_accuracy": 0.9097758233547211, "num_tokens": 5661659.0, "step": 3160 }, { "epoch": 0.5118613877418833, "grad_norm": 19.527822494506836, "learning_rate": 4.883419689119172e-06, "loss": 0.5489, "mean_token_accuracy": 0.9254851043224335, "num_tokens": 5663465.0, "step": 3161 }, { "epoch": 0.5120233179499636, "grad_norm": 26.259260177612305, "learning_rate": 4.8818005181347155e-06, "loss": 0.598, "mean_token_accuracy": 0.9196009635925293, "num_tokens": 5665263.0, "step": 3162 }, { "epoch": 0.5121852481580439, "grad_norm": 22.342472076416016, "learning_rate": 4.88018134715026e-06, "loss": 0.5724, "mean_token_accuracy": 0.9328171610832214, "num_tokens": 5667058.0, "step": 3163 }, { "epoch": 0.5123471783661242, "grad_norm": 18.374914169311523, "learning_rate": 4.8785621761658035e-06, "loss": 0.5249, "mean_token_accuracy": 0.9212393462657928, "num_tokens": 5668850.0, "step": 3164 }, { "epoch": 0.5125091085742045, "grad_norm": 22.808595657348633, "learning_rate": 4.876943005181348e-06, "loss": 0.6087, "mean_token_accuracy": 0.9309645891189575, "num_tokens": 5670636.0, "step": 3165 }, { "epoch": 0.5126710387822848, "grad_norm": 21.37262535095215, "learning_rate": 4.8753238341968915e-06, "loss": 0.5526, "mean_token_accuracy": 0.9253092110157013, "num_tokens": 5672429.0, "step": 3166 }, { "epoch": 0.5128329689903651, "grad_norm": 21.01248550415039, "learning_rate": 4.873704663212436e-06, "loss": 0.5889, "mean_token_accuracy": 0.9208920300006866, "num_tokens": 5674218.0, "step": 3167 }, { "epoch": 0.5129948991984454, "grad_norm": 19.431440353393555, "learning_rate": 4.8720854922279796e-06, "loss": 0.5699, "mean_token_accuracy": 0.9172360301017761, "num_tokens": 5676008.0, "step": 3168 }, { "epoch": 0.5131568294065257, "grad_norm": 25.13711929321289, "learning_rate": 4.870466321243524e-06, "loss": 0.5437, "mean_token_accuracy": 0.9104059636592865, "num_tokens": 5677799.0, "step": 3169 }, { "epoch": 0.5133187596146062, "grad_norm": 20.09469223022461, "learning_rate": 4.868847150259068e-06, "loss": 0.5708, "mean_token_accuracy": 0.9192849397659302, "num_tokens": 5679583.0, "step": 3170 }, { "epoch": 0.5134806898226865, "grad_norm": 14.375593185424805, "learning_rate": 4.867227979274612e-06, "loss": 0.4728, "mean_token_accuracy": 0.9386600852012634, "num_tokens": 5681372.0, "step": 3171 }, { "epoch": 0.5136426200307668, "grad_norm": 21.145713806152344, "learning_rate": 4.865608808290156e-06, "loss": 0.6416, "mean_token_accuracy": 0.9228060841560364, "num_tokens": 5683169.0, "step": 3172 }, { "epoch": 0.5138045502388471, "grad_norm": 16.526351928710938, "learning_rate": 4.8639896373057e-06, "loss": 0.4702, "mean_token_accuracy": 0.9345445930957794, "num_tokens": 5684956.0, "step": 3173 }, { "epoch": 0.5139664804469274, "grad_norm": 26.206796646118164, "learning_rate": 4.862370466321244e-06, "loss": 0.784, "mean_token_accuracy": 0.9096069931983948, "num_tokens": 5686754.0, "step": 3174 }, { "epoch": 0.5141284106550077, "grad_norm": 22.978458404541016, "learning_rate": 4.860751295336788e-06, "loss": 0.6112, "mean_token_accuracy": 0.9222849309444427, "num_tokens": 5688547.0, "step": 3175 }, { "epoch": 0.514290340863088, "grad_norm": 34.91913604736328, "learning_rate": 4.859132124352332e-06, "loss": 0.6897, "mean_token_accuracy": 0.9001225531101227, "num_tokens": 5690339.0, "step": 3176 }, { "epoch": 0.5144522710711683, "grad_norm": 21.252050399780273, "learning_rate": 4.857512953367876e-06, "loss": 0.5904, "mean_token_accuracy": 0.9163931012153625, "num_tokens": 5692126.0, "step": 3177 }, { "epoch": 0.5146142012792486, "grad_norm": 16.567798614501953, "learning_rate": 4.8558937823834205e-06, "loss": 0.4863, "mean_token_accuracy": 0.930802047252655, "num_tokens": 5693912.0, "step": 3178 }, { "epoch": 0.5147761314873289, "grad_norm": 24.064016342163086, "learning_rate": 4.854274611398964e-06, "loss": 0.6146, "mean_token_accuracy": 0.9217752516269684, "num_tokens": 5695705.0, "step": 3179 }, { "epoch": 0.5149380616954092, "grad_norm": 24.991971969604492, "learning_rate": 4.8526554404145086e-06, "loss": 0.5655, "mean_token_accuracy": 0.9147057235240936, "num_tokens": 5697497.0, "step": 3180 }, { "epoch": 0.5150999919034897, "grad_norm": 29.236038208007812, "learning_rate": 4.851036269430052e-06, "loss": 0.6134, "mean_token_accuracy": 0.9132352769374847, "num_tokens": 5699285.0, "step": 3181 }, { "epoch": 0.51526192211157, "grad_norm": 21.264066696166992, "learning_rate": 4.849417098445597e-06, "loss": 0.5452, "mean_token_accuracy": 0.9265628457069397, "num_tokens": 5701069.0, "step": 3182 }, { "epoch": 0.5154238523196503, "grad_norm": 24.06795883178711, "learning_rate": 4.84779792746114e-06, "loss": 0.7248, "mean_token_accuracy": 0.9138657748699188, "num_tokens": 5702848.0, "step": 3183 }, { "epoch": 0.5155857825277306, "grad_norm": 30.608985900878906, "learning_rate": 4.846178756476685e-06, "loss": 0.6925, "mean_token_accuracy": 0.9011540710926056, "num_tokens": 5704643.0, "step": 3184 }, { "epoch": 0.5157477127358109, "grad_norm": 27.0435848236084, "learning_rate": 4.844559585492228e-06, "loss": 0.6751, "mean_token_accuracy": 0.9168742299079895, "num_tokens": 5706444.0, "step": 3185 }, { "epoch": 0.5159096429438912, "grad_norm": 25.459152221679688, "learning_rate": 4.842940414507773e-06, "loss": 0.6159, "mean_token_accuracy": 0.9194777309894562, "num_tokens": 5708230.0, "step": 3186 }, { "epoch": 0.5160715731519715, "grad_norm": 34.461395263671875, "learning_rate": 4.841321243523316e-06, "loss": 0.7678, "mean_token_accuracy": 0.8959807753562927, "num_tokens": 5710011.0, "step": 3187 }, { "epoch": 0.5162335033600518, "grad_norm": 22.986474990844727, "learning_rate": 4.839702072538861e-06, "loss": 0.5628, "mean_token_accuracy": 0.9218654632568359, "num_tokens": 5711804.0, "step": 3188 }, { "epoch": 0.5163954335681321, "grad_norm": 19.097637176513672, "learning_rate": 4.838082901554404e-06, "loss": 0.5046, "mean_token_accuracy": 0.9208074510097504, "num_tokens": 5713594.0, "step": 3189 }, { "epoch": 0.5165573637762124, "grad_norm": 32.008941650390625, "learning_rate": 4.836463730569949e-06, "loss": 0.6807, "mean_token_accuracy": 0.9115604162216187, "num_tokens": 5715390.0, "step": 3190 }, { "epoch": 0.5167192939842927, "grad_norm": 19.023216247558594, "learning_rate": 4.834844559585492e-06, "loss": 0.5795, "mean_token_accuracy": 0.9270983338356018, "num_tokens": 5717176.0, "step": 3191 }, { "epoch": 0.516881224192373, "grad_norm": 21.860477447509766, "learning_rate": 4.833225388601037e-06, "loss": 0.5516, "mean_token_accuracy": 0.920981764793396, "num_tokens": 5718966.0, "step": 3192 }, { "epoch": 0.5170431544004535, "grad_norm": 30.08136749267578, "learning_rate": 4.83160621761658e-06, "loss": 0.5458, "mean_token_accuracy": 0.9120581448078156, "num_tokens": 5720782.0, "step": 3193 }, { "epoch": 0.5172050846085338, "grad_norm": 24.68195915222168, "learning_rate": 4.829987046632125e-06, "loss": 0.6252, "mean_token_accuracy": 0.9226865768432617, "num_tokens": 5722578.0, "step": 3194 }, { "epoch": 0.5173670148166141, "grad_norm": 24.32181739807129, "learning_rate": 4.828367875647668e-06, "loss": 0.6692, "mean_token_accuracy": 0.9149965345859528, "num_tokens": 5724384.0, "step": 3195 }, { "epoch": 0.5175289450246944, "grad_norm": 16.229299545288086, "learning_rate": 4.826748704663213e-06, "loss": 0.6334, "mean_token_accuracy": 0.9291443824768066, "num_tokens": 5726164.0, "step": 3196 }, { "epoch": 0.5176908752327747, "grad_norm": 20.368091583251953, "learning_rate": 4.825129533678757e-06, "loss": 0.5886, "mean_token_accuracy": 0.9257525205612183, "num_tokens": 5727944.0, "step": 3197 }, { "epoch": 0.517852805440855, "grad_norm": 31.058956146240234, "learning_rate": 4.823510362694301e-06, "loss": 0.6184, "mean_token_accuracy": 0.9202437698841095, "num_tokens": 5729757.0, "step": 3198 }, { "epoch": 0.5180147356489353, "grad_norm": 24.40435791015625, "learning_rate": 4.821891191709845e-06, "loss": 0.6017, "mean_token_accuracy": 0.9134595096111298, "num_tokens": 5731546.0, "step": 3199 }, { "epoch": 0.5181766658570156, "grad_norm": 25.60504150390625, "learning_rate": 4.820272020725389e-06, "loss": 0.52, "mean_token_accuracy": 0.9207285344600677, "num_tokens": 5733335.0, "step": 3200 }, { "epoch": 0.5183385960650959, "grad_norm": 33.42374038696289, "learning_rate": 4.818652849740933e-06, "loss": 0.7869, "mean_token_accuracy": 0.8986698091030121, "num_tokens": 5735133.0, "step": 3201 }, { "epoch": 0.5185005262731762, "grad_norm": 31.141822814941406, "learning_rate": 4.817033678756477e-06, "loss": 0.6987, "mean_token_accuracy": 0.9061620235443115, "num_tokens": 5736922.0, "step": 3202 }, { "epoch": 0.5186624564812565, "grad_norm": 27.583694458007812, "learning_rate": 4.815414507772021e-06, "loss": 0.6036, "mean_token_accuracy": 0.9141042828559875, "num_tokens": 5738713.0, "step": 3203 }, { "epoch": 0.518824386689337, "grad_norm": 24.821687698364258, "learning_rate": 4.813795336787565e-06, "loss": 0.5783, "mean_token_accuracy": 0.9173012375831604, "num_tokens": 5740503.0, "step": 3204 }, { "epoch": 0.5189863168974173, "grad_norm": 22.333023071289062, "learning_rate": 4.812176165803109e-06, "loss": 0.5323, "mean_token_accuracy": 0.9267153441905975, "num_tokens": 5742288.0, "step": 3205 }, { "epoch": 0.5191482471054976, "grad_norm": 27.891273498535156, "learning_rate": 4.810556994818653e-06, "loss": 0.6097, "mean_token_accuracy": 0.9145390093326569, "num_tokens": 5744081.0, "step": 3206 }, { "epoch": 0.5193101773135779, "grad_norm": 42.02451705932617, "learning_rate": 4.808937823834197e-06, "loss": 0.9123, "mean_token_accuracy": 0.8926622867584229, "num_tokens": 5745890.0, "step": 3207 }, { "epoch": 0.5194721075216582, "grad_norm": 26.871685028076172, "learning_rate": 4.807318652849741e-06, "loss": 0.7477, "mean_token_accuracy": 0.9087412357330322, "num_tokens": 5747675.0, "step": 3208 }, { "epoch": 0.5196340377297385, "grad_norm": 28.634862899780273, "learning_rate": 4.805699481865285e-06, "loss": 0.7294, "mean_token_accuracy": 0.9107352197170258, "num_tokens": 5749467.0, "step": 3209 }, { "epoch": 0.5197959679378188, "grad_norm": 16.785398483276367, "learning_rate": 4.804080310880829e-06, "loss": 0.5018, "mean_token_accuracy": 0.9255028665065765, "num_tokens": 5751247.0, "step": 3210 }, { "epoch": 0.5199578981458991, "grad_norm": 29.749080657958984, "learning_rate": 4.802461139896373e-06, "loss": 0.6191, "mean_token_accuracy": 0.9064182341098785, "num_tokens": 5753037.0, "step": 3211 }, { "epoch": 0.5201198283539794, "grad_norm": 21.897811889648438, "learning_rate": 4.800841968911917e-06, "loss": 0.5094, "mean_token_accuracy": 0.9309405386447906, "num_tokens": 5754824.0, "step": 3212 }, { "epoch": 0.5202817585620597, "grad_norm": 39.71262741088867, "learning_rate": 4.799222797927461e-06, "loss": 0.8776, "mean_token_accuracy": 0.893934041261673, "num_tokens": 5756618.0, "step": 3213 }, { "epoch": 0.52044368877014, "grad_norm": 19.655574798583984, "learning_rate": 4.797603626943005e-06, "loss": 0.5229, "mean_token_accuracy": 0.931654691696167, "num_tokens": 5758408.0, "step": 3214 }, { "epoch": 0.5206056189782204, "grad_norm": 34.73652648925781, "learning_rate": 4.7959844559585494e-06, "loss": 0.7424, "mean_token_accuracy": 0.8951486647129059, "num_tokens": 5760206.0, "step": 3215 }, { "epoch": 0.5207675491863007, "grad_norm": 25.752023696899414, "learning_rate": 4.794365284974094e-06, "loss": 0.5434, "mean_token_accuracy": 0.9186748266220093, "num_tokens": 5762000.0, "step": 3216 }, { "epoch": 0.5209294793943811, "grad_norm": 23.28451156616211, "learning_rate": 4.7927461139896375e-06, "loss": 0.5692, "mean_token_accuracy": 0.9148925542831421, "num_tokens": 5763793.0, "step": 3217 }, { "epoch": 0.5210914096024614, "grad_norm": 24.838577270507812, "learning_rate": 4.791126943005182e-06, "loss": 0.6159, "mean_token_accuracy": 0.917545735836029, "num_tokens": 5765572.0, "step": 3218 }, { "epoch": 0.5212533398105417, "grad_norm": 21.141876220703125, "learning_rate": 4.7895077720207255e-06, "loss": 0.4941, "mean_token_accuracy": 0.9260774850845337, "num_tokens": 5767368.0, "step": 3219 }, { "epoch": 0.521415270018622, "grad_norm": 33.55743408203125, "learning_rate": 4.78788860103627e-06, "loss": 0.6929, "mean_token_accuracy": 0.8968351483345032, "num_tokens": 5769171.0, "step": 3220 }, { "epoch": 0.5215772002267023, "grad_norm": 32.839271545410156, "learning_rate": 4.7862694300518135e-06, "loss": 0.7399, "mean_token_accuracy": 0.9036430418491364, "num_tokens": 5770963.0, "step": 3221 }, { "epoch": 0.5217391304347826, "grad_norm": 29.301279067993164, "learning_rate": 4.784650259067358e-06, "loss": 0.5528, "mean_token_accuracy": 0.9256495237350464, "num_tokens": 5772744.0, "step": 3222 }, { "epoch": 0.5219010606428629, "grad_norm": 20.398630142211914, "learning_rate": 4.7830310880829015e-06, "loss": 0.5367, "mean_token_accuracy": 0.9255533218383789, "num_tokens": 5774538.0, "step": 3223 }, { "epoch": 0.5220629908509432, "grad_norm": 21.35122299194336, "learning_rate": 4.781411917098446e-06, "loss": 0.5945, "mean_token_accuracy": 0.9153417944908142, "num_tokens": 5776334.0, "step": 3224 }, { "epoch": 0.5222249210590235, "grad_norm": 20.598468780517578, "learning_rate": 4.7797927461139896e-06, "loss": 0.5477, "mean_token_accuracy": 0.9174016416072845, "num_tokens": 5778112.0, "step": 3225 }, { "epoch": 0.5223868512671038, "grad_norm": 23.522504806518555, "learning_rate": 4.778173575129534e-06, "loss": 0.627, "mean_token_accuracy": 0.9160583913326263, "num_tokens": 5779898.0, "step": 3226 }, { "epoch": 0.5225487814751842, "grad_norm": 16.0715389251709, "learning_rate": 4.776554404145078e-06, "loss": 0.4949, "mean_token_accuracy": 0.9371417462825775, "num_tokens": 5781697.0, "step": 3227 }, { "epoch": 0.5227107116832646, "grad_norm": 18.35127830505371, "learning_rate": 4.774935233160622e-06, "loss": 0.4786, "mean_token_accuracy": 0.9282300472259521, "num_tokens": 5783488.0, "step": 3228 }, { "epoch": 0.5228726418913449, "grad_norm": 28.10984992980957, "learning_rate": 4.773316062176166e-06, "loss": 0.6624, "mean_token_accuracy": 0.9129156172275543, "num_tokens": 5785287.0, "step": 3229 }, { "epoch": 0.5230345720994252, "grad_norm": 25.66130256652832, "learning_rate": 4.77169689119171e-06, "loss": 0.6423, "mean_token_accuracy": 0.9164130985736847, "num_tokens": 5787098.0, "step": 3230 }, { "epoch": 0.5231965023075055, "grad_norm": 18.254972457885742, "learning_rate": 4.770077720207254e-06, "loss": 0.5517, "mean_token_accuracy": 0.924217939376831, "num_tokens": 5788887.0, "step": 3231 }, { "epoch": 0.5233584325155858, "grad_norm": 17.91209602355957, "learning_rate": 4.768458549222798e-06, "loss": 0.5382, "mean_token_accuracy": 0.9255494475364685, "num_tokens": 5790669.0, "step": 3232 }, { "epoch": 0.5235203627236661, "grad_norm": 24.230043411254883, "learning_rate": 4.766839378238342e-06, "loss": 0.5318, "mean_token_accuracy": 0.9195504486560822, "num_tokens": 5792466.0, "step": 3233 }, { "epoch": 0.5236822929317464, "grad_norm": 22.80183219909668, "learning_rate": 4.765220207253887e-06, "loss": 0.6187, "mean_token_accuracy": 0.9176159799098969, "num_tokens": 5794256.0, "step": 3234 }, { "epoch": 0.5238442231398267, "grad_norm": 22.230703353881836, "learning_rate": 4.7636010362694306e-06, "loss": 0.5472, "mean_token_accuracy": 0.9278229773044586, "num_tokens": 5796045.0, "step": 3235 }, { "epoch": 0.524006153347907, "grad_norm": 23.797130584716797, "learning_rate": 4.761981865284975e-06, "loss": 0.5867, "mean_token_accuracy": 0.9162554144859314, "num_tokens": 5797842.0, "step": 3236 }, { "epoch": 0.5241680835559873, "grad_norm": 28.26338005065918, "learning_rate": 4.760362694300519e-06, "loss": 0.6723, "mean_token_accuracy": 0.9147329926490784, "num_tokens": 5799635.0, "step": 3237 }, { "epoch": 0.5243300137640677, "grad_norm": 32.87785339355469, "learning_rate": 4.758743523316063e-06, "loss": 0.7597, "mean_token_accuracy": 0.8946414291858673, "num_tokens": 5801423.0, "step": 3238 }, { "epoch": 0.524491943972148, "grad_norm": 26.376771926879883, "learning_rate": 4.757124352331607e-06, "loss": 0.6708, "mean_token_accuracy": 0.9102478623390198, "num_tokens": 5803214.0, "step": 3239 }, { "epoch": 0.5246538741802284, "grad_norm": 22.24866485595703, "learning_rate": 4.755505181347151e-06, "loss": 0.6436, "mean_token_accuracy": 0.9244824051856995, "num_tokens": 5805004.0, "step": 3240 }, { "epoch": 0.5248158043883087, "grad_norm": 16.914201736450195, "learning_rate": 4.753886010362695e-06, "loss": 0.5114, "mean_token_accuracy": 0.9229840040206909, "num_tokens": 5806789.0, "step": 3241 }, { "epoch": 0.524977734596389, "grad_norm": 25.600482940673828, "learning_rate": 4.752266839378239e-06, "loss": 0.6114, "mean_token_accuracy": 0.9145643413066864, "num_tokens": 5808582.0, "step": 3242 }, { "epoch": 0.5251396648044693, "grad_norm": 21.872465133666992, "learning_rate": 4.750647668393783e-06, "loss": 0.6043, "mean_token_accuracy": 0.9221014678478241, "num_tokens": 5810376.0, "step": 3243 }, { "epoch": 0.5253015950125496, "grad_norm": 24.721261978149414, "learning_rate": 4.749028497409327e-06, "loss": 0.5904, "mean_token_accuracy": 0.9139400720596313, "num_tokens": 5812168.0, "step": 3244 }, { "epoch": 0.5254635252206299, "grad_norm": 29.457611083984375, "learning_rate": 4.747409326424871e-06, "loss": 0.7615, "mean_token_accuracy": 0.9130252003669739, "num_tokens": 5813956.0, "step": 3245 }, { "epoch": 0.5256254554287102, "grad_norm": 27.688304901123047, "learning_rate": 4.745790155440415e-06, "loss": 0.725, "mean_token_accuracy": 0.9120689630508423, "num_tokens": 5815753.0, "step": 3246 }, { "epoch": 0.5257873856367905, "grad_norm": 22.55941390991211, "learning_rate": 4.7441709844559596e-06, "loss": 0.609, "mean_token_accuracy": 0.9209627509117126, "num_tokens": 5817543.0, "step": 3247 }, { "epoch": 0.5259493158448708, "grad_norm": 22.25295066833496, "learning_rate": 4.742551813471503e-06, "loss": 0.5271, "mean_token_accuracy": 0.9259218573570251, "num_tokens": 5819325.0, "step": 3248 }, { "epoch": 0.5261112460529512, "grad_norm": 26.40397071838379, "learning_rate": 4.740932642487048e-06, "loss": 0.5601, "mean_token_accuracy": 0.925000011920929, "num_tokens": 5821117.0, "step": 3249 }, { "epoch": 0.5262731762610315, "grad_norm": 27.036094665527344, "learning_rate": 4.739313471502591e-06, "loss": 0.7278, "mean_token_accuracy": 0.9033782184123993, "num_tokens": 5822906.0, "step": 3250 }, { "epoch": 0.5264351064691118, "grad_norm": 22.441499710083008, "learning_rate": 4.737694300518136e-06, "loss": 0.5347, "mean_token_accuracy": 0.9305555820465088, "num_tokens": 5824706.0, "step": 3251 }, { "epoch": 0.5265970366771922, "grad_norm": 29.68516731262207, "learning_rate": 4.736075129533679e-06, "loss": 0.6662, "mean_token_accuracy": 0.8968908786773682, "num_tokens": 5826500.0, "step": 3252 }, { "epoch": 0.5267589668852725, "grad_norm": 14.240768432617188, "learning_rate": 4.734455958549224e-06, "loss": 0.4621, "mean_token_accuracy": 0.9335511922836304, "num_tokens": 5828283.0, "step": 3253 }, { "epoch": 0.5269208970933528, "grad_norm": 30.80999183654785, "learning_rate": 4.732836787564767e-06, "loss": 0.6644, "mean_token_accuracy": 0.9068345129489899, "num_tokens": 5830074.0, "step": 3254 }, { "epoch": 0.5270828273014331, "grad_norm": 16.276378631591797, "learning_rate": 4.731217616580312e-06, "loss": 0.524, "mean_token_accuracy": 0.9244604408740997, "num_tokens": 5831864.0, "step": 3255 }, { "epoch": 0.5272447575095134, "grad_norm": 30.29820442199707, "learning_rate": 4.729598445595855e-06, "loss": 0.855, "mean_token_accuracy": 0.8985449969768524, "num_tokens": 5833651.0, "step": 3256 }, { "epoch": 0.5274066877175937, "grad_norm": 21.22198486328125, "learning_rate": 4.7279792746114e-06, "loss": 0.5453, "mean_token_accuracy": 0.9183647632598877, "num_tokens": 5835444.0, "step": 3257 }, { "epoch": 0.527568617925674, "grad_norm": 25.498836517333984, "learning_rate": 4.726360103626943e-06, "loss": 0.5817, "mean_token_accuracy": 0.9121601581573486, "num_tokens": 5837241.0, "step": 3258 }, { "epoch": 0.5277305481337543, "grad_norm": 20.432722091674805, "learning_rate": 4.724740932642488e-06, "loss": 0.5515, "mean_token_accuracy": 0.9210049211978912, "num_tokens": 5839031.0, "step": 3259 }, { "epoch": 0.5278924783418346, "grad_norm": 22.283710479736328, "learning_rate": 4.723121761658031e-06, "loss": 0.5763, "mean_token_accuracy": 0.9211459457874298, "num_tokens": 5840822.0, "step": 3260 }, { "epoch": 0.528054408549915, "grad_norm": 24.2774658203125, "learning_rate": 4.721502590673576e-06, "loss": 0.6978, "mean_token_accuracy": 0.9052200019359589, "num_tokens": 5842618.0, "step": 3261 }, { "epoch": 0.5282163387579953, "grad_norm": 32.79093933105469, "learning_rate": 4.719883419689119e-06, "loss": 0.7571, "mean_token_accuracy": 0.9064554274082184, "num_tokens": 5844408.0, "step": 3262 }, { "epoch": 0.5283782689660756, "grad_norm": 29.020587921142578, "learning_rate": 4.718264248704664e-06, "loss": 0.6789, "mean_token_accuracy": 0.9155213236808777, "num_tokens": 5846193.0, "step": 3263 }, { "epoch": 0.528540199174156, "grad_norm": 26.841073989868164, "learning_rate": 4.716645077720207e-06, "loss": 0.5989, "mean_token_accuracy": 0.9155176877975464, "num_tokens": 5847977.0, "step": 3264 }, { "epoch": 0.5287021293822363, "grad_norm": 24.67628288269043, "learning_rate": 4.715025906735752e-06, "loss": 0.6106, "mean_token_accuracy": 0.9150060415267944, "num_tokens": 5849771.0, "step": 3265 }, { "epoch": 0.5288640595903166, "grad_norm": 13.892492294311523, "learning_rate": 4.713406735751296e-06, "loss": 0.4845, "mean_token_accuracy": 0.9319115877151489, "num_tokens": 5851562.0, "step": 3266 }, { "epoch": 0.5290259897983969, "grad_norm": 25.920000076293945, "learning_rate": 4.71178756476684e-06, "loss": 0.6623, "mean_token_accuracy": 0.9067660570144653, "num_tokens": 5853352.0, "step": 3267 }, { "epoch": 0.5291879200064772, "grad_norm": 32.46107864379883, "learning_rate": 4.710168393782384e-06, "loss": 0.8235, "mean_token_accuracy": 0.9011238813400269, "num_tokens": 5855147.0, "step": 3268 }, { "epoch": 0.5293498502145575, "grad_norm": 25.851436614990234, "learning_rate": 4.708549222797928e-06, "loss": 0.7111, "mean_token_accuracy": 0.9108265936374664, "num_tokens": 5856940.0, "step": 3269 }, { "epoch": 0.5295117804226378, "grad_norm": 33.249656677246094, "learning_rate": 4.706930051813472e-06, "loss": 0.8553, "mean_token_accuracy": 0.8916058242321014, "num_tokens": 5858719.0, "step": 3270 }, { "epoch": 0.5296737106307181, "grad_norm": 19.06661605834961, "learning_rate": 4.705310880829016e-06, "loss": 0.5882, "mean_token_accuracy": 0.9184104800224304, "num_tokens": 5860513.0, "step": 3271 }, { "epoch": 0.5298356408387985, "grad_norm": 22.882741928100586, "learning_rate": 4.70369170984456e-06, "loss": 0.57, "mean_token_accuracy": 0.9269450306892395, "num_tokens": 5862299.0, "step": 3272 }, { "epoch": 0.5299975710468788, "grad_norm": 31.506717681884766, "learning_rate": 4.702072538860104e-06, "loss": 0.6995, "mean_token_accuracy": 0.894209623336792, "num_tokens": 5864094.0, "step": 3273 }, { "epoch": 0.5301595012549591, "grad_norm": 23.07085418701172, "learning_rate": 4.700453367875648e-06, "loss": 0.6604, "mean_token_accuracy": 0.9136485755443573, "num_tokens": 5865895.0, "step": 3274 }, { "epoch": 0.5303214314630394, "grad_norm": 28.44403648376465, "learning_rate": 4.698834196891192e-06, "loss": 0.622, "mean_token_accuracy": 0.9064748287200928, "num_tokens": 5867685.0, "step": 3275 }, { "epoch": 0.5304833616711198, "grad_norm": 15.705784797668457, "learning_rate": 4.697215025906736e-06, "loss": 0.4992, "mean_token_accuracy": 0.9300665259361267, "num_tokens": 5869483.0, "step": 3276 }, { "epoch": 0.5306452918792001, "grad_norm": 22.40789794921875, "learning_rate": 4.69559585492228e-06, "loss": 0.6449, "mean_token_accuracy": 0.909722238779068, "num_tokens": 5871271.0, "step": 3277 }, { "epoch": 0.5308072220872804, "grad_norm": 29.81925392150879, "learning_rate": 4.693976683937824e-06, "loss": 0.7248, "mean_token_accuracy": 0.8999184370040894, "num_tokens": 5873072.0, "step": 3278 }, { "epoch": 0.5309691522953607, "grad_norm": 30.950777053833008, "learning_rate": 4.692357512953368e-06, "loss": 0.8592, "mean_token_accuracy": 0.8946863114833832, "num_tokens": 5874869.0, "step": 3279 }, { "epoch": 0.531131082503441, "grad_norm": 21.334619522094727, "learning_rate": 4.690738341968912e-06, "loss": 0.5009, "mean_token_accuracy": 0.9268405139446259, "num_tokens": 5876668.0, "step": 3280 }, { "epoch": 0.5312930127115213, "grad_norm": 30.601539611816406, "learning_rate": 4.689119170984456e-06, "loss": 0.7466, "mean_token_accuracy": 0.9077341854572296, "num_tokens": 5878451.0, "step": 3281 }, { "epoch": 0.5314549429196016, "grad_norm": 28.87685203552246, "learning_rate": 4.6875000000000004e-06, "loss": 0.6488, "mean_token_accuracy": 0.9045454561710358, "num_tokens": 5880246.0, "step": 3282 }, { "epoch": 0.531616873127682, "grad_norm": 14.6832275390625, "learning_rate": 4.685880829015544e-06, "loss": 0.4694, "mean_token_accuracy": 0.9363949596881866, "num_tokens": 5882041.0, "step": 3283 }, { "epoch": 0.5317788033357623, "grad_norm": 29.889806747436523, "learning_rate": 4.6842616580310885e-06, "loss": 0.8502, "mean_token_accuracy": 0.9056248366832733, "num_tokens": 5883839.0, "step": 3284 }, { "epoch": 0.5319407335438426, "grad_norm": 22.20687484741211, "learning_rate": 4.682642487046633e-06, "loss": 0.5611, "mean_token_accuracy": 0.9266775846481323, "num_tokens": 5885622.0, "step": 3285 }, { "epoch": 0.5321026637519229, "grad_norm": 22.255115509033203, "learning_rate": 4.6810233160621765e-06, "loss": 0.6832, "mean_token_accuracy": 0.915778249502182, "num_tokens": 5887408.0, "step": 3286 }, { "epoch": 0.5322645939600033, "grad_norm": 24.455339431762695, "learning_rate": 4.679404145077721e-06, "loss": 0.6669, "mean_token_accuracy": 0.9180491268634796, "num_tokens": 5889200.0, "step": 3287 }, { "epoch": 0.5324265241680836, "grad_norm": 23.976558685302734, "learning_rate": 4.6777849740932645e-06, "loss": 0.5587, "mean_token_accuracy": 0.9140793681144714, "num_tokens": 5890991.0, "step": 3288 }, { "epoch": 0.5325884543761639, "grad_norm": 22.580522537231445, "learning_rate": 4.676165803108809e-06, "loss": 0.503, "mean_token_accuracy": 0.9313608109951019, "num_tokens": 5892780.0, "step": 3289 }, { "epoch": 0.5327503845842442, "grad_norm": 25.41205406188965, "learning_rate": 4.6745466321243525e-06, "loss": 0.6878, "mean_token_accuracy": 0.9052004218101501, "num_tokens": 5894577.0, "step": 3290 }, { "epoch": 0.5329123147923245, "grad_norm": 17.420106887817383, "learning_rate": 4.672927461139897e-06, "loss": 0.5194, "mean_token_accuracy": 0.9288175106048584, "num_tokens": 5896370.0, "step": 3291 }, { "epoch": 0.5330742450004048, "grad_norm": 34.279117584228516, "learning_rate": 4.6713082901554406e-06, "loss": 0.8493, "mean_token_accuracy": 0.904699444770813, "num_tokens": 5898177.0, "step": 3292 }, { "epoch": 0.5332361752084851, "grad_norm": 21.430469512939453, "learning_rate": 4.669689119170985e-06, "loss": 0.5408, "mean_token_accuracy": 0.9122180640697479, "num_tokens": 5899962.0, "step": 3293 }, { "epoch": 0.5333981054165655, "grad_norm": 22.764860153198242, "learning_rate": 4.668069948186529e-06, "loss": 0.6188, "mean_token_accuracy": 0.9151371717453003, "num_tokens": 5901765.0, "step": 3294 }, { "epoch": 0.5335600356246458, "grad_norm": 23.35373306274414, "learning_rate": 4.666450777202073e-06, "loss": 0.5751, "mean_token_accuracy": 0.9236221313476562, "num_tokens": 5903552.0, "step": 3295 }, { "epoch": 0.5337219658327261, "grad_norm": 23.841981887817383, "learning_rate": 4.664831606217617e-06, "loss": 0.5516, "mean_token_accuracy": 0.9072797000408173, "num_tokens": 5905344.0, "step": 3296 }, { "epoch": 0.5338838960408064, "grad_norm": 15.606760025024414, "learning_rate": 4.663212435233161e-06, "loss": 0.4954, "mean_token_accuracy": 0.9268648028373718, "num_tokens": 5907131.0, "step": 3297 }, { "epoch": 0.5340458262488867, "grad_norm": 17.10336685180664, "learning_rate": 4.661593264248705e-06, "loss": 0.534, "mean_token_accuracy": 0.9301941990852356, "num_tokens": 5908915.0, "step": 3298 }, { "epoch": 0.534207756456967, "grad_norm": 20.359485626220703, "learning_rate": 4.659974093264249e-06, "loss": 0.5342, "mean_token_accuracy": 0.9192011952400208, "num_tokens": 5910711.0, "step": 3299 }, { "epoch": 0.5343696866650474, "grad_norm": 21.940690994262695, "learning_rate": 4.658354922279793e-06, "loss": 0.5679, "mean_token_accuracy": 0.9133472442626953, "num_tokens": 5912500.0, "step": 3300 }, { "epoch": 0.5345316168731277, "grad_norm": 27.70854949951172, "learning_rate": 4.656735751295337e-06, "loss": 0.6474, "mean_token_accuracy": 0.9068073034286499, "num_tokens": 5914301.0, "step": 3301 }, { "epoch": 0.534693547081208, "grad_norm": 16.91778564453125, "learning_rate": 4.655116580310881e-06, "loss": 0.5396, "mean_token_accuracy": 0.916292667388916, "num_tokens": 5916090.0, "step": 3302 }, { "epoch": 0.5348554772892883, "grad_norm": 26.937002182006836, "learning_rate": 4.653497409326425e-06, "loss": 0.5543, "mean_token_accuracy": 0.9205682873725891, "num_tokens": 5917879.0, "step": 3303 }, { "epoch": 0.5350174074973686, "grad_norm": 24.79198455810547, "learning_rate": 4.6518782383419696e-06, "loss": 0.6584, "mean_token_accuracy": 0.9208920300006866, "num_tokens": 5919668.0, "step": 3304 }, { "epoch": 0.5351793377054489, "grad_norm": 21.163257598876953, "learning_rate": 4.650259067357513e-06, "loss": 0.5302, "mean_token_accuracy": 0.9231884181499481, "num_tokens": 5921453.0, "step": 3305 }, { "epoch": 0.5353412679135293, "grad_norm": 32.04161071777344, "learning_rate": 4.648639896373058e-06, "loss": 0.7986, "mean_token_accuracy": 0.9063608050346375, "num_tokens": 5923242.0, "step": 3306 }, { "epoch": 0.5355031981216096, "grad_norm": 20.623491287231445, "learning_rate": 4.647020725388601e-06, "loss": 0.5671, "mean_token_accuracy": 0.9171499609947205, "num_tokens": 5925032.0, "step": 3307 }, { "epoch": 0.5356651283296899, "grad_norm": 24.486120223999023, "learning_rate": 4.645401554404146e-06, "loss": 0.6191, "mean_token_accuracy": 0.9154887795448303, "num_tokens": 5926828.0, "step": 3308 }, { "epoch": 0.5358270585377702, "grad_norm": 17.307029724121094, "learning_rate": 4.643782383419689e-06, "loss": 0.5895, "mean_token_accuracy": 0.9227039813995361, "num_tokens": 5928611.0, "step": 3309 }, { "epoch": 0.5359889887458505, "grad_norm": 23.798681259155273, "learning_rate": 4.642163212435234e-06, "loss": 0.5676, "mean_token_accuracy": 0.913159966468811, "num_tokens": 5930399.0, "step": 3310 }, { "epoch": 0.5361509189539309, "grad_norm": 25.3358154296875, "learning_rate": 4.640544041450777e-06, "loss": 0.5827, "mean_token_accuracy": 0.9119522571563721, "num_tokens": 5932182.0, "step": 3311 }, { "epoch": 0.5363128491620112, "grad_norm": 41.298397064208984, "learning_rate": 4.638924870466322e-06, "loss": 0.9911, "mean_token_accuracy": 0.8780686259269714, "num_tokens": 5933979.0, "step": 3312 }, { "epoch": 0.5364747793700915, "grad_norm": 27.232192993164062, "learning_rate": 4.637305699481865e-06, "loss": 0.7612, "mean_token_accuracy": 0.9053481817245483, "num_tokens": 5935776.0, "step": 3313 }, { "epoch": 0.5366367095781718, "grad_norm": 17.515901565551758, "learning_rate": 4.63568652849741e-06, "loss": 0.5446, "mean_token_accuracy": 0.9252215027809143, "num_tokens": 5937557.0, "step": 3314 }, { "epoch": 0.5367986397862521, "grad_norm": 30.752872467041016, "learning_rate": 4.634067357512953e-06, "loss": 0.7314, "mean_token_accuracy": 0.9034347832202911, "num_tokens": 5939348.0, "step": 3315 }, { "epoch": 0.5369605699943324, "grad_norm": 28.745569229125977, "learning_rate": 4.632448186528498e-06, "loss": 0.7115, "mean_token_accuracy": 0.9015345275402069, "num_tokens": 5941134.0, "step": 3316 }, { "epoch": 0.5371225002024128, "grad_norm": 21.264015197753906, "learning_rate": 4.630829015544041e-06, "loss": 0.6348, "mean_token_accuracy": 0.9246070981025696, "num_tokens": 5942938.0, "step": 3317 }, { "epoch": 0.5372844304104931, "grad_norm": 24.994874954223633, "learning_rate": 4.629209844559586e-06, "loss": 0.5864, "mean_token_accuracy": 0.9150778949260712, "num_tokens": 5944721.0, "step": 3318 }, { "epoch": 0.5374463606185734, "grad_norm": 27.732799530029297, "learning_rate": 4.627590673575129e-06, "loss": 0.7156, "mean_token_accuracy": 0.9074074327945709, "num_tokens": 5946503.0, "step": 3319 }, { "epoch": 0.5376082908266537, "grad_norm": 16.936355590820312, "learning_rate": 4.625971502590674e-06, "loss": 0.4955, "mean_token_accuracy": 0.9231182336807251, "num_tokens": 5948288.0, "step": 3320 }, { "epoch": 0.537770221034734, "grad_norm": 24.789987564086914, "learning_rate": 4.624352331606217e-06, "loss": 0.613, "mean_token_accuracy": 0.9123079180717468, "num_tokens": 5950085.0, "step": 3321 }, { "epoch": 0.5379321512428143, "grad_norm": 10.298727989196777, "learning_rate": 4.622733160621762e-06, "loss": 0.456, "mean_token_accuracy": 0.9349911510944366, "num_tokens": 5951874.0, "step": 3322 }, { "epoch": 0.5380940814508947, "grad_norm": 21.85140037536621, "learning_rate": 4.621113989637306e-06, "loss": 0.5867, "mean_token_accuracy": 0.9145390093326569, "num_tokens": 5953657.0, "step": 3323 }, { "epoch": 0.538256011658975, "grad_norm": 29.054805755615234, "learning_rate": 4.61949481865285e-06, "loss": 0.6674, "mean_token_accuracy": 0.9007092118263245, "num_tokens": 5955451.0, "step": 3324 }, { "epoch": 0.5384179418670553, "grad_norm": 15.269128799438477, "learning_rate": 4.617875647668394e-06, "loss": 0.5479, "mean_token_accuracy": 0.9285130798816681, "num_tokens": 5957243.0, "step": 3325 }, { "epoch": 0.5385798720751356, "grad_norm": 27.347911834716797, "learning_rate": 4.616256476683938e-06, "loss": 0.6878, "mean_token_accuracy": 0.9085317552089691, "num_tokens": 5959039.0, "step": 3326 }, { "epoch": 0.5387418022832159, "grad_norm": 26.464378356933594, "learning_rate": 4.614637305699482e-06, "loss": 0.7229, "mean_token_accuracy": 0.9178784787654877, "num_tokens": 5960831.0, "step": 3327 }, { "epoch": 0.5389037324912963, "grad_norm": 20.680988311767578, "learning_rate": 4.613018134715026e-06, "loss": 0.6419, "mean_token_accuracy": 0.9202331602573395, "num_tokens": 5962619.0, "step": 3328 }, { "epoch": 0.5390656626993766, "grad_norm": 22.59561538696289, "learning_rate": 4.61139896373057e-06, "loss": 0.5197, "mean_token_accuracy": 0.9267317354679108, "num_tokens": 5964405.0, "step": 3329 }, { "epoch": 0.5392275929074569, "grad_norm": 25.261756896972656, "learning_rate": 4.609779792746114e-06, "loss": 0.6459, "mean_token_accuracy": 0.9053235650062561, "num_tokens": 5966202.0, "step": 3330 }, { "epoch": 0.5393895231155372, "grad_norm": 18.882295608520508, "learning_rate": 4.608160621761658e-06, "loss": 0.5269, "mean_token_accuracy": 0.918519139289856, "num_tokens": 5967985.0, "step": 3331 }, { "epoch": 0.5395514533236175, "grad_norm": 20.336748123168945, "learning_rate": 4.606541450777203e-06, "loss": 0.5219, "mean_token_accuracy": 0.9278618693351746, "num_tokens": 5969788.0, "step": 3332 }, { "epoch": 0.5397133835316978, "grad_norm": 29.584484100341797, "learning_rate": 4.604922279792746e-06, "loss": 0.7074, "mean_token_accuracy": 0.8960237205028534, "num_tokens": 5971587.0, "step": 3333 }, { "epoch": 0.5398753137397782, "grad_norm": 27.636363983154297, "learning_rate": 4.603303108808291e-06, "loss": 0.6279, "mean_token_accuracy": 0.9137344062328339, "num_tokens": 5973388.0, "step": 3334 }, { "epoch": 0.5400372439478585, "grad_norm": 23.142065048217773, "learning_rate": 4.601683937823835e-06, "loss": 0.5569, "mean_token_accuracy": 0.9152278006076813, "num_tokens": 5975182.0, "step": 3335 }, { "epoch": 0.5401991741559388, "grad_norm": 31.19795036315918, "learning_rate": 4.600064766839379e-06, "loss": 0.6246, "mean_token_accuracy": 0.9099322259426117, "num_tokens": 5976971.0, "step": 3336 }, { "epoch": 0.5403611043640191, "grad_norm": 27.82547950744629, "learning_rate": 4.598445595854923e-06, "loss": 0.5775, "mean_token_accuracy": 0.9072259664535522, "num_tokens": 5978763.0, "step": 3337 }, { "epoch": 0.5405230345720994, "grad_norm": 24.315357208251953, "learning_rate": 4.596826424870467e-06, "loss": 0.5793, "mean_token_accuracy": 0.9188596606254578, "num_tokens": 5980571.0, "step": 3338 }, { "epoch": 0.5406849647801797, "grad_norm": 27.79184913635254, "learning_rate": 4.595207253886011e-06, "loss": 0.6611, "mean_token_accuracy": 0.9031609296798706, "num_tokens": 5982372.0, "step": 3339 }, { "epoch": 0.5408468949882601, "grad_norm": 25.85364532470703, "learning_rate": 4.593588082901555e-06, "loss": 0.6143, "mean_token_accuracy": 0.9119634628295898, "num_tokens": 5984180.0, "step": 3340 }, { "epoch": 0.5410088251963404, "grad_norm": 15.509039878845215, "learning_rate": 4.591968911917099e-06, "loss": 0.4748, "mean_token_accuracy": 0.9353419542312622, "num_tokens": 5985970.0, "step": 3341 }, { "epoch": 0.5411707554044207, "grad_norm": 20.80098533630371, "learning_rate": 4.590349740932643e-06, "loss": 0.5612, "mean_token_accuracy": 0.9211459457874298, "num_tokens": 5987761.0, "step": 3342 }, { "epoch": 0.541332685612501, "grad_norm": 24.21620750427246, "learning_rate": 4.588730569948187e-06, "loss": 0.6099, "mean_token_accuracy": 0.9127168655395508, "num_tokens": 5989548.0, "step": 3343 }, { "epoch": 0.5414946158205813, "grad_norm": 32.148719787597656, "learning_rate": 4.587111398963731e-06, "loss": 0.907, "mean_token_accuracy": 0.9039174318313599, "num_tokens": 5991352.0, "step": 3344 }, { "epoch": 0.5416565460286616, "grad_norm": 22.388635635375977, "learning_rate": 4.585492227979275e-06, "loss": 0.6226, "mean_token_accuracy": 0.9076087176799774, "num_tokens": 5993134.0, "step": 3345 }, { "epoch": 0.541818476236742, "grad_norm": 32.44016647338867, "learning_rate": 4.583873056994819e-06, "loss": 0.8243, "mean_token_accuracy": 0.9028957486152649, "num_tokens": 5994934.0, "step": 3346 }, { "epoch": 0.5419804064448223, "grad_norm": 23.766708374023438, "learning_rate": 4.582253886010363e-06, "loss": 0.5108, "mean_token_accuracy": 0.9224817752838135, "num_tokens": 5996730.0, "step": 3347 }, { "epoch": 0.5421423366529026, "grad_norm": 19.531980514526367, "learning_rate": 4.580634715025907e-06, "loss": 0.5039, "mean_token_accuracy": 0.9212393462657928, "num_tokens": 5998522.0, "step": 3348 }, { "epoch": 0.5423042668609829, "grad_norm": 29.46169090270996, "learning_rate": 4.5790155440414514e-06, "loss": 0.7667, "mean_token_accuracy": 0.9092390239238739, "num_tokens": 6000310.0, "step": 3349 }, { "epoch": 0.5424661970690632, "grad_norm": 25.975584030151367, "learning_rate": 4.577396373056995e-06, "loss": 0.532, "mean_token_accuracy": 0.9221243560314178, "num_tokens": 6002104.0, "step": 3350 }, { "epoch": 0.5426281272771436, "grad_norm": 22.205385208129883, "learning_rate": 4.5757772020725395e-06, "loss": 0.5281, "mean_token_accuracy": 0.925253301858902, "num_tokens": 6003897.0, "step": 3351 }, { "epoch": 0.5427900574852239, "grad_norm": 24.8353271484375, "learning_rate": 4.574158031088083e-06, "loss": 0.626, "mean_token_accuracy": 0.9058675467967987, "num_tokens": 6005694.0, "step": 3352 }, { "epoch": 0.5429519876933042, "grad_norm": 33.30318832397461, "learning_rate": 4.5725388601036275e-06, "loss": 0.7284, "mean_token_accuracy": 0.9043208360671997, "num_tokens": 6007500.0, "step": 3353 }, { "epoch": 0.5431139179013845, "grad_norm": 21.73312759399414, "learning_rate": 4.570919689119172e-06, "loss": 0.6493, "mean_token_accuracy": 0.9169534146785736, "num_tokens": 6009289.0, "step": 3354 }, { "epoch": 0.5432758481094648, "grad_norm": 26.999486923217773, "learning_rate": 4.5693005181347155e-06, "loss": 0.6115, "mean_token_accuracy": 0.9213786423206329, "num_tokens": 6011080.0, "step": 3355 }, { "epoch": 0.5434377783175451, "grad_norm": 22.595088958740234, "learning_rate": 4.56768134715026e-06, "loss": 0.6032, "mean_token_accuracy": 0.9203667938709259, "num_tokens": 6012880.0, "step": 3356 }, { "epoch": 0.5435997085256254, "grad_norm": 24.318208694458008, "learning_rate": 4.5660621761658035e-06, "loss": 0.6668, "mean_token_accuracy": 0.9236669540405273, "num_tokens": 6014667.0, "step": 3357 }, { "epoch": 0.5437616387337058, "grad_norm": 24.818958282470703, "learning_rate": 4.564443005181348e-06, "loss": 0.5907, "mean_token_accuracy": 0.9182733595371246, "num_tokens": 6016461.0, "step": 3358 }, { "epoch": 0.5439235689417861, "grad_norm": 20.26692008972168, "learning_rate": 4.5628238341968916e-06, "loss": 0.5402, "mean_token_accuracy": 0.9258498251438141, "num_tokens": 6018254.0, "step": 3359 }, { "epoch": 0.5440854991498664, "grad_norm": 25.99869155883789, "learning_rate": 4.561204663212436e-06, "loss": 0.6833, "mean_token_accuracy": 0.9171532690525055, "num_tokens": 6020043.0, "step": 3360 }, { "epoch": 0.5442474293579467, "grad_norm": 22.137868881225586, "learning_rate": 4.55958549222798e-06, "loss": 0.5536, "mean_token_accuracy": 0.9120462834835052, "num_tokens": 6021827.0, "step": 3361 }, { "epoch": 0.5444093595660271, "grad_norm": 33.363590240478516, "learning_rate": 4.557966321243524e-06, "loss": 0.7337, "mean_token_accuracy": 0.9103012084960938, "num_tokens": 6023618.0, "step": 3362 }, { "epoch": 0.5445712897741074, "grad_norm": 30.149311065673828, "learning_rate": 4.556347150259068e-06, "loss": 0.5347, "mean_token_accuracy": 0.925567239522934, "num_tokens": 6025412.0, "step": 3363 }, { "epoch": 0.5447332199821877, "grad_norm": 26.410747528076172, "learning_rate": 4.554727979274612e-06, "loss": 0.5687, "mean_token_accuracy": 0.9087591171264648, "num_tokens": 6027198.0, "step": 3364 }, { "epoch": 0.544895150190268, "grad_norm": 23.280202865600586, "learning_rate": 4.553108808290156e-06, "loss": 0.5797, "mean_token_accuracy": 0.9260977506637573, "num_tokens": 6028994.0, "step": 3365 }, { "epoch": 0.5450570803983483, "grad_norm": 24.00787925720215, "learning_rate": 4.5514896373057e-06, "loss": 0.6157, "mean_token_accuracy": 0.90947225689888, "num_tokens": 6030793.0, "step": 3366 }, { "epoch": 0.5452190106064286, "grad_norm": 33.04943084716797, "learning_rate": 4.549870466321244e-06, "loss": 0.7738, "mean_token_accuracy": 0.9010291695594788, "num_tokens": 6032588.0, "step": 3367 }, { "epoch": 0.5453809408145089, "grad_norm": 19.426136016845703, "learning_rate": 4.548251295336788e-06, "loss": 0.5174, "mean_token_accuracy": 0.9386403858661652, "num_tokens": 6034377.0, "step": 3368 }, { "epoch": 0.5455428710225892, "grad_norm": 33.3045768737793, "learning_rate": 4.546632124352332e-06, "loss": 0.7405, "mean_token_accuracy": 0.9010841548442841, "num_tokens": 6036162.0, "step": 3369 }, { "epoch": 0.5457048012306696, "grad_norm": 22.850669860839844, "learning_rate": 4.545012953367876e-06, "loss": 0.5161, "mean_token_accuracy": 0.9254666268825531, "num_tokens": 6037956.0, "step": 3370 }, { "epoch": 0.5458667314387499, "grad_norm": 34.92522430419922, "learning_rate": 4.54339378238342e-06, "loss": 0.652, "mean_token_accuracy": 0.9084407687187195, "num_tokens": 6039750.0, "step": 3371 }, { "epoch": 0.5460286616468302, "grad_norm": 31.60854148864746, "learning_rate": 4.541774611398964e-06, "loss": 0.6056, "mean_token_accuracy": 0.9003778994083405, "num_tokens": 6041553.0, "step": 3372 }, { "epoch": 0.5461905918549105, "grad_norm": 27.703815460205078, "learning_rate": 4.540155440414509e-06, "loss": 0.7103, "mean_token_accuracy": 0.9090197384357452, "num_tokens": 6043351.0, "step": 3373 }, { "epoch": 0.5463525220629909, "grad_norm": 17.604507446289062, "learning_rate": 4.538536269430052e-06, "loss": 0.4945, "mean_token_accuracy": 0.925038605928421, "num_tokens": 6045142.0, "step": 3374 }, { "epoch": 0.5465144522710712, "grad_norm": 11.845253944396973, "learning_rate": 4.536917098445597e-06, "loss": 0.4554, "mean_token_accuracy": 0.9340918958187103, "num_tokens": 6046927.0, "step": 3375 }, { "epoch": 0.5466763824791515, "grad_norm": 30.648853302001953, "learning_rate": 4.53529792746114e-06, "loss": 0.6859, "mean_token_accuracy": 0.9020787477493286, "num_tokens": 6048725.0, "step": 3376 }, { "epoch": 0.5468383126872318, "grad_norm": 25.15959358215332, "learning_rate": 4.533678756476685e-06, "loss": 0.5399, "mean_token_accuracy": 0.9091269969940186, "num_tokens": 6050512.0, "step": 3377 }, { "epoch": 0.5470002428953121, "grad_norm": 33.98947525024414, "learning_rate": 4.532059585492228e-06, "loss": 0.732, "mean_token_accuracy": 0.8936116695404053, "num_tokens": 6052306.0, "step": 3378 }, { "epoch": 0.5471621731033924, "grad_norm": 32.97720718383789, "learning_rate": 4.530440414507773e-06, "loss": 0.7095, "mean_token_accuracy": 0.9016696214675903, "num_tokens": 6054103.0, "step": 3379 }, { "epoch": 0.5473241033114727, "grad_norm": 25.53755760192871, "learning_rate": 4.528821243523316e-06, "loss": 0.5488, "mean_token_accuracy": 0.9171883165836334, "num_tokens": 6055892.0, "step": 3380 }, { "epoch": 0.547486033519553, "grad_norm": 16.071685791015625, "learning_rate": 4.527202072538861e-06, "loss": 0.4976, "mean_token_accuracy": 0.9271321892738342, "num_tokens": 6057678.0, "step": 3381 }, { "epoch": 0.5476479637276334, "grad_norm": 26.60755729675293, "learning_rate": 4.525582901554404e-06, "loss": 0.7038, "mean_token_accuracy": 0.9217752516269684, "num_tokens": 6059471.0, "step": 3382 }, { "epoch": 0.5478098939357137, "grad_norm": 30.898523330688477, "learning_rate": 4.523963730569949e-06, "loss": 0.7523, "mean_token_accuracy": 0.9077968001365662, "num_tokens": 6061265.0, "step": 3383 }, { "epoch": 0.547971824143794, "grad_norm": 24.920360565185547, "learning_rate": 4.522344559585492e-06, "loss": 0.5478, "mean_token_accuracy": 0.9229063987731934, "num_tokens": 6063062.0, "step": 3384 }, { "epoch": 0.5481337543518744, "grad_norm": 30.06410026550293, "learning_rate": 4.520725388601037e-06, "loss": 0.6639, "mean_token_accuracy": 0.9201333820819855, "num_tokens": 6064849.0, "step": 3385 }, { "epoch": 0.5482956845599547, "grad_norm": 17.98684310913086, "learning_rate": 4.51910621761658e-06, "loss": 0.5257, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 6066635.0, "step": 3386 }, { "epoch": 0.548457614768035, "grad_norm": 18.87660789489746, "learning_rate": 4.517487046632125e-06, "loss": 0.5243, "mean_token_accuracy": 0.9224673211574554, "num_tokens": 6068418.0, "step": 3387 }, { "epoch": 0.5486195449761153, "grad_norm": 20.49905776977539, "learning_rate": 4.515867875647668e-06, "loss": 0.5558, "mean_token_accuracy": 0.9176878929138184, "num_tokens": 6070210.0, "step": 3388 }, { "epoch": 0.5487814751841956, "grad_norm": 22.309688568115234, "learning_rate": 4.514248704663213e-06, "loss": 0.5803, "mean_token_accuracy": 0.9176195561885834, "num_tokens": 6072013.0, "step": 3389 }, { "epoch": 0.5489434053922759, "grad_norm": 14.70454216003418, "learning_rate": 4.512629533678756e-06, "loss": 0.4669, "mean_token_accuracy": 0.9347826242446899, "num_tokens": 6073801.0, "step": 3390 }, { "epoch": 0.5491053356003562, "grad_norm": 25.066940307617188, "learning_rate": 4.511010362694301e-06, "loss": 0.5904, "mean_token_accuracy": 0.9167752265930176, "num_tokens": 6075601.0, "step": 3391 }, { "epoch": 0.5492672658084365, "grad_norm": 17.751445770263672, "learning_rate": 4.509391191709845e-06, "loss": 0.5183, "mean_token_accuracy": 0.9216715395450592, "num_tokens": 6077380.0, "step": 3392 }, { "epoch": 0.5494291960165169, "grad_norm": 25.87751579284668, "learning_rate": 4.507772020725389e-06, "loss": 0.5472, "mean_token_accuracy": 0.9210945665836334, "num_tokens": 6079171.0, "step": 3393 }, { "epoch": 0.5495911262245972, "grad_norm": 23.470722198486328, "learning_rate": 4.506152849740933e-06, "loss": 0.5208, "mean_token_accuracy": 0.9152742624282837, "num_tokens": 6080977.0, "step": 3394 }, { "epoch": 0.5497530564326775, "grad_norm": 30.11971664428711, "learning_rate": 4.504533678756477e-06, "loss": 0.5745, "mean_token_accuracy": 0.9252873659133911, "num_tokens": 6082769.0, "step": 3395 }, { "epoch": 0.5499149866407579, "grad_norm": 21.674415588378906, "learning_rate": 4.502914507772021e-06, "loss": 0.5839, "mean_token_accuracy": 0.9148935973644257, "num_tokens": 6084563.0, "step": 3396 }, { "epoch": 0.5500769168488382, "grad_norm": 18.486682891845703, "learning_rate": 4.501295336787565e-06, "loss": 0.5036, "mean_token_accuracy": 0.9282407462596893, "num_tokens": 6086354.0, "step": 3397 }, { "epoch": 0.5502388470569185, "grad_norm": 26.99074363708496, "learning_rate": 4.499676165803109e-06, "loss": 0.5899, "mean_token_accuracy": 0.9072089195251465, "num_tokens": 6088147.0, "step": 3398 }, { "epoch": 0.5504007772649988, "grad_norm": 29.032562255859375, "learning_rate": 4.498056994818653e-06, "loss": 0.6014, "mean_token_accuracy": 0.9144144356250763, "num_tokens": 6089939.0, "step": 3399 }, { "epoch": 0.5505627074730791, "grad_norm": 20.895023345947266, "learning_rate": 4.496437823834197e-06, "loss": 0.5439, "mean_token_accuracy": 0.9237553477287292, "num_tokens": 6091728.0, "step": 3400 }, { "epoch": 0.5507246376811594, "grad_norm": 19.834373474121094, "learning_rate": 4.494818652849741e-06, "loss": 0.5193, "mean_token_accuracy": 0.9270557165145874, "num_tokens": 6093528.0, "step": 3401 }, { "epoch": 0.5508865678892397, "grad_norm": 29.816694259643555, "learning_rate": 4.493199481865285e-06, "loss": 0.792, "mean_token_accuracy": 0.9021425247192383, "num_tokens": 6095316.0, "step": 3402 }, { "epoch": 0.55104849809732, "grad_norm": 25.919754028320312, "learning_rate": 4.491580310880829e-06, "loss": 0.5545, "mean_token_accuracy": 0.9157004952430725, "num_tokens": 6097101.0, "step": 3403 }, { "epoch": 0.5512104283054003, "grad_norm": 16.491609573364258, "learning_rate": 4.489961139896373e-06, "loss": 0.4693, "mean_token_accuracy": 0.9252786040306091, "num_tokens": 6098894.0, "step": 3404 }, { "epoch": 0.5513723585134807, "grad_norm": 23.422494888305664, "learning_rate": 4.488341968911917e-06, "loss": 0.5706, "mean_token_accuracy": 0.9108880758285522, "num_tokens": 6100675.0, "step": 3405 }, { "epoch": 0.551534288721561, "grad_norm": 16.785755157470703, "learning_rate": 4.4867227979274614e-06, "loss": 0.4825, "mean_token_accuracy": 0.928893506526947, "num_tokens": 6102468.0, "step": 3406 }, { "epoch": 0.5516962189296414, "grad_norm": 16.99287986755371, "learning_rate": 4.485103626943005e-06, "loss": 0.4706, "mean_token_accuracy": 0.9289297759532928, "num_tokens": 6104261.0, "step": 3407 }, { "epoch": 0.5518581491377217, "grad_norm": 22.296308517456055, "learning_rate": 4.4834844559585495e-06, "loss": 0.5793, "mean_token_accuracy": 0.9225463271141052, "num_tokens": 6106044.0, "step": 3408 }, { "epoch": 0.552020079345802, "grad_norm": 29.526416778564453, "learning_rate": 4.481865284974093e-06, "loss": 0.5857, "mean_token_accuracy": 0.9151678681373596, "num_tokens": 6107839.0, "step": 3409 }, { "epoch": 0.5521820095538823, "grad_norm": 23.606788635253906, "learning_rate": 4.4802461139896375e-06, "loss": 0.5456, "mean_token_accuracy": 0.9253092110157013, "num_tokens": 6109632.0, "step": 3410 }, { "epoch": 0.5523439397619626, "grad_norm": 31.241962432861328, "learning_rate": 4.478626943005182e-06, "loss": 0.7977, "mean_token_accuracy": 0.9171842634677887, "num_tokens": 6111422.0, "step": 3411 }, { "epoch": 0.5525058699700429, "grad_norm": 25.66417121887207, "learning_rate": 4.4770077720207255e-06, "loss": 0.596, "mean_token_accuracy": 0.9096779227256775, "num_tokens": 6113211.0, "step": 3412 }, { "epoch": 0.5526678001781232, "grad_norm": 32.516788482666016, "learning_rate": 4.47538860103627e-06, "loss": 0.7383, "mean_token_accuracy": 0.9028212130069733, "num_tokens": 6115000.0, "step": 3413 }, { "epoch": 0.5528297303862035, "grad_norm": 19.48625373840332, "learning_rate": 4.4737694300518135e-06, "loss": 0.4986, "mean_token_accuracy": 0.9206026494503021, "num_tokens": 6116789.0, "step": 3414 }, { "epoch": 0.5529916605942838, "grad_norm": 30.01239013671875, "learning_rate": 4.472150259067358e-06, "loss": 0.6134, "mean_token_accuracy": 0.9178589880466461, "num_tokens": 6118580.0, "step": 3415 }, { "epoch": 0.5531535908023641, "grad_norm": 36.29997253417969, "learning_rate": 4.4705310880829016e-06, "loss": 0.8181, "mean_token_accuracy": 0.8907604217529297, "num_tokens": 6120385.0, "step": 3416 }, { "epoch": 0.5533155210104445, "grad_norm": 28.68462371826172, "learning_rate": 4.468911917098446e-06, "loss": 0.6218, "mean_token_accuracy": 0.9115812182426453, "num_tokens": 6122179.0, "step": 3417 }, { "epoch": 0.5534774512185248, "grad_norm": 24.67974090576172, "learning_rate": 4.46729274611399e-06, "loss": 0.6111, "mean_token_accuracy": 0.9260912537574768, "num_tokens": 6123975.0, "step": 3418 }, { "epoch": 0.5536393814266052, "grad_norm": 28.53451156616211, "learning_rate": 4.465673575129534e-06, "loss": 0.69, "mean_token_accuracy": 0.9137205183506012, "num_tokens": 6125754.0, "step": 3419 }, { "epoch": 0.5538013116346855, "grad_norm": 28.087703704833984, "learning_rate": 4.464054404145078e-06, "loss": 0.7032, "mean_token_accuracy": 0.9165835082530975, "num_tokens": 6127542.0, "step": 3420 }, { "epoch": 0.5539632418427658, "grad_norm": 27.522611618041992, "learning_rate": 4.462435233160622e-06, "loss": 0.5284, "mean_token_accuracy": 0.9197037518024445, "num_tokens": 6129328.0, "step": 3421 }, { "epoch": 0.5541251720508461, "grad_norm": 31.729354858398438, "learning_rate": 4.460816062176166e-06, "loss": 0.6141, "mean_token_accuracy": 0.9142156839370728, "num_tokens": 6131120.0, "step": 3422 }, { "epoch": 0.5542871022589264, "grad_norm": 30.813610076904297, "learning_rate": 4.45919689119171e-06, "loss": 0.735, "mean_token_accuracy": 0.9048126935958862, "num_tokens": 6132916.0, "step": 3423 }, { "epoch": 0.5544490324670067, "grad_norm": 33.84776306152344, "learning_rate": 4.457577720207254e-06, "loss": 0.583, "mean_token_accuracy": 0.9138047397136688, "num_tokens": 6134706.0, "step": 3424 }, { "epoch": 0.554610962675087, "grad_norm": 29.813026428222656, "learning_rate": 4.455958549222798e-06, "loss": 0.658, "mean_token_accuracy": 0.9007028937339783, "num_tokens": 6136490.0, "step": 3425 }, { "epoch": 0.5547728928831673, "grad_norm": 21.390344619750977, "learning_rate": 4.454339378238342e-06, "loss": 0.5099, "mean_token_accuracy": 0.9166666865348816, "num_tokens": 6138278.0, "step": 3426 }, { "epoch": 0.5549348230912476, "grad_norm": 35.12249755859375, "learning_rate": 4.452720207253887e-06, "loss": 0.6242, "mean_token_accuracy": 0.9117614924907684, "num_tokens": 6140061.0, "step": 3427 }, { "epoch": 0.555096753299328, "grad_norm": 27.178850173950195, "learning_rate": 4.451101036269431e-06, "loss": 0.6893, "mean_token_accuracy": 0.9130477011203766, "num_tokens": 6141860.0, "step": 3428 }, { "epoch": 0.5552586835074083, "grad_norm": 25.129758834838867, "learning_rate": 4.449481865284975e-06, "loss": 0.5626, "mean_token_accuracy": 0.9172320365905762, "num_tokens": 6143649.0, "step": 3429 }, { "epoch": 0.5554206137154887, "grad_norm": 13.15129280090332, "learning_rate": 4.447862694300519e-06, "loss": 0.4767, "mean_token_accuracy": 0.929921567440033, "num_tokens": 6145432.0, "step": 3430 }, { "epoch": 0.555582543923569, "grad_norm": 35.81571578979492, "learning_rate": 4.446243523316063e-06, "loss": 0.7853, "mean_token_accuracy": 0.8895420432090759, "num_tokens": 6147224.0, "step": 3431 }, { "epoch": 0.5557444741316493, "grad_norm": 24.94693374633789, "learning_rate": 4.444624352331607e-06, "loss": 0.7703, "mean_token_accuracy": 0.9133601486682892, "num_tokens": 6149025.0, "step": 3432 }, { "epoch": 0.5559064043397296, "grad_norm": 33.639076232910156, "learning_rate": 4.443005181347151e-06, "loss": 0.6128, "mean_token_accuracy": 0.9141661822795868, "num_tokens": 6150817.0, "step": 3433 }, { "epoch": 0.5560683345478099, "grad_norm": 26.695627212524414, "learning_rate": 4.441386010362695e-06, "loss": 0.6054, "mean_token_accuracy": 0.9193286001682281, "num_tokens": 6152614.0, "step": 3434 }, { "epoch": 0.5562302647558902, "grad_norm": 22.153039932250977, "learning_rate": 4.439766839378239e-06, "loss": 0.4846, "mean_token_accuracy": 0.9326691031455994, "num_tokens": 6154408.0, "step": 3435 }, { "epoch": 0.5563921949639705, "grad_norm": 34.817508697509766, "learning_rate": 4.438147668393783e-06, "loss": 0.6228, "mean_token_accuracy": 0.8978950083255768, "num_tokens": 6156194.0, "step": 3436 }, { "epoch": 0.5565541251720508, "grad_norm": 30.309364318847656, "learning_rate": 4.436528497409327e-06, "loss": 0.6014, "mean_token_accuracy": 0.9158846139907837, "num_tokens": 6157990.0, "step": 3437 }, { "epoch": 0.5567160553801311, "grad_norm": 30.47202491760254, "learning_rate": 4.434909326424871e-06, "loss": 0.7308, "mean_token_accuracy": 0.8949397504329681, "num_tokens": 6159783.0, "step": 3438 }, { "epoch": 0.5568779855882114, "grad_norm": 22.867259979248047, "learning_rate": 4.433290155440415e-06, "loss": 0.5414, "mean_token_accuracy": 0.9116471707820892, "num_tokens": 6161578.0, "step": 3439 }, { "epoch": 0.5570399157962918, "grad_norm": 23.13840103149414, "learning_rate": 4.431670984455959e-06, "loss": 0.5589, "mean_token_accuracy": 0.9164775907993317, "num_tokens": 6163375.0, "step": 3440 }, { "epoch": 0.5572018460043722, "grad_norm": 16.12798309326172, "learning_rate": 4.430051813471503e-06, "loss": 0.5629, "mean_token_accuracy": 0.9260278642177582, "num_tokens": 6165171.0, "step": 3441 }, { "epoch": 0.5573637762124525, "grad_norm": 33.908668518066406, "learning_rate": 4.428432642487047e-06, "loss": 0.6629, "mean_token_accuracy": 0.8948566019535065, "num_tokens": 6166978.0, "step": 3442 }, { "epoch": 0.5575257064205328, "grad_norm": 35.57543182373047, "learning_rate": 4.426813471502591e-06, "loss": 0.6658, "mean_token_accuracy": 0.9033960998058319, "num_tokens": 6168780.0, "step": 3443 }, { "epoch": 0.5576876366286131, "grad_norm": 26.46402931213379, "learning_rate": 4.425194300518136e-06, "loss": 0.6183, "mean_token_accuracy": 0.922982782125473, "num_tokens": 6170578.0, "step": 3444 }, { "epoch": 0.5578495668366934, "grad_norm": 25.53522491455078, "learning_rate": 4.423575129533679e-06, "loss": 0.6804, "mean_token_accuracy": 0.9198103249073029, "num_tokens": 6172364.0, "step": 3445 }, { "epoch": 0.5580114970447737, "grad_norm": 24.76997184753418, "learning_rate": 4.421955958549224e-06, "loss": 0.5994, "mean_token_accuracy": 0.9176018536090851, "num_tokens": 6174143.0, "step": 3446 }, { "epoch": 0.558173427252854, "grad_norm": 28.09873390197754, "learning_rate": 4.420336787564767e-06, "loss": 0.6365, "mean_token_accuracy": 0.9221028983592987, "num_tokens": 6175938.0, "step": 3447 }, { "epoch": 0.5583353574609343, "grad_norm": 30.89769744873047, "learning_rate": 4.418717616580312e-06, "loss": 0.6437, "mean_token_accuracy": 0.9095440208911896, "num_tokens": 6177735.0, "step": 3448 }, { "epoch": 0.5584972876690146, "grad_norm": 40.70014572143555, "learning_rate": 4.417098445595855e-06, "loss": 0.9704, "mean_token_accuracy": 0.8758874237537384, "num_tokens": 6179528.0, "step": 3449 }, { "epoch": 0.5586592178770949, "grad_norm": 31.241683959960938, "learning_rate": 4.4154792746114e-06, "loss": 0.6936, "mean_token_accuracy": 0.8986429274082184, "num_tokens": 6181306.0, "step": 3450 }, { "epoch": 0.5588211480851752, "grad_norm": 22.433712005615234, "learning_rate": 4.413860103626943e-06, "loss": 0.5788, "mean_token_accuracy": 0.9200661182403564, "num_tokens": 6183092.0, "step": 3451 }, { "epoch": 0.5589830782932556, "grad_norm": 35.89522171020508, "learning_rate": 4.412240932642488e-06, "loss": 0.773, "mean_token_accuracy": 0.9011110067367554, "num_tokens": 6184877.0, "step": 3452 }, { "epoch": 0.559145008501336, "grad_norm": 18.537832260131836, "learning_rate": 4.410621761658031e-06, "loss": 0.5403, "mean_token_accuracy": 0.9179058969020844, "num_tokens": 6186657.0, "step": 3453 }, { "epoch": 0.5593069387094163, "grad_norm": 23.643512725830078, "learning_rate": 4.409002590673576e-06, "loss": 0.5729, "mean_token_accuracy": 0.9207285344600677, "num_tokens": 6188446.0, "step": 3454 }, { "epoch": 0.5594688689174966, "grad_norm": 35.087486267089844, "learning_rate": 4.407383419689119e-06, "loss": 0.5701, "mean_token_accuracy": 0.9265037775039673, "num_tokens": 6190231.0, "step": 3455 }, { "epoch": 0.5596307991255769, "grad_norm": 20.429126739501953, "learning_rate": 4.405764248704664e-06, "loss": 0.547, "mean_token_accuracy": 0.9333432614803314, "num_tokens": 6192027.0, "step": 3456 }, { "epoch": 0.5597927293336572, "grad_norm": 25.923635482788086, "learning_rate": 4.404145077720207e-06, "loss": 0.7166, "mean_token_accuracy": 0.909731537103653, "num_tokens": 6193828.0, "step": 3457 }, { "epoch": 0.5599546595417375, "grad_norm": 37.788047790527344, "learning_rate": 4.402525906735752e-06, "loss": 0.6852, "mean_token_accuracy": 0.8909301459789276, "num_tokens": 6195625.0, "step": 3458 }, { "epoch": 0.5601165897498178, "grad_norm": 16.530229568481445, "learning_rate": 4.400906735751295e-06, "loss": 0.4644, "mean_token_accuracy": 0.9288030862808228, "num_tokens": 6197418.0, "step": 3459 }, { "epoch": 0.5602785199578981, "grad_norm": 29.025163650512695, "learning_rate": 4.39928756476684e-06, "loss": 0.6781, "mean_token_accuracy": 0.9032630920410156, "num_tokens": 6199209.0, "step": 3460 }, { "epoch": 0.5604404501659784, "grad_norm": 28.432897567749023, "learning_rate": 4.397668393782384e-06, "loss": 0.625, "mean_token_accuracy": 0.9061205685138702, "num_tokens": 6200999.0, "step": 3461 }, { "epoch": 0.5606023803740587, "grad_norm": 15.997380256652832, "learning_rate": 4.396049222797928e-06, "loss": 0.4929, "mean_token_accuracy": 0.9308949708938599, "num_tokens": 6202786.0, "step": 3462 }, { "epoch": 0.560764310582139, "grad_norm": 34.19835662841797, "learning_rate": 4.394430051813472e-06, "loss": 0.8442, "mean_token_accuracy": 0.8912703394889832, "num_tokens": 6204575.0, "step": 3463 }, { "epoch": 0.5609262407902195, "grad_norm": 20.301677703857422, "learning_rate": 4.392810880829016e-06, "loss": 0.5913, "mean_token_accuracy": 0.9235082268714905, "num_tokens": 6206362.0, "step": 3464 }, { "epoch": 0.5610881709982998, "grad_norm": 15.669766426086426, "learning_rate": 4.39119170984456e-06, "loss": 0.4834, "mean_token_accuracy": 0.9304511249065399, "num_tokens": 6208147.0, "step": 3465 }, { "epoch": 0.5612501012063801, "grad_norm": 26.02204132080078, "learning_rate": 4.389572538860104e-06, "loss": 0.6128, "mean_token_accuracy": 0.9233216643333435, "num_tokens": 6209946.0, "step": 3466 }, { "epoch": 0.5614120314144604, "grad_norm": 24.204301834106445, "learning_rate": 4.387953367875648e-06, "loss": 0.5841, "mean_token_accuracy": 0.9177290201187134, "num_tokens": 6211737.0, "step": 3467 }, { "epoch": 0.5615739616225407, "grad_norm": 28.54127311706543, "learning_rate": 4.386334196891192e-06, "loss": 0.6131, "mean_token_accuracy": 0.9066092073917389, "num_tokens": 6213526.0, "step": 3468 }, { "epoch": 0.561735891830621, "grad_norm": 16.19049072265625, "learning_rate": 4.384715025906736e-06, "loss": 0.4689, "mean_token_accuracy": 0.9327787756919861, "num_tokens": 6215321.0, "step": 3469 }, { "epoch": 0.5618978220387013, "grad_norm": 31.826154708862305, "learning_rate": 4.38309585492228e-06, "loss": 0.8545, "mean_token_accuracy": 0.8938117325305939, "num_tokens": 6217106.0, "step": 3470 }, { "epoch": 0.5620597522467816, "grad_norm": 27.895505905151367, "learning_rate": 4.381476683937824e-06, "loss": 0.597, "mean_token_accuracy": 0.9089610874652863, "num_tokens": 6218893.0, "step": 3471 }, { "epoch": 0.5622216824548619, "grad_norm": 21.718050003051758, "learning_rate": 4.379857512953368e-06, "loss": 0.5366, "mean_token_accuracy": 0.9270833432674408, "num_tokens": 6220693.0, "step": 3472 }, { "epoch": 0.5623836126629422, "grad_norm": 30.873859405517578, "learning_rate": 4.3782383419689124e-06, "loss": 0.6934, "mean_token_accuracy": 0.9107142984867096, "num_tokens": 6222485.0, "step": 3473 }, { "epoch": 0.5625455428710225, "grad_norm": 32.97699737548828, "learning_rate": 4.376619170984456e-06, "loss": 0.6975, "mean_token_accuracy": 0.9064815044403076, "num_tokens": 6224276.0, "step": 3474 }, { "epoch": 0.562707473079103, "grad_norm": 25.58026695251465, "learning_rate": 4.3750000000000005e-06, "loss": 0.5816, "mean_token_accuracy": 0.9146021008491516, "num_tokens": 6226080.0, "step": 3475 }, { "epoch": 0.5628694032871833, "grad_norm": 25.022464752197266, "learning_rate": 4.373380829015544e-06, "loss": 0.6368, "mean_token_accuracy": 0.9065589904785156, "num_tokens": 6227870.0, "step": 3476 }, { "epoch": 0.5630313334952636, "grad_norm": 27.34522819519043, "learning_rate": 4.3717616580310885e-06, "loss": 0.6149, "mean_token_accuracy": 0.9079061448574066, "num_tokens": 6229654.0, "step": 3477 }, { "epoch": 0.5631932637033439, "grad_norm": 25.846668243408203, "learning_rate": 4.370142487046632e-06, "loss": 0.6212, "mean_token_accuracy": 0.9098980128765106, "num_tokens": 6231444.0, "step": 3478 }, { "epoch": 0.5633551939114242, "grad_norm": 25.948686599731445, "learning_rate": 4.3685233160621765e-06, "loss": 0.5554, "mean_token_accuracy": 0.9290320873260498, "num_tokens": 6233239.0, "step": 3479 }, { "epoch": 0.5635171241195045, "grad_norm": 26.796560287475586, "learning_rate": 4.366904145077721e-06, "loss": 0.6247, "mean_token_accuracy": 0.9089736044406891, "num_tokens": 6235035.0, "step": 3480 }, { "epoch": 0.5636790543275848, "grad_norm": 24.360271453857422, "learning_rate": 4.3652849740932645e-06, "loss": 0.6114, "mean_token_accuracy": 0.9210199117660522, "num_tokens": 6236813.0, "step": 3481 }, { "epoch": 0.5638409845356651, "grad_norm": 21.627534866333008, "learning_rate": 4.363665803108809e-06, "loss": 0.57, "mean_token_accuracy": 0.9214646518230438, "num_tokens": 6238592.0, "step": 3482 }, { "epoch": 0.5640029147437454, "grad_norm": 39.132568359375, "learning_rate": 4.3620466321243526e-06, "loss": 0.8255, "mean_token_accuracy": 0.8964584767818451, "num_tokens": 6240384.0, "step": 3483 }, { "epoch": 0.5641648449518257, "grad_norm": 24.986648559570312, "learning_rate": 4.360427461139897e-06, "loss": 0.6182, "mean_token_accuracy": 0.9252451062202454, "num_tokens": 6242176.0, "step": 3484 }, { "epoch": 0.564326775159906, "grad_norm": 24.716169357299805, "learning_rate": 4.358808290155441e-06, "loss": 0.592, "mean_token_accuracy": 0.9246070981025696, "num_tokens": 6243980.0, "step": 3485 }, { "epoch": 0.5644887053679863, "grad_norm": 19.94791030883789, "learning_rate": 4.357189119170985e-06, "loss": 0.5791, "mean_token_accuracy": 0.915575385093689, "num_tokens": 6245776.0, "step": 3486 }, { "epoch": 0.5646506355760668, "grad_norm": 20.240619659423828, "learning_rate": 4.355569948186529e-06, "loss": 0.5365, "mean_token_accuracy": 0.9338199496269226, "num_tokens": 6247560.0, "step": 3487 }, { "epoch": 0.5648125657841471, "grad_norm": 29.253957748413086, "learning_rate": 4.353950777202073e-06, "loss": 0.7842, "mean_token_accuracy": 0.8954933881759644, "num_tokens": 6249350.0, "step": 3488 }, { "epoch": 0.5649744959922274, "grad_norm": 19.998720169067383, "learning_rate": 4.352331606217617e-06, "loss": 0.4862, "mean_token_accuracy": 0.9321800470352173, "num_tokens": 6251142.0, "step": 3489 }, { "epoch": 0.5651364262003077, "grad_norm": 13.082011222839355, "learning_rate": 4.350712435233161e-06, "loss": 0.4887, "mean_token_accuracy": 0.9304466843605042, "num_tokens": 6252927.0, "step": 3490 }, { "epoch": 0.565298356408388, "grad_norm": 20.88640785217285, "learning_rate": 4.349093264248705e-06, "loss": 0.5497, "mean_token_accuracy": 0.9270833432674408, "num_tokens": 6254727.0, "step": 3491 }, { "epoch": 0.5654602866164683, "grad_norm": 18.046716690063477, "learning_rate": 4.347474093264249e-06, "loss": 0.5155, "mean_token_accuracy": 0.9285494089126587, "num_tokens": 6256518.0, "step": 3492 }, { "epoch": 0.5656222168245486, "grad_norm": 25.79833984375, "learning_rate": 4.345854922279793e-06, "loss": 0.5163, "mean_token_accuracy": 0.9246582388877869, "num_tokens": 6258308.0, "step": 3493 }, { "epoch": 0.5657841470326289, "grad_norm": 33.047569274902344, "learning_rate": 4.344235751295337e-06, "loss": 0.6758, "mean_token_accuracy": 0.9082609713077545, "num_tokens": 6260104.0, "step": 3494 }, { "epoch": 0.5659460772407092, "grad_norm": 25.285409927368164, "learning_rate": 4.342616580310881e-06, "loss": 0.569, "mean_token_accuracy": 0.9205268919467926, "num_tokens": 6261893.0, "step": 3495 }, { "epoch": 0.5661080074487895, "grad_norm": 27.62537384033203, "learning_rate": 4.340997409326425e-06, "loss": 0.5388, "mean_token_accuracy": 0.9231898188591003, "num_tokens": 6263691.0, "step": 3496 }, { "epoch": 0.5662699376568698, "grad_norm": 31.901121139526367, "learning_rate": 4.339378238341969e-06, "loss": 0.7459, "mean_token_accuracy": 0.8994689583778381, "num_tokens": 6265479.0, "step": 3497 }, { "epoch": 0.5664318678649503, "grad_norm": 28.196399688720703, "learning_rate": 4.337759067357513e-06, "loss": 0.6552, "mean_token_accuracy": 0.9097830355167389, "num_tokens": 6267268.0, "step": 3498 }, { "epoch": 0.5665937980730306, "grad_norm": 18.343788146972656, "learning_rate": 4.336139896373058e-06, "loss": 0.4987, "mean_token_accuracy": 0.929848313331604, "num_tokens": 6269065.0, "step": 3499 }, { "epoch": 0.5667557282811109, "grad_norm": 19.563533782958984, "learning_rate": 4.334520725388601e-06, "loss": 0.5002, "mean_token_accuracy": 0.9246068596839905, "num_tokens": 6270856.0, "step": 3500 }, { "epoch": 0.5669176584891912, "grad_norm": 26.151309967041016, "learning_rate": 4.332901554404146e-06, "loss": 0.5479, "mean_token_accuracy": 0.9213137030601501, "num_tokens": 6272645.0, "step": 3501 }, { "epoch": 0.5670795886972715, "grad_norm": 22.066959381103516, "learning_rate": 4.331282383419689e-06, "loss": 0.5423, "mean_token_accuracy": 0.9283071458339691, "num_tokens": 6274436.0, "step": 3502 }, { "epoch": 0.5672415189053518, "grad_norm": 34.8818473815918, "learning_rate": 4.329663212435234e-06, "loss": 0.5935, "mean_token_accuracy": 0.907052606344223, "num_tokens": 6276226.0, "step": 3503 }, { "epoch": 0.5674034491134321, "grad_norm": 27.884471893310547, "learning_rate": 4.328044041450777e-06, "loss": 0.6415, "mean_token_accuracy": 0.9047606289386749, "num_tokens": 6278011.0, "step": 3504 }, { "epoch": 0.5675653793215124, "grad_norm": 30.48784637451172, "learning_rate": 4.326424870466322e-06, "loss": 0.7038, "mean_token_accuracy": 0.9084407687187195, "num_tokens": 6279805.0, "step": 3505 }, { "epoch": 0.5677273095295927, "grad_norm": 24.371286392211914, "learning_rate": 4.324805699481865e-06, "loss": 0.5831, "mean_token_accuracy": 0.9187375009059906, "num_tokens": 6281589.0, "step": 3506 }, { "epoch": 0.567889239737673, "grad_norm": 19.476707458496094, "learning_rate": 4.32318652849741e-06, "loss": 0.4861, "mean_token_accuracy": 0.9298029541969299, "num_tokens": 6283386.0, "step": 3507 }, { "epoch": 0.5680511699457533, "grad_norm": 29.63750457763672, "learning_rate": 4.321567357512953e-06, "loss": 0.69, "mean_token_accuracy": 0.9135036468505859, "num_tokens": 6285175.0, "step": 3508 }, { "epoch": 0.5682131001538338, "grad_norm": 25.0279483795166, "learning_rate": 4.319948186528498e-06, "loss": 0.5705, "mean_token_accuracy": 0.9130310118198395, "num_tokens": 6286964.0, "step": 3509 }, { "epoch": 0.5683750303619141, "grad_norm": 33.23414993286133, "learning_rate": 4.318329015544041e-06, "loss": 0.8755, "mean_token_accuracy": 0.8847853541374207, "num_tokens": 6288752.0, "step": 3510 }, { "epoch": 0.5685369605699944, "grad_norm": 16.57394027709961, "learning_rate": 4.316709844559586e-06, "loss": 0.4728, "mean_token_accuracy": 0.9356481730937958, "num_tokens": 6290543.0, "step": 3511 }, { "epoch": 0.5686988907780747, "grad_norm": 34.508941650390625, "learning_rate": 4.315090673575129e-06, "loss": 0.788, "mean_token_accuracy": 0.9035409688949585, "num_tokens": 6292335.0, "step": 3512 }, { "epoch": 0.568860820986155, "grad_norm": 20.83152961730957, "learning_rate": 4.313471502590674e-06, "loss": 0.5101, "mean_token_accuracy": 0.9249590635299683, "num_tokens": 6294127.0, "step": 3513 }, { "epoch": 0.5690227511942353, "grad_norm": 25.568204879760742, "learning_rate": 4.311852331606217e-06, "loss": 0.6219, "mean_token_accuracy": 0.9015345275402069, "num_tokens": 6295913.0, "step": 3514 }, { "epoch": 0.5691846814023156, "grad_norm": 26.2885684967041, "learning_rate": 4.310233160621762e-06, "loss": 0.6793, "mean_token_accuracy": 0.9024864137172699, "num_tokens": 6297703.0, "step": 3515 }, { "epoch": 0.5693466116103959, "grad_norm": 26.928274154663086, "learning_rate": 4.308613989637305e-06, "loss": 0.5462, "mean_token_accuracy": 0.9094594419002533, "num_tokens": 6299503.0, "step": 3516 }, { "epoch": 0.5695085418184762, "grad_norm": 17.5095272064209, "learning_rate": 4.30699481865285e-06, "loss": 0.5309, "mean_token_accuracy": 0.930820107460022, "num_tokens": 6301290.0, "step": 3517 }, { "epoch": 0.5696704720265565, "grad_norm": 27.616561889648438, "learning_rate": 4.305375647668394e-06, "loss": 0.6353, "mean_token_accuracy": 0.9178043603897095, "num_tokens": 6303094.0, "step": 3518 }, { "epoch": 0.5698324022346368, "grad_norm": 26.010149002075195, "learning_rate": 4.303756476683938e-06, "loss": 0.6485, "mean_token_accuracy": 0.9111787378787994, "num_tokens": 6304888.0, "step": 3519 }, { "epoch": 0.5699943324427172, "grad_norm": 31.747882843017578, "learning_rate": 4.302137305699482e-06, "loss": 0.618, "mean_token_accuracy": 0.9105128347873688, "num_tokens": 6306680.0, "step": 3520 }, { "epoch": 0.5701562626507976, "grad_norm": 33.44720458984375, "learning_rate": 4.300518134715026e-06, "loss": 0.6993, "mean_token_accuracy": 0.9088472425937653, "num_tokens": 6308483.0, "step": 3521 }, { "epoch": 0.5703181928588779, "grad_norm": 25.704713821411133, "learning_rate": 4.29889896373057e-06, "loss": 0.6008, "mean_token_accuracy": 0.9219229817390442, "num_tokens": 6310277.0, "step": 3522 }, { "epoch": 0.5704801230669582, "grad_norm": 29.204517364501953, "learning_rate": 4.297279792746114e-06, "loss": 0.6062, "mean_token_accuracy": 0.9158229231834412, "num_tokens": 6312073.0, "step": 3523 }, { "epoch": 0.5706420532750385, "grad_norm": 21.058008193969727, "learning_rate": 4.295660621761658e-06, "loss": 0.5334, "mean_token_accuracy": 0.9250925779342651, "num_tokens": 6313852.0, "step": 3524 }, { "epoch": 0.5708039834831188, "grad_norm": 31.2490291595459, "learning_rate": 4.294041450777203e-06, "loss": 0.6724, "mean_token_accuracy": 0.9077968001365662, "num_tokens": 6315646.0, "step": 3525 }, { "epoch": 0.5709659136911991, "grad_norm": 17.613039016723633, "learning_rate": 4.292422279792746e-06, "loss": 0.4864, "mean_token_accuracy": 0.9283527433872223, "num_tokens": 6317438.0, "step": 3526 }, { "epoch": 0.5711278438992794, "grad_norm": 21.228052139282227, "learning_rate": 4.290803108808291e-06, "loss": 0.5421, "mean_token_accuracy": 0.9190140962600708, "num_tokens": 6319234.0, "step": 3527 }, { "epoch": 0.5712897741073597, "grad_norm": 34.676734924316406, "learning_rate": 4.2891839378238344e-06, "loss": 0.7331, "mean_token_accuracy": 0.9099942147731781, "num_tokens": 6321034.0, "step": 3528 }, { "epoch": 0.57145170431544, "grad_norm": 30.754173278808594, "learning_rate": 4.287564766839379e-06, "loss": 0.5841, "mean_token_accuracy": 0.9189201891422272, "num_tokens": 6322841.0, "step": 3529 }, { "epoch": 0.5716136345235203, "grad_norm": 21.248600006103516, "learning_rate": 4.2859455958549225e-06, "loss": 0.5075, "mean_token_accuracy": 0.9264705777168274, "num_tokens": 6324625.0, "step": 3530 }, { "epoch": 0.5717755647316006, "grad_norm": 22.567075729370117, "learning_rate": 4.284326424870467e-06, "loss": 0.6122, "mean_token_accuracy": 0.9266505837440491, "num_tokens": 6326410.0, "step": 3531 }, { "epoch": 0.571937494939681, "grad_norm": 22.986690521240234, "learning_rate": 4.282707253886011e-06, "loss": 0.5593, "mean_token_accuracy": 0.9095434844493866, "num_tokens": 6328209.0, "step": 3532 }, { "epoch": 0.5720994251477614, "grad_norm": 22.414566040039062, "learning_rate": 4.281088082901555e-06, "loss": 0.5829, "mean_token_accuracy": 0.9151932895183563, "num_tokens": 6330004.0, "step": 3533 }, { "epoch": 0.5722613553558417, "grad_norm": 23.611675262451172, "learning_rate": 4.279468911917099e-06, "loss": 0.5981, "mean_token_accuracy": 0.9219858050346375, "num_tokens": 6331798.0, "step": 3534 }, { "epoch": 0.572423285563922, "grad_norm": 16.818462371826172, "learning_rate": 4.277849740932643e-06, "loss": 0.491, "mean_token_accuracy": 0.9310256242752075, "num_tokens": 6333590.0, "step": 3535 }, { "epoch": 0.5725852157720023, "grad_norm": 24.466516494750977, "learning_rate": 4.276230569948187e-06, "loss": 0.6069, "mean_token_accuracy": 0.9213924705982208, "num_tokens": 6335382.0, "step": 3536 }, { "epoch": 0.5727471459800826, "grad_norm": 31.244674682617188, "learning_rate": 4.274611398963731e-06, "loss": 0.6527, "mean_token_accuracy": 0.9117632210254669, "num_tokens": 6337177.0, "step": 3537 }, { "epoch": 0.5729090761881629, "grad_norm": 18.467866897583008, "learning_rate": 4.272992227979275e-06, "loss": 0.5646, "mean_token_accuracy": 0.9163140058517456, "num_tokens": 6338964.0, "step": 3538 }, { "epoch": 0.5730710063962432, "grad_norm": 21.92582130432129, "learning_rate": 4.271373056994819e-06, "loss": 0.557, "mean_token_accuracy": 0.9218527674674988, "num_tokens": 6340757.0, "step": 3539 }, { "epoch": 0.5732329366043235, "grad_norm": 13.40427303314209, "learning_rate": 4.2697538860103634e-06, "loss": 0.4984, "mean_token_accuracy": 0.9328358173370361, "num_tokens": 6342537.0, "step": 3540 }, { "epoch": 0.5733948668124038, "grad_norm": 25.579177856445312, "learning_rate": 4.268134715025907e-06, "loss": 0.6601, "mean_token_accuracy": 0.9215202629566193, "num_tokens": 6344331.0, "step": 3541 }, { "epoch": 0.5735567970204841, "grad_norm": 21.58597755432129, "learning_rate": 4.2665155440414515e-06, "loss": 0.4944, "mean_token_accuracy": 0.9265734255313873, "num_tokens": 6346129.0, "step": 3542 }, { "epoch": 0.5737187272285645, "grad_norm": 22.14926528930664, "learning_rate": 4.264896373056995e-06, "loss": 0.5825, "mean_token_accuracy": 0.9255260229110718, "num_tokens": 6347910.0, "step": 3543 }, { "epoch": 0.5738806574366448, "grad_norm": 26.317655563354492, "learning_rate": 4.2632772020725395e-06, "loss": 0.6637, "mean_token_accuracy": 0.913891464471817, "num_tokens": 6349701.0, "step": 3544 }, { "epoch": 0.5740425876447252, "grad_norm": 28.914287567138672, "learning_rate": 4.261658031088083e-06, "loss": 0.7843, "mean_token_accuracy": 0.9022997617721558, "num_tokens": 6351490.0, "step": 3545 }, { "epoch": 0.5742045178528055, "grad_norm": 18.756023406982422, "learning_rate": 4.2600388601036275e-06, "loss": 0.5361, "mean_token_accuracy": 0.9238255023956299, "num_tokens": 6353276.0, "step": 3546 }, { "epoch": 0.5743664480608858, "grad_norm": 32.8216438293457, "learning_rate": 4.258419689119171e-06, "loss": 0.6628, "mean_token_accuracy": 0.9124149680137634, "num_tokens": 6355075.0, "step": 3547 }, { "epoch": 0.5745283782689661, "grad_norm": 22.16446304321289, "learning_rate": 4.2568005181347155e-06, "loss": 0.5172, "mean_token_accuracy": 0.9232880473136902, "num_tokens": 6356861.0, "step": 3548 }, { "epoch": 0.5746903084770464, "grad_norm": 24.55711555480957, "learning_rate": 4.255181347150259e-06, "loss": 0.6189, "mean_token_accuracy": 0.923116147518158, "num_tokens": 6358646.0, "step": 3549 }, { "epoch": 0.5748522386851267, "grad_norm": 29.500802993774414, "learning_rate": 4.2535621761658036e-06, "loss": 0.593, "mean_token_accuracy": 0.9172928631305695, "num_tokens": 6360437.0, "step": 3550 }, { "epoch": 0.575014168893207, "grad_norm": 15.286648750305176, "learning_rate": 4.251943005181348e-06, "loss": 0.4755, "mean_token_accuracy": 0.9306266009807587, "num_tokens": 6362223.0, "step": 3551 }, { "epoch": 0.5751760991012873, "grad_norm": 25.840396881103516, "learning_rate": 4.250323834196892e-06, "loss": 0.6326, "mean_token_accuracy": 0.9208469092845917, "num_tokens": 6364013.0, "step": 3552 }, { "epoch": 0.5753380293093676, "grad_norm": 31.959102630615234, "learning_rate": 4.248704663212436e-06, "loss": 0.5809, "mean_token_accuracy": 0.9154411852359772, "num_tokens": 6365797.0, "step": 3553 }, { "epoch": 0.575499959517448, "grad_norm": 31.157106399536133, "learning_rate": 4.24708549222798e-06, "loss": 0.599, "mean_token_accuracy": 0.9136288166046143, "num_tokens": 6367587.0, "step": 3554 }, { "epoch": 0.5756618897255283, "grad_norm": 24.798954010009766, "learning_rate": 4.245466321243524e-06, "loss": 0.5447, "mean_token_accuracy": 0.9196504652500153, "num_tokens": 6369373.0, "step": 3555 }, { "epoch": 0.5758238199336086, "grad_norm": 37.87710952758789, "learning_rate": 4.243847150259068e-06, "loss": 0.775, "mean_token_accuracy": 0.9021505415439606, "num_tokens": 6371175.0, "step": 3556 }, { "epoch": 0.575985750141689, "grad_norm": 21.68278694152832, "learning_rate": 4.242227979274612e-06, "loss": 0.4999, "mean_token_accuracy": 0.9238015413284302, "num_tokens": 6372976.0, "step": 3557 }, { "epoch": 0.5761476803497693, "grad_norm": 36.19822311401367, "learning_rate": 4.240608808290156e-06, "loss": 0.8588, "mean_token_accuracy": 0.9045666456222534, "num_tokens": 6374781.0, "step": 3558 }, { "epoch": 0.5763096105578496, "grad_norm": 24.155576705932617, "learning_rate": 4.2389896373057e-06, "loss": 0.5312, "mean_token_accuracy": 0.9176002144813538, "num_tokens": 6376572.0, "step": 3559 }, { "epoch": 0.5764715407659299, "grad_norm": 32.75246810913086, "learning_rate": 4.237370466321244e-06, "loss": 0.5947, "mean_token_accuracy": 0.9073200225830078, "num_tokens": 6378354.0, "step": 3560 }, { "epoch": 0.5766334709740102, "grad_norm": 24.438316345214844, "learning_rate": 4.235751295336788e-06, "loss": 0.6519, "mean_token_accuracy": 0.9104297459125519, "num_tokens": 6380145.0, "step": 3561 }, { "epoch": 0.5767954011820905, "grad_norm": 34.70059585571289, "learning_rate": 4.234132124352332e-06, "loss": 0.6933, "mean_token_accuracy": 0.9185907244682312, "num_tokens": 6381940.0, "step": 3562 }, { "epoch": 0.5769573313901708, "grad_norm": 27.871532440185547, "learning_rate": 4.232512953367876e-06, "loss": 0.5387, "mean_token_accuracy": 0.9205244481563568, "num_tokens": 6383729.0, "step": 3563 }, { "epoch": 0.5771192615982511, "grad_norm": 28.184967041015625, "learning_rate": 4.23089378238342e-06, "loss": 0.5584, "mean_token_accuracy": 0.923501193523407, "num_tokens": 6385515.0, "step": 3564 }, { "epoch": 0.5772811918063314, "grad_norm": 24.472591400146484, "learning_rate": 4.229274611398964e-06, "loss": 0.5543, "mean_token_accuracy": 0.9175926148891449, "num_tokens": 6387306.0, "step": 3565 }, { "epoch": 0.5774431220144118, "grad_norm": 40.821266174316406, "learning_rate": 4.227655440414508e-06, "loss": 0.8135, "mean_token_accuracy": 0.8963977694511414, "num_tokens": 6389098.0, "step": 3566 }, { "epoch": 0.5776050522224921, "grad_norm": 24.554664611816406, "learning_rate": 4.226036269430052e-06, "loss": 0.5869, "mean_token_accuracy": 0.9185742437839508, "num_tokens": 6390892.0, "step": 3567 }, { "epoch": 0.5777669824305725, "grad_norm": 30.932598114013672, "learning_rate": 4.224417098445597e-06, "loss": 0.5074, "mean_token_accuracy": 0.9335784316062927, "num_tokens": 6392675.0, "step": 3568 }, { "epoch": 0.5779289126386528, "grad_norm": 32.80553436279297, "learning_rate": 4.22279792746114e-06, "loss": 0.7184, "mean_token_accuracy": 0.9106046259403229, "num_tokens": 6394478.0, "step": 3569 }, { "epoch": 0.5780908428467331, "grad_norm": 25.202241897583008, "learning_rate": 4.221178756476685e-06, "loss": 0.5485, "mean_token_accuracy": 0.9150778949260712, "num_tokens": 6396261.0, "step": 3570 }, { "epoch": 0.5782527730548134, "grad_norm": 30.182010650634766, "learning_rate": 4.219559585492228e-06, "loss": 0.6644, "mean_token_accuracy": 0.9107471108436584, "num_tokens": 6398053.0, "step": 3571 }, { "epoch": 0.5784147032628937, "grad_norm": 27.293420791625977, "learning_rate": 4.217940414507773e-06, "loss": 0.507, "mean_token_accuracy": 0.9282702505588531, "num_tokens": 6399844.0, "step": 3572 }, { "epoch": 0.578576633470974, "grad_norm": 22.497621536254883, "learning_rate": 4.216321243523316e-06, "loss": 0.5567, "mean_token_accuracy": 0.9323180317878723, "num_tokens": 6401635.0, "step": 3573 }, { "epoch": 0.5787385636790543, "grad_norm": 37.42566680908203, "learning_rate": 4.214702072538861e-06, "loss": 0.541, "mean_token_accuracy": 0.9222372174263, "num_tokens": 6403430.0, "step": 3574 }, { "epoch": 0.5789004938871346, "grad_norm": 39.466102600097656, "learning_rate": 4.213082901554404e-06, "loss": 0.7405, "mean_token_accuracy": 0.9066676497459412, "num_tokens": 6405229.0, "step": 3575 }, { "epoch": 0.5790624240952149, "grad_norm": 39.57737731933594, "learning_rate": 4.211463730569949e-06, "loss": 0.8382, "mean_token_accuracy": 0.8958951830863953, "num_tokens": 6407030.0, "step": 3576 }, { "epoch": 0.5792243543032953, "grad_norm": 38.141170501708984, "learning_rate": 4.209844559585492e-06, "loss": 0.7973, "mean_token_accuracy": 0.8979581892490387, "num_tokens": 6408827.0, "step": 3577 }, { "epoch": 0.5793862845113756, "grad_norm": 28.011844635009766, "learning_rate": 4.208225388601037e-06, "loss": 0.6533, "mean_token_accuracy": 0.9024396538734436, "num_tokens": 6410615.0, "step": 3578 }, { "epoch": 0.579548214719456, "grad_norm": 23.022579193115234, "learning_rate": 4.20660621761658e-06, "loss": 0.5895, "mean_token_accuracy": 0.9172374606132507, "num_tokens": 6412417.0, "step": 3579 }, { "epoch": 0.5797101449275363, "grad_norm": 24.619352340698242, "learning_rate": 4.204987046632125e-06, "loss": 0.5102, "mean_token_accuracy": 0.9265942871570587, "num_tokens": 6414215.0, "step": 3580 }, { "epoch": 0.5798720751356166, "grad_norm": 21.680452346801758, "learning_rate": 4.203367875647668e-06, "loss": 0.4796, "mean_token_accuracy": 0.9310199022293091, "num_tokens": 6416002.0, "step": 3581 }, { "epoch": 0.5800340053436969, "grad_norm": 30.229331970214844, "learning_rate": 4.201748704663213e-06, "loss": 0.6105, "mean_token_accuracy": 0.9115338325500488, "num_tokens": 6417796.0, "step": 3582 }, { "epoch": 0.5801959355517772, "grad_norm": 28.286911010742188, "learning_rate": 4.200129533678756e-06, "loss": 0.5877, "mean_token_accuracy": 0.9098455607891083, "num_tokens": 6419596.0, "step": 3583 }, { "epoch": 0.5803578657598575, "grad_norm": 29.158273696899414, "learning_rate": 4.198510362694301e-06, "loss": 0.5836, "mean_token_accuracy": 0.9200698137283325, "num_tokens": 6421383.0, "step": 3584 }, { "epoch": 0.5805197959679378, "grad_norm": 32.44015884399414, "learning_rate": 4.1968911917098444e-06, "loss": 0.7368, "mean_token_accuracy": 0.9001736044883728, "num_tokens": 6423167.0, "step": 3585 }, { "epoch": 0.5806817261760181, "grad_norm": 23.043378829956055, "learning_rate": 4.195272020725389e-06, "loss": 0.5258, "mean_token_accuracy": 0.9283071458339691, "num_tokens": 6424958.0, "step": 3586 }, { "epoch": 0.5808436563840984, "grad_norm": 26.61720085144043, "learning_rate": 4.193652849740933e-06, "loss": 0.6098, "mean_token_accuracy": 0.9155851304531097, "num_tokens": 6426742.0, "step": 3587 }, { "epoch": 0.5810055865921788, "grad_norm": 29.629486083984375, "learning_rate": 4.192033678756477e-06, "loss": 0.6529, "mean_token_accuracy": 0.9078240692615509, "num_tokens": 6428535.0, "step": 3588 }, { "epoch": 0.5811675168002591, "grad_norm": 23.47311782836914, "learning_rate": 4.190414507772021e-06, "loss": 0.5693, "mean_token_accuracy": 0.915340930223465, "num_tokens": 6430321.0, "step": 3589 }, { "epoch": 0.5813294470083394, "grad_norm": 34.31197738647461, "learning_rate": 4.188795336787565e-06, "loss": 0.6128, "mean_token_accuracy": 0.9081944525241852, "num_tokens": 6432127.0, "step": 3590 }, { "epoch": 0.5814913772164197, "grad_norm": 14.147363662719727, "learning_rate": 4.187176165803109e-06, "loss": 0.5194, "mean_token_accuracy": 0.9336529076099396, "num_tokens": 6433910.0, "step": 3591 }, { "epoch": 0.5816533074245, "grad_norm": 32.367034912109375, "learning_rate": 4.185556994818653e-06, "loss": 0.6687, "mean_token_accuracy": 0.9140287637710571, "num_tokens": 6435701.0, "step": 3592 }, { "epoch": 0.5818152376325804, "grad_norm": 26.745838165283203, "learning_rate": 4.183937823834197e-06, "loss": 0.6549, "mean_token_accuracy": 0.9092592597007751, "num_tokens": 6437488.0, "step": 3593 }, { "epoch": 0.5819771678406607, "grad_norm": 22.89157485961914, "learning_rate": 4.182318652849741e-06, "loss": 0.5065, "mean_token_accuracy": 0.9296627044677734, "num_tokens": 6439284.0, "step": 3594 }, { "epoch": 0.582139098048741, "grad_norm": 32.1548957824707, "learning_rate": 4.180699481865285e-06, "loss": 0.7902, "mean_token_accuracy": 0.9018301069736481, "num_tokens": 6441071.0, "step": 3595 }, { "epoch": 0.5823010282568213, "grad_norm": 21.557720184326172, "learning_rate": 4.179080310880829e-06, "loss": 0.5491, "mean_token_accuracy": 0.9258066415786743, "num_tokens": 6442866.0, "step": 3596 }, { "epoch": 0.5824629584649016, "grad_norm": 30.659692764282227, "learning_rate": 4.1774611398963734e-06, "loss": 0.7055, "mean_token_accuracy": 0.9090061187744141, "num_tokens": 6444652.0, "step": 3597 }, { "epoch": 0.5826248886729819, "grad_norm": 26.35416030883789, "learning_rate": 4.175841968911917e-06, "loss": 0.6158, "mean_token_accuracy": 0.9180491268634796, "num_tokens": 6446433.0, "step": 3598 }, { "epoch": 0.5827868188810622, "grad_norm": 22.53962516784668, "learning_rate": 4.1742227979274615e-06, "loss": 0.659, "mean_token_accuracy": 0.9160572290420532, "num_tokens": 6448219.0, "step": 3599 }, { "epoch": 0.5829487490891426, "grad_norm": 17.339033126831055, "learning_rate": 4.172603626943005e-06, "loss": 0.5488, "mean_token_accuracy": 0.9293256103992462, "num_tokens": 6450000.0, "step": 3600 }, { "epoch": 0.5831106792972229, "grad_norm": 24.7391300201416, "learning_rate": 4.1709844559585495e-06, "loss": 0.5893, "mean_token_accuracy": 0.9140287637710571, "num_tokens": 6451801.0, "step": 3601 }, { "epoch": 0.5832726095053032, "grad_norm": 24.247093200683594, "learning_rate": 4.169365284974093e-06, "loss": 0.5974, "mean_token_accuracy": 0.9239353239536285, "num_tokens": 6453589.0, "step": 3602 }, { "epoch": 0.5834345397133835, "grad_norm": 26.65892791748047, "learning_rate": 4.1677461139896375e-06, "loss": 0.7513, "mean_token_accuracy": 0.9120599031448364, "num_tokens": 6455374.0, "step": 3603 }, { "epoch": 0.5835964699214639, "grad_norm": 17.594148635864258, "learning_rate": 4.166126943005181e-06, "loss": 0.4744, "mean_token_accuracy": 0.9306569397449493, "num_tokens": 6457160.0, "step": 3604 }, { "epoch": 0.5837584001295442, "grad_norm": 17.84099769592285, "learning_rate": 4.1645077720207256e-06, "loss": 0.4963, "mean_token_accuracy": 0.9311594367027283, "num_tokens": 6458948.0, "step": 3605 }, { "epoch": 0.5839203303376245, "grad_norm": 32.53583908081055, "learning_rate": 4.16288860103627e-06, "loss": 0.6977, "mean_token_accuracy": 0.8943827748298645, "num_tokens": 6460743.0, "step": 3606 }, { "epoch": 0.5840822605457048, "grad_norm": 12.079717636108398, "learning_rate": 4.161269430051814e-06, "loss": 0.5557, "mean_token_accuracy": 0.9253689646720886, "num_tokens": 6462523.0, "step": 3607 }, { "epoch": 0.5842441907537851, "grad_norm": 23.461336135864258, "learning_rate": 4.159650259067358e-06, "loss": 0.5507, "mean_token_accuracy": 0.9112319052219391, "num_tokens": 6464305.0, "step": 3608 }, { "epoch": 0.5844061209618654, "grad_norm": 25.60858154296875, "learning_rate": 4.158031088082902e-06, "loss": 0.6387, "mean_token_accuracy": 0.905193418264389, "num_tokens": 6466098.0, "step": 3609 }, { "epoch": 0.5845680511699457, "grad_norm": 26.739316940307617, "learning_rate": 4.156411917098446e-06, "loss": 0.7401, "mean_token_accuracy": 0.9072310924530029, "num_tokens": 6467889.0, "step": 3610 }, { "epoch": 0.5847299813780261, "grad_norm": 24.455007553100586, "learning_rate": 4.15479274611399e-06, "loss": 0.6871, "mean_token_accuracy": 0.9025605022907257, "num_tokens": 6469678.0, "step": 3611 }, { "epoch": 0.5848919115861064, "grad_norm": 30.066936492919922, "learning_rate": 4.153173575129534e-06, "loss": 0.6045, "mean_token_accuracy": 0.8987827003002167, "num_tokens": 6471468.0, "step": 3612 }, { "epoch": 0.5850538417941867, "grad_norm": 26.420854568481445, "learning_rate": 4.151554404145078e-06, "loss": 0.6292, "mean_token_accuracy": 0.9057773351669312, "num_tokens": 6473256.0, "step": 3613 }, { "epoch": 0.585215772002267, "grad_norm": 22.476177215576172, "learning_rate": 4.149935233160622e-06, "loss": 0.5927, "mean_token_accuracy": 0.9169968664646149, "num_tokens": 6475045.0, "step": 3614 }, { "epoch": 0.5853777022103474, "grad_norm": 27.663366317749023, "learning_rate": 4.148316062176166e-06, "loss": 0.6204, "mean_token_accuracy": 0.9119718372821808, "num_tokens": 6476841.0, "step": 3615 }, { "epoch": 0.5855396324184277, "grad_norm": 27.441537857055664, "learning_rate": 4.14669689119171e-06, "loss": 0.6085, "mean_token_accuracy": 0.9127962291240692, "num_tokens": 6478628.0, "step": 3616 }, { "epoch": 0.585701562626508, "grad_norm": 16.526952743530273, "learning_rate": 4.145077720207254e-06, "loss": 0.5253, "mean_token_accuracy": 0.929679811000824, "num_tokens": 6480425.0, "step": 3617 }, { "epoch": 0.5858634928345883, "grad_norm": 25.384193420410156, "learning_rate": 4.143458549222798e-06, "loss": 0.5944, "mean_token_accuracy": 0.9155879616737366, "num_tokens": 6482221.0, "step": 3618 }, { "epoch": 0.5860254230426686, "grad_norm": 22.535831451416016, "learning_rate": 4.141839378238342e-06, "loss": 0.5873, "mean_token_accuracy": 0.922939658164978, "num_tokens": 6484005.0, "step": 3619 }, { "epoch": 0.5861873532507489, "grad_norm": 31.863630294799805, "learning_rate": 4.140220207253887e-06, "loss": 0.7287, "mean_token_accuracy": 0.9001617133617401, "num_tokens": 6485807.0, "step": 3620 }, { "epoch": 0.5863492834588292, "grad_norm": 29.700624465942383, "learning_rate": 4.138601036269431e-06, "loss": 0.7314, "mean_token_accuracy": 0.9048641622066498, "num_tokens": 6487592.0, "step": 3621 }, { "epoch": 0.5865112136669096, "grad_norm": 21.672607421875, "learning_rate": 4.136981865284975e-06, "loss": 0.5021, "mean_token_accuracy": 0.920273095369339, "num_tokens": 6489380.0, "step": 3622 }, { "epoch": 0.5866731438749899, "grad_norm": 28.994285583496094, "learning_rate": 4.135362694300519e-06, "loss": 0.6587, "mean_token_accuracy": 0.9087194800376892, "num_tokens": 6491165.0, "step": 3623 }, { "epoch": 0.5868350740830702, "grad_norm": 17.27082633972168, "learning_rate": 4.133743523316063e-06, "loss": 0.489, "mean_token_accuracy": 0.9359357357025146, "num_tokens": 6492958.0, "step": 3624 }, { "epoch": 0.5869970042911505, "grad_norm": 23.989273071289062, "learning_rate": 4.132124352331607e-06, "loss": 0.5903, "mean_token_accuracy": 0.919911116361618, "num_tokens": 6494745.0, "step": 3625 }, { "epoch": 0.5871589344992308, "grad_norm": 33.401512145996094, "learning_rate": 4.130505181347151e-06, "loss": 0.6601, "mean_token_accuracy": 0.9052895903587341, "num_tokens": 6496542.0, "step": 3626 }, { "epoch": 0.5873208647073112, "grad_norm": 15.182997703552246, "learning_rate": 4.128886010362695e-06, "loss": 0.5045, "mean_token_accuracy": 0.9323708117008209, "num_tokens": 6498335.0, "step": 3627 }, { "epoch": 0.5874827949153915, "grad_norm": 35.03440856933594, "learning_rate": 4.127266839378239e-06, "loss": 1.001, "mean_token_accuracy": 0.8903170228004456, "num_tokens": 6500128.0, "step": 3628 }, { "epoch": 0.5876447251234718, "grad_norm": 29.223005294799805, "learning_rate": 4.125647668393783e-06, "loss": 0.7201, "mean_token_accuracy": 0.9097620248794556, "num_tokens": 6501927.0, "step": 3629 }, { "epoch": 0.5878066553315521, "grad_norm": 29.118810653686523, "learning_rate": 4.124028497409327e-06, "loss": 0.7444, "mean_token_accuracy": 0.9126032292842865, "num_tokens": 6503713.0, "step": 3630 }, { "epoch": 0.5879685855396324, "grad_norm": 23.20722770690918, "learning_rate": 4.122409326424871e-06, "loss": 0.5845, "mean_token_accuracy": 0.921798974275589, "num_tokens": 6505494.0, "step": 3631 }, { "epoch": 0.5881305157477127, "grad_norm": 14.8585205078125, "learning_rate": 4.120790155440415e-06, "loss": 0.4643, "mean_token_accuracy": 0.9293177723884583, "num_tokens": 6507275.0, "step": 3632 }, { "epoch": 0.5882924459557931, "grad_norm": 27.070327758789062, "learning_rate": 4.119170984455959e-06, "loss": 0.5946, "mean_token_accuracy": 0.916402131319046, "num_tokens": 6509062.0, "step": 3633 }, { "epoch": 0.5884543761638734, "grad_norm": 23.33502769470215, "learning_rate": 4.117551813471503e-06, "loss": 0.5717, "mean_token_accuracy": 0.9244987666606903, "num_tokens": 6510851.0, "step": 3634 }, { "epoch": 0.5886163063719537, "grad_norm": 27.617918014526367, "learning_rate": 4.115932642487047e-06, "loss": 0.6104, "mean_token_accuracy": 0.9107434451580048, "num_tokens": 6512643.0, "step": 3635 }, { "epoch": 0.588778236580034, "grad_norm": 33.694908142089844, "learning_rate": 4.114313471502591e-06, "loss": 0.621, "mean_token_accuracy": 0.9089947044849396, "num_tokens": 6514430.0, "step": 3636 }, { "epoch": 0.5889401667881143, "grad_norm": 14.677541732788086, "learning_rate": 4.112694300518135e-06, "loss": 0.4496, "mean_token_accuracy": 0.933833509683609, "num_tokens": 6516229.0, "step": 3637 }, { "epoch": 0.5891020969961946, "grad_norm": 25.318374633789062, "learning_rate": 4.111075129533679e-06, "loss": 0.6218, "mean_token_accuracy": 0.9170559346675873, "num_tokens": 6518006.0, "step": 3638 }, { "epoch": 0.589264027204275, "grad_norm": 29.502891540527344, "learning_rate": 4.109455958549224e-06, "loss": 0.6634, "mean_token_accuracy": 0.9103894531726837, "num_tokens": 6519819.0, "step": 3639 }, { "epoch": 0.5894259574123553, "grad_norm": 31.912513732910156, "learning_rate": 4.107836787564767e-06, "loss": 0.6375, "mean_token_accuracy": 0.8946863114833832, "num_tokens": 6521605.0, "step": 3640 }, { "epoch": 0.5895878876204356, "grad_norm": 20.212072372436523, "learning_rate": 4.106217616580312e-06, "loss": 0.5306, "mean_token_accuracy": 0.9313632845878601, "num_tokens": 6523393.0, "step": 3641 }, { "epoch": 0.5897498178285159, "grad_norm": 30.334707260131836, "learning_rate": 4.104598445595855e-06, "loss": 0.7239, "mean_token_accuracy": 0.9060223400592804, "num_tokens": 6525192.0, "step": 3642 }, { "epoch": 0.5899117480365962, "grad_norm": 43.30109405517578, "learning_rate": 4.1029792746114e-06, "loss": 1.0903, "mean_token_accuracy": 0.8868494629859924, "num_tokens": 6526987.0, "step": 3643 }, { "epoch": 0.5900736782446765, "grad_norm": 29.774261474609375, "learning_rate": 4.101360103626943e-06, "loss": 0.6808, "mean_token_accuracy": 0.9091590940952301, "num_tokens": 6528782.0, "step": 3644 }, { "epoch": 0.5902356084527569, "grad_norm": 21.97923469543457, "learning_rate": 4.099740932642488e-06, "loss": 0.6813, "mean_token_accuracy": 0.9142021834850311, "num_tokens": 6530562.0, "step": 3645 }, { "epoch": 0.5903975386608372, "grad_norm": 26.86726188659668, "learning_rate": 4.098121761658031e-06, "loss": 0.5173, "mean_token_accuracy": 0.9290780425071716, "num_tokens": 6532356.0, "step": 3646 }, { "epoch": 0.5905594688689175, "grad_norm": 19.118139266967773, "learning_rate": 4.096502590673576e-06, "loss": 0.4725, "mean_token_accuracy": 0.9302109181880951, "num_tokens": 6534154.0, "step": 3647 }, { "epoch": 0.5907213990769978, "grad_norm": 23.16008949279785, "learning_rate": 4.094883419689119e-06, "loss": 0.591, "mean_token_accuracy": 0.9232684075832367, "num_tokens": 6535938.0, "step": 3648 }, { "epoch": 0.5908833292850781, "grad_norm": 23.8983154296875, "learning_rate": 4.093264248704664e-06, "loss": 0.5776, "mean_token_accuracy": 0.9138889014720917, "num_tokens": 6537717.0, "step": 3649 }, { "epoch": 0.5910452594931584, "grad_norm": 27.30613136291504, "learning_rate": 4.091645077720207e-06, "loss": 0.6056, "mean_token_accuracy": 0.9282497465610504, "num_tokens": 6539507.0, "step": 3650 }, { "epoch": 0.5912071897012388, "grad_norm": 26.831071853637695, "learning_rate": 4.090025906735752e-06, "loss": 0.682, "mean_token_accuracy": 0.905089259147644, "num_tokens": 6541293.0, "step": 3651 }, { "epoch": 0.5913691199093191, "grad_norm": 22.436786651611328, "learning_rate": 4.0884067357512954e-06, "loss": 0.5594, "mean_token_accuracy": 0.9113799929618835, "num_tokens": 6543076.0, "step": 3652 }, { "epoch": 0.5915310501173994, "grad_norm": 27.08148765563965, "learning_rate": 4.08678756476684e-06, "loss": 0.5851, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 6544865.0, "step": 3653 }, { "epoch": 0.5916929803254797, "grad_norm": 32.13164138793945, "learning_rate": 4.0851683937823835e-06, "loss": 0.6539, "mean_token_accuracy": 0.9135036468505859, "num_tokens": 6546654.0, "step": 3654 }, { "epoch": 0.59185491053356, "grad_norm": 21.681386947631836, "learning_rate": 4.083549222797928e-06, "loss": 0.5318, "mean_token_accuracy": 0.9292744994163513, "num_tokens": 6548435.0, "step": 3655 }, { "epoch": 0.5920168407416404, "grad_norm": 22.89409065246582, "learning_rate": 4.0819300518134715e-06, "loss": 0.5896, "mean_token_accuracy": 0.9189726412296295, "num_tokens": 6550218.0, "step": 3656 }, { "epoch": 0.5921787709497207, "grad_norm": 21.351774215698242, "learning_rate": 4.080310880829016e-06, "loss": 0.5117, "mean_token_accuracy": 0.9235875904560089, "num_tokens": 6552005.0, "step": 3657 }, { "epoch": 0.592340701157801, "grad_norm": 31.21608543395996, "learning_rate": 4.07869170984456e-06, "loss": 0.6262, "mean_token_accuracy": 0.9150060415267944, "num_tokens": 6553799.0, "step": 3658 }, { "epoch": 0.5925026313658813, "grad_norm": 26.250547409057617, "learning_rate": 4.077072538860104e-06, "loss": 0.5537, "mean_token_accuracy": 0.9205673635005951, "num_tokens": 6555602.0, "step": 3659 }, { "epoch": 0.5926645615739616, "grad_norm": 25.410783767700195, "learning_rate": 4.075453367875648e-06, "loss": 0.5944, "mean_token_accuracy": 0.9230892956256866, "num_tokens": 6557387.0, "step": 3660 }, { "epoch": 0.5928264917820419, "grad_norm": 27.4699649810791, "learning_rate": 4.073834196891192e-06, "loss": 0.6557, "mean_token_accuracy": 0.9270073175430298, "num_tokens": 6559173.0, "step": 3661 }, { "epoch": 0.5929884219901222, "grad_norm": 26.440393447875977, "learning_rate": 4.072215025906736e-06, "loss": 0.6325, "mean_token_accuracy": 0.9184104800224304, "num_tokens": 6560967.0, "step": 3662 }, { "epoch": 0.5931503521982026, "grad_norm": 28.47506332397461, "learning_rate": 4.07059585492228e-06, "loss": 0.6139, "mean_token_accuracy": 0.9164723455905914, "num_tokens": 6562766.0, "step": 3663 }, { "epoch": 0.5933122824062829, "grad_norm": 33.27669143676758, "learning_rate": 4.0689766839378244e-06, "loss": 0.6871, "mean_token_accuracy": 0.9172661900520325, "num_tokens": 6564556.0, "step": 3664 }, { "epoch": 0.5934742126143632, "grad_norm": 28.420377731323242, "learning_rate": 4.067357512953368e-06, "loss": 0.571, "mean_token_accuracy": 0.9183273017406464, "num_tokens": 6566350.0, "step": 3665 }, { "epoch": 0.5936361428224435, "grad_norm": 29.539932250976562, "learning_rate": 4.0657383419689125e-06, "loss": 0.6323, "mean_token_accuracy": 0.9178382456302643, "num_tokens": 6568144.0, "step": 3666 }, { "epoch": 0.5937980730305239, "grad_norm": 30.816614151000977, "learning_rate": 4.064119170984456e-06, "loss": 0.7001, "mean_token_accuracy": 0.9115451872348785, "num_tokens": 6569927.0, "step": 3667 }, { "epoch": 0.5939600032386042, "grad_norm": 22.580835342407227, "learning_rate": 4.0625000000000005e-06, "loss": 0.4757, "mean_token_accuracy": 0.9306755661964417, "num_tokens": 6571725.0, "step": 3668 }, { "epoch": 0.5941219334466845, "grad_norm": 21.536479949951172, "learning_rate": 4.060880829015544e-06, "loss": 0.4974, "mean_token_accuracy": 0.9228707551956177, "num_tokens": 6573521.0, "step": 3669 }, { "epoch": 0.5942838636547648, "grad_norm": 31.76894760131836, "learning_rate": 4.0592616580310885e-06, "loss": 0.5117, "mean_token_accuracy": 0.9217016398906708, "num_tokens": 6575326.0, "step": 3670 }, { "epoch": 0.5944457938628451, "grad_norm": 27.422283172607422, "learning_rate": 4.057642487046632e-06, "loss": 0.5867, "mean_token_accuracy": 0.9133181571960449, "num_tokens": 6577116.0, "step": 3671 }, { "epoch": 0.5946077240709254, "grad_norm": 20.023258209228516, "learning_rate": 4.0560233160621765e-06, "loss": 0.4659, "mean_token_accuracy": 0.9366661608219147, "num_tokens": 6578912.0, "step": 3672 }, { "epoch": 0.5947696542790057, "grad_norm": 33.03738784790039, "learning_rate": 4.05440414507772e-06, "loss": 0.7897, "mean_token_accuracy": 0.9132374227046967, "num_tokens": 6580713.0, "step": 3673 }, { "epoch": 0.594931584487086, "grad_norm": 26.31281280517578, "learning_rate": 4.0527849740932646e-06, "loss": 0.6022, "mean_token_accuracy": 0.9239332377910614, "num_tokens": 6582515.0, "step": 3674 }, { "epoch": 0.5950935146951664, "grad_norm": 21.916072845458984, "learning_rate": 4.051165803108808e-06, "loss": 0.5279, "mean_token_accuracy": 0.9145643413066864, "num_tokens": 6584308.0, "step": 3675 }, { "epoch": 0.5952554449032467, "grad_norm": 16.831350326538086, "learning_rate": 4.049546632124353e-06, "loss": 0.4702, "mean_token_accuracy": 0.9287993907928467, "num_tokens": 6586101.0, "step": 3676 }, { "epoch": 0.595417375111327, "grad_norm": 22.027729034423828, "learning_rate": 4.047927461139897e-06, "loss": 0.5347, "mean_token_accuracy": 0.924933671951294, "num_tokens": 6587893.0, "step": 3677 }, { "epoch": 0.5955793053194073, "grad_norm": 24.40011978149414, "learning_rate": 4.046308290155441e-06, "loss": 0.5614, "mean_token_accuracy": 0.918353796005249, "num_tokens": 6589674.0, "step": 3678 }, { "epoch": 0.5957412355274877, "grad_norm": 31.515626907348633, "learning_rate": 4.044689119170985e-06, "loss": 0.6215, "mean_token_accuracy": 0.9161653220653534, "num_tokens": 6591472.0, "step": 3679 }, { "epoch": 0.595903165735568, "grad_norm": 22.767898559570312, "learning_rate": 4.043069948186529e-06, "loss": 0.5265, "mean_token_accuracy": 0.9255845248699188, "num_tokens": 6593267.0, "step": 3680 }, { "epoch": 0.5960650959436483, "grad_norm": 36.727088928222656, "learning_rate": 4.041450777202073e-06, "loss": 0.7765, "mean_token_accuracy": 0.9032207727432251, "num_tokens": 6595057.0, "step": 3681 }, { "epoch": 0.5962270261517286, "grad_norm": 24.581951141357422, "learning_rate": 4.039831606217617e-06, "loss": 0.5469, "mean_token_accuracy": 0.913891464471817, "num_tokens": 6596848.0, "step": 3682 }, { "epoch": 0.5963889563598089, "grad_norm": 40.105064392089844, "learning_rate": 4.038212435233161e-06, "loss": 0.6906, "mean_token_accuracy": 0.9013539850711823, "num_tokens": 6598644.0, "step": 3683 }, { "epoch": 0.5965508865678892, "grad_norm": 19.64065170288086, "learning_rate": 4.036593264248705e-06, "loss": 0.513, "mean_token_accuracy": 0.9225490093231201, "num_tokens": 6600427.0, "step": 3684 }, { "epoch": 0.5967128167759695, "grad_norm": 27.96965217590332, "learning_rate": 4.034974093264249e-06, "loss": 0.6054, "mean_token_accuracy": 0.925253301858902, "num_tokens": 6602220.0, "step": 3685 }, { "epoch": 0.5968747469840499, "grad_norm": 32.57131576538086, "learning_rate": 4.033354922279793e-06, "loss": 0.7404, "mean_token_accuracy": 0.8995442986488342, "num_tokens": 6604010.0, "step": 3686 }, { "epoch": 0.5970366771921302, "grad_norm": 32.82229232788086, "learning_rate": 4.031735751295337e-06, "loss": 0.6617, "mean_token_accuracy": 0.9109405279159546, "num_tokens": 6605803.0, "step": 3687 }, { "epoch": 0.5971986074002105, "grad_norm": 21.351390838623047, "learning_rate": 4.030116580310881e-06, "loss": 0.6382, "mean_token_accuracy": 0.9186631441116333, "num_tokens": 6607600.0, "step": 3688 }, { "epoch": 0.5973605376082908, "grad_norm": 30.04303741455078, "learning_rate": 4.028497409326425e-06, "loss": 0.6877, "mean_token_accuracy": 0.9051948189735413, "num_tokens": 6609406.0, "step": 3689 }, { "epoch": 0.5975224678163712, "grad_norm": 25.815378189086914, "learning_rate": 4.026878238341969e-06, "loss": 0.5924, "mean_token_accuracy": 0.9198676943778992, "num_tokens": 6611192.0, "step": 3690 }, { "epoch": 0.5976843980244515, "grad_norm": 30.091291427612305, "learning_rate": 4.025259067357513e-06, "loss": 0.6431, "mean_token_accuracy": 0.9119961261749268, "num_tokens": 6612985.0, "step": 3691 }, { "epoch": 0.5978463282325318, "grad_norm": 34.82086944580078, "learning_rate": 4.023639896373057e-06, "loss": 0.905, "mean_token_accuracy": 0.8985185027122498, "num_tokens": 6614782.0, "step": 3692 }, { "epoch": 0.5980082584406121, "grad_norm": 36.073421478271484, "learning_rate": 4.022020725388601e-06, "loss": 0.7377, "mean_token_accuracy": 0.9118259847164154, "num_tokens": 6616588.0, "step": 3693 }, { "epoch": 0.5981701886486924, "grad_norm": 32.50791549682617, "learning_rate": 4.020401554404146e-06, "loss": 0.6739, "mean_token_accuracy": 0.9113631248474121, "num_tokens": 6618394.0, "step": 3694 }, { "epoch": 0.5983321188567727, "grad_norm": 28.352083206176758, "learning_rate": 4.018782383419689e-06, "loss": 0.6707, "mean_token_accuracy": 0.9162254929542542, "num_tokens": 6620192.0, "step": 3695 }, { "epoch": 0.598494049064853, "grad_norm": 30.50720977783203, "learning_rate": 4.017163212435234e-06, "loss": 0.5888, "mean_token_accuracy": 0.9082609713077545, "num_tokens": 6621988.0, "step": 3696 }, { "epoch": 0.5986559792729333, "grad_norm": 23.101003646850586, "learning_rate": 4.015544041450777e-06, "loss": 0.518, "mean_token_accuracy": 0.9206465184688568, "num_tokens": 6623777.0, "step": 3697 }, { "epoch": 0.5988179094810137, "grad_norm": 21.81392478942871, "learning_rate": 4.013924870466322e-06, "loss": 0.6805, "mean_token_accuracy": 0.920639842748642, "num_tokens": 6625567.0, "step": 3698 }, { "epoch": 0.598979839689094, "grad_norm": 23.16607666015625, "learning_rate": 4.012305699481865e-06, "loss": 0.6186, "mean_token_accuracy": 0.918313592672348, "num_tokens": 6627349.0, "step": 3699 }, { "epoch": 0.5991417698971743, "grad_norm": 33.53606414794922, "learning_rate": 4.01068652849741e-06, "loss": 0.8272, "mean_token_accuracy": 0.8991917371749878, "num_tokens": 6629128.0, "step": 3700 }, { "epoch": 0.5993037001052547, "grad_norm": 28.027090072631836, "learning_rate": 4.009067357512953e-06, "loss": 0.6099, "mean_token_accuracy": 0.9151678681373596, "num_tokens": 6630911.0, "step": 3701 }, { "epoch": 0.599465630313335, "grad_norm": 15.572281837463379, "learning_rate": 4.007448186528498e-06, "loss": 0.4571, "mean_token_accuracy": 0.9324262738227844, "num_tokens": 6632704.0, "step": 3702 }, { "epoch": 0.5996275605214153, "grad_norm": 36.786128997802734, "learning_rate": 4.005829015544041e-06, "loss": 0.6933, "mean_token_accuracy": 0.9109029471874237, "num_tokens": 6634496.0, "step": 3703 }, { "epoch": 0.5997894907294956, "grad_norm": 27.85297393798828, "learning_rate": 4.004209844559586e-06, "loss": 0.5826, "mean_token_accuracy": 0.9171826541423798, "num_tokens": 6636296.0, "step": 3704 }, { "epoch": 0.5999514209375759, "grad_norm": 14.017465591430664, "learning_rate": 4.002590673575129e-06, "loss": 0.473, "mean_token_accuracy": 0.93160080909729, "num_tokens": 6638086.0, "step": 3705 }, { "epoch": 0.6001133511456562, "grad_norm": 17.05233383178711, "learning_rate": 4.000971502590674e-06, "loss": 0.4945, "mean_token_accuracy": 0.9260194301605225, "num_tokens": 6639868.0, "step": 3706 }, { "epoch": 0.6002752813537365, "grad_norm": 33.7879638671875, "learning_rate": 3.9993523316062174e-06, "loss": 0.7174, "mean_token_accuracy": 0.9114553928375244, "num_tokens": 6641672.0, "step": 3707 }, { "epoch": 0.6004372115618168, "grad_norm": 28.628454208374023, "learning_rate": 3.997733160621762e-06, "loss": 0.6426, "mean_token_accuracy": 0.9168866872787476, "num_tokens": 6643462.0, "step": 3708 }, { "epoch": 0.6005991417698971, "grad_norm": 31.869678497314453, "learning_rate": 3.9961139896373055e-06, "loss": 0.64, "mean_token_accuracy": 0.9047606289386749, "num_tokens": 6645247.0, "step": 3709 }, { "epoch": 0.6007610719779775, "grad_norm": 19.07252311706543, "learning_rate": 3.99449481865285e-06, "loss": 0.6437, "mean_token_accuracy": 0.9226754009723663, "num_tokens": 6647031.0, "step": 3710 }, { "epoch": 0.6009230021860578, "grad_norm": 26.679035186767578, "learning_rate": 3.9928756476683935e-06, "loss": 0.5673, "mean_token_accuracy": 0.9185742437839508, "num_tokens": 6648825.0, "step": 3711 }, { "epoch": 0.6010849323941381, "grad_norm": 38.0206413269043, "learning_rate": 3.991256476683938e-06, "loss": 0.7106, "mean_token_accuracy": 0.898194432258606, "num_tokens": 6650631.0, "step": 3712 }, { "epoch": 0.6012468626022185, "grad_norm": 33.41487121582031, "learning_rate": 3.989637305699482e-06, "loss": 0.7466, "mean_token_accuracy": 0.9028554856777191, "num_tokens": 6652431.0, "step": 3713 }, { "epoch": 0.6014087928102988, "grad_norm": 26.38525390625, "learning_rate": 3.988018134715026e-06, "loss": 0.5905, "mean_token_accuracy": 0.9182541370391846, "num_tokens": 6654224.0, "step": 3714 }, { "epoch": 0.6015707230183791, "grad_norm": 25.297746658325195, "learning_rate": 3.98639896373057e-06, "loss": 0.6568, "mean_token_accuracy": 0.9181488752365112, "num_tokens": 6656006.0, "step": 3715 }, { "epoch": 0.6017326532264594, "grad_norm": 37.08112335205078, "learning_rate": 3.984779792746114e-06, "loss": 0.6702, "mean_token_accuracy": 0.9148701429367065, "num_tokens": 6657810.0, "step": 3716 }, { "epoch": 0.6018945834345397, "grad_norm": 24.39055824279785, "learning_rate": 3.983160621761658e-06, "loss": 0.587, "mean_token_accuracy": 0.9113828539848328, "num_tokens": 6659604.0, "step": 3717 }, { "epoch": 0.60205651364262, "grad_norm": 28.963029861450195, "learning_rate": 3.981541450777203e-06, "loss": 0.6176, "mean_token_accuracy": 0.9030910730361938, "num_tokens": 6661384.0, "step": 3718 }, { "epoch": 0.6022184438507003, "grad_norm": 34.459083557128906, "learning_rate": 3.9799222797927464e-06, "loss": 0.7297, "mean_token_accuracy": 0.9078014194965363, "num_tokens": 6663178.0, "step": 3719 }, { "epoch": 0.6023803740587806, "grad_norm": 21.510969161987305, "learning_rate": 3.978303108808291e-06, "loss": 0.6008, "mean_token_accuracy": 0.916793555021286, "num_tokens": 6664966.0, "step": 3720 }, { "epoch": 0.602542304266861, "grad_norm": 24.454227447509766, "learning_rate": 3.9766839378238345e-06, "loss": 0.5782, "mean_token_accuracy": 0.9210526347160339, "num_tokens": 6666744.0, "step": 3721 }, { "epoch": 0.6027042344749413, "grad_norm": 24.702714920043945, "learning_rate": 3.975064766839379e-06, "loss": 0.5617, "mean_token_accuracy": 0.9157062470912933, "num_tokens": 6668541.0, "step": 3722 }, { "epoch": 0.6028661646830216, "grad_norm": 26.115219116210938, "learning_rate": 3.9734455958549225e-06, "loss": 0.649, "mean_token_accuracy": 0.9074974656105042, "num_tokens": 6670334.0, "step": 3723 }, { "epoch": 0.603028094891102, "grad_norm": 33.99589538574219, "learning_rate": 3.971826424870467e-06, "loss": 0.7372, "mean_token_accuracy": 0.90386563539505, "num_tokens": 6672127.0, "step": 3724 }, { "epoch": 0.6031900250991823, "grad_norm": 20.441484451293945, "learning_rate": 3.9702072538860105e-06, "loss": 0.5208, "mean_token_accuracy": 0.9299903213977814, "num_tokens": 6673912.0, "step": 3725 }, { "epoch": 0.6033519553072626, "grad_norm": 36.506874084472656, "learning_rate": 3.968588082901555e-06, "loss": 0.9251, "mean_token_accuracy": 0.8910548090934753, "num_tokens": 6675699.0, "step": 3726 }, { "epoch": 0.6035138855153429, "grad_norm": 26.96748924255371, "learning_rate": 3.966968911917099e-06, "loss": 0.6854, "mean_token_accuracy": 0.907131016254425, "num_tokens": 6677480.0, "step": 3727 }, { "epoch": 0.6036758157234232, "grad_norm": 24.486268997192383, "learning_rate": 3.965349740932643e-06, "loss": 0.5984, "mean_token_accuracy": 0.9217728674411774, "num_tokens": 6679272.0, "step": 3728 }, { "epoch": 0.6038377459315035, "grad_norm": 34.572322845458984, "learning_rate": 3.963730569948187e-06, "loss": 0.9161, "mean_token_accuracy": 0.9017195999622345, "num_tokens": 6681059.0, "step": 3729 }, { "epoch": 0.6039996761395838, "grad_norm": 26.056943893432617, "learning_rate": 3.962111398963731e-06, "loss": 0.5463, "mean_token_accuracy": 0.9211378395557404, "num_tokens": 6682850.0, "step": 3730 }, { "epoch": 0.6041616063476641, "grad_norm": 27.31075668334961, "learning_rate": 3.9604922279792754e-06, "loss": 0.6572, "mean_token_accuracy": 0.9054268598556519, "num_tokens": 6684637.0, "step": 3731 }, { "epoch": 0.6043235365557444, "grad_norm": 32.93107604980469, "learning_rate": 3.958873056994819e-06, "loss": 0.7366, "mean_token_accuracy": 0.9107471108436584, "num_tokens": 6686429.0, "step": 3732 }, { "epoch": 0.6044854667638248, "grad_norm": 29.372608184814453, "learning_rate": 3.9572538860103635e-06, "loss": 0.6593, "mean_token_accuracy": 0.8982490599155426, "num_tokens": 6688216.0, "step": 3733 }, { "epoch": 0.6046473969719051, "grad_norm": 25.47175407409668, "learning_rate": 3.955634715025907e-06, "loss": 0.6061, "mean_token_accuracy": 0.9180261492729187, "num_tokens": 6690009.0, "step": 3734 }, { "epoch": 0.6048093271799855, "grad_norm": 28.501943588256836, "learning_rate": 3.9540155440414515e-06, "loss": 0.5638, "mean_token_accuracy": 0.9119718372821808, "num_tokens": 6691805.0, "step": 3735 }, { "epoch": 0.6049712573880658, "grad_norm": 32.363433837890625, "learning_rate": 3.952396373056995e-06, "loss": 0.8526, "mean_token_accuracy": 0.9058353900909424, "num_tokens": 6693595.0, "step": 3736 }, { "epoch": 0.6051331875961461, "grad_norm": 38.678653717041016, "learning_rate": 3.9507772020725395e-06, "loss": 0.7392, "mean_token_accuracy": 0.9062816500663757, "num_tokens": 6695395.0, "step": 3737 }, { "epoch": 0.6052951178042264, "grad_norm": 26.37191390991211, "learning_rate": 3.949158031088083e-06, "loss": 0.603, "mean_token_accuracy": 0.9134595096111298, "num_tokens": 6697184.0, "step": 3738 }, { "epoch": 0.6054570480123067, "grad_norm": 31.75313377380371, "learning_rate": 3.9475388601036275e-06, "loss": 0.742, "mean_token_accuracy": 0.9120039641857147, "num_tokens": 6698980.0, "step": 3739 }, { "epoch": 0.605618978220387, "grad_norm": 17.165111541748047, "learning_rate": 3.945919689119171e-06, "loss": 0.5018, "mean_token_accuracy": 0.9313608109951019, "num_tokens": 6700769.0, "step": 3740 }, { "epoch": 0.6057809084284673, "grad_norm": 34.97706985473633, "learning_rate": 3.9443005181347156e-06, "loss": 0.7269, "mean_token_accuracy": 0.9084957540035248, "num_tokens": 6702565.0, "step": 3741 }, { "epoch": 0.6059428386365476, "grad_norm": 26.550935745239258, "learning_rate": 3.942681347150259e-06, "loss": 0.6801, "mean_token_accuracy": 0.9218875765800476, "num_tokens": 6704359.0, "step": 3742 }, { "epoch": 0.6061047688446279, "grad_norm": 31.897624969482422, "learning_rate": 3.941062176165804e-06, "loss": 0.7863, "mean_token_accuracy": 0.9066407978534698, "num_tokens": 6706151.0, "step": 3743 }, { "epoch": 0.6062666990527082, "grad_norm": 36.83315658569336, "learning_rate": 3.939443005181347e-06, "loss": 0.8991, "mean_token_accuracy": 0.8851260840892792, "num_tokens": 6707942.0, "step": 3744 }, { "epoch": 0.6064286292607886, "grad_norm": 23.051773071289062, "learning_rate": 3.937823834196892e-06, "loss": 0.553, "mean_token_accuracy": 0.9232993125915527, "num_tokens": 6709741.0, "step": 3745 }, { "epoch": 0.606590559468869, "grad_norm": 28.415992736816406, "learning_rate": 3.936204663212436e-06, "loss": 0.6961, "mean_token_accuracy": 0.9119967818260193, "num_tokens": 6711526.0, "step": 3746 }, { "epoch": 0.6067524896769493, "grad_norm": 32.81734848022461, "learning_rate": 3.93458549222798e-06, "loss": 0.6967, "mean_token_accuracy": 0.9024762809276581, "num_tokens": 6713314.0, "step": 3747 }, { "epoch": 0.6069144198850296, "grad_norm": 22.28242301940918, "learning_rate": 3.932966321243524e-06, "loss": 0.5363, "mean_token_accuracy": 0.9215686321258545, "num_tokens": 6715106.0, "step": 3748 }, { "epoch": 0.6070763500931099, "grad_norm": 21.49435043334961, "learning_rate": 3.931347150259068e-06, "loss": 0.5697, "mean_token_accuracy": 0.9236485958099365, "num_tokens": 6716893.0, "step": 3749 }, { "epoch": 0.6072382803011902, "grad_norm": 27.520023345947266, "learning_rate": 3.929727979274612e-06, "loss": 0.6193, "mean_token_accuracy": 0.9122975468635559, "num_tokens": 6718679.0, "step": 3750 }, { "epoch": 0.6074002105092705, "grad_norm": 31.730144500732422, "learning_rate": 3.928108808290156e-06, "loss": 0.6926, "mean_token_accuracy": 0.9118930697441101, "num_tokens": 6720475.0, "step": 3751 }, { "epoch": 0.6075621407173508, "grad_norm": 29.855321884155273, "learning_rate": 3.9264896373057e-06, "loss": 0.6372, "mean_token_accuracy": 0.9167623519897461, "num_tokens": 6722264.0, "step": 3752 }, { "epoch": 0.6077240709254311, "grad_norm": 30.293182373046875, "learning_rate": 3.924870466321244e-06, "loss": 0.6414, "mean_token_accuracy": 0.9075596034526825, "num_tokens": 6724056.0, "step": 3753 }, { "epoch": 0.6078860011335114, "grad_norm": 34.557804107666016, "learning_rate": 3.923251295336788e-06, "loss": 0.5937, "mean_token_accuracy": 0.9066252708435059, "num_tokens": 6725846.0, "step": 3754 }, { "epoch": 0.6080479313415917, "grad_norm": 30.57419204711914, "learning_rate": 3.921632124352332e-06, "loss": 0.6759, "mean_token_accuracy": 0.9142877459526062, "num_tokens": 6727649.0, "step": 3755 }, { "epoch": 0.608209861549672, "grad_norm": 32.914615631103516, "learning_rate": 3.920012953367876e-06, "loss": 0.7624, "mean_token_accuracy": 0.9007092118263245, "num_tokens": 6729443.0, "step": 3756 }, { "epoch": 0.6083717917577524, "grad_norm": 26.21811294555664, "learning_rate": 3.91839378238342e-06, "loss": 0.5819, "mean_token_accuracy": 0.9114106595516205, "num_tokens": 6731226.0, "step": 3757 }, { "epoch": 0.6085337219658328, "grad_norm": 23.061723709106445, "learning_rate": 3.916774611398964e-06, "loss": 0.5779, "mean_token_accuracy": 0.9187144339084625, "num_tokens": 6733021.0, "step": 3758 }, { "epoch": 0.6086956521739131, "grad_norm": 30.69961166381836, "learning_rate": 3.915155440414508e-06, "loss": 0.7285, "mean_token_accuracy": 0.9183647632598877, "num_tokens": 6734814.0, "step": 3759 }, { "epoch": 0.6088575823819934, "grad_norm": 25.092193603515625, "learning_rate": 3.913536269430052e-06, "loss": 0.6371, "mean_token_accuracy": 0.9091029465198517, "num_tokens": 6736601.0, "step": 3760 }, { "epoch": 0.6090195125900737, "grad_norm": 29.371593475341797, "learning_rate": 3.911917098445596e-06, "loss": 0.7075, "mean_token_accuracy": 0.9028784930706024, "num_tokens": 6738412.0, "step": 3761 }, { "epoch": 0.609181442798154, "grad_norm": 21.791093826293945, "learning_rate": 3.91029792746114e-06, "loss": 0.5906, "mean_token_accuracy": 0.9190140962600708, "num_tokens": 6740208.0, "step": 3762 }, { "epoch": 0.6093433730062343, "grad_norm": 22.272323608398438, "learning_rate": 3.908678756476684e-06, "loss": 0.6172, "mean_token_accuracy": 0.9109416007995605, "num_tokens": 6741990.0, "step": 3763 }, { "epoch": 0.6095053032143146, "grad_norm": 22.989259719848633, "learning_rate": 3.907059585492228e-06, "loss": 0.5625, "mean_token_accuracy": 0.9178784787654877, "num_tokens": 6743782.0, "step": 3764 }, { "epoch": 0.6096672334223949, "grad_norm": 23.15787696838379, "learning_rate": 3.905440414507773e-06, "loss": 0.5982, "mean_token_accuracy": 0.9171754717826843, "num_tokens": 6745560.0, "step": 3765 }, { "epoch": 0.6098291636304752, "grad_norm": 24.93471908569336, "learning_rate": 3.903821243523316e-06, "loss": 0.6987, "mean_token_accuracy": 0.9097852110862732, "num_tokens": 6747349.0, "step": 3766 }, { "epoch": 0.6099910938385555, "grad_norm": 15.27294921875, "learning_rate": 3.902202072538861e-06, "loss": 0.4945, "mean_token_accuracy": 0.9336365461349487, "num_tokens": 6749147.0, "step": 3767 }, { "epoch": 0.6101530240466359, "grad_norm": 19.41769027709961, "learning_rate": 3.900582901554404e-06, "loss": 0.5289, "mean_token_accuracy": 0.9269450306892395, "num_tokens": 6750933.0, "step": 3768 }, { "epoch": 0.6103149542547163, "grad_norm": 17.94277572631836, "learning_rate": 3.898963730569949e-06, "loss": 0.5441, "mean_token_accuracy": 0.9216800332069397, "num_tokens": 6752713.0, "step": 3769 }, { "epoch": 0.6104768844627966, "grad_norm": 20.330320358276367, "learning_rate": 3.897344559585492e-06, "loss": 0.5621, "mean_token_accuracy": 0.9091245830059052, "num_tokens": 6754500.0, "step": 3770 }, { "epoch": 0.6106388146708769, "grad_norm": 31.550535202026367, "learning_rate": 3.895725388601037e-06, "loss": 0.7151, "mean_token_accuracy": 0.9107434451580048, "num_tokens": 6756292.0, "step": 3771 }, { "epoch": 0.6108007448789572, "grad_norm": 29.74262809753418, "learning_rate": 3.89410621761658e-06, "loss": 0.7201, "mean_token_accuracy": 0.9045474529266357, "num_tokens": 6758075.0, "step": 3772 }, { "epoch": 0.6109626750870375, "grad_norm": 18.766494750976562, "learning_rate": 3.892487046632125e-06, "loss": 0.5046, "mean_token_accuracy": 0.9255318939685822, "num_tokens": 6759869.0, "step": 3773 }, { "epoch": 0.6111246052951178, "grad_norm": 30.482812881469727, "learning_rate": 3.890867875647668e-06, "loss": 0.6489, "mean_token_accuracy": 0.9046145975589752, "num_tokens": 6761662.0, "step": 3774 }, { "epoch": 0.6112865355031981, "grad_norm": 36.873958587646484, "learning_rate": 3.889248704663213e-06, "loss": 0.6974, "mean_token_accuracy": 0.889857143163681, "num_tokens": 6763455.0, "step": 3775 }, { "epoch": 0.6114484657112784, "grad_norm": 19.92529296875, "learning_rate": 3.8876295336787564e-06, "loss": 0.5501, "mean_token_accuracy": 0.9272717833518982, "num_tokens": 6765242.0, "step": 3776 }, { "epoch": 0.6116103959193587, "grad_norm": 22.154050827026367, "learning_rate": 3.886010362694301e-06, "loss": 0.5527, "mean_token_accuracy": 0.924217939376831, "num_tokens": 6767031.0, "step": 3777 }, { "epoch": 0.611772326127439, "grad_norm": 22.282154083251953, "learning_rate": 3.8843911917098445e-06, "loss": 0.5947, "mean_token_accuracy": 0.9162260293960571, "num_tokens": 6768829.0, "step": 3778 }, { "epoch": 0.6119342563355193, "grad_norm": 16.399154663085938, "learning_rate": 3.882772020725389e-06, "loss": 0.4504, "mean_token_accuracy": 0.9401785731315613, "num_tokens": 6770625.0, "step": 3779 }, { "epoch": 0.6120961865435998, "grad_norm": 30.0355281829834, "learning_rate": 3.8811528497409325e-06, "loss": 0.5614, "mean_token_accuracy": 0.9181813895702362, "num_tokens": 6772430.0, "step": 3780 }, { "epoch": 0.6122581167516801, "grad_norm": 21.679079055786133, "learning_rate": 3.879533678756477e-06, "loss": 0.5718, "mean_token_accuracy": 0.9293956160545349, "num_tokens": 6774225.0, "step": 3781 }, { "epoch": 0.6124200469597604, "grad_norm": 40.652687072753906, "learning_rate": 3.8779145077720205e-06, "loss": 1.0383, "mean_token_accuracy": 0.8844460844993591, "num_tokens": 6776023.0, "step": 3782 }, { "epoch": 0.6125819771678407, "grad_norm": 13.871591567993164, "learning_rate": 3.876295336787565e-06, "loss": 0.4154, "mean_token_accuracy": 0.9423837065696716, "num_tokens": 6777830.0, "step": 3783 }, { "epoch": 0.612743907375921, "grad_norm": 22.459936141967773, "learning_rate": 3.874676165803109e-06, "loss": 0.5932, "mean_token_accuracy": 0.9188725650310516, "num_tokens": 6779613.0, "step": 3784 }, { "epoch": 0.6129058375840013, "grad_norm": 31.759794235229492, "learning_rate": 3.873056994818653e-06, "loss": 0.6472, "mean_token_accuracy": 0.9177459180355072, "num_tokens": 6781404.0, "step": 3785 }, { "epoch": 0.6130677677920816, "grad_norm": 21.124162673950195, "learning_rate": 3.871437823834197e-06, "loss": 0.5099, "mean_token_accuracy": 0.9199119508266449, "num_tokens": 6783202.0, "step": 3786 }, { "epoch": 0.6132296980001619, "grad_norm": 31.745376586914062, "learning_rate": 3.869818652849741e-06, "loss": 0.7684, "mean_token_accuracy": 0.9053639471530914, "num_tokens": 6784989.0, "step": 3787 }, { "epoch": 0.6133916282082422, "grad_norm": 26.238304138183594, "learning_rate": 3.8681994818652854e-06, "loss": 0.6572, "mean_token_accuracy": 0.9134325683116913, "num_tokens": 6786788.0, "step": 3788 }, { "epoch": 0.6135535584163225, "grad_norm": 32.47900390625, "learning_rate": 3.866580310880829e-06, "loss": 0.571, "mean_token_accuracy": 0.9181869029998779, "num_tokens": 6788581.0, "step": 3789 }, { "epoch": 0.6137154886244028, "grad_norm": 23.9670352935791, "learning_rate": 3.8649611398963735e-06, "loss": 0.5882, "mean_token_accuracy": 0.9187581241130829, "num_tokens": 6790376.0, "step": 3790 }, { "epoch": 0.6138774188324831, "grad_norm": 27.3259220123291, "learning_rate": 3.863341968911917e-06, "loss": 0.62, "mean_token_accuracy": 0.9175078868865967, "num_tokens": 6792166.0, "step": 3791 }, { "epoch": 0.6140393490405636, "grad_norm": 21.81146812438965, "learning_rate": 3.8617227979274615e-06, "loss": 0.547, "mean_token_accuracy": 0.9223888218402863, "num_tokens": 6793961.0, "step": 3792 }, { "epoch": 0.6142012792486439, "grad_norm": 23.613964080810547, "learning_rate": 3.860103626943005e-06, "loss": 0.5144, "mean_token_accuracy": 0.9290209114551544, "num_tokens": 6795755.0, "step": 3793 }, { "epoch": 0.6143632094567242, "grad_norm": 25.392786026000977, "learning_rate": 3.8584844559585495e-06, "loss": 0.53, "mean_token_accuracy": 0.9244921207427979, "num_tokens": 6797545.0, "step": 3794 }, { "epoch": 0.6145251396648045, "grad_norm": 32.05488204956055, "learning_rate": 3.856865284974093e-06, "loss": 0.5741, "mean_token_accuracy": 0.9196043908596039, "num_tokens": 6799343.0, "step": 3795 }, { "epoch": 0.6146870698728848, "grad_norm": 20.420400619506836, "learning_rate": 3.8552461139896376e-06, "loss": 0.4936, "mean_token_accuracy": 0.925607442855835, "num_tokens": 6801138.0, "step": 3796 }, { "epoch": 0.6148490000809651, "grad_norm": 30.513477325439453, "learning_rate": 3.853626943005181e-06, "loss": 0.7329, "mean_token_accuracy": 0.9052910208702087, "num_tokens": 6802925.0, "step": 3797 }, { "epoch": 0.6150109302890454, "grad_norm": 28.486637115478516, "learning_rate": 3.852007772020726e-06, "loss": 0.603, "mean_token_accuracy": 0.9122854769229889, "num_tokens": 6804722.0, "step": 3798 }, { "epoch": 0.6151728604971257, "grad_norm": 30.109798431396484, "learning_rate": 3.850388601036269e-06, "loss": 0.8165, "mean_token_accuracy": 0.9060952067375183, "num_tokens": 6806521.0, "step": 3799 }, { "epoch": 0.615334790705206, "grad_norm": 18.956100463867188, "learning_rate": 3.848769430051814e-06, "loss": 0.4939, "mean_token_accuracy": 0.9273809492588043, "num_tokens": 6808308.0, "step": 3800 }, { "epoch": 0.6154967209132863, "grad_norm": 26.668197631835938, "learning_rate": 3.847150259067358e-06, "loss": 0.5909, "mean_token_accuracy": 0.9249196350574493, "num_tokens": 6810100.0, "step": 3801 }, { "epoch": 0.6156586511213666, "grad_norm": 24.940509796142578, "learning_rate": 3.845531088082902e-06, "loss": 0.6047, "mean_token_accuracy": 0.9209931194782257, "num_tokens": 6811891.0, "step": 3802 }, { "epoch": 0.6158205813294471, "grad_norm": 30.22850799560547, "learning_rate": 3.843911917098446e-06, "loss": 0.6112, "mean_token_accuracy": 0.917723149061203, "num_tokens": 6813684.0, "step": 3803 }, { "epoch": 0.6159825115375274, "grad_norm": 27.589496612548828, "learning_rate": 3.84229274611399e-06, "loss": 0.6488, "mean_token_accuracy": 0.9193262457847595, "num_tokens": 6815481.0, "step": 3804 }, { "epoch": 0.6161444417456077, "grad_norm": 27.943086624145508, "learning_rate": 3.840673575129534e-06, "loss": 0.6977, "mean_token_accuracy": 0.9083566069602966, "num_tokens": 6817266.0, "step": 3805 }, { "epoch": 0.616306371953688, "grad_norm": 19.584903717041016, "learning_rate": 3.839054404145078e-06, "loss": 0.5865, "mean_token_accuracy": 0.9224945604801178, "num_tokens": 6819049.0, "step": 3806 }, { "epoch": 0.6164683021617683, "grad_norm": 20.051246643066406, "learning_rate": 3.837435233160622e-06, "loss": 0.4735, "mean_token_accuracy": 0.9391339719295502, "num_tokens": 6820841.0, "step": 3807 }, { "epoch": 0.6166302323698486, "grad_norm": 15.651829719543457, "learning_rate": 3.835816062176166e-06, "loss": 0.5039, "mean_token_accuracy": 0.9247439801692963, "num_tokens": 6822619.0, "step": 3808 }, { "epoch": 0.6167921625779289, "grad_norm": 30.62786865234375, "learning_rate": 3.83419689119171e-06, "loss": 0.6881, "mean_token_accuracy": 0.9024867117404938, "num_tokens": 6824408.0, "step": 3809 }, { "epoch": 0.6169540927860092, "grad_norm": 28.820493698120117, "learning_rate": 3.832577720207254e-06, "loss": 0.573, "mean_token_accuracy": 0.9138931930065155, "num_tokens": 6826208.0, "step": 3810 }, { "epoch": 0.6171160229940895, "grad_norm": 21.29700469970703, "learning_rate": 3.830958549222798e-06, "loss": 0.5448, "mean_token_accuracy": 0.9277743101119995, "num_tokens": 6827997.0, "step": 3811 }, { "epoch": 0.6172779532021698, "grad_norm": 29.305561065673828, "learning_rate": 3.829339378238342e-06, "loss": 0.5409, "mean_token_accuracy": 0.9260841310024261, "num_tokens": 6829792.0, "step": 3812 }, { "epoch": 0.6174398834102501, "grad_norm": 27.338274002075195, "learning_rate": 3.827720207253886e-06, "loss": 0.5942, "mean_token_accuracy": 0.9142078757286072, "num_tokens": 6831583.0, "step": 3813 }, { "epoch": 0.6176018136183306, "grad_norm": 27.450578689575195, "learning_rate": 3.826101036269431e-06, "loss": 0.5322, "mean_token_accuracy": 0.9161375463008881, "num_tokens": 6833370.0, "step": 3814 }, { "epoch": 0.6177637438264109, "grad_norm": 30.09908103942871, "learning_rate": 3.824481865284975e-06, "loss": 0.6357, "mean_token_accuracy": 0.9142303168773651, "num_tokens": 6835162.0, "step": 3815 }, { "epoch": 0.6179256740344912, "grad_norm": 34.357730865478516, "learning_rate": 3.822862694300519e-06, "loss": 0.6996, "mean_token_accuracy": 0.916083574295044, "num_tokens": 6836959.0, "step": 3816 }, { "epoch": 0.6180876042425715, "grad_norm": 27.420150756835938, "learning_rate": 3.821243523316063e-06, "loss": 0.6842, "mean_token_accuracy": 0.8966230750083923, "num_tokens": 6838742.0, "step": 3817 }, { "epoch": 0.6182495344506518, "grad_norm": 23.396629333496094, "learning_rate": 3.819624352331607e-06, "loss": 0.5521, "mean_token_accuracy": 0.9242894947528839, "num_tokens": 6840532.0, "step": 3818 }, { "epoch": 0.6184114646587321, "grad_norm": 26.299522399902344, "learning_rate": 3.818005181347151e-06, "loss": 0.6181, "mean_token_accuracy": 0.9216232597827911, "num_tokens": 6842325.0, "step": 3819 }, { "epoch": 0.6185733948668124, "grad_norm": 27.156272888183594, "learning_rate": 3.816386010362695e-06, "loss": 0.6697, "mean_token_accuracy": 0.903769850730896, "num_tokens": 6844128.0, "step": 3820 }, { "epoch": 0.6187353250748927, "grad_norm": 22.009973526000977, "learning_rate": 3.814766839378239e-06, "loss": 0.5486, "mean_token_accuracy": 0.9231078922748566, "num_tokens": 6845913.0, "step": 3821 }, { "epoch": 0.618897255282973, "grad_norm": 26.79131507873535, "learning_rate": 3.8131476683937827e-06, "loss": 0.63, "mean_token_accuracy": 0.9200210571289062, "num_tokens": 6847701.0, "step": 3822 }, { "epoch": 0.6190591854910533, "grad_norm": 18.714893341064453, "learning_rate": 3.8115284974093268e-06, "loss": 0.5246, "mean_token_accuracy": 0.9271561801433563, "num_tokens": 6849488.0, "step": 3823 }, { "epoch": 0.6192211156991336, "grad_norm": 33.2025260925293, "learning_rate": 3.809909326424871e-06, "loss": 0.6857, "mean_token_accuracy": 0.9107142686843872, "num_tokens": 6851280.0, "step": 3824 }, { "epoch": 0.6193830459072139, "grad_norm": 11.177903175354004, "learning_rate": 3.808290155440415e-06, "loss": 0.4367, "mean_token_accuracy": 0.9370370507240295, "num_tokens": 6853062.0, "step": 3825 }, { "epoch": 0.6195449761152944, "grad_norm": 37.468692779541016, "learning_rate": 3.8066709844559592e-06, "loss": 0.6632, "mean_token_accuracy": 0.9169968664646149, "num_tokens": 6854851.0, "step": 3826 }, { "epoch": 0.6197069063233747, "grad_norm": 24.48048973083496, "learning_rate": 3.8050518134715032e-06, "loss": 0.6075, "mean_token_accuracy": 0.9166666567325592, "num_tokens": 6856648.0, "step": 3827 }, { "epoch": 0.619868836531455, "grad_norm": 50.5008544921875, "learning_rate": 3.8034326424870472e-06, "loss": 0.9816, "mean_token_accuracy": 0.8976364433765411, "num_tokens": 6858444.0, "step": 3828 }, { "epoch": 0.6200307667395353, "grad_norm": 26.1619815826416, "learning_rate": 3.8018134715025913e-06, "loss": 0.5247, "mean_token_accuracy": 0.925452709197998, "num_tokens": 6860238.0, "step": 3829 }, { "epoch": 0.6201926969476156, "grad_norm": 26.874610900878906, "learning_rate": 3.8001943005181353e-06, "loss": 0.5905, "mean_token_accuracy": 0.9128559231758118, "num_tokens": 6862025.0, "step": 3830 }, { "epoch": 0.6203546271556959, "grad_norm": 30.82623863220215, "learning_rate": 3.7985751295336793e-06, "loss": 0.6604, "mean_token_accuracy": 0.9100041687488556, "num_tokens": 6863814.0, "step": 3831 }, { "epoch": 0.6205165573637762, "grad_norm": 26.21592903137207, "learning_rate": 3.7969559585492233e-06, "loss": 0.763, "mean_token_accuracy": 0.9129201769828796, "num_tokens": 6865602.0, "step": 3832 }, { "epoch": 0.6206784875718565, "grad_norm": 23.0000057220459, "learning_rate": 3.7953367875647673e-06, "loss": 0.5184, "mean_token_accuracy": 0.9192083179950714, "num_tokens": 6867386.0, "step": 3833 }, { "epoch": 0.6208404177799368, "grad_norm": 28.99321174621582, "learning_rate": 3.7937176165803113e-06, "loss": 0.6856, "mean_token_accuracy": 0.9027210772037506, "num_tokens": 6869185.0, "step": 3834 }, { "epoch": 0.6210023479880171, "grad_norm": 27.852075576782227, "learning_rate": 3.7920984455958553e-06, "loss": 0.6399, "mean_token_accuracy": 0.9103163778781891, "num_tokens": 6870965.0, "step": 3835 }, { "epoch": 0.6211642781960974, "grad_norm": 27.417694091796875, "learning_rate": 3.7904792746113993e-06, "loss": 0.5679, "mean_token_accuracy": 0.918163388967514, "num_tokens": 6872758.0, "step": 3836 }, { "epoch": 0.6213262084041778, "grad_norm": 32.20460510253906, "learning_rate": 3.7888601036269434e-06, "loss": 0.6857, "mean_token_accuracy": 0.9036674499511719, "num_tokens": 6874539.0, "step": 3837 }, { "epoch": 0.6214881386122582, "grad_norm": 25.17333221435547, "learning_rate": 3.7872409326424874e-06, "loss": 0.6015, "mean_token_accuracy": 0.9170294404029846, "num_tokens": 6876329.0, "step": 3838 }, { "epoch": 0.6216500688203385, "grad_norm": 36.569908142089844, "learning_rate": 3.7856217616580314e-06, "loss": 0.7854, "mean_token_accuracy": 0.9045454561710358, "num_tokens": 6878124.0, "step": 3839 }, { "epoch": 0.6218119990284188, "grad_norm": 25.838119506835938, "learning_rate": 3.7840025906735754e-06, "loss": 0.6243, "mean_token_accuracy": 0.9171971678733826, "num_tokens": 6879937.0, "step": 3840 }, { "epoch": 0.6219739292364991, "grad_norm": 38.13341522216797, "learning_rate": 3.7823834196891194e-06, "loss": 0.7494, "mean_token_accuracy": 0.8968901038169861, "num_tokens": 6881731.0, "step": 3841 }, { "epoch": 0.6221358594445794, "grad_norm": 37.35586166381836, "learning_rate": 3.7807642487046634e-06, "loss": 1.0192, "mean_token_accuracy": 0.8850767314434052, "num_tokens": 6883530.0, "step": 3842 }, { "epoch": 0.6222977896526597, "grad_norm": 26.050010681152344, "learning_rate": 3.779145077720208e-06, "loss": 0.6005, "mean_token_accuracy": 0.9114651679992676, "num_tokens": 6885313.0, "step": 3843 }, { "epoch": 0.62245971986074, "grad_norm": 20.554168701171875, "learning_rate": 3.777525906735752e-06, "loss": 0.5177, "mean_token_accuracy": 0.9287814497947693, "num_tokens": 6887106.0, "step": 3844 }, { "epoch": 0.6226216500688203, "grad_norm": 35.419403076171875, "learning_rate": 3.775906735751296e-06, "loss": 0.8893, "mean_token_accuracy": 0.8914404511451721, "num_tokens": 6888904.0, "step": 3845 }, { "epoch": 0.6227835802769006, "grad_norm": 32.76790237426758, "learning_rate": 3.77428756476684e-06, "loss": 0.6952, "mean_token_accuracy": 0.8984722197055817, "num_tokens": 6890710.0, "step": 3846 }, { "epoch": 0.6229455104849809, "grad_norm": 34.707923889160156, "learning_rate": 3.772668393782384e-06, "loss": 0.7324, "mean_token_accuracy": 0.8991894721984863, "num_tokens": 6892510.0, "step": 3847 }, { "epoch": 0.6231074406930613, "grad_norm": 17.059738159179688, "learning_rate": 3.771049222797928e-06, "loss": 0.472, "mean_token_accuracy": 0.9340969324111938, "num_tokens": 6894310.0, "step": 3848 }, { "epoch": 0.6232693709011417, "grad_norm": 24.240140914916992, "learning_rate": 3.769430051813472e-06, "loss": 0.6523, "mean_token_accuracy": 0.9148893356323242, "num_tokens": 6896104.0, "step": 3849 }, { "epoch": 0.623431301109222, "grad_norm": 27.741924285888672, "learning_rate": 3.767810880829016e-06, "loss": 0.7679, "mean_token_accuracy": 0.9082667231559753, "num_tokens": 6897899.0, "step": 3850 }, { "epoch": 0.6235932313173023, "grad_norm": 36.087921142578125, "learning_rate": 3.76619170984456e-06, "loss": 0.8804, "mean_token_accuracy": 0.9043117463588715, "num_tokens": 6899694.0, "step": 3851 }, { "epoch": 0.6237551615253826, "grad_norm": 34.343379974365234, "learning_rate": 3.764572538860104e-06, "loss": 0.7528, "mean_token_accuracy": 0.905139833688736, "num_tokens": 6901488.0, "step": 3852 }, { "epoch": 0.6239170917334629, "grad_norm": 19.885026931762695, "learning_rate": 3.762953367875648e-06, "loss": 0.6097, "mean_token_accuracy": 0.919117659330368, "num_tokens": 6903272.0, "step": 3853 }, { "epoch": 0.6240790219415432, "grad_norm": 37.40237045288086, "learning_rate": 3.761334196891192e-06, "loss": 0.8988, "mean_token_accuracy": 0.885185182094574, "num_tokens": 6905063.0, "step": 3854 }, { "epoch": 0.6242409521496235, "grad_norm": 17.243078231811523, "learning_rate": 3.759715025906736e-06, "loss": 0.4941, "mean_token_accuracy": 0.9273434281349182, "num_tokens": 6906850.0, "step": 3855 }, { "epoch": 0.6244028823577038, "grad_norm": 28.773216247558594, "learning_rate": 3.75809585492228e-06, "loss": 0.5792, "mean_token_accuracy": 0.9146616458892822, "num_tokens": 6908654.0, "step": 3856 }, { "epoch": 0.6245648125657841, "grad_norm": 22.9847354888916, "learning_rate": 3.756476683937824e-06, "loss": 0.5481, "mean_token_accuracy": 0.9183006584644318, "num_tokens": 6910446.0, "step": 3857 }, { "epoch": 0.6247267427738644, "grad_norm": 16.167688369750977, "learning_rate": 3.754857512953368e-06, "loss": 0.4918, "mean_token_accuracy": 0.9342130422592163, "num_tokens": 6912247.0, "step": 3858 }, { "epoch": 0.6248886729819448, "grad_norm": 19.28745460510254, "learning_rate": 3.753238341968912e-06, "loss": 0.5388, "mean_token_accuracy": 0.9184104800224304, "num_tokens": 6914041.0, "step": 3859 }, { "epoch": 0.6250506031900251, "grad_norm": 21.27528190612793, "learning_rate": 3.751619170984456e-06, "loss": 0.5133, "mean_token_accuracy": 0.9240615367889404, "num_tokens": 6915830.0, "step": 3860 }, { "epoch": 0.6252125333981055, "grad_norm": 22.08339500427246, "learning_rate": 3.7500000000000005e-06, "loss": 0.5484, "mean_token_accuracy": 0.9166505932807922, "num_tokens": 6917630.0, "step": 3861 }, { "epoch": 0.6253744636061858, "grad_norm": 28.899700164794922, "learning_rate": 3.7483808290155445e-06, "loss": 0.5946, "mean_token_accuracy": 0.9172877967357635, "num_tokens": 6919420.0, "step": 3862 }, { "epoch": 0.6255363938142661, "grad_norm": 28.444189071655273, "learning_rate": 3.7467616580310885e-06, "loss": 0.5929, "mean_token_accuracy": 0.9182111024856567, "num_tokens": 6921214.0, "step": 3863 }, { "epoch": 0.6256983240223464, "grad_norm": 33.272071838378906, "learning_rate": 3.7451424870466326e-06, "loss": 0.7393, "mean_token_accuracy": 0.9063766002655029, "num_tokens": 6923014.0, "step": 3864 }, { "epoch": 0.6258602542304267, "grad_norm": 33.116668701171875, "learning_rate": 3.7435233160621766e-06, "loss": 0.9162, "mean_token_accuracy": 0.9050492644309998, "num_tokens": 6924811.0, "step": 3865 }, { "epoch": 0.626022184438507, "grad_norm": 20.595605850219727, "learning_rate": 3.7419041450777206e-06, "loss": 0.608, "mean_token_accuracy": 0.9046897888183594, "num_tokens": 6926595.0, "step": 3866 }, { "epoch": 0.6261841146465873, "grad_norm": 24.082242965698242, "learning_rate": 3.7402849740932646e-06, "loss": 0.598, "mean_token_accuracy": 0.916402131319046, "num_tokens": 6928382.0, "step": 3867 }, { "epoch": 0.6263460448546676, "grad_norm": 29.156993865966797, "learning_rate": 3.7386658031088086e-06, "loss": 0.6996, "mean_token_accuracy": 0.9047702252864838, "num_tokens": 6930177.0, "step": 3868 }, { "epoch": 0.6265079750627479, "grad_norm": 29.050241470336914, "learning_rate": 3.7370466321243526e-06, "loss": 0.6635, "mean_token_accuracy": 0.9064671695232391, "num_tokens": 6931977.0, "step": 3869 }, { "epoch": 0.6266699052708282, "grad_norm": 23.61697769165039, "learning_rate": 3.7354274611398966e-06, "loss": 0.5875, "mean_token_accuracy": 0.9177428483963013, "num_tokens": 6933769.0, "step": 3870 }, { "epoch": 0.6268318354789086, "grad_norm": 24.783767700195312, "learning_rate": 3.7338082901554406e-06, "loss": 0.6391, "mean_token_accuracy": 0.9117632210254669, "num_tokens": 6935564.0, "step": 3871 }, { "epoch": 0.626993765686989, "grad_norm": 24.826244354248047, "learning_rate": 3.7321891191709847e-06, "loss": 0.591, "mean_token_accuracy": 0.9235645532608032, "num_tokens": 6937363.0, "step": 3872 }, { "epoch": 0.6271556958950693, "grad_norm": 30.99048614501953, "learning_rate": 3.7305699481865287e-06, "loss": 0.7138, "mean_token_accuracy": 0.9094326794147491, "num_tokens": 6939151.0, "step": 3873 }, { "epoch": 0.6273176261031496, "grad_norm": 23.033340454101562, "learning_rate": 3.7289507772020727e-06, "loss": 0.606, "mean_token_accuracy": 0.9244449734687805, "num_tokens": 6940954.0, "step": 3874 }, { "epoch": 0.6274795563112299, "grad_norm": 27.47634506225586, "learning_rate": 3.7273316062176167e-06, "loss": 0.6578, "mean_token_accuracy": 0.9157631993293762, "num_tokens": 6942739.0, "step": 3875 }, { "epoch": 0.6276414865193102, "grad_norm": 14.449963569641113, "learning_rate": 3.7257124352331607e-06, "loss": 0.5368, "mean_token_accuracy": 0.9285714328289032, "num_tokens": 6944517.0, "step": 3876 }, { "epoch": 0.6278034167273905, "grad_norm": 23.790767669677734, "learning_rate": 3.7240932642487047e-06, "loss": 0.5338, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 6946311.0, "step": 3877 }, { "epoch": 0.6279653469354708, "grad_norm": 33.56352996826172, "learning_rate": 3.7224740932642487e-06, "loss": 0.5983, "mean_token_accuracy": 0.9181795120239258, "num_tokens": 6948104.0, "step": 3878 }, { "epoch": 0.6281272771435511, "grad_norm": 22.858064651489258, "learning_rate": 3.7208549222797928e-06, "loss": 0.5906, "mean_token_accuracy": 0.9177290201187134, "num_tokens": 6949895.0, "step": 3879 }, { "epoch": 0.6282892073516314, "grad_norm": 15.735260963439941, "learning_rate": 3.719235751295337e-06, "loss": 0.5132, "mean_token_accuracy": 0.9262527227401733, "num_tokens": 6951678.0, "step": 3880 }, { "epoch": 0.6284511375597117, "grad_norm": 30.318920135498047, "learning_rate": 3.717616580310881e-06, "loss": 0.9192, "mean_token_accuracy": 0.9026936292648315, "num_tokens": 6953468.0, "step": 3881 }, { "epoch": 0.6286130677677921, "grad_norm": 30.7633056640625, "learning_rate": 3.7159974093264252e-06, "loss": 0.6508, "mean_token_accuracy": 0.9145810008049011, "num_tokens": 6955261.0, "step": 3882 }, { "epoch": 0.6287749979758724, "grad_norm": 17.587066650390625, "learning_rate": 3.7143782383419692e-06, "loss": 0.5122, "mean_token_accuracy": 0.9282888174057007, "num_tokens": 6957052.0, "step": 3883 }, { "epoch": 0.6289369281839527, "grad_norm": 31.08521842956543, "learning_rate": 3.7127590673575132e-06, "loss": 0.7563, "mean_token_accuracy": 0.9106450378894806, "num_tokens": 6958844.0, "step": 3884 }, { "epoch": 0.6290988583920331, "grad_norm": 22.40519142150879, "learning_rate": 3.7111398963730573e-06, "loss": 0.5317, "mean_token_accuracy": 0.9297545254230499, "num_tokens": 6960641.0, "step": 3885 }, { "epoch": 0.6292607886001134, "grad_norm": 18.38348388671875, "learning_rate": 3.7095207253886013e-06, "loss": 0.5503, "mean_token_accuracy": 0.9253731369972229, "num_tokens": 6962421.0, "step": 3886 }, { "epoch": 0.6294227188081937, "grad_norm": 35.30379867553711, "learning_rate": 3.7079015544041453e-06, "loss": 0.7557, "mean_token_accuracy": 0.8851491510868073, "num_tokens": 6964211.0, "step": 3887 }, { "epoch": 0.629584649016274, "grad_norm": 24.300065994262695, "learning_rate": 3.7062823834196893e-06, "loss": 0.6381, "mean_token_accuracy": 0.907021552324295, "num_tokens": 6966013.0, "step": 3888 }, { "epoch": 0.6297465792243543, "grad_norm": 31.942567825317383, "learning_rate": 3.7046632124352333e-06, "loss": 0.573, "mean_token_accuracy": 0.9090595841407776, "num_tokens": 6967800.0, "step": 3889 }, { "epoch": 0.6299085094324346, "grad_norm": 34.48097610473633, "learning_rate": 3.7030440414507773e-06, "loss": 0.74, "mean_token_accuracy": 0.9079427421092987, "num_tokens": 6969601.0, "step": 3890 }, { "epoch": 0.6300704396405149, "grad_norm": 29.454008102416992, "learning_rate": 3.7014248704663213e-06, "loss": 0.722, "mean_token_accuracy": 0.9096866250038147, "num_tokens": 6971391.0, "step": 3891 }, { "epoch": 0.6302323698485952, "grad_norm": 25.57354164123535, "learning_rate": 3.6998056994818653e-06, "loss": 0.6581, "mean_token_accuracy": 0.9094942808151245, "num_tokens": 6973179.0, "step": 3892 }, { "epoch": 0.6303943000566756, "grad_norm": 17.95140838623047, "learning_rate": 3.6981865284974094e-06, "loss": 0.507, "mean_token_accuracy": 0.9291283786296844, "num_tokens": 6974959.0, "step": 3893 }, { "epoch": 0.6305562302647559, "grad_norm": 28.256980895996094, "learning_rate": 3.6965673575129534e-06, "loss": 0.6552, "mean_token_accuracy": 0.9074419438838959, "num_tokens": 6976741.0, "step": 3894 }, { "epoch": 0.6307181604728362, "grad_norm": 31.554920196533203, "learning_rate": 3.6949481865284974e-06, "loss": 0.8858, "mean_token_accuracy": 0.8919501602649689, "num_tokens": 6978529.0, "step": 3895 }, { "epoch": 0.6308800906809165, "grad_norm": 26.896007537841797, "learning_rate": 3.6933290155440414e-06, "loss": 0.5971, "mean_token_accuracy": 0.9038753807544708, "num_tokens": 6980322.0, "step": 3896 }, { "epoch": 0.6310420208889969, "grad_norm": 26.34307289123535, "learning_rate": 3.6917098445595854e-06, "loss": 0.5787, "mean_token_accuracy": 0.9217216372489929, "num_tokens": 6982116.0, "step": 3897 }, { "epoch": 0.6312039510970772, "grad_norm": 25.917030334472656, "learning_rate": 3.6900906735751294e-06, "loss": 0.6013, "mean_token_accuracy": 0.9157635569572449, "num_tokens": 6983913.0, "step": 3898 }, { "epoch": 0.6313658813051575, "grad_norm": 28.111858367919922, "learning_rate": 3.688471502590674e-06, "loss": 0.6805, "mean_token_accuracy": 0.9156745970249176, "num_tokens": 6985709.0, "step": 3899 }, { "epoch": 0.6315278115132378, "grad_norm": 20.250635147094727, "learning_rate": 3.686852331606218e-06, "loss": 0.5391, "mean_token_accuracy": 0.9263465404510498, "num_tokens": 6987493.0, "step": 3900 }, { "epoch": 0.6316897417213181, "grad_norm": 18.9285945892334, "learning_rate": 3.685233160621762e-06, "loss": 0.6869, "mean_token_accuracy": 0.9077706634998322, "num_tokens": 6989286.0, "step": 3901 }, { "epoch": 0.6318516719293984, "grad_norm": 21.333555221557617, "learning_rate": 3.683613989637306e-06, "loss": 0.6838, "mean_token_accuracy": 0.9204900860786438, "num_tokens": 6991075.0, "step": 3902 }, { "epoch": 0.6320136021374787, "grad_norm": 21.34879493713379, "learning_rate": 3.68199481865285e-06, "loss": 0.5428, "mean_token_accuracy": 0.9295739829540253, "num_tokens": 6992871.0, "step": 3903 }, { "epoch": 0.632175532345559, "grad_norm": 24.16707420349121, "learning_rate": 3.680375647668394e-06, "loss": 0.5636, "mean_token_accuracy": 0.913907915353775, "num_tokens": 6994673.0, "step": 3904 }, { "epoch": 0.6323374625536394, "grad_norm": 19.35509490966797, "learning_rate": 3.678756476683938e-06, "loss": 0.5073, "mean_token_accuracy": 0.9265811145305634, "num_tokens": 6996458.0, "step": 3905 }, { "epoch": 0.6324993927617197, "grad_norm": 29.268035888671875, "learning_rate": 3.677137305699482e-06, "loss": 0.6165, "mean_token_accuracy": 0.9124059975147247, "num_tokens": 6998243.0, "step": 3906 }, { "epoch": 0.6326613229698, "grad_norm": 28.522531509399414, "learning_rate": 3.675518134715026e-06, "loss": 0.5831, "mean_token_accuracy": 0.9235521256923676, "num_tokens": 7000043.0, "step": 3907 }, { "epoch": 0.6328232531778804, "grad_norm": 24.959781646728516, "learning_rate": 3.67389896373057e-06, "loss": 0.6096, "mean_token_accuracy": 0.9117303192615509, "num_tokens": 7001836.0, "step": 3908 }, { "epoch": 0.6329851833859607, "grad_norm": 30.961679458618164, "learning_rate": 3.672279792746114e-06, "loss": 0.7134, "mean_token_accuracy": 0.9039416313171387, "num_tokens": 7003629.0, "step": 3909 }, { "epoch": 0.633147113594041, "grad_norm": 30.942033767700195, "learning_rate": 3.6706606217616584e-06, "loss": 0.5916, "mean_token_accuracy": 0.9170055389404297, "num_tokens": 7005418.0, "step": 3910 }, { "epoch": 0.6333090438021213, "grad_norm": 29.44158935546875, "learning_rate": 3.6690414507772024e-06, "loss": 0.6526, "mean_token_accuracy": 0.9149852097034454, "num_tokens": 7007224.0, "step": 3911 }, { "epoch": 0.6334709740102016, "grad_norm": 19.448265075683594, "learning_rate": 3.667422279792747e-06, "loss": 0.5053, "mean_token_accuracy": 0.9271235466003418, "num_tokens": 7009024.0, "step": 3912 }, { "epoch": 0.6336329042182819, "grad_norm": 26.805383682250977, "learning_rate": 3.665803108808291e-06, "loss": 0.5593, "mean_token_accuracy": 0.9213188886642456, "num_tokens": 7010818.0, "step": 3913 }, { "epoch": 0.6337948344263622, "grad_norm": 27.27880096435547, "learning_rate": 3.664183937823835e-06, "loss": 0.6715, "mean_token_accuracy": 0.9093508422374725, "num_tokens": 7012605.0, "step": 3914 }, { "epoch": 0.6339567646344425, "grad_norm": 34.75452423095703, "learning_rate": 3.662564766839379e-06, "loss": 0.796, "mean_token_accuracy": 0.9056878387928009, "num_tokens": 7014392.0, "step": 3915 }, { "epoch": 0.6341186948425229, "grad_norm": 41.40286636352539, "learning_rate": 3.660945595854923e-06, "loss": 0.84, "mean_token_accuracy": 0.8972837030887604, "num_tokens": 7016186.0, "step": 3916 }, { "epoch": 0.6342806250506032, "grad_norm": 29.32868766784668, "learning_rate": 3.659326424870467e-06, "loss": 0.718, "mean_token_accuracy": 0.9085586667060852, "num_tokens": 7017982.0, "step": 3917 }, { "epoch": 0.6344425552586835, "grad_norm": 18.295459747314453, "learning_rate": 3.657707253886011e-06, "loss": 0.4841, "mean_token_accuracy": 0.9259873926639557, "num_tokens": 7019763.0, "step": 3918 }, { "epoch": 0.6346044854667638, "grad_norm": 30.088726043701172, "learning_rate": 3.656088082901555e-06, "loss": 0.563, "mean_token_accuracy": 0.9142156839370728, "num_tokens": 7021555.0, "step": 3919 }, { "epoch": 0.6347664156748442, "grad_norm": 34.729549407958984, "learning_rate": 3.654468911917099e-06, "loss": 0.6942, "mean_token_accuracy": 0.9172413945198059, "num_tokens": 7023357.0, "step": 3920 }, { "epoch": 0.6349283458829245, "grad_norm": 36.46147537231445, "learning_rate": 3.652849740932643e-06, "loss": 0.857, "mean_token_accuracy": 0.8920139968395233, "num_tokens": 7025156.0, "step": 3921 }, { "epoch": 0.6350902760910048, "grad_norm": 26.019275665283203, "learning_rate": 3.651230569948187e-06, "loss": 0.571, "mean_token_accuracy": 0.9231078922748566, "num_tokens": 7026941.0, "step": 3922 }, { "epoch": 0.6352522062990851, "grad_norm": 15.746135711669922, "learning_rate": 3.649611398963731e-06, "loss": 0.4975, "mean_token_accuracy": 0.9258066415786743, "num_tokens": 7028736.0, "step": 3923 }, { "epoch": 0.6354141365071654, "grad_norm": 24.64112091064453, "learning_rate": 3.647992227979275e-06, "loss": 0.7046, "mean_token_accuracy": 0.9069499373435974, "num_tokens": 7030518.0, "step": 3924 }, { "epoch": 0.6355760667152457, "grad_norm": 29.56614112854004, "learning_rate": 3.646373056994819e-06, "loss": 0.6407, "mean_token_accuracy": 0.9186462461948395, "num_tokens": 7032310.0, "step": 3925 }, { "epoch": 0.635737996923326, "grad_norm": 25.114641189575195, "learning_rate": 3.644753886010363e-06, "loss": 0.5943, "mean_token_accuracy": 0.9129549264907837, "num_tokens": 7034097.0, "step": 3926 }, { "epoch": 0.6358999271314064, "grad_norm": 27.614503860473633, "learning_rate": 3.643134715025907e-06, "loss": 0.6117, "mean_token_accuracy": 0.9077828824520111, "num_tokens": 7035880.0, "step": 3927 }, { "epoch": 0.6360618573394867, "grad_norm": 33.71192169189453, "learning_rate": 3.641515544041451e-06, "loss": 0.659, "mean_token_accuracy": 0.9014345109462738, "num_tokens": 7037677.0, "step": 3928 }, { "epoch": 0.636223787547567, "grad_norm": 37.18674850463867, "learning_rate": 3.639896373056995e-06, "loss": 0.8867, "mean_token_accuracy": 0.9086325168609619, "num_tokens": 7039471.0, "step": 3929 }, { "epoch": 0.6363857177556473, "grad_norm": 27.030799865722656, "learning_rate": 3.638277202072539e-06, "loss": 0.5729, "mean_token_accuracy": 0.9227983057498932, "num_tokens": 7041267.0, "step": 3930 }, { "epoch": 0.6365476479637276, "grad_norm": 34.76551055908203, "learning_rate": 3.6366580310880836e-06, "loss": 0.6748, "mean_token_accuracy": 0.9083834290504456, "num_tokens": 7043052.0, "step": 3931 }, { "epoch": 0.636709578171808, "grad_norm": 28.928834915161133, "learning_rate": 3.6350388601036276e-06, "loss": 0.5537, "mean_token_accuracy": 0.9145744144916534, "num_tokens": 7044844.0, "step": 3932 }, { "epoch": 0.6368715083798883, "grad_norm": 26.747303009033203, "learning_rate": 3.6334196891191716e-06, "loss": 0.585, "mean_token_accuracy": 0.9210945665836334, "num_tokens": 7046635.0, "step": 3933 }, { "epoch": 0.6370334385879686, "grad_norm": 27.86298942565918, "learning_rate": 3.6318005181347156e-06, "loss": 0.6984, "mean_token_accuracy": 0.9195342361927032, "num_tokens": 7048431.0, "step": 3934 }, { "epoch": 0.6371953687960489, "grad_norm": 23.24553680419922, "learning_rate": 3.6301813471502596e-06, "loss": 0.6336, "mean_token_accuracy": 0.9113166928291321, "num_tokens": 7050223.0, "step": 3935 }, { "epoch": 0.6373572990041292, "grad_norm": 28.358461380004883, "learning_rate": 3.6285621761658036e-06, "loss": 0.6825, "mean_token_accuracy": 0.910344123840332, "num_tokens": 7052013.0, "step": 3936 }, { "epoch": 0.6375192292122095, "grad_norm": 25.419719696044922, "learning_rate": 3.6269430051813476e-06, "loss": 0.6042, "mean_token_accuracy": 0.9108070731163025, "num_tokens": 7053794.0, "step": 3937 }, { "epoch": 0.6376811594202898, "grad_norm": 33.608699798583984, "learning_rate": 3.6253238341968916e-06, "loss": 0.6902, "mean_token_accuracy": 0.8931034505367279, "num_tokens": 7055596.0, "step": 3938 }, { "epoch": 0.6378430896283702, "grad_norm": 20.32437515258789, "learning_rate": 3.6237046632124357e-06, "loss": 0.4781, "mean_token_accuracy": 0.9347826242446899, "num_tokens": 7057384.0, "step": 3939 }, { "epoch": 0.6380050198364505, "grad_norm": 25.781030654907227, "learning_rate": 3.6220854922279797e-06, "loss": 0.5446, "mean_token_accuracy": 0.9247430562973022, "num_tokens": 7059175.0, "step": 3940 }, { "epoch": 0.6381669500445308, "grad_norm": 33.92966079711914, "learning_rate": 3.6204663212435237e-06, "loss": 0.6046, "mean_token_accuracy": 0.9137355089187622, "num_tokens": 7060964.0, "step": 3941 }, { "epoch": 0.6383288802526111, "grad_norm": 32.363685607910156, "learning_rate": 3.6188471502590677e-06, "loss": 0.5977, "mean_token_accuracy": 0.9139516949653625, "num_tokens": 7062755.0, "step": 3942 }, { "epoch": 0.6384908104606914, "grad_norm": 25.286544799804688, "learning_rate": 3.6172279792746117e-06, "loss": 0.5894, "mean_token_accuracy": 0.9151683747768402, "num_tokens": 7064538.0, "step": 3943 }, { "epoch": 0.6386527406687718, "grad_norm": 24.947399139404297, "learning_rate": 3.6156088082901557e-06, "loss": 0.5357, "mean_token_accuracy": 0.9258343279361725, "num_tokens": 7066333.0, "step": 3944 }, { "epoch": 0.6388146708768521, "grad_norm": 17.55426597595215, "learning_rate": 3.6139896373056997e-06, "loss": 0.5526, "mean_token_accuracy": 0.9315025210380554, "num_tokens": 7068121.0, "step": 3945 }, { "epoch": 0.6389766010849324, "grad_norm": 17.16888999938965, "learning_rate": 3.6123704663212437e-06, "loss": 0.4958, "mean_token_accuracy": 0.93031245470047, "num_tokens": 7069906.0, "step": 3946 }, { "epoch": 0.6391385312930127, "grad_norm": 26.1005859375, "learning_rate": 3.6107512953367878e-06, "loss": 0.5703, "mean_token_accuracy": 0.9173833131790161, "num_tokens": 7071709.0, "step": 3947 }, { "epoch": 0.639300461501093, "grad_norm": 23.8436336517334, "learning_rate": 3.6091321243523318e-06, "loss": 0.4877, "mean_token_accuracy": 0.9242250323295593, "num_tokens": 7073498.0, "step": 3948 }, { "epoch": 0.6394623917091733, "grad_norm": 20.450000762939453, "learning_rate": 3.6075129533678758e-06, "loss": 0.5572, "mean_token_accuracy": 0.926174134016037, "num_tokens": 7075281.0, "step": 3949 }, { "epoch": 0.6396243219172537, "grad_norm": 25.024497985839844, "learning_rate": 3.6058937823834202e-06, "loss": 0.5331, "mean_token_accuracy": 0.9184607565402985, "num_tokens": 7077075.0, "step": 3950 }, { "epoch": 0.639786252125334, "grad_norm": 27.83124542236328, "learning_rate": 3.6042746113989642e-06, "loss": 0.6047, "mean_token_accuracy": 0.9117646813392639, "num_tokens": 7078859.0, "step": 3951 }, { "epoch": 0.6399481823334143, "grad_norm": 20.56722068786621, "learning_rate": 3.6026554404145082e-06, "loss": 0.5512, "mean_token_accuracy": 0.9266431927680969, "num_tokens": 7080657.0, "step": 3952 }, { "epoch": 0.6401101125414946, "grad_norm": 27.27486228942871, "learning_rate": 3.6010362694300523e-06, "loss": 0.5952, "mean_token_accuracy": 0.9147057235240936, "num_tokens": 7082449.0, "step": 3953 }, { "epoch": 0.6402720427495749, "grad_norm": 33.001556396484375, "learning_rate": 3.5994170984455963e-06, "loss": 0.6354, "mean_token_accuracy": 0.9067221879959106, "num_tokens": 7084240.0, "step": 3954 }, { "epoch": 0.6404339729576553, "grad_norm": 25.626216888427734, "learning_rate": 3.5977979274611403e-06, "loss": 0.5506, "mean_token_accuracy": 0.9249537289142609, "num_tokens": 7086031.0, "step": 3955 }, { "epoch": 0.6405959031657356, "grad_norm": 38.346561431884766, "learning_rate": 3.5961787564766843e-06, "loss": 0.6513, "mean_token_accuracy": 0.9123580157756805, "num_tokens": 7087829.0, "step": 3956 }, { "epoch": 0.6407578333738159, "grad_norm": 30.982160568237305, "learning_rate": 3.5945595854922283e-06, "loss": 0.67, "mean_token_accuracy": 0.91215580701828, "num_tokens": 7089625.0, "step": 3957 }, { "epoch": 0.6409197635818962, "grad_norm": 26.56120491027832, "learning_rate": 3.5929404145077723e-06, "loss": 0.6951, "mean_token_accuracy": 0.9053263068199158, "num_tokens": 7091401.0, "step": 3958 }, { "epoch": 0.6410816937899765, "grad_norm": 32.32514190673828, "learning_rate": 3.5913212435233163e-06, "loss": 0.6492, "mean_token_accuracy": 0.9076152145862579, "num_tokens": 7093184.0, "step": 3959 }, { "epoch": 0.6412436239980568, "grad_norm": 20.913759231567383, "learning_rate": 3.5897020725388604e-06, "loss": 0.532, "mean_token_accuracy": 0.9232880473136902, "num_tokens": 7094970.0, "step": 3960 }, { "epoch": 0.6414055542061372, "grad_norm": 29.14267349243164, "learning_rate": 3.5880829015544044e-06, "loss": 0.5788, "mean_token_accuracy": 0.9181873500347137, "num_tokens": 7096763.0, "step": 3961 }, { "epoch": 0.6415674844142175, "grad_norm": 34.89345169067383, "learning_rate": 3.5864637305699484e-06, "loss": 0.7605, "mean_token_accuracy": 0.8952554762363434, "num_tokens": 7098552.0, "step": 3962 }, { "epoch": 0.6417294146222978, "grad_norm": 33.273502349853516, "learning_rate": 3.5848445595854924e-06, "loss": 0.6853, "mean_token_accuracy": 0.9072793126106262, "num_tokens": 7100334.0, "step": 3963 }, { "epoch": 0.6418913448303781, "grad_norm": 17.590497970581055, "learning_rate": 3.5832253886010364e-06, "loss": 0.4723, "mean_token_accuracy": 0.9314852058887482, "num_tokens": 7102124.0, "step": 3964 }, { "epoch": 0.6420532750384584, "grad_norm": 37.113136291503906, "learning_rate": 3.5816062176165804e-06, "loss": 0.6929, "mean_token_accuracy": 0.9016793072223663, "num_tokens": 7103921.0, "step": 3965 }, { "epoch": 0.6422152052465387, "grad_norm": 37.9775505065918, "learning_rate": 3.5799870466321244e-06, "loss": 0.9025, "mean_token_accuracy": 0.8858951032161713, "num_tokens": 7105713.0, "step": 3966 }, { "epoch": 0.642377135454619, "grad_norm": 28.24148178100586, "learning_rate": 3.5783678756476684e-06, "loss": 0.5886, "mean_token_accuracy": 0.920736163854599, "num_tokens": 7107502.0, "step": 3967 }, { "epoch": 0.6425390656626994, "grad_norm": 23.76460838317871, "learning_rate": 3.5767487046632125e-06, "loss": 0.5593, "mean_token_accuracy": 0.9266505837440491, "num_tokens": 7109287.0, "step": 3968 }, { "epoch": 0.6427009958707797, "grad_norm": 27.81460189819336, "learning_rate": 3.575129533678757e-06, "loss": 0.6773, "mean_token_accuracy": 0.9084613025188446, "num_tokens": 7111072.0, "step": 3969 }, { "epoch": 0.64286292607886, "grad_norm": 18.791671752929688, "learning_rate": 3.573510362694301e-06, "loss": 0.5925, "mean_token_accuracy": 0.9250357449054718, "num_tokens": 7112864.0, "step": 3970 }, { "epoch": 0.6430248562869403, "grad_norm": 28.244186401367188, "learning_rate": 3.571891191709845e-06, "loss": 0.63, "mean_token_accuracy": 0.9150694608688354, "num_tokens": 7114646.0, "step": 3971 }, { "epoch": 0.6431867864950207, "grad_norm": 31.945478439331055, "learning_rate": 3.570272020725389e-06, "loss": 0.6909, "mean_token_accuracy": 0.9060380160808563, "num_tokens": 7116435.0, "step": 3972 }, { "epoch": 0.643348716703101, "grad_norm": 24.323877334594727, "learning_rate": 3.568652849740933e-06, "loss": 0.6058, "mean_token_accuracy": 0.9245142340660095, "num_tokens": 7118226.0, "step": 3973 }, { "epoch": 0.6435106469111813, "grad_norm": 12.806122779846191, "learning_rate": 3.567033678756477e-06, "loss": 0.5242, "mean_token_accuracy": 0.9333845376968384, "num_tokens": 7120008.0, "step": 3974 }, { "epoch": 0.6436725771192616, "grad_norm": 24.855051040649414, "learning_rate": 3.565414507772021e-06, "loss": 0.5768, "mean_token_accuracy": 0.9099896550178528, "num_tokens": 7121798.0, "step": 3975 }, { "epoch": 0.6438345073273419, "grad_norm": 30.180490493774414, "learning_rate": 3.563795336787565e-06, "loss": 0.6457, "mean_token_accuracy": 0.906272828578949, "num_tokens": 7123587.0, "step": 3976 }, { "epoch": 0.6439964375354222, "grad_norm": 21.050554275512695, "learning_rate": 3.562176165803109e-06, "loss": 0.4929, "mean_token_accuracy": 0.9244724810123444, "num_tokens": 7125378.0, "step": 3977 }, { "epoch": 0.6441583677435025, "grad_norm": 26.60388946533203, "learning_rate": 3.560556994818653e-06, "loss": 0.5712, "mean_token_accuracy": 0.9151099026203156, "num_tokens": 7127173.0, "step": 3978 }, { "epoch": 0.6443202979515829, "grad_norm": 17.773040771484375, "learning_rate": 3.558937823834197e-06, "loss": 0.4969, "mean_token_accuracy": 0.9279643595218658, "num_tokens": 7128963.0, "step": 3979 }, { "epoch": 0.6444822281596632, "grad_norm": 27.30126953125, "learning_rate": 3.557318652849741e-06, "loss": 0.5699, "mean_token_accuracy": 0.9145637154579163, "num_tokens": 7130756.0, "step": 3980 }, { "epoch": 0.6446441583677435, "grad_norm": 24.299997329711914, "learning_rate": 3.555699481865285e-06, "loss": 0.4939, "mean_token_accuracy": 0.9235875904560089, "num_tokens": 7132543.0, "step": 3981 }, { "epoch": 0.6448060885758238, "grad_norm": 36.111351013183594, "learning_rate": 3.554080310880829e-06, "loss": 0.9165, "mean_token_accuracy": 0.9024215638637543, "num_tokens": 7134340.0, "step": 3982 }, { "epoch": 0.6449680187839041, "grad_norm": 41.428157806396484, "learning_rate": 3.552461139896373e-06, "loss": 0.6967, "mean_token_accuracy": 0.8927203118801117, "num_tokens": 7136141.0, "step": 3983 }, { "epoch": 0.6451299489919845, "grad_norm": 23.4683780670166, "learning_rate": 3.550841968911917e-06, "loss": 0.594, "mean_token_accuracy": 0.9193591773509979, "num_tokens": 7137926.0, "step": 3984 }, { "epoch": 0.6452918792000648, "grad_norm": 18.577917098999023, "learning_rate": 3.549222797927461e-06, "loss": 0.5496, "mean_token_accuracy": 0.9229573309421539, "num_tokens": 7139725.0, "step": 3985 }, { "epoch": 0.6454538094081451, "grad_norm": 22.314523696899414, "learning_rate": 3.547603626943005e-06, "loss": 0.4902, "mean_token_accuracy": 0.9335517883300781, "num_tokens": 7141522.0, "step": 3986 }, { "epoch": 0.6456157396162254, "grad_norm": 30.128273010253906, "learning_rate": 3.5459844559585496e-06, "loss": 0.5723, "mean_token_accuracy": 0.9081010818481445, "num_tokens": 7143317.0, "step": 3987 }, { "epoch": 0.6457776698243057, "grad_norm": 22.064788818359375, "learning_rate": 3.5443652849740936e-06, "loss": 0.4927, "mean_token_accuracy": 0.9269450306892395, "num_tokens": 7145103.0, "step": 3988 }, { "epoch": 0.645939600032386, "grad_norm": 29.914934158325195, "learning_rate": 3.5427461139896376e-06, "loss": 0.5646, "mean_token_accuracy": 0.9183111488819122, "num_tokens": 7146898.0, "step": 3989 }, { "epoch": 0.6461015302404663, "grad_norm": 27.50421714782715, "learning_rate": 3.5411269430051816e-06, "loss": 0.5851, "mean_token_accuracy": 0.9204936921596527, "num_tokens": 7148687.0, "step": 3990 }, { "epoch": 0.6462634604485467, "grad_norm": 29.823270797729492, "learning_rate": 3.5395077720207256e-06, "loss": 0.8048, "mean_token_accuracy": 0.9050143361091614, "num_tokens": 7150471.0, "step": 3991 }, { "epoch": 0.646425390656627, "grad_norm": 29.062028884887695, "learning_rate": 3.5378886010362696e-06, "loss": 0.6259, "mean_token_accuracy": 0.9156323969364166, "num_tokens": 7152268.0, "step": 3992 }, { "epoch": 0.6465873208647073, "grad_norm": 16.323213577270508, "learning_rate": 3.5362694300518136e-06, "loss": 0.4745, "mean_token_accuracy": 0.9307036101818085, "num_tokens": 7154054.0, "step": 3993 }, { "epoch": 0.6467492510727876, "grad_norm": 25.366973876953125, "learning_rate": 3.5346502590673576e-06, "loss": 0.5303, "mean_token_accuracy": 0.9213786423206329, "num_tokens": 7155845.0, "step": 3994 }, { "epoch": 0.646911181280868, "grad_norm": 20.914785385131836, "learning_rate": 3.5330310880829017e-06, "loss": 0.5457, "mean_token_accuracy": 0.9300562739372253, "num_tokens": 7157643.0, "step": 3995 }, { "epoch": 0.6470731114889483, "grad_norm": 30.377864837646484, "learning_rate": 3.5314119170984457e-06, "loss": 0.6476, "mean_token_accuracy": 0.911697119474411, "num_tokens": 7159438.0, "step": 3996 }, { "epoch": 0.6472350416970286, "grad_norm": 35.38301467895508, "learning_rate": 3.5297927461139897e-06, "loss": 0.6195, "mean_token_accuracy": 0.9117632210254669, "num_tokens": 7161233.0, "step": 3997 }, { "epoch": 0.6473969719051089, "grad_norm": 24.891189575195312, "learning_rate": 3.5281735751295337e-06, "loss": 0.6083, "mean_token_accuracy": 0.9224931597709656, "num_tokens": 7163031.0, "step": 3998 }, { "epoch": 0.6475589021131892, "grad_norm": 27.8299560546875, "learning_rate": 3.5265544041450777e-06, "loss": 0.6279, "mean_token_accuracy": 0.9064423739910126, "num_tokens": 7164820.0, "step": 3999 }, { "epoch": 0.6477208323212695, "grad_norm": 33.034236907958984, "learning_rate": 3.5249352331606217e-06, "loss": 0.7418, "mean_token_accuracy": 0.8992805778980255, "num_tokens": 7166610.0, "step": 4000 }, { "epoch": 0.6478827625293498, "grad_norm": 28.056472778320312, "learning_rate": 3.5233160621761657e-06, "loss": 0.5608, "mean_token_accuracy": 0.9202331602573395, "num_tokens": 7168398.0, "step": 4001 }, { "epoch": 0.6480446927374302, "grad_norm": 23.79387092590332, "learning_rate": 3.5216968911917097e-06, "loss": 0.5133, "mean_token_accuracy": 0.9296690225601196, "num_tokens": 7170195.0, "step": 4002 }, { "epoch": 0.6482066229455105, "grad_norm": 29.284326553344727, "learning_rate": 3.5200777202072538e-06, "loss": 0.6893, "mean_token_accuracy": 0.9033879339694977, "num_tokens": 7171987.0, "step": 4003 }, { "epoch": 0.6483685531535908, "grad_norm": 24.550888061523438, "learning_rate": 3.5184585492227978e-06, "loss": 0.4938, "mean_token_accuracy": 0.9292853772640228, "num_tokens": 7173782.0, "step": 4004 }, { "epoch": 0.6485304833616711, "grad_norm": 32.4036979675293, "learning_rate": 3.5168393782383418e-06, "loss": 0.654, "mean_token_accuracy": 0.9061261713504791, "num_tokens": 7175571.0, "step": 4005 }, { "epoch": 0.6486924135697515, "grad_norm": 22.52859115600586, "learning_rate": 3.5152202072538866e-06, "loss": 0.5107, "mean_token_accuracy": 0.9306386113166809, "num_tokens": 7177371.0, "step": 4006 }, { "epoch": 0.6488543437778318, "grad_norm": 21.782211303710938, "learning_rate": 3.5136010362694307e-06, "loss": 0.5394, "mean_token_accuracy": 0.9237439632415771, "num_tokens": 7179159.0, "step": 4007 }, { "epoch": 0.6490162739859121, "grad_norm": 25.6978759765625, "learning_rate": 3.5119818652849747e-06, "loss": 0.593, "mean_token_accuracy": 0.9242961406707764, "num_tokens": 7180948.0, "step": 4008 }, { "epoch": 0.6491782041939924, "grad_norm": 22.900150299072266, "learning_rate": 3.5103626943005187e-06, "loss": 0.5799, "mean_token_accuracy": 0.9264069199562073, "num_tokens": 7182732.0, "step": 4009 }, { "epoch": 0.6493401344020727, "grad_norm": 21.268558502197266, "learning_rate": 3.5087435233160627e-06, "loss": 0.5266, "mean_token_accuracy": 0.9253484010696411, "num_tokens": 7184524.0, "step": 4010 }, { "epoch": 0.649502064610153, "grad_norm": 39.48198699951172, "learning_rate": 3.5071243523316067e-06, "loss": 0.8242, "mean_token_accuracy": 0.8874479532241821, "num_tokens": 7186327.0, "step": 4011 }, { "epoch": 0.6496639948182333, "grad_norm": 31.47050666809082, "learning_rate": 3.5055051813471507e-06, "loss": 0.6096, "mean_token_accuracy": 0.9037947356700897, "num_tokens": 7188118.0, "step": 4012 }, { "epoch": 0.6498259250263136, "grad_norm": 27.42321014404297, "learning_rate": 3.5038860103626947e-06, "loss": 0.6361, "mean_token_accuracy": 0.9099584519863129, "num_tokens": 7189906.0, "step": 4013 }, { "epoch": 0.649987855234394, "grad_norm": 28.187917709350586, "learning_rate": 3.5022668393782388e-06, "loss": 0.6362, "mean_token_accuracy": 0.9074857831001282, "num_tokens": 7191710.0, "step": 4014 }, { "epoch": 0.6501497854424743, "grad_norm": 25.513425827026367, "learning_rate": 3.5006476683937828e-06, "loss": 0.7608, "mean_token_accuracy": 0.9145256876945496, "num_tokens": 7193492.0, "step": 4015 }, { "epoch": 0.6503117156505546, "grad_norm": 23.447725296020508, "learning_rate": 3.4990284974093268e-06, "loss": 0.6777, "mean_token_accuracy": 0.9142748713493347, "num_tokens": 7195285.0, "step": 4016 }, { "epoch": 0.6504736458586349, "grad_norm": 36.816287994384766, "learning_rate": 3.497409326424871e-06, "loss": 0.6813, "mean_token_accuracy": 0.9069208204746246, "num_tokens": 7197087.0, "step": 4017 }, { "epoch": 0.6506355760667153, "grad_norm": 30.002910614013672, "learning_rate": 3.495790155440415e-06, "loss": 0.6465, "mean_token_accuracy": 0.9091558456420898, "num_tokens": 7198874.0, "step": 4018 }, { "epoch": 0.6507975062747956, "grad_norm": 23.461652755737305, "learning_rate": 3.4941709844559592e-06, "loss": 0.5069, "mean_token_accuracy": 0.9281461834907532, "num_tokens": 7200664.0, "step": 4019 }, { "epoch": 0.6509594364828759, "grad_norm": 19.912803649902344, "learning_rate": 3.4925518134715033e-06, "loss": 0.4935, "mean_token_accuracy": 0.9288800358772278, "num_tokens": 7202443.0, "step": 4020 }, { "epoch": 0.6511213666909562, "grad_norm": 31.394453048706055, "learning_rate": 3.4909326424870473e-06, "loss": 0.6814, "mean_token_accuracy": 0.9102316498756409, "num_tokens": 7204243.0, "step": 4021 }, { "epoch": 0.6512832968990365, "grad_norm": 24.90923500061035, "learning_rate": 3.4893134715025913e-06, "loss": 0.5653, "mean_token_accuracy": 0.9149594306945801, "num_tokens": 7206036.0, "step": 4022 }, { "epoch": 0.6514452271071168, "grad_norm": 19.383697509765625, "learning_rate": 3.4876943005181353e-06, "loss": 0.4895, "mean_token_accuracy": 0.9255736768245697, "num_tokens": 7207830.0, "step": 4023 }, { "epoch": 0.6516071573151971, "grad_norm": 28.540752410888672, "learning_rate": 3.4860751295336793e-06, "loss": 0.5302, "mean_token_accuracy": 0.9243475496768951, "num_tokens": 7209620.0, "step": 4024 }, { "epoch": 0.6517690875232774, "grad_norm": 19.40833854675293, "learning_rate": 3.4844559585492233e-06, "loss": 0.5132, "mean_token_accuracy": 0.927194744348526, "num_tokens": 7211407.0, "step": 4025 }, { "epoch": 0.6519310177313578, "grad_norm": 32.680145263671875, "learning_rate": 3.4828367875647673e-06, "loss": 0.632, "mean_token_accuracy": 0.9115099608898163, "num_tokens": 7213190.0, "step": 4026 }, { "epoch": 0.6520929479394381, "grad_norm": 38.18278503417969, "learning_rate": 3.4812176165803113e-06, "loss": 0.676, "mean_token_accuracy": 0.9078000783920288, "num_tokens": 7214974.0, "step": 4027 }, { "epoch": 0.6522548781475184, "grad_norm": 33.32498550415039, "learning_rate": 3.4795984455958554e-06, "loss": 0.6567, "mean_token_accuracy": 0.9106754958629608, "num_tokens": 7216777.0, "step": 4028 }, { "epoch": 0.6524168083555988, "grad_norm": 19.744749069213867, "learning_rate": 3.4779792746113994e-06, "loss": 0.522, "mean_token_accuracy": 0.92356076836586, "num_tokens": 7218563.0, "step": 4029 }, { "epoch": 0.6525787385636791, "grad_norm": 26.144132614135742, "learning_rate": 3.4763601036269434e-06, "loss": 0.5688, "mean_token_accuracy": 0.9269892275333405, "num_tokens": 7220362.0, "step": 4030 }, { "epoch": 0.6527406687717594, "grad_norm": 31.8227481842041, "learning_rate": 3.4747409326424874e-06, "loss": 0.7492, "mean_token_accuracy": 0.9090404212474823, "num_tokens": 7222149.0, "step": 4031 }, { "epoch": 0.6529025989798397, "grad_norm": 12.841814994812012, "learning_rate": 3.4731217616580314e-06, "loss": 0.4438, "mean_token_accuracy": 0.9363886117935181, "num_tokens": 7223944.0, "step": 4032 }, { "epoch": 0.65306452918792, "grad_norm": 31.269855499267578, "learning_rate": 3.4715025906735754e-06, "loss": 0.6506, "mean_token_accuracy": 0.914426326751709, "num_tokens": 7225734.0, "step": 4033 }, { "epoch": 0.6532264593960003, "grad_norm": 23.391603469848633, "learning_rate": 3.4698834196891194e-06, "loss": 0.4728, "mean_token_accuracy": 0.9328988194465637, "num_tokens": 7227529.0, "step": 4034 }, { "epoch": 0.6533883896040806, "grad_norm": 37.719146728515625, "learning_rate": 3.4682642487046635e-06, "loss": 0.6967, "mean_token_accuracy": 0.9092244505882263, "num_tokens": 7229327.0, "step": 4035 }, { "epoch": 0.6535503198121609, "grad_norm": 31.31080436706543, "learning_rate": 3.4666450777202075e-06, "loss": 0.6934, "mean_token_accuracy": 0.9121822416782379, "num_tokens": 7231121.0, "step": 4036 }, { "epoch": 0.6537122500202412, "grad_norm": 26.320165634155273, "learning_rate": 3.4650259067357515e-06, "loss": 0.5495, "mean_token_accuracy": 0.9219858348369598, "num_tokens": 7232915.0, "step": 4037 }, { "epoch": 0.6538741802283216, "grad_norm": 31.74606704711914, "learning_rate": 3.463406735751296e-06, "loss": 0.5997, "mean_token_accuracy": 0.917929470539093, "num_tokens": 7234707.0, "step": 4038 }, { "epoch": 0.6540361104364019, "grad_norm": 27.640714645385742, "learning_rate": 3.46178756476684e-06, "loss": 0.5901, "mean_token_accuracy": 0.9202451109886169, "num_tokens": 7236505.0, "step": 4039 }, { "epoch": 0.6541980406444823, "grad_norm": 38.41910934448242, "learning_rate": 3.460168393782384e-06, "loss": 0.6878, "mean_token_accuracy": 0.9064731001853943, "num_tokens": 7238305.0, "step": 4040 }, { "epoch": 0.6543599708525626, "grad_norm": 30.75510597229004, "learning_rate": 3.458549222797928e-06, "loss": 0.6069, "mean_token_accuracy": 0.9129273593425751, "num_tokens": 7240104.0, "step": 4041 }, { "epoch": 0.6545219010606429, "grad_norm": 22.84029769897461, "learning_rate": 3.456930051813472e-06, "loss": 0.6498, "mean_token_accuracy": 0.920981764793396, "num_tokens": 7241894.0, "step": 4042 }, { "epoch": 0.6546838312687232, "grad_norm": 32.36472702026367, "learning_rate": 3.455310880829016e-06, "loss": 0.6209, "mean_token_accuracy": 0.9137873351573944, "num_tokens": 7243695.0, "step": 4043 }, { "epoch": 0.6548457614768035, "grad_norm": 26.274927139282227, "learning_rate": 3.45369170984456e-06, "loss": 0.6185, "mean_token_accuracy": 0.9136690497398376, "num_tokens": 7245485.0, "step": 4044 }, { "epoch": 0.6550076916848838, "grad_norm": 32.575828552246094, "learning_rate": 3.452072538860104e-06, "loss": 0.7054, "mean_token_accuracy": 0.8991447389125824, "num_tokens": 7247274.0, "step": 4045 }, { "epoch": 0.6551696218929641, "grad_norm": 26.599124908447266, "learning_rate": 3.450453367875648e-06, "loss": 0.4931, "mean_token_accuracy": 0.9269305467605591, "num_tokens": 7249073.0, "step": 4046 }, { "epoch": 0.6553315521010444, "grad_norm": 23.48540496826172, "learning_rate": 3.448834196891192e-06, "loss": 0.4897, "mean_token_accuracy": 0.92771115899086, "num_tokens": 7250862.0, "step": 4047 }, { "epoch": 0.6554934823091247, "grad_norm": 19.32307243347168, "learning_rate": 3.447215025906736e-06, "loss": 0.5495, "mean_token_accuracy": 0.9309523701667786, "num_tokens": 7252649.0, "step": 4048 }, { "epoch": 0.655655412517205, "grad_norm": 26.650726318359375, "learning_rate": 3.44559585492228e-06, "loss": 0.6159, "mean_token_accuracy": 0.9114089608192444, "num_tokens": 7254432.0, "step": 4049 }, { "epoch": 0.6558173427252854, "grad_norm": 21.824199676513672, "learning_rate": 3.443976683937824e-06, "loss": 0.5309, "mean_token_accuracy": 0.9318532347679138, "num_tokens": 7256223.0, "step": 4050 }, { "epoch": 0.6559792729333657, "grad_norm": 24.318891525268555, "learning_rate": 3.442357512953368e-06, "loss": 0.6347, "mean_token_accuracy": 0.9205301403999329, "num_tokens": 7258013.0, "step": 4051 }, { "epoch": 0.6561412031414461, "grad_norm": 34.56660461425781, "learning_rate": 3.440738341968912e-06, "loss": 0.8564, "mean_token_accuracy": 0.9117429554462433, "num_tokens": 7259810.0, "step": 4052 }, { "epoch": 0.6563031333495264, "grad_norm": 27.054893493652344, "learning_rate": 3.439119170984456e-06, "loss": 0.6146, "mean_token_accuracy": 0.9218875765800476, "num_tokens": 7261604.0, "step": 4053 }, { "epoch": 0.6564650635576067, "grad_norm": 35.610008239746094, "learning_rate": 3.4375e-06, "loss": 0.724, "mean_token_accuracy": 0.9066002666950226, "num_tokens": 7263405.0, "step": 4054 }, { "epoch": 0.656626993765687, "grad_norm": 32.48411178588867, "learning_rate": 3.435880829015544e-06, "loss": 0.6993, "mean_token_accuracy": 0.9046227037906647, "num_tokens": 7265200.0, "step": 4055 }, { "epoch": 0.6567889239737673, "grad_norm": 27.101337432861328, "learning_rate": 3.434261658031088e-06, "loss": 0.8743, "mean_token_accuracy": 0.9063673317432404, "num_tokens": 7266989.0, "step": 4056 }, { "epoch": 0.6569508541818476, "grad_norm": 22.184051513671875, "learning_rate": 3.4326424870466326e-06, "loss": 0.5301, "mean_token_accuracy": 0.9264705777168274, "num_tokens": 7268773.0, "step": 4057 }, { "epoch": 0.6571127843899279, "grad_norm": 21.04471206665039, "learning_rate": 3.4310233160621766e-06, "loss": 0.5268, "mean_token_accuracy": 0.9271039366722107, "num_tokens": 7270573.0, "step": 4058 }, { "epoch": 0.6572747145980082, "grad_norm": 18.429393768310547, "learning_rate": 3.4294041450777206e-06, "loss": 0.5078, "mean_token_accuracy": 0.9205682873725891, "num_tokens": 7272362.0, "step": 4059 }, { "epoch": 0.6574366448060885, "grad_norm": 28.869848251342773, "learning_rate": 3.4277849740932646e-06, "loss": 0.6805, "mean_token_accuracy": 0.9071428775787354, "num_tokens": 7274154.0, "step": 4060 }, { "epoch": 0.6575985750141689, "grad_norm": 21.936565399169922, "learning_rate": 3.4261658031088086e-06, "loss": 0.5931, "mean_token_accuracy": 0.9192001819610596, "num_tokens": 7275937.0, "step": 4061 }, { "epoch": 0.6577605052222492, "grad_norm": 22.220081329345703, "learning_rate": 3.4245466321243527e-06, "loss": 0.5425, "mean_token_accuracy": 0.9248997569084167, "num_tokens": 7277728.0, "step": 4062 }, { "epoch": 0.6579224354303296, "grad_norm": 20.669706344604492, "learning_rate": 3.4229274611398967e-06, "loss": 0.5436, "mean_token_accuracy": 0.9232880473136902, "num_tokens": 7279514.0, "step": 4063 }, { "epoch": 0.6580843656384099, "grad_norm": 24.72484588623047, "learning_rate": 3.4213082901554407e-06, "loss": 0.5314, "mean_token_accuracy": 0.921973466873169, "num_tokens": 7281295.0, "step": 4064 }, { "epoch": 0.6582462958464902, "grad_norm": 35.96552658081055, "learning_rate": 3.4196891191709847e-06, "loss": 0.9394, "mean_token_accuracy": 0.8867307305335999, "num_tokens": 7283087.0, "step": 4065 }, { "epoch": 0.6584082260545705, "grad_norm": 38.397769927978516, "learning_rate": 3.4180699481865287e-06, "loss": 0.8469, "mean_token_accuracy": 0.9015027582645416, "num_tokens": 7284883.0, "step": 4066 }, { "epoch": 0.6585701562626508, "grad_norm": 30.753400802612305, "learning_rate": 3.4164507772020727e-06, "loss": 0.6908, "mean_token_accuracy": 0.9022058844566345, "num_tokens": 7286671.0, "step": 4067 }, { "epoch": 0.6587320864707311, "grad_norm": 29.82662010192871, "learning_rate": 3.4148316062176167e-06, "loss": 0.6662, "mean_token_accuracy": 0.9106434285640717, "num_tokens": 7288474.0, "step": 4068 }, { "epoch": 0.6588940166788114, "grad_norm": 31.837980270385742, "learning_rate": 3.4132124352331607e-06, "loss": 0.6237, "mean_token_accuracy": 0.9119047820568085, "num_tokens": 7290270.0, "step": 4069 }, { "epoch": 0.6590559468868917, "grad_norm": 24.83180046081543, "learning_rate": 3.4115932642487048e-06, "loss": 0.5731, "mean_token_accuracy": 0.9184402525424957, "num_tokens": 7292063.0, "step": 4070 }, { "epoch": 0.659217877094972, "grad_norm": 30.984506607055664, "learning_rate": 3.4099740932642488e-06, "loss": 0.6058, "mean_token_accuracy": 0.9161776304244995, "num_tokens": 7293861.0, "step": 4071 }, { "epoch": 0.6593798073030523, "grad_norm": 19.947614669799805, "learning_rate": 3.4083549222797928e-06, "loss": 0.5064, "mean_token_accuracy": 0.9264546930789948, "num_tokens": 7295645.0, "step": 4072 }, { "epoch": 0.6595417375111327, "grad_norm": 32.25144958496094, "learning_rate": 3.406735751295337e-06, "loss": 0.7022, "mean_token_accuracy": 0.9150349497795105, "num_tokens": 7297440.0, "step": 4073 }, { "epoch": 0.6597036677192131, "grad_norm": 32.69015884399414, "learning_rate": 3.405116580310881e-06, "loss": 0.7144, "mean_token_accuracy": 0.9015957415103912, "num_tokens": 7299237.0, "step": 4074 }, { "epoch": 0.6598655979272934, "grad_norm": 20.569913864135742, "learning_rate": 3.403497409326425e-06, "loss": 0.5102, "mean_token_accuracy": 0.925253301858902, "num_tokens": 7301030.0, "step": 4075 }, { "epoch": 0.6600275281353737, "grad_norm": 21.782926559448242, "learning_rate": 3.4018782383419693e-06, "loss": 0.5284, "mean_token_accuracy": 0.924717366695404, "num_tokens": 7302821.0, "step": 4076 }, { "epoch": 0.660189458343454, "grad_norm": 28.79715347290039, "learning_rate": 3.4002590673575133e-06, "loss": 0.624, "mean_token_accuracy": 0.9102086126804352, "num_tokens": 7304611.0, "step": 4077 }, { "epoch": 0.6603513885515343, "grad_norm": 21.243640899658203, "learning_rate": 3.3986398963730573e-06, "loss": 0.5457, "mean_token_accuracy": 0.9211202263832092, "num_tokens": 7306402.0, "step": 4078 }, { "epoch": 0.6605133187596146, "grad_norm": 19.101961135864258, "learning_rate": 3.3970207253886013e-06, "loss": 0.4692, "mean_token_accuracy": 0.9341780245304108, "num_tokens": 7308187.0, "step": 4079 }, { "epoch": 0.6606752489676949, "grad_norm": 29.71619415283203, "learning_rate": 3.3954015544041453e-06, "loss": 0.5924, "mean_token_accuracy": 0.9175745248794556, "num_tokens": 7309978.0, "step": 4080 }, { "epoch": 0.6608371791757752, "grad_norm": 20.572898864746094, "learning_rate": 3.3937823834196893e-06, "loss": 0.5529, "mean_token_accuracy": 0.930587500333786, "num_tokens": 7311779.0, "step": 4081 }, { "epoch": 0.6609991093838555, "grad_norm": 31.080326080322266, "learning_rate": 3.3921632124352333e-06, "loss": 0.6659, "mean_token_accuracy": 0.9075351357460022, "num_tokens": 7313571.0, "step": 4082 }, { "epoch": 0.6611610395919358, "grad_norm": 30.32560157775879, "learning_rate": 3.3905440414507773e-06, "loss": 0.7722, "mean_token_accuracy": 0.9190140962600708, "num_tokens": 7315367.0, "step": 4083 }, { "epoch": 0.6613229698000161, "grad_norm": 28.505826950073242, "learning_rate": 3.3889248704663214e-06, "loss": 0.6641, "mean_token_accuracy": 0.9087809026241302, "num_tokens": 7317153.0, "step": 4084 }, { "epoch": 0.6614849000080966, "grad_norm": 24.09868812561035, "learning_rate": 3.3873056994818654e-06, "loss": 0.6035, "mean_token_accuracy": 0.9208633005619049, "num_tokens": 7318943.0, "step": 4085 }, { "epoch": 0.6616468302161769, "grad_norm": 13.648755073547363, "learning_rate": 3.3856865284974094e-06, "loss": 0.4903, "mean_token_accuracy": 0.9312936961650848, "num_tokens": 7320717.0, "step": 4086 }, { "epoch": 0.6618087604242572, "grad_norm": 30.157011032104492, "learning_rate": 3.3840673575129534e-06, "loss": 0.6453, "mean_token_accuracy": 0.9094308614730835, "num_tokens": 7322516.0, "step": 4087 }, { "epoch": 0.6619706906323375, "grad_norm": 29.7043514251709, "learning_rate": 3.3824481865284974e-06, "loss": 0.6341, "mean_token_accuracy": 0.9122862815856934, "num_tokens": 7324314.0, "step": 4088 }, { "epoch": 0.6621326208404178, "grad_norm": 34.41802215576172, "learning_rate": 3.3808290155440414e-06, "loss": 0.6611, "mean_token_accuracy": 0.9134818017482758, "num_tokens": 7326115.0, "step": 4089 }, { "epoch": 0.6622945510484981, "grad_norm": 28.995506286621094, "learning_rate": 3.3792098445595854e-06, "loss": 0.6065, "mean_token_accuracy": 0.9114184975624084, "num_tokens": 7327909.0, "step": 4090 }, { "epoch": 0.6624564812565784, "grad_norm": 20.490962982177734, "learning_rate": 3.3775906735751295e-06, "loss": 0.4973, "mean_token_accuracy": 0.9258565604686737, "num_tokens": 7329704.0, "step": 4091 }, { "epoch": 0.6626184114646587, "grad_norm": 25.37592315673828, "learning_rate": 3.3759715025906735e-06, "loss": 0.5798, "mean_token_accuracy": 0.9228707551956177, "num_tokens": 7331500.0, "step": 4092 }, { "epoch": 0.662780341672739, "grad_norm": 19.97695541381836, "learning_rate": 3.3743523316062175e-06, "loss": 0.4948, "mean_token_accuracy": 0.9295739829540253, "num_tokens": 7333296.0, "step": 4093 }, { "epoch": 0.6629422718808193, "grad_norm": 21.26305389404297, "learning_rate": 3.372733160621762e-06, "loss": 0.6249, "mean_token_accuracy": 0.921380490064621, "num_tokens": 7335075.0, "step": 4094 }, { "epoch": 0.6631042020888996, "grad_norm": 37.45973205566406, "learning_rate": 3.371113989637306e-06, "loss": 0.8166, "mean_token_accuracy": 0.9033972918987274, "num_tokens": 7336878.0, "step": 4095 }, { "epoch": 0.66326613229698, "grad_norm": 37.923309326171875, "learning_rate": 3.36949481865285e-06, "loss": 1.0657, "mean_token_accuracy": 0.8914105594158173, "num_tokens": 7338666.0, "step": 4096 }, { "epoch": 0.6634280625050604, "grad_norm": 16.70155143737793, "learning_rate": 3.367875647668394e-06, "loss": 0.5574, "mean_token_accuracy": 0.9338235259056091, "num_tokens": 7340450.0, "step": 4097 }, { "epoch": 0.6635899927131407, "grad_norm": 34.92170333862305, "learning_rate": 3.366256476683938e-06, "loss": 0.6818, "mean_token_accuracy": 0.902599424123764, "num_tokens": 7342240.0, "step": 4098 }, { "epoch": 0.663751922921221, "grad_norm": 21.28485679626465, "learning_rate": 3.364637305699482e-06, "loss": 0.5578, "mean_token_accuracy": 0.9264125525951385, "num_tokens": 7344024.0, "step": 4099 }, { "epoch": 0.6639138531293013, "grad_norm": 31.46330451965332, "learning_rate": 3.363018134715026e-06, "loss": 0.6614, "mean_token_accuracy": 0.9095588028430939, "num_tokens": 7345812.0, "step": 4100 }, { "epoch": 0.6640757833373816, "grad_norm": 20.872543334960938, "learning_rate": 3.36139896373057e-06, "loss": 0.4944, "mean_token_accuracy": 0.935181736946106, "num_tokens": 7347617.0, "step": 4101 }, { "epoch": 0.6642377135454619, "grad_norm": 29.819311141967773, "learning_rate": 3.359779792746114e-06, "loss": 0.63, "mean_token_accuracy": 0.9222372174263, "num_tokens": 7349412.0, "step": 4102 }, { "epoch": 0.6643996437535422, "grad_norm": 22.996164321899414, "learning_rate": 3.3581606217616585e-06, "loss": 0.6132, "mean_token_accuracy": 0.9154656827449799, "num_tokens": 7351195.0, "step": 4103 }, { "epoch": 0.6645615739616225, "grad_norm": 28.97639274597168, "learning_rate": 3.3565414507772025e-06, "loss": 0.591, "mean_token_accuracy": 0.9189356565475464, "num_tokens": 7352991.0, "step": 4104 }, { "epoch": 0.6647235041697028, "grad_norm": 24.251434326171875, "learning_rate": 3.3549222797927465e-06, "loss": 0.6389, "mean_token_accuracy": 0.9193357825279236, "num_tokens": 7354788.0, "step": 4105 }, { "epoch": 0.6648854343777831, "grad_norm": 23.537084579467773, "learning_rate": 3.3533031088082905e-06, "loss": 0.5709, "mean_token_accuracy": 0.9238370954990387, "num_tokens": 7356577.0, "step": 4106 }, { "epoch": 0.6650473645858634, "grad_norm": 21.477855682373047, "learning_rate": 3.351683937823835e-06, "loss": 0.5524, "mean_token_accuracy": 0.9203788638114929, "num_tokens": 7358353.0, "step": 4107 }, { "epoch": 0.6652092947939439, "grad_norm": 32.9747428894043, "learning_rate": 3.350064766839379e-06, "loss": 0.5767, "mean_token_accuracy": 0.9162943363189697, "num_tokens": 7360152.0, "step": 4108 }, { "epoch": 0.6653712250020242, "grad_norm": 20.007253646850586, "learning_rate": 3.348445595854923e-06, "loss": 0.5231, "mean_token_accuracy": 0.9309831857681274, "num_tokens": 7361941.0, "step": 4109 }, { "epoch": 0.6655331552101045, "grad_norm": 23.31011199951172, "learning_rate": 3.346826424870467e-06, "loss": 0.5956, "mean_token_accuracy": 0.9160980880260468, "num_tokens": 7363727.0, "step": 4110 }, { "epoch": 0.6656950854181848, "grad_norm": 26.979835510253906, "learning_rate": 3.345207253886011e-06, "loss": 0.551, "mean_token_accuracy": 0.9145896732807159, "num_tokens": 7365520.0, "step": 4111 }, { "epoch": 0.6658570156262651, "grad_norm": 25.96749496459961, "learning_rate": 3.343588082901555e-06, "loss": 0.6432, "mean_token_accuracy": 0.9119619429111481, "num_tokens": 7367304.0, "step": 4112 }, { "epoch": 0.6660189458343454, "grad_norm": 22.079469680786133, "learning_rate": 3.341968911917099e-06, "loss": 0.556, "mean_token_accuracy": 0.9103453755378723, "num_tokens": 7369095.0, "step": 4113 }, { "epoch": 0.6661808760424257, "grad_norm": 24.190614700317383, "learning_rate": 3.340349740932643e-06, "loss": 0.4922, "mean_token_accuracy": 0.9300176203250885, "num_tokens": 7370893.0, "step": 4114 }, { "epoch": 0.666342806250506, "grad_norm": 30.60780906677246, "learning_rate": 3.338730569948187e-06, "loss": 0.6554, "mean_token_accuracy": 0.9051418602466583, "num_tokens": 7372678.0, "step": 4115 }, { "epoch": 0.6665047364585863, "grad_norm": 27.995006561279297, "learning_rate": 3.337111398963731e-06, "loss": 0.6169, "mean_token_accuracy": 0.9225313663482666, "num_tokens": 7374474.0, "step": 4116 }, { "epoch": 0.6666666666666666, "grad_norm": 22.01808738708496, "learning_rate": 3.335492227979275e-06, "loss": 0.4991, "mean_token_accuracy": 0.9307036101818085, "num_tokens": 7376260.0, "step": 4117 }, { "epoch": 0.6668285968747469, "grad_norm": 38.67413330078125, "learning_rate": 3.333873056994819e-06, "loss": 0.7762, "mean_token_accuracy": 0.9016009867191315, "num_tokens": 7378057.0, "step": 4118 }, { "epoch": 0.6669905270828274, "grad_norm": 21.717626571655273, "learning_rate": 3.332253886010363e-06, "loss": 0.51, "mean_token_accuracy": 0.9299335479736328, "num_tokens": 7379840.0, "step": 4119 }, { "epoch": 0.6671524572909077, "grad_norm": 21.459293365478516, "learning_rate": 3.330634715025907e-06, "loss": 0.4818, "mean_token_accuracy": 0.9265749752521515, "num_tokens": 7381637.0, "step": 4120 }, { "epoch": 0.667314387498988, "grad_norm": 15.305166244506836, "learning_rate": 3.329015544041451e-06, "loss": 0.466, "mean_token_accuracy": 0.9350432753562927, "num_tokens": 7383426.0, "step": 4121 }, { "epoch": 0.6674763177070683, "grad_norm": 37.940284729003906, "learning_rate": 3.327396373056995e-06, "loss": 0.8837, "mean_token_accuracy": 0.8948412537574768, "num_tokens": 7385222.0, "step": 4122 }, { "epoch": 0.6676382479151486, "grad_norm": 40.06706237792969, "learning_rate": 3.325777202072539e-06, "loss": 0.6731, "mean_token_accuracy": 0.8996402621269226, "num_tokens": 7387023.0, "step": 4123 }, { "epoch": 0.6678001781232289, "grad_norm": 30.599830627441406, "learning_rate": 3.324158031088083e-06, "loss": 0.6446, "mean_token_accuracy": 0.9265443980693817, "num_tokens": 7388823.0, "step": 4124 }, { "epoch": 0.6679621083313092, "grad_norm": 35.77900695800781, "learning_rate": 3.322538860103627e-06, "loss": 0.8884, "mean_token_accuracy": 0.900324672460556, "num_tokens": 7390607.0, "step": 4125 }, { "epoch": 0.6681240385393895, "grad_norm": 28.442115783691406, "learning_rate": 3.3209196891191716e-06, "loss": 0.5922, "mean_token_accuracy": 0.9201680421829224, "num_tokens": 7392395.0, "step": 4126 }, { "epoch": 0.6682859687474698, "grad_norm": 25.203189849853516, "learning_rate": 3.3193005181347156e-06, "loss": 0.5992, "mean_token_accuracy": 0.9123993813991547, "num_tokens": 7394180.0, "step": 4127 }, { "epoch": 0.6684478989555501, "grad_norm": 29.367996215820312, "learning_rate": 3.3176813471502596e-06, "loss": 0.6078, "mean_token_accuracy": 0.9122835099697113, "num_tokens": 7395966.0, "step": 4128 }, { "epoch": 0.6686098291636304, "grad_norm": 32.791717529296875, "learning_rate": 3.3160621761658036e-06, "loss": 0.6717, "mean_token_accuracy": 0.901408463716507, "num_tokens": 7397762.0, "step": 4129 }, { "epoch": 0.6687717593717107, "grad_norm": 31.23275375366211, "learning_rate": 3.3144430051813477e-06, "loss": 0.6285, "mean_token_accuracy": 0.9110547602176666, "num_tokens": 7399555.0, "step": 4130 }, { "epoch": 0.6689336895797912, "grad_norm": 18.428125381469727, "learning_rate": 3.3128238341968917e-06, "loss": 0.5448, "mean_token_accuracy": 0.9250216782093048, "num_tokens": 7401347.0, "step": 4131 }, { "epoch": 0.6690956197878715, "grad_norm": 28.220844268798828, "learning_rate": 3.3112046632124357e-06, "loss": 0.5377, "mean_token_accuracy": 0.9276260435581207, "num_tokens": 7403135.0, "step": 4132 }, { "epoch": 0.6692575499959518, "grad_norm": 25.233781814575195, "learning_rate": 3.3095854922279797e-06, "loss": 0.6034, "mean_token_accuracy": 0.9201986491680145, "num_tokens": 7404938.0, "step": 4133 }, { "epoch": 0.6694194802040321, "grad_norm": 25.159303665161133, "learning_rate": 3.3079663212435237e-06, "loss": 0.6044, "mean_token_accuracy": 0.9166133105754852, "num_tokens": 7406724.0, "step": 4134 }, { "epoch": 0.6695814104121124, "grad_norm": 24.508403778076172, "learning_rate": 3.3063471502590677e-06, "loss": 0.5652, "mean_token_accuracy": 0.9208920300006866, "num_tokens": 7408513.0, "step": 4135 }, { "epoch": 0.6697433406201927, "grad_norm": 33.23428726196289, "learning_rate": 3.3047279792746117e-06, "loss": 0.7201, "mean_token_accuracy": 0.9074468016624451, "num_tokens": 7410306.0, "step": 4136 }, { "epoch": 0.669905270828273, "grad_norm": 27.269845962524414, "learning_rate": 3.3031088082901557e-06, "loss": 0.6619, "mean_token_accuracy": 0.9084325432777405, "num_tokens": 7412102.0, "step": 4137 }, { "epoch": 0.6700672010363533, "grad_norm": 31.97282600402832, "learning_rate": 3.3014896373056998e-06, "loss": 0.691, "mean_token_accuracy": 0.9195847511291504, "num_tokens": 7413899.0, "step": 4138 }, { "epoch": 0.6702291312444336, "grad_norm": 26.36349868774414, "learning_rate": 3.2998704663212438e-06, "loss": 0.6079, "mean_token_accuracy": 0.9260977506637573, "num_tokens": 7415695.0, "step": 4139 }, { "epoch": 0.6703910614525139, "grad_norm": 30.884103775024414, "learning_rate": 3.2982512953367878e-06, "loss": 0.6043, "mean_token_accuracy": 0.9151724278926849, "num_tokens": 7417502.0, "step": 4140 }, { "epoch": 0.6705529916605942, "grad_norm": 20.716890335083008, "learning_rate": 3.296632124352332e-06, "loss": 0.5484, "mean_token_accuracy": 0.9231884181499481, "num_tokens": 7419287.0, "step": 4141 }, { "epoch": 0.6707149218686747, "grad_norm": 37.35674285888672, "learning_rate": 3.295012953367876e-06, "loss": 0.8434, "mean_token_accuracy": 0.9035087823867798, "num_tokens": 7421079.0, "step": 4142 }, { "epoch": 0.670876852076755, "grad_norm": 30.0382080078125, "learning_rate": 3.29339378238342e-06, "loss": 0.7545, "mean_token_accuracy": 0.8996212184429169, "num_tokens": 7422859.0, "step": 4143 }, { "epoch": 0.6710387822848353, "grad_norm": 22.871402740478516, "learning_rate": 3.291774611398964e-06, "loss": 0.5776, "mean_token_accuracy": 0.91094771027565, "num_tokens": 7424651.0, "step": 4144 }, { "epoch": 0.6712007124929156, "grad_norm": 31.28093147277832, "learning_rate": 3.2901554404145083e-06, "loss": 0.6551, "mean_token_accuracy": 0.9203781485557556, "num_tokens": 7426439.0, "step": 4145 }, { "epoch": 0.6713626427009959, "grad_norm": 36.13066101074219, "learning_rate": 3.2885362694300523e-06, "loss": 0.7395, "mean_token_accuracy": 0.9022015631198883, "num_tokens": 7428237.0, "step": 4146 }, { "epoch": 0.6715245729090762, "grad_norm": 31.17274284362793, "learning_rate": 3.2869170984455963e-06, "loss": 0.6474, "mean_token_accuracy": 0.9184534549713135, "num_tokens": 7430033.0, "step": 4147 }, { "epoch": 0.6716865031171565, "grad_norm": 34.50510025024414, "learning_rate": 3.2852979274611403e-06, "loss": 0.6998, "mean_token_accuracy": 0.905601978302002, "num_tokens": 7431820.0, "step": 4148 }, { "epoch": 0.6718484333252368, "grad_norm": 29.901945114135742, "learning_rate": 3.2836787564766843e-06, "loss": 0.6654, "mean_token_accuracy": 0.9154095649719238, "num_tokens": 7433604.0, "step": 4149 }, { "epoch": 0.6720103635333171, "grad_norm": 21.766742706298828, "learning_rate": 3.2820595854922283e-06, "loss": 0.5486, "mean_token_accuracy": 0.928205132484436, "num_tokens": 7435394.0, "step": 4150 }, { "epoch": 0.6721722937413974, "grad_norm": 25.516538619995117, "learning_rate": 3.2804404145077724e-06, "loss": 0.6397, "mean_token_accuracy": 0.9198092520236969, "num_tokens": 7437192.0, "step": 4151 }, { "epoch": 0.6723342239494777, "grad_norm": 41.40788650512695, "learning_rate": 3.2788212435233164e-06, "loss": 1.0756, "mean_token_accuracy": 0.8910714387893677, "num_tokens": 7438980.0, "step": 4152 }, { "epoch": 0.6724961541575581, "grad_norm": 28.43753433227539, "learning_rate": 3.2772020725388604e-06, "loss": 0.532, "mean_token_accuracy": 0.9283088147640228, "num_tokens": 7440772.0, "step": 4153 }, { "epoch": 0.6726580843656385, "grad_norm": 25.482227325439453, "learning_rate": 3.2755829015544044e-06, "loss": 0.6175, "mean_token_accuracy": 0.9138143956661224, "num_tokens": 7442563.0, "step": 4154 }, { "epoch": 0.6728200145737188, "grad_norm": 22.197038650512695, "learning_rate": 3.2739637305699484e-06, "loss": 0.5724, "mean_token_accuracy": 0.9184782803058624, "num_tokens": 7444357.0, "step": 4155 }, { "epoch": 0.6729819447817991, "grad_norm": 22.777578353881836, "learning_rate": 3.2723445595854924e-06, "loss": 0.546, "mean_token_accuracy": 0.9239495694637299, "num_tokens": 7446145.0, "step": 4156 }, { "epoch": 0.6731438749898794, "grad_norm": 35.817501068115234, "learning_rate": 3.2707253886010364e-06, "loss": 0.6589, "mean_token_accuracy": 0.9107017517089844, "num_tokens": 7447940.0, "step": 4157 }, { "epoch": 0.6733058051979597, "grad_norm": 28.14497947692871, "learning_rate": 3.2691062176165804e-06, "loss": 0.5327, "mean_token_accuracy": 0.9149396419525146, "num_tokens": 7449734.0, "step": 4158 }, { "epoch": 0.67346773540604, "grad_norm": 20.760400772094727, "learning_rate": 3.2674870466321245e-06, "loss": 0.581, "mean_token_accuracy": 0.9210199117660522, "num_tokens": 7451512.0, "step": 4159 }, { "epoch": 0.6736296656141203, "grad_norm": 31.46258544921875, "learning_rate": 3.2658678756476685e-06, "loss": 0.7696, "mean_token_accuracy": 0.9079329371452332, "num_tokens": 7453307.0, "step": 4160 }, { "epoch": 0.6737915958222006, "grad_norm": 40.968109130859375, "learning_rate": 3.2642487046632125e-06, "loss": 0.8828, "mean_token_accuracy": 0.8833173513412476, "num_tokens": 7455108.0, "step": 4161 }, { "epoch": 0.6739535260302809, "grad_norm": 28.158157348632812, "learning_rate": 3.2626295336787565e-06, "loss": 0.5722, "mean_token_accuracy": 0.9217686951160431, "num_tokens": 7456914.0, "step": 4162 }, { "epoch": 0.6741154562383612, "grad_norm": 24.687936782836914, "learning_rate": 3.2610103626943005e-06, "loss": 0.6051, "mean_token_accuracy": 0.9168752431869507, "num_tokens": 7458703.0, "step": 4163 }, { "epoch": 0.6742773864464415, "grad_norm": 28.493154525756836, "learning_rate": 3.259391191709845e-06, "loss": 0.6484, "mean_token_accuracy": 0.9087591171264648, "num_tokens": 7460489.0, "step": 4164 }, { "epoch": 0.674439316654522, "grad_norm": 25.452533721923828, "learning_rate": 3.257772020725389e-06, "loss": 0.573, "mean_token_accuracy": 0.9306824803352356, "num_tokens": 7462288.0, "step": 4165 }, { "epoch": 0.6746012468626023, "grad_norm": 23.6773681640625, "learning_rate": 3.256152849740933e-06, "loss": 0.6245, "mean_token_accuracy": 0.9147312343120575, "num_tokens": 7464081.0, "step": 4166 }, { "epoch": 0.6747631770706826, "grad_norm": 19.944725036621094, "learning_rate": 3.254533678756477e-06, "loss": 0.5404, "mean_token_accuracy": 0.9264705777168274, "num_tokens": 7465865.0, "step": 4167 }, { "epoch": 0.6749251072787629, "grad_norm": 31.108671188354492, "learning_rate": 3.252914507772021e-06, "loss": 0.6896, "mean_token_accuracy": 0.9095184504985809, "num_tokens": 7467663.0, "step": 4168 }, { "epoch": 0.6750870374868432, "grad_norm": 19.126888275146484, "learning_rate": 3.251295336787565e-06, "loss": 0.4749, "mean_token_accuracy": 0.9296235740184784, "num_tokens": 7469446.0, "step": 4169 }, { "epoch": 0.6752489676949235, "grad_norm": 17.891489028930664, "learning_rate": 3.249676165803109e-06, "loss": 0.4718, "mean_token_accuracy": 0.9244965612888336, "num_tokens": 7471236.0, "step": 4170 }, { "epoch": 0.6754108979030038, "grad_norm": 24.59279441833496, "learning_rate": 3.248056994818653e-06, "loss": 0.6318, "mean_token_accuracy": 0.9174375832080841, "num_tokens": 7473027.0, "step": 4171 }, { "epoch": 0.6755728281110841, "grad_norm": 26.764751434326172, "learning_rate": 3.246437823834197e-06, "loss": 0.7489, "mean_token_accuracy": 0.9064748287200928, "num_tokens": 7474817.0, "step": 4172 }, { "epoch": 0.6757347583191644, "grad_norm": 29.436477661132812, "learning_rate": 3.244818652849741e-06, "loss": 0.6467, "mean_token_accuracy": 0.9207108914852142, "num_tokens": 7476594.0, "step": 4173 }, { "epoch": 0.6758966885272447, "grad_norm": 28.81219482421875, "learning_rate": 3.243199481865285e-06, "loss": 0.6155, "mean_token_accuracy": 0.9222605228424072, "num_tokens": 7478389.0, "step": 4174 }, { "epoch": 0.676058618735325, "grad_norm": 19.002336502075195, "learning_rate": 3.241580310880829e-06, "loss": 0.5235, "mean_token_accuracy": 0.9302941858768463, "num_tokens": 7480173.0, "step": 4175 }, { "epoch": 0.6762205489434054, "grad_norm": 24.259191513061523, "learning_rate": 3.239961139896373e-06, "loss": 0.6041, "mean_token_accuracy": 0.9243930280208588, "num_tokens": 7481963.0, "step": 4176 }, { "epoch": 0.6763824791514857, "grad_norm": 28.416440963745117, "learning_rate": 3.238341968911917e-06, "loss": 0.6613, "mean_token_accuracy": 0.912962943315506, "num_tokens": 7483750.0, "step": 4177 }, { "epoch": 0.6765444093595661, "grad_norm": 27.9654483795166, "learning_rate": 3.236722797927461e-06, "loss": 0.6543, "mean_token_accuracy": 0.9087591171264648, "num_tokens": 7485536.0, "step": 4178 }, { "epoch": 0.6767063395676464, "grad_norm": 42.680240631103516, "learning_rate": 3.235103626943005e-06, "loss": 0.7791, "mean_token_accuracy": 0.9029786884784698, "num_tokens": 7487338.0, "step": 4179 }, { "epoch": 0.6768682697757267, "grad_norm": 26.36026954650879, "learning_rate": 3.233484455958549e-06, "loss": 0.5774, "mean_token_accuracy": 0.9208772778511047, "num_tokens": 7489139.0, "step": 4180 }, { "epoch": 0.677030199983807, "grad_norm": 29.242258071899414, "learning_rate": 3.231865284974093e-06, "loss": 0.6634, "mean_token_accuracy": 0.9154411852359772, "num_tokens": 7490923.0, "step": 4181 }, { "epoch": 0.6771921301918873, "grad_norm": 29.985437393188477, "learning_rate": 3.230246113989637e-06, "loss": 0.5793, "mean_token_accuracy": 0.9109512269496918, "num_tokens": 7492715.0, "step": 4182 }, { "epoch": 0.6773540603999676, "grad_norm": 27.38640022277832, "learning_rate": 3.2286269430051816e-06, "loss": 0.6332, "mean_token_accuracy": 0.918371319770813, "num_tokens": 7494496.0, "step": 4183 }, { "epoch": 0.6775159906080479, "grad_norm": 29.038549423217773, "learning_rate": 3.2270077720207256e-06, "loss": 0.597, "mean_token_accuracy": 0.906521737575531, "num_tokens": 7496286.0, "step": 4184 }, { "epoch": 0.6776779208161282, "grad_norm": 29.188644409179688, "learning_rate": 3.2253886010362696e-06, "loss": 0.6243, "mean_token_accuracy": 0.9112829566001892, "num_tokens": 7498079.0, "step": 4185 }, { "epoch": 0.6778398510242085, "grad_norm": 29.625587463378906, "learning_rate": 3.2237694300518137e-06, "loss": 0.5738, "mean_token_accuracy": 0.9126662909984589, "num_tokens": 7499867.0, "step": 4186 }, { "epoch": 0.6780017812322889, "grad_norm": 21.250776290893555, "learning_rate": 3.2221502590673577e-06, "loss": 0.5259, "mean_token_accuracy": 0.9211202263832092, "num_tokens": 7501658.0, "step": 4187 }, { "epoch": 0.6781637114403692, "grad_norm": 16.9936580657959, "learning_rate": 3.2205310880829017e-06, "loss": 0.51, "mean_token_accuracy": 0.9263710677623749, "num_tokens": 7503442.0, "step": 4188 }, { "epoch": 0.6783256416484496, "grad_norm": 11.794110298156738, "learning_rate": 3.2189119170984457e-06, "loss": 0.4593, "mean_token_accuracy": 0.9274809062480927, "num_tokens": 7505216.0, "step": 4189 }, { "epoch": 0.6784875718565299, "grad_norm": 21.56904411315918, "learning_rate": 3.2172927461139897e-06, "loss": 0.5549, "mean_token_accuracy": 0.9230731725692749, "num_tokens": 7507014.0, "step": 4190 }, { "epoch": 0.6786495020646102, "grad_norm": 25.205673217773438, "learning_rate": 3.2156735751295337e-06, "loss": 0.517, "mean_token_accuracy": 0.9243339002132416, "num_tokens": 7508815.0, "step": 4191 }, { "epoch": 0.6788114322726905, "grad_norm": 21.605981826782227, "learning_rate": 3.2140544041450777e-06, "loss": 0.513, "mean_token_accuracy": 0.9277989864349365, "num_tokens": 7510602.0, "step": 4192 }, { "epoch": 0.6789733624807708, "grad_norm": 12.93326187133789, "learning_rate": 3.2124352331606218e-06, "loss": 0.4349, "mean_token_accuracy": 0.9395328760147095, "num_tokens": 7512395.0, "step": 4193 }, { "epoch": 0.6791352926888511, "grad_norm": 19.103681564331055, "learning_rate": 3.2108160621761658e-06, "loss": 0.5089, "mean_token_accuracy": 0.925775021314621, "num_tokens": 7514191.0, "step": 4194 }, { "epoch": 0.6792972228969314, "grad_norm": 25.725322723388672, "learning_rate": 3.2091968911917098e-06, "loss": 0.4969, "mean_token_accuracy": 0.9288247227668762, "num_tokens": 7515984.0, "step": 4195 }, { "epoch": 0.6794591531050117, "grad_norm": 31.619583129882812, "learning_rate": 3.207577720207254e-06, "loss": 0.7052, "mean_token_accuracy": 0.9084407687187195, "num_tokens": 7517778.0, "step": 4196 }, { "epoch": 0.679621083313092, "grad_norm": 26.227676391601562, "learning_rate": 3.205958549222798e-06, "loss": 0.5905, "mean_token_accuracy": 0.9131537079811096, "num_tokens": 7519555.0, "step": 4197 }, { "epoch": 0.6797830135211724, "grad_norm": 24.51641082763672, "learning_rate": 3.204339378238342e-06, "loss": 0.6513, "mean_token_accuracy": 0.9210607707500458, "num_tokens": 7521346.0, "step": 4198 }, { "epoch": 0.6799449437292527, "grad_norm": 23.914867401123047, "learning_rate": 3.2027202072538867e-06, "loss": 0.5545, "mean_token_accuracy": 0.9178240597248077, "num_tokens": 7523137.0, "step": 4199 }, { "epoch": 0.680106873937333, "grad_norm": 23.75826072692871, "learning_rate": 3.2011010362694307e-06, "loss": 0.5531, "mean_token_accuracy": 0.9243475496768951, "num_tokens": 7524927.0, "step": 4200 }, { "epoch": 0.6802688041454134, "grad_norm": 36.00688934326172, "learning_rate": 3.1994818652849747e-06, "loss": 0.8003, "mean_token_accuracy": 0.9084208011627197, "num_tokens": 7526722.0, "step": 4201 }, { "epoch": 0.6804307343534937, "grad_norm": 27.803722381591797, "learning_rate": 3.1978626943005187e-06, "loss": 0.6001, "mean_token_accuracy": 0.9178959727287292, "num_tokens": 7528514.0, "step": 4202 }, { "epoch": 0.680592664561574, "grad_norm": 28.25230598449707, "learning_rate": 3.1962435233160627e-06, "loss": 0.647, "mean_token_accuracy": 0.9155176877975464, "num_tokens": 7530298.0, "step": 4203 }, { "epoch": 0.6807545947696543, "grad_norm": 31.916336059570312, "learning_rate": 3.1946243523316067e-06, "loss": 0.6637, "mean_token_accuracy": 0.8989050984382629, "num_tokens": 7532087.0, "step": 4204 }, { "epoch": 0.6809165249777346, "grad_norm": 28.49579620361328, "learning_rate": 3.1930051813471508e-06, "loss": 0.8094, "mean_token_accuracy": 0.8965517282485962, "num_tokens": 7533889.0, "step": 4205 }, { "epoch": 0.6810784551858149, "grad_norm": 13.097058296203613, "learning_rate": 3.1913860103626948e-06, "loss": 0.4638, "mean_token_accuracy": 0.9332089424133301, "num_tokens": 7535671.0, "step": 4206 }, { "epoch": 0.6812403853938952, "grad_norm": 26.408447265625, "learning_rate": 3.1897668393782388e-06, "loss": 0.6082, "mean_token_accuracy": 0.919334203004837, "num_tokens": 7537468.0, "step": 4207 }, { "epoch": 0.6814023156019755, "grad_norm": 33.12641143798828, "learning_rate": 3.188147668393783e-06, "loss": 0.7915, "mean_token_accuracy": 0.9042984545230865, "num_tokens": 7539252.0, "step": 4208 }, { "epoch": 0.6815642458100558, "grad_norm": 21.051509857177734, "learning_rate": 3.186528497409327e-06, "loss": 0.4994, "mean_token_accuracy": 0.9237982928752899, "num_tokens": 7541040.0, "step": 4209 }, { "epoch": 0.6817261760181362, "grad_norm": 35.04060363769531, "learning_rate": 3.184909326424871e-06, "loss": 0.6311, "mean_token_accuracy": 0.9052895903587341, "num_tokens": 7542837.0, "step": 4210 }, { "epoch": 0.6818881062262165, "grad_norm": 31.298126220703125, "learning_rate": 3.183290155440415e-06, "loss": 0.5482, "mean_token_accuracy": 0.9166054725646973, "num_tokens": 7544635.0, "step": 4211 }, { "epoch": 0.6820500364342968, "grad_norm": 18.305156707763672, "learning_rate": 3.181670984455959e-06, "loss": 0.566, "mean_token_accuracy": 0.9269837737083435, "num_tokens": 7546420.0, "step": 4212 }, { "epoch": 0.6822119666423772, "grad_norm": 33.741493225097656, "learning_rate": 3.180051813471503e-06, "loss": 0.5805, "mean_token_accuracy": 0.9083735942840576, "num_tokens": 7548205.0, "step": 4213 }, { "epoch": 0.6823738968504575, "grad_norm": 31.62444496154785, "learning_rate": 3.178432642487047e-06, "loss": 0.6409, "mean_token_accuracy": 0.9177764356136322, "num_tokens": 7549997.0, "step": 4214 }, { "epoch": 0.6825358270585378, "grad_norm": 37.6870002746582, "learning_rate": 3.1768134715025913e-06, "loss": 0.7075, "mean_token_accuracy": 0.9097591638565063, "num_tokens": 7551786.0, "step": 4215 }, { "epoch": 0.6826977572666181, "grad_norm": 37.843257904052734, "learning_rate": 3.1751943005181353e-06, "loss": 0.7683, "mean_token_accuracy": 0.8941354155540466, "num_tokens": 7553580.0, "step": 4216 }, { "epoch": 0.6828596874746984, "grad_norm": 38.16155242919922, "learning_rate": 3.1735751295336793e-06, "loss": 0.6663, "mean_token_accuracy": 0.9055489599704742, "num_tokens": 7555376.0, "step": 4217 }, { "epoch": 0.6830216176827787, "grad_norm": 38.955291748046875, "learning_rate": 3.1719559585492233e-06, "loss": 0.6666, "mean_token_accuracy": 0.9054322242736816, "num_tokens": 7557172.0, "step": 4218 }, { "epoch": 0.683183547890859, "grad_norm": 23.4375, "learning_rate": 3.1703367875647674e-06, "loss": 0.565, "mean_token_accuracy": 0.9145896732807159, "num_tokens": 7558965.0, "step": 4219 }, { "epoch": 0.6833454780989393, "grad_norm": 38.129207611083984, "learning_rate": 3.1687176165803114e-06, "loss": 0.6275, "mean_token_accuracy": 0.9103172123432159, "num_tokens": 7560766.0, "step": 4220 }, { "epoch": 0.6835074083070197, "grad_norm": 26.179931640625, "learning_rate": 3.1670984455958554e-06, "loss": 0.6168, "mean_token_accuracy": 0.9207393527030945, "num_tokens": 7562543.0, "step": 4221 }, { "epoch": 0.6836693385151, "grad_norm": 19.7171688079834, "learning_rate": 3.1654792746113994e-06, "loss": 0.5253, "mean_token_accuracy": 0.9257525205612183, "num_tokens": 7564323.0, "step": 4222 }, { "epoch": 0.6838312687231803, "grad_norm": 31.97835350036621, "learning_rate": 3.1638601036269434e-06, "loss": 0.6042, "mean_token_accuracy": 0.9165835082530975, "num_tokens": 7566111.0, "step": 4223 }, { "epoch": 0.6839931989312606, "grad_norm": 24.31029510498047, "learning_rate": 3.1622409326424874e-06, "loss": 0.5513, "mean_token_accuracy": 0.9231970608234406, "num_tokens": 7567898.0, "step": 4224 }, { "epoch": 0.684155129139341, "grad_norm": 28.73821258544922, "learning_rate": 3.1606217616580314e-06, "loss": 0.5935, "mean_token_accuracy": 0.9104229211807251, "num_tokens": 7569700.0, "step": 4225 }, { "epoch": 0.6843170593474213, "grad_norm": 27.546123504638672, "learning_rate": 3.1590025906735755e-06, "loss": 0.5138, "mean_token_accuracy": 0.922649472951889, "num_tokens": 7571496.0, "step": 4226 }, { "epoch": 0.6844789895555016, "grad_norm": 26.646446228027344, "learning_rate": 3.1573834196891195e-06, "loss": 0.5754, "mean_token_accuracy": 0.915268212556839, "num_tokens": 7573291.0, "step": 4227 }, { "epoch": 0.6846409197635819, "grad_norm": 25.911571502685547, "learning_rate": 3.1557642487046635e-06, "loss": 0.5571, "mean_token_accuracy": 0.9187424778938293, "num_tokens": 7575086.0, "step": 4228 }, { "epoch": 0.6848028499716622, "grad_norm": 17.31951904296875, "learning_rate": 3.1541450777202075e-06, "loss": 0.476, "mean_token_accuracy": 0.930771678686142, "num_tokens": 7576873.0, "step": 4229 }, { "epoch": 0.6849647801797425, "grad_norm": 27.885652542114258, "learning_rate": 3.1525259067357515e-06, "loss": 0.6167, "mean_token_accuracy": 0.9126984179019928, "num_tokens": 7578660.0, "step": 4230 }, { "epoch": 0.6851267103878228, "grad_norm": 23.07590675354004, "learning_rate": 3.1509067357512955e-06, "loss": 0.5202, "mean_token_accuracy": 0.9251798987388611, "num_tokens": 7580453.0, "step": 4231 }, { "epoch": 0.6852886405959032, "grad_norm": 28.671772003173828, "learning_rate": 3.1492875647668395e-06, "loss": 0.5473, "mean_token_accuracy": 0.9266331791877747, "num_tokens": 7582253.0, "step": 4232 }, { "epoch": 0.6854505708039835, "grad_norm": 24.947860717773438, "learning_rate": 3.147668393782384e-06, "loss": 0.5909, "mean_token_accuracy": 0.9163933396339417, "num_tokens": 7584040.0, "step": 4233 }, { "epoch": 0.6856125010120638, "grad_norm": 37.60129928588867, "learning_rate": 3.146049222797928e-06, "loss": 0.8463, "mean_token_accuracy": 0.9011110067367554, "num_tokens": 7585825.0, "step": 4234 }, { "epoch": 0.6857744312201441, "grad_norm": 23.55774688720703, "learning_rate": 3.144430051813472e-06, "loss": 0.4987, "mean_token_accuracy": 0.932934045791626, "num_tokens": 7587622.0, "step": 4235 }, { "epoch": 0.6859363614282245, "grad_norm": 40.561241149902344, "learning_rate": 3.142810880829016e-06, "loss": 0.695, "mean_token_accuracy": 0.9062924981117249, "num_tokens": 7589421.0, "step": 4236 }, { "epoch": 0.6860982916363048, "grad_norm": 21.919212341308594, "learning_rate": 3.14119170984456e-06, "loss": 0.5963, "mean_token_accuracy": 0.9260774850845337, "num_tokens": 7591217.0, "step": 4237 }, { "epoch": 0.6862602218443851, "grad_norm": 33.35395050048828, "learning_rate": 3.139572538860104e-06, "loss": 0.76, "mean_token_accuracy": 0.8960212171077728, "num_tokens": 7593017.0, "step": 4238 }, { "epoch": 0.6864221520524654, "grad_norm": 25.844959259033203, "learning_rate": 3.137953367875648e-06, "loss": 0.6421, "mean_token_accuracy": 0.9150778949260712, "num_tokens": 7594800.0, "step": 4239 }, { "epoch": 0.6865840822605457, "grad_norm": 28.68640899658203, "learning_rate": 3.136334196891192e-06, "loss": 0.6571, "mean_token_accuracy": 0.9140820801258087, "num_tokens": 7596589.0, "step": 4240 }, { "epoch": 0.686746012468626, "grad_norm": 30.82794189453125, "learning_rate": 3.134715025906736e-06, "loss": 0.5644, "mean_token_accuracy": 0.9208633303642273, "num_tokens": 7598379.0, "step": 4241 }, { "epoch": 0.6869079426767063, "grad_norm": 24.230995178222656, "learning_rate": 3.13309585492228e-06, "loss": 0.5914, "mean_token_accuracy": 0.9140544533729553, "num_tokens": 7600170.0, "step": 4242 }, { "epoch": 0.6870698728847866, "grad_norm": 31.77448844909668, "learning_rate": 3.131476683937824e-06, "loss": 0.6626, "mean_token_accuracy": 0.9148550927639008, "num_tokens": 7601964.0, "step": 4243 }, { "epoch": 0.687231803092867, "grad_norm": 26.769216537475586, "learning_rate": 3.129857512953368e-06, "loss": 0.5339, "mean_token_accuracy": 0.9269722700119019, "num_tokens": 7603750.0, "step": 4244 }, { "epoch": 0.6873937333009473, "grad_norm": 33.159339904785156, "learning_rate": 3.128238341968912e-06, "loss": 0.719, "mean_token_accuracy": 0.8930845856666565, "num_tokens": 7605546.0, "step": 4245 }, { "epoch": 0.6875556635090276, "grad_norm": 26.52994155883789, "learning_rate": 3.126619170984456e-06, "loss": 0.529, "mean_token_accuracy": 0.9239353239536285, "num_tokens": 7607334.0, "step": 4246 }, { "epoch": 0.687717593717108, "grad_norm": 26.5125789642334, "learning_rate": 3.125e-06, "loss": 0.5847, "mean_token_accuracy": 0.9206432700157166, "num_tokens": 7609122.0, "step": 4247 }, { "epoch": 0.6878795239251883, "grad_norm": 33.758140563964844, "learning_rate": 3.123380829015544e-06, "loss": 0.6379, "mean_token_accuracy": 0.9157638251781464, "num_tokens": 7610919.0, "step": 4248 }, { "epoch": 0.6880414541332686, "grad_norm": 31.36656951904297, "learning_rate": 3.121761658031088e-06, "loss": 0.6648, "mean_token_accuracy": 0.9121997356414795, "num_tokens": 7612715.0, "step": 4249 }, { "epoch": 0.6882033843413489, "grad_norm": 24.905214309692383, "learning_rate": 3.120142487046632e-06, "loss": 0.5669, "mean_token_accuracy": 0.9204588532447815, "num_tokens": 7614518.0, "step": 4250 }, { "epoch": 0.6883653145494292, "grad_norm": 29.66878318786621, "learning_rate": 3.118523316062176e-06, "loss": 0.5721, "mean_token_accuracy": 0.9060952067375183, "num_tokens": 7616317.0, "step": 4251 }, { "epoch": 0.6885272447575095, "grad_norm": 15.72887897491455, "learning_rate": 3.1169041450777206e-06, "loss": 0.434, "mean_token_accuracy": 0.9365669786930084, "num_tokens": 7618113.0, "step": 4252 }, { "epoch": 0.6886891749655898, "grad_norm": 31.513444900512695, "learning_rate": 3.1152849740932647e-06, "loss": 0.6441, "mean_token_accuracy": 0.9004052579402924, "num_tokens": 7619906.0, "step": 4253 }, { "epoch": 0.6888511051736701, "grad_norm": 41.84278869628906, "learning_rate": 3.1136658031088087e-06, "loss": 0.6486, "mean_token_accuracy": 0.9062213599681854, "num_tokens": 7621706.0, "step": 4254 }, { "epoch": 0.6890130353817505, "grad_norm": 42.813560485839844, "learning_rate": 3.1120466321243527e-06, "loss": 0.9463, "mean_token_accuracy": 0.8859060406684875, "num_tokens": 7623516.0, "step": 4255 }, { "epoch": 0.6891749655898308, "grad_norm": 31.858644485473633, "learning_rate": 3.1104274611398967e-06, "loss": 0.5804, "mean_token_accuracy": 0.9147057235240936, "num_tokens": 7625308.0, "step": 4256 }, { "epoch": 0.6893368957979111, "grad_norm": 33.77197265625, "learning_rate": 3.1088082901554407e-06, "loss": 0.5978, "mean_token_accuracy": 0.906272828578949, "num_tokens": 7627097.0, "step": 4257 }, { "epoch": 0.6894988260059914, "grad_norm": 28.079593658447266, "learning_rate": 3.1071891191709847e-06, "loss": 0.6073, "mean_token_accuracy": 0.9194001257419586, "num_tokens": 7628894.0, "step": 4258 }, { "epoch": 0.6896607562140717, "grad_norm": 24.356002807617188, "learning_rate": 3.1055699481865287e-06, "loss": 0.5828, "mean_token_accuracy": 0.9141001403331757, "num_tokens": 7630697.0, "step": 4259 }, { "epoch": 0.689822686422152, "grad_norm": 23.325708389282227, "learning_rate": 3.1039507772020727e-06, "loss": 0.5866, "mean_token_accuracy": 0.9172928631305695, "num_tokens": 7632488.0, "step": 4260 }, { "epoch": 0.6899846166302324, "grad_norm": 27.013219833374023, "learning_rate": 3.1023316062176168e-06, "loss": 0.7161, "mean_token_accuracy": 0.9067800939083099, "num_tokens": 7634279.0, "step": 4261 }, { "epoch": 0.6901465468383127, "grad_norm": 28.254985809326172, "learning_rate": 3.1007124352331608e-06, "loss": 0.6293, "mean_token_accuracy": 0.9079623520374298, "num_tokens": 7636063.0, "step": 4262 }, { "epoch": 0.690308477046393, "grad_norm": 32.78112030029297, "learning_rate": 3.0990932642487048e-06, "loss": 0.8793, "mean_token_accuracy": 0.8981858789920807, "num_tokens": 7637869.0, "step": 4263 }, { "epoch": 0.6904704072544733, "grad_norm": 23.27775001525879, "learning_rate": 3.097474093264249e-06, "loss": 0.5121, "mean_token_accuracy": 0.9230295717716217, "num_tokens": 7639666.0, "step": 4264 }, { "epoch": 0.6906323374625536, "grad_norm": 22.289094924926758, "learning_rate": 3.095854922279793e-06, "loss": 0.4922, "mean_token_accuracy": 0.9306526780128479, "num_tokens": 7641453.0, "step": 4265 }, { "epoch": 0.690794267670634, "grad_norm": 24.208086013793945, "learning_rate": 3.094235751295337e-06, "loss": 0.5308, "mean_token_accuracy": 0.9287993907928467, "num_tokens": 7643246.0, "step": 4266 }, { "epoch": 0.6909561978787143, "grad_norm": 30.610450744628906, "learning_rate": 3.092616580310881e-06, "loss": 0.6017, "mean_token_accuracy": 0.9104995429515839, "num_tokens": 7645037.0, "step": 4267 }, { "epoch": 0.6911181280867946, "grad_norm": 37.16124725341797, "learning_rate": 3.090997409326425e-06, "loss": 0.8379, "mean_token_accuracy": 0.8937198221683502, "num_tokens": 7646831.0, "step": 4268 }, { "epoch": 0.6912800582948749, "grad_norm": 21.48700714111328, "learning_rate": 3.089378238341969e-06, "loss": 0.5071, "mean_token_accuracy": 0.930820107460022, "num_tokens": 7648618.0, "step": 4269 }, { "epoch": 0.6914419885029552, "grad_norm": 26.752824783325195, "learning_rate": 3.087759067357513e-06, "loss": 0.5357, "mean_token_accuracy": 0.9242281913757324, "num_tokens": 7650407.0, "step": 4270 }, { "epoch": 0.6916039187110355, "grad_norm": 21.06236457824707, "learning_rate": 3.0861398963730573e-06, "loss": 0.6069, "mean_token_accuracy": 0.9192849397659302, "num_tokens": 7652204.0, "step": 4271 }, { "epoch": 0.6917658489191159, "grad_norm": 18.28443145751953, "learning_rate": 3.0845207253886013e-06, "loss": 0.4806, "mean_token_accuracy": 0.93425053358078, "num_tokens": 7653990.0, "step": 4272 }, { "epoch": 0.6919277791271962, "grad_norm": 19.159950256347656, "learning_rate": 3.0829015544041453e-06, "loss": 0.5474, "mean_token_accuracy": 0.9350432753562927, "num_tokens": 7655779.0, "step": 4273 }, { "epoch": 0.6920897093352765, "grad_norm": 26.919113159179688, "learning_rate": 3.0812823834196893e-06, "loss": 0.5996, "mean_token_accuracy": 0.9255533218383789, "num_tokens": 7657573.0, "step": 4274 }, { "epoch": 0.6922516395433568, "grad_norm": 21.134170532226562, "learning_rate": 3.0796632124352334e-06, "loss": 0.5444, "mean_token_accuracy": 0.9240978360176086, "num_tokens": 7659362.0, "step": 4275 }, { "epoch": 0.6924135697514371, "grad_norm": 21.6868839263916, "learning_rate": 3.0780440414507774e-06, "loss": 0.5036, "mean_token_accuracy": 0.9319303333759308, "num_tokens": 7661153.0, "step": 4276 }, { "epoch": 0.6925754999595174, "grad_norm": 23.270296096801758, "learning_rate": 3.0764248704663214e-06, "loss": 0.5146, "mean_token_accuracy": 0.9315811395645142, "num_tokens": 7662942.0, "step": 4277 }, { "epoch": 0.6927374301675978, "grad_norm": 26.62202262878418, "learning_rate": 3.0748056994818654e-06, "loss": 0.6611, "mean_token_accuracy": 0.9183098375797272, "num_tokens": 7664736.0, "step": 4278 }, { "epoch": 0.6928993603756781, "grad_norm": 42.976409912109375, "learning_rate": 3.0731865284974094e-06, "loss": 0.7277, "mean_token_accuracy": 0.9002669751644135, "num_tokens": 7666538.0, "step": 4279 }, { "epoch": 0.6930612905837584, "grad_norm": 23.27865219116211, "learning_rate": 3.0715673575129534e-06, "loss": 0.5562, "mean_token_accuracy": 0.9337849617004395, "num_tokens": 7668337.0, "step": 4280 }, { "epoch": 0.6932232207918387, "grad_norm": 36.487632751464844, "learning_rate": 3.0699481865284974e-06, "loss": 0.6443, "mean_token_accuracy": 0.9060479700565338, "num_tokens": 7670126.0, "step": 4281 }, { "epoch": 0.693385150999919, "grad_norm": 41.34493637084961, "learning_rate": 3.0683290155440415e-06, "loss": 0.776, "mean_token_accuracy": 0.8948439657688141, "num_tokens": 7671915.0, "step": 4282 }, { "epoch": 0.6935470812079993, "grad_norm": 25.194103240966797, "learning_rate": 3.0667098445595855e-06, "loss": 0.4887, "mean_token_accuracy": 0.9239130616188049, "num_tokens": 7673703.0, "step": 4283 }, { "epoch": 0.6937090114160797, "grad_norm": 15.45263385772705, "learning_rate": 3.0650906735751295e-06, "loss": 0.4725, "mean_token_accuracy": 0.9233576655387878, "num_tokens": 7675489.0, "step": 4284 }, { "epoch": 0.69387094162416, "grad_norm": 25.46445083618164, "learning_rate": 3.0634715025906735e-06, "loss": 0.5488, "mean_token_accuracy": 0.9295215606689453, "num_tokens": 7677285.0, "step": 4285 }, { "epoch": 0.6940328718322403, "grad_norm": 18.48753547668457, "learning_rate": 3.0618523316062175e-06, "loss": 0.4952, "mean_token_accuracy": 0.9313940107822418, "num_tokens": 7679074.0, "step": 4286 }, { "epoch": 0.6941948020403206, "grad_norm": 23.47724151611328, "learning_rate": 3.0602331606217615e-06, "loss": 0.6217, "mean_token_accuracy": 0.9191176295280457, "num_tokens": 7680858.0, "step": 4287 }, { "epoch": 0.6943567322484009, "grad_norm": 23.56111717224121, "learning_rate": 3.0586139896373055e-06, "loss": 0.5676, "mean_token_accuracy": 0.9166666567325592, "num_tokens": 7682645.0, "step": 4288 }, { "epoch": 0.6945186624564813, "grad_norm": 26.695964813232422, "learning_rate": 3.0569948186528495e-06, "loss": 0.637, "mean_token_accuracy": 0.9152795374393463, "num_tokens": 7684431.0, "step": 4289 }, { "epoch": 0.6946805926645616, "grad_norm": 24.464126586914062, "learning_rate": 3.055375647668394e-06, "loss": 0.6472, "mean_token_accuracy": 0.9179058969020844, "num_tokens": 7686211.0, "step": 4290 }, { "epoch": 0.6948425228726419, "grad_norm": 29.139787673950195, "learning_rate": 3.053756476683938e-06, "loss": 0.5642, "mean_token_accuracy": 0.9081889390945435, "num_tokens": 7688005.0, "step": 4291 }, { "epoch": 0.6950044530807222, "grad_norm": 29.348642349243164, "learning_rate": 3.052137305699482e-06, "loss": 0.5124, "mean_token_accuracy": 0.9227604269981384, "num_tokens": 7689802.0, "step": 4292 }, { "epoch": 0.6951663832888025, "grad_norm": 21.27211570739746, "learning_rate": 3.050518134715026e-06, "loss": 0.5271, "mean_token_accuracy": 0.9242961406707764, "num_tokens": 7691591.0, "step": 4293 }, { "epoch": 0.6953283134968828, "grad_norm": 26.362018585205078, "learning_rate": 3.04889896373057e-06, "loss": 0.5568, "mean_token_accuracy": 0.9265734255313873, "num_tokens": 7693389.0, "step": 4294 }, { "epoch": 0.6954902437049632, "grad_norm": 31.844682693481445, "learning_rate": 3.047279792746114e-06, "loss": 0.5831, "mean_token_accuracy": 0.918739378452301, "num_tokens": 7695184.0, "step": 4295 }, { "epoch": 0.6956521739130435, "grad_norm": 21.85762596130371, "learning_rate": 3.0456606217616585e-06, "loss": 0.5585, "mean_token_accuracy": 0.9192603230476379, "num_tokens": 7696981.0, "step": 4296 }, { "epoch": 0.6958141041211238, "grad_norm": 31.3807315826416, "learning_rate": 3.0440414507772025e-06, "loss": 0.5828, "mean_token_accuracy": 0.9163228571414948, "num_tokens": 7698768.0, "step": 4297 }, { "epoch": 0.6959760343292041, "grad_norm": 26.4704532623291, "learning_rate": 3.0424222797927465e-06, "loss": 0.5622, "mean_token_accuracy": 0.9224869906902313, "num_tokens": 7700563.0, "step": 4298 }, { "epoch": 0.6961379645372844, "grad_norm": 32.80046081542969, "learning_rate": 3.0408031088082905e-06, "loss": 0.6378, "mean_token_accuracy": 0.9093822836875916, "num_tokens": 7702350.0, "step": 4299 }, { "epoch": 0.6962998947453648, "grad_norm": 16.963529586791992, "learning_rate": 3.0391839378238345e-06, "loss": 0.4839, "mean_token_accuracy": 0.9406462609767914, "num_tokens": 7704149.0, "step": 4300 }, { "epoch": 0.6964618249534451, "grad_norm": 29.477937698364258, "learning_rate": 3.0375647668393785e-06, "loss": 0.6388, "mean_token_accuracy": 0.918635904788971, "num_tokens": 7705942.0, "step": 4301 }, { "epoch": 0.6966237551615254, "grad_norm": 27.048709869384766, "learning_rate": 3.0359455958549226e-06, "loss": 0.641, "mean_token_accuracy": 0.9206465184688568, "num_tokens": 7707731.0, "step": 4302 }, { "epoch": 0.6967856853696057, "grad_norm": 28.267881393432617, "learning_rate": 3.034326424870467e-06, "loss": 0.5988, "mean_token_accuracy": 0.9135975241661072, "num_tokens": 7709521.0, "step": 4303 }, { "epoch": 0.696947615577686, "grad_norm": 24.507450103759766, "learning_rate": 3.032707253886011e-06, "loss": 0.6163, "mean_token_accuracy": 0.9230769276618958, "num_tokens": 7711319.0, "step": 4304 }, { "epoch": 0.6971095457857663, "grad_norm": 29.955137252807617, "learning_rate": 3.031088082901555e-06, "loss": 0.5741, "mean_token_accuracy": 0.9198475182056427, "num_tokens": 7713118.0, "step": 4305 }, { "epoch": 0.6972714759938466, "grad_norm": 18.128271102905273, "learning_rate": 3.029468911917099e-06, "loss": 0.4972, "mean_token_accuracy": 0.9311560094356537, "num_tokens": 7714907.0, "step": 4306 }, { "epoch": 0.697433406201927, "grad_norm": 35.9201545715332, "learning_rate": 3.027849740932643e-06, "loss": 0.669, "mean_token_accuracy": 0.9038960933685303, "num_tokens": 7716691.0, "step": 4307 }, { "epoch": 0.6975953364100073, "grad_norm": 24.874080657958984, "learning_rate": 3.026230569948187e-06, "loss": 0.5187, "mean_token_accuracy": 0.9295327067375183, "num_tokens": 7718488.0, "step": 4308 }, { "epoch": 0.6977572666180876, "grad_norm": 30.239694595336914, "learning_rate": 3.024611398963731e-06, "loss": 0.6013, "mean_token_accuracy": 0.9156638383865356, "num_tokens": 7720285.0, "step": 4309 }, { "epoch": 0.6979191968261679, "grad_norm": 33.590797424316406, "learning_rate": 3.022992227979275e-06, "loss": 0.5802, "mean_token_accuracy": 0.9171359837055206, "num_tokens": 7722074.0, "step": 4310 }, { "epoch": 0.6980811270342483, "grad_norm": 19.81442642211914, "learning_rate": 3.021373056994819e-06, "loss": 0.5003, "mean_token_accuracy": 0.9251798987388611, "num_tokens": 7723867.0, "step": 4311 }, { "epoch": 0.6982430572423286, "grad_norm": 33.890472412109375, "learning_rate": 3.019753886010363e-06, "loss": 0.6095, "mean_token_accuracy": 0.9172417223453522, "num_tokens": 7725668.0, "step": 4312 }, { "epoch": 0.6984049874504089, "grad_norm": 35.70350646972656, "learning_rate": 3.018134715025907e-06, "loss": 0.7301, "mean_token_accuracy": 0.9070101678371429, "num_tokens": 7727449.0, "step": 4313 }, { "epoch": 0.6985669176584892, "grad_norm": 33.863189697265625, "learning_rate": 3.016515544041451e-06, "loss": 0.7298, "mean_token_accuracy": 0.9046953022480011, "num_tokens": 7729244.0, "step": 4314 }, { "epoch": 0.6987288478665695, "grad_norm": 15.445845603942871, "learning_rate": 3.014896373056995e-06, "loss": 0.449, "mean_token_accuracy": 0.9350871741771698, "num_tokens": 7731034.0, "step": 4315 }, { "epoch": 0.6988907780746498, "grad_norm": 29.000606536865234, "learning_rate": 3.013277202072539e-06, "loss": 0.6496, "mean_token_accuracy": 0.9129368960857391, "num_tokens": 7732823.0, "step": 4316 }, { "epoch": 0.6990527082827301, "grad_norm": 29.32222557067871, "learning_rate": 3.011658031088083e-06, "loss": 0.6226, "mean_token_accuracy": 0.9175084233283997, "num_tokens": 7734602.0, "step": 4317 }, { "epoch": 0.6992146384908104, "grad_norm": 22.666032791137695, "learning_rate": 3.010038860103627e-06, "loss": 0.4835, "mean_token_accuracy": 0.9347517788410187, "num_tokens": 7736405.0, "step": 4318 }, { "epoch": 0.6993765686988908, "grad_norm": 30.00943374633789, "learning_rate": 3.008419689119171e-06, "loss": 0.6601, "mean_token_accuracy": 0.9130684435367584, "num_tokens": 7738194.0, "step": 4319 }, { "epoch": 0.6995384989069711, "grad_norm": 28.176820755004883, "learning_rate": 3.0068005181347152e-06, "loss": 0.5792, "mean_token_accuracy": 0.9087075293064117, "num_tokens": 7739990.0, "step": 4320 }, { "epoch": 0.6997004291150514, "grad_norm": 25.183198928833008, "learning_rate": 3.0051813471502592e-06, "loss": 0.5443, "mean_token_accuracy": 0.9227814376354218, "num_tokens": 7741787.0, "step": 4321 }, { "epoch": 0.6998623593231317, "grad_norm": 21.352293014526367, "learning_rate": 3.0035621761658037e-06, "loss": 0.5107, "mean_token_accuracy": 0.9190886914730072, "num_tokens": 7743584.0, "step": 4322 }, { "epoch": 0.7000242895312121, "grad_norm": 36.539398193359375, "learning_rate": 3.0019430051813477e-06, "loss": 0.7015, "mean_token_accuracy": 0.9082512557506561, "num_tokens": 7745381.0, "step": 4323 }, { "epoch": 0.7001862197392924, "grad_norm": 28.199247360229492, "learning_rate": 3.0003238341968917e-06, "loss": 0.6262, "mean_token_accuracy": 0.9255030155181885, "num_tokens": 7747175.0, "step": 4324 }, { "epoch": 0.7003481499473727, "grad_norm": 41.741004943847656, "learning_rate": 2.9987046632124357e-06, "loss": 0.6535, "mean_token_accuracy": 0.9109512269496918, "num_tokens": 7748967.0, "step": 4325 }, { "epoch": 0.700510080155453, "grad_norm": 34.35584259033203, "learning_rate": 2.9970854922279797e-06, "loss": 0.5946, "mean_token_accuracy": 0.9097178876399994, "num_tokens": 7750767.0, "step": 4326 }, { "epoch": 0.7006720103635333, "grad_norm": 32.58032989501953, "learning_rate": 2.9954663212435237e-06, "loss": 0.5794, "mean_token_accuracy": 0.9154135286808014, "num_tokens": 7752552.0, "step": 4327 }, { "epoch": 0.7008339405716136, "grad_norm": 27.898983001708984, "learning_rate": 2.9938471502590677e-06, "loss": 0.589, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 7754350.0, "step": 4328 }, { "epoch": 0.7009958707796939, "grad_norm": 26.859020233154297, "learning_rate": 2.9922279792746118e-06, "loss": 0.5983, "mean_token_accuracy": 0.9262315332889557, "num_tokens": 7756147.0, "step": 4329 }, { "epoch": 0.7011578009877742, "grad_norm": 27.141902923583984, "learning_rate": 2.9906088082901558e-06, "loss": 0.6475, "mean_token_accuracy": 0.9248120486736298, "num_tokens": 7757925.0, "step": 4330 }, { "epoch": 0.7013197311958546, "grad_norm": 17.903593063354492, "learning_rate": 2.9889896373057e-06, "loss": 0.4808, "mean_token_accuracy": 0.9269501268863678, "num_tokens": 7759711.0, "step": 4331 }, { "epoch": 0.7014816614039349, "grad_norm": 29.495555877685547, "learning_rate": 2.987370466321244e-06, "loss": 0.6019, "mean_token_accuracy": 0.9200627207756042, "num_tokens": 7761511.0, "step": 4332 }, { "epoch": 0.7016435916120152, "grad_norm": 18.326393127441406, "learning_rate": 2.985751295336788e-06, "loss": 0.4862, "mean_token_accuracy": 0.932802140712738, "num_tokens": 7763291.0, "step": 4333 }, { "epoch": 0.7018055218200956, "grad_norm": 28.416234970092773, "learning_rate": 2.984132124352332e-06, "loss": 0.5807, "mean_token_accuracy": 0.9195520281791687, "num_tokens": 7765089.0, "step": 4334 }, { "epoch": 0.7019674520281759, "grad_norm": 29.08116340637207, "learning_rate": 2.982512953367876e-06, "loss": 0.5336, "mean_token_accuracy": 0.9219858050346375, "num_tokens": 7766883.0, "step": 4335 }, { "epoch": 0.7021293822362562, "grad_norm": 27.274030685424805, "learning_rate": 2.98089378238342e-06, "loss": 0.6151, "mean_token_accuracy": 0.91094771027565, "num_tokens": 7768675.0, "step": 4336 }, { "epoch": 0.7022913124443365, "grad_norm": 32.33250045776367, "learning_rate": 2.979274611398964e-06, "loss": 0.6556, "mean_token_accuracy": 0.8999382853507996, "num_tokens": 7770466.0, "step": 4337 }, { "epoch": 0.7024532426524168, "grad_norm": 21.898845672607422, "learning_rate": 2.977655440414508e-06, "loss": 0.554, "mean_token_accuracy": 0.9254246950149536, "num_tokens": 7772246.0, "step": 4338 }, { "epoch": 0.7026151728604971, "grad_norm": 33.45396423339844, "learning_rate": 2.976036269430052e-06, "loss": 0.5589, "mean_token_accuracy": 0.9080985188484192, "num_tokens": 7774041.0, "step": 4339 }, { "epoch": 0.7027771030685774, "grad_norm": 21.81068229675293, "learning_rate": 2.9744170984455963e-06, "loss": 0.5188, "mean_token_accuracy": 0.9322995841503143, "num_tokens": 7775834.0, "step": 4340 }, { "epoch": 0.7029390332766577, "grad_norm": 21.69127082824707, "learning_rate": 2.9727979274611403e-06, "loss": 0.4695, "mean_token_accuracy": 0.9313203990459442, "num_tokens": 7777623.0, "step": 4341 }, { "epoch": 0.703100963484738, "grad_norm": 29.01365852355957, "learning_rate": 2.9711787564766844e-06, "loss": 0.5941, "mean_token_accuracy": 0.9241737127304077, "num_tokens": 7779412.0, "step": 4342 }, { "epoch": 0.7032628936928184, "grad_norm": 25.276811599731445, "learning_rate": 2.9695595854922284e-06, "loss": 0.5165, "mean_token_accuracy": 0.9357620477676392, "num_tokens": 7781204.0, "step": 4343 }, { "epoch": 0.7034248239008987, "grad_norm": 15.474091529846191, "learning_rate": 2.9679404145077724e-06, "loss": 0.4697, "mean_token_accuracy": 0.93312007188797, "num_tokens": 7783000.0, "step": 4344 }, { "epoch": 0.7035867541089791, "grad_norm": 38.84894561767578, "learning_rate": 2.9663212435233164e-06, "loss": 0.5634, "mean_token_accuracy": 0.915492981672287, "num_tokens": 7784796.0, "step": 4345 }, { "epoch": 0.7037486843170594, "grad_norm": 29.220617294311523, "learning_rate": 2.9647020725388604e-06, "loss": 0.527, "mean_token_accuracy": 0.9233440160751343, "num_tokens": 7786595.0, "step": 4346 }, { "epoch": 0.7039106145251397, "grad_norm": 36.90061950683594, "learning_rate": 2.9630829015544044e-06, "loss": 0.8594, "mean_token_accuracy": 0.9002314805984497, "num_tokens": 7788386.0, "step": 4347 }, { "epoch": 0.70407254473322, "grad_norm": 36.549171447753906, "learning_rate": 2.9614637305699484e-06, "loss": 0.6485, "mean_token_accuracy": 0.8953166306018829, "num_tokens": 7790185.0, "step": 4348 }, { "epoch": 0.7042344749413003, "grad_norm": 21.962106704711914, "learning_rate": 2.9598445595854924e-06, "loss": 0.6139, "mean_token_accuracy": 0.9254679083824158, "num_tokens": 7791965.0, "step": 4349 }, { "epoch": 0.7043964051493806, "grad_norm": 24.022844314575195, "learning_rate": 2.9582253886010365e-06, "loss": 0.4825, "mean_token_accuracy": 0.9324262738227844, "num_tokens": 7793758.0, "step": 4350 }, { "epoch": 0.7045583353574609, "grad_norm": 43.5592155456543, "learning_rate": 2.9566062176165805e-06, "loss": 0.6465, "mean_token_accuracy": 0.9098112881183624, "num_tokens": 7795547.0, "step": 4351 }, { "epoch": 0.7047202655655412, "grad_norm": 38.72248458862305, "learning_rate": 2.9549870466321245e-06, "loss": 0.6066, "mean_token_accuracy": 0.8943609595298767, "num_tokens": 7797343.0, "step": 4352 }, { "epoch": 0.7048821957736215, "grad_norm": 33.18864059448242, "learning_rate": 2.9533678756476685e-06, "loss": 0.6814, "mean_token_accuracy": 0.9085443913936615, "num_tokens": 7799140.0, "step": 4353 }, { "epoch": 0.7050441259817019, "grad_norm": 19.67289924621582, "learning_rate": 2.9517487046632125e-06, "loss": 0.4543, "mean_token_accuracy": 0.9323961734771729, "num_tokens": 7800933.0, "step": 4354 }, { "epoch": 0.7052060561897822, "grad_norm": 18.349987030029297, "learning_rate": 2.9501295336787565e-06, "loss": 0.4813, "mean_token_accuracy": 0.9336122870445251, "num_tokens": 7802731.0, "step": 4355 }, { "epoch": 0.7053679863978625, "grad_norm": 17.120546340942383, "learning_rate": 2.9485103626943005e-06, "loss": 0.4586, "mean_token_accuracy": 0.935251772403717, "num_tokens": 7804521.0, "step": 4356 }, { "epoch": 0.7055299166059429, "grad_norm": 25.03181266784668, "learning_rate": 2.9468911917098446e-06, "loss": 0.6299, "mean_token_accuracy": 0.9258608222007751, "num_tokens": 7806303.0, "step": 4357 }, { "epoch": 0.7056918468140232, "grad_norm": 23.878915786743164, "learning_rate": 2.9452720207253886e-06, "loss": 0.5284, "mean_token_accuracy": 0.9212084114551544, "num_tokens": 7808095.0, "step": 4358 }, { "epoch": 0.7058537770221035, "grad_norm": 19.38775634765625, "learning_rate": 2.943652849740933e-06, "loss": 0.483, "mean_token_accuracy": 0.9377751350402832, "num_tokens": 7809880.0, "step": 4359 }, { "epoch": 0.7060157072301838, "grad_norm": 28.026166915893555, "learning_rate": 2.942033678756477e-06, "loss": 0.609, "mean_token_accuracy": 0.9195478558540344, "num_tokens": 7811677.0, "step": 4360 }, { "epoch": 0.7061776374382641, "grad_norm": 42.87164306640625, "learning_rate": 2.940414507772021e-06, "loss": 0.9019, "mean_token_accuracy": 0.912478119134903, "num_tokens": 7813476.0, "step": 4361 }, { "epoch": 0.7063395676463444, "grad_norm": 37.81462478637695, "learning_rate": 2.938795336787565e-06, "loss": 0.6164, "mean_token_accuracy": 0.9110561013221741, "num_tokens": 7815268.0, "step": 4362 }, { "epoch": 0.7065014978544247, "grad_norm": 39.03481674194336, "learning_rate": 2.937176165803109e-06, "loss": 0.6615, "mean_token_accuracy": 0.916801244020462, "num_tokens": 7817066.0, "step": 4363 }, { "epoch": 0.706663428062505, "grad_norm": 36.72638702392578, "learning_rate": 2.935556994818653e-06, "loss": 0.8502, "mean_token_accuracy": 0.8996493816375732, "num_tokens": 7818871.0, "step": 4364 }, { "epoch": 0.7068253582705853, "grad_norm": 33.811092376708984, "learning_rate": 2.933937823834197e-06, "loss": 0.6034, "mean_token_accuracy": 0.9201717376708984, "num_tokens": 7820671.0, "step": 4365 }, { "epoch": 0.7069872884786657, "grad_norm": 37.00471115112305, "learning_rate": 2.932318652849741e-06, "loss": 0.644, "mean_token_accuracy": 0.905694991350174, "num_tokens": 7822471.0, "step": 4366 }, { "epoch": 0.707149218686746, "grad_norm": 28.73087501525879, "learning_rate": 2.930699481865285e-06, "loss": 0.4927, "mean_token_accuracy": 0.9208633303642273, "num_tokens": 7824261.0, "step": 4367 }, { "epoch": 0.7073111488948264, "grad_norm": 25.49989891052246, "learning_rate": 2.929080310880829e-06, "loss": 0.5105, "mean_token_accuracy": 0.9233683049678802, "num_tokens": 7826048.0, "step": 4368 }, { "epoch": 0.7074730791029067, "grad_norm": 24.82110595703125, "learning_rate": 2.927461139896373e-06, "loss": 0.4955, "mean_token_accuracy": 0.9296235740184784, "num_tokens": 7827844.0, "step": 4369 }, { "epoch": 0.707635009310987, "grad_norm": 31.496789932250977, "learning_rate": 2.925841968911917e-06, "loss": 0.5895, "mean_token_accuracy": 0.9057525396347046, "num_tokens": 7829632.0, "step": 4370 }, { "epoch": 0.7077969395190673, "grad_norm": 34.46229934692383, "learning_rate": 2.924222797927461e-06, "loss": 0.5708, "mean_token_accuracy": 0.918371707201004, "num_tokens": 7831435.0, "step": 4371 }, { "epoch": 0.7079588697271476, "grad_norm": 29.72195816040039, "learning_rate": 2.922603626943005e-06, "loss": 0.5293, "mean_token_accuracy": 0.9192849397659302, "num_tokens": 7833232.0, "step": 4372 }, { "epoch": 0.7081207999352279, "grad_norm": 30.362388610839844, "learning_rate": 2.920984455958549e-06, "loss": 0.5725, "mean_token_accuracy": 0.9190140962600708, "num_tokens": 7835028.0, "step": 4373 }, { "epoch": 0.7082827301433082, "grad_norm": 30.46919059753418, "learning_rate": 2.919365284974093e-06, "loss": 0.71, "mean_token_accuracy": 0.9212526381015778, "num_tokens": 7836832.0, "step": 4374 }, { "epoch": 0.7084446603513885, "grad_norm": 27.92829132080078, "learning_rate": 2.917746113989637e-06, "loss": 0.5855, "mean_token_accuracy": 0.9133029878139496, "num_tokens": 7838633.0, "step": 4375 }, { "epoch": 0.7086065905594688, "grad_norm": 38.85553741455078, "learning_rate": 2.9161269430051812e-06, "loss": 0.7465, "mean_token_accuracy": 0.907444030046463, "num_tokens": 7840415.0, "step": 4376 }, { "epoch": 0.7087685207675491, "grad_norm": 35.91222381591797, "learning_rate": 2.9145077720207252e-06, "loss": 0.6461, "mean_token_accuracy": 0.9150060415267944, "num_tokens": 7842209.0, "step": 4377 }, { "epoch": 0.7089304509756295, "grad_norm": 33.09619140625, "learning_rate": 2.9128886010362697e-06, "loss": 0.837, "mean_token_accuracy": 0.8928003907203674, "num_tokens": 7844014.0, "step": 4378 }, { "epoch": 0.7090923811837099, "grad_norm": 35.421180725097656, "learning_rate": 2.9112694300518137e-06, "loss": 0.6862, "mean_token_accuracy": 0.913329154253006, "num_tokens": 7845803.0, "step": 4379 }, { "epoch": 0.7092543113917902, "grad_norm": 26.26021957397461, "learning_rate": 2.9096502590673577e-06, "loss": 0.538, "mean_token_accuracy": 0.9158168733119965, "num_tokens": 7847588.0, "step": 4380 }, { "epoch": 0.7094162415998705, "grad_norm": 26.682254791259766, "learning_rate": 2.9080310880829017e-06, "loss": 0.6159, "mean_token_accuracy": 0.9165966212749481, "num_tokens": 7849376.0, "step": 4381 }, { "epoch": 0.7095781718079508, "grad_norm": 31.46900749206543, "learning_rate": 2.9064119170984457e-06, "loss": 0.7501, "mean_token_accuracy": 0.9051958322525024, "num_tokens": 7851162.0, "step": 4382 }, { "epoch": 0.7097401020160311, "grad_norm": 23.13848114013672, "learning_rate": 2.9047927461139897e-06, "loss": 0.5271, "mean_token_accuracy": 0.9225352108478546, "num_tokens": 7852958.0, "step": 4383 }, { "epoch": 0.7099020322241114, "grad_norm": 31.579944610595703, "learning_rate": 2.9031735751295338e-06, "loss": 0.5524, "mean_token_accuracy": 0.9226190447807312, "num_tokens": 7854754.0, "step": 4384 }, { "epoch": 0.7100639624321917, "grad_norm": 40.66126251220703, "learning_rate": 2.9015544041450778e-06, "loss": 0.7051, "mean_token_accuracy": 0.9120703339576721, "num_tokens": 7856558.0, "step": 4385 }, { "epoch": 0.710225892640272, "grad_norm": 31.179441452026367, "learning_rate": 2.8999352331606218e-06, "loss": 0.6122, "mean_token_accuracy": 0.9107279777526855, "num_tokens": 7858350.0, "step": 4386 }, { "epoch": 0.7103878228483523, "grad_norm": 31.148160934448242, "learning_rate": 2.898316062176166e-06, "loss": 0.5199, "mean_token_accuracy": 0.9174720048904419, "num_tokens": 7860141.0, "step": 4387 }, { "epoch": 0.7105497530564326, "grad_norm": 25.27857208251953, "learning_rate": 2.89669689119171e-06, "loss": 0.5794, "mean_token_accuracy": 0.9199904799461365, "num_tokens": 7861928.0, "step": 4388 }, { "epoch": 0.710711683264513, "grad_norm": 18.702856063842773, "learning_rate": 2.895077720207254e-06, "loss": 0.4812, "mean_token_accuracy": 0.9325181245803833, "num_tokens": 7863722.0, "step": 4389 }, { "epoch": 0.7108736134725933, "grad_norm": 30.728200912475586, "learning_rate": 2.893458549222798e-06, "loss": 0.6522, "mean_token_accuracy": 0.9195520281791687, "num_tokens": 7865520.0, "step": 4390 }, { "epoch": 0.7110355436806737, "grad_norm": 26.364105224609375, "learning_rate": 2.891839378238342e-06, "loss": 0.6409, "mean_token_accuracy": 0.920550525188446, "num_tokens": 7867309.0, "step": 4391 }, { "epoch": 0.711197473888754, "grad_norm": 27.728567123413086, "learning_rate": 2.8902202072538867e-06, "loss": 0.4944, "mean_token_accuracy": 0.9181869029998779, "num_tokens": 7869102.0, "step": 4392 }, { "epoch": 0.7113594040968343, "grad_norm": 27.701045989990234, "learning_rate": 2.8886010362694307e-06, "loss": 0.7513, "mean_token_accuracy": 0.9184782803058624, "num_tokens": 7870884.0, "step": 4393 }, { "epoch": 0.7115213343049146, "grad_norm": 30.105783462524414, "learning_rate": 2.8869818652849747e-06, "loss": 0.6149, "mean_token_accuracy": 0.9230892956256866, "num_tokens": 7872669.0, "step": 4394 }, { "epoch": 0.7116832645129949, "grad_norm": 15.815881729125977, "learning_rate": 2.8853626943005187e-06, "loss": 0.4514, "mean_token_accuracy": 0.9305888414382935, "num_tokens": 7874455.0, "step": 4395 }, { "epoch": 0.7118451947210752, "grad_norm": 25.80306625366211, "learning_rate": 2.8837435233160628e-06, "loss": 0.6352, "mean_token_accuracy": 0.9146499931812286, "num_tokens": 7876239.0, "step": 4396 }, { "epoch": 0.7120071249291555, "grad_norm": 25.850324630737305, "learning_rate": 2.8821243523316068e-06, "loss": 0.5609, "mean_token_accuracy": 0.921527773141861, "num_tokens": 7878030.0, "step": 4397 }, { "epoch": 0.7121690551372358, "grad_norm": 18.85869598388672, "learning_rate": 2.8805051813471508e-06, "loss": 0.4535, "mean_token_accuracy": 0.9351348578929901, "num_tokens": 7879820.0, "step": 4398 }, { "epoch": 0.7123309853453161, "grad_norm": 35.36299514770508, "learning_rate": 2.878886010362695e-06, "loss": 0.852, "mean_token_accuracy": 0.8908450603485107, "num_tokens": 7881616.0, "step": 4399 }, { "epoch": 0.7124929155533964, "grad_norm": 25.404550552368164, "learning_rate": 2.877266839378239e-06, "loss": 0.582, "mean_token_accuracy": 0.9257492423057556, "num_tokens": 7883411.0, "step": 4400 }, { "epoch": 0.7126548457614768, "grad_norm": 27.899307250976562, "learning_rate": 2.875647668393783e-06, "loss": 0.5715, "mean_token_accuracy": 0.9274753034114838, "num_tokens": 7885199.0, "step": 4401 }, { "epoch": 0.7128167759695572, "grad_norm": 39.53618621826172, "learning_rate": 2.874028497409327e-06, "loss": 0.7018, "mean_token_accuracy": 0.8980299532413483, "num_tokens": 7886987.0, "step": 4402 }, { "epoch": 0.7129787061776375, "grad_norm": 26.686351776123047, "learning_rate": 2.872409326424871e-06, "loss": 0.5543, "mean_token_accuracy": 0.9277743101119995, "num_tokens": 7888776.0, "step": 4403 }, { "epoch": 0.7131406363857178, "grad_norm": 24.15049934387207, "learning_rate": 2.870790155440415e-06, "loss": 0.5368, "mean_token_accuracy": 0.9253731369972229, "num_tokens": 7890556.0, "step": 4404 }, { "epoch": 0.7133025665937981, "grad_norm": 31.79214096069336, "learning_rate": 2.869170984455959e-06, "loss": 0.5685, "mean_token_accuracy": 0.919584333896637, "num_tokens": 7892342.0, "step": 4405 }, { "epoch": 0.7134644968018784, "grad_norm": 24.539613723754883, "learning_rate": 2.867551813471503e-06, "loss": 0.5995, "mean_token_accuracy": 0.9186519980430603, "num_tokens": 7894138.0, "step": 4406 }, { "epoch": 0.7136264270099587, "grad_norm": 29.856191635131836, "learning_rate": 2.865932642487047e-06, "loss": 0.5708, "mean_token_accuracy": 0.9257739782333374, "num_tokens": 7895932.0, "step": 4407 }, { "epoch": 0.713788357218039, "grad_norm": 23.091562271118164, "learning_rate": 2.864313471502591e-06, "loss": 0.4961, "mean_token_accuracy": 0.9288030862808228, "num_tokens": 7897725.0, "step": 4408 }, { "epoch": 0.7139502874261193, "grad_norm": 32.099342346191406, "learning_rate": 2.862694300518135e-06, "loss": 0.5776, "mean_token_accuracy": 0.9060677886009216, "num_tokens": 7899513.0, "step": 4409 }, { "epoch": 0.7141122176341996, "grad_norm": 31.167266845703125, "learning_rate": 2.8610751295336794e-06, "loss": 0.6998, "mean_token_accuracy": 0.9065865576267242, "num_tokens": 7901303.0, "step": 4410 }, { "epoch": 0.7142741478422799, "grad_norm": 25.934648513793945, "learning_rate": 2.8594559585492234e-06, "loss": 0.6515, "mean_token_accuracy": 0.9172661900520325, "num_tokens": 7903093.0, "step": 4411 }, { "epoch": 0.7144360780503602, "grad_norm": 34.78670883178711, "learning_rate": 2.8578367875647674e-06, "loss": 0.7245, "mean_token_accuracy": 0.8984077274799347, "num_tokens": 7904880.0, "step": 4412 }, { "epoch": 0.7145980082584407, "grad_norm": 37.25510025024414, "learning_rate": 2.8562176165803114e-06, "loss": 0.7319, "mean_token_accuracy": 0.8980726301670074, "num_tokens": 7906667.0, "step": 4413 }, { "epoch": 0.714759938466521, "grad_norm": 16.740596771240234, "learning_rate": 2.8545984455958554e-06, "loss": 0.4514, "mean_token_accuracy": 0.938446968793869, "num_tokens": 7908455.0, "step": 4414 }, { "epoch": 0.7149218686746013, "grad_norm": 32.48001480102539, "learning_rate": 2.8529792746113994e-06, "loss": 0.7921, "mean_token_accuracy": 0.89768186211586, "num_tokens": 7910250.0, "step": 4415 }, { "epoch": 0.7150837988826816, "grad_norm": 25.95272445678711, "learning_rate": 2.8513601036269434e-06, "loss": 0.5786, "mean_token_accuracy": 0.9211378395557404, "num_tokens": 7912041.0, "step": 4416 }, { "epoch": 0.7152457290907619, "grad_norm": 24.422197341918945, "learning_rate": 2.8497409326424875e-06, "loss": 0.5009, "mean_token_accuracy": 0.9373228549957275, "num_tokens": 7913824.0, "step": 4417 }, { "epoch": 0.7154076592988422, "grad_norm": 27.63548469543457, "learning_rate": 2.8481217616580315e-06, "loss": 0.6108, "mean_token_accuracy": 0.9194128215312958, "num_tokens": 7915609.0, "step": 4418 }, { "epoch": 0.7155695895069225, "grad_norm": 24.415332794189453, "learning_rate": 2.8465025906735755e-06, "loss": 0.4754, "mean_token_accuracy": 0.9291534125804901, "num_tokens": 7917405.0, "step": 4419 }, { "epoch": 0.7157315197150028, "grad_norm": 40.579830169677734, "learning_rate": 2.8448834196891195e-06, "loss": 0.7516, "mean_token_accuracy": 0.9033996760845184, "num_tokens": 7919195.0, "step": 4420 }, { "epoch": 0.7158934499230831, "grad_norm": 32.452816009521484, "learning_rate": 2.8432642487046635e-06, "loss": 0.7971, "mean_token_accuracy": 0.8941283226013184, "num_tokens": 7920981.0, "step": 4421 }, { "epoch": 0.7160553801311634, "grad_norm": 17.314289093017578, "learning_rate": 2.8416450777202075e-06, "loss": 0.4769, "mean_token_accuracy": 0.9340579807758331, "num_tokens": 7922766.0, "step": 4422 }, { "epoch": 0.7162173103392437, "grad_norm": 26.498666763305664, "learning_rate": 2.8400259067357515e-06, "loss": 0.5645, "mean_token_accuracy": 0.9185185432434082, "num_tokens": 7924548.0, "step": 4423 }, { "epoch": 0.7163792405473242, "grad_norm": 25.58248519897461, "learning_rate": 2.8384067357512955e-06, "loss": 0.5523, "mean_token_accuracy": 0.9154388010501862, "num_tokens": 7926320.0, "step": 4424 }, { "epoch": 0.7165411707554045, "grad_norm": 34.241920471191406, "learning_rate": 2.8367875647668396e-06, "loss": 0.5666, "mean_token_accuracy": 0.9182370901107788, "num_tokens": 7928113.0, "step": 4425 }, { "epoch": 0.7167031009634848, "grad_norm": 28.584026336669922, "learning_rate": 2.8351683937823836e-06, "loss": 0.649, "mean_token_accuracy": 0.9256640374660492, "num_tokens": 7929906.0, "step": 4426 }, { "epoch": 0.7168650311715651, "grad_norm": 13.715279579162598, "learning_rate": 2.8335492227979276e-06, "loss": 0.4479, "mean_token_accuracy": 0.9361573457717896, "num_tokens": 7931700.0, "step": 4427 }, { "epoch": 0.7170269613796454, "grad_norm": 30.5115909576416, "learning_rate": 2.8319300518134716e-06, "loss": 0.5861, "mean_token_accuracy": 0.9213517606258392, "num_tokens": 7933480.0, "step": 4428 }, { "epoch": 0.7171888915877257, "grad_norm": 29.673900604248047, "learning_rate": 2.830310880829016e-06, "loss": 0.5741, "mean_token_accuracy": 0.9187915623188019, "num_tokens": 7935263.0, "step": 4429 }, { "epoch": 0.717350821795806, "grad_norm": 32.55278015136719, "learning_rate": 2.82869170984456e-06, "loss": 0.6214, "mean_token_accuracy": 0.9115812182426453, "num_tokens": 7937057.0, "step": 4430 }, { "epoch": 0.7175127520038863, "grad_norm": 25.465343475341797, "learning_rate": 2.827072538860104e-06, "loss": 0.5419, "mean_token_accuracy": 0.9177807569503784, "num_tokens": 7938848.0, "step": 4431 }, { "epoch": 0.7176746822119666, "grad_norm": 23.165142059326172, "learning_rate": 2.825453367875648e-06, "loss": 0.593, "mean_token_accuracy": 0.917548805475235, "num_tokens": 7940639.0, "step": 4432 }, { "epoch": 0.7178366124200469, "grad_norm": 26.611894607543945, "learning_rate": 2.823834196891192e-06, "loss": 0.5541, "mean_token_accuracy": 0.9219181835651398, "num_tokens": 7942420.0, "step": 4433 }, { "epoch": 0.7179985426281272, "grad_norm": 24.82908821105957, "learning_rate": 2.822215025906736e-06, "loss": 0.5788, "mean_token_accuracy": 0.9200018048286438, "num_tokens": 7944231.0, "step": 4434 }, { "epoch": 0.7181604728362075, "grad_norm": 33.258602142333984, "learning_rate": 2.82059585492228e-06, "loss": 0.5889, "mean_token_accuracy": 0.9182238876819611, "num_tokens": 7946036.0, "step": 4435 }, { "epoch": 0.718322403044288, "grad_norm": 38.56667709350586, "learning_rate": 2.818976683937824e-06, "loss": 0.7323, "mean_token_accuracy": 0.9082205593585968, "num_tokens": 7947843.0, "step": 4436 }, { "epoch": 0.7184843332523683, "grad_norm": 29.333974838256836, "learning_rate": 2.817357512953368e-06, "loss": 0.5525, "mean_token_accuracy": 0.9229983687400818, "num_tokens": 7949652.0, "step": 4437 }, { "epoch": 0.7186462634604486, "grad_norm": 34.60757827758789, "learning_rate": 2.815738341968912e-06, "loss": 0.674, "mean_token_accuracy": 0.9056751430034637, "num_tokens": 7951459.0, "step": 4438 }, { "epoch": 0.7188081936685289, "grad_norm": 33.5838737487793, "learning_rate": 2.814119170984456e-06, "loss": 0.6963, "mean_token_accuracy": 0.9107142686843872, "num_tokens": 7953251.0, "step": 4439 }, { "epoch": 0.7189701238766092, "grad_norm": 28.21237564086914, "learning_rate": 2.8125e-06, "loss": 0.5349, "mean_token_accuracy": 0.9224817752838135, "num_tokens": 7955047.0, "step": 4440 }, { "epoch": 0.7191320540846895, "grad_norm": 24.667551040649414, "learning_rate": 2.810880829015544e-06, "loss": 0.5641, "mean_token_accuracy": 0.9178914129734039, "num_tokens": 7956839.0, "step": 4441 }, { "epoch": 0.7192939842927698, "grad_norm": 20.769487380981445, "learning_rate": 2.809261658031088e-06, "loss": 0.5314, "mean_token_accuracy": 0.9259096682071686, "num_tokens": 7958621.0, "step": 4442 }, { "epoch": 0.7194559145008501, "grad_norm": 24.97512435913086, "learning_rate": 2.8076424870466322e-06, "loss": 0.4959, "mean_token_accuracy": 0.9330339431762695, "num_tokens": 7960418.0, "step": 4443 }, { "epoch": 0.7196178447089304, "grad_norm": 23.201566696166992, "learning_rate": 2.8060233160621762e-06, "loss": 0.5209, "mean_token_accuracy": 0.9287699460983276, "num_tokens": 7962209.0, "step": 4444 }, { "epoch": 0.7197797749170107, "grad_norm": 30.520273208618164, "learning_rate": 2.8044041450777202e-06, "loss": 0.6005, "mean_token_accuracy": 0.9152927696704865, "num_tokens": 7964004.0, "step": 4445 }, { "epoch": 0.719941705125091, "grad_norm": 31.200077056884766, "learning_rate": 2.8027849740932643e-06, "loss": 0.5741, "mean_token_accuracy": 0.9131302535533905, "num_tokens": 7965792.0, "step": 4446 }, { "epoch": 0.7201036353331715, "grad_norm": 27.254308700561523, "learning_rate": 2.8011658031088083e-06, "loss": 0.7011, "mean_token_accuracy": 0.9178068339824677, "num_tokens": 7967585.0, "step": 4447 }, { "epoch": 0.7202655655412518, "grad_norm": 28.498950958251953, "learning_rate": 2.7995466321243527e-06, "loss": 0.5694, "mean_token_accuracy": 0.9281594455242157, "num_tokens": 7969376.0, "step": 4448 }, { "epoch": 0.7204274957493321, "grad_norm": 37.301177978515625, "learning_rate": 2.7979274611398967e-06, "loss": 0.7295, "mean_token_accuracy": 0.9143702983856201, "num_tokens": 7971168.0, "step": 4449 }, { "epoch": 0.7205894259574124, "grad_norm": 17.07199478149414, "learning_rate": 2.7963082901554407e-06, "loss": 0.4634, "mean_token_accuracy": 0.9329670369625092, "num_tokens": 7972963.0, "step": 4450 }, { "epoch": 0.7207513561654927, "grad_norm": 28.770893096923828, "learning_rate": 2.7946891191709847e-06, "loss": 0.5203, "mean_token_accuracy": 0.9230273962020874, "num_tokens": 7974748.0, "step": 4451 }, { "epoch": 0.720913286373573, "grad_norm": 23.010498046875, "learning_rate": 2.7930699481865288e-06, "loss": 0.5193, "mean_token_accuracy": 0.9219458401203156, "num_tokens": 7976529.0, "step": 4452 }, { "epoch": 0.7210752165816533, "grad_norm": 20.701244354248047, "learning_rate": 2.7914507772020728e-06, "loss": 0.5074, "mean_token_accuracy": 0.9202856719493866, "num_tokens": 7978317.0, "step": 4453 }, { "epoch": 0.7212371467897336, "grad_norm": 21.25913429260254, "learning_rate": 2.7898316062176168e-06, "loss": 0.4822, "mean_token_accuracy": 0.9330285787582397, "num_tokens": 7980112.0, "step": 4454 }, { "epoch": 0.7213990769978139, "grad_norm": 16.468021392822266, "learning_rate": 2.788212435233161e-06, "loss": 0.4816, "mean_token_accuracy": 0.9295434653759003, "num_tokens": 7981894.0, "step": 4455 }, { "epoch": 0.7215610072058942, "grad_norm": 31.411575317382812, "learning_rate": 2.786593264248705e-06, "loss": 0.7138, "mean_token_accuracy": 0.9133903086185455, "num_tokens": 7983684.0, "step": 4456 }, { "epoch": 0.7217229374139745, "grad_norm": 29.316164016723633, "learning_rate": 2.784974093264249e-06, "loss": 0.6085, "mean_token_accuracy": 0.9164022207260132, "num_tokens": 7985471.0, "step": 4457 }, { "epoch": 0.721884867622055, "grad_norm": 39.83155059814453, "learning_rate": 2.783354922279793e-06, "loss": 0.803, "mean_token_accuracy": 0.898330569267273, "num_tokens": 7987268.0, "step": 4458 }, { "epoch": 0.7220467978301353, "grad_norm": 27.66533851623535, "learning_rate": 2.781735751295337e-06, "loss": 0.5158, "mean_token_accuracy": 0.924761027097702, "num_tokens": 7989059.0, "step": 4459 }, { "epoch": 0.7222087280382156, "grad_norm": 22.771656036376953, "learning_rate": 2.780116580310881e-06, "loss": 0.6052, "mean_token_accuracy": 0.9199460446834564, "num_tokens": 7990846.0, "step": 4460 }, { "epoch": 0.7223706582462959, "grad_norm": 21.277753829956055, "learning_rate": 2.778497409326425e-06, "loss": 0.4968, "mean_token_accuracy": 0.9264546930789948, "num_tokens": 7992630.0, "step": 4461 }, { "epoch": 0.7225325884543762, "grad_norm": 32.62319564819336, "learning_rate": 2.776878238341969e-06, "loss": 0.6276, "mean_token_accuracy": 0.9145377278327942, "num_tokens": 7994423.0, "step": 4462 }, { "epoch": 0.7226945186624565, "grad_norm": 23.01576805114746, "learning_rate": 2.775259067357513e-06, "loss": 0.5065, "mean_token_accuracy": 0.9253723621368408, "num_tokens": 7996216.0, "step": 4463 }, { "epoch": 0.7228564488705368, "grad_norm": 22.82634162902832, "learning_rate": 2.773639896373057e-06, "loss": 0.5065, "mean_token_accuracy": 0.9188725650310516, "num_tokens": 7997999.0, "step": 4464 }, { "epoch": 0.7230183790786171, "grad_norm": 24.165660858154297, "learning_rate": 2.772020725388601e-06, "loss": 0.5013, "mean_token_accuracy": 0.9330986142158508, "num_tokens": 7999795.0, "step": 4465 }, { "epoch": 0.7231803092866974, "grad_norm": 30.761465072631836, "learning_rate": 2.7704015544041454e-06, "loss": 0.5493, "mean_token_accuracy": 0.9239272475242615, "num_tokens": 8001610.0, "step": 4466 }, { "epoch": 0.7233422394947777, "grad_norm": 24.10455894470215, "learning_rate": 2.7687823834196894e-06, "loss": 0.7414, "mean_token_accuracy": 0.8966230750083923, "num_tokens": 8003393.0, "step": 4467 }, { "epoch": 0.723504169702858, "grad_norm": 26.58614730834961, "learning_rate": 2.7671632124352334e-06, "loss": 0.5859, "mean_token_accuracy": 0.9163140058517456, "num_tokens": 8005180.0, "step": 4468 }, { "epoch": 0.7236660999109383, "grad_norm": 29.153697967529297, "learning_rate": 2.7655440414507774e-06, "loss": 0.5736, "mean_token_accuracy": 0.9194581210613251, "num_tokens": 8006977.0, "step": 4469 }, { "epoch": 0.7238280301190188, "grad_norm": 28.157649993896484, "learning_rate": 2.7639248704663214e-06, "loss": 0.5522, "mean_token_accuracy": 0.9168067276477814, "num_tokens": 8008765.0, "step": 4470 }, { "epoch": 0.7239899603270991, "grad_norm": 35.398372650146484, "learning_rate": 2.7623056994818654e-06, "loss": 0.616, "mean_token_accuracy": 0.9041857421398163, "num_tokens": 8010559.0, "step": 4471 }, { "epoch": 0.7241518905351794, "grad_norm": 39.47248077392578, "learning_rate": 2.7606865284974094e-06, "loss": 0.8334, "mean_token_accuracy": 0.8973684012889862, "num_tokens": 8012344.0, "step": 4472 }, { "epoch": 0.7243138207432597, "grad_norm": 21.398977279663086, "learning_rate": 2.7590673575129535e-06, "loss": 0.5884, "mean_token_accuracy": 0.9199346303939819, "num_tokens": 8014145.0, "step": 4473 }, { "epoch": 0.72447575095134, "grad_norm": 32.29393768310547, "learning_rate": 2.7574481865284975e-06, "loss": 0.6709, "mean_token_accuracy": 0.9151960909366608, "num_tokens": 8015928.0, "step": 4474 }, { "epoch": 0.7246376811594203, "grad_norm": 24.342317581176758, "learning_rate": 2.7558290155440415e-06, "loss": 0.5865, "mean_token_accuracy": 0.9176767766475677, "num_tokens": 8017707.0, "step": 4475 }, { "epoch": 0.7247996113675006, "grad_norm": 27.52862548828125, "learning_rate": 2.7542098445595855e-06, "loss": 0.5252, "mean_token_accuracy": 0.9293971955776215, "num_tokens": 8019503.0, "step": 4476 }, { "epoch": 0.7249615415755809, "grad_norm": 24.93927001953125, "learning_rate": 2.7525906735751295e-06, "loss": 0.6039, "mean_token_accuracy": 0.9150778949260712, "num_tokens": 8021286.0, "step": 4477 }, { "epoch": 0.7251234717836612, "grad_norm": 31.143369674682617, "learning_rate": 2.7509715025906735e-06, "loss": 0.657, "mean_token_accuracy": 0.9091029465198517, "num_tokens": 8023073.0, "step": 4478 }, { "epoch": 0.7252854019917415, "grad_norm": 23.28335189819336, "learning_rate": 2.7493523316062175e-06, "loss": 0.5369, "mean_token_accuracy": 0.9248132109642029, "num_tokens": 8024863.0, "step": 4479 }, { "epoch": 0.7254473321998218, "grad_norm": 35.27939987182617, "learning_rate": 2.7477331606217615e-06, "loss": 0.6116, "mean_token_accuracy": 0.9028554856777191, "num_tokens": 8026663.0, "step": 4480 }, { "epoch": 0.7256092624079022, "grad_norm": 28.665430068969727, "learning_rate": 2.7461139896373056e-06, "loss": 0.6576, "mean_token_accuracy": 0.9157810211181641, "num_tokens": 8028448.0, "step": 4481 }, { "epoch": 0.7257711926159826, "grad_norm": 30.171079635620117, "learning_rate": 2.7444948186528496e-06, "loss": 0.5759, "mean_token_accuracy": 0.9107106626033783, "num_tokens": 8030241.0, "step": 4482 }, { "epoch": 0.7259331228240629, "grad_norm": 25.86968231201172, "learning_rate": 2.7428756476683936e-06, "loss": 0.5721, "mean_token_accuracy": 0.9139243066310883, "num_tokens": 8032032.0, "step": 4483 }, { "epoch": 0.7260950530321432, "grad_norm": 35.32841491699219, "learning_rate": 2.7412564766839376e-06, "loss": 0.6923, "mean_token_accuracy": 0.9093185067176819, "num_tokens": 8033831.0, "step": 4484 }, { "epoch": 0.7262569832402235, "grad_norm": 20.021224975585938, "learning_rate": 2.739637305699482e-06, "loss": 0.534, "mean_token_accuracy": 0.9287814497947693, "num_tokens": 8035624.0, "step": 4485 }, { "epoch": 0.7264189134483038, "grad_norm": 30.640483856201172, "learning_rate": 2.738018134715026e-06, "loss": 0.5977, "mean_token_accuracy": 0.9145525991916656, "num_tokens": 8037416.0, "step": 4486 }, { "epoch": 0.7265808436563841, "grad_norm": 21.73912239074707, "learning_rate": 2.73639896373057e-06, "loss": 0.5377, "mean_token_accuracy": 0.9213893115520477, "num_tokens": 8039195.0, "step": 4487 }, { "epoch": 0.7267427738644644, "grad_norm": 42.398555755615234, "learning_rate": 2.734779792746114e-06, "loss": 0.9556, "mean_token_accuracy": 0.8822393715381622, "num_tokens": 8040995.0, "step": 4488 }, { "epoch": 0.7269047040725447, "grad_norm": 28.758895874023438, "learning_rate": 2.7331606217616585e-06, "loss": 0.5637, "mean_token_accuracy": 0.9237777590751648, "num_tokens": 8042783.0, "step": 4489 }, { "epoch": 0.727066634280625, "grad_norm": 33.264892578125, "learning_rate": 2.7315414507772025e-06, "loss": 0.7722, "mean_token_accuracy": 0.8986244201660156, "num_tokens": 8044590.0, "step": 4490 }, { "epoch": 0.7272285644887053, "grad_norm": 31.843368530273438, "learning_rate": 2.7299222797927465e-06, "loss": 0.619, "mean_token_accuracy": 0.9055226147174835, "num_tokens": 8046377.0, "step": 4491 }, { "epoch": 0.7273904946967857, "grad_norm": 30.81119155883789, "learning_rate": 2.7283031088082906e-06, "loss": 0.6423, "mean_token_accuracy": 0.9065735042095184, "num_tokens": 8048167.0, "step": 4492 }, { "epoch": 0.727552424904866, "grad_norm": 28.915250778198242, "learning_rate": 2.7266839378238346e-06, "loss": 0.6557, "mean_token_accuracy": 0.9042074978351593, "num_tokens": 8049959.0, "step": 4493 }, { "epoch": 0.7277143551129464, "grad_norm": 21.78362274169922, "learning_rate": 2.7250647668393786e-06, "loss": 0.5262, "mean_token_accuracy": 0.9322995841503143, "num_tokens": 8051752.0, "step": 4494 }, { "epoch": 0.7278762853210267, "grad_norm": 30.810924530029297, "learning_rate": 2.7234455958549226e-06, "loss": 0.6185, "mean_token_accuracy": 0.9154135286808014, "num_tokens": 8053549.0, "step": 4495 }, { "epoch": 0.728038215529107, "grad_norm": 32.02534484863281, "learning_rate": 2.7218264248704666e-06, "loss": 0.5856, "mean_token_accuracy": 0.9167470633983612, "num_tokens": 8055349.0, "step": 4496 }, { "epoch": 0.7282001457371873, "grad_norm": 26.702011108398438, "learning_rate": 2.7202072538860106e-06, "loss": 0.5317, "mean_token_accuracy": 0.9259259104728699, "num_tokens": 8057131.0, "step": 4497 }, { "epoch": 0.7283620759452676, "grad_norm": 19.676074981689453, "learning_rate": 2.718588082901555e-06, "loss": 0.4841, "mean_token_accuracy": 0.9384453892707825, "num_tokens": 8058919.0, "step": 4498 }, { "epoch": 0.7285240061533479, "grad_norm": 30.8987979888916, "learning_rate": 2.716968911917099e-06, "loss": 0.6404, "mean_token_accuracy": 0.9179501235485077, "num_tokens": 8060712.0, "step": 4499 }, { "epoch": 0.7286859363614282, "grad_norm": 21.482254028320312, "learning_rate": 2.715349740932643e-06, "loss": 0.5554, "mean_token_accuracy": 0.9185061454772949, "num_tokens": 8062492.0, "step": 4500 }, { "epoch": 0.7288478665695085, "grad_norm": 32.7093620300293, "learning_rate": 2.713730569948187e-06, "loss": 0.76, "mean_token_accuracy": 0.9074721336364746, "num_tokens": 8064285.0, "step": 4501 }, { "epoch": 0.7290097967775888, "grad_norm": 30.86467933654785, "learning_rate": 2.712111398963731e-06, "loss": 0.6555, "mean_token_accuracy": 0.9074627757072449, "num_tokens": 8066078.0, "step": 4502 }, { "epoch": 0.7291717269856691, "grad_norm": 30.52082633972168, "learning_rate": 2.710492227979275e-06, "loss": 0.5933, "mean_token_accuracy": 0.9227604269981384, "num_tokens": 8067875.0, "step": 4503 }, { "epoch": 0.7293336571937495, "grad_norm": 17.34865379333496, "learning_rate": 2.708873056994819e-06, "loss": 0.4981, "mean_token_accuracy": 0.9340579807758331, "num_tokens": 8069675.0, "step": 4504 }, { "epoch": 0.7294955874018298, "grad_norm": 19.844480514526367, "learning_rate": 2.707253886010363e-06, "loss": 0.5766, "mean_token_accuracy": 0.927152156829834, "num_tokens": 8071475.0, "step": 4505 }, { "epoch": 0.7296575176099102, "grad_norm": 31.885820388793945, "learning_rate": 2.705634715025907e-06, "loss": 0.7214, "mean_token_accuracy": 0.9118930697441101, "num_tokens": 8073271.0, "step": 4506 }, { "epoch": 0.7298194478179905, "grad_norm": 25.198123931884766, "learning_rate": 2.704015544041451e-06, "loss": 0.5735, "mean_token_accuracy": 0.9150778949260712, "num_tokens": 8075054.0, "step": 4507 }, { "epoch": 0.7299813780260708, "grad_norm": 29.360118865966797, "learning_rate": 2.702396373056995e-06, "loss": 0.5405, "mean_token_accuracy": 0.9232352077960968, "num_tokens": 8076852.0, "step": 4508 }, { "epoch": 0.7301433082341511, "grad_norm": 35.02790832519531, "learning_rate": 2.700777202072539e-06, "loss": 0.6386, "mean_token_accuracy": 0.9157635569572449, "num_tokens": 8078649.0, "step": 4509 }, { "epoch": 0.7303052384422314, "grad_norm": 23.5816650390625, "learning_rate": 2.699158031088083e-06, "loss": 0.5482, "mean_token_accuracy": 0.9192083179950714, "num_tokens": 8080433.0, "step": 4510 }, { "epoch": 0.7304671686503117, "grad_norm": 17.632251739501953, "learning_rate": 2.6975388601036272e-06, "loss": 0.471, "mean_token_accuracy": 0.9327344298362732, "num_tokens": 8082212.0, "step": 4511 }, { "epoch": 0.730629098858392, "grad_norm": 29.55930519104004, "learning_rate": 2.6959196891191712e-06, "loss": 0.8239, "mean_token_accuracy": 0.913509339094162, "num_tokens": 8084002.0, "step": 4512 }, { "epoch": 0.7307910290664723, "grad_norm": 17.11616325378418, "learning_rate": 2.6943005181347152e-06, "loss": 0.4746, "mean_token_accuracy": 0.9365563690662384, "num_tokens": 8085797.0, "step": 4513 }, { "epoch": 0.7309529592745526, "grad_norm": 29.031143188476562, "learning_rate": 2.6926813471502593e-06, "loss": 0.5994, "mean_token_accuracy": 0.9231182336807251, "num_tokens": 8087582.0, "step": 4514 }, { "epoch": 0.731114889482633, "grad_norm": 25.365745544433594, "learning_rate": 2.6910621761658033e-06, "loss": 0.5936, "mean_token_accuracy": 0.9157062470912933, "num_tokens": 8089379.0, "step": 4515 }, { "epoch": 0.7312768196907133, "grad_norm": 41.268287658691406, "learning_rate": 2.6894430051813473e-06, "loss": 0.677, "mean_token_accuracy": 0.905601978302002, "num_tokens": 8091166.0, "step": 4516 }, { "epoch": 0.7314387498987936, "grad_norm": 27.64450454711914, "learning_rate": 2.6878238341968917e-06, "loss": 0.6157, "mean_token_accuracy": 0.918313592672348, "num_tokens": 8092948.0, "step": 4517 }, { "epoch": 0.731600680106874, "grad_norm": 28.30603790283203, "learning_rate": 2.6862046632124357e-06, "loss": 0.4931, "mean_token_accuracy": 0.9302924573421478, "num_tokens": 8094747.0, "step": 4518 }, { "epoch": 0.7317626103149543, "grad_norm": 34.9337272644043, "learning_rate": 2.6845854922279798e-06, "loss": 0.6212, "mean_token_accuracy": 0.9106077551841736, "num_tokens": 8096538.0, "step": 4519 }, { "epoch": 0.7319245405230346, "grad_norm": 34.09270477294922, "learning_rate": 2.6829663212435238e-06, "loss": 0.6642, "mean_token_accuracy": 0.9173611104488373, "num_tokens": 8098329.0, "step": 4520 }, { "epoch": 0.7320864707311149, "grad_norm": 24.97817611694336, "learning_rate": 2.6813471502590678e-06, "loss": 0.5868, "mean_token_accuracy": 0.9181795120239258, "num_tokens": 8100122.0, "step": 4521 }, { "epoch": 0.7322484009391952, "grad_norm": 25.789073944091797, "learning_rate": 2.679727979274612e-06, "loss": 0.6038, "mean_token_accuracy": 0.9198764860630035, "num_tokens": 8101921.0, "step": 4522 }, { "epoch": 0.7324103311472755, "grad_norm": 32.63774490356445, "learning_rate": 2.678108808290156e-06, "loss": 0.5825, "mean_token_accuracy": 0.9186813235282898, "num_tokens": 8103716.0, "step": 4523 }, { "epoch": 0.7325722613553558, "grad_norm": 38.643985748291016, "learning_rate": 2.6764896373057e-06, "loss": 0.7739, "mean_token_accuracy": 0.9055226147174835, "num_tokens": 8105503.0, "step": 4524 }, { "epoch": 0.7327341915634361, "grad_norm": 26.946096420288086, "learning_rate": 2.674870466321244e-06, "loss": 0.6258, "mean_token_accuracy": 0.9114170372486115, "num_tokens": 8107309.0, "step": 4525 }, { "epoch": 0.7328961217715165, "grad_norm": 43.800350189208984, "learning_rate": 2.673251295336788e-06, "loss": 0.7907, "mean_token_accuracy": 0.8983269035816193, "num_tokens": 8109116.0, "step": 4526 }, { "epoch": 0.7330580519795968, "grad_norm": 21.17057991027832, "learning_rate": 2.671632124352332e-06, "loss": 0.5786, "mean_token_accuracy": 0.9265628457069397, "num_tokens": 8110900.0, "step": 4527 }, { "epoch": 0.7332199821876771, "grad_norm": 23.218595504760742, "learning_rate": 2.670012953367876e-06, "loss": 0.5027, "mean_token_accuracy": 0.9279391169548035, "num_tokens": 8112690.0, "step": 4528 }, { "epoch": 0.7333819123957575, "grad_norm": 36.31922912597656, "learning_rate": 2.66839378238342e-06, "loss": 0.7467, "mean_token_accuracy": 0.9055076837539673, "num_tokens": 8114478.0, "step": 4529 }, { "epoch": 0.7335438426038378, "grad_norm": 37.84391784667969, "learning_rate": 2.666774611398964e-06, "loss": 0.7327, "mean_token_accuracy": 0.9069534540176392, "num_tokens": 8116269.0, "step": 4530 }, { "epoch": 0.7337057728119181, "grad_norm": 19.089780807495117, "learning_rate": 2.665155440414508e-06, "loss": 0.4857, "mean_token_accuracy": 0.9347689151763916, "num_tokens": 8118057.0, "step": 4531 }, { "epoch": 0.7338677030199984, "grad_norm": 23.26643180847168, "learning_rate": 2.663536269430052e-06, "loss": 0.7302, "mean_token_accuracy": 0.9172661900520325, "num_tokens": 8119847.0, "step": 4532 }, { "epoch": 0.7340296332280787, "grad_norm": 34.35528564453125, "learning_rate": 2.661917098445596e-06, "loss": 0.7298, "mean_token_accuracy": 0.9210858941078186, "num_tokens": 8121641.0, "step": 4533 }, { "epoch": 0.734191563436159, "grad_norm": 26.72820472717285, "learning_rate": 2.66029792746114e-06, "loss": 0.5964, "mean_token_accuracy": 0.9178895652294159, "num_tokens": 8123431.0, "step": 4534 }, { "epoch": 0.7343534936442393, "grad_norm": 20.602807998657227, "learning_rate": 2.658678756476684e-06, "loss": 0.461, "mean_token_accuracy": 0.9303059875965118, "num_tokens": 8125230.0, "step": 4535 }, { "epoch": 0.7345154238523196, "grad_norm": 34.85890197753906, "learning_rate": 2.6570595854922284e-06, "loss": 0.5697, "mean_token_accuracy": 0.9105250835418701, "num_tokens": 8127033.0, "step": 4536 }, { "epoch": 0.7346773540604, "grad_norm": 18.087966918945312, "learning_rate": 2.6554404145077724e-06, "loss": 0.4503, "mean_token_accuracy": 0.9372571706771851, "num_tokens": 8128832.0, "step": 4537 }, { "epoch": 0.7348392842684803, "grad_norm": 23.630273818969727, "learning_rate": 2.6538212435233164e-06, "loss": 0.7388, "mean_token_accuracy": 0.9117899835109711, "num_tokens": 8130617.0, "step": 4538 }, { "epoch": 0.7350012144765606, "grad_norm": 20.222585678100586, "learning_rate": 2.6522020725388604e-06, "loss": 0.5172, "mean_token_accuracy": 0.931367963552475, "num_tokens": 8132406.0, "step": 4539 }, { "epoch": 0.735163144684641, "grad_norm": 22.488021850585938, "learning_rate": 2.6505829015544044e-06, "loss": 0.5004, "mean_token_accuracy": 0.928437352180481, "num_tokens": 8134199.0, "step": 4540 }, { "epoch": 0.7353250748927213, "grad_norm": 39.27302551269531, "learning_rate": 2.6489637305699485e-06, "loss": 0.5943, "mean_token_accuracy": 0.9212057292461395, "num_tokens": 8136003.0, "step": 4541 }, { "epoch": 0.7354870051008016, "grad_norm": 10.930018424987793, "learning_rate": 2.6473445595854925e-06, "loss": 0.4453, "mean_token_accuracy": 0.935300201177597, "num_tokens": 8137793.0, "step": 4542 }, { "epoch": 0.7356489353088819, "grad_norm": 27.647703170776367, "learning_rate": 2.6457253886010365e-06, "loss": 0.5296, "mean_token_accuracy": 0.9278675615787506, "num_tokens": 8139582.0, "step": 4543 }, { "epoch": 0.7358108655169622, "grad_norm": 26.010183334350586, "learning_rate": 2.6441062176165805e-06, "loss": 0.5596, "mean_token_accuracy": 0.9241397082805634, "num_tokens": 8141371.0, "step": 4544 }, { "epoch": 0.7359727957250425, "grad_norm": 18.476205825805664, "learning_rate": 2.6424870466321245e-06, "loss": 0.5472, "mean_token_accuracy": 0.9265206754207611, "num_tokens": 8143155.0, "step": 4545 }, { "epoch": 0.7361347259331228, "grad_norm": 26.902023315429688, "learning_rate": 2.6408678756476685e-06, "loss": 0.5594, "mean_token_accuracy": 0.9239118993282318, "num_tokens": 8144942.0, "step": 4546 }, { "epoch": 0.7362966561412031, "grad_norm": 21.286592483520508, "learning_rate": 2.6392487046632125e-06, "loss": 0.5086, "mean_token_accuracy": 0.9273109138011932, "num_tokens": 8146730.0, "step": 4547 }, { "epoch": 0.7364585863492834, "grad_norm": 22.702796936035156, "learning_rate": 2.6376295336787566e-06, "loss": 0.53, "mean_token_accuracy": 0.9247430562973022, "num_tokens": 8148521.0, "step": 4548 }, { "epoch": 0.7366205165573638, "grad_norm": 28.53870391845703, "learning_rate": 2.6360103626943006e-06, "loss": 0.6609, "mean_token_accuracy": 0.9134083986282349, "num_tokens": 8150310.0, "step": 4549 }, { "epoch": 0.7367824467654441, "grad_norm": 33.73854064941406, "learning_rate": 2.6343911917098446e-06, "loss": 0.6063, "mean_token_accuracy": 0.9062696099281311, "num_tokens": 8152110.0, "step": 4550 }, { "epoch": 0.7369443769735244, "grad_norm": 30.758499145507812, "learning_rate": 2.6327720207253886e-06, "loss": 0.5122, "mean_token_accuracy": 0.9248366057872772, "num_tokens": 8153902.0, "step": 4551 }, { "epoch": 0.7371063071816047, "grad_norm": 34.09508514404297, "learning_rate": 2.6311528497409326e-06, "loss": 0.7049, "mean_token_accuracy": 0.9076973795890808, "num_tokens": 8155695.0, "step": 4552 }, { "epoch": 0.737268237389685, "grad_norm": 35.46419143676758, "learning_rate": 2.6295336787564766e-06, "loss": 0.6093, "mean_token_accuracy": 0.9127551019191742, "num_tokens": 8157494.0, "step": 4553 }, { "epoch": 0.7374301675977654, "grad_norm": 26.774829864501953, "learning_rate": 2.6279145077720206e-06, "loss": 0.5553, "mean_token_accuracy": 0.9240111112594604, "num_tokens": 8159280.0, "step": 4554 }, { "epoch": 0.7375920978058457, "grad_norm": 28.504465103149414, "learning_rate": 2.626295336787565e-06, "loss": 0.561, "mean_token_accuracy": 0.9186064004898071, "num_tokens": 8161075.0, "step": 4555 }, { "epoch": 0.737754028013926, "grad_norm": 23.045183181762695, "learning_rate": 2.624676165803109e-06, "loss": 0.6546, "mean_token_accuracy": 0.9279979169368744, "num_tokens": 8162865.0, "step": 4556 }, { "epoch": 0.7379159582220063, "grad_norm": 36.4827880859375, "learning_rate": 2.623056994818653e-06, "loss": 0.6372, "mean_token_accuracy": 0.905844658613205, "num_tokens": 8164653.0, "step": 4557 }, { "epoch": 0.7380778884300866, "grad_norm": 34.03557586669922, "learning_rate": 2.621437823834197e-06, "loss": 0.7525, "mean_token_accuracy": 0.9111787378787994, "num_tokens": 8166447.0, "step": 4558 }, { "epoch": 0.7382398186381669, "grad_norm": 21.248807907104492, "learning_rate": 2.619818652849741e-06, "loss": 0.4792, "mean_token_accuracy": 0.931494414806366, "num_tokens": 8168237.0, "step": 4559 }, { "epoch": 0.7384017488462473, "grad_norm": 27.218454360961914, "learning_rate": 2.618199481865285e-06, "loss": 0.5675, "mean_token_accuracy": 0.9199725091457367, "num_tokens": 8170024.0, "step": 4560 }, { "epoch": 0.7385636790543276, "grad_norm": 27.11505699157715, "learning_rate": 2.616580310880829e-06, "loss": 0.6031, "mean_token_accuracy": 0.920443207025528, "num_tokens": 8171812.0, "step": 4561 }, { "epoch": 0.7387256092624079, "grad_norm": 36.43574905395508, "learning_rate": 2.614961139896373e-06, "loss": 0.577, "mean_token_accuracy": 0.905918687582016, "num_tokens": 8173611.0, "step": 4562 }, { "epoch": 0.7388875394704882, "grad_norm": 21.802791595458984, "learning_rate": 2.613341968911917e-06, "loss": 0.479, "mean_token_accuracy": 0.9290744364261627, "num_tokens": 8175405.0, "step": 4563 }, { "epoch": 0.7390494696785685, "grad_norm": 25.826353073120117, "learning_rate": 2.611722797927461e-06, "loss": 0.5334, "mean_token_accuracy": 0.9202898740768433, "num_tokens": 8177193.0, "step": 4564 }, { "epoch": 0.7392113998866489, "grad_norm": 35.67538070678711, "learning_rate": 2.610103626943005e-06, "loss": 0.5741, "mean_token_accuracy": 0.9167016744613647, "num_tokens": 8178981.0, "step": 4565 }, { "epoch": 0.7393733300947292, "grad_norm": 30.726619720458984, "learning_rate": 2.608484455958549e-06, "loss": 0.6001, "mean_token_accuracy": 0.9203213453292847, "num_tokens": 8180781.0, "step": 4566 }, { "epoch": 0.7395352603028095, "grad_norm": 26.228023529052734, "learning_rate": 2.6068652849740932e-06, "loss": 0.6774, "mean_token_accuracy": 0.9026198387145996, "num_tokens": 8182559.0, "step": 4567 }, { "epoch": 0.7396971905108898, "grad_norm": 27.367204666137695, "learning_rate": 2.6052461139896372e-06, "loss": 0.6934, "mean_token_accuracy": 0.9166931509971619, "num_tokens": 8184346.0, "step": 4568 }, { "epoch": 0.7398591207189701, "grad_norm": 20.97516441345215, "learning_rate": 2.6036269430051813e-06, "loss": 0.4958, "mean_token_accuracy": 0.9273166060447693, "num_tokens": 8186146.0, "step": 4569 }, { "epoch": 0.7400210509270504, "grad_norm": 28.53069305419922, "learning_rate": 2.6020077720207253e-06, "loss": 0.5744, "mean_token_accuracy": 0.9271235466003418, "num_tokens": 8187946.0, "step": 4570 }, { "epoch": 0.7401829811351308, "grad_norm": 39.1164665222168, "learning_rate": 2.6003886010362693e-06, "loss": 0.5109, "mean_token_accuracy": 0.9281156361103058, "num_tokens": 8189750.0, "step": 4571 }, { "epoch": 0.7403449113432111, "grad_norm": 35.342411041259766, "learning_rate": 2.5987694300518133e-06, "loss": 0.6741, "mean_token_accuracy": 0.9109818339347839, "num_tokens": 8191532.0, "step": 4572 }, { "epoch": 0.7405068415512914, "grad_norm": 32.37525939941406, "learning_rate": 2.5971502590673577e-06, "loss": 0.7673, "mean_token_accuracy": 0.9048504829406738, "num_tokens": 8193328.0, "step": 4573 }, { "epoch": 0.7406687717593717, "grad_norm": 19.774675369262695, "learning_rate": 2.5955310880829017e-06, "loss": 0.469, "mean_token_accuracy": 0.936292290687561, "num_tokens": 8195122.0, "step": 4574 }, { "epoch": 0.740830701967452, "grad_norm": 35.81763458251953, "learning_rate": 2.5939119170984458e-06, "loss": 0.5771, "mean_token_accuracy": 0.9225564002990723, "num_tokens": 8196907.0, "step": 4575 }, { "epoch": 0.7409926321755324, "grad_norm": 26.13691520690918, "learning_rate": 2.5922927461139898e-06, "loss": 0.5997, "mean_token_accuracy": 0.9167953729629517, "num_tokens": 8198707.0, "step": 4576 }, { "epoch": 0.7411545623836127, "grad_norm": 23.173473358154297, "learning_rate": 2.5906735751295338e-06, "loss": 0.4721, "mean_token_accuracy": 0.923865407705307, "num_tokens": 8200496.0, "step": 4577 }, { "epoch": 0.741316492591693, "grad_norm": 26.636085510253906, "learning_rate": 2.589054404145078e-06, "loss": 0.5835, "mean_token_accuracy": 0.9238302707672119, "num_tokens": 8202284.0, "step": 4578 }, { "epoch": 0.7414784227997733, "grad_norm": 28.24758529663086, "learning_rate": 2.587435233160622e-06, "loss": 0.7794, "mean_token_accuracy": 0.9003292620182037, "num_tokens": 8204077.0, "step": 4579 }, { "epoch": 0.7416403530078536, "grad_norm": 27.013755798339844, "learning_rate": 2.585816062176166e-06, "loss": 0.5682, "mean_token_accuracy": 0.9155176877975464, "num_tokens": 8205861.0, "step": 4580 }, { "epoch": 0.7418022832159339, "grad_norm": 22.328622817993164, "learning_rate": 2.58419689119171e-06, "loss": 0.5624, "mean_token_accuracy": 0.9242043793201447, "num_tokens": 8207635.0, "step": 4581 }, { "epoch": 0.7419642134240142, "grad_norm": 21.46522331237793, "learning_rate": 2.582577720207254e-06, "loss": 0.5156, "mean_token_accuracy": 0.9261177480220795, "num_tokens": 8209418.0, "step": 4582 }, { "epoch": 0.7421261436320946, "grad_norm": 31.285398483276367, "learning_rate": 2.580958549222798e-06, "loss": 0.6783, "mean_token_accuracy": 0.9101460576057434, "num_tokens": 8211207.0, "step": 4583 }, { "epoch": 0.7422880738401749, "grad_norm": 33.974853515625, "learning_rate": 2.579339378238342e-06, "loss": 0.6429, "mean_token_accuracy": 0.9106192588806152, "num_tokens": 8213011.0, "step": 4584 }, { "epoch": 0.7424500040482552, "grad_norm": 25.935009002685547, "learning_rate": 2.5777202072538863e-06, "loss": 0.51, "mean_token_accuracy": 0.9252786040306091, "num_tokens": 8214804.0, "step": 4585 }, { "epoch": 0.7426119342563355, "grad_norm": 24.187061309814453, "learning_rate": 2.5761010362694307e-06, "loss": 0.5393, "mean_token_accuracy": 0.9250700175762177, "num_tokens": 8216609.0, "step": 4586 }, { "epoch": 0.7427738644644158, "grad_norm": 29.433578491210938, "learning_rate": 2.5744818652849748e-06, "loss": 0.6014, "mean_token_accuracy": 0.9217752516269684, "num_tokens": 8218402.0, "step": 4587 }, { "epoch": 0.7429357946724962, "grad_norm": 32.3162727355957, "learning_rate": 2.5728626943005188e-06, "loss": 0.6237, "mean_token_accuracy": 0.917548805475235, "num_tokens": 8220193.0, "step": 4588 }, { "epoch": 0.7430977248805765, "grad_norm": 32.099700927734375, "learning_rate": 2.5712435233160628e-06, "loss": 0.586, "mean_token_accuracy": 0.9170315861701965, "num_tokens": 8221982.0, "step": 4589 }, { "epoch": 0.7432596550886568, "grad_norm": 29.042383193969727, "learning_rate": 2.569624352331607e-06, "loss": 0.6318, "mean_token_accuracy": 0.9181869029998779, "num_tokens": 8223775.0, "step": 4590 }, { "epoch": 0.7434215852967371, "grad_norm": 27.842449188232422, "learning_rate": 2.568005181347151e-06, "loss": 0.725, "mean_token_accuracy": 0.9128378331661224, "num_tokens": 8225565.0, "step": 4591 }, { "epoch": 0.7435835155048174, "grad_norm": 28.261457443237305, "learning_rate": 2.566386010362695e-06, "loss": 0.5156, "mean_token_accuracy": 0.9254246950149536, "num_tokens": 8227345.0, "step": 4592 }, { "epoch": 0.7437454457128977, "grad_norm": 30.498554229736328, "learning_rate": 2.564766839378239e-06, "loss": 0.6534, "mean_token_accuracy": 0.9200068414211273, "num_tokens": 8229143.0, "step": 4593 }, { "epoch": 0.7439073759209781, "grad_norm": 29.88692283630371, "learning_rate": 2.563147668393783e-06, "loss": 0.6413, "mean_token_accuracy": 0.9191001653671265, "num_tokens": 8230927.0, "step": 4594 }, { "epoch": 0.7440693061290584, "grad_norm": 29.168485641479492, "learning_rate": 2.561528497409327e-06, "loss": 0.5558, "mean_token_accuracy": 0.9114203155040741, "num_tokens": 8232720.0, "step": 4595 }, { "epoch": 0.7442312363371387, "grad_norm": 19.69017791748047, "learning_rate": 2.559909326424871e-06, "loss": 0.5573, "mean_token_accuracy": 0.9266602396965027, "num_tokens": 8234505.0, "step": 4596 }, { "epoch": 0.744393166545219, "grad_norm": 24.87456512451172, "learning_rate": 2.558290155440415e-06, "loss": 0.5787, "mean_token_accuracy": 0.9115867018699646, "num_tokens": 8236288.0, "step": 4597 }, { "epoch": 0.7445550967532993, "grad_norm": 32.47265625, "learning_rate": 2.556670984455959e-06, "loss": 0.6707, "mean_token_accuracy": 0.9211202263832092, "num_tokens": 8238079.0, "step": 4598 }, { "epoch": 0.7447170269613796, "grad_norm": 29.497760772705078, "learning_rate": 2.555051813471503e-06, "loss": 0.6759, "mean_token_accuracy": 0.9106115698814392, "num_tokens": 8239860.0, "step": 4599 }, { "epoch": 0.74487895716946, "grad_norm": 25.900714874267578, "learning_rate": 2.553432642487047e-06, "loss": 0.5471, "mean_token_accuracy": 0.9231597185134888, "num_tokens": 8241658.0, "step": 4600 }, { "epoch": 0.7450408873775403, "grad_norm": 35.44560623168945, "learning_rate": 2.551813471502591e-06, "loss": 0.7375, "mean_token_accuracy": 0.9112118780612946, "num_tokens": 8243451.0, "step": 4601 }, { "epoch": 0.7452028175856206, "grad_norm": 30.347888946533203, "learning_rate": 2.550194300518135e-06, "loss": 0.5692, "mean_token_accuracy": 0.9249999821186066, "num_tokens": 8245243.0, "step": 4602 }, { "epoch": 0.7453647477937009, "grad_norm": 18.550918579101562, "learning_rate": 2.548575129533679e-06, "loss": 0.5234, "mean_token_accuracy": 0.9314507842063904, "num_tokens": 8247032.0, "step": 4603 }, { "epoch": 0.7455266780017812, "grad_norm": 30.45406723022461, "learning_rate": 2.546955958549223e-06, "loss": 0.5682, "mean_token_accuracy": 0.9149391055107117, "num_tokens": 8248828.0, "step": 4604 }, { "epoch": 0.7456886082098616, "grad_norm": 26.664411544799805, "learning_rate": 2.5453367875647674e-06, "loss": 0.5496, "mean_token_accuracy": 0.9168159067630768, "num_tokens": 8250624.0, "step": 4605 }, { "epoch": 0.7458505384179419, "grad_norm": 25.156368255615234, "learning_rate": 2.5437176165803114e-06, "loss": 0.5099, "mean_token_accuracy": 0.9264123439788818, "num_tokens": 8252422.0, "step": 4606 }, { "epoch": 0.7460124686260222, "grad_norm": 36.718666076660156, "learning_rate": 2.5420984455958554e-06, "loss": 0.6701, "mean_token_accuracy": 0.91087207198143, "num_tokens": 8254204.0, "step": 4607 }, { "epoch": 0.7461743988341025, "grad_norm": 32.656837463378906, "learning_rate": 2.5404792746113995e-06, "loss": 0.5872, "mean_token_accuracy": 0.9159872233867645, "num_tokens": 8255990.0, "step": 4608 }, { "epoch": 0.7463363290421828, "grad_norm": 27.766300201416016, "learning_rate": 2.5388601036269435e-06, "loss": 0.6702, "mean_token_accuracy": 0.9062694609165192, "num_tokens": 8257768.0, "step": 4609 }, { "epoch": 0.7464982592502631, "grad_norm": 32.55924987792969, "learning_rate": 2.5372409326424875e-06, "loss": 0.5781, "mean_token_accuracy": 0.9174365699291229, "num_tokens": 8259560.0, "step": 4610 }, { "epoch": 0.7466601894583434, "grad_norm": 25.397869110107422, "learning_rate": 2.5356217616580315e-06, "loss": 0.5062, "mean_token_accuracy": 0.920992910861969, "num_tokens": 8261363.0, "step": 4611 }, { "epoch": 0.7468221196664238, "grad_norm": 23.16935157775879, "learning_rate": 2.5340025906735755e-06, "loss": 0.4883, "mean_token_accuracy": 0.9278171956539154, "num_tokens": 8263151.0, "step": 4612 }, { "epoch": 0.7469840498745041, "grad_norm": 19.186124801635742, "learning_rate": 2.5323834196891195e-06, "loss": 0.4591, "mean_token_accuracy": 0.9312020540237427, "num_tokens": 8264953.0, "step": 4613 }, { "epoch": 0.7471459800825844, "grad_norm": 24.751462936401367, "learning_rate": 2.5307642487046635e-06, "loss": 0.5283, "mean_token_accuracy": 0.9265314936637878, "num_tokens": 8266736.0, "step": 4614 }, { "epoch": 0.7473079102906647, "grad_norm": 32.06297302246094, "learning_rate": 2.5291450777202075e-06, "loss": 0.6991, "mean_token_accuracy": 0.9055467844009399, "num_tokens": 8268533.0, "step": 4615 }, { "epoch": 0.747469840498745, "grad_norm": 33.905338287353516, "learning_rate": 2.5275259067357516e-06, "loss": 0.6462, "mean_token_accuracy": 0.9189484119415283, "num_tokens": 8270329.0, "step": 4616 }, { "epoch": 0.7476317707068254, "grad_norm": 42.5034065246582, "learning_rate": 2.5259067357512956e-06, "loss": 0.7555, "mean_token_accuracy": 0.8910945355892181, "num_tokens": 8272125.0, "step": 4617 }, { "epoch": 0.7477937009149057, "grad_norm": 25.228912353515625, "learning_rate": 2.5242875647668396e-06, "loss": 0.5443, "mean_token_accuracy": 0.9131458103656769, "num_tokens": 8273901.0, "step": 4618 }, { "epoch": 0.747955631122986, "grad_norm": 25.824670791625977, "learning_rate": 2.5226683937823836e-06, "loss": 0.4906, "mean_token_accuracy": 0.9333793222904205, "num_tokens": 8275698.0, "step": 4619 }, { "epoch": 0.7481175613310663, "grad_norm": 41.90966033935547, "learning_rate": 2.5210492227979276e-06, "loss": 0.8572, "mean_token_accuracy": 0.8914726674556732, "num_tokens": 8277495.0, "step": 4620 }, { "epoch": 0.7482794915391466, "grad_norm": 43.290367126464844, "learning_rate": 2.5194300518134716e-06, "loss": 0.8416, "mean_token_accuracy": 0.8785714209079742, "num_tokens": 8279287.0, "step": 4621 }, { "epoch": 0.7484414217472269, "grad_norm": 36.1877555847168, "learning_rate": 2.5178108808290156e-06, "loss": 0.6258, "mean_token_accuracy": 0.9169794619083405, "num_tokens": 8281076.0, "step": 4622 }, { "epoch": 0.7486033519553073, "grad_norm": 16.613479614257812, "learning_rate": 2.5161917098445597e-06, "loss": 0.4541, "mean_token_accuracy": 0.9309405386447906, "num_tokens": 8282863.0, "step": 4623 }, { "epoch": 0.7487652821633876, "grad_norm": 26.51201057434082, "learning_rate": 2.514572538860104e-06, "loss": 0.4932, "mean_token_accuracy": 0.9281793832778931, "num_tokens": 8284653.0, "step": 4624 }, { "epoch": 0.7489272123714679, "grad_norm": 37.206119537353516, "learning_rate": 2.512953367875648e-06, "loss": 0.5792, "mean_token_accuracy": 0.9136128425598145, "num_tokens": 8286443.0, "step": 4625 }, { "epoch": 0.7490891425795482, "grad_norm": 29.836753845214844, "learning_rate": 2.511334196891192e-06, "loss": 0.565, "mean_token_accuracy": 0.9248905181884766, "num_tokens": 8288248.0, "step": 4626 }, { "epoch": 0.7492510727876285, "grad_norm": 31.03231430053711, "learning_rate": 2.509715025906736e-06, "loss": 0.5686, "mean_token_accuracy": 0.9151099026203156, "num_tokens": 8290043.0, "step": 4627 }, { "epoch": 0.7494130029957089, "grad_norm": 24.713272094726562, "learning_rate": 2.50809585492228e-06, "loss": 0.58, "mean_token_accuracy": 0.9167449176311493, "num_tokens": 8291832.0, "step": 4628 }, { "epoch": 0.7495749332037892, "grad_norm": 22.026756286621094, "learning_rate": 2.506476683937824e-06, "loss": 0.5052, "mean_token_accuracy": 0.9290744364261627, "num_tokens": 8293626.0, "step": 4629 }, { "epoch": 0.7497368634118695, "grad_norm": 33.0578498840332, "learning_rate": 2.504857512953368e-06, "loss": 0.6267, "mean_token_accuracy": 0.9198528230190277, "num_tokens": 8295412.0, "step": 4630 }, { "epoch": 0.7498987936199498, "grad_norm": 19.807037353515625, "learning_rate": 2.503238341968912e-06, "loss": 0.4726, "mean_token_accuracy": 0.9306386113166809, "num_tokens": 8297212.0, "step": 4631 }, { "epoch": 0.7500607238280301, "grad_norm": 33.38854217529297, "learning_rate": 2.501619170984456e-06, "loss": 0.7026, "mean_token_accuracy": 0.9045868515968323, "num_tokens": 8299007.0, "step": 4632 }, { "epoch": 0.7502226540361104, "grad_norm": 23.449975967407227, "learning_rate": 2.5e-06, "loss": 0.5267, "mean_token_accuracy": 0.9205673635005951, "num_tokens": 8300795.0, "step": 4633 }, { "epoch": 0.7503845842441907, "grad_norm": 43.37062072753906, "learning_rate": 2.4983808290155442e-06, "loss": 0.7625, "mean_token_accuracy": 0.9068877696990967, "num_tokens": 8302606.0, "step": 4634 }, { "epoch": 0.750546514452271, "grad_norm": 30.077634811401367, "learning_rate": 2.4967616580310882e-06, "loss": 0.5079, "mean_token_accuracy": 0.9325833320617676, "num_tokens": 8304385.0, "step": 4635 }, { "epoch": 0.7507084446603514, "grad_norm": 20.742412567138672, "learning_rate": 2.4951424870466322e-06, "loss": 0.4805, "mean_token_accuracy": 0.929352730512619, "num_tokens": 8306180.0, "step": 4636 }, { "epoch": 0.7508703748684317, "grad_norm": 25.69804573059082, "learning_rate": 2.4935233160621763e-06, "loss": 0.5227, "mean_token_accuracy": 0.9270072877407074, "num_tokens": 8307966.0, "step": 4637 }, { "epoch": 0.751032305076512, "grad_norm": 34.1600227355957, "learning_rate": 2.4919041450777203e-06, "loss": 0.6319, "mean_token_accuracy": 0.91131791472435, "num_tokens": 8309760.0, "step": 4638 }, { "epoch": 0.7511942352845924, "grad_norm": 33.87798309326172, "learning_rate": 2.4902849740932643e-06, "loss": 0.5594, "mean_token_accuracy": 0.9168231785297394, "num_tokens": 8311549.0, "step": 4639 }, { "epoch": 0.7513561654926727, "grad_norm": 32.02516174316406, "learning_rate": 2.4886658031088083e-06, "loss": 0.5984, "mean_token_accuracy": 0.91366907954216, "num_tokens": 8313339.0, "step": 4640 }, { "epoch": 0.751518095700753, "grad_norm": 35.43289566040039, "learning_rate": 2.4870466321243523e-06, "loss": 0.557, "mean_token_accuracy": 0.9243023991584778, "num_tokens": 8315143.0, "step": 4641 }, { "epoch": 0.7516800259088333, "grad_norm": 18.416576385498047, "learning_rate": 2.4854274611398963e-06, "loss": 0.5044, "mean_token_accuracy": 0.9332747459411621, "num_tokens": 8316925.0, "step": 4642 }, { "epoch": 0.7518419561169136, "grad_norm": 23.071779251098633, "learning_rate": 2.4838082901554408e-06, "loss": 0.5413, "mean_token_accuracy": 0.927532434463501, "num_tokens": 8318713.0, "step": 4643 }, { "epoch": 0.7520038863249939, "grad_norm": 26.339641571044922, "learning_rate": 2.4821891191709848e-06, "loss": 0.5486, "mean_token_accuracy": 0.9337091147899628, "num_tokens": 8320526.0, "step": 4644 }, { "epoch": 0.7521658165330742, "grad_norm": 23.68473243713379, "learning_rate": 2.4805699481865288e-06, "loss": 0.5677, "mean_token_accuracy": 0.9181873500347137, "num_tokens": 8322307.0, "step": 4645 }, { "epoch": 0.7523277467411545, "grad_norm": 32.67833709716797, "learning_rate": 2.478950777202073e-06, "loss": 0.6478, "mean_token_accuracy": 0.9097140729427338, "num_tokens": 8324105.0, "step": 4646 }, { "epoch": 0.7524896769492349, "grad_norm": 29.60883140563965, "learning_rate": 2.477331606217617e-06, "loss": 0.5686, "mean_token_accuracy": 0.9205682873725891, "num_tokens": 8325894.0, "step": 4647 }, { "epoch": 0.7526516071573152, "grad_norm": 32.173824310302734, "learning_rate": 2.475712435233161e-06, "loss": 0.7635, "mean_token_accuracy": 0.9168404340744019, "num_tokens": 8327683.0, "step": 4648 }, { "epoch": 0.7528135373653955, "grad_norm": 40.390472412109375, "learning_rate": 2.474093264248705e-06, "loss": 0.7024, "mean_token_accuracy": 0.8936694264411926, "num_tokens": 8329476.0, "step": 4649 }, { "epoch": 0.7529754675734759, "grad_norm": 39.334659576416016, "learning_rate": 2.472474093264249e-06, "loss": 0.7382, "mean_token_accuracy": 0.9029101133346558, "num_tokens": 8331266.0, "step": 4650 }, { "epoch": 0.7531373977815562, "grad_norm": 36.24227523803711, "learning_rate": 2.470854922279793e-06, "loss": 0.6167, "mean_token_accuracy": 0.916788250207901, "num_tokens": 8333065.0, "step": 4651 }, { "epoch": 0.7532993279896365, "grad_norm": 36.09779357910156, "learning_rate": 2.469235751295337e-06, "loss": 0.581, "mean_token_accuracy": 0.9110330045223236, "num_tokens": 8334847.0, "step": 4652 }, { "epoch": 0.7534612581977168, "grad_norm": 35.25068664550781, "learning_rate": 2.467616580310881e-06, "loss": 0.6852, "mean_token_accuracy": 0.9177764356136322, "num_tokens": 8336639.0, "step": 4653 }, { "epoch": 0.7536231884057971, "grad_norm": 38.59444046020508, "learning_rate": 2.465997409326425e-06, "loss": 0.795, "mean_token_accuracy": 0.9070371091365814, "num_tokens": 8338430.0, "step": 4654 }, { "epoch": 0.7537851186138774, "grad_norm": 33.41994094848633, "learning_rate": 2.464378238341969e-06, "loss": 0.7313, "mean_token_accuracy": 0.9203906953334808, "num_tokens": 8340218.0, "step": 4655 }, { "epoch": 0.7539470488219577, "grad_norm": 24.58260154724121, "learning_rate": 2.462759067357513e-06, "loss": 0.5119, "mean_token_accuracy": 0.9277708232402802, "num_tokens": 8342007.0, "step": 4656 }, { "epoch": 0.754108979030038, "grad_norm": 24.311532974243164, "learning_rate": 2.461139896373057e-06, "loss": 0.5517, "mean_token_accuracy": 0.9292216897010803, "num_tokens": 8343802.0, "step": 4657 }, { "epoch": 0.7542709092381183, "grad_norm": 27.534183502197266, "learning_rate": 2.4595207253886014e-06, "loss": 0.545, "mean_token_accuracy": 0.9205517172813416, "num_tokens": 8345591.0, "step": 4658 }, { "epoch": 0.7544328394461987, "grad_norm": 24.61557388305664, "learning_rate": 2.4579015544041454e-06, "loss": 0.562, "mean_token_accuracy": 0.9242710769176483, "num_tokens": 8347379.0, "step": 4659 }, { "epoch": 0.754594769654279, "grad_norm": 12.89478588104248, "learning_rate": 2.4562823834196894e-06, "loss": 0.4565, "mean_token_accuracy": 0.9332089424133301, "num_tokens": 8349161.0, "step": 4660 }, { "epoch": 0.7547566998623593, "grad_norm": 27.645849227905273, "learning_rate": 2.4546632124352334e-06, "loss": 0.5527, "mean_token_accuracy": 0.918974906206131, "num_tokens": 8350944.0, "step": 4661 }, { "epoch": 0.7549186300704397, "grad_norm": 27.31709861755371, "learning_rate": 2.4530440414507774e-06, "loss": 0.5532, "mean_token_accuracy": 0.9161653220653534, "num_tokens": 8352742.0, "step": 4662 }, { "epoch": 0.75508056027852, "grad_norm": 36.87115478515625, "learning_rate": 2.4514248704663214e-06, "loss": 0.6784, "mean_token_accuracy": 0.9064536690711975, "num_tokens": 8354541.0, "step": 4663 }, { "epoch": 0.7552424904866003, "grad_norm": 26.110366821289062, "learning_rate": 2.4498056994818655e-06, "loss": 0.5262, "mean_token_accuracy": 0.9216992855072021, "num_tokens": 8356334.0, "step": 4664 }, { "epoch": 0.7554044206946806, "grad_norm": 45.888031005859375, "learning_rate": 2.4481865284974095e-06, "loss": 0.8579, "mean_token_accuracy": 0.8923611044883728, "num_tokens": 8358134.0, "step": 4665 }, { "epoch": 0.7555663509027609, "grad_norm": 32.6623420715332, "learning_rate": 2.4465673575129535e-06, "loss": 0.5837, "mean_token_accuracy": 0.9144723415374756, "num_tokens": 8359926.0, "step": 4666 }, { "epoch": 0.7557282811108412, "grad_norm": 31.467660903930664, "learning_rate": 2.4449481865284975e-06, "loss": 0.606, "mean_token_accuracy": 0.919433057308197, "num_tokens": 8361721.0, "step": 4667 }, { "epoch": 0.7558902113189215, "grad_norm": 31.99997901916504, "learning_rate": 2.443329015544042e-06, "loss": 0.5508, "mean_token_accuracy": 0.9298729598522186, "num_tokens": 8363518.0, "step": 4668 }, { "epoch": 0.7560521415270018, "grad_norm": 28.23126983642578, "learning_rate": 2.441709844559586e-06, "loss": 0.5914, "mean_token_accuracy": 0.9062043726444244, "num_tokens": 8365307.0, "step": 4669 }, { "epoch": 0.7562140717350821, "grad_norm": 32.10374069213867, "learning_rate": 2.44009067357513e-06, "loss": 0.604, "mean_token_accuracy": 0.9172833859920502, "num_tokens": 8367098.0, "step": 4670 }, { "epoch": 0.7563760019431625, "grad_norm": 40.423118591308594, "learning_rate": 2.438471502590674e-06, "loss": 0.7647, "mean_token_accuracy": 0.8920863270759583, "num_tokens": 8368888.0, "step": 4671 }, { "epoch": 0.7565379321512428, "grad_norm": 24.046968460083008, "learning_rate": 2.436852331606218e-06, "loss": 0.5674, "mean_token_accuracy": 0.9222372174263, "num_tokens": 8370683.0, "step": 4672 }, { "epoch": 0.7566998623593232, "grad_norm": 35.0728645324707, "learning_rate": 2.435233160621762e-06, "loss": 0.6499, "mean_token_accuracy": 0.9087809026241302, "num_tokens": 8372469.0, "step": 4673 }, { "epoch": 0.7568617925674035, "grad_norm": 24.768760681152344, "learning_rate": 2.433613989637306e-06, "loss": 0.5195, "mean_token_accuracy": 0.9199725091457367, "num_tokens": 8374256.0, "step": 4674 }, { "epoch": 0.7570237227754838, "grad_norm": 28.990802764892578, "learning_rate": 2.43199481865285e-06, "loss": 0.6242, "mean_token_accuracy": 0.9111787378787994, "num_tokens": 8376050.0, "step": 4675 }, { "epoch": 0.7571856529835641, "grad_norm": 22.618728637695312, "learning_rate": 2.430375647668394e-06, "loss": 0.5965, "mean_token_accuracy": 0.9192129373550415, "num_tokens": 8377836.0, "step": 4676 }, { "epoch": 0.7573475831916444, "grad_norm": 38.99509048461914, "learning_rate": 2.428756476683938e-06, "loss": 0.6952, "mean_token_accuracy": 0.9091029465198517, "num_tokens": 8379623.0, "step": 4677 }, { "epoch": 0.7575095133997247, "grad_norm": 28.140243530273438, "learning_rate": 2.427137305699482e-06, "loss": 0.5513, "mean_token_accuracy": 0.9213735461235046, "num_tokens": 8381415.0, "step": 4678 }, { "epoch": 0.757671443607805, "grad_norm": 31.008007049560547, "learning_rate": 2.425518134715026e-06, "loss": 0.552, "mean_token_accuracy": 0.9112727642059326, "num_tokens": 8383198.0, "step": 4679 }, { "epoch": 0.7578333738158853, "grad_norm": 37.896514892578125, "learning_rate": 2.42389896373057e-06, "loss": 0.6846, "mean_token_accuracy": 0.902437835931778, "num_tokens": 8384997.0, "step": 4680 }, { "epoch": 0.7579953040239656, "grad_norm": 30.200599670410156, "learning_rate": 2.422279792746114e-06, "loss": 0.5733, "mean_token_accuracy": 0.9153688251972198, "num_tokens": 8386781.0, "step": 4681 }, { "epoch": 0.758157234232046, "grad_norm": 30.259031295776367, "learning_rate": 2.420660621761658e-06, "loss": 0.5403, "mean_token_accuracy": 0.9187537133693695, "num_tokens": 8388565.0, "step": 4682 }, { "epoch": 0.7583191644401263, "grad_norm": 39.184200286865234, "learning_rate": 2.419041450777202e-06, "loss": 0.7682, "mean_token_accuracy": 0.9019865691661835, "num_tokens": 8390353.0, "step": 4683 }, { "epoch": 0.7584810946482067, "grad_norm": 22.71794319152832, "learning_rate": 2.417422279792746e-06, "loss": 0.5195, "mean_token_accuracy": 0.9216834008693695, "num_tokens": 8392146.0, "step": 4684 }, { "epoch": 0.758643024856287, "grad_norm": 31.540409088134766, "learning_rate": 2.41580310880829e-06, "loss": 0.6303, "mean_token_accuracy": 0.9207015037536621, "num_tokens": 8393944.0, "step": 4685 }, { "epoch": 0.7588049550643673, "grad_norm": 27.81080436706543, "learning_rate": 2.414183937823834e-06, "loss": 0.5886, "mean_token_accuracy": 0.918353796005249, "num_tokens": 8395725.0, "step": 4686 }, { "epoch": 0.7589668852724476, "grad_norm": 21.210376739501953, "learning_rate": 2.4125647668393786e-06, "loss": 0.4998, "mean_token_accuracy": 0.9255583882331848, "num_tokens": 8397506.0, "step": 4687 }, { "epoch": 0.7591288154805279, "grad_norm": 13.841824531555176, "learning_rate": 2.4109455958549226e-06, "loss": 0.5008, "mean_token_accuracy": 0.9324916005134583, "num_tokens": 8399285.0, "step": 4688 }, { "epoch": 0.7592907456886082, "grad_norm": 30.768701553344727, "learning_rate": 2.4093264248704666e-06, "loss": 0.6264, "mean_token_accuracy": 0.9124899506568909, "num_tokens": 8401080.0, "step": 4689 }, { "epoch": 0.7594526758966885, "grad_norm": 17.460729598999023, "learning_rate": 2.4077072538860106e-06, "loss": 0.4918, "mean_token_accuracy": 0.9330704808235168, "num_tokens": 8402876.0, "step": 4690 }, { "epoch": 0.7596146061047688, "grad_norm": 35.453426361083984, "learning_rate": 2.4060880829015547e-06, "loss": 0.6314, "mean_token_accuracy": 0.9070870876312256, "num_tokens": 8404668.0, "step": 4691 }, { "epoch": 0.7597765363128491, "grad_norm": 24.08220672607422, "learning_rate": 2.4044689119170987e-06, "loss": 0.5134, "mean_token_accuracy": 0.917548805475235, "num_tokens": 8406459.0, "step": 4692 }, { "epoch": 0.7599384665209294, "grad_norm": 24.43300437927246, "learning_rate": 2.4028497409326427e-06, "loss": 0.5379, "mean_token_accuracy": 0.9188774228096008, "num_tokens": 8408253.0, "step": 4693 }, { "epoch": 0.7601003967290098, "grad_norm": 22.149044036865234, "learning_rate": 2.4012305699481867e-06, "loss": 0.5507, "mean_token_accuracy": 0.9208475053310394, "num_tokens": 8410056.0, "step": 4694 }, { "epoch": 0.7602623269370901, "grad_norm": 23.576396942138672, "learning_rate": 2.3996113989637307e-06, "loss": 0.6056, "mean_token_accuracy": 0.9255533218383789, "num_tokens": 8411850.0, "step": 4695 }, { "epoch": 0.7604242571451705, "grad_norm": 25.82002830505371, "learning_rate": 2.3979922279792747e-06, "loss": 0.5768, "mean_token_accuracy": 0.920152485370636, "num_tokens": 8413637.0, "step": 4696 }, { "epoch": 0.7605861873532508, "grad_norm": 29.509443283081055, "learning_rate": 2.3963730569948187e-06, "loss": 0.6614, "mean_token_accuracy": 0.9179607033729553, "num_tokens": 8415428.0, "step": 4697 }, { "epoch": 0.7607481175613311, "grad_norm": 22.199304580688477, "learning_rate": 2.3947538860103627e-06, "loss": 0.5581, "mean_token_accuracy": 0.9225405752658844, "num_tokens": 8417211.0, "step": 4698 }, { "epoch": 0.7609100477694114, "grad_norm": 28.593584060668945, "learning_rate": 2.3931347150259068e-06, "loss": 0.6197, "mean_token_accuracy": 0.9057729840278625, "num_tokens": 8419009.0, "step": 4699 }, { "epoch": 0.7610719779774917, "grad_norm": 34.31404495239258, "learning_rate": 2.3915155440414508e-06, "loss": 0.6337, "mean_token_accuracy": 0.9113828539848328, "num_tokens": 8420803.0, "step": 4700 }, { "epoch": 0.761233908185572, "grad_norm": 26.927459716796875, "learning_rate": 2.3898963730569948e-06, "loss": 0.6147, "mean_token_accuracy": 0.9112305343151093, "num_tokens": 8422609.0, "step": 4701 }, { "epoch": 0.7613958383936523, "grad_norm": 27.589406967163086, "learning_rate": 2.388277202072539e-06, "loss": 0.5832, "mean_token_accuracy": 0.9190182685852051, "num_tokens": 8424405.0, "step": 4702 }, { "epoch": 0.7615577686017326, "grad_norm": 27.552906036376953, "learning_rate": 2.386658031088083e-06, "loss": 0.5407, "mean_token_accuracy": 0.9192849397659302, "num_tokens": 8426202.0, "step": 4703 }, { "epoch": 0.7617196988098129, "grad_norm": 32.81730651855469, "learning_rate": 2.385038860103627e-06, "loss": 0.6052, "mean_token_accuracy": 0.9151932895183563, "num_tokens": 8427997.0, "step": 4704 }, { "epoch": 0.7618816290178932, "grad_norm": 32.94668197631836, "learning_rate": 2.383419689119171e-06, "loss": 0.7482, "mean_token_accuracy": 0.90427565574646, "num_tokens": 8429791.0, "step": 4705 }, { "epoch": 0.7620435592259736, "grad_norm": 38.35142517089844, "learning_rate": 2.3818005181347153e-06, "loss": 0.7828, "mean_token_accuracy": 0.9066639840602875, "num_tokens": 8431602.0, "step": 4706 }, { "epoch": 0.762205489434054, "grad_norm": 26.179454803466797, "learning_rate": 2.3801813471502593e-06, "loss": 0.5638, "mean_token_accuracy": 0.9137202799320221, "num_tokens": 8433391.0, "step": 4707 }, { "epoch": 0.7623674196421343, "grad_norm": 23.248441696166992, "learning_rate": 2.3785621761658033e-06, "loss": 0.6551, "mean_token_accuracy": 0.9233946204185486, "num_tokens": 8435177.0, "step": 4708 }, { "epoch": 0.7625293498502146, "grad_norm": 29.711692810058594, "learning_rate": 2.3769430051813473e-06, "loss": 0.5935, "mean_token_accuracy": 0.9165835082530975, "num_tokens": 8436965.0, "step": 4709 }, { "epoch": 0.7626912800582949, "grad_norm": 30.165103912353516, "learning_rate": 2.3753238341968913e-06, "loss": 0.6157, "mean_token_accuracy": 0.9077596664428711, "num_tokens": 8438759.0, "step": 4710 }, { "epoch": 0.7628532102663752, "grad_norm": 30.472110748291016, "learning_rate": 2.3737046632124353e-06, "loss": 0.6836, "mean_token_accuracy": 0.9259753525257111, "num_tokens": 8440553.0, "step": 4711 }, { "epoch": 0.7630151404744555, "grad_norm": 18.754375457763672, "learning_rate": 2.3720854922279798e-06, "loss": 0.5275, "mean_token_accuracy": 0.9336056709289551, "num_tokens": 8442336.0, "step": 4712 }, { "epoch": 0.7631770706825358, "grad_norm": 26.887163162231445, "learning_rate": 2.370466321243524e-06, "loss": 0.6045, "mean_token_accuracy": 0.9098075330257416, "num_tokens": 8444128.0, "step": 4713 }, { "epoch": 0.7633390008906161, "grad_norm": 37.439884185791016, "learning_rate": 2.368847150259068e-06, "loss": 0.5815, "mean_token_accuracy": 0.9199119508266449, "num_tokens": 8445926.0, "step": 4714 }, { "epoch": 0.7635009310986964, "grad_norm": 16.836505889892578, "learning_rate": 2.367227979274612e-06, "loss": 0.4705, "mean_token_accuracy": 0.9361188113689423, "num_tokens": 8447720.0, "step": 4715 }, { "epoch": 0.7636628613067767, "grad_norm": 28.99469566345215, "learning_rate": 2.365608808290156e-06, "loss": 0.6049, "mean_token_accuracy": 0.9113828539848328, "num_tokens": 8449514.0, "step": 4716 }, { "epoch": 0.763824791514857, "grad_norm": 24.486297607421875, "learning_rate": 2.3639896373057e-06, "loss": 0.5868, "mean_token_accuracy": 0.9205766022205353, "num_tokens": 8451303.0, "step": 4717 }, { "epoch": 0.7639867217229375, "grad_norm": 20.67795181274414, "learning_rate": 2.362370466321244e-06, "loss": 0.4954, "mean_token_accuracy": 0.9270983338356018, "num_tokens": 8453089.0, "step": 4718 }, { "epoch": 0.7641486519310178, "grad_norm": 23.633642196655273, "learning_rate": 2.360751295336788e-06, "loss": 0.5837, "mean_token_accuracy": 0.9193262457847595, "num_tokens": 8454874.0, "step": 4719 }, { "epoch": 0.7643105821390981, "grad_norm": 37.5645866394043, "learning_rate": 2.359132124352332e-06, "loss": 0.6867, "mean_token_accuracy": 0.901867538690567, "num_tokens": 8456671.0, "step": 4720 }, { "epoch": 0.7644725123471784, "grad_norm": 32.31490707397461, "learning_rate": 2.357512953367876e-06, "loss": 0.6844, "mean_token_accuracy": 0.9169971942901611, "num_tokens": 8458460.0, "step": 4721 }, { "epoch": 0.7646344425552587, "grad_norm": 26.569786071777344, "learning_rate": 2.35589378238342e-06, "loss": 0.5869, "mean_token_accuracy": 0.9225463271141052, "num_tokens": 8460243.0, "step": 4722 }, { "epoch": 0.764796372763339, "grad_norm": 29.161724090576172, "learning_rate": 2.354274611398964e-06, "loss": 0.5473, "mean_token_accuracy": 0.9216987490653992, "num_tokens": 8462035.0, "step": 4723 }, { "epoch": 0.7649583029714193, "grad_norm": 28.16563606262207, "learning_rate": 2.352655440414508e-06, "loss": 0.5542, "mean_token_accuracy": 0.9241643846035004, "num_tokens": 8463823.0, "step": 4724 }, { "epoch": 0.7651202331794996, "grad_norm": 30.769689559936523, "learning_rate": 2.351036269430052e-06, "loss": 0.5844, "mean_token_accuracy": 0.9133403301239014, "num_tokens": 8465611.0, "step": 4725 }, { "epoch": 0.7652821633875799, "grad_norm": 33.77940368652344, "learning_rate": 2.349417098445596e-06, "loss": 0.576, "mean_token_accuracy": 0.9166666865348816, "num_tokens": 8467399.0, "step": 4726 }, { "epoch": 0.7654440935956602, "grad_norm": 26.95114517211914, "learning_rate": 2.34779792746114e-06, "loss": 0.572, "mean_token_accuracy": 0.9237219393253326, "num_tokens": 8469201.0, "step": 4727 }, { "epoch": 0.7656060238037405, "grad_norm": 35.32869338989258, "learning_rate": 2.346178756476684e-06, "loss": 0.6778, "mean_token_accuracy": 0.9130434989929199, "num_tokens": 8470989.0, "step": 4728 }, { "epoch": 0.7657679540118209, "grad_norm": 37.70273208618164, "learning_rate": 2.344559585492228e-06, "loss": 0.6973, "mean_token_accuracy": 0.9060872197151184, "num_tokens": 8472789.0, "step": 4729 }, { "epoch": 0.7659298842199013, "grad_norm": 17.869884490966797, "learning_rate": 2.342940414507772e-06, "loss": 0.5191, "mean_token_accuracy": 0.9311594367027283, "num_tokens": 8474577.0, "step": 4730 }, { "epoch": 0.7660918144279816, "grad_norm": 17.92411994934082, "learning_rate": 2.3413212435233164e-06, "loss": 0.5027, "mean_token_accuracy": 0.9236221313476562, "num_tokens": 8476364.0, "step": 4731 }, { "epoch": 0.7662537446360619, "grad_norm": 32.35110092163086, "learning_rate": 2.3397020725388605e-06, "loss": 0.6546, "mean_token_accuracy": 0.9107346832752228, "num_tokens": 8478166.0, "step": 4732 }, { "epoch": 0.7664156748441422, "grad_norm": 37.07913589477539, "learning_rate": 2.3380829015544045e-06, "loss": 0.7623, "mean_token_accuracy": 0.9038697779178619, "num_tokens": 8479969.0, "step": 4733 }, { "epoch": 0.7665776050522225, "grad_norm": 34.32615661621094, "learning_rate": 2.3364637305699485e-06, "loss": 0.6855, "mean_token_accuracy": 0.900503009557724, "num_tokens": 8481763.0, "step": 4734 }, { "epoch": 0.7667395352603028, "grad_norm": 10.937378883361816, "learning_rate": 2.3348445595854925e-06, "loss": 0.4368, "mean_token_accuracy": 0.9391196966171265, "num_tokens": 8483554.0, "step": 4735 }, { "epoch": 0.7669014654683831, "grad_norm": 36.966796875, "learning_rate": 2.3332253886010365e-06, "loss": 0.7853, "mean_token_accuracy": 0.9075387418270111, "num_tokens": 8485347.0, "step": 4736 }, { "epoch": 0.7670633956764634, "grad_norm": 24.56297492980957, "learning_rate": 2.3316062176165805e-06, "loss": 0.5964, "mean_token_accuracy": 0.9169186651706696, "num_tokens": 8487136.0, "step": 4737 }, { "epoch": 0.7672253258845437, "grad_norm": 31.4870662689209, "learning_rate": 2.3299870466321245e-06, "loss": 0.6979, "mean_token_accuracy": 0.9183647632598877, "num_tokens": 8488929.0, "step": 4738 }, { "epoch": 0.767387256092624, "grad_norm": 33.57414245605469, "learning_rate": 2.3283678756476686e-06, "loss": 0.6023, "mean_token_accuracy": 0.9137547016143799, "num_tokens": 8490719.0, "step": 4739 }, { "epoch": 0.7675491863007043, "grad_norm": 22.86031723022461, "learning_rate": 2.3267487046632126e-06, "loss": 0.5663, "mean_token_accuracy": 0.9298503696918488, "num_tokens": 8492517.0, "step": 4740 }, { "epoch": 0.7677111165087848, "grad_norm": 41.793975830078125, "learning_rate": 2.3251295336787566e-06, "loss": 0.7035, "mean_token_accuracy": 0.8914404511451721, "num_tokens": 8494315.0, "step": 4741 }, { "epoch": 0.7678730467168651, "grad_norm": 27.46380043029785, "learning_rate": 2.3235103626943006e-06, "loss": 0.5037, "mean_token_accuracy": 0.9282702505588531, "num_tokens": 8496106.0, "step": 4742 }, { "epoch": 0.7680349769249454, "grad_norm": 32.72002029418945, "learning_rate": 2.3218911917098446e-06, "loss": 0.6735, "mean_token_accuracy": 0.9085765480995178, "num_tokens": 8497902.0, "step": 4743 }, { "epoch": 0.7681969071330257, "grad_norm": 32.569854736328125, "learning_rate": 2.3202720207253886e-06, "loss": 0.703, "mean_token_accuracy": 0.9122023582458496, "num_tokens": 8499698.0, "step": 4744 }, { "epoch": 0.768358837341106, "grad_norm": 27.73603057861328, "learning_rate": 2.3186528497409326e-06, "loss": 0.6119, "mean_token_accuracy": 0.9216992855072021, "num_tokens": 8501491.0, "step": 4745 }, { "epoch": 0.7685207675491863, "grad_norm": 27.155590057373047, "learning_rate": 2.3170336787564766e-06, "loss": 0.6124, "mean_token_accuracy": 0.9231884181499481, "num_tokens": 8503276.0, "step": 4746 }, { "epoch": 0.7686826977572666, "grad_norm": 23.853473663330078, "learning_rate": 2.3154145077720207e-06, "loss": 0.5089, "mean_token_accuracy": 0.9225198328495026, "num_tokens": 8505072.0, "step": 4747 }, { "epoch": 0.7688446279653469, "grad_norm": 27.608078002929688, "learning_rate": 2.3137953367875647e-06, "loss": 0.5704, "mean_token_accuracy": 0.921171635389328, "num_tokens": 8506863.0, "step": 4748 }, { "epoch": 0.7690065581734272, "grad_norm": 27.092342376708984, "learning_rate": 2.3121761658031087e-06, "loss": 0.543, "mean_token_accuracy": 0.9326961934566498, "num_tokens": 8508657.0, "step": 4749 }, { "epoch": 0.7691684883815075, "grad_norm": 42.29644775390625, "learning_rate": 2.310556994818653e-06, "loss": 0.7802, "mean_token_accuracy": 0.8955715000629425, "num_tokens": 8510455.0, "step": 4750 }, { "epoch": 0.7693304185895878, "grad_norm": 22.910228729248047, "learning_rate": 2.308937823834197e-06, "loss": 0.5827, "mean_token_accuracy": 0.9312728047370911, "num_tokens": 8512244.0, "step": 4751 }, { "epoch": 0.7694923487976683, "grad_norm": 35.55131530761719, "learning_rate": 2.307318652849741e-06, "loss": 0.7388, "mean_token_accuracy": 0.9022064507007599, "num_tokens": 8514032.0, "step": 4752 }, { "epoch": 0.7696542790057486, "grad_norm": 24.13022804260254, "learning_rate": 2.305699481865285e-06, "loss": 0.5343, "mean_token_accuracy": 0.9214245676994324, "num_tokens": 8515824.0, "step": 4753 }, { "epoch": 0.7698162092138289, "grad_norm": 34.95334243774414, "learning_rate": 2.304080310880829e-06, "loss": 0.6495, "mean_token_accuracy": 0.9150264263153076, "num_tokens": 8517618.0, "step": 4754 }, { "epoch": 0.7699781394219092, "grad_norm": 22.432109832763672, "learning_rate": 2.302461139896373e-06, "loss": 0.5564, "mean_token_accuracy": 0.9173611104488373, "num_tokens": 8519409.0, "step": 4755 }, { "epoch": 0.7701400696299895, "grad_norm": 21.526447296142578, "learning_rate": 2.3008419689119176e-06, "loss": 0.5283, "mean_token_accuracy": 0.9362995326519012, "num_tokens": 8521204.0, "step": 4756 }, { "epoch": 0.7703019998380698, "grad_norm": 20.997190475463867, "learning_rate": 2.2992227979274616e-06, "loss": 0.5105, "mean_token_accuracy": 0.9280426800251007, "num_tokens": 8522994.0, "step": 4757 }, { "epoch": 0.7704639300461501, "grad_norm": 16.79971694946289, "learning_rate": 2.2976036269430056e-06, "loss": 0.5558, "mean_token_accuracy": 0.9201631844043732, "num_tokens": 8524781.0, "step": 4758 }, { "epoch": 0.7706258602542304, "grad_norm": 26.214557647705078, "learning_rate": 2.2959844559585497e-06, "loss": 0.5035, "mean_token_accuracy": 0.9293757081031799, "num_tokens": 8526561.0, "step": 4759 }, { "epoch": 0.7707877904623107, "grad_norm": 26.027692794799805, "learning_rate": 2.2943652849740937e-06, "loss": 0.5559, "mean_token_accuracy": 0.9179593324661255, "num_tokens": 8528355.0, "step": 4760 }, { "epoch": 0.770949720670391, "grad_norm": 35.00592803955078, "learning_rate": 2.2927461139896377e-06, "loss": 0.7167, "mean_token_accuracy": 0.9098277688026428, "num_tokens": 8530155.0, "step": 4761 }, { "epoch": 0.7711116508784713, "grad_norm": 24.112192153930664, "learning_rate": 2.2911269430051817e-06, "loss": 0.6061, "mean_token_accuracy": 0.9177893698215485, "num_tokens": 8531947.0, "step": 4762 }, { "epoch": 0.7712735810865518, "grad_norm": 31.94225311279297, "learning_rate": 2.2895077720207257e-06, "loss": 0.7386, "mean_token_accuracy": 0.9119047820568085, "num_tokens": 8533743.0, "step": 4763 }, { "epoch": 0.7714355112946321, "grad_norm": 27.06142234802246, "learning_rate": 2.2878886010362697e-06, "loss": 0.5135, "mean_token_accuracy": 0.9188838601112366, "num_tokens": 8535527.0, "step": 4764 }, { "epoch": 0.7715974415027124, "grad_norm": 30.965436935424805, "learning_rate": 2.2862694300518137e-06, "loss": 0.5861, "mean_token_accuracy": 0.9149631559848785, "num_tokens": 8537333.0, "step": 4765 }, { "epoch": 0.7717593717107927, "grad_norm": 35.833438873291016, "learning_rate": 2.2846502590673578e-06, "loss": 0.8344, "mean_token_accuracy": 0.9075655937194824, "num_tokens": 8539126.0, "step": 4766 }, { "epoch": 0.771921301918873, "grad_norm": 23.763675689697266, "learning_rate": 2.2830310880829018e-06, "loss": 0.5567, "mean_token_accuracy": 0.9263465404510498, "num_tokens": 8540910.0, "step": 4767 }, { "epoch": 0.7720832321269533, "grad_norm": 30.188648223876953, "learning_rate": 2.2814119170984458e-06, "loss": 0.6418, "mean_token_accuracy": 0.9117647111415863, "num_tokens": 8542694.0, "step": 4768 }, { "epoch": 0.7722451623350336, "grad_norm": 17.313716888427734, "learning_rate": 2.27979274611399e-06, "loss": 0.4787, "mean_token_accuracy": 0.9344349503517151, "num_tokens": 8544480.0, "step": 4769 }, { "epoch": 0.7724070925431139, "grad_norm": 19.52690315246582, "learning_rate": 2.278173575129534e-06, "loss": 0.4625, "mean_token_accuracy": 0.9340715110301971, "num_tokens": 8546280.0, "step": 4770 }, { "epoch": 0.7725690227511942, "grad_norm": 34.0203742980957, "learning_rate": 2.276554404145078e-06, "loss": 0.6652, "mean_token_accuracy": 0.9107851386070251, "num_tokens": 8548083.0, "step": 4771 }, { "epoch": 0.7727309529592745, "grad_norm": 28.469274520874023, "learning_rate": 2.274935233160622e-06, "loss": 0.5651, "mean_token_accuracy": 0.9120330810546875, "num_tokens": 8549868.0, "step": 4772 }, { "epoch": 0.7728928831673548, "grad_norm": 31.10840606689453, "learning_rate": 2.273316062176166e-06, "loss": 0.9002, "mean_token_accuracy": 0.9148764908313751, "num_tokens": 8551662.0, "step": 4773 }, { "epoch": 0.7730548133754351, "grad_norm": 36.347286224365234, "learning_rate": 2.27169689119171e-06, "loss": 0.667, "mean_token_accuracy": 0.8953522741794586, "num_tokens": 8553460.0, "step": 4774 }, { "epoch": 0.7732167435835156, "grad_norm": 31.08747100830078, "learning_rate": 2.2700777202072543e-06, "loss": 0.5825, "mean_token_accuracy": 0.9216834008693695, "num_tokens": 8555253.0, "step": 4775 }, { "epoch": 0.7733786737915959, "grad_norm": 30.034589767456055, "learning_rate": 2.2684585492227983e-06, "loss": 0.5449, "mean_token_accuracy": 0.9203781485557556, "num_tokens": 8557041.0, "step": 4776 }, { "epoch": 0.7735406039996762, "grad_norm": 28.906774520874023, "learning_rate": 2.2668393782383423e-06, "loss": 0.7477, "mean_token_accuracy": 0.9107142984867096, "num_tokens": 8558833.0, "step": 4777 }, { "epoch": 0.7737025342077565, "grad_norm": 29.198413848876953, "learning_rate": 2.2652202072538863e-06, "loss": 0.6145, "mean_token_accuracy": 0.9191037714481354, "num_tokens": 8560633.0, "step": 4778 }, { "epoch": 0.7738644644158368, "grad_norm": 29.853670120239258, "learning_rate": 2.2636010362694303e-06, "loss": 0.5517, "mean_token_accuracy": 0.9243197441101074, "num_tokens": 8562424.0, "step": 4779 }, { "epoch": 0.7740263946239171, "grad_norm": 37.71849060058594, "learning_rate": 2.2619818652849744e-06, "loss": 0.6316, "mean_token_accuracy": 0.9079670310020447, "num_tokens": 8564219.0, "step": 4780 }, { "epoch": 0.7741883248319974, "grad_norm": 29.527971267700195, "learning_rate": 2.2603626943005184e-06, "loss": 0.6106, "mean_token_accuracy": 0.9216992855072021, "num_tokens": 8566012.0, "step": 4781 }, { "epoch": 0.7743502550400777, "grad_norm": 21.974746704101562, "learning_rate": 2.2587435233160624e-06, "loss": 0.4855, "mean_token_accuracy": 0.9302884638309479, "num_tokens": 8567811.0, "step": 4782 }, { "epoch": 0.774512185248158, "grad_norm": 32.31019973754883, "learning_rate": 2.2571243523316064e-06, "loss": 0.6468, "mean_token_accuracy": 0.908225953578949, "num_tokens": 8569606.0, "step": 4783 }, { "epoch": 0.7746741154562383, "grad_norm": 29.321130752563477, "learning_rate": 2.2555051813471504e-06, "loss": 0.4931, "mean_token_accuracy": 0.922252744436264, "num_tokens": 8571401.0, "step": 4784 }, { "epoch": 0.7748360456643186, "grad_norm": 28.01386070251465, "learning_rate": 2.2538860103626944e-06, "loss": 0.5188, "mean_token_accuracy": 0.9248905181884766, "num_tokens": 8573206.0, "step": 4785 }, { "epoch": 0.774997975872399, "grad_norm": 27.028675079345703, "learning_rate": 2.2522668393782384e-06, "loss": 0.5818, "mean_token_accuracy": 0.9160839319229126, "num_tokens": 8575004.0, "step": 4786 }, { "epoch": 0.7751599060804794, "grad_norm": 27.31130027770996, "learning_rate": 2.2506476683937825e-06, "loss": 0.5209, "mean_token_accuracy": 0.9285386204719543, "num_tokens": 8576796.0, "step": 4787 }, { "epoch": 0.7753218362885597, "grad_norm": 22.87717056274414, "learning_rate": 2.2490284974093265e-06, "loss": 0.5309, "mean_token_accuracy": 0.9212121367454529, "num_tokens": 8578586.0, "step": 4788 }, { "epoch": 0.77548376649664, "grad_norm": 31.363008499145508, "learning_rate": 2.2474093264248705e-06, "loss": 0.5188, "mean_token_accuracy": 0.9259920716285706, "num_tokens": 8580382.0, "step": 4789 }, { "epoch": 0.7756456967047203, "grad_norm": 25.953182220458984, "learning_rate": 2.2457901554404145e-06, "loss": 0.5333, "mean_token_accuracy": 0.9233365952968597, "num_tokens": 8582180.0, "step": 4790 }, { "epoch": 0.7758076269128006, "grad_norm": 30.845632553100586, "learning_rate": 2.2441709844559585e-06, "loss": 0.6634, "mean_token_accuracy": 0.9121931195259094, "num_tokens": 8583963.0, "step": 4791 }, { "epoch": 0.7759695571208809, "grad_norm": 25.139423370361328, "learning_rate": 2.2425518134715025e-06, "loss": 0.5743, "mean_token_accuracy": 0.916979968547821, "num_tokens": 8585740.0, "step": 4792 }, { "epoch": 0.7761314873289612, "grad_norm": 33.40688705444336, "learning_rate": 2.2409326424870465e-06, "loss": 0.6303, "mean_token_accuracy": 0.9129201769828796, "num_tokens": 8587528.0, "step": 4793 }, { "epoch": 0.7762934175370415, "grad_norm": 27.23075294494629, "learning_rate": 2.239313471502591e-06, "loss": 0.5374, "mean_token_accuracy": 0.922448992729187, "num_tokens": 8589322.0, "step": 4794 }, { "epoch": 0.7764553477451218, "grad_norm": 37.810176849365234, "learning_rate": 2.237694300518135e-06, "loss": 0.693, "mean_token_accuracy": 0.9033302664756775, "num_tokens": 8591113.0, "step": 4795 }, { "epoch": 0.7766172779532021, "grad_norm": 34.69388198852539, "learning_rate": 2.236075129533679e-06, "loss": 0.6295, "mean_token_accuracy": 0.9183673560619354, "num_tokens": 8592919.0, "step": 4796 }, { "epoch": 0.7767792081612825, "grad_norm": 36.328636169433594, "learning_rate": 2.234455958549223e-06, "loss": 0.6683, "mean_token_accuracy": 0.920410692691803, "num_tokens": 8594721.0, "step": 4797 }, { "epoch": 0.7769411383693628, "grad_norm": 21.276636123657227, "learning_rate": 2.232836787564767e-06, "loss": 0.5086, "mean_token_accuracy": 0.9275849461555481, "num_tokens": 8596509.0, "step": 4798 }, { "epoch": 0.7771030685774432, "grad_norm": 23.949460983276367, "learning_rate": 2.231217616580311e-06, "loss": 0.5294, "mean_token_accuracy": 0.9252279698848724, "num_tokens": 8598302.0, "step": 4799 }, { "epoch": 0.7772649987855235, "grad_norm": 20.94979476928711, "learning_rate": 2.229598445595855e-06, "loss": 0.5115, "mean_token_accuracy": 0.9183273017406464, "num_tokens": 8600096.0, "step": 4800 }, { "epoch": 0.7774269289936038, "grad_norm": 35.0922966003418, "learning_rate": 2.227979274611399e-06, "loss": 0.7074, "mean_token_accuracy": 0.9007898569107056, "num_tokens": 8601890.0, "step": 4801 }, { "epoch": 0.7775888592016841, "grad_norm": 33.468990325927734, "learning_rate": 2.2263601036269435e-06, "loss": 0.6852, "mean_token_accuracy": 0.9107142984867096, "num_tokens": 8603682.0, "step": 4802 }, { "epoch": 0.7777507894097644, "grad_norm": 35.45664596557617, "learning_rate": 2.2247409326424875e-06, "loss": 0.5794, "mean_token_accuracy": 0.9195031523704529, "num_tokens": 8605480.0, "step": 4803 }, { "epoch": 0.7779127196178447, "grad_norm": 24.298919677734375, "learning_rate": 2.2231217616580315e-06, "loss": 0.4853, "mean_token_accuracy": 0.9256495237350464, "num_tokens": 8607261.0, "step": 4804 }, { "epoch": 0.778074649825925, "grad_norm": 25.70359230041504, "learning_rate": 2.2215025906735755e-06, "loss": 0.4897, "mean_token_accuracy": 0.9264666140079498, "num_tokens": 8609045.0, "step": 4805 }, { "epoch": 0.7782365800340053, "grad_norm": 19.799482345581055, "learning_rate": 2.2198834196891195e-06, "loss": 0.4681, "mean_token_accuracy": 0.9280538260936737, "num_tokens": 8610835.0, "step": 4806 }, { "epoch": 0.7783985102420856, "grad_norm": 32.9798583984375, "learning_rate": 2.2182642487046636e-06, "loss": 0.5757, "mean_token_accuracy": 0.9200254082679749, "num_tokens": 8612622.0, "step": 4807 }, { "epoch": 0.7785604404501659, "grad_norm": 25.638708114624023, "learning_rate": 2.2166450777202076e-06, "loss": 0.511, "mean_token_accuracy": 0.9263465404510498, "num_tokens": 8614406.0, "step": 4808 }, { "epoch": 0.7787223706582463, "grad_norm": 38.800079345703125, "learning_rate": 2.2150259067357516e-06, "loss": 0.7466, "mean_token_accuracy": 0.8971631228923798, "num_tokens": 8616200.0, "step": 4809 }, { "epoch": 0.7788843008663267, "grad_norm": 32.94105911254883, "learning_rate": 2.2134067357512956e-06, "loss": 0.649, "mean_token_accuracy": 0.9181869029998779, "num_tokens": 8617993.0, "step": 4810 }, { "epoch": 0.779046231074407, "grad_norm": 27.18923568725586, "learning_rate": 2.2117875647668396e-06, "loss": 0.5818, "mean_token_accuracy": 0.9149899184703827, "num_tokens": 8619787.0, "step": 4811 }, { "epoch": 0.7792081612824873, "grad_norm": 39.83348083496094, "learning_rate": 2.2101683937823836e-06, "loss": 0.7021, "mean_token_accuracy": 0.9142982661724091, "num_tokens": 8621601.0, "step": 4812 }, { "epoch": 0.7793700914905676, "grad_norm": 28.60740852355957, "learning_rate": 2.2085492227979276e-06, "loss": 0.5385, "mean_token_accuracy": 0.9256678521633148, "num_tokens": 8623395.0, "step": 4813 }, { "epoch": 0.7795320216986479, "grad_norm": 35.63589096069336, "learning_rate": 2.2069300518134717e-06, "loss": 0.5378, "mean_token_accuracy": 0.9226918518543243, "num_tokens": 8625192.0, "step": 4814 }, { "epoch": 0.7796939519067282, "grad_norm": 30.61024284362793, "learning_rate": 2.2053108808290157e-06, "loss": 0.5195, "mean_token_accuracy": 0.930644690990448, "num_tokens": 8626993.0, "step": 4815 }, { "epoch": 0.7798558821148085, "grad_norm": 31.67363929748535, "learning_rate": 2.2036917098445597e-06, "loss": 0.5714, "mean_token_accuracy": 0.9154835939407349, "num_tokens": 8628790.0, "step": 4816 }, { "epoch": 0.7800178123228888, "grad_norm": 31.31595802307129, "learning_rate": 2.2020725388601037e-06, "loss": 0.5373, "mean_token_accuracy": 0.9266505837440491, "num_tokens": 8630575.0, "step": 4817 }, { "epoch": 0.7801797425309691, "grad_norm": 23.93700408935547, "learning_rate": 2.2004533678756477e-06, "loss": 0.5084, "mean_token_accuracy": 0.9220321774482727, "num_tokens": 8632369.0, "step": 4818 }, { "epoch": 0.7803416727390494, "grad_norm": 12.18370246887207, "learning_rate": 2.198834196891192e-06, "loss": 0.4513, "mean_token_accuracy": 0.9328877627849579, "num_tokens": 8634149.0, "step": 4819 }, { "epoch": 0.7805036029471298, "grad_norm": 41.27445983886719, "learning_rate": 2.197215025906736e-06, "loss": 0.7496, "mean_token_accuracy": 0.8943661749362946, "num_tokens": 8635945.0, "step": 4820 }, { "epoch": 0.7806655331552101, "grad_norm": 25.761497497558594, "learning_rate": 2.19559585492228e-06, "loss": 0.5149, "mean_token_accuracy": 0.9316956996917725, "num_tokens": 8637735.0, "step": 4821 }, { "epoch": 0.7808274633632905, "grad_norm": 21.085548400878906, "learning_rate": 2.193976683937824e-06, "loss": 0.4822, "mean_token_accuracy": 0.9350104331970215, "num_tokens": 8639524.0, "step": 4822 }, { "epoch": 0.7809893935713708, "grad_norm": 35.967491149902344, "learning_rate": 2.192357512953368e-06, "loss": 0.6627, "mean_token_accuracy": 0.9107352197170258, "num_tokens": 8641316.0, "step": 4823 }, { "epoch": 0.7811513237794511, "grad_norm": 29.158815383911133, "learning_rate": 2.190738341968912e-06, "loss": 0.5839, "mean_token_accuracy": 0.9137443602085114, "num_tokens": 8643107.0, "step": 4824 }, { "epoch": 0.7813132539875314, "grad_norm": 36.52337646484375, "learning_rate": 2.1891191709844562e-06, "loss": 0.6592, "mean_token_accuracy": 0.9040176272392273, "num_tokens": 8644900.0, "step": 4825 }, { "epoch": 0.7814751841956117, "grad_norm": 12.187175750732422, "learning_rate": 2.1875000000000002e-06, "loss": 0.4521, "mean_token_accuracy": 0.9343030750751495, "num_tokens": 8646686.0, "step": 4826 }, { "epoch": 0.781637114403692, "grad_norm": 33.27248764038086, "learning_rate": 2.1858808290155442e-06, "loss": 0.6412, "mean_token_accuracy": 0.9012077450752258, "num_tokens": 8648471.0, "step": 4827 }, { "epoch": 0.7817990446117723, "grad_norm": 28.864276885986328, "learning_rate": 2.1842616580310883e-06, "loss": 0.6673, "mean_token_accuracy": 0.9133041501045227, "num_tokens": 8650260.0, "step": 4828 }, { "epoch": 0.7819609748198526, "grad_norm": 26.825626373291016, "learning_rate": 2.1826424870466323e-06, "loss": 0.4909, "mean_token_accuracy": 0.9299019575119019, "num_tokens": 8652043.0, "step": 4829 }, { "epoch": 0.7821229050279329, "grad_norm": 20.098388671875, "learning_rate": 2.1810233160621763e-06, "loss": 0.6166, "mean_token_accuracy": 0.9106098711490631, "num_tokens": 8653823.0, "step": 4830 }, { "epoch": 0.7822848352360133, "grad_norm": 25.42427635192871, "learning_rate": 2.1794041450777203e-06, "loss": 0.5618, "mean_token_accuracy": 0.9236750304698944, "num_tokens": 8655610.0, "step": 4831 }, { "epoch": 0.7824467654440936, "grad_norm": 32.184326171875, "learning_rate": 2.1777849740932643e-06, "loss": 0.5795, "mean_token_accuracy": 0.91847363114357, "num_tokens": 8657404.0, "step": 4832 }, { "epoch": 0.782608695652174, "grad_norm": 17.487751007080078, "learning_rate": 2.1761658031088083e-06, "loss": 0.4633, "mean_token_accuracy": 0.9345445930957794, "num_tokens": 8659191.0, "step": 4833 }, { "epoch": 0.7827706258602543, "grad_norm": 26.082229614257812, "learning_rate": 2.1745466321243523e-06, "loss": 0.5232, "mean_token_accuracy": 0.9234008491039276, "num_tokens": 8660977.0, "step": 4834 }, { "epoch": 0.7829325560683346, "grad_norm": 38.65194320678711, "learning_rate": 2.1729274611398963e-06, "loss": 0.7551, "mean_token_accuracy": 0.9030129015445709, "num_tokens": 8662767.0, "step": 4835 }, { "epoch": 0.7830944862764149, "grad_norm": 20.90742301940918, "learning_rate": 2.1713082901554404e-06, "loss": 0.5081, "mean_token_accuracy": 0.9310612082481384, "num_tokens": 8664553.0, "step": 4836 }, { "epoch": 0.7832564164844952, "grad_norm": 31.811559677124023, "learning_rate": 2.1696891191709844e-06, "loss": 0.572, "mean_token_accuracy": 0.9241343140602112, "num_tokens": 8666355.0, "step": 4837 }, { "epoch": 0.7834183466925755, "grad_norm": 36.27912521362305, "learning_rate": 2.168069948186529e-06, "loss": 0.6736, "mean_token_accuracy": 0.9035409688949585, "num_tokens": 8668147.0, "step": 4838 }, { "epoch": 0.7835802769006558, "grad_norm": 17.592748641967773, "learning_rate": 2.166450777202073e-06, "loss": 0.4482, "mean_token_accuracy": 0.9352216124534607, "num_tokens": 8669937.0, "step": 4839 }, { "epoch": 0.7837422071087361, "grad_norm": 40.53508758544922, "learning_rate": 2.164831606217617e-06, "loss": 0.7288, "mean_token_accuracy": 0.9002532958984375, "num_tokens": 8671730.0, "step": 4840 }, { "epoch": 0.7839041373168164, "grad_norm": 33.6708984375, "learning_rate": 2.163212435233161e-06, "loss": 0.6062, "mean_token_accuracy": 0.9094492495059967, "num_tokens": 8673518.0, "step": 4841 }, { "epoch": 0.7840660675248967, "grad_norm": 32.10221862792969, "learning_rate": 2.161593264248705e-06, "loss": 0.6511, "mean_token_accuracy": 0.905809611082077, "num_tokens": 8675315.0, "step": 4842 }, { "epoch": 0.7842279977329771, "grad_norm": 45.56889343261719, "learning_rate": 2.159974093264249e-06, "loss": 0.7006, "mean_token_accuracy": 0.891838401556015, "num_tokens": 8677122.0, "step": 4843 }, { "epoch": 0.7843899279410574, "grad_norm": 44.523189544677734, "learning_rate": 2.158354922279793e-06, "loss": 0.7301, "mean_token_accuracy": 0.899463564157486, "num_tokens": 8678923.0, "step": 4844 }, { "epoch": 0.7845518581491377, "grad_norm": 43.56492614746094, "learning_rate": 2.156735751295337e-06, "loss": 0.6797, "mean_token_accuracy": 0.9078092277050018, "num_tokens": 8680718.0, "step": 4845 }, { "epoch": 0.7847137883572181, "grad_norm": 36.994571685791016, "learning_rate": 2.155116580310881e-06, "loss": 0.6896, "mean_token_accuracy": 0.9053634703159332, "num_tokens": 8682515.0, "step": 4846 }, { "epoch": 0.7848757185652984, "grad_norm": 19.518110275268555, "learning_rate": 2.153497409326425e-06, "loss": 0.453, "mean_token_accuracy": 0.9306555390357971, "num_tokens": 8684316.0, "step": 4847 }, { "epoch": 0.7850376487733787, "grad_norm": 32.8856201171875, "learning_rate": 2.151878238341969e-06, "loss": 0.5845, "mean_token_accuracy": 0.9245336949825287, "num_tokens": 8686118.0, "step": 4848 }, { "epoch": 0.785199578981459, "grad_norm": 31.418115615844727, "learning_rate": 2.150259067357513e-06, "loss": 0.5939, "mean_token_accuracy": 0.907657653093338, "num_tokens": 8687922.0, "step": 4849 }, { "epoch": 0.7853615091895393, "grad_norm": 27.000045776367188, "learning_rate": 2.148639896373057e-06, "loss": 0.5408, "mean_token_accuracy": 0.9189908504486084, "num_tokens": 8689719.0, "step": 4850 }, { "epoch": 0.7855234393976196, "grad_norm": 30.59200096130371, "learning_rate": 2.1470207253886014e-06, "loss": 0.5563, "mean_token_accuracy": 0.9228060841560364, "num_tokens": 8691516.0, "step": 4851 }, { "epoch": 0.7856853696056999, "grad_norm": 37.32300567626953, "learning_rate": 2.1454015544041454e-06, "loss": 0.7695, "mean_token_accuracy": 0.9141661822795868, "num_tokens": 8693308.0, "step": 4852 }, { "epoch": 0.7858472998137802, "grad_norm": 39.51362991333008, "learning_rate": 2.1437823834196894e-06, "loss": 0.6857, "mean_token_accuracy": 0.8935782313346863, "num_tokens": 8695103.0, "step": 4853 }, { "epoch": 0.7860092300218606, "grad_norm": 35.616764068603516, "learning_rate": 2.1421632124352334e-06, "loss": 0.6938, "mean_token_accuracy": 0.917391300201416, "num_tokens": 8696893.0, "step": 4854 }, { "epoch": 0.7861711602299409, "grad_norm": 35.1362419128418, "learning_rate": 2.1405440414507775e-06, "loss": 0.6862, "mean_token_accuracy": 0.902877688407898, "num_tokens": 8698683.0, "step": 4855 }, { "epoch": 0.7863330904380212, "grad_norm": 25.9366512298584, "learning_rate": 2.1389248704663215e-06, "loss": 0.6074, "mean_token_accuracy": 0.9285386204719543, "num_tokens": 8700475.0, "step": 4856 }, { "epoch": 0.7864950206461016, "grad_norm": 29.31695556640625, "learning_rate": 2.1373056994818655e-06, "loss": 0.582, "mean_token_accuracy": 0.922835499048233, "num_tokens": 8702259.0, "step": 4857 }, { "epoch": 0.7866569508541819, "grad_norm": 25.394182205200195, "learning_rate": 2.1356865284974095e-06, "loss": 0.5487, "mean_token_accuracy": 0.9307131469249725, "num_tokens": 8704059.0, "step": 4858 }, { "epoch": 0.7868188810622622, "grad_norm": 33.61517333984375, "learning_rate": 2.1340673575129535e-06, "loss": 0.5776, "mean_token_accuracy": 0.9166666865348816, "num_tokens": 8705847.0, "step": 4859 }, { "epoch": 0.7869808112703425, "grad_norm": 35.47023010253906, "learning_rate": 2.1324481865284975e-06, "loss": 0.6962, "mean_token_accuracy": 0.9097758233547211, "num_tokens": 8707636.0, "step": 4860 }, { "epoch": 0.7871427414784228, "grad_norm": 37.46030044555664, "learning_rate": 2.1308290155440415e-06, "loss": 0.6382, "mean_token_accuracy": 0.9103462398052216, "num_tokens": 8709429.0, "step": 4861 }, { "epoch": 0.7873046716865031, "grad_norm": 30.227519989013672, "learning_rate": 2.1292098445595855e-06, "loss": 0.6183, "mean_token_accuracy": 0.9176872968673706, "num_tokens": 8711221.0, "step": 4862 }, { "epoch": 0.7874666018945834, "grad_norm": 33.537479400634766, "learning_rate": 2.1275906735751296e-06, "loss": 0.6134, "mean_token_accuracy": 0.9075141549110413, "num_tokens": 8713012.0, "step": 4863 }, { "epoch": 0.7876285321026637, "grad_norm": 33.87030792236328, "learning_rate": 2.125971502590674e-06, "loss": 0.6378, "mean_token_accuracy": 0.9187562465667725, "num_tokens": 8714807.0, "step": 4864 }, { "epoch": 0.7877904623107441, "grad_norm": 35.78220748901367, "learning_rate": 2.124352331606218e-06, "loss": 0.5846, "mean_token_accuracy": 0.9263144731521606, "num_tokens": 8716605.0, "step": 4865 }, { "epoch": 0.7879523925188244, "grad_norm": 19.19643211364746, "learning_rate": 2.122733160621762e-06, "loss": 0.4674, "mean_token_accuracy": 0.9324290156364441, "num_tokens": 8718398.0, "step": 4866 }, { "epoch": 0.7881143227269047, "grad_norm": 32.678890228271484, "learning_rate": 2.121113989637306e-06, "loss": 0.6302, "mean_token_accuracy": 0.9119426608085632, "num_tokens": 8720194.0, "step": 4867 }, { "epoch": 0.788276252934985, "grad_norm": 31.408491134643555, "learning_rate": 2.11949481865285e-06, "loss": 0.6147, "mean_token_accuracy": 0.9159872233867645, "num_tokens": 8721980.0, "step": 4868 }, { "epoch": 0.7884381831430654, "grad_norm": 35.651039123535156, "learning_rate": 2.117875647668394e-06, "loss": 0.5737, "mean_token_accuracy": 0.9194001257419586, "num_tokens": 8723777.0, "step": 4869 }, { "epoch": 0.7886001133511457, "grad_norm": 24.259984970092773, "learning_rate": 2.116256476683938e-06, "loss": 0.5442, "mean_token_accuracy": 0.930820107460022, "num_tokens": 8725564.0, "step": 4870 }, { "epoch": 0.788762043559226, "grad_norm": 31.06159782409668, "learning_rate": 2.114637305699482e-06, "loss": 0.6157, "mean_token_accuracy": 0.919669508934021, "num_tokens": 8727350.0, "step": 4871 }, { "epoch": 0.7889239737673063, "grad_norm": 23.367807388305664, "learning_rate": 2.113018134715026e-06, "loss": 0.501, "mean_token_accuracy": 0.9280426800251007, "num_tokens": 8729140.0, "step": 4872 }, { "epoch": 0.7890859039753866, "grad_norm": 22.92611312866211, "learning_rate": 2.11139896373057e-06, "loss": 0.4848, "mean_token_accuracy": 0.929380863904953, "num_tokens": 8730921.0, "step": 4873 }, { "epoch": 0.7892478341834669, "grad_norm": 37.05588912963867, "learning_rate": 2.109779792746114e-06, "loss": 0.7144, "mean_token_accuracy": 0.911843478679657, "num_tokens": 8732717.0, "step": 4874 }, { "epoch": 0.7894097643915472, "grad_norm": 31.46372413635254, "learning_rate": 2.108160621761658e-06, "loss": 0.5312, "mean_token_accuracy": 0.9193595945835114, "num_tokens": 8734515.0, "step": 4875 }, { "epoch": 0.7895716945996276, "grad_norm": 32.76239013671875, "learning_rate": 2.106541450777202e-06, "loss": 0.5833, "mean_token_accuracy": 0.9167143404483795, "num_tokens": 8736317.0, "step": 4876 }, { "epoch": 0.7897336248077079, "grad_norm": 40.44179153442383, "learning_rate": 2.104922279792746e-06, "loss": 0.6961, "mean_token_accuracy": 0.9103313684463501, "num_tokens": 8738116.0, "step": 4877 }, { "epoch": 0.7898955550157882, "grad_norm": 44.074798583984375, "learning_rate": 2.10330310880829e-06, "loss": 0.8018, "mean_token_accuracy": 0.9016563296318054, "num_tokens": 8739913.0, "step": 4878 }, { "epoch": 0.7900574852238685, "grad_norm": 24.902088165283203, "learning_rate": 2.101683937823834e-06, "loss": 0.4762, "mean_token_accuracy": 0.9304866194725037, "num_tokens": 8741712.0, "step": 4879 }, { "epoch": 0.7902194154319488, "grad_norm": 22.09727668762207, "learning_rate": 2.100064766839378e-06, "loss": 0.472, "mean_token_accuracy": 0.9330623745918274, "num_tokens": 8743493.0, "step": 4880 }, { "epoch": 0.7903813456400292, "grad_norm": 33.19005584716797, "learning_rate": 2.0984455958549222e-06, "loss": 0.584, "mean_token_accuracy": 0.9115451872348785, "num_tokens": 8745276.0, "step": 4881 }, { "epoch": 0.7905432758481095, "grad_norm": 33.45199203491211, "learning_rate": 2.0968264248704667e-06, "loss": 0.5732, "mean_token_accuracy": 0.9236669540405273, "num_tokens": 8747063.0, "step": 4882 }, { "epoch": 0.7907052060561898, "grad_norm": 38.517459869384766, "learning_rate": 2.0952072538860107e-06, "loss": 0.7652, "mean_token_accuracy": 0.9150349497795105, "num_tokens": 8748858.0, "step": 4883 }, { "epoch": 0.7908671362642701, "grad_norm": 31.903398513793945, "learning_rate": 2.0935880829015547e-06, "loss": 0.5562, "mean_token_accuracy": 0.918178141117096, "num_tokens": 8750651.0, "step": 4884 }, { "epoch": 0.7910290664723504, "grad_norm": 33.66367721557617, "learning_rate": 2.0919689119170987e-06, "loss": 0.5642, "mean_token_accuracy": 0.9136288166046143, "num_tokens": 8752441.0, "step": 4885 }, { "epoch": 0.7911909966804307, "grad_norm": 32.21567916870117, "learning_rate": 2.0903497409326427e-06, "loss": 0.6401, "mean_token_accuracy": 0.9151678681373596, "num_tokens": 8754236.0, "step": 4886 }, { "epoch": 0.791352926888511, "grad_norm": 37.72034454345703, "learning_rate": 2.0887305699481867e-06, "loss": 0.6633, "mean_token_accuracy": 0.9047702252864838, "num_tokens": 8756031.0, "step": 4887 }, { "epoch": 0.7915148570965914, "grad_norm": 31.206485748291016, "learning_rate": 2.0871113989637307e-06, "loss": 0.6677, "mean_token_accuracy": 0.9037570655345917, "num_tokens": 8757825.0, "step": 4888 }, { "epoch": 0.7916767873046717, "grad_norm": 32.79133224487305, "learning_rate": 2.0854922279792747e-06, "loss": 0.5992, "mean_token_accuracy": 0.9108372926712036, "num_tokens": 8759617.0, "step": 4889 }, { "epoch": 0.791838717512752, "grad_norm": 22.16862678527832, "learning_rate": 2.0838730569948188e-06, "loss": 0.4994, "mean_token_accuracy": 0.9345376491546631, "num_tokens": 8761404.0, "step": 4890 }, { "epoch": 0.7920006477208323, "grad_norm": 32.187461853027344, "learning_rate": 2.0822538860103628e-06, "loss": 0.6519, "mean_token_accuracy": 0.9046019315719604, "num_tokens": 8763190.0, "step": 4891 }, { "epoch": 0.7921625779289126, "grad_norm": 25.909048080444336, "learning_rate": 2.080634715025907e-06, "loss": 0.575, "mean_token_accuracy": 0.9232880473136902, "num_tokens": 8764976.0, "step": 4892 }, { "epoch": 0.792324508136993, "grad_norm": 25.27642822265625, "learning_rate": 2.079015544041451e-06, "loss": 0.7141, "mean_token_accuracy": 0.9211102426052094, "num_tokens": 8766755.0, "step": 4893 }, { "epoch": 0.7924864383450733, "grad_norm": 27.0764217376709, "learning_rate": 2.077396373056995e-06, "loss": 0.527, "mean_token_accuracy": 0.9177428483963013, "num_tokens": 8768547.0, "step": 4894 }, { "epoch": 0.7926483685531536, "grad_norm": 19.349010467529297, "learning_rate": 2.075777202072539e-06, "loss": 0.488, "mean_token_accuracy": 0.9340579807758331, "num_tokens": 8770332.0, "step": 4895 }, { "epoch": 0.7928102987612339, "grad_norm": 29.76388168334961, "learning_rate": 2.074158031088083e-06, "loss": 0.531, "mean_token_accuracy": 0.92323437333107, "num_tokens": 8772129.0, "step": 4896 }, { "epoch": 0.7929722289693142, "grad_norm": 23.53697395324707, "learning_rate": 2.072538860103627e-06, "loss": 0.6133, "mean_token_accuracy": 0.918645828962326, "num_tokens": 8773912.0, "step": 4897 }, { "epoch": 0.7931341591773945, "grad_norm": 34.235599517822266, "learning_rate": 2.070919689119171e-06, "loss": 0.6104, "mean_token_accuracy": 0.9162139892578125, "num_tokens": 8775710.0, "step": 4898 }, { "epoch": 0.7932960893854749, "grad_norm": 35.62824249267578, "learning_rate": 2.0693005181347153e-06, "loss": 0.5939, "mean_token_accuracy": 0.9228060841560364, "num_tokens": 8777507.0, "step": 4899 }, { "epoch": 0.7934580195935552, "grad_norm": 36.67351531982422, "learning_rate": 2.0676813471502593e-06, "loss": 0.6124, "mean_token_accuracy": 0.91355299949646, "num_tokens": 8779285.0, "step": 4900 }, { "epoch": 0.7936199498016355, "grad_norm": 27.98644256591797, "learning_rate": 2.0660621761658033e-06, "loss": 0.5486, "mean_token_accuracy": 0.9155879616737366, "num_tokens": 8781081.0, "step": 4901 }, { "epoch": 0.7937818800097158, "grad_norm": 28.42221450805664, "learning_rate": 2.0644430051813473e-06, "loss": 0.6263, "mean_token_accuracy": 0.9132690131664276, "num_tokens": 8782870.0, "step": 4902 }, { "epoch": 0.7939438102177961, "grad_norm": 38.13542175292969, "learning_rate": 2.0628238341968914e-06, "loss": 0.7332, "mean_token_accuracy": 0.9028213322162628, "num_tokens": 8784670.0, "step": 4903 }, { "epoch": 0.7941057404258764, "grad_norm": 35.347747802734375, "learning_rate": 2.0612046632124354e-06, "loss": 0.6464, "mean_token_accuracy": 0.9111111164093018, "num_tokens": 8786452.0, "step": 4904 }, { "epoch": 0.7942676706339568, "grad_norm": 34.25712203979492, "learning_rate": 2.0595854922279794e-06, "loss": 0.6613, "mean_token_accuracy": 0.9136646091938019, "num_tokens": 8788242.0, "step": 4905 }, { "epoch": 0.7944296008420371, "grad_norm": 26.22393226623535, "learning_rate": 2.0579663212435234e-06, "loss": 0.5495, "mean_token_accuracy": 0.9239244163036346, "num_tokens": 8790031.0, "step": 4906 }, { "epoch": 0.7945915310501174, "grad_norm": 27.886669158935547, "learning_rate": 2.0563471502590674e-06, "loss": 0.5987, "mean_token_accuracy": 0.9204118847846985, "num_tokens": 8791820.0, "step": 4907 }, { "epoch": 0.7947534612581977, "grad_norm": 31.319236755371094, "learning_rate": 2.054727979274612e-06, "loss": 0.6636, "mean_token_accuracy": 0.9242258369922638, "num_tokens": 8793609.0, "step": 4908 }, { "epoch": 0.794915391466278, "grad_norm": 33.08340835571289, "learning_rate": 2.053108808290156e-06, "loss": 0.5995, "mean_token_accuracy": 0.9230731725692749, "num_tokens": 8795407.0, "step": 4909 }, { "epoch": 0.7950773216743584, "grad_norm": 24.904603958129883, "learning_rate": 2.0514896373057e-06, "loss": 0.5243, "mean_token_accuracy": 0.9261710345745087, "num_tokens": 8797190.0, "step": 4910 }, { "epoch": 0.7952392518824387, "grad_norm": 25.797454833984375, "learning_rate": 2.049870466321244e-06, "loss": 0.5762, "mean_token_accuracy": 0.9210539758205414, "num_tokens": 8798980.0, "step": 4911 }, { "epoch": 0.795401182090519, "grad_norm": 35.37569808959961, "learning_rate": 2.048251295336788e-06, "loss": 0.6591, "mean_token_accuracy": 0.9074468016624451, "num_tokens": 8800783.0, "step": 4912 }, { "epoch": 0.7955631122985993, "grad_norm": 36.26353454589844, "learning_rate": 2.046632124352332e-06, "loss": 0.6785, "mean_token_accuracy": 0.9172413945198059, "num_tokens": 8802585.0, "step": 4913 }, { "epoch": 0.7957250425066796, "grad_norm": 33.37550354003906, "learning_rate": 2.045012953367876e-06, "loss": 0.5926, "mean_token_accuracy": 0.9199725091457367, "num_tokens": 8804372.0, "step": 4914 }, { "epoch": 0.7958869727147599, "grad_norm": 18.971160888671875, "learning_rate": 2.04339378238342e-06, "loss": 0.4584, "mean_token_accuracy": 0.9350087344646454, "num_tokens": 8806163.0, "step": 4915 }, { "epoch": 0.7960489029228403, "grad_norm": 16.852298736572266, "learning_rate": 2.041774611398964e-06, "loss": 0.4445, "mean_token_accuracy": 0.9350886344909668, "num_tokens": 8807952.0, "step": 4916 }, { "epoch": 0.7962108331309206, "grad_norm": 30.142396926879883, "learning_rate": 2.040155440414508e-06, "loss": 0.5566, "mean_token_accuracy": 0.9183647632598877, "num_tokens": 8809745.0, "step": 4917 }, { "epoch": 0.7963727633390009, "grad_norm": 30.790508270263672, "learning_rate": 2.038536269430052e-06, "loss": 0.5644, "mean_token_accuracy": 0.9131302535533905, "num_tokens": 8811533.0, "step": 4918 }, { "epoch": 0.7965346935470812, "grad_norm": 27.842195510864258, "learning_rate": 2.036917098445596e-06, "loss": 0.5435, "mean_token_accuracy": 0.9230769276618958, "num_tokens": 8813331.0, "step": 4919 }, { "epoch": 0.7966966237551615, "grad_norm": 29.73750114440918, "learning_rate": 2.03529792746114e-06, "loss": 0.582, "mean_token_accuracy": 0.920550525188446, "num_tokens": 8815120.0, "step": 4920 }, { "epoch": 0.7968585539632418, "grad_norm": 26.64364242553711, "learning_rate": 2.033678756476684e-06, "loss": 0.5489, "mean_token_accuracy": 0.9187643826007843, "num_tokens": 8816915.0, "step": 4921 }, { "epoch": 0.7970204841713222, "grad_norm": 31.05823516845703, "learning_rate": 2.032059585492228e-06, "loss": 0.7085, "mean_token_accuracy": 0.9124758243560791, "num_tokens": 8818700.0, "step": 4922 }, { "epoch": 0.7971824143794025, "grad_norm": 22.934885025024414, "learning_rate": 2.030440414507772e-06, "loss": 0.5253, "mean_token_accuracy": 0.9160583913326263, "num_tokens": 8820486.0, "step": 4923 }, { "epoch": 0.7973443445874828, "grad_norm": 37.852237701416016, "learning_rate": 2.028821243523316e-06, "loss": 0.6053, "mean_token_accuracy": 0.9090909361839294, "num_tokens": 8822284.0, "step": 4924 }, { "epoch": 0.7975062747955631, "grad_norm": 31.20152473449707, "learning_rate": 2.02720207253886e-06, "loss": 0.6762, "mean_token_accuracy": 0.9095588028430939, "num_tokens": 8824072.0, "step": 4925 }, { "epoch": 0.7976682050036434, "grad_norm": 24.399518966674805, "learning_rate": 2.025582901554404e-06, "loss": 0.6049, "mean_token_accuracy": 0.9180602133274078, "num_tokens": 8825852.0, "step": 4926 }, { "epoch": 0.7978301352117237, "grad_norm": 27.379980087280273, "learning_rate": 2.0239637305699485e-06, "loss": 0.5648, "mean_token_accuracy": 0.9193868935108185, "num_tokens": 8827637.0, "step": 4927 }, { "epoch": 0.797992065419804, "grad_norm": 24.655641555786133, "learning_rate": 2.0223445595854925e-06, "loss": 0.4743, "mean_token_accuracy": 0.927152156829834, "num_tokens": 8829437.0, "step": 4928 }, { "epoch": 0.7981539956278844, "grad_norm": 33.263370513916016, "learning_rate": 2.0207253886010365e-06, "loss": 0.6643, "mean_token_accuracy": 0.9106077551841736, "num_tokens": 8831228.0, "step": 4929 }, { "epoch": 0.7983159258359647, "grad_norm": 34.86585235595703, "learning_rate": 2.0191062176165806e-06, "loss": 0.6132, "mean_token_accuracy": 0.9195588231086731, "num_tokens": 8833026.0, "step": 4930 }, { "epoch": 0.798477856044045, "grad_norm": 22.551219940185547, "learning_rate": 2.0174870466321246e-06, "loss": 0.561, "mean_token_accuracy": 0.9298475086688995, "num_tokens": 8834809.0, "step": 4931 }, { "epoch": 0.7986397862521253, "grad_norm": 30.917335510253906, "learning_rate": 2.0158678756476686e-06, "loss": 0.5721, "mean_token_accuracy": 0.9185907244682312, "num_tokens": 8836604.0, "step": 4932 }, { "epoch": 0.7988017164602057, "grad_norm": 28.22004508972168, "learning_rate": 2.0142487046632126e-06, "loss": 0.6895, "mean_token_accuracy": 0.9184397161006927, "num_tokens": 8838398.0, "step": 4933 }, { "epoch": 0.798963646668286, "grad_norm": 28.48418617248535, "learning_rate": 2.0126295336787566e-06, "loss": 0.6868, "mean_token_accuracy": 0.908056229352951, "num_tokens": 8840182.0, "step": 4934 }, { "epoch": 0.7991255768763663, "grad_norm": 38.426944732666016, "learning_rate": 2.0110103626943006e-06, "loss": 0.6018, "mean_token_accuracy": 0.9211459457874298, "num_tokens": 8841973.0, "step": 4935 }, { "epoch": 0.7992875070844466, "grad_norm": 26.526845932006836, "learning_rate": 2.0093911917098446e-06, "loss": 0.5545, "mean_token_accuracy": 0.9291968643665314, "num_tokens": 8843767.0, "step": 4936 }, { "epoch": 0.7994494372925269, "grad_norm": 28.1953182220459, "learning_rate": 2.0077720207253886e-06, "loss": 0.6856, "mean_token_accuracy": 0.9151570200920105, "num_tokens": 8845561.0, "step": 4937 }, { "epoch": 0.7996113675006072, "grad_norm": 27.99931526184082, "learning_rate": 2.0061528497409327e-06, "loss": 0.575, "mean_token_accuracy": 0.9201068580150604, "num_tokens": 8847360.0, "step": 4938 }, { "epoch": 0.7997732977086875, "grad_norm": 30.18328857421875, "learning_rate": 2.0045336787564767e-06, "loss": 0.6622, "mean_token_accuracy": 0.9166313707828522, "num_tokens": 8849147.0, "step": 4939 }, { "epoch": 0.7999352279167679, "grad_norm": 26.061443328857422, "learning_rate": 2.0029145077720207e-06, "loss": 0.5174, "mean_token_accuracy": 0.9259218573570251, "num_tokens": 8850929.0, "step": 4940 }, { "epoch": 0.8000971581248482, "grad_norm": 12.377771377563477, "learning_rate": 2.0012953367875647e-06, "loss": 0.4546, "mean_token_accuracy": 0.9335711896419525, "num_tokens": 8852712.0, "step": 4941 }, { "epoch": 0.8002590883329285, "grad_norm": 36.25849151611328, "learning_rate": 1.9996761658031087e-06, "loss": 0.6286, "mean_token_accuracy": 0.9162139892578125, "num_tokens": 8854510.0, "step": 4942 }, { "epoch": 0.8004210185410088, "grad_norm": 19.264659881591797, "learning_rate": 1.9980569948186527e-06, "loss": 0.5318, "mean_token_accuracy": 0.928205132484436, "num_tokens": 8856300.0, "step": 4943 }, { "epoch": 0.8005829487490892, "grad_norm": 21.427783966064453, "learning_rate": 1.9964378238341967e-06, "loss": 0.5009, "mean_token_accuracy": 0.9230356216430664, "num_tokens": 8858085.0, "step": 4944 }, { "epoch": 0.8007448789571695, "grad_norm": 27.587329864501953, "learning_rate": 1.994818652849741e-06, "loss": 0.5756, "mean_token_accuracy": 0.9212149381637573, "num_tokens": 8859876.0, "step": 4945 }, { "epoch": 0.8009068091652498, "grad_norm": 26.34791374206543, "learning_rate": 1.993199481865285e-06, "loss": 0.539, "mean_token_accuracy": 0.9259096682071686, "num_tokens": 8861658.0, "step": 4946 }, { "epoch": 0.8010687393733301, "grad_norm": 8.806448936462402, "learning_rate": 1.991580310880829e-06, "loss": 0.4371, "mean_token_accuracy": 0.9402685761451721, "num_tokens": 8863438.0, "step": 4947 }, { "epoch": 0.8012306695814104, "grad_norm": 24.630069732666016, "learning_rate": 1.9899611398963732e-06, "loss": 0.5779, "mean_token_accuracy": 0.9250357449054718, "num_tokens": 8865230.0, "step": 4948 }, { "epoch": 0.8013925997894907, "grad_norm": 22.68179702758789, "learning_rate": 1.9883419689119172e-06, "loss": 0.4842, "mean_token_accuracy": 0.927003413438797, "num_tokens": 8867016.0, "step": 4949 }, { "epoch": 0.801554529997571, "grad_norm": 28.216548919677734, "learning_rate": 1.9867227979274612e-06, "loss": 0.6044, "mean_token_accuracy": 0.9272453188896179, "num_tokens": 8868803.0, "step": 4950 }, { "epoch": 0.8017164602056513, "grad_norm": 30.94228172302246, "learning_rate": 1.9851036269430053e-06, "loss": 0.6035, "mean_token_accuracy": 0.9182596802711487, "num_tokens": 8870609.0, "step": 4951 }, { "epoch": 0.8018783904137317, "grad_norm": 26.164602279663086, "learning_rate": 1.9834844559585497e-06, "loss": 0.7594, "mean_token_accuracy": 0.9185742437839508, "num_tokens": 8872403.0, "step": 4952 }, { "epoch": 0.802040320621812, "grad_norm": 20.47823143005371, "learning_rate": 1.9818652849740937e-06, "loss": 0.4765, "mean_token_accuracy": 0.9307331740856171, "num_tokens": 8874189.0, "step": 4953 }, { "epoch": 0.8022022508298923, "grad_norm": 41.84910202026367, "learning_rate": 1.9802461139896377e-06, "loss": 0.693, "mean_token_accuracy": 0.9099412262439728, "num_tokens": 8875980.0, "step": 4954 }, { "epoch": 0.8023641810379726, "grad_norm": 23.878442764282227, "learning_rate": 1.9786269430051817e-06, "loss": 0.4873, "mean_token_accuracy": 0.9302631616592407, "num_tokens": 8877765.0, "step": 4955 }, { "epoch": 0.802526111246053, "grad_norm": 28.336414337158203, "learning_rate": 1.9770077720207257e-06, "loss": 0.6398, "mean_token_accuracy": 0.9217752516269684, "num_tokens": 8879558.0, "step": 4956 }, { "epoch": 0.8026880414541333, "grad_norm": 34.412994384765625, "learning_rate": 1.9753886010362698e-06, "loss": 0.5159, "mean_token_accuracy": 0.9311330616474152, "num_tokens": 8881361.0, "step": 4957 }, { "epoch": 0.8028499716622136, "grad_norm": 22.01692008972168, "learning_rate": 1.9737694300518138e-06, "loss": 0.522, "mean_token_accuracy": 0.9132490456104279, "num_tokens": 8883138.0, "step": 4958 }, { "epoch": 0.8030119018702939, "grad_norm": 38.56572723388672, "learning_rate": 1.9721502590673578e-06, "loss": 0.7343, "mean_token_accuracy": 0.8962166905403137, "num_tokens": 8884929.0, "step": 4959 }, { "epoch": 0.8031738320783742, "grad_norm": 29.538925170898438, "learning_rate": 1.970531088082902e-06, "loss": 0.5417, "mean_token_accuracy": 0.9315811395645142, "num_tokens": 8886718.0, "step": 4960 }, { "epoch": 0.8033357622864545, "grad_norm": 24.75006675720215, "learning_rate": 1.968911917098446e-06, "loss": 0.6186, "mean_token_accuracy": 0.9185185134410858, "num_tokens": 8888500.0, "step": 4961 }, { "epoch": 0.8034976924945348, "grad_norm": 33.16312789916992, "learning_rate": 1.96729274611399e-06, "loss": 0.643, "mean_token_accuracy": 0.9095345139503479, "num_tokens": 8890289.0, "step": 4962 }, { "epoch": 0.8036596227026152, "grad_norm": 24.376237869262695, "learning_rate": 1.965673575129534e-06, "loss": 0.6063, "mean_token_accuracy": 0.9245014190673828, "num_tokens": 8892079.0, "step": 4963 }, { "epoch": 0.8038215529106955, "grad_norm": 22.80145835876465, "learning_rate": 1.964054404145078e-06, "loss": 0.483, "mean_token_accuracy": 0.9297200739383698, "num_tokens": 8893875.0, "step": 4964 }, { "epoch": 0.8039834831187758, "grad_norm": 39.409786224365234, "learning_rate": 1.962435233160622e-06, "loss": 0.7582, "mean_token_accuracy": 0.9011950492858887, "num_tokens": 8895661.0, "step": 4965 }, { "epoch": 0.8041454133268561, "grad_norm": 27.341442108154297, "learning_rate": 1.960816062176166e-06, "loss": 0.5924, "mean_token_accuracy": 0.9181216359138489, "num_tokens": 8897467.0, "step": 4966 }, { "epoch": 0.8043073435349365, "grad_norm": 25.562259674072266, "learning_rate": 1.95919689119171e-06, "loss": 0.5093, "mean_token_accuracy": 0.9275019764900208, "num_tokens": 8899255.0, "step": 4967 }, { "epoch": 0.8044692737430168, "grad_norm": 25.12514305114746, "learning_rate": 1.957577720207254e-06, "loss": 0.4957, "mean_token_accuracy": 0.9295327067375183, "num_tokens": 8901052.0, "step": 4968 }, { "epoch": 0.8046312039510971, "grad_norm": 26.805830001831055, "learning_rate": 1.955958549222798e-06, "loss": 0.5486, "mean_token_accuracy": 0.9183908700942993, "num_tokens": 8902834.0, "step": 4969 }, { "epoch": 0.8047931341591774, "grad_norm": 21.42875862121582, "learning_rate": 1.954339378238342e-06, "loss": 0.5228, "mean_token_accuracy": 0.9270073175430298, "num_tokens": 8904620.0, "step": 4970 }, { "epoch": 0.8049550643672577, "grad_norm": 27.879472732543945, "learning_rate": 1.9527202072538864e-06, "loss": 0.585, "mean_token_accuracy": 0.9134846329689026, "num_tokens": 8906409.0, "step": 4971 }, { "epoch": 0.805116994575338, "grad_norm": 24.235980987548828, "learning_rate": 1.9511010362694304e-06, "loss": 0.5604, "mean_token_accuracy": 0.9246819317340851, "num_tokens": 8908187.0, "step": 4972 }, { "epoch": 0.8052789247834183, "grad_norm": 43.45313262939453, "learning_rate": 1.9494818652849744e-06, "loss": 0.9403, "mean_token_accuracy": 0.8996002972126007, "num_tokens": 8909989.0, "step": 4973 }, { "epoch": 0.8054408549914986, "grad_norm": 20.992921829223633, "learning_rate": 1.9478626943005184e-06, "loss": 0.4552, "mean_token_accuracy": 0.930149257183075, "num_tokens": 8911785.0, "step": 4974 }, { "epoch": 0.805602785199579, "grad_norm": 29.420166015625, "learning_rate": 1.9462435233160624e-06, "loss": 0.5162, "mean_token_accuracy": 0.9212501347064972, "num_tokens": 8913576.0, "step": 4975 }, { "epoch": 0.8057647154076593, "grad_norm": 26.890817642211914, "learning_rate": 1.9446243523316064e-06, "loss": 0.579, "mean_token_accuracy": 0.9232778251171112, "num_tokens": 8915373.0, "step": 4976 }, { "epoch": 0.8059266456157396, "grad_norm": 34.62211227416992, "learning_rate": 1.9430051813471504e-06, "loss": 0.6254, "mean_token_accuracy": 0.9217016398906708, "num_tokens": 8917178.0, "step": 4977 }, { "epoch": 0.80608857582382, "grad_norm": 23.213016510009766, "learning_rate": 1.9413860103626945e-06, "loss": 0.4886, "mean_token_accuracy": 0.9294007122516632, "num_tokens": 8918959.0, "step": 4978 }, { "epoch": 0.8062505060319003, "grad_norm": 31.16729736328125, "learning_rate": 1.9397668393782385e-06, "loss": 0.6342, "mean_token_accuracy": 0.9093801379203796, "num_tokens": 8920758.0, "step": 4979 }, { "epoch": 0.8064124362399806, "grad_norm": 24.584293365478516, "learning_rate": 1.9381476683937825e-06, "loss": 0.5038, "mean_token_accuracy": 0.9288030862808228, "num_tokens": 8922551.0, "step": 4980 }, { "epoch": 0.8065743664480609, "grad_norm": 24.814489364624023, "learning_rate": 1.9365284974093265e-06, "loss": 0.6404, "mean_token_accuracy": 0.9219858050346375, "num_tokens": 8924345.0, "step": 4981 }, { "epoch": 0.8067362966561412, "grad_norm": 19.52880859375, "learning_rate": 1.9349093264248705e-06, "loss": 0.5159, "mean_token_accuracy": 0.9311594367027283, "num_tokens": 8926133.0, "step": 4982 }, { "epoch": 0.8068982268642215, "grad_norm": 32.615623474121094, "learning_rate": 1.9332901554404145e-06, "loss": 0.5891, "mean_token_accuracy": 0.906274139881134, "num_tokens": 8927933.0, "step": 4983 }, { "epoch": 0.8070601570723018, "grad_norm": 27.989227294921875, "learning_rate": 1.9316709844559585e-06, "loss": 0.5404, "mean_token_accuracy": 0.9232409298419952, "num_tokens": 8929719.0, "step": 4984 }, { "epoch": 0.8072220872803821, "grad_norm": 33.77449417114258, "learning_rate": 1.9300518134715025e-06, "loss": 0.7466, "mean_token_accuracy": 0.9123667478561401, "num_tokens": 8931505.0, "step": 4985 }, { "epoch": 0.8073840174884624, "grad_norm": 33.327735900878906, "learning_rate": 1.9284326424870466e-06, "loss": 0.6666, "mean_token_accuracy": 0.905802845954895, "num_tokens": 8933301.0, "step": 4986 }, { "epoch": 0.8075459476965428, "grad_norm": 23.688329696655273, "learning_rate": 1.9268134715025906e-06, "loss": 0.5632, "mean_token_accuracy": 0.9284613132476807, "num_tokens": 8935092.0, "step": 4987 }, { "epoch": 0.8077078779046231, "grad_norm": 33.938846588134766, "learning_rate": 1.9251943005181346e-06, "loss": 0.6137, "mean_token_accuracy": 0.9131661653518677, "num_tokens": 8936881.0, "step": 4988 }, { "epoch": 0.8078698081127035, "grad_norm": 30.068599700927734, "learning_rate": 1.923575129533679e-06, "loss": 0.6921, "mean_token_accuracy": 0.9058587849140167, "num_tokens": 8938679.0, "step": 4989 }, { "epoch": 0.8080317383207838, "grad_norm": 38.505531311035156, "learning_rate": 1.921955958549223e-06, "loss": 0.6378, "mean_token_accuracy": 0.9060071706771851, "num_tokens": 8940478.0, "step": 4990 }, { "epoch": 0.8081936685288641, "grad_norm": 12.059277534484863, "learning_rate": 1.920336787564767e-06, "loss": 0.4501, "mean_token_accuracy": 0.9357620477676392, "num_tokens": 8942270.0, "step": 4991 }, { "epoch": 0.8083555987369444, "grad_norm": 30.33990478515625, "learning_rate": 1.918717616580311e-06, "loss": 0.5558, "mean_token_accuracy": 0.9227495193481445, "num_tokens": 8944068.0, "step": 4992 }, { "epoch": 0.8085175289450247, "grad_norm": 35.630008697509766, "learning_rate": 1.917098445595855e-06, "loss": 0.6914, "mean_token_accuracy": 0.90386563539505, "num_tokens": 8945861.0, "step": 4993 }, { "epoch": 0.808679459153105, "grad_norm": 27.060644149780273, "learning_rate": 1.915479274611399e-06, "loss": 0.6253, "mean_token_accuracy": 0.9235956966876984, "num_tokens": 8947648.0, "step": 4994 }, { "epoch": 0.8088413893611853, "grad_norm": 27.75632667541504, "learning_rate": 1.913860103626943e-06, "loss": 0.5999, "mean_token_accuracy": 0.9207249879837036, "num_tokens": 8949436.0, "step": 4995 }, { "epoch": 0.8090033195692656, "grad_norm": 37.02408218383789, "learning_rate": 1.9122409326424875e-06, "loss": 0.7204, "mean_token_accuracy": 0.903947502374649, "num_tokens": 8951229.0, "step": 4996 }, { "epoch": 0.8091652497773459, "grad_norm": 29.825345993041992, "learning_rate": 1.9106217616580315e-06, "loss": 0.5626, "mean_token_accuracy": 0.9220841825008392, "num_tokens": 8953022.0, "step": 4997 }, { "epoch": 0.8093271799854262, "grad_norm": 23.454288482666016, "learning_rate": 1.9090025906735756e-06, "loss": 0.5412, "mean_token_accuracy": 0.9195157885551453, "num_tokens": 8954820.0, "step": 4998 }, { "epoch": 0.8094891101935066, "grad_norm": 16.398780822753906, "learning_rate": 1.9073834196891196e-06, "loss": 0.4463, "mean_token_accuracy": 0.9383763074874878, "num_tokens": 8956608.0, "step": 4999 }, { "epoch": 0.8096510404015869, "grad_norm": 34.983497619628906, "learning_rate": 1.9057642487046634e-06, "loss": 0.7075, "mean_token_accuracy": 0.9228723645210266, "num_tokens": 8958405.0, "step": 5000 } ], "logging_steps": 1, "max_steps": 6176, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.757353388414697e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }