{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999764280663157,
  "eval_steps": 500,
  "global_step": 3181,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0003142924491239098,
      "grad_norm": 19.593584060668945,
      "learning_rate": 3.134796238244514e-06,
      "loss": 7.2211,
      "step": 1
    },
    {
      "epoch": 0.001571462245619549,
      "grad_norm": 17.929481506347656,
      "learning_rate": 1.567398119122257e-05,
      "loss": 7.2713,
      "step": 5
    },
    {
      "epoch": 0.003142924491239098,
      "grad_norm": 17.97577476501465,
      "learning_rate": 3.134796238244514e-05,
      "loss": 7.2643,
      "step": 10
    },
    {
      "epoch": 0.004714386736858647,
      "grad_norm": 9.52695369720459,
      "learning_rate": 4.7021943573667716e-05,
      "loss": 6.9744,
      "step": 15
    },
    {
      "epoch": 0.006285848982478196,
      "grad_norm": 4.145598888397217,
      "learning_rate": 6.269592476489028e-05,
      "loss": 6.5934,
      "step": 20
    },
    {
      "epoch": 0.007857311228097744,
      "grad_norm": 3.3066627979278564,
      "learning_rate": 7.836990595611286e-05,
      "loss": 6.1739,
      "step": 25
    },
    {
      "epoch": 0.009428773473717294,
      "grad_norm": 3.240945339202881,
      "learning_rate": 9.404388714733543e-05,
      "loss": 5.9143,
      "step": 30
    },
    {
      "epoch": 0.011000235719336842,
      "grad_norm": 1.6531422138214111,
      "learning_rate": 0.000109717868338558,
      "loss": 5.6534,
      "step": 35
    },
    {
      "epoch": 0.012571697964956392,
      "grad_norm": 0.9349701404571533,
      "learning_rate": 0.00012539184952978057,
      "loss": 5.471,
      "step": 40
    },
    {
      "epoch": 0.01414316021057594,
      "grad_norm": 0.565790057182312,
      "learning_rate": 0.00014106583072100311,
      "loss": 5.3034,
      "step": 45
    },
    {
      "epoch": 0.01571462245619549,
      "grad_norm": 0.4517938494682312,
      "learning_rate": 0.00015673981191222572,
      "loss": 5.1125,
      "step": 50
    },
    {
      "epoch": 0.01728608470181504,
      "grad_norm": 0.4741256833076477,
      "learning_rate": 0.0001724137931034483,
      "loss": 5.009,
      "step": 55
    },
    {
      "epoch": 0.01885754694743459,
      "grad_norm": 0.4407913386821747,
      "learning_rate": 0.00018808777429467086,
      "loss": 4.9626,
      "step": 60
    },
    {
      "epoch": 0.020429009193054137,
      "grad_norm": 0.6502472758293152,
      "learning_rate": 0.00020376175548589344,
      "loss": 4.837,
      "step": 65
    },
    {
      "epoch": 0.022000471438673685,
      "grad_norm": 0.6139042377471924,
      "learning_rate": 0.000219435736677116,
      "loss": 4.7188,
      "step": 70
    },
    {
      "epoch": 0.023571933684293236,
      "grad_norm": 0.5190150141716003,
      "learning_rate": 0.00023510971786833856,
      "loss": 4.5818,
      "step": 75
    },
    {
      "epoch": 0.025143395929912785,
      "grad_norm": 0.4606360197067261,
      "learning_rate": 0.00025078369905956113,
      "loss": 4.4429,
      "step": 80
    },
    {
      "epoch": 0.026714858175532333,
      "grad_norm": 0.38270899653434753,
      "learning_rate": 0.0002664576802507837,
      "loss": 4.4131,
      "step": 85
    },
    {
      "epoch": 0.02828632042115188,
      "grad_norm": 0.4721100330352783,
      "learning_rate": 0.00028213166144200623,
      "loss": 4.248,
      "step": 90
    },
    {
      "epoch": 0.02985778266677143,
      "grad_norm": 0.4452402889728546,
      "learning_rate": 0.00029780564263322886,
      "loss": 4.1921,
      "step": 95
    },
    {
      "epoch": 0.03142924491239098,
      "grad_norm": 0.4978654980659485,
      "learning_rate": 0.00031347962382445143,
      "loss": 4.0356,
      "step": 100
    },
    {
      "epoch": 0.033000707158010525,
      "grad_norm": 0.6152021288871765,
      "learning_rate": 0.000329153605015674,
      "loss": 3.8516,
      "step": 105
    },
    {
      "epoch": 0.03457216940363008,
      "grad_norm": 0.4162922203540802,
      "learning_rate": 0.0003448275862068966,
      "loss": 3.8018,
      "step": 110
    },
    {
      "epoch": 0.03614363164924963,
      "grad_norm": 0.44047296047210693,
      "learning_rate": 0.0003605015673981191,
      "loss": 3.5919,
      "step": 115
    },
    {
      "epoch": 0.03771509389486918,
      "grad_norm": 0.4196017384529114,
      "learning_rate": 0.00037617554858934173,
      "loss": 3.5899,
      "step": 120
    },
    {
      "epoch": 0.039286556140488725,
      "grad_norm": 0.4823077917098999,
      "learning_rate": 0.00039184952978056425,
      "loss": 3.4321,
      "step": 125
    },
    {
      "epoch": 0.04085801838610827,
      "grad_norm": 0.3624018132686615,
      "learning_rate": 0.0004075235109717869,
      "loss": 3.3484,
      "step": 130
    },
    {
      "epoch": 0.04242948063172782,
      "grad_norm": 0.3260553181171417,
      "learning_rate": 0.0004231974921630094,
      "loss": 3.2639,
      "step": 135
    },
    {
      "epoch": 0.04400094287734737,
      "grad_norm": 0.38659989833831787,
      "learning_rate": 0.000438871473354232,
      "loss": 3.1999,
      "step": 140
    },
    {
      "epoch": 0.04557240512296692,
      "grad_norm": 0.4590894281864166,
      "learning_rate": 0.00045454545454545455,
      "loss": 3.1469,
      "step": 145
    },
    {
      "epoch": 0.04714386736858647,
      "grad_norm": 0.39775681495666504,
      "learning_rate": 0.0004702194357366771,
      "loss": 3.0631,
      "step": 150
    },
    {
      "epoch": 0.04871532961420602,
      "grad_norm": 0.43203604221343994,
      "learning_rate": 0.0004858934169278997,
      "loss": 3.0505,
      "step": 155
    },
    {
      "epoch": 0.05028679185982557,
      "grad_norm": 0.3682110607624054,
      "learning_rate": 0.0005015673981191223,
      "loss": 3.0316,
      "step": 160
    },
    {
      "epoch": 0.05185825410544512,
      "grad_norm": 0.32574963569641113,
      "learning_rate": 0.0005172413793103448,
      "loss": 3.0132,
      "step": 165
    },
    {
      "epoch": 0.053429716351064666,
      "grad_norm": 0.4408009946346283,
      "learning_rate": 0.0005329153605015674,
      "loss": 3.0166,
      "step": 170
    },
    {
      "epoch": 0.055001178596684214,
      "grad_norm": 0.3008413016796112,
      "learning_rate": 0.00054858934169279,
      "loss": 2.8596,
      "step": 175
    },
    {
      "epoch": 0.05657264084230376,
      "grad_norm": 0.3133067786693573,
      "learning_rate": 0.0005642633228840125,
      "loss": 2.9631,
      "step": 180
    },
    {
      "epoch": 0.05814410308792331,
      "grad_norm": 0.4327049255371094,
      "learning_rate": 0.0005799373040752351,
      "loss": 2.8666,
      "step": 185
    },
    {
      "epoch": 0.05971556533354286,
      "grad_norm": 0.44304072856903076,
      "learning_rate": 0.0005956112852664577,
      "loss": 2.7938,
      "step": 190
    },
    {
      "epoch": 0.06128702757916241,
      "grad_norm": 0.4220844805240631,
      "learning_rate": 0.0006112852664576803,
      "loss": 2.8245,
      "step": 195
    },
    {
      "epoch": 0.06285848982478195,
      "grad_norm": 0.36100244522094727,
      "learning_rate": 0.0006269592476489029,
      "loss": 2.7925,
      "step": 200
    },
    {
      "epoch": 0.0644299520704015,
      "grad_norm": 0.4607883095741272,
      "learning_rate": 0.0006426332288401254,
      "loss": 2.7257,
      "step": 205
    },
    {
      "epoch": 0.06600141431602105,
      "grad_norm": 0.3919202387332916,
      "learning_rate": 0.000658307210031348,
      "loss": 2.6557,
      "step": 210
    },
    {
      "epoch": 0.06757287656164061,
      "grad_norm": 0.3630659282207489,
      "learning_rate": 0.0006739811912225705,
      "loss": 2.7476,
      "step": 215
    },
    {
      "epoch": 0.06914433880726016,
      "grad_norm": 0.3189098834991455,
      "learning_rate": 0.0006896551724137932,
      "loss": 2.6834,
      "step": 220
    },
    {
      "epoch": 0.07071580105287971,
      "grad_norm": 0.3652380108833313,
      "learning_rate": 0.0007053291536050157,
      "loss": 2.6362,
      "step": 225
    },
    {
      "epoch": 0.07228726329849926,
      "grad_norm": 0.35851606726646423,
      "learning_rate": 0.0007210031347962382,
      "loss": 2.637,
      "step": 230
    },
    {
      "epoch": 0.0738587255441188,
      "grad_norm": 0.7696281671524048,
      "learning_rate": 0.0007366771159874608,
      "loss": 2.5676,
      "step": 235
    },
    {
      "epoch": 0.07543018778973835,
      "grad_norm": 0.6321612000465393,
      "learning_rate": 0.0007523510971786835,
      "loss": 2.6147,
      "step": 240
    },
    {
      "epoch": 0.0770016500353579,
      "grad_norm": 0.386197030544281,
      "learning_rate": 0.000768025078369906,
      "loss": 2.5907,
      "step": 245
    },
    {
      "epoch": 0.07857311228097745,
      "grad_norm": 0.4827702045440674,
      "learning_rate": 0.0007836990595611285,
      "loss": 2.5142,
      "step": 250
    },
    {
      "epoch": 0.080144574526597,
      "grad_norm": 0.292161762714386,
      "learning_rate": 0.0007993730407523511,
      "loss": 2.4859,
      "step": 255
    },
    {
      "epoch": 0.08171603677221655,
      "grad_norm": 0.33461418747901917,
      "learning_rate": 0.0008150470219435738,
      "loss": 2.5654,
      "step": 260
    },
    {
      "epoch": 0.0832874990178361,
      "grad_norm": 0.5843150019645691,
      "learning_rate": 0.0008307210031347962,
      "loss": 2.5168,
      "step": 265
    },
    {
      "epoch": 0.08485896126345564,
      "grad_norm": 0.5514886975288391,
      "learning_rate": 0.0008463949843260188,
      "loss": 2.4514,
      "step": 270
    },
    {
      "epoch": 0.08643042350907519,
      "grad_norm": 0.32894566655158997,
      "learning_rate": 0.0008620689655172414,
      "loss": 2.4504,
      "step": 275
    },
    {
      "epoch": 0.08800188575469474,
      "grad_norm": 0.30089420080184937,
      "learning_rate": 0.000877742946708464,
      "loss": 2.5003,
      "step": 280
    },
    {
      "epoch": 0.08957334800031429,
      "grad_norm": 0.41669008135795593,
      "learning_rate": 0.0008934169278996865,
      "loss": 2.4293,
      "step": 285
    },
    {
      "epoch": 0.09114481024593384,
      "grad_norm": 0.25633400678634644,
      "learning_rate": 0.0009090909090909091,
      "loss": 2.4447,
      "step": 290
    },
    {
      "epoch": 0.09271627249155338,
      "grad_norm": 0.27528685331344604,
      "learning_rate": 0.0009247648902821318,
      "loss": 2.4672,
      "step": 295
    },
    {
      "epoch": 0.09428773473717295,
      "grad_norm": 0.3092777132987976,
      "learning_rate": 0.0009404388714733542,
      "loss": 2.3757,
      "step": 300
    },
    {
      "epoch": 0.0958591969827925,
      "grad_norm": 0.29513272643089294,
      "learning_rate": 0.0009561128526645768,
      "loss": 2.4152,
      "step": 305
    },
    {
      "epoch": 0.09743065922841204,
      "grad_norm": 0.43028154969215393,
      "learning_rate": 0.0009717868338557994,
      "loss": 2.4507,
      "step": 310
    },
    {
      "epoch": 0.09900212147403159,
      "grad_norm": 0.33603623509407043,
      "learning_rate": 0.000987460815047022,
      "loss": 2.3948,
      "step": 315
    },
    {
      "epoch": 0.10057358371965114,
      "grad_norm": 0.36047083139419556,
      "learning_rate": 0.0009999996987684534,
      "loss": 2.3747,
      "step": 320
    },
    {
      "epoch": 0.10214504596527069,
      "grad_norm": 0.47083067893981934,
      "learning_rate": 0.0009999891557024327,
      "loss": 2.4144,
      "step": 325
    },
    {
      "epoch": 0.10371650821089023,
      "grad_norm": 0.3539304733276367,
      "learning_rate": 0.0009999635514220413,
      "loss": 2.365,
      "step": 330
    },
    {
      "epoch": 0.10528797045650978,
      "grad_norm": 0.3079536557197571,
      "learning_rate": 0.0009999228866985584,
      "loss": 2.3087,
      "step": 335
    },
    {
      "epoch": 0.10685943270212933,
      "grad_norm": 0.3304389715194702,
      "learning_rate": 0.0009998671627569314,
      "loss": 2.2729,
      "step": 340
    },
    {
      "epoch": 0.10843089494774888,
      "grad_norm": 0.3577703535556793,
      "learning_rate": 0.0009997963812757367,
      "loss": 2.362,
      "step": 345
    },
    {
      "epoch": 0.11000235719336843,
      "grad_norm": 0.3626112937927246,
      "learning_rate": 0.000999710544387131,
      "loss": 2.3232,
      "step": 350
    },
    {
      "epoch": 0.11157381943898798,
      "grad_norm": 0.33722984790802,
      "learning_rate": 0.000999609654676786,
      "loss": 2.3837,
      "step": 355
    },
    {
      "epoch": 0.11314528168460752,
      "grad_norm": 0.6007051467895508,
      "learning_rate": 0.0009994937151838103,
      "loss": 2.2621,
      "step": 360
    },
    {
      "epoch": 0.11471674393022707,
      "grad_norm": 0.24785129725933075,
      "learning_rate": 0.0009993627294006592,
      "loss": 2.3242,
      "step": 365
    },
    {
      "epoch": 0.11628820617584662,
      "grad_norm": 0.3804435431957245,
      "learning_rate": 0.000999216701273028,
      "loss": 2.2901,
      "step": 370
    },
    {
      "epoch": 0.11785966842146617,
      "grad_norm": 0.2868124544620514,
      "learning_rate": 0.000999055635199734,
      "loss": 2.2492,
      "step": 375
    },
    {
      "epoch": 0.11943113066708572,
      "grad_norm": 0.45313236117362976,
      "learning_rate": 0.0009988795360325836,
      "loss": 2.3535,
      "step": 380
    },
    {
      "epoch": 0.12100259291270528,
      "grad_norm": 0.37607526779174805,
      "learning_rate": 0.0009986884090762266,
      "loss": 2.2953,
      "step": 385
    },
    {
      "epoch": 0.12257405515832483,
      "grad_norm": 0.27437832951545715,
      "learning_rate": 0.000998482260087996,
      "loss": 2.2992,
      "step": 390
    },
    {
      "epoch": 0.12414551740394437,
      "grad_norm": 1.2427669763565063,
      "learning_rate": 0.0009982610952777348,
      "loss": 2.23,
      "step": 395
    },
    {
      "epoch": 0.1257169796495639,
      "grad_norm": 0.2712255120277405,
      "learning_rate": 0.0009980249213076085,
      "loss": 2.2312,
      "step": 400
    },
    {
      "epoch": 0.12728844189518346,
      "grad_norm": 0.3325655162334442,
      "learning_rate": 0.0009977737452919052,
      "loss": 2.24,
      "step": 405
    },
    {
      "epoch": 0.128859904140803,
      "grad_norm": 0.3058350384235382,
      "learning_rate": 0.0009975075747968203,
      "loss": 2.2233,
      "step": 410
    },
    {
      "epoch": 0.13043136638642255,
      "grad_norm": 0.46767985820770264,
      "learning_rate": 0.00099722641784023,
      "loss": 2.2153,
      "step": 415
    },
    {
      "epoch": 0.1320028286320421,
      "grad_norm": 0.27317899465560913,
      "learning_rate": 0.0009969302828914477,
      "loss": 2.2048,
      "step": 420
    },
    {
      "epoch": 0.13357429087766168,
      "grad_norm": 0.49403005838394165,
      "learning_rate": 0.0009966191788709714,
      "loss": 2.1963,
      "step": 425
    },
    {
      "epoch": 0.13514575312328123,
      "grad_norm": 0.28752318024635315,
      "learning_rate": 0.0009962931151502128,
      "loss": 2.2472,
      "step": 430
    },
    {
      "epoch": 0.13671721536890077,
      "grad_norm": 0.2463676631450653,
      "learning_rate": 0.000995952101551216,
      "loss": 2.1985,
      "step": 435
    },
    {
      "epoch": 0.13828867761452032,
      "grad_norm": 0.24134370684623718,
      "learning_rate": 0.000995596148346362,
      "loss": 2.2209,
      "step": 440
    },
    {
      "epoch": 0.13986013986013987,
      "grad_norm": 0.28822633624076843,
      "learning_rate": 0.000995225266258058,
      "loss": 2.1771,
      "step": 445
    },
    {
      "epoch": 0.14143160210575942,
      "grad_norm": 0.35365989804267883,
      "learning_rate": 0.0009948394664584155,
      "loss": 2.1384,
      "step": 450
    },
    {
      "epoch": 0.14300306435137897,
      "grad_norm": 0.25629669427871704,
      "learning_rate": 0.0009944387605689139,
      "loss": 2.1782,
      "step": 455
    },
    {
      "epoch": 0.14457452659699851,
      "grad_norm": 0.3197285532951355,
      "learning_rate": 0.0009940231606600494,
      "loss": 2.1927,
      "step": 460
    },
    {
      "epoch": 0.14614598884261806,
      "grad_norm": 0.2365492731332779,
      "learning_rate": 0.0009935926792509723,
      "loss": 2.1824,
      "step": 465
    },
    {
      "epoch": 0.1477174510882376,
      "grad_norm": 0.3567672073841095,
      "learning_rate": 0.000993147329309109,
      "loss": 2.1744,
      "step": 470
    },
    {
      "epoch": 0.14928891333385716,
      "grad_norm": 0.28331291675567627,
      "learning_rate": 0.0009926871242497731,
      "loss": 2.1513,
      "step": 475
    },
    {
      "epoch": 0.1508603755794767,
      "grad_norm": 0.2945978343486786,
      "learning_rate": 0.0009922120779357587,
      "loss": 2.1584,
      "step": 480
    },
    {
      "epoch": 0.15243183782509626,
      "grad_norm": 0.2360486388206482,
      "learning_rate": 0.0009917222046769252,
      "loss": 2.1771,
      "step": 485
    },
    {
      "epoch": 0.1540033000707158,
      "grad_norm": 0.26957565546035767,
      "learning_rate": 0.0009912175192297648,
      "loss": 2.1623,
      "step": 490
    },
    {
      "epoch": 0.15557476231633535,
      "grad_norm": 0.2816142141819,
      "learning_rate": 0.0009906980367969589,
      "loss": 2.1792,
      "step": 495
    },
    {
      "epoch": 0.1571462245619549,
      "grad_norm": 0.23650062084197998,
      "learning_rate": 0.0009901637730269192,
      "loss": 2.1513,
      "step": 500
    },
    {
      "epoch": 0.15871768680757445,
      "grad_norm": 0.20437929034233093,
      "learning_rate": 0.0009896147440133173,
      "loss": 2.1128,
      "step": 505
    },
    {
      "epoch": 0.160289149053194,
      "grad_norm": 0.4117949903011322,
      "learning_rate": 0.0009890509662945992,
      "loss": 2.1282,
      "step": 510
    },
    {
      "epoch": 0.16186061129881354,
      "grad_norm": 0.2904440462589264,
      "learning_rate": 0.0009884724568534873,
      "loss": 2.1036,
      "step": 515
    },
    {
      "epoch": 0.1634320735444331,
      "grad_norm": 0.30711230635643005,
      "learning_rate": 0.000987879233116469,
      "loss": 2.1463,
      "step": 520
    },
    {
      "epoch": 0.16500353579005264,
      "grad_norm": 0.28438258171081543,
      "learning_rate": 0.0009872713129532717,
      "loss": 2.0971,
      "step": 525
    },
    {
      "epoch": 0.1665749980356722,
      "grad_norm": 0.2756672203540802,
      "learning_rate": 0.000986648714676324,
      "loss": 2.1291,
      "step": 530
    },
    {
      "epoch": 0.16814646028129174,
      "grad_norm": 0.23554356396198273,
      "learning_rate": 0.0009860114570402056,
      "loss": 2.0797,
      "step": 535
    },
    {
      "epoch": 0.16971792252691129,
      "grad_norm": 0.3189121186733246,
      "learning_rate": 0.0009853595592410798,
      "loss": 2.1122,
      "step": 540
    },
    {
      "epoch": 0.17128938477253083,
      "grad_norm": 0.2167348712682724,
      "learning_rate": 0.000984693040916118,
      "loss": 2.1103,
      "step": 545
    },
    {
      "epoch": 0.17286084701815038,
      "grad_norm": 0.3020518124103546,
      "learning_rate": 0.0009840119221429062,
      "loss": 2.1314,
      "step": 550
    },
    {
      "epoch": 0.17443230926376993,
      "grad_norm": 0.2934524118900299,
      "learning_rate": 0.0009833162234388414,
      "loss": 2.1118,
      "step": 555
    },
    {
      "epoch": 0.17600377150938948,
      "grad_norm": 0.4206439256668091,
      "learning_rate": 0.0009826059657605123,
      "loss": 2.0729,
      "step": 560
    },
    {
      "epoch": 0.17757523375500903,
      "grad_norm": 0.2568371593952179,
      "learning_rate": 0.0009818811705030695,
      "loss": 2.0941,
      "step": 565
    },
    {
      "epoch": 0.17914669600062857,
      "grad_norm": 0.3140527009963989,
      "learning_rate": 0.00098114185949958,
      "loss": 2.0994,
      "step": 570
    },
    {
      "epoch": 0.18071815824624812,
      "grad_norm": 0.2604500651359558,
      "learning_rate": 0.0009803880550203698,
      "loss": 2.1462,
      "step": 575
    },
    {
      "epoch": 0.18228962049186767,
      "grad_norm": 0.3531113266944885,
      "learning_rate": 0.0009796197797723532,
      "loss": 2.0743,
      "step": 580
    },
    {
      "epoch": 0.18386108273748722,
      "grad_norm": 0.24229241907596588,
      "learning_rate": 0.0009788370568983484,
      "loss": 2.0614,
      "step": 585
    },
    {
      "epoch": 0.18543254498310677,
      "grad_norm": 0.43031787872314453,
      "learning_rate": 0.000978039909976381,
      "loss": 2.1287,
      "step": 590
    },
    {
      "epoch": 0.18700400722872634,
      "grad_norm": 0.210410937666893,
      "learning_rate": 0.0009772283630189726,
      "loss": 2.0234,
      "step": 595
    },
    {
      "epoch": 0.1885754694743459,
      "grad_norm": 0.28944680094718933,
      "learning_rate": 0.0009764024404724194,
      "loss": 2.0951,
      "step": 600
    },
    {
      "epoch": 0.19014693171996544,
      "grad_norm": 0.2910195589065552,
      "learning_rate": 0.0009755621672160539,
      "loss": 2.077,
      "step": 605
    },
    {
      "epoch": 0.191718393965585,
      "grad_norm": 0.27437564730644226,
      "learning_rate": 0.0009747075685614961,
      "loss": 2.0639,
      "step": 610
    },
    {
      "epoch": 0.19328985621120454,
      "grad_norm": 0.26965805888175964,
      "learning_rate": 0.0009738386702518917,
      "loss": 2.0205,
      "step": 615
    },
    {
      "epoch": 0.19486131845682408,
      "grad_norm": 0.22133469581604004,
      "learning_rate": 0.0009729554984611357,
      "loss": 2.0871,
      "step": 620
    },
    {
      "epoch": 0.19643278070244363,
      "grad_norm": 0.26669520139694214,
      "learning_rate": 0.0009720580797930845,
      "loss": 2.1095,
      "step": 625
    },
    {
      "epoch": 0.19800424294806318,
      "grad_norm": 0.22317343950271606,
      "learning_rate": 0.0009711464412807542,
      "loss": 2.0449,
      "step": 630
    },
    {
      "epoch": 0.19957570519368273,
      "grad_norm": 0.4366435706615448,
      "learning_rate": 0.0009702206103855065,
      "loss": 2.0923,
      "step": 635
    },
    {
      "epoch": 0.20114716743930228,
      "grad_norm": 0.29953792691230774,
      "learning_rate": 0.0009692806149962215,
      "loss": 2.0278,
      "step": 640
    },
    {
      "epoch": 0.20271862968492182,
      "grad_norm": 0.3391275405883789,
      "learning_rate": 0.0009683264834284575,
      "loss": 2.0462,
      "step": 645
    },
    {
      "epoch": 0.20429009193054137,
      "grad_norm": 0.3684813976287842,
      "learning_rate": 0.0009673582444235978,
      "loss": 2.0406,
      "step": 650
    },
    {
      "epoch": 0.20586155417616092,
      "grad_norm": 0.3173034191131592,
      "learning_rate": 0.0009663759271479858,
      "loss": 1.9821,
      "step": 655
    },
    {
      "epoch": 0.20743301642178047,
      "grad_norm": 0.28240126371383667,
      "learning_rate": 0.0009653795611920448,
      "loss": 2.0112,
      "step": 660
    },
    {
      "epoch": 0.20900447866740002,
      "grad_norm": 0.31004390120506287,
      "learning_rate": 0.000964369176569389,
      "loss": 2.0155,
      "step": 665
    },
    {
      "epoch": 0.21057594091301957,
      "grad_norm": 0.29802826046943665,
      "learning_rate": 0.0009633448037159167,
      "loss": 2.0641,
      "step": 670
    },
    {
      "epoch": 0.21214740315863911,
      "grad_norm": 0.2916626036167145,
      "learning_rate": 0.0009623064734888958,
      "loss": 1.9948,
      "step": 675
    },
    {
      "epoch": 0.21371886540425866,
      "grad_norm": 0.24406270682811737,
      "learning_rate": 0.0009612542171660328,
      "loss": 1.9714,
      "step": 680
    },
    {
      "epoch": 0.2152903276498782,
      "grad_norm": 0.31989896297454834,
      "learning_rate": 0.0009601880664445312,
      "loss": 2.0075,
      "step": 685
    },
    {
      "epoch": 0.21686178989549776,
      "grad_norm": 0.16441141068935394,
      "learning_rate": 0.000959108053440137,
      "loss": 1.9964,
      "step": 690
    },
    {
      "epoch": 0.2184332521411173,
      "grad_norm": 0.27429190278053284,
      "learning_rate": 0.0009580142106861702,
      "loss": 2.013,
      "step": 695
    },
    {
      "epoch": 0.22000471438673685,
      "grad_norm": 0.21225886046886444,
      "learning_rate": 0.0009569065711325461,
      "loss": 2.0092,
      "step": 700
    },
    {
      "epoch": 0.2215761766323564,
      "grad_norm": 0.24281561374664307,
      "learning_rate": 0.0009557851681447816,
      "loss": 2.0172,
      "step": 705
    },
    {
      "epoch": 0.22314763887797595,
      "grad_norm": 0.20721524953842163,
      "learning_rate": 0.0009546500355029912,
      "loss": 2.0265,
      "step": 710
    },
    {
      "epoch": 0.2247191011235955,
      "grad_norm": 0.268388032913208,
      "learning_rate": 0.0009535012074008687,
      "loss": 2.0329,
      "step": 715
    },
    {
      "epoch": 0.22629056336921505,
      "grad_norm": 0.25884491205215454,
      "learning_rate": 0.0009523387184446569,
      "loss": 1.9673,
      "step": 720
    },
    {
      "epoch": 0.2278620256148346,
      "grad_norm": 0.26054859161376953,
      "learning_rate": 0.0009511626036521064,
      "loss": 1.9775,
      "step": 725
    },
    {
      "epoch": 0.22943348786045414,
      "grad_norm": 0.23918680846691132,
      "learning_rate": 0.0009499728984514194,
      "loss": 1.9753,
      "step": 730
    },
    {
      "epoch": 0.2310049501060737,
      "grad_norm": 0.18698784708976746,
      "learning_rate": 0.0009487696386801834,
      "loss": 2.0547,
      "step": 735
    },
    {
      "epoch": 0.23257641235169324,
      "grad_norm": 0.23608291149139404,
      "learning_rate": 0.0009475528605842913,
      "loss": 1.9701,
      "step": 740
    },
    {
      "epoch": 0.2341478745973128,
      "grad_norm": 0.20181405544281006,
      "learning_rate": 0.0009463226008168493,
      "loss": 1.9994,
      "step": 745
    },
    {
      "epoch": 0.23571933684293234,
      "grad_norm": 0.421236515045166,
      "learning_rate": 0.0009450788964370737,
      "loss": 1.9943,
      "step": 750
    },
    {
      "epoch": 0.23729079908855188,
      "grad_norm": 0.2977412939071655,
      "learning_rate": 0.0009438217849091732,
      "loss": 1.998,
      "step": 755
    },
    {
      "epoch": 0.23886226133417143,
      "grad_norm": 0.3429923355579376,
      "learning_rate": 0.0009425513041012219,
      "loss": 2.0098,
      "step": 760
    },
    {
      "epoch": 0.240433723579791,
      "grad_norm": 0.28545793890953064,
      "learning_rate": 0.0009412674922840173,
      "loss": 1.9684,
      "step": 765
    },
    {
      "epoch": 0.24200518582541056,
      "grad_norm": 0.1793077141046524,
      "learning_rate": 0.0009399703881299281,
      "loss": 2.0074,
      "step": 770
    },
    {
      "epoch": 0.2435766480710301,
      "grad_norm": 0.280935674905777,
      "learning_rate": 0.0009386600307117292,
      "loss": 2.0081,
      "step": 775
    },
    {
      "epoch": 0.24514811031664965,
      "grad_norm": 0.30971866846084595,
      "learning_rate": 0.0009373364595014245,
      "loss": 1.9568,
      "step": 780
    },
    {
      "epoch": 0.2467195725622692,
      "grad_norm": 0.26543349027633667,
      "learning_rate": 0.0009359997143690587,
      "loss": 1.9926,
      "step": 785
    },
    {
      "epoch": 0.24829103480788875,
      "grad_norm": 0.27692025899887085,
      "learning_rate": 0.0009346498355815143,
      "loss": 1.9465,
      "step": 790
    },
    {
      "epoch": 0.2498624970535083,
      "grad_norm": 0.2559816539287567,
      "learning_rate": 0.0009332868638013016,
      "loss": 1.9473,
      "step": 795
    },
    {
      "epoch": 0.2514339592991278,
      "grad_norm": 0.330941766500473,
      "learning_rate": 0.0009319108400853309,
      "loss": 1.9892,
      "step": 800
    },
    {
      "epoch": 0.25300542154474737,
      "grad_norm": 0.33171382546424866,
      "learning_rate": 0.0009305218058836777,
      "loss": 1.9819,
      "step": 805
    },
    {
      "epoch": 0.2545768837903669,
      "grad_norm": 0.49775874614715576,
      "learning_rate": 0.0009291198030383335,
      "loss": 1.9979,
      "step": 810
    },
    {
      "epoch": 0.25614834603598646,
      "grad_norm": 0.4397469758987427,
      "learning_rate": 0.0009277048737819445,
      "loss": 1.9923,
      "step": 815
    },
    {
      "epoch": 0.257719808281606,
      "grad_norm": 0.38636040687561035,
      "learning_rate": 0.0009262770607365412,
      "loss": 1.989,
      "step": 820
    },
    {
      "epoch": 0.25929127052722556,
      "grad_norm": 0.19002190232276917,
      "learning_rate": 0.0009248364069122531,
      "loss": 1.9449,
      "step": 825
    },
    {
      "epoch": 0.2608627327728451,
      "grad_norm": 0.21801802515983582,
      "learning_rate": 0.0009233829557060136,
      "loss": 1.9449,
      "step": 830
    },
    {
      "epoch": 0.26243419501846466,
      "grad_norm": 0.2232774794101715,
      "learning_rate": 0.0009219167509002526,
      "loss": 2.0174,
      "step": 835
    },
    {
      "epoch": 0.2640056572640842,
      "grad_norm": 0.21366550028324127,
      "learning_rate": 0.0009204378366615778,
      "loss": 1.9458,
      "step": 840
    },
    {
      "epoch": 0.26557711950970375,
      "grad_norm": 0.22979532182216644,
      "learning_rate": 0.0009189462575394443,
      "loss": 1.968,
      "step": 845
    },
    {
      "epoch": 0.26714858175532336,
      "grad_norm": 0.3232949376106262,
      "learning_rate": 0.0009174420584648122,
      "loss": 1.9344,
      "step": 850
    },
    {
      "epoch": 0.2687200440009429,
      "grad_norm": 0.3545362055301666,
      "learning_rate": 0.0009159252847487938,
      "loss": 1.9558,
      "step": 855
    },
    {
      "epoch": 0.27029150624656245,
      "grad_norm": 0.25105616450309753,
      "learning_rate": 0.0009143959820812882,
      "loss": 1.9726,
      "step": 860
    },
    {
      "epoch": 0.271862968492182,
      "grad_norm": 0.37967053055763245,
      "learning_rate": 0.0009128541965296051,
      "loss": 1.9593,
      "step": 865
    },
    {
      "epoch": 0.27343443073780155,
      "grad_norm": 0.47272658348083496,
      "learning_rate": 0.0009112999745370774,
      "loss": 1.9306,
      "step": 870
    },
    {
      "epoch": 0.2750058929834211,
      "grad_norm": 0.40520063042640686,
      "learning_rate": 0.0009097333629216616,
      "loss": 2.0052,
      "step": 875
    },
    {
      "epoch": 0.27657735522904064,
      "grad_norm": 0.16213001310825348,
      "learning_rate": 0.000908154408874528,
      "loss": 1.9685,
      "step": 880
    },
    {
      "epoch": 0.2781488174746602,
      "grad_norm": 0.2853317856788635,
      "learning_rate": 0.000906563159958639,
      "loss": 1.972,
      "step": 885
    },
    {
      "epoch": 0.27972027972027974,
      "grad_norm": 0.3446250259876251,
      "learning_rate": 0.0009049596641073161,
      "loss": 1.9114,
      "step": 890
    },
    {
      "epoch": 0.2812917419658993,
      "grad_norm": 0.19377753138542175,
      "learning_rate": 0.0009033439696227966,
      "loss": 1.9119,
      "step": 895
    },
    {
      "epoch": 0.28286320421151884,
      "grad_norm": 0.249973326921463,
      "learning_rate": 0.0009017161251747779,
      "loss": 1.9657,
      "step": 900
    },
    {
      "epoch": 0.2844346664571384,
      "grad_norm": 0.26847654581069946,
      "learning_rate": 0.0009000761797989521,
      "loss": 1.9543,
      "step": 905
    },
    {
      "epoch": 0.28600612870275793,
      "grad_norm": 0.23439465463161469,
      "learning_rate": 0.0008984241828955281,
      "loss": 1.9104,
      "step": 910
    },
    {
      "epoch": 0.2875775909483775,
      "grad_norm": 0.37913578748703003,
      "learning_rate": 0.0008967601842277444,
      "loss": 1.9149,
      "step": 915
    },
    {
      "epoch": 0.28914905319399703,
      "grad_norm": 0.2542668581008911,
      "learning_rate": 0.0008950842339203695,
      "loss": 1.9682,
      "step": 920
    },
    {
      "epoch": 0.2907205154396166,
      "grad_norm": 0.3627631962299347,
      "learning_rate": 0.0008933963824581919,
      "loss": 1.9878,
      "step": 925
    },
    {
      "epoch": 0.2922919776852361,
      "grad_norm": 0.25258249044418335,
      "learning_rate": 0.0008916966806844996,
      "loss": 1.9425,
      "step": 930
    },
    {
      "epoch": 0.2938634399308557,
      "grad_norm": 0.502772867679596,
      "learning_rate": 0.0008899851797995489,
      "loss": 1.956,
      "step": 935
    },
    {
      "epoch": 0.2954349021764752,
      "grad_norm": 0.3263178765773773,
      "learning_rate": 0.0008882619313590213,
      "loss": 1.9896,
      "step": 940
    },
    {
      "epoch": 0.29700636442209477,
      "grad_norm": 0.24707098305225372,
      "learning_rate": 0.0008865269872724708,
      "loss": 1.901,
      "step": 945
    },
    {
      "epoch": 0.2985778266677143,
      "grad_norm": 0.26257702708244324,
      "learning_rate": 0.000884780399801761,
      "loss": 1.957,
      "step": 950
    },
    {
      "epoch": 0.30014928891333387,
      "grad_norm": 0.2532920241355896,
      "learning_rate": 0.000883022221559489,
      "loss": 1.9059,
      "step": 955
    },
    {
      "epoch": 0.3017207511589534,
      "grad_norm": 0.26241347193717957,
      "learning_rate": 0.0008812525055074028,
      "loss": 1.938,
      "step": 960
    },
    {
      "epoch": 0.30329221340457296,
      "grad_norm": 0.28568172454833984,
      "learning_rate": 0.000879471304954804,
      "loss": 1.8915,
      "step": 965
    },
    {
      "epoch": 0.3048636756501925,
      "grad_norm": 0.23444630205631256,
      "learning_rate": 0.0008776786735569431,
      "loss": 1.9095,
      "step": 970
    },
    {
      "epoch": 0.30643513789581206,
      "grad_norm": 0.22640594840049744,
      "learning_rate": 0.0008758746653134029,
      "loss": 1.8879,
      "step": 975
    },
    {
      "epoch": 0.3080066001414316,
      "grad_norm": 0.5037649869918823,
      "learning_rate": 0.0008740593345664716,
      "loss": 1.889,
      "step": 980
    },
    {
      "epoch": 0.30957806238705116,
      "grad_norm": 0.18424461781978607,
      "learning_rate": 0.0008722327359995064,
      "loss": 1.8806,
      "step": 985
    },
    {
      "epoch": 0.3111495246326707,
      "grad_norm": 0.2199254333972931,
      "learning_rate": 0.0008703949246352858,
      "loss": 1.8767,
      "step": 990
    },
    {
      "epoch": 0.31272098687829025,
      "grad_norm": 0.26489025354385376,
      "learning_rate": 0.0008685459558343523,
      "loss": 1.8609,
      "step": 995
    },
    {
      "epoch": 0.3142924491239098,
      "grad_norm": 0.164434015750885,
      "learning_rate": 0.000866685885293345,
      "loss": 1.8941,
      "step": 1000
    },
    {
      "epoch": 0.31586391136952935,
      "grad_norm": 0.21282632648944855,
      "learning_rate": 0.0008648147690433212,
      "loss": 1.8818,
      "step": 1005
    },
    {
      "epoch": 0.3174353736151489,
      "grad_norm": 0.21168777346611023,
      "learning_rate": 0.0008629326634480697,
      "loss": 1.917,
      "step": 1010
    },
    {
      "epoch": 0.31900683586076845,
      "grad_norm": 0.2421833872795105,
      "learning_rate": 0.0008610396252024113,
      "loss": 1.9548,
      "step": 1015
    },
    {
      "epoch": 0.320578298106388,
      "grad_norm": 0.17949452996253967,
      "learning_rate": 0.0008591357113304927,
      "loss": 1.907,
      "step": 1020
    },
    {
      "epoch": 0.32214976035200754,
      "grad_norm": 0.25564640760421753,
      "learning_rate": 0.0008572209791840677,
      "loss": 1.9133,
      "step": 1025
    },
    {
      "epoch": 0.3237212225976271,
      "grad_norm": 0.21499499678611755,
      "learning_rate": 0.0008552954864407698,
      "loss": 1.8724,
      "step": 1030
    },
    {
      "epoch": 0.32529268484324664,
      "grad_norm": 0.286145955324173,
      "learning_rate": 0.000853359291102375,
      "loss": 1.9174,
      "step": 1035
    },
    {
      "epoch": 0.3268641470888662,
      "grad_norm": 0.216496080160141,
      "learning_rate": 0.0008514124514930544,
      "loss": 1.9212,
      "step": 1040
    },
    {
      "epoch": 0.32843560933448573,
      "grad_norm": 0.20780375599861145,
      "learning_rate": 0.0008494550262576173,
      "loss": 1.9042,
      "step": 1045
    },
    {
      "epoch": 0.3300070715801053,
      "grad_norm": 0.19447444379329681,
      "learning_rate": 0.0008474870743597448,
      "loss": 1.9154,
      "step": 1050
    },
    {
      "epoch": 0.33157853382572483,
      "grad_norm": 0.2113378793001175,
      "learning_rate": 0.0008455086550802132,
      "loss": 1.9357,
      "step": 1055
    },
    {
      "epoch": 0.3331499960713444,
      "grad_norm": 0.2649160921573639,
      "learning_rate": 0.0008435198280151091,
      "loss": 1.9016,
      "step": 1060
    },
    {
      "epoch": 0.3347214583169639,
      "grad_norm": 0.37894386053085327,
      "learning_rate": 0.0008415206530740331,
      "loss": 1.9078,
      "step": 1065
    },
    {
      "epoch": 0.3362929205625835,
      "grad_norm": 0.19303838908672333,
      "learning_rate": 0.0008395111904782964,
      "loss": 1.9018,
      "step": 1070
    },
    {
      "epoch": 0.337864382808203,
      "grad_norm": 0.25691303610801697,
      "learning_rate": 0.0008374915007591053,
      "loss": 1.9319,
      "step": 1075
    },
    {
      "epoch": 0.33943584505382257,
      "grad_norm": 0.3956037759780884,
      "learning_rate": 0.0008354616447557392,
      "loss": 1.9003,
      "step": 1080
    },
    {
      "epoch": 0.3410073072994421,
      "grad_norm": 0.29999446868896484,
      "learning_rate": 0.000833421683613717,
      "loss": 1.9377,
      "step": 1085
    },
    {
      "epoch": 0.34257876954506167,
      "grad_norm": 0.23759868741035461,
      "learning_rate": 0.0008313716787829558,
      "loss": 1.8833,
      "step": 1090
    },
    {
      "epoch": 0.3441502317906812,
      "grad_norm": 0.33085566759109497,
      "learning_rate": 0.000829311692015919,
      "loss": 1.936,
      "step": 1095
    },
    {
      "epoch": 0.34572169403630076,
      "grad_norm": 0.32732662558555603,
      "learning_rate": 0.0008272417853657571,
      "loss": 1.8785,
      "step": 1100
    },
    {
      "epoch": 0.3472931562819203,
      "grad_norm": 0.23105382919311523,
      "learning_rate": 0.0008251620211844383,
      "loss": 1.9553,
      "step": 1105
    },
    {
      "epoch": 0.34886461852753986,
      "grad_norm": 0.34907397627830505,
      "learning_rate": 0.0008230724621208693,
      "loss": 1.8749,
      "step": 1110
    },
    {
      "epoch": 0.3504360807731594,
      "grad_norm": 0.2972997725009918,
      "learning_rate": 0.0008209731711190099,
      "loss": 1.8532,
      "step": 1115
    },
    {
      "epoch": 0.35200754301877896,
      "grad_norm": 0.21005867421627045,
      "learning_rate": 0.0008188642114159747,
      "loss": 1.8804,
      "step": 1120
    },
    {
      "epoch": 0.3535790052643985,
      "grad_norm": 0.1943141371011734,
      "learning_rate": 0.0008167456465401299,
      "loss": 1.8385,
      "step": 1125
    },
    {
      "epoch": 0.35515046751001805,
      "grad_norm": 0.2459149956703186,
      "learning_rate": 0.0008146175403091795,
      "loss": 1.8629,
      "step": 1130
    },
    {
      "epoch": 0.3567219297556376,
      "grad_norm": 0.2223600149154663,
      "learning_rate": 0.0008124799568282418,
      "loss": 1.8939,
      "step": 1135
    },
    {
      "epoch": 0.35829339200125715,
      "grad_norm": 0.3591344654560089,
      "learning_rate": 0.0008103329604879195,
      "loss": 1.835,
      "step": 1140
    },
    {
      "epoch": 0.3598648542468767,
      "grad_norm": 0.33568888902664185,
      "learning_rate": 0.0008081766159623596,
      "loss": 1.9111,
      "step": 1145
    },
    {
      "epoch": 0.36143631649249625,
      "grad_norm": 0.19357386231422424,
      "learning_rate": 0.0008060109882073055,
      "loss": 1.8748,
      "step": 1150
    },
    {
      "epoch": 0.3630077787381158,
      "grad_norm": 0.20973102748394012,
      "learning_rate": 0.0008038361424581392,
      "loss": 1.8755,
      "step": 1155
    },
    {
      "epoch": 0.36457924098373534,
      "grad_norm": 0.19260567426681519,
      "learning_rate": 0.000801652144227918,
      "loss": 1.8516,
      "step": 1160
    },
    {
      "epoch": 0.3661507032293549,
      "grad_norm": 0.21391819417476654,
      "learning_rate": 0.0007994590593054001,
      "loss": 1.8751,
      "step": 1165
    },
    {
      "epoch": 0.36772216547497444,
      "grad_norm": 0.23929333686828613,
      "learning_rate": 0.0007972569537530623,
      "loss": 1.8371,
      "step": 1170
    },
    {
      "epoch": 0.369293627720594,
      "grad_norm": 0.2769240438938141,
      "learning_rate": 0.0007950458939051108,
      "loss": 1.8872,
      "step": 1175
    },
    {
      "epoch": 0.37086508996621353,
      "grad_norm": 0.22298012673854828,
      "learning_rate": 0.000792825946365483,
      "loss": 1.8764,
      "step": 1180
    },
    {
      "epoch": 0.37243655221183314,
      "grad_norm": 0.1722257286310196,
      "learning_rate": 0.000790597178005841,
      "loss": 1.802,
      "step": 1185
    },
    {
      "epoch": 0.3740080144574527,
      "grad_norm": 0.29332205653190613,
      "learning_rate": 0.0007883596559635567,
      "loss": 1.8884,
      "step": 1190
    },
    {
      "epoch": 0.37557947670307223,
      "grad_norm": 0.2480962872505188,
      "learning_rate": 0.0007861134476396901,
      "loss": 1.8287,
      "step": 1195
    },
    {
      "epoch": 0.3771509389486918,
      "grad_norm": 0.22237135469913483,
      "learning_rate": 0.0007838586206969593,
      "loss": 1.8414,
      "step": 1200
    },
    {
      "epoch": 0.37872240119431133,
      "grad_norm": 0.2007599025964737,
      "learning_rate": 0.0007815952430577014,
      "loss": 1.9258,
      "step": 1205
    },
    {
      "epoch": 0.3802938634399309,
      "grad_norm": 0.2816416323184967,
      "learning_rate": 0.0007793233829018263,
      "loss": 1.8544,
      "step": 1210
    },
    {
      "epoch": 0.3818653256855504,
      "grad_norm": 0.1743992269039154,
      "learning_rate": 0.0007770431086647642,
      "loss": 1.8735,
      "step": 1215
    },
    {
      "epoch": 0.38343678793117,
      "grad_norm": 0.20147810876369476,
      "learning_rate": 0.000774754489035403,
      "loss": 1.8541,
      "step": 1220
    },
    {
      "epoch": 0.3850082501767895,
      "grad_norm": 0.19584012031555176,
      "learning_rate": 0.0007724575929540197,
      "loss": 1.821,
      "step": 1225
    },
    {
      "epoch": 0.38657971242240907,
      "grad_norm": 0.32170602679252625,
      "learning_rate": 0.0007701524896102037,
      "loss": 1.8277,
      "step": 1230
    },
    {
      "epoch": 0.3881511746680286,
      "grad_norm": 0.16368649899959564,
      "learning_rate": 0.0007678392484407717,
      "loss": 1.8317,
      "step": 1235
    },
    {
      "epoch": 0.38972263691364817,
      "grad_norm": 0.17887534201145172,
      "learning_rate": 0.0007655179391276775,
      "loss": 1.8234,
      "step": 1240
    },
    {
      "epoch": 0.3912940991592677,
      "grad_norm": 0.2335747331380844,
      "learning_rate": 0.0007631886315959121,
      "loss": 1.8789,
      "step": 1245
    },
    {
      "epoch": 0.39286556140488726,
      "grad_norm": 0.16466860473155975,
      "learning_rate": 0.0007608513960113975,
      "loss": 1.8777,
      "step": 1250
    },
    {
      "epoch": 0.3944370236505068,
      "grad_norm": 0.1995929628610611,
      "learning_rate": 0.000758506302778873,
      "loss": 1.8159,
      "step": 1255
    },
    {
      "epoch": 0.39600848589612636,
      "grad_norm": 0.28415942192077637,
      "learning_rate": 0.0007561534225397744,
      "loss": 1.8151,
      "step": 1260
    },
    {
      "epoch": 0.3975799481417459,
      "grad_norm": 0.15427584946155548,
      "learning_rate": 0.0007537928261701064,
      "loss": 1.8402,
      "step": 1265
    },
    {
      "epoch": 0.39915141038736546,
      "grad_norm": 0.211939737200737,
      "learning_rate": 0.0007514245847783069,
      "loss": 1.8554,
      "step": 1270
    },
    {
      "epoch": 0.400722872632985,
      "grad_norm": 0.1703938990831375,
      "learning_rate": 0.0007490487697031061,
      "loss": 1.8369,
      "step": 1275
    },
    {
      "epoch": 0.40229433487860455,
      "grad_norm": 0.19989508390426636,
      "learning_rate": 0.0007466654525113761,
      "loss": 1.8075,
      "step": 1280
    },
    {
      "epoch": 0.4038657971242241,
      "grad_norm": 0.3463574945926666,
      "learning_rate": 0.0007442747049959765,
      "loss": 1.8606,
      "step": 1285
    },
    {
      "epoch": 0.40543725936984365,
      "grad_norm": 0.2496725171804428,
      "learning_rate": 0.0007418765991735908,
      "loss": 1.8269,
      "step": 1290
    },
    {
      "epoch": 0.4070087216154632,
      "grad_norm": 0.29229408502578735,
      "learning_rate": 0.0007394712072825576,
      "loss": 1.8034,
      "step": 1295
    },
    {
      "epoch": 0.40858018386108275,
      "grad_norm": 0.2583361566066742,
      "learning_rate": 0.0007370586017806941,
      "loss": 1.8845,
      "step": 1300
    },
    {
      "epoch": 0.4101516461067023,
      "grad_norm": 0.168927401304245,
      "learning_rate": 0.0007346388553431141,
      "loss": 1.8509,
      "step": 1305
    },
    {
      "epoch": 0.41172310835232184,
      "grad_norm": 0.24876132607460022,
      "learning_rate": 0.0007322120408600379,
      "loss": 1.9044,
      "step": 1310
    },
    {
      "epoch": 0.4132945705979414,
      "grad_norm": 0.16044846177101135,
      "learning_rate": 0.0007297782314345972,
      "loss": 1.8406,
      "step": 1315
    },
    {
      "epoch": 0.41486603284356094,
      "grad_norm": 0.18333998322486877,
      "learning_rate": 0.0007273375003806335,
      "loss": 1.8755,
      "step": 1320
    },
    {
      "epoch": 0.4164374950891805,
      "grad_norm": 0.2609647512435913,
      "learning_rate": 0.0007248899212204883,
      "loss": 1.8146,
      "step": 1325
    },
    {
      "epoch": 0.41800895733480004,
      "grad_norm": 0.20326748490333557,
      "learning_rate": 0.0007224355676827897,
      "loss": 1.8173,
      "step": 1330
    },
    {
      "epoch": 0.4195804195804196,
      "grad_norm": 0.32762306928634644,
      "learning_rate": 0.0007199745137002305,
      "loss": 1.892,
      "step": 1335
    },
    {
      "epoch": 0.42115188182603913,
      "grad_norm": 0.25635045766830444,
      "learning_rate": 0.000717506833407342,
      "loss": 1.8104,
      "step": 1340
    },
    {
      "epoch": 0.4227233440716587,
      "grad_norm": 0.29682666063308716,
      "learning_rate": 0.0007150326011382603,
      "loss": 1.8018,
      "step": 1345
    },
    {
      "epoch": 0.42429480631727823,
      "grad_norm": 0.22666814923286438,
      "learning_rate": 0.0007125518914244868,
      "loss": 1.8358,
      "step": 1350
    },
    {
      "epoch": 0.4258662685628978,
      "grad_norm": 0.26551052927970886,
      "learning_rate": 0.000710064778992644,
      "loss": 1.8332,
      "step": 1355
    },
    {
      "epoch": 0.4274377308085173,
      "grad_norm": 0.16310109198093414,
      "learning_rate": 0.0007075713387622236,
      "loss": 1.7835,
      "step": 1360
    },
    {
      "epoch": 0.4290091930541369,
      "grad_norm": 0.19358539581298828,
      "learning_rate": 0.0007050716458433305,
      "loss": 1.7956,
      "step": 1365
    },
    {
      "epoch": 0.4305806552997564,
      "grad_norm": 0.16724836826324463,
      "learning_rate": 0.0007025657755344197,
      "loss": 1.874,
      "step": 1370
    },
    {
      "epoch": 0.43215211754537597,
      "grad_norm": 0.2080153524875641,
      "learning_rate": 0.0007000538033200279,
      "loss": 1.8706,
      "step": 1375
    },
    {
      "epoch": 0.4337235797909955,
      "grad_norm": 0.1837550699710846,
      "learning_rate": 0.0006975358048685004,
      "loss": 1.8576,
      "step": 1380
    },
    {
      "epoch": 0.43529504203661507,
      "grad_norm": 0.26517677307128906,
      "learning_rate": 0.0006950118560297112,
      "loss": 1.8121,
      "step": 1385
    },
    {
      "epoch": 0.4368665042822346,
      "grad_norm": 0.2400379180908203,
      "learning_rate": 0.0006924820328327785,
      "loss": 1.7908,
      "step": 1390
    },
    {
      "epoch": 0.43843796652785416,
      "grad_norm": 0.1548314243555069,
      "learning_rate": 0.0006899464114837739,
      "loss": 1.8439,
      "step": 1395
    },
    {
      "epoch": 0.4400094287734737,
      "grad_norm": 0.1790788173675537,
      "learning_rate": 0.0006874050683634273,
      "loss": 1.8511,
      "step": 1400
    },
    {
      "epoch": 0.44158089101909326,
      "grad_norm": 0.2156343162059784,
      "learning_rate": 0.0006848580800248262,
      "loss": 1.8442,
      "step": 1405
    },
    {
      "epoch": 0.4431523532647128,
      "grad_norm": 0.24280230700969696,
      "learning_rate": 0.0006823055231911093,
      "loss": 1.8091,
      "step": 1410
    },
    {
      "epoch": 0.44472381551033235,
      "grad_norm": 0.18132703006267548,
      "learning_rate": 0.0006797474747531558,
      "loss": 1.8596,
      "step": 1415
    },
    {
      "epoch": 0.4462952777559519,
      "grad_norm": 0.2499558925628662,
      "learning_rate": 0.0006771840117672684,
      "loss": 1.7982,
      "step": 1420
    },
    {
      "epoch": 0.44786674000157145,
      "grad_norm": 0.14542344212532043,
      "learning_rate": 0.0006746152114528531,
      "loss": 1.8436,
      "step": 1425
    },
    {
      "epoch": 0.449438202247191,
      "grad_norm": 0.23537123203277588,
      "learning_rate": 0.0006720411511900927,
      "loss": 1.8342,
      "step": 1430
    },
    {
      "epoch": 0.45100966449281055,
      "grad_norm": 0.2291416972875595,
      "learning_rate": 0.000669461908517616,
      "loss": 1.815,
      "step": 1435
    },
    {
      "epoch": 0.4525811267384301,
      "grad_norm": 0.17745240032672882,
      "learning_rate": 0.0006668775611301611,
      "loss": 1.807,
      "step": 1440
    },
    {
      "epoch": 0.45415258898404964,
      "grad_norm": 0.2866009771823883,
      "learning_rate": 0.0006642881868762368,
      "loss": 1.8313,
      "step": 1445
    },
    {
      "epoch": 0.4557240512296692,
      "grad_norm": 0.23607899248600006,
      "learning_rate": 0.0006616938637557761,
      "loss": 1.8308,
      "step": 1450
    },
    {
      "epoch": 0.45729551347528874,
      "grad_norm": 0.25843170285224915,
      "learning_rate": 0.0006590946699177875,
      "loss": 1.8076,
      "step": 1455
    },
    {
      "epoch": 0.4588669757209083,
      "grad_norm": 0.2201550155878067,
      "learning_rate": 0.0006564906836580004,
      "loss": 1.7999,
      "step": 1460
    },
    {
      "epoch": 0.46043843796652784,
      "grad_norm": 0.2689286172389984,
      "learning_rate": 0.0006538819834165061,
      "loss": 1.8498,
      "step": 1465
    },
    {
      "epoch": 0.4620099002121474,
      "grad_norm": 0.19224600493907928,
      "learning_rate": 0.0006512686477753966,
      "loss": 1.8525,
      "step": 1470
    },
    {
      "epoch": 0.46358136245776693,
      "grad_norm": 0.18337461352348328,
      "learning_rate": 0.0006486507554563953,
      "loss": 1.8378,
      "step": 1475
    },
    {
      "epoch": 0.4651528247033865,
      "grad_norm": 0.22880521416664124,
      "learning_rate": 0.0006460283853184879,
      "loss": 1.8124,
      "step": 1480
    },
    {
      "epoch": 0.46672428694900603,
      "grad_norm": 0.3217616081237793,
      "learning_rate": 0.0006434016163555452,
      "loss": 1.8509,
      "step": 1485
    },
    {
      "epoch": 0.4682957491946256,
      "grad_norm": 0.1924976408481598,
      "learning_rate": 0.000640770527693944,
      "loss": 1.8106,
      "step": 1490
    },
    {
      "epoch": 0.4698672114402451,
      "grad_norm": 0.24654747545719147,
      "learning_rate": 0.0006381351985901842,
      "loss": 1.79,
      "step": 1495
    },
    {
      "epoch": 0.4714386736858647,
      "grad_norm": 0.18487554788589478,
      "learning_rate": 0.0006354957084285007,
      "loss": 1.7589,
      "step": 1500
    },
    {
      "epoch": 0.4730101359314842,
      "grad_norm": 0.19454938173294067,
      "learning_rate": 0.0006328521367184721,
      "loss": 1.7756,
      "step": 1505
    },
    {
      "epoch": 0.47458159817710377,
      "grad_norm": 0.2697226107120514,
      "learning_rate": 0.000630204563092626,
      "loss": 1.8154,
      "step": 1510
    },
    {
      "epoch": 0.4761530604227233,
      "grad_norm": 0.2261214405298233,
      "learning_rate": 0.0006275530673040401,
      "loss": 1.7989,
      "step": 1515
    },
    {
      "epoch": 0.47772452266834287,
      "grad_norm": 0.29914212226867676,
      "learning_rate": 0.0006248977292239395,
      "loss": 1.7528,
      "step": 1520
    },
    {
      "epoch": 0.47929598491396247,
      "grad_norm": 0.21949878334999084,
      "learning_rate": 0.0006222386288392914,
      "loss": 1.7694,
      "step": 1525
    },
    {
      "epoch": 0.480867447159582,
      "grad_norm": 0.22492919862270355,
      "learning_rate": 0.0006195758462503947,
      "loss": 1.8911,
      "step": 1530
    },
    {
      "epoch": 0.48243890940520157,
      "grad_norm": 0.1728420853614807,
      "learning_rate": 0.0006169094616684678,
      "loss": 1.7795,
      "step": 1535
    },
    {
      "epoch": 0.4840103716508211,
      "grad_norm": 0.1900889128446579,
      "learning_rate": 0.0006142395554132324,
      "loss": 1.8095,
      "step": 1540
    },
    {
      "epoch": 0.48558183389644066,
      "grad_norm": 0.2331051081418991,
      "learning_rate": 0.0006115662079104937,
      "loss": 1.8101,
      "step": 1545
    },
    {
      "epoch": 0.4871532961420602,
      "grad_norm": 0.28242024779319763,
      "learning_rate": 0.000608889499689718,
      "loss": 1.8349,
      "step": 1550
    },
    {
      "epoch": 0.48872475838767976,
      "grad_norm": 0.2434227615594864,
      "learning_rate": 0.0006062095113816069,
      "loss": 1.7964,
      "step": 1555
    },
    {
      "epoch": 0.4902962206332993,
      "grad_norm": 0.2203354686498642,
      "learning_rate": 0.0006035263237156676,
      "loss": 1.7928,
      "step": 1560
    },
    {
      "epoch": 0.49186768287891885,
      "grad_norm": 0.2670794427394867,
      "learning_rate": 0.0006008400175177827,
      "loss": 1.8127,
      "step": 1565
    },
    {
      "epoch": 0.4934391451245384,
      "grad_norm": 0.3301917016506195,
      "learning_rate": 0.0005981506737077743,
      "loss": 1.8027,
      "step": 1570
    },
    {
      "epoch": 0.49501060737015795,
      "grad_norm": 0.20628106594085693,
      "learning_rate": 0.0005954583732969666,
      "loss": 1.7938,
      "step": 1575
    },
    {
      "epoch": 0.4965820696157775,
      "grad_norm": 0.16303130984306335,
      "learning_rate": 0.000592763197385746,
      "loss": 1.8486,
      "step": 1580
    },
    {
      "epoch": 0.49815353186139705,
      "grad_norm": 0.15457695722579956,
      "learning_rate": 0.0005900652271611175,
      "loss": 1.773,
      "step": 1585
    },
    {
      "epoch": 0.4997249941070166,
      "grad_norm": 0.1589762419462204,
      "learning_rate": 0.0005873645438942595,
      "loss": 1.7507,
      "step": 1590
    },
    {
      "epoch": 0.5012964563526361,
      "grad_norm": 0.19667518138885498,
      "learning_rate": 0.000584661228938076,
      "loss": 1.8197,
      "step": 1595
    },
    {
      "epoch": 0.5028679185982556,
      "grad_norm": 0.40122613310813904,
      "learning_rate": 0.000581955363724745,
      "loss": 1.8094,
      "step": 1600
    },
    {
      "epoch": 0.5044393808438752,
      "grad_norm": 0.19897930324077606,
      "learning_rate": 0.0005792470297632666,
      "loss": 1.7865,
      "step": 1605
    },
    {
      "epoch": 0.5060108430894947,
      "grad_norm": 0.3172074854373932,
      "learning_rate": 0.0005765363086370068,
      "loss": 1.7962,
      "step": 1610
    },
    {
      "epoch": 0.5075823053351143,
      "grad_norm": 0.20447920262813568,
      "learning_rate": 0.0005738232820012407,
      "loss": 1.7691,
      "step": 1615
    },
    {
      "epoch": 0.5091537675807338,
      "grad_norm": 0.16863569617271423,
      "learning_rate": 0.0005711080315806921,
      "loss": 1.8213,
      "step": 1620
    },
    {
      "epoch": 0.5107252298263534,
      "grad_norm": 0.29164841771125793,
      "learning_rate": 0.0005683906391670727,
      "loss": 1.7405,
      "step": 1625
    },
    {
      "epoch": 0.5122966920719729,
      "grad_norm": 0.1419832408428192,
      "learning_rate": 0.0005656711866166167,
      "loss": 1.8176,
      "step": 1630
    },
    {
      "epoch": 0.5138681543175925,
      "grad_norm": 0.1718250811100006,
      "learning_rate": 0.0005629497558476167,
      "loss": 1.8431,
      "step": 1635
    },
    {
      "epoch": 0.515439616563212,
      "grad_norm": 0.14950774610042572,
      "learning_rate": 0.0005602264288379551,
      "loss": 1.8238,
      "step": 1640
    },
    {
      "epoch": 0.5170110788088316,
      "grad_norm": 0.204507514834404,
      "learning_rate": 0.0005575012876226347,
      "loss": 1.831,
      "step": 1645
    },
    {
      "epoch": 0.5185825410544511,
      "grad_norm": 0.16311819851398468,
      "learning_rate": 0.0005547744142913084,
      "loss": 1.7805,
      "step": 1650
    },
    {
      "epoch": 0.5201540033000707,
      "grad_norm": 0.17227725684642792,
      "learning_rate": 0.0005520458909858048,
      "loss": 1.8119,
      "step": 1655
    },
    {
      "epoch": 0.5217254655456902,
      "grad_norm": 0.22001691162586212,
      "learning_rate": 0.0005493157998976559,
      "loss": 1.7803,
      "step": 1660
    },
    {
      "epoch": 0.5232969277913098,
      "grad_norm": 0.3537323474884033,
      "learning_rate": 0.0005465842232656194,
      "loss": 1.8142,
      "step": 1665
    },
    {
      "epoch": 0.5248683900369293,
      "grad_norm": 0.7048746347427368,
      "learning_rate": 0.0005438512433732023,
      "loss": 1.7881,
      "step": 1670
    },
    {
      "epoch": 0.5264398522825489,
      "grad_norm": 0.24534624814987183,
      "learning_rate": 0.0005411169425461822,
      "loss": 1.8228,
      "step": 1675
    },
    {
      "epoch": 0.5280113145281684,
      "grad_norm": 0.16271162033081055,
      "learning_rate": 0.0005383814031501272,
      "loss": 1.8155,
      "step": 1680
    },
    {
      "epoch": 0.529582776773788,
      "grad_norm": 0.3233349323272705,
      "learning_rate": 0.0005356447075879153,
| "loss": 1.8067, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 0.5311542390194075, | |
| "grad_norm": 0.20079372823238373, | |
| "learning_rate": 0.0005329069382972513, | |
| "loss": 1.8035, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.5327257012650272, | |
| "grad_norm": 0.17062976956367493, | |
| "learning_rate": 0.0005301681777481846, | |
| "loss": 1.7636, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 0.5342971635106467, | |
| "grad_norm": 0.24763086438179016, | |
| "learning_rate": 0.0005274285084406234, | |
| "loss": 1.8238, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.5358686257562663, | |
| "grad_norm": 0.18531553447246552, | |
| "learning_rate": 0.0005246880129018515, | |
| "loss": 1.7826, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 0.5374400880018858, | |
| "grad_norm": 0.13745392858982086, | |
| "learning_rate": 0.0005219467736840409, | |
| "loss": 1.7463, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.5390115502475054, | |
| "grad_norm": 0.2211742103099823, | |
| "learning_rate": 0.0005192048733617654, | |
| "loss": 1.775, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 0.5405830124931249, | |
| "grad_norm": 0.1478564590215683, | |
| "learning_rate": 0.0005164623945295136, | |
| "loss": 1.7752, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.5421544747387445, | |
| "grad_norm": 0.19443662464618683, | |
| "learning_rate": 0.0005137194197992001, | |
| "loss": 1.7751, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.543725936984364, | |
| "grad_norm": 0.27985528111457825, | |
| "learning_rate": 0.0005109760317976782, | |
| "loss": 1.7892, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.5452973992299835, | |
| "grad_norm": 0.3474476635456085, | |
| "learning_rate": 0.0005082323131642496, | |
| "loss": 1.7856, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 0.5468688614756031, | |
| "grad_norm": 0.19026850163936615, | |
| "learning_rate": 0.0005054883465481761, | |
| "loss": 1.7514, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5484403237212226, | |
| "grad_norm": 0.15917906165122986, | |
| "learning_rate": 0.0005027442146061889, | |
| "loss": 1.8218, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 0.5500117859668422, | |
| "grad_norm": 0.1620151400566101, | |
| "learning_rate": 0.0005, | |
| "loss": 1.7814, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5515832482124617, | |
| "grad_norm": 0.19481198489665985, | |
| "learning_rate": 0.0004972557853938111, | |
| "loss": 1.8251, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 0.5531547104580813, | |
| "grad_norm": 0.20308136940002441, | |
| "learning_rate": 0.000494511653451824, | |
| "loss": 1.8222, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5547261727037008, | |
| "grad_norm": 0.1648699939250946, | |
| "learning_rate": 0.0004917676868357503, | |
| "loss": 1.81, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 0.5562976349493204, | |
| "grad_norm": 0.2805931568145752, | |
| "learning_rate": 0.0004890239682023217, | |
| "loss": 1.7743, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5578690971949399, | |
| "grad_norm": 0.22301295399665833, | |
| "learning_rate": 0.00048628058020080007, | |
| "loss": 1.7481, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.5594405594405595, | |
| "grad_norm": 0.18746043741703033, | |
| "learning_rate": 0.0004835376054704866, | |
| "loss": 1.7545, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.561012021686179, | |
| "grad_norm": 0.22121259570121765, | |
| "learning_rate": 0.00048079512663823474, | |
| "loss": 1.8134, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 0.5625834839317986, | |
| "grad_norm": 0.22781746089458466, | |
| "learning_rate": 0.0004780532263159592, | |
| "loss": 1.8077, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5641549461774181, | |
| "grad_norm": 0.26571694016456604, | |
| "learning_rate": 0.00047531198709814857, | |
| "loss": 1.8487, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 0.5657264084230377, | |
| "grad_norm": 0.1296350359916687, | |
| "learning_rate": 0.00047257149155937667, | |
| "loss": 1.7883, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5672978706686572, | |
| "grad_norm": 0.18556547164916992, | |
| "learning_rate": 0.00046983182225181555, | |
| "loss": 1.7306, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 0.5688693329142768, | |
| "grad_norm": 0.1794668436050415, | |
| "learning_rate": 0.00046709306170274867, | |
| "loss": 1.8121, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5704407951598963, | |
| "grad_norm": 0.14622707664966583, | |
| "learning_rate": 0.0004643552924120847, | |
| "loss": 1.7723, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 0.5720122574055159, | |
| "grad_norm": 0.2089068442583084, | |
| "learning_rate": 0.00046161859684987303, | |
| "loss": 1.792, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5735837196511354, | |
| "grad_norm": 0.18128232657909393, | |
| "learning_rate": 0.000458883057453818, | |
| "loss": 1.7802, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.575155181896755, | |
| "grad_norm": 0.3529801666736603, | |
| "learning_rate": 0.00045614875662679797, | |
| "loss": 1.7848, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.5767266441423745, | |
| "grad_norm": 0.1691349595785141, | |
| "learning_rate": 0.00045341577673438073, | |
| "loss": 1.7563, | |
| "step": 1835 | |
| }, | |
| { | |
| "epoch": 0.5782981063879941, | |
| "grad_norm": 0.2139570415019989, | |
| "learning_rate": 0.00045068420010234417, | |
| "loss": 1.7557, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5798695686336136, | |
| "grad_norm": 0.3797873556613922, | |
| "learning_rate": 0.00044795410901419527, | |
| "loss": 1.8131, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 0.5814410308792332, | |
| "grad_norm": 0.20959897339344025, | |
| "learning_rate": 0.00044522558570869177, | |
| "loss": 1.7783, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5830124931248527, | |
| "grad_norm": 0.21909403800964355, | |
| "learning_rate": 0.0004424987123773653, | |
| "loss": 1.7801, | |
| "step": 1855 | |
| }, | |
| { | |
| "epoch": 0.5845839553704723, | |
| "grad_norm": 0.18797878921031952, | |
| "learning_rate": 0.0004397735711620451, | |
| "loss": 1.7918, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5861554176160918, | |
| "grad_norm": 0.15942728519439697, | |
| "learning_rate": 0.0004370502441523834, | |
| "loss": 1.7746, | |
| "step": 1865 | |
| }, | |
| { | |
| "epoch": 0.5877268798617113, | |
| "grad_norm": 0.14537079632282257, | |
| "learning_rate": 0.0004343288133833835, | |
| "loss": 1.7475, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5892983421073309, | |
| "grad_norm": 0.2374078631401062, | |
| "learning_rate": 0.0004316093608329275, | |
| "loss": 1.802, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.5908698043529504, | |
| "grad_norm": 0.16053801774978638, | |
| "learning_rate": 0.000428891968419308, | |
| "loss": 1.7559, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.59244126659857, | |
| "grad_norm": 0.15984462201595306, | |
| "learning_rate": 0.00042617671799875947, | |
| "loss": 1.7853, | |
| "step": 1885 | |
| }, | |
| { | |
| "epoch": 0.5940127288441895, | |
| "grad_norm": 0.1509033590555191, | |
| "learning_rate": 0.00042346369136299334, | |
| "loss": 1.78, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5955841910898091, | |
| "grad_norm": 0.21747443079948425, | |
| "learning_rate": 0.0004207529702367335, | |
| "loss": 1.7661, | |
| "step": 1895 | |
| }, | |
| { | |
| "epoch": 0.5971556533354286, | |
| "grad_norm": 0.23470966517925262, | |
| "learning_rate": 0.00041804463627525504, | |
| "loss": 1.7534, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.5987271155810482, | |
| "grad_norm": 0.1689888834953308, | |
| "learning_rate": 0.00041533877106192407, | |
| "loss": 1.7705, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 0.6002985778266677, | |
| "grad_norm": 0.1531875878572464, | |
| "learning_rate": 0.0004126354561057404, | |
| "loss": 1.7506, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.6018700400722873, | |
| "grad_norm": 0.22668935358524323, | |
| "learning_rate": 0.00040993477283888266, | |
| "loss": 1.781, | |
| "step": 1915 | |
| }, | |
| { | |
| "epoch": 0.6034415023179068, | |
| "grad_norm": 0.15343786776065826, | |
| "learning_rate": 0.0004072368026142541, | |
| "loss": 1.7676, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.6050129645635264, | |
| "grad_norm": 0.19431781768798828, | |
| "learning_rate": 0.0004045416267030335, | |
| "loss": 1.7892, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.6065844268091459, | |
| "grad_norm": 0.1378578096628189, | |
| "learning_rate": 0.00040184932629222574, | |
| "loss": 1.7712, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.6081558890547655, | |
| "grad_norm": 0.19014595448970795, | |
| "learning_rate": 0.0003991599824822174, | |
| "loss": 1.8178, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 0.609727351300385, | |
| "grad_norm": 0.21545611321926117, | |
| "learning_rate": 0.00039647367628433246, | |
| "loss": 1.8122, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.6112988135460046, | |
| "grad_norm": 0.12300246208906174, | |
| "learning_rate": 0.0003937904886183933, | |
| "loss": 1.7544, | |
| "step": 1945 | |
| }, | |
| { | |
| "epoch": 0.6128702757916241, | |
| "grad_norm": 0.18749244511127472, | |
| "learning_rate": 0.00039111050031028193, | |
| "loss": 1.7679, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.6144417380372437, | |
| "grad_norm": 0.20615063607692719, | |
| "learning_rate": 0.00038843379208950617, | |
| "loss": 1.7784, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 0.6160132002828632, | |
| "grad_norm": 0.1621728390455246, | |
| "learning_rate": 0.0003857604445867677, | |
| "loss": 1.7688, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.6175846625284828, | |
| "grad_norm": 0.2304105907678604, | |
| "learning_rate": 0.00038309053833153234, | |
| "loss": 1.8177, | |
| "step": 1965 | |
| }, | |
| { | |
| "epoch": 0.6191561247741023, | |
| "grad_norm": 0.19086576998233795, | |
| "learning_rate": 0.0003804241537496055, | |
| "loss": 1.7566, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.6207275870197219, | |
| "grad_norm": 0.21728602051734924, | |
| "learning_rate": 0.00037776137116070867, | |
| "loss": 1.7514, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.6222990492653414, | |
| "grad_norm": 0.1872587502002716, | |
| "learning_rate": 0.0003751022707760605, | |
| "loss": 1.8045, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.623870511510961, | |
| "grad_norm": 0.21256718039512634, | |
| "learning_rate": 0.00037244693269596, | |
| "loss": 1.7916, | |
| "step": 1985 | |
| }, | |
| { | |
| "epoch": 0.6254419737565805, | |
| "grad_norm": 0.2095334529876709, | |
| "learning_rate": 0.00036979543690737407, | |
| "loss": 1.7581, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.6270134360022, | |
| "grad_norm": 0.22193801403045654, | |
| "learning_rate": 0.00036714786328152804, | |
| "loss": 1.779, | |
| "step": 1995 | |
| }, | |
| { | |
| "epoch": 0.6285848982478196, | |
| "grad_norm": 0.16215133666992188, | |
| "learning_rate": 0.00036450429157149934, | |
| "loss": 1.7565, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.6301563604934391, | |
| "grad_norm": 0.17526470124721527, | |
| "learning_rate": 0.00036186480140981583, | |
| "loss": 1.789, | |
| "step": 2005 | |
| }, | |
| { | |
| "epoch": 0.6317278227390587, | |
| "grad_norm": 0.3180091679096222, | |
| "learning_rate": 0.00035922947230605605, | |
| "loss": 1.7617, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.6332992849846782, | |
| "grad_norm": 0.11836399137973785, | |
| "learning_rate": 0.00035659838364445503, | |
| "loss": 1.8111, | |
| "step": 2015 | |
| }, | |
| { | |
| "epoch": 0.6348707472302978, | |
| "grad_norm": 0.15318524837493896, | |
| "learning_rate": 0.0003539716146815122, | |
| "loss": 1.7409, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.6364422094759173, | |
| "grad_norm": 0.13768509030342102, | |
| "learning_rate": 0.0003513492445436048, | |
| "loss": 1.7733, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.6380136717215369, | |
| "grad_norm": 0.16969747841358185, | |
| "learning_rate": 0.0003487313522246036, | |
| "loss": 1.8085, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.6395851339671564, | |
| "grad_norm": 0.15154893696308136, | |
| "learning_rate": 0.00034611801658349393, | |
| "loss": 1.7053, | |
| "step": 2035 | |
| }, | |
| { | |
| "epoch": 0.641156596212776, | |
| "grad_norm": 0.16899384558200836, | |
| "learning_rate": 0.0003435093163419998, | |
| "loss": 1.8229, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.6427280584583955, | |
| "grad_norm": 0.23929907381534576, | |
| "learning_rate": 0.00034090533008221234, | |
| "loss": 1.7719, | |
| "step": 2045 | |
| }, | |
| { | |
| "epoch": 0.6442995207040151, | |
| "grad_norm": 0.12470386922359467, | |
| "learning_rate": 0.00033830613624422377, | |
| "loss": 1.8131, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.6458709829496346, | |
| "grad_norm": 0.15960881114006042, | |
| "learning_rate": 0.00033571181312376335, | |
| "loss": 1.7428, | |
| "step": 2055 | |
| }, | |
| { | |
| "epoch": 0.6474424451952542, | |
| "grad_norm": 0.2688332796096802, | |
| "learning_rate": 0.00033312243886983906, | |
| "loss": 1.7652, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.6490139074408737, | |
| "grad_norm": 0.20620514452457428, | |
| "learning_rate": 0.00033053809148238423, | |
| "loss": 1.7579, | |
| "step": 2065 | |
| }, | |
| { | |
| "epoch": 0.6505853696864933, | |
| "grad_norm": 0.18862473964691162, | |
| "learning_rate": 0.0003279588488099073, | |
| "loss": 1.7364, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.6521568319321128, | |
| "grad_norm": 0.155025452375412, | |
| "learning_rate": 0.0003253847885471469, | |
| "loss": 1.7118, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.6537282941777324, | |
| "grad_norm": 0.27940723299980164, | |
| "learning_rate": 0.0003228159882327317, | |
| "loss": 1.6794, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.6552997564233519, | |
| "grad_norm": 0.19810332357883453, | |
| "learning_rate": 0.0003202525252468443, | |
| "loss": 1.7879, | |
| "step": 2085 | |
| }, | |
| { | |
| "epoch": 0.6568712186689715, | |
| "grad_norm": 0.1640649139881134, | |
| "learning_rate": 0.00031769447680889064, | |
| "loss": 1.7904, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.658442680914591, | |
| "grad_norm": 0.2099301815032959, | |
| "learning_rate": 0.00031514191997517385, | |
| "loss": 1.7922, | |
| "step": 2095 | |
| }, | |
| { | |
| "epoch": 0.6600141431602106, | |
| "grad_norm": 0.19281212985515594, | |
| "learning_rate": 0.0003125949316365728, | |
| "loss": 1.7957, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6615856054058301, | |
| "grad_norm": 0.20853348076343536, | |
| "learning_rate": 0.00031005358851622633, | |
| "loss": 1.7531, | |
| "step": 2105 | |
| }, | |
| { | |
| "epoch": 0.6631570676514497, | |
| "grad_norm": 0.16896933317184448, | |
| "learning_rate": 0.00030751796716722157, | |
| "loss": 1.7632, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6647285298970692, | |
| "grad_norm": 0.6465707421302795, | |
| "learning_rate": 0.0003049881439702888, | |
| "loss": 1.7804, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 0.6662999921426888, | |
| "grad_norm": 0.1943897157907486, | |
| "learning_rate": 0.00030246419513149967, | |
| "loss": 1.7897, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6678714543883083, | |
| "grad_norm": 0.20024192333221436, | |
| "learning_rate": 0.00029994619667997216, | |
| "loss": 1.734, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.6694429166339279, | |
| "grad_norm": 0.18751543760299683, | |
| "learning_rate": 0.0002974342244655804, | |
| "loss": 1.7113, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.6710143788795474, | |
| "grad_norm": 0.1718306988477707, | |
| "learning_rate": 0.0002949283541566694, | |
| "loss": 1.6794, | |
| "step": 2135 | |
| }, | |
| { | |
| "epoch": 0.672585841125167, | |
| "grad_norm": 0.18411104381084442, | |
| "learning_rate": 0.0002924286612377764, | |
| "loss": 1.7223, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6741573033707865, | |
| "grad_norm": 0.14647985994815826, | |
| "learning_rate": 0.0002899352210073562, | |
| "loss": 1.7483, | |
| "step": 2145 | |
| }, | |
| { | |
| "epoch": 0.675728765616406, | |
| "grad_norm": 0.27714547514915466, | |
| "learning_rate": 0.0002874481085755133, | |
| "loss": 1.7302, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6773002278620256, | |
| "grad_norm": 0.32130590081214905, | |
| "learning_rate": 0.0002849673988617399, | |
| "loss": 1.812, | |
| "step": 2155 | |
| }, | |
| { | |
| "epoch": 0.6788716901076451, | |
| "grad_norm": 0.17637619376182556, | |
| "learning_rate": 0.000282493166592658, | |
| "loss": 1.729, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6804431523532647, | |
| "grad_norm": 0.19782552123069763, | |
| "learning_rate": 0.0002800254862997695, | |
| "loss": 1.7661, | |
| "step": 2165 | |
| }, | |
| { | |
| "epoch": 0.6820146145988842, | |
| "grad_norm": 0.21681202948093414, | |
| "learning_rate": 0.0002775644323172105, | |
| "loss": 1.7431, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6835860768445038, | |
| "grad_norm": 0.21317927539348602, | |
| "learning_rate": 0.0002751100787795118, | |
| "loss": 1.7573, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.6851575390901233, | |
| "grad_norm": 0.2029709368944168, | |
| "learning_rate": 0.0002726624996193665, | |
| "loss": 1.776, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6867290013357429, | |
| "grad_norm": 0.15296722948551178, | |
| "learning_rate": 0.0002702217685654028, | |
| "loss": 1.7741, | |
| "step": 2185 | |
| }, | |
| { | |
| "epoch": 0.6883004635813624, | |
| "grad_norm": 0.14518578350543976, | |
| "learning_rate": 0.00026778795913996224, | |
| "loss": 1.7665, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.689871925826982, | |
| "grad_norm": 0.17168502509593964, | |
| "learning_rate": 0.0002653611446568861, | |
| "loss": 1.7154, | |
| "step": 2195 | |
| }, | |
| { | |
| "epoch": 0.6914433880726015, | |
| "grad_norm": 0.1342601329088211, | |
| "learning_rate": 0.00026294139821930593, | |
| "loss": 1.7174, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.6930148503182211, | |
| "grad_norm": 0.1369861364364624, | |
| "learning_rate": 0.00026052879271744263, | |
| "loss": 1.753, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 0.6945863125638406, | |
| "grad_norm": 0.1410820037126541, | |
| "learning_rate": 0.00025812340082640936, | |
| "loss": 1.6835, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6961577748094602, | |
| "grad_norm": 0.16122332215309143, | |
| "learning_rate": 0.00025572529500402365, | |
| "loss": 1.7404, | |
| "step": 2215 | |
| }, | |
| { | |
| "epoch": 0.6977292370550797, | |
| "grad_norm": 0.19913320243358612, | |
| "learning_rate": 0.00025333454748862396, | |
| "loss": 1.7498, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6993006993006993, | |
| "grad_norm": 0.14058250188827515, | |
| "learning_rate": 0.0002509512302968941, | |
| "loss": 1.7571, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.7008721615463188, | |
| "grad_norm": 0.2978239059448242, | |
| "learning_rate": 0.0002485754152216931, | |
| "loss": 1.7602, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.7024436237919384, | |
| "grad_norm": 0.13628768920898438, | |
| "learning_rate": 0.0002462071738298936, | |
| "loss": 1.7331, | |
| "step": 2235 | |
| }, | |
| { | |
| "epoch": 0.7040150860375579, | |
| "grad_norm": 0.16833730041980743, | |
| "learning_rate": 0.00024384657746022564, | |
| "loss": 1.7697, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.7055865482831775, | |
| "grad_norm": 0.19926880300045013, | |
| "learning_rate": 0.00024149369722112717, | |
| "loss": 1.7079, | |
| "step": 2245 | |
| }, | |
| { | |
| "epoch": 0.707158010528797, | |
| "grad_norm": 0.13357147574424744, | |
| "learning_rate": 0.00023914860398860255, | |
| "loss": 1.702, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.7087294727744166, | |
| "grad_norm": 0.1692400723695755, | |
| "learning_rate": 0.00023681136840408786, | |
| "loss": 1.7342, | |
| "step": 2255 | |
| }, | |
| { | |
| "epoch": 0.7103009350200361, | |
| "grad_norm": 0.1352614313364029, | |
| "learning_rate": 0.00023448206087232267, | |
| "loss": 1.7437, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.7118723972656557, | |
| "grad_norm": 0.17008154094219208, | |
| "learning_rate": 0.00023216075155922845, | |
| "loss": 1.6892, | |
| "step": 2265 | |
| }, | |
| { | |
| "epoch": 0.7134438595112752, | |
| "grad_norm": 0.17110054194927216, | |
| "learning_rate": 0.0002298475103897964, | |
| "loss": 1.7326, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.7150153217568948, | |
| "grad_norm": 0.16124136745929718, | |
| "learning_rate": 0.0002275424070459803, | |
| "loss": 1.766, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.7165867840025143, | |
| "grad_norm": 0.14922770857810974, | |
| "learning_rate": 0.000225245510964597, | |
| "loss": 1.7667, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.7181582462481338, | |
| "grad_norm": 0.17472444474697113, | |
| "learning_rate": 0.000222956891335236, | |
| "loss": 1.7224, | |
| "step": 2285 | |
| }, | |
| { | |
| "epoch": 0.7197297084937534, | |
| "grad_norm": 0.14927974343299866, | |
| "learning_rate": 0.00022067661709817383, | |
| "loss": 1.7444, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.7213011707393729, | |
| "grad_norm": 0.1595926582813263, | |
| "learning_rate": 0.00021840475694229888, | |
| "loss": 1.7501, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 0.7228726329849925, | |
| "grad_norm": 0.1755470633506775, | |
| "learning_rate": 0.00021614137930304068, | |
| "loss": 1.7742, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.724444095230612, | |
| "grad_norm": 0.15455584228038788, | |
| "learning_rate": 0.00021388655236030985, | |
| "loss": 1.7152, | |
| "step": 2305 | |
| }, | |
| { | |
| "epoch": 0.7260155574762316, | |
| "grad_norm": 0.13549718260765076, | |
| "learning_rate": 0.00021164034403644338, | |
| "loss": 1.7603, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.7275870197218511, | |
| "grad_norm": 0.20018717646598816, | |
| "learning_rate": 0.00020940282199415915, | |
| "loss": 1.7403, | |
| "step": 2315 | |
| }, | |
| { | |
| "epoch": 0.7291584819674707, | |
| "grad_norm": 0.14150027930736542, | |
| "learning_rate": 0.00020717405363451696, | |
| "loss": 1.7578, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.7307299442130902, | |
| "grad_norm": 0.17683018743991852, | |
| "learning_rate": 0.00020495410609488912, | |
| "loss": 1.7105, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.7323014064587098, | |
| "grad_norm": 0.1376308798789978, | |
| "learning_rate": 0.00020274304624693778, | |
| "loss": 1.6991, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.7338728687043293, | |
| "grad_norm": 0.2307780683040619, | |
| "learning_rate": 0.0002005409406946, | |
| "loss": 1.7478, | |
| "step": 2335 | |
| }, | |
| { | |
| "epoch": 0.7354443309499489, | |
| "grad_norm": 0.22559094429016113, | |
| "learning_rate": 0.00019834785577208192, | |
| "loss": 1.7321, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.7370157931955684, | |
| "grad_norm": 0.2081470936536789, | |
| "learning_rate": 0.00019616385754186078, | |
| "loss": 1.7659, | |
| "step": 2345 | |
| }, | |
| { | |
| "epoch": 0.738587255441188, | |
| "grad_norm": 0.17917020618915558, | |
| "learning_rate": 0.00019398901179269474, | |
| "loss": 1.7489, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.7401587176868075, | |
| "grad_norm": 0.1390395164489746, | |
| "learning_rate": 0.00019182338403764038, | |
| "loss": 1.7142, | |
| "step": 2355 | |
| }, | |
| { | |
| "epoch": 0.7417301799324271, | |
| "grad_norm": 0.13578888773918152, | |
| "learning_rate": 0.00018966703951208048, | |
| "loss": 1.7468, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.7433016421780466, | |
| "grad_norm": 0.1478307545185089, | |
| "learning_rate": 0.00018752004317175832, | |
| "loss": 1.7042, | |
| "step": 2365 | |
| }, | |
| { | |
| "epoch": 0.7448731044236663, | |
| "grad_norm": 0.1503387987613678, | |
| "learning_rate": 0.00018538245969082056, | |
| "loss": 1.7176, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.7464445666692858, | |
| "grad_norm": 0.15558657050132751, | |
| "learning_rate": 0.00018325435345986995, | |
| "loss": 1.7821, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.7480160289149054, | |
| "grad_norm": 0.14257632195949554, | |
| "learning_rate": 0.0001811357885840254, | |
| "loss": 1.7148, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.7495874911605249, | |
| "grad_norm": 0.13766352832317352, | |
| "learning_rate": 0.00017902682888099026, | |
| "loss": 1.7506, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 0.7511589534061445, | |
| "grad_norm": 0.16231706738471985, | |
| "learning_rate": 0.00017692753787913057, | |
| "loss": 1.7785, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.752730415651764, | |
| "grad_norm": 0.15460623800754547, | |
| "learning_rate": 0.00017483797881556173, | |
| "loss": 1.7757, | |
| "step": 2395 | |
| }, | |
| { | |
| "epoch": 0.7543018778973836, | |
| "grad_norm": 0.15915700793266296, | |
| "learning_rate": 0.000172758214634243, | |
| "loss": 1.6892, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.7558733401430031, | |
| "grad_norm": 0.14042919874191284, | |
| "learning_rate": 0.0001706883079840812, | |
| "loss": 1.7892, | |
| "step": 2405 | |
| }, | |
| { | |
| "epoch": 0.7574448023886227, | |
| "grad_norm": 0.23500895500183105, | |
| "learning_rate": 0.00016862832121704435, | |
| "loss": 1.7211, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.7590162646342422, | |
| "grad_norm": 0.20954306423664093, | |
| "learning_rate": 0.00016657831638628297, | |
| "loss": 1.7364, | |
| "step": 2415 | |
| }, | |
| { | |
| "epoch": 0.7605877268798618, | |
| "grad_norm": 0.18037594854831696, | |
| "learning_rate": 0.00016453835524426086, | |
| "loss": 1.7445, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.7621591891254813, | |
| "grad_norm": 0.1708739697933197, | |
| "learning_rate": 0.00016250849924089484, | |
| "loss": 1.7493, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.7637306513711009, | |
| "grad_norm": 0.16356390714645386, | |
| "learning_rate": 0.00016048880952170374, | |
| "loss": 1.7218, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.7653021136167204, | |
| "grad_norm": 0.1173071339726448, | |
| "learning_rate": 0.00015847934692596688, | |
| "loss": 1.7069, | |
| "step": 2435 | |
| }, | |
| { | |
| "epoch": 0.76687357586234, | |
| "grad_norm": 0.15243308246135712, | |
| "learning_rate": 0.00015648017198489106, | |
| "loss": 1.7909, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7684450381079595, | |
| "grad_norm": 0.12692369520664215, | |
| "learning_rate": 0.00015449134491978683, | |
| "loss": 1.7751, | |
| "step": 2445 | |
| }, | |
| { | |
| "epoch": 0.770016500353579, | |
| "grad_norm": 0.13145235180854797, | |
| "learning_rate": 0.00015251292564025527, | |
| "loss": 1.76, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7715879625991986, | |
| "grad_norm": 0.12512874603271484, | |
| "learning_rate": 0.00015054497374238275, | |
| "loss": 1.7219, | |
| "step": 2455 | |
| }, | |
| { | |
| "epoch": 0.7731594248448181, | |
| "grad_norm": 0.1528131067752838, | |
| "learning_rate": 0.0001485875485069456, | |
| "loss": 1.7519, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7747308870904377, | |
| "grad_norm": 0.213288813829422, | |
| "learning_rate": 0.00014664070889762492, | |
| "loss": 1.7176, | |
| "step": 2465 | |
| }, | |
| { | |
| "epoch": 0.7763023493360572, | |
| "grad_norm": 0.12732981145381927, | |
| "learning_rate": 0.00014470451355923025, | |
| "loss": 1.7407, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7778738115816768, | |
| "grad_norm": 0.13688194751739502, | |
| "learning_rate": 0.00014277902081593252, | |
| "loss": 1.7018, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.7794452738272963, | |
| "grad_norm": 0.1379719078540802, | |
| "learning_rate": 0.00014086428866950744, | |
| "loss": 1.7401, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7810167360729159, | |
| "grad_norm": 0.15917198359966278, | |
| "learning_rate": 0.00013896037479758878, | |
| "loss": 1.7188, | |
| "step": 2485 | |
| }, | |
| { | |
| "epoch": 0.7825881983185354, | |
| "grad_norm": 0.16691961884498596, | |
| "learning_rate": 0.00013706733655193055, | |
| "loss": 1.6855, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.784159660564155, | |
| "grad_norm": 0.11223277449607849, | |
| "learning_rate": 0.0001351852309566788, | |
| "loss": 1.7897, | |
| "step": 2495 | |
| }, | |
| { | |
| "epoch": 0.7857311228097745, | |
| "grad_norm": 0.13406723737716675, | |
| "learning_rate": 0.00013331411470665505, | |
| "loss": 1.7386, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7873025850553941, | |
| "grad_norm": 0.2705506980419159, | |
| "learning_rate": 0.0001314540441656476, | |
| "loss": 1.6985, | |
| "step": 2505 | |
| }, | |
| { | |
| "epoch": 0.7888740473010136, | |
| "grad_norm": 0.22507880628108978, | |
| "learning_rate": 0.00012960507536471428, | |
| "loss": 1.721, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7904455095466332, | |
| "grad_norm": 0.1900377720594406, | |
| "learning_rate": 0.0001277672640004936, | |
| "loss": 1.7351, | |
| "step": 2515 | |
| }, | |
| { | |
| "epoch": 0.7920169717922527, | |
| "grad_norm": 0.17875802516937256, | |
| "learning_rate": 0.0001259406654335285, | |
| "loss": 1.7385, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7935884340378723, | |
| "grad_norm": 0.23769760131835938, | |
| "learning_rate": 0.0001241253346865972, | |
| "loss": 1.7105, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.7951598962834918, | |
| "grad_norm": 0.15305249392986298, | |
| "learning_rate": 0.000122321326443057, | |
| "loss": 1.7535, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7967313585291114, | |
| "grad_norm": 0.13712617754936218, | |
| "learning_rate": 0.00012052869504519603, | |
| "loss": 1.6869, | |
| "step": 2535 | |
| }, | |
| { | |
| "epoch": 0.7983028207747309, | |
| "grad_norm": 0.14712879061698914, | |
| "learning_rate": 0.0001187474944925972, | |
| "loss": 1.6889, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7998742830203505, | |
| "grad_norm": 0.20566821098327637, | |
| "learning_rate": 0.00011697777844051105, | |
| "loss": 1.73, | |
| "step": 2545 | |
| }, | |
| { | |
| "epoch": 0.80144574526597, | |
| "grad_norm": 0.16050195693969727, | |
| "learning_rate": 0.00011521960019823913, | |
| "loss": 1.705, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.8030172075115896, | |
| "grad_norm": 0.2171618640422821, | |
| "learning_rate": 0.00011347301272752913, | |
| "loss": 1.7078, | |
| "step": 2555 | |
| }, | |
| { | |
| "epoch": 0.8045886697572091, | |
| "grad_norm": 0.19696617126464844, | |
| "learning_rate": 0.00011173806864097885, | |
| "loss": 1.7577, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.8061601320028287, | |
| "grad_norm": 0.15522879362106323, | |
| "learning_rate": 0.00011001482020045128, | |
| "loss": 1.7271, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 0.8077315942484482, | |
| "grad_norm": 0.13360817730426788, | |
| "learning_rate": 0.00010830331931550047, | |
| "loss": 1.7681, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.8093030564940678, | |
| "grad_norm": 0.17120471596717834, | |
| "learning_rate": 0.0001066036175418082, | |
| "loss": 1.7188, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.8108745187396873, | |
| "grad_norm": 0.13265547156333923, | |
| "learning_rate": 0.00010491576607963066, | |
| "loss": 1.7485, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.8124459809853068, | |
| "grad_norm": 0.14022652804851532, | |
| "learning_rate": 0.0001032398157722556, | |
| "loss": 1.6629, | |
| "step": 2585 | |
| }, | |
| { | |
| "epoch": 0.8140174432309264, | |
| "grad_norm": 0.14860029518604279, | |
| "learning_rate": 0.0001015758171044719, | |
| "loss": 1.6937, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.815588905476546, | |
| "grad_norm": 0.1722240149974823, | |
| "learning_rate": 9.992382020104807e-05, | |
| "loss": 1.7502, | |
| "step": 2595 | |
| }, | |
| { | |
| "epoch": 0.8171603677221655, | |
| "grad_norm": 0.1788044422864914, | |
| "learning_rate": 9.828387482522216e-05, | |
| "loss": 1.6794, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.818731829967785, | |
| "grad_norm": 0.17370399832725525, | |
| "learning_rate": 9.66560303772035e-05, | |
| "loss": 1.6838, | |
| "step": 2605 | |
| }, | |
| { | |
| "epoch": 0.8203032922134046, | |
| "grad_norm": 0.12996020913124084, | |
| "learning_rate": 9.504033589268401e-05, | |
| "loss": 1.7152, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.8218747544590241, | |
| "grad_norm": 0.14151506125926971, | |
| "learning_rate": 9.343684004136121e-05, | |
| "loss": 1.7185, | |
| "step": 2615 | |
| }, | |
| { | |
| "epoch": 0.8234462167046437, | |
| "grad_norm": 0.15836787223815918, | |
| "learning_rate": 9.184559112547208e-05, | |
| "loss": 1.7237, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.8250176789502632, | |
| "grad_norm": 0.1398027092218399, | |
| "learning_rate": 9.026663707833843e-05, | |
| "loss": 1.7814, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.8265891411958828, | |
| "grad_norm": 0.14515486359596252, | |
| "learning_rate": 8.870002546292256e-05, | |
| "loss": 1.6791, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.8281606034415023, | |
| "grad_norm": 0.16488778591156006, | |
| "learning_rate": 8.714580347039492e-05, | |
| "loss": 1.75, | |
| "step": 2635 | |
| }, | |
| { | |
| "epoch": 0.8297320656871219, | |
| "grad_norm": 0.1568734496831894, | |
| "learning_rate": 8.560401791871186e-05, | |
| "loss": 1.7421, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.8313035279327414, | |
| "grad_norm": 0.15567830204963684, | |
| "learning_rate": 8.407471525120625e-05, | |
| "loss": 1.7411, | |
| "step": 2645 | |
| }, | |
| { | |
| "epoch": 0.832874990178361, | |
| "grad_norm": 0.16733458638191223, | |
| "learning_rate": 8.255794153518798e-05, | |
| "loss": 1.7286, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.8344464524239805, | |
| "grad_norm": 0.1331174075603485, | |
| "learning_rate": 8.10537424605558e-05, | |
| "loss": 1.6844, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.8360179146696001, | |
| "grad_norm": 0.18669481575489044, | |
| "learning_rate": 7.95621633384223e-05, | |
| "loss": 1.7329, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.8375893769152196, | |
| "grad_norm": 0.1392640769481659, | |
| "learning_rate": 7.808324909974745e-05, | |
| "loss": 1.7276, | |
| "step": 2665 | |
| }, | |
| { | |
| "epoch": 0.8391608391608392, | |
| "grad_norm": 0.15594840049743652, | |
| "learning_rate": 7.661704429398653e-05, | |
| "loss": 1.6907, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.8407323014064587, | |
| "grad_norm": 0.15163709223270416, | |
| "learning_rate": 7.516359308774695e-05, | |
| "loss": 1.7359, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.8423037636520783, | |
| "grad_norm": 0.14341649413108826, | |
| "learning_rate": 7.37229392634588e-05, | |
| "loss": 1.7746, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.8438752258976978, | |
| "grad_norm": 0.1520870178937912, | |
| "learning_rate": 7.229512621805562e-05, | |
| "loss": 1.7143, | |
| "step": 2685 | |
| }, | |
| { | |
| "epoch": 0.8454466881433174, | |
| "grad_norm": 0.1629820466041565, | |
| "learning_rate": 7.08801969616667e-05, | |
| "loss": 1.818, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.8470181503889369, | |
| "grad_norm": 0.14012764394283295, | |
| "learning_rate": 6.947819411632222e-05, | |
| "loss": 1.7606, | |
| "step": 2695 | |
| }, | |
| { | |
| "epoch": 0.8485896126345565, | |
| "grad_norm": 0.18674196302890778, | |
| "learning_rate": 6.808915991466902e-05, | |
| "loss": 1.7707, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.850161074880176, | |
| "grad_norm": 0.15248626470565796, | |
| "learning_rate": 6.671313619869857e-05, | |
| "loss": 1.7617, | |
| "step": 2705 | |
| }, | |
| { | |
| "epoch": 0.8517325371257956, | |
| "grad_norm": 0.185978963971138, | |
| "learning_rate": 6.535016441848573e-05, | |
| "loss": 1.6929, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.8533039993714151, | |
| "grad_norm": 0.14573198556900024, | |
| "learning_rate": 6.400028563094152e-05, | |
| "loss": 1.725, | |
| "step": 2715 | |
| }, | |
| { | |
| "epoch": 0.8548754616170346, | |
| "grad_norm": 0.18840889632701874, | |
| "learning_rate": 6.266354049857543e-05, | |
| "loss": 1.7846, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.8564469238626542, | |
| "grad_norm": 0.13628707826137543, | |
| "learning_rate": 6.13399692882709e-05, | |
| "loss": 1.7298, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.8580183861082737, | |
| "grad_norm": 0.14333242177963257, | |
| "learning_rate": 6.002961187007194e-05, | |
| "loss": 1.7341, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.8595898483538933, | |
| "grad_norm": 0.13763877749443054, | |
| "learning_rate": 5.873250771598265e-05, | |
| "loss": 1.7192, | |
| "step": 2735 | |
| }, | |
| { | |
| "epoch": 0.8611613105995128, | |
| "grad_norm": 0.14965565502643585, | |
| "learning_rate": 5.7448695898778106e-05, | |
| "loss": 1.7346, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.8627327728451324, | |
| "grad_norm": 0.11032088100910187, | |
| "learning_rate": 5.617821509082671e-05, | |
| "loss": 1.6652, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 0.8643042350907519, | |
| "grad_norm": 0.17685818672180176, | |
| "learning_rate": 5.49211035629264e-05, | |
| "loss": 1.8149, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.8658756973363715, | |
| "grad_norm": 0.14297960698604584, | |
| "learning_rate": 5.3677399183150674e-05, | |
| "loss": 1.7199, | |
| "step": 2755 | |
| }, | |
| { | |
| "epoch": 0.867447159581991, | |
| "grad_norm": 0.1297394186258316, | |
| "learning_rate": 5.244713941570889e-05, | |
| "loss": 1.7095, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.8690186218276106, | |
| "grad_norm": 0.13748817145824432, | |
| "learning_rate": 5.123036131981668e-05, | |
| "loss": 1.7151, | |
| "step": 2765 | |
| }, | |
| { | |
| "epoch": 0.8705900840732301, | |
| "grad_norm": 0.11178262531757355, | |
| "learning_rate": 5.002710154858065e-05, | |
| "loss": 1.7202, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.8721615463188497, | |
| "grad_norm": 0.12300541251897812, | |
| "learning_rate": 4.883739634789375e-05, | |
| "loss": 1.7699, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.8737330085644692, | |
| "grad_norm": 0.12739528715610504, | |
| "learning_rate": 4.7661281555343164e-05, | |
| "loss": 1.7716, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8753044708100888, | |
| "grad_norm": 0.12120873481035233, | |
| "learning_rate": 4.649879259913137e-05, | |
| "loss": 1.7218, | |
| "step": 2785 | |
| }, | |
| { | |
| "epoch": 0.8768759330557083, | |
| "grad_norm": 0.1548471450805664, | |
| "learning_rate": 4.534996449700879e-05, | |
| "loss": 1.7433, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8784473953013279, | |
| "grad_norm": 0.16176049411296844, | |
| "learning_rate": 4.421483185521835e-05, | |
| "loss": 1.728, | |
| "step": 2795 | |
| }, | |
| { | |
| "epoch": 0.8800188575469474, | |
| "grad_norm": 0.19101834297180176, | |
| "learning_rate": 4.309342886745399e-05, | |
| "loss": 1.7275, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.881590319792567, | |
| "grad_norm": 0.14591899514198303, | |
| "learning_rate": 4.198578931382979e-05, | |
| "loss": 1.7068, | |
| "step": 2805 | |
| }, | |
| { | |
| "epoch": 0.8831617820381865, | |
| "grad_norm": 0.16475893557071686, | |
| "learning_rate": 4.0891946559863055e-05, | |
| "loss": 1.7479, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8847332442838061, | |
| "grad_norm": 0.18267378211021423, | |
| "learning_rate": 3.981193355546869e-05, | |
| "loss": 1.7445, | |
| "step": 2815 | |
| }, | |
| { | |
| "epoch": 0.8863047065294256, | |
| "grad_norm": 0.10409973561763763, | |
| "learning_rate": 3.874578283396718e-05, | |
| "loss": 1.7277, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.8878761687750452, | |
| "grad_norm": 0.13992512226104736, | |
| "learning_rate": 3.769352651110419e-05, | |
| "loss": 1.7217, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.8894476310206647, | |
| "grad_norm": 0.1338614523410797, | |
| "learning_rate": 3.6655196284083314e-05, | |
| "loss": 1.7598, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8910190932662843, | |
| "grad_norm": 0.15006397664546967, | |
| "learning_rate": 3.563082343061108e-05, | |
| "loss": 1.7017, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 0.8925905555119038, | |
| "grad_norm": 0.14285215735435486, | |
| "learning_rate": 3.4620438807955125e-05, | |
| "loss": 1.7068, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8941620177575234, | |
| "grad_norm": 0.11642735451459885, | |
| "learning_rate": 3.3624072852014354e-05, | |
| "loss": 1.7363, | |
| "step": 2845 | |
| }, | |
| { | |
| "epoch": 0.8957334800031429, | |
| "grad_norm": 0.1364676058292389, | |
| "learning_rate": 3.2641755576402255e-05, | |
| "loss": 1.79, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8973049422487624, | |
| "grad_norm": 0.13236261904239655, | |
| "learning_rate": 3.16735165715426e-05, | |
| "loss": 1.7463, | |
| "step": 2855 | |
| }, | |
| { | |
| "epoch": 0.898876404494382, | |
| "grad_norm": 0.16500313580036163, | |
| "learning_rate": 3.071938500377852e-05, | |
| "loss": 1.7057, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.9004478667400015, | |
| "grad_norm": 0.15041331946849823, | |
| "learning_rate": 2.9779389614493558e-05, | |
| "loss": 1.8108, | |
| "step": 2865 | |
| }, | |
| { | |
| "epoch": 0.9020193289856211, | |
| "grad_norm": 0.1336473524570465, | |
| "learning_rate": 2.8853558719245833e-05, | |
| "loss": 1.7186, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.9035907912312406, | |
| "grad_norm": 0.1339365541934967, | |
| "learning_rate": 2.794192020691544e-05, | |
| "loss": 1.7605, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.9051622534768602, | |
| "grad_norm": 0.09974883496761322, | |
| "learning_rate": 2.704450153886423e-05, | |
| "loss": 1.7394, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.9067337157224797, | |
| "grad_norm": 0.1279599815607071, | |
| "learning_rate": 2.6161329748108253e-05, | |
| "loss": 1.7415, | |
| "step": 2885 | |
| }, | |
| { | |
| "epoch": 0.9083051779680993, | |
| "grad_norm": 0.1555188149213791, | |
| "learning_rate": 2.5292431438503905e-05, | |
| "loss": 1.8056, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.9098766402137188, | |
| "grad_norm": 0.19093474745750427, | |
| "learning_rate": 2.4437832783946234e-05, | |
| "loss": 1.7738, | |
| "step": 2895 | |
| }, | |
| { | |
| "epoch": 0.9114481024593384, | |
| "grad_norm": 0.09890997409820557, | |
| "learning_rate": 2.3597559527580692e-05, | |
| "loss": 1.6863, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.9130195647049579, | |
| "grad_norm": 0.14242489635944366, | |
| "learning_rate": 2.2771636981027467e-05, | |
| "loss": 1.6858, | |
| "step": 2905 | |
| }, | |
| { | |
| "epoch": 0.9145910269505775, | |
| "grad_norm": 0.15494489669799805, | |
| "learning_rate": 2.1960090023619205e-05, | |
| "loss": 1.7568, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.916162489196197, | |
| "grad_norm": 0.1707945168018341, | |
| "learning_rate": 2.1162943101651622e-05, | |
| "loss": 1.7361, | |
| "step": 2915 | |
| }, | |
| { | |
| "epoch": 0.9177339514418166, | |
| "grad_norm": 0.17627616226673126, | |
| "learning_rate": 2.038022022764685e-05, | |
| "loss": 1.791, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.9193054136874361, | |
| "grad_norm": 0.15799571573734283, | |
| "learning_rate": 1.9611944979630204e-05, | |
| "loss": 1.7233, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.9208768759330557, | |
| "grad_norm": 0.11753001809120178, | |
| "learning_rate": 1.8858140500420005e-05, | |
| "loss": 1.7659, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.9224483381786752, | |
| "grad_norm": 0.11653709411621094, | |
| "learning_rate": 1.8118829496930557e-05, | |
| "loss": 1.7428, | |
| "step": 2935 | |
| }, | |
| { | |
| "epoch": 0.9240198004242948, | |
| "grad_norm": 0.1365276426076889, | |
| "learning_rate": 1.739403423948782e-05, | |
| "loss": 1.728, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.9255912626699143, | |
| "grad_norm": 0.1241711974143982, | |
| "learning_rate": 1.668377656115877e-05, | |
| "loss": 1.7144, | |
| "step": 2945 | |
| }, | |
| { | |
| "epoch": 0.9271627249155339, | |
| "grad_norm": 0.15429584681987762, | |
| "learning_rate": 1.5988077857093775e-05, | |
| "loss": 1.6854, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.9287341871611534, | |
| "grad_norm": 0.11500924080610275, | |
| "learning_rate": 1.5306959083882078e-05, | |
| "loss": 1.753, | |
| "step": 2955 | |
| }, | |
| { | |
| "epoch": 0.930305649406773, | |
| "grad_norm": 0.12633784115314484, | |
| "learning_rate": 1.4640440758920293e-05, | |
| "loss": 1.7387, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.9318771116523925, | |
| "grad_norm": 0.13106156885623932, | |
| "learning_rate": 1.3988542959794625e-05, | |
| "loss": 1.7124, | |
| "step": 2965 | |
| }, | |
| { | |
| "epoch": 0.9334485738980121, | |
| "grad_norm": 0.10034507513046265, | |
| "learning_rate": 1.3351285323676022e-05, | |
| "loss": 1.7571, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.9350200361436316, | |
| "grad_norm": 0.1519390344619751, | |
| "learning_rate": 1.2728687046728526e-05, | |
| "loss": 1.6967, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.9365914983892512, | |
| "grad_norm": 0.15289808809757233, | |
| "learning_rate": 1.2120766883531087e-05, | |
| "loss": 1.7167, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.9381629606348707, | |
| "grad_norm": 0.11695173382759094, | |
| "learning_rate": 1.152754314651283e-05, | |
| "loss": 1.7096, | |
| "step": 2985 | |
| }, | |
| { | |
| "epoch": 0.9397344228804902, | |
| "grad_norm": 0.11231189966201782, | |
| "learning_rate": 1.0949033705400902e-05, | |
| "loss": 1.7092, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.9413058851261098, | |
| "grad_norm": 0.12071159482002258, | |
| "learning_rate": 1.0385255986682718e-05, | |
| "loss": 1.7096, | |
| "step": 2995 | |
| }, | |
| { | |
| "epoch": 0.9428773473717293, | |
| "grad_norm": 0.12497507780790329, | |
| "learning_rate": 9.836226973080786e-06, | |
| "loss": 1.7723, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.9444488096173489, | |
| "grad_norm": 0.11592131853103638, | |
| "learning_rate": 9.30196320304122e-06, | |
| "loss": 1.7458, | |
| "step": 3005 | |
| }, | |
| { | |
| "epoch": 0.9460202718629684, | |
| "grad_norm": 0.16098622977733612, | |
| "learning_rate": 8.782480770235246e-06, | |
| "loss": 1.7568, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.947591734108588, | |
| "grad_norm": 0.1251290738582611, | |
| "learning_rate": 8.277795323074933e-06, | |
| "loss": 1.7218, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 0.9491631963542075, | |
| "grad_norm": 0.15294887125492096, | |
| "learning_rate": 7.787922064241393e-06, | |
| "loss": 1.7038, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.9507346585998271, | |
| "grad_norm": 0.12903992831707, | |
| "learning_rate": 7.312875750227044e-06, | |
| "loss": 1.7287, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.9523061208454466, | |
| "grad_norm": 0.17125993967056274, | |
| "learning_rate": 6.852670690890961e-06, | |
| "loss": 1.7112, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.9538775830910662, | |
| "grad_norm": 0.13478563725948334, | |
| "learning_rate": 6.40732074902789e-06, | |
| "loss": 1.7588, | |
| "step": 3035 | |
| }, | |
| { | |
| "epoch": 0.9554490453366857, | |
| "grad_norm": 0.14833632111549377, | |
| "learning_rate": 5.97683933995069e-06, | |
| "loss": 1.7438, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.9570205075823053, | |
| "grad_norm": 0.16726098954677582, | |
| "learning_rate": 5.561239431086218e-06, | |
| "loss": 1.7639, | |
| "step": 3045 | |
| }, | |
| { | |
| "epoch": 0.9585919698279249, | |
| "grad_norm": 0.16357098519802094, | |
| "learning_rate": 5.160533541584578e-06, | |
| "loss": 1.6912, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.9601634320735445, | |
| "grad_norm": 0.12722498178482056, | |
| "learning_rate": 4.774733741942205e-06, | |
| "loss": 1.7576, | |
| "step": 3055 | |
| }, | |
| { | |
| "epoch": 0.961734894319164, | |
| "grad_norm": 0.16135632991790771, | |
| "learning_rate": 4.403851653638158e-06, | |
| "loss": 1.7702, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.9633063565647836, | |
| "grad_norm": 0.156170055270195, | |
| "learning_rate": 4.0478984487838935e-06, | |
| "loss": 1.7429, | |
| "step": 3065 | |
| }, | |
| { | |
| "epoch": 0.9648778188104031, | |
| "grad_norm": 0.1406138390302658, | |
| "learning_rate": 3.706884849787151e-06, | |
| "loss": 1.7209, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.9664492810560227, | |
| "grad_norm": 0.1389617770910263, | |
| "learning_rate": 3.3808211290284885e-06, | |
| "loss": 1.7126, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.9680207433016422, | |
| "grad_norm": 0.1216338723897934, | |
| "learning_rate": 3.0697171085521946e-06, | |
| "loss": 1.7186, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.9695922055472618, | |
| "grad_norm": 0.13428504765033722, | |
| "learning_rate": 2.7735821597701382e-06, | |
| "loss": 1.7334, | |
| "step": 3085 | |
| }, | |
| { | |
| "epoch": 0.9711636677928813, | |
| "grad_norm": 0.1258799433708191, | |
| "learning_rate": 2.49242520317966e-06, | |
| "loss": 1.7303, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.9727351300385009, | |
| "grad_norm": 0.144920215010643, | |
| "learning_rate": 2.2262547080948992e-06, | |
| "loss": 1.7595, | |
| "step": 3095 | |
| }, | |
| { | |
| "epoch": 0.9743065922841204, | |
| "grad_norm": 0.12858329713344574, | |
| "learning_rate": 1.975078692391552e-06, | |
| "loss": 1.7411, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.97587805452974, | |
| "grad_norm": 0.1087680459022522, | |
| "learning_rate": 1.7389047222652888e-06, | |
| "loss": 1.7469, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 0.9774495167753595, | |
| "grad_norm": 0.12177930027246475, | |
| "learning_rate": 1.5177399120039904e-06, | |
| "loss": 1.6998, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.9790209790209791, | |
| "grad_norm": 0.11609877645969391, | |
| "learning_rate": 1.3115909237734204e-06, | |
| "loss": 1.7057, | |
| "step": 3115 | |
| }, | |
| { | |
| "epoch": 0.9805924412665986, | |
| "grad_norm": 0.13257728517055511, | |
| "learning_rate": 1.1204639674164962e-06, | |
| "loss": 1.7442, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.9821639035122182, | |
| "grad_norm": 0.12050619721412659, | |
| "learning_rate": 9.44364800266162e-07, | |
| "loss": 1.7151, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.9837353657578377, | |
| "grad_norm": 0.12853524088859558, | |
| "learning_rate": 7.832987269720815e-07, | |
| "loss": 1.7062, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.9853068280034573, | |
| "grad_norm": 0.11707114428281784, | |
| "learning_rate": 6.372705993408223e-07, | |
| "loss": 1.7023, | |
| "step": 3135 | |
| }, | |
| { | |
| "epoch": 0.9868782902490768, | |
| "grad_norm": 0.12321787327528, | |
| "learning_rate": 5.062848161896394e-07, | |
| "loss": 1.7308, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9884497524946964, | |
| "grad_norm": 0.13473524153232574, | |
| "learning_rate": 3.903453232140808e-07, | |
| "loss": 1.7428, | |
| "step": 3145 | |
| }, | |
| { | |
| "epoch": 0.9900212147403159, | |
| "grad_norm": 0.12741941213607788, | |
| "learning_rate": 2.894556128689163e-07, | |
| "loss": 1.6881, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9915926769859355, | |
| "grad_norm": 0.1763051599264145, | |
| "learning_rate": 2.03618724263277e-07, | |
| "loss": 1.7244, | |
| "step": 3155 | |
| }, | |
| { | |
| "epoch": 0.993164139231555, | |
| "grad_norm": 0.11934591829776764, | |
| "learning_rate": 1.3283724306867306e-07, | |
| "loss": 1.7025, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9947356014771745, | |
| "grad_norm": 0.12859931588172913, | |
| "learning_rate": 7.711330144161144e-08, | |
| "loss": 1.7378, | |
| "step": 3165 | |
| }, | |
| { | |
| "epoch": 0.9963070637227941, | |
| "grad_norm": 0.14746056497097015, | |
| "learning_rate": 3.644857795886969e-08, | |
| "loss": 1.7293, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9978785259684136, | |
| "grad_norm": 0.16102327406406403, | |
| "learning_rate": 1.0844297567258466e-08, | |
| "loss": 1.6806, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.9994499882140332, | |
| "grad_norm": 0.15685699880123138, | |
| "learning_rate": 3.012315465955595e-10, | |
| "loss": 1.8048, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.999764280663157, | |
| "eval_loss": 1.7209471464157104, | |
| "eval_runtime": 333.4066, | |
| "eval_samples_per_second": 31.946, | |
| "eval_steps_per_second": 1.998, | |
| "step": 3181 | |
| }, | |
| { | |
| "epoch": 0.999764280663157, | |
| "step": 3181, | |
| "total_flos": 2.656972328528773e+17, | |
| "train_loss": 2.0235598598475426, | |
| "train_runtime": 14726.7001, | |
| "train_samples_per_second": 13.827, | |
| "train_steps_per_second": 0.216 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 3181, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.656972328528773e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |