{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9951534733441034,
  "eval_steps": 500,
  "global_step": 618,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0032310177705977385,
      "grad_norm": 25.128438265728068,
      "learning_rate": 8.064516129032259e-08,
      "loss": 0.1803,
      "step": 1
    },
    {
      "epoch": 0.006462035541195477,
      "grad_norm": 20.481937139410615,
      "learning_rate": 1.6129032258064518e-07,
      "loss": 0.1841,
      "step": 2
    },
    {
      "epoch": 0.009693053311793215,
      "grad_norm": 40.482947214989046,
      "learning_rate": 2.4193548387096775e-07,
      "loss": 0.2517,
      "step": 3
    },
    {
      "epoch": 0.012924071082390954,
      "grad_norm": 56.10659153508464,
      "learning_rate": 3.2258064516129035e-07,
      "loss": 0.289,
      "step": 4
    },
    {
      "epoch": 0.01615508885298869,
      "grad_norm": 40.7368526949032,
      "learning_rate": 4.032258064516129e-07,
      "loss": 0.2432,
      "step": 5
    },
    {
      "epoch": 0.01938610662358643,
      "grad_norm": 36.018819191325036,
      "learning_rate": 4.838709677419355e-07,
      "loss": 0.1939,
      "step": 6
    },
    {
      "epoch": 0.022617124394184167,
      "grad_norm": 64.22644318288756,
      "learning_rate": 5.645161290322581e-07,
      "loss": 0.2186,
      "step": 7
    },
    {
      "epoch": 0.025848142164781908,
      "grad_norm": 68.80190588802398,
      "learning_rate": 6.451612903225807e-07,
      "loss": 0.237,
      "step": 8
    },
    {
      "epoch": 0.029079159935379646,
      "grad_norm": 53.87483186618541,
      "learning_rate": 7.258064516129033e-07,
      "loss": 0.1904,
      "step": 9
    },
    {
      "epoch": 0.03231017770597738,
      "grad_norm": 27.250940737956068,
      "learning_rate": 8.064516129032258e-07,
      "loss": 0.1914,
      "step": 10
    },
    {
      "epoch": 0.035541195476575124,
      "grad_norm": 19.13516705978285,
      "learning_rate": 8.870967741935485e-07,
      "loss": 0.21,
      "step": 11
    },
    {
      "epoch": 0.03877221324717286,
      "grad_norm": 5.971043423480582,
      "learning_rate": 9.67741935483871e-07,
      "loss": 0.1893,
      "step": 12
    },
    {
      "epoch": 0.0420032310177706,
      "grad_norm": 8.917076042584409,
      "learning_rate": 1.0483870967741936e-06,
      "loss": 0.1672,
      "step": 13
    },
    {
      "epoch": 0.045234248788368334,
      "grad_norm": 6.564634516149644,
      "learning_rate": 1.1290322580645162e-06,
      "loss": 0.1639,
      "step": 14
    },
    {
      "epoch": 0.048465266558966075,
      "grad_norm": 7.4524362304900995,
      "learning_rate": 1.2096774193548388e-06,
      "loss": 0.1904,
      "step": 15
    },
    {
      "epoch": 0.051696284329563816,
      "grad_norm": 6.424814341877685,
      "learning_rate": 1.2903225806451614e-06,
      "loss": 0.1539,
      "step": 16
    },
    {
      "epoch": 0.05492730210016155,
      "grad_norm": 8.919971119630558,
      "learning_rate": 1.3709677419354838e-06,
      "loss": 0.1276,
      "step": 17
    },
    {
      "epoch": 0.05815831987075929,
      "grad_norm": 8.858198890334634,
      "learning_rate": 1.4516129032258066e-06,
      "loss": 0.148,
      "step": 18
    },
    {
      "epoch": 0.061389337641357025,
      "grad_norm": 8.19809300927439,
      "learning_rate": 1.5322580645161292e-06,
      "loss": 0.1528,
      "step": 19
    },
    {
      "epoch": 0.06462035541195477,
      "grad_norm": 9.263957823023876,
      "learning_rate": 1.6129032258064516e-06,
      "loss": 0.1654,
      "step": 20
    },
    {
      "epoch": 0.06785137318255251,
      "grad_norm": 5.211246085563888,
      "learning_rate": 1.6935483870967742e-06,
      "loss": 0.1478,
      "step": 21
    },
    {
      "epoch": 0.07108239095315025,
      "grad_norm": 4.16568459472329,
      "learning_rate": 1.774193548387097e-06,
      "loss": 0.1548,
      "step": 22
    },
    {
      "epoch": 0.07431340872374798,
      "grad_norm": 2.1941339248998233,
      "learning_rate": 1.8548387096774196e-06,
      "loss": 0.1349,
      "step": 23
    },
    {
      "epoch": 0.07754442649434572,
      "grad_norm": 1.5770028567175525,
      "learning_rate": 1.935483870967742e-06,
      "loss": 0.174,
      "step": 24
    },
    {
      "epoch": 0.08077544426494346,
      "grad_norm": 1.606149082877329,
      "learning_rate": 2.0161290322580646e-06,
      "loss": 0.1468,
      "step": 25
    },
    {
      "epoch": 0.0840064620355412,
      "grad_norm": 1.4435377235758515,
      "learning_rate": 2.096774193548387e-06,
      "loss": 0.1482,
      "step": 26
    },
    {
      "epoch": 0.08723747980613894,
      "grad_norm": 1.4360703380719761,
      "learning_rate": 2.17741935483871e-06,
      "loss": 0.1404,
      "step": 27
    },
    {
      "epoch": 0.09046849757673667,
      "grad_norm": 0.8101385337166851,
      "learning_rate": 2.2580645161290324e-06,
      "loss": 0.1142,
      "step": 28
    },
    {
      "epoch": 0.09369951534733441,
      "grad_norm": 1.381962668180297,
      "learning_rate": 2.338709677419355e-06,
      "loss": 0.1331,
      "step": 29
    },
    {
      "epoch": 0.09693053311793215,
      "grad_norm": 1.2208818326390372,
      "learning_rate": 2.4193548387096776e-06,
      "loss": 0.1196,
      "step": 30
    },
    {
      "epoch": 0.10016155088852989,
      "grad_norm": 1.1209301057498027,
      "learning_rate": 2.5e-06,
      "loss": 0.0986,
      "step": 31
    },
    {
      "epoch": 0.10339256865912763,
      "grad_norm": 1.9664855281381353,
      "learning_rate": 2.580645161290323e-06,
      "loss": 0.133,
      "step": 32
    },
    {
      "epoch": 0.10662358642972536,
      "grad_norm": 1.2526567677482312,
      "learning_rate": 2.6612903225806454e-06,
      "loss": 0.0969,
      "step": 33
    },
    {
      "epoch": 0.1098546042003231,
      "grad_norm": 1.0939478434588246,
      "learning_rate": 2.7419354838709676e-06,
      "loss": 0.115,
      "step": 34
    },
    {
      "epoch": 0.11308562197092084,
      "grad_norm": 0.8286974625967464,
      "learning_rate": 2.822580645161291e-06,
      "loss": 0.0941,
      "step": 35
    },
    {
      "epoch": 0.11631663974151858,
      "grad_norm": 1.1449426230450832,
      "learning_rate": 2.903225806451613e-06,
      "loss": 0.1182,
      "step": 36
    },
    {
      "epoch": 0.11954765751211632,
      "grad_norm": 1.188437444192784,
      "learning_rate": 2.983870967741936e-06,
      "loss": 0.1107,
      "step": 37
    },
    {
      "epoch": 0.12277867528271405,
      "grad_norm": 1.1499792776405726,
      "learning_rate": 3.0645161290322584e-06,
      "loss": 0.1044,
      "step": 38
    },
    {
      "epoch": 0.1260096930533118,
      "grad_norm": 1.0118348591126385,
      "learning_rate": 3.145161290322581e-06,
      "loss": 0.0926,
      "step": 39
    },
    {
      "epoch": 0.12924071082390953,
      "grad_norm": 2.2856482367185813,
      "learning_rate": 3.225806451612903e-06,
      "loss": 0.0935,
      "step": 40
    },
    {
      "epoch": 0.13247172859450726,
      "grad_norm": 1.3141305507246495,
      "learning_rate": 3.306451612903226e-06,
      "loss": 0.1411,
      "step": 41
    },
    {
      "epoch": 0.13570274636510501,
      "grad_norm": 1.5200078779321133,
      "learning_rate": 3.3870967741935484e-06,
      "loss": 0.1086,
      "step": 42
    },
    {
      "epoch": 0.13893376413570274,
      "grad_norm": 1.199141198842882,
      "learning_rate": 3.4677419354838714e-06,
      "loss": 0.0963,
      "step": 43
    },
    {
      "epoch": 0.1421647819063005,
      "grad_norm": 0.9118534178360143,
      "learning_rate": 3.548387096774194e-06,
      "loss": 0.0838,
      "step": 44
    },
    {
      "epoch": 0.14539579967689822,
      "grad_norm": 0.8313523608482088,
      "learning_rate": 3.6290322580645166e-06,
      "loss": 0.0822,
      "step": 45
    },
    {
      "epoch": 0.14862681744749595,
      "grad_norm": 1.0602464666470106,
      "learning_rate": 3.7096774193548392e-06,
      "loss": 0.1031,
      "step": 46
    },
    {
      "epoch": 0.1518578352180937,
      "grad_norm": 1.4189660331134808,
      "learning_rate": 3.7903225806451614e-06,
      "loss": 0.0985,
      "step": 47
    },
    {
      "epoch": 0.15508885298869143,
      "grad_norm": 0.7654402241394538,
      "learning_rate": 3.870967741935484e-06,
      "loss": 0.0759,
      "step": 48
    },
    {
      "epoch": 0.1583198707592892,
      "grad_norm": 1.5213372179407378,
      "learning_rate": 3.951612903225807e-06,
      "loss": 0.1288,
      "step": 49
    },
    {
      "epoch": 0.16155088852988692,
      "grad_norm": 1.2908078430353152,
      "learning_rate": 4.032258064516129e-06,
      "loss": 0.1117,
      "step": 50
    },
    {
      "epoch": 0.16478190630048464,
      "grad_norm": 1.0569868777193876,
      "learning_rate": 4.112903225806452e-06,
      "loss": 0.0983,
      "step": 51
    },
    {
      "epoch": 0.1680129240710824,
      "grad_norm": 0.9711071114660816,
      "learning_rate": 4.193548387096774e-06,
      "loss": 0.0911,
      "step": 52
    },
    {
      "epoch": 0.17124394184168013,
      "grad_norm": 1.102405320517146,
      "learning_rate": 4.274193548387097e-06,
      "loss": 0.1037,
      "step": 53
    },
    {
      "epoch": 0.17447495961227788,
      "grad_norm": 0.9937966694938649,
      "learning_rate": 4.35483870967742e-06,
      "loss": 0.0815,
      "step": 54
    },
    {
      "epoch": 0.1777059773828756,
      "grad_norm": 1.0718680838436345,
      "learning_rate": 4.435483870967742e-06,
      "loss": 0.1004,
      "step": 55
    },
    {
      "epoch": 0.18093699515347333,
      "grad_norm": 1.1717408640580886,
      "learning_rate": 4.516129032258065e-06,
      "loss": 0.1278,
      "step": 56
    },
    {
      "epoch": 0.1841680129240711,
      "grad_norm": 0.9371050661628969,
      "learning_rate": 4.596774193548387e-06,
      "loss": 0.09,
      "step": 57
    },
    {
      "epoch": 0.18739903069466882,
      "grad_norm": 0.7008244964012689,
      "learning_rate": 4.67741935483871e-06,
      "loss": 0.0704,
      "step": 58
    },
    {
      "epoch": 0.19063004846526657,
      "grad_norm": 1.198678087514701,
      "learning_rate": 4.758064516129033e-06,
      "loss": 0.0966,
      "step": 59
    },
    {
      "epoch": 0.1938610662358643,
      "grad_norm": 1.237062798014623,
      "learning_rate": 4.838709677419355e-06,
      "loss": 0.108,
      "step": 60
    },
    {
      "epoch": 0.19709208400646203,
      "grad_norm": 0.9791118203985985,
      "learning_rate": 4.919354838709678e-06,
      "loss": 0.0991,
      "step": 61
    },
    {
      "epoch": 0.20032310177705978,
      "grad_norm": 1.0841634147338435,
      "learning_rate": 5e-06,
      "loss": 0.0934,
      "step": 62
    },
    {
      "epoch": 0.2035541195476575,
      "grad_norm": 1.258223416727101,
      "learning_rate": 4.999960092086724e-06,
      "loss": 0.1219,
      "step": 63
    },
    {
      "epoch": 0.20678513731825526,
      "grad_norm": 1.0617428668947297,
      "learning_rate": 4.999840369621011e-06,
      "loss": 0.1124,
      "step": 64
    },
    {
      "epoch": 0.210016155088853,
      "grad_norm": 1.213736182334779,
      "learning_rate": 4.999640836425159e-06,
      "loss": 0.1008,
      "step": 65
    },
    {
      "epoch": 0.21324717285945072,
      "grad_norm": 1.100400949090305,
      "learning_rate": 4.99936149886953e-06,
      "loss": 0.0732,
      "step": 66
    },
    {
      "epoch": 0.21647819063004847,
      "grad_norm": 1.0182260583019065,
      "learning_rate": 4.999002365872348e-06,
      "loss": 0.0818,
      "step": 67
    },
    {
      "epoch": 0.2197092084006462,
      "grad_norm": 1.0758854772406632,
      "learning_rate": 4.998563448899413e-06,
      "loss": 0.1152,
      "step": 68
    },
    {
      "epoch": 0.22294022617124395,
      "grad_norm": 0.7655531388319169,
      "learning_rate": 4.998044761963731e-06,
      "loss": 0.0629,
      "step": 69
    },
    {
      "epoch": 0.22617124394184168,
      "grad_norm": 0.9063698842381128,
      "learning_rate": 4.9974463216250735e-06,
      "loss": 0.0947,
      "step": 70
    },
    {
      "epoch": 0.2294022617124394,
      "grad_norm": 1.111926907799753,
      "learning_rate": 4.996768146989446e-06,
      "loss": 0.0869,
      "step": 71
    },
    {
      "epoch": 0.23263327948303716,
      "grad_norm": 1.2171632327405464,
      "learning_rate": 4.996010259708475e-06,
      "loss": 0.0906,
      "step": 72
    },
    {
      "epoch": 0.2358642972536349,
      "grad_norm": 1.1760501880854652,
      "learning_rate": 4.99517268397872e-06,
      "loss": 0.0905,
      "step": 73
    },
    {
      "epoch": 0.23909531502423265,
      "grad_norm": 0.9925140924590656,
      "learning_rate": 4.9942554465409e-06,
      "loss": 0.0759,
      "step": 74
    },
    {
      "epoch": 0.24232633279483037,
      "grad_norm": 1.1900673708039176,
      "learning_rate": 4.993258576679043e-06,
      "loss": 0.0959,
      "step": 75
    },
    {
      "epoch": 0.2455573505654281,
      "grad_norm": 1.0525284959082881,
      "learning_rate": 4.9921821062195445e-06,
      "loss": 0.0854,
      "step": 76
    },
    {
      "epoch": 0.24878836833602586,
      "grad_norm": 0.9782484128440981,
      "learning_rate": 4.991026069530156e-06,
      "loss": 0.0811,
      "step": 77
    },
    {
      "epoch": 0.2520193861066236,
      "grad_norm": 1.099947189666444,
      "learning_rate": 4.989790503518888e-06,
      "loss": 0.1213,
      "step": 78
    },
    {
      "epoch": 0.2552504038772213,
      "grad_norm": 0.9763034951592139,
      "learning_rate": 4.988475447632829e-06,
      "loss": 0.0978,
      "step": 79
    },
    {
      "epoch": 0.25848142164781907,
      "grad_norm": 1.1059217088306985,
      "learning_rate": 4.987080943856887e-06,
      "loss": 0.1188,
      "step": 80
    },
    {
      "epoch": 0.2617124394184168,
      "grad_norm": 0.8811301090737587,
      "learning_rate": 4.985607036712453e-06,
      "loss": 0.0959,
      "step": 81
    },
    {
      "epoch": 0.2649434571890145,
      "grad_norm": 0.876445346062972,
      "learning_rate": 4.984053773255971e-06,
      "loss": 0.0942,
      "step": 82
    },
    {
      "epoch": 0.2681744749596123,
      "grad_norm": 0.9532526347121169,
      "learning_rate": 4.982421203077446e-06,
      "loss": 0.0877,
      "step": 83
    },
    {
      "epoch": 0.27140549273021003,
      "grad_norm": 1.123157927763249,
      "learning_rate": 4.980709378298851e-06,
      "loss": 0.1278,
      "step": 84
    },
    {
      "epoch": 0.27463651050080773,
      "grad_norm": 1.0388291288831657,
      "learning_rate": 4.978918353572471e-06,
      "loss": 0.0711,
      "step": 85
    },
    {
      "epoch": 0.2778675282714055,
      "grad_norm": 0.9767051395631638,
      "learning_rate": 4.977048186079155e-06,
      "loss": 0.0778,
      "step": 86
    },
    {
      "epoch": 0.28109854604200324,
      "grad_norm": 0.796702172980499,
      "learning_rate": 4.975098935526487e-06,
      "loss": 0.0737,
      "step": 87
    },
    {
      "epoch": 0.284329563812601,
      "grad_norm": 0.930052043825522,
      "learning_rate": 4.973070664146885e-06,
      "loss": 0.0797,
      "step": 88
    },
    {
      "epoch": 0.2875605815831987,
      "grad_norm": 0.8599629277716169,
      "learning_rate": 4.970963436695612e-06,
      "loss": 0.0736,
      "step": 89
    },
    {
      "epoch": 0.29079159935379645,
      "grad_norm": 1.2487651297160884,
      "learning_rate": 4.968777320448707e-06,
      "loss": 0.1234,
      "step": 90
    },
    {
      "epoch": 0.2940226171243942,
      "grad_norm": 1.3265846927372396,
      "learning_rate": 4.966512385200841e-06,
      "loss": 0.097,
      "step": 91
    },
    {
      "epoch": 0.2972536348949919,
      "grad_norm": 1.2653863711384867,
      "learning_rate": 4.964168703263086e-06,
      "loss": 0.0851,
      "step": 92
    },
    {
      "epoch": 0.30048465266558966,
      "grad_norm": 1.05226163192955,
      "learning_rate": 4.961746349460607e-06,
      "loss": 0.0901,
      "step": 93
    },
    {
      "epoch": 0.3037156704361874,
      "grad_norm": 0.7053264391935079,
      "learning_rate": 4.959245401130269e-06,
      "loss": 0.0472,
      "step": 94
    },
    {
      "epoch": 0.3069466882067851,
      "grad_norm": 1.1693965687082317,
      "learning_rate": 4.956665938118179e-06,
      "loss": 0.0909,
      "step": 95
    },
    {
      "epoch": 0.31017770597738287,
      "grad_norm": 1.0877301086902282,
      "learning_rate": 4.954008042777125e-06,
      "loss": 0.0897,
      "step": 96
    },
    {
      "epoch": 0.3134087237479806,
      "grad_norm": 0.9145033111059097,
      "learning_rate": 4.951271799963952e-06,
      "loss": 0.0871,
      "step": 97
    },
    {
      "epoch": 0.3166397415185784,
      "grad_norm": 0.8706912416502465,
      "learning_rate": 4.9484572970368516e-06,
      "loss": 0.0656,
      "step": 98
    },
    {
      "epoch": 0.3198707592891761,
      "grad_norm": 1.1252067732287794,
      "learning_rate": 4.945564623852577e-06,
      "loss": 0.0916,
      "step": 99
    },
    {
      "epoch": 0.32310177705977383,
      "grad_norm": 0.9809013164181603,
      "learning_rate": 4.942593872763566e-06,
      "loss": 0.0671,
      "step": 100
    },
    {
      "epoch": 0.3263327948303716,
      "grad_norm": 0.9939061476745602,
      "learning_rate": 4.939545138615003e-06,
      "loss": 0.0796,
      "step": 101
    },
    {
      "epoch": 0.3295638126009693,
      "grad_norm": 0.8729446605586859,
      "learning_rate": 4.93641851874178e-06,
      "loss": 0.1031,
      "step": 102
    },
    {
      "epoch": 0.33279483037156704,
      "grad_norm": 0.8275137143927102,
      "learning_rate": 4.933214112965399e-06,
      "loss": 0.098,
      "step": 103
    },
    {
      "epoch": 0.3360258481421648,
      "grad_norm": 1.1250877000414508,
      "learning_rate": 4.929932023590776e-06,
      "loss": 0.0818,
      "step": 104
    },
    {
      "epoch": 0.3392568659127625,
      "grad_norm": 0.6635247677362335,
      "learning_rate": 4.926572355402983e-06,
      "loss": 0.069,
      "step": 105
    },
    {
      "epoch": 0.34248788368336025,
      "grad_norm": 0.7585575652472656,
      "learning_rate": 4.923135215663897e-06,
      "loss": 0.0574,
      "step": 106
    },
    {
      "epoch": 0.345718901453958,
      "grad_norm": 0.8301240151962479,
      "learning_rate": 4.919620714108777e-06,
      "loss": 0.0894,
      "step": 107
    },
    {
      "epoch": 0.34894991922455576,
      "grad_norm": 0.7584921353525548,
      "learning_rate": 4.916028962942763e-06,
      "loss": 0.0693,
      "step": 108
    },
    {
      "epoch": 0.35218093699515346,
      "grad_norm": 0.8546005601207664,
      "learning_rate": 4.912360076837289e-06,
      "loss": 0.0853,
      "step": 109
    },
    {
      "epoch": 0.3554119547657512,
      "grad_norm": 0.7665244261956478,
      "learning_rate": 4.908614172926426e-06,
      "loss": 0.0555,
      "step": 110
    },
    {
      "epoch": 0.35864297253634897,
      "grad_norm": 0.7090721453253053,
      "learning_rate": 4.904791370803141e-06,
      "loss": 0.0809,
      "step": 111
    },
    {
      "epoch": 0.36187399030694667,
      "grad_norm": 0.9004480711031451,
      "learning_rate": 4.9008917925154795e-06,
      "loss": 0.0798,
      "step": 112
    },
    {
      "epoch": 0.3651050080775444,
      "grad_norm": 0.6681566874499523,
      "learning_rate": 4.896915562562665e-06,
      "loss": 0.0567,
      "step": 113
    },
    {
      "epoch": 0.3683360258481422,
      "grad_norm": 0.7831071753829433,
      "learning_rate": 4.892862807891131e-06,
      "loss": 0.0886,
      "step": 114
    },
    {
      "epoch": 0.3715670436187399,
      "grad_norm": 0.8572105799904943,
      "learning_rate": 4.888733657890463e-06,
      "loss": 0.0941,
      "step": 115
    },
    {
      "epoch": 0.37479806138933763,
      "grad_norm": 0.6839139362191016,
      "learning_rate": 4.884528244389269e-06,
      "loss": 0.0499,
      "step": 116
    },
    {
      "epoch": 0.3780290791599354,
      "grad_norm": 0.7535937955610514,
      "learning_rate": 4.8802467016509704e-06,
      "loss": 0.0648,
      "step": 117
    },
    {
      "epoch": 0.38126009693053314,
      "grad_norm": 0.7278973767612748,
      "learning_rate": 4.8758891663695165e-06,
      "loss": 0.0647,
      "step": 118
    },
    {
      "epoch": 0.38449111470113084,
      "grad_norm": 0.8658527730630694,
      "learning_rate": 4.87145577766502e-06,
      "loss": 0.0694,
      "step": 119
    },
    {
      "epoch": 0.3877221324717286,
      "grad_norm": 0.5993174371068741,
      "learning_rate": 4.866946677079314e-06,
      "loss": 0.0532,
      "step": 120
    },
    {
      "epoch": 0.39095315024232635,
      "grad_norm": 0.9585898899909292,
      "learning_rate": 4.862362008571434e-06,
      "loss": 0.081,
      "step": 121
    },
    {
      "epoch": 0.39418416801292405,
      "grad_norm": 0.8402658697480521,
      "learning_rate": 4.857701918513023e-06,
      "loss": 0.0661,
      "step": 122
    },
    {
      "epoch": 0.3974151857835218,
      "grad_norm": 0.8695072904889961,
      "learning_rate": 4.852966555683657e-06,
      "loss": 0.0777,
      "step": 123
    },
    {
      "epoch": 0.40064620355411956,
      "grad_norm": 0.8700878343519065,
      "learning_rate": 4.848156071266095e-06,
      "loss": 0.0669,
      "step": 124
    },
    {
      "epoch": 0.40387722132471726,
      "grad_norm": 0.7573866683757483,
      "learning_rate": 4.843270618841455e-06,
      "loss": 0.0659,
      "step": 125
    },
    {
      "epoch": 0.407108239095315,
      "grad_norm": 0.7144861602580213,
      "learning_rate": 4.838310354384304e-06,
      "loss": 0.0673,
      "step": 126
    },
    {
      "epoch": 0.41033925686591277,
      "grad_norm": 0.6521772244924818,
      "learning_rate": 4.833275436257684e-06,
      "loss": 0.0487,
      "step": 127
    },
    {
      "epoch": 0.4135702746365105,
      "grad_norm": 0.9937642189435811,
      "learning_rate": 4.828166025208059e-06,
      "loss": 0.0748,
      "step": 128
    },
    {
      "epoch": 0.4168012924071082,
      "grad_norm": 0.838066231893655,
      "learning_rate": 4.822982284360173e-06,
      "loss": 0.0732,
      "step": 129
    },
    {
      "epoch": 0.420032310177706,
      "grad_norm": 0.9685536935346779,
      "learning_rate": 4.8177243792118515e-06,
      "loss": 0.1053,
      "step": 130
    },
    {
      "epoch": 0.42326332794830374,
      "grad_norm": 0.8362566096259404,
      "learning_rate": 4.8123924776287115e-06,
      "loss": 0.0767,
      "step": 131
    },
    {
      "epoch": 0.42649434571890144,
      "grad_norm": 0.7707131560838676,
      "learning_rate": 4.8069867498388066e-06,
      "loss": 0.059,
      "step": 132
    },
    {
      "epoch": 0.4297253634894992,
      "grad_norm": 0.8446766172300785,
      "learning_rate": 4.80150736842719e-06,
      "loss": 0.0715,
      "step": 133
    },
    {
      "epoch": 0.43295638126009695,
      "grad_norm": 0.9888301615866294,
      "learning_rate": 4.795954508330403e-06,
      "loss": 0.0816,
      "step": 134
    },
    {
      "epoch": 0.43618739903069464,
      "grad_norm": 0.5818611202414509,
      "learning_rate": 4.790328346830893e-06,
      "loss": 0.0579,
      "step": 135
    },
    {
      "epoch": 0.4394184168012924,
      "grad_norm": 0.7358503512846166,
      "learning_rate": 4.784629063551354e-06,
      "loss": 0.0729,
      "step": 136
    },
    {
      "epoch": 0.44264943457189015,
      "grad_norm": 0.6475363027245936,
      "learning_rate": 4.778856840448985e-06,
      "loss": 0.0595,
      "step": 137
    },
    {
      "epoch": 0.4458804523424879,
      "grad_norm": 0.8615033443900738,
      "learning_rate": 4.773011861809694e-06,
      "loss": 0.0951,
      "step": 138
    },
    {
      "epoch": 0.4491114701130856,
      "grad_norm": 0.6927048507197057,
      "learning_rate": 4.7670943142421955e-06,
      "loss": 0.0524,
      "step": 139
    },
    {
      "epoch": 0.45234248788368336,
      "grad_norm": 1.051050914153071,
      "learning_rate": 4.761104386672074e-06,
      "loss": 0.0766,
      "step": 140
    },
    {
      "epoch": 0.4555735056542811,
      "grad_norm": 0.5989675474056907,
      "learning_rate": 4.7550422703357355e-06,
      "loss": 0.0701,
      "step": 141
    },
    {
      "epoch": 0.4588045234248788,
      "grad_norm": 0.7775916205990767,
      "learning_rate": 4.748908158774312e-06,
      "loss": 0.0587,
      "step": 142
    },
    {
      "epoch": 0.4620355411954766,
      "grad_norm": 0.6567814970993029,
      "learning_rate": 4.742702247827476e-06,
      "loss": 0.0558,
      "step": 143
    },
    {
      "epoch": 0.46526655896607433,
      "grad_norm": 0.8201443957683763,
      "learning_rate": 4.736424735627193e-06,
      "loss": 0.066,
      "step": 144
    },
    {
      "epoch": 0.46849757673667203,
      "grad_norm": 0.767995936704651,
      "learning_rate": 4.730075822591392e-06,
      "loss": 0.0698,
      "step": 145
    },
    {
      "epoch": 0.4717285945072698,
      "grad_norm": 0.8765270563094147,
      "learning_rate": 4.7236557114175705e-06,
      "loss": 0.0752,
      "step": 146
    },
    {
      "epoch": 0.47495961227786754,
      "grad_norm": 0.8060072499383857,
      "learning_rate": 4.71716460707632e-06,
      "loss": 0.0776,
      "step": 147
    },
    {
      "epoch": 0.4781906300484653,
      "grad_norm": 0.7249857982210036,
      "learning_rate": 4.710602716804784e-06,
      "loss": 0.0645,
      "step": 148
    },
    {
      "epoch": 0.481421647819063,
      "grad_norm": 0.9856585082866961,
      "learning_rate": 4.703970250100041e-06,
      "loss": 0.0813,
      "step": 149
    },
    {
      "epoch": 0.48465266558966075,
      "grad_norm": 0.7832993384259341,
      "learning_rate": 4.697267418712415e-06,
      "loss": 0.0855,
      "step": 150
    },
    {
      "epoch": 0.4878836833602585,
      "grad_norm": 0.7537716514815709,
      "learning_rate": 4.690494436638718e-06,
      "loss": 0.0662,
      "step": 151
    },
    {
      "epoch": 0.4911147011308562,
      "grad_norm": 0.6970576661817561,
      "learning_rate": 4.683651520115414e-06,
      "loss": 0.0526,
      "step": 152
    },
    {
      "epoch": 0.49434571890145396,
      "grad_norm": 0.6025061116050546,
      "learning_rate": 4.67673888761172e-06,
      "loss": 0.0423,
      "step": 153
    },
    {
      "epoch": 0.4975767366720517,
      "grad_norm": 0.8666593448997953,
      "learning_rate": 4.669756759822625e-06,
      "loss": 0.0769,
      "step": 154
    },
    {
      "epoch": 0.5008077544426495,
      "grad_norm": 0.8822574390448034,
      "learning_rate": 4.66270535966185e-06,
      "loss": 0.0835,
      "step": 155
    },
    {
      "epoch": 0.5040387722132472,
      "grad_norm": 0.7966361049316173,
      "learning_rate": 4.655584912254727e-06,
      "loss": 0.0702,
      "step": 156
    },
    {
      "epoch": 0.5072697899838449,
      "grad_norm": 0.975295266397441,
      "learning_rate": 4.6483956449310155e-06,
      "loss": 0.0868,
      "step": 157
    },
    {
      "epoch": 0.5105008077544426,
      "grad_norm": 0.6419609425416163,
      "learning_rate": 4.64113778721764e-06,
      "loss": 0.0563,
      "step": 158
    },
    {
      "epoch": 0.5137318255250404,
      "grad_norm": 1.074461428386011,
      "learning_rate": 4.633811570831367e-06,
      "loss": 0.1019,
      "step": 159
    },
    {
      "epoch": 0.5169628432956381,
      "grad_norm": 0.7470516638155429,
      "learning_rate": 4.626417229671401e-06,
      "loss": 0.0699,
      "step": 160
    },
    {
      "epoch": 0.5201938610662359,
      "grad_norm": 0.6593218212471074,
      "learning_rate": 4.6189549998119235e-06,
      "loss": 0.0489,
      "step": 161
    },
    {
      "epoch": 0.5234248788368336,
      "grad_norm": 0.9595803117243349,
      "learning_rate": 4.611425119494552e-06,
      "loss": 0.0784,
      "step": 162
    },
    {
      "epoch": 0.5266558966074314,
      "grad_norm": 0.6236918909086313,
      "learning_rate": 4.603827829120734e-06,
      "loss": 0.0581,
      "step": 163
    },
    {
      "epoch": 0.529886914378029,
      "grad_norm": 0.94480008993302,
      "learning_rate": 4.596163371244076e-06,
      "loss": 0.0672,
      "step": 164
    },
    {
      "epoch": 0.5331179321486268,
      "grad_norm": 0.8140229354538353,
      "learning_rate": 4.588431990562593e-06,
      "loss": 0.0557,
      "step": 165
    },
    {
      "epoch": 0.5363489499192245,
      "grad_norm": 0.6881752667822972,
      "learning_rate": 4.580633933910901e-06,
      "loss": 0.0499,
      "step": 166
    },
    {
      "epoch": 0.5395799676898223,
      "grad_norm": 0.6835421331522927,
      "learning_rate": 4.572769450252335e-06,
      "loss": 0.0726,
      "step": 167
    },
    {
      "epoch": 0.5428109854604201,
      "grad_norm": 0.6690598961735207,
      "learning_rate": 4.564838790671e-06,
      "loss": 0.0553,
      "step": 168
    },
    {
      "epoch": 0.5460420032310178,
      "grad_norm": 1.0189532436328723,
      "learning_rate": 4.556842208363756e-06,
      "loss": 0.0865,
      "step": 169
    },
    {
      "epoch": 0.5492730210016155,
      "grad_norm": 0.6275318178066402,
      "learning_rate": 4.548779958632134e-06,
      "loss": 0.0549,
      "step": 170
    },
    {
      "epoch": 0.5525040387722132,
      "grad_norm": 0.6758827324437686,
      "learning_rate": 4.540652298874183e-06,
      "loss": 0.055,
      "step": 171
    },
    {
      "epoch": 0.555735056542811,
      "grad_norm": 0.7969599802065607,
      "learning_rate": 4.532459488576258e-06,
      "loss": 0.0667,
      "step": 172
    },
    {
      "epoch": 0.5589660743134087,
      "grad_norm": 0.8835274962394156,
      "learning_rate": 4.524201789304727e-06,
      "loss": 0.1042,
      "step": 173
    },
    {
      "epoch": 0.5621970920840065,
      "grad_norm": 1.0052690224759626,
      "learning_rate": 4.515879464697629e-06,
      "loss": 0.0676,
      "step": 174
    },
    {
      "epoch": 0.5654281098546042,
      "grad_norm": 1.2550940752148396,
      "learning_rate": 4.507492780456249e-06,
      "loss": 0.0873,
      "step": 175
    },
    {
      "epoch": 0.568659127625202,
      "grad_norm": 0.7510769288869017,
      "learning_rate": 4.499042004336642e-06,
      "loss": 0.0593,
      "step": 176
    },
    {
      "epoch": 0.5718901453957996,
      "grad_norm": 1.0704861891317858,
      "learning_rate": 4.490527406141081e-06,
      "loss": 0.1084,
      "step": 177
    },
    {
      "epoch": 0.5751211631663974,
      "grad_norm": 0.6284718527814975,
      "learning_rate": 4.481949257709442e-06,
      "loss": 0.0508,
      "step": 178
    },
    {
      "epoch": 0.5783521809369951,
      "grad_norm": 0.7735314263403176,
      "learning_rate": 4.4733078329105296e-06,
      "loss": 0.071,
      "step": 179
    },
    {
      "epoch": 0.5815831987075929,
      "grad_norm": 0.7328207313228111,
      "learning_rate": 4.464603407633326e-06,
      "loss": 0.0616,
      "step": 180
    },
    {
      "epoch": 0.5848142164781907,
      "grad_norm": 0.8860553425416037,
      "learning_rate": 4.455836259778193e-06,
      "loss": 0.0933,
      "step": 181
    },
    {
      "epoch": 0.5880452342487884,
      "grad_norm": 0.7335813522711138,
      "learning_rate": 4.44700666924799e-06,
      "loss": 0.0778,
      "step": 182
    },
    {
      "epoch": 0.5912762520193862,
      "grad_norm": 1.0224460571070495,
      "learning_rate": 4.438114917939145e-06,
      "loss": 0.0816,
      "step": 183
    },
    {
      "epoch": 0.5945072697899838,
      "grad_norm": 0.5929045336796159,
      "learning_rate": 4.42916128973265e-06,
      "loss": 0.0544,
      "step": 184
    },
    {
      "epoch": 0.5977382875605816,
      "grad_norm": 0.9356825212983167,
      "learning_rate": 4.420146070484997e-06,
      "loss": 0.0697,
      "step": 185
    },
    {
      "epoch": 0.6009693053311793,
      "grad_norm": 0.9940780034167175,
      "learning_rate": 4.41106954801906e-06,
      "loss": 0.0849,
      "step": 186
    },
    {
      "epoch": 0.6042003231017771,
      "grad_norm": 1.1206234673942042,
      "learning_rate": 4.401932012114893e-06,
      "loss": 0.0822,
      "step": 187
    },
    {
      "epoch": 0.6074313408723748,
      "grad_norm": 0.5583759826731935,
      "learning_rate": 4.39273375450049e-06,
      "loss": 0.0516,
      "step": 188
    },
    {
      "epoch": 0.6106623586429726,
      "grad_norm": 0.6454315459498958,
      "learning_rate": 4.383475068842464e-06,
      "loss": 0.0577,
      "step": 189
    },
    {
      "epoch": 0.6138933764135702,
      "grad_norm": 1.1332732216792498,
      "learning_rate": 4.3741562507366754e-06,
      "loss": 0.0655,
      "step": 190
    },
    {
      "epoch": 0.617124394184168,
      "grad_norm": 0.8318517770779469,
      "learning_rate": 4.36477759769879e-06,
      "loss": 0.0954,
      "step": 191
    },
    {
      "epoch": 0.6203554119547657,
      "grad_norm": 1.3985194163380206,
      "learning_rate": 4.355339409154788e-06,
      "loss": 0.119,
      "step": 192
    },
    {
      "epoch": 0.6235864297253635,
      "grad_norm": 0.8270239497445794,
      "learning_rate": 4.345841986431396e-06,
      "loss": 0.0637,
      "step": 193
    },
    {
      "epoch": 0.6268174474959612,
      "grad_norm": 0.7760464412137208,
      "learning_rate": 4.336285632746472e-06,
      "loss": 0.0884,
      "step": 194
    },
    {
      "epoch": 0.630048465266559,
      "grad_norm": 0.7692351681728313,
      "learning_rate": 4.326670653199323e-06,
      "loss": 0.0585,
      "step": 195
    },
    {
      "epoch": 0.6332794830371568,
      "grad_norm": 0.66023769139009,
      "learning_rate": 4.316997354760965e-06,
      "loss": 0.0678,
      "step": 196
    },
    {
      "epoch": 0.6365105008077544,
      "grad_norm": 0.6890195746804293,
      "learning_rate": 4.307266046264323e-06,
      "loss": 0.0664,
      "step": 197
    },
    {
      "epoch": 0.6397415185783522,
      "grad_norm": 0.847865514008143,
      "learning_rate": 4.297477038394368e-06,
      "loss": 0.0825,
      "step": 198
    },
    {
      "epoch": 0.6429725363489499,
      "grad_norm": 0.9550408154272941,
      "learning_rate": 4.287630643678204e-06,
      "loss": 0.0744,
      "step": 199
    },
    {
      "epoch": 0.6462035541195477,
      "grad_norm": 0.857502218920431,
      "learning_rate": 4.2777271764750805e-06,
      "loss": 0.0671,
      "step": 200
    },
    {
      "epoch": 0.6494345718901454,
      "grad_norm": 0.8052709789557946,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.0612,
      "step": 201
    },
    {
      "epoch": 0.6526655896607432,
      "grad_norm": 1.083089645941659,
      "learning_rate": 4.257750291145457e-06,
      "loss": 0.0822,
      "step": 202
    },
    {
      "epoch": 0.6558966074313409,
      "grad_norm": 0.984740295179814,
      "learning_rate": 4.247677510807602e-06,
      "loss": 0.056,
      "step": 203
    },
    {
      "epoch": 0.6591276252019386,
      "grad_norm": 0.8835020576624913,
      "learning_rate": 4.237548933539718e-06,
      "loss": 0.0758,
      "step": 204
    },
    {
      "epoch": 0.6623586429725363,
      "grad_norm": 0.6564577266758135,
      "learning_rate": 4.227364882710114e-06,
      "loss": 0.0563,
      "step": 205
    },
    {
      "epoch": 0.6655896607431341,
      "grad_norm": 1.1066641198832807,
      "learning_rate": 4.217125683458162e-06,
      "loss": 0.083,
      "step": 206
    },
    {
      "epoch": 0.6688206785137318,
      "grad_norm": 0.6596522236176221,
      "learning_rate": 4.206831662683922e-06,
      "loss": 0.0661,
      "step": 207
    },
    {
      "epoch": 0.6720516962843296,
      "grad_norm": 1.3814266436856864,
      "learning_rate": 4.196483149037707e-06,
      "loss": 0.1065,
      "step": 208
    },
    {
      "epoch": 0.6752827140549273,
      "grad_norm": 0.8566187355631336,
      "learning_rate": 4.186080472909582e-06,
      "loss": 0.0798,
      "step": 209
    },
    {
      "epoch": 0.678513731825525,
      "grad_norm": 0.7295355541151259,
      "learning_rate": 4.1756239664188275e-06,
      "loss": 0.0575,
      "step": 210
    },
    {
      "epoch": 0.6817447495961227,
      "grad_norm": 1.1292799421470592,
      "learning_rate": 4.165113963403326e-06,
      "loss": 0.0721,
      "step": 211
    },
    {
      "epoch": 0.6849757673667205,
      "grad_norm": 0.6756065514670196,
      "learning_rate": 4.154550799408906e-06,
      "loss": 0.0627,
      "step": 212
    },
    {
      "epoch": 0.6882067851373183,
      "grad_norm": 0.7404052802160156,
      "learning_rate": 4.143934811678637e-06,
      "loss": 0.0535,
      "step": 213
    },
    {
      "epoch": 0.691437802907916,
      "grad_norm": 1.022807263526455,
      "learning_rate": 4.1332663391420515e-06,
      "loss": 0.0854,
      "step": 214
    },
    {
      "epoch": 0.6946688206785138,
      "grad_norm": 0.8472576700436739,
      "learning_rate": 4.1225457224043316e-06,
      "loss": 0.0623,
      "step": 215
    },
    {
      "epoch": 0.6978998384491115,
      "grad_norm": 0.682205907382696,
      "learning_rate": 4.111773303735432e-06,
      "loss": 0.0523,
      "step": 216
    },
    {
      "epoch": 0.7011308562197092,
      "grad_norm": 0.8113418772511839,
      "learning_rate": 4.100949427059151e-06,
      "loss": 0.0789,
      "step": 217
    },
    {
      "epoch": 0.7043618739903069,
      "grad_norm": 0.9832925631785678,
      "learning_rate": 4.090074437942155e-06,
      "loss": 0.0723,
      "step": 218
    },
    {
      "epoch": 0.7075928917609047,
      "grad_norm": 0.8721900501399756,
      "learning_rate": 4.079148683582943e-06,
      "loss": 0.0882,
      "step": 219
    },
    {
      "epoch": 0.7108239095315024,
      "grad_norm": 0.6650598159524921,
      "learning_rate": 4.06817251280076e-06,
      "loss": 0.0645,
      "step": 220
    },
    {
      "epoch": 0.7140549273021002,
      "grad_norm": 0.5794028281825343,
      "learning_rate": 4.0571462760244626e-06,
      "loss": 0.0505,
      "step": 221
    },
    {
      "epoch": 0.7172859450726979,
      "grad_norm": 0.766211455982521,
      "learning_rate": 4.046070325281333e-06,
      "loss": 0.0716,
      "step": 222
    },
    {
      "epoch": 0.7205169628432956,
      "grad_norm": 0.6405405389062213,
      "learning_rate": 4.034945014185836e-06,
      "loss": 0.0682,
      "step": 223
    },
    {
      "epoch": 0.7237479806138933,
      "grad_norm": 0.7302480630044077,
      "learning_rate": 4.0237706979283306e-06,
      "loss": 0.0813,
      "step": 224
    },
    {
      "epoch": 0.7269789983844911,
      "grad_norm": 0.8456954793821594,
      "learning_rate": 4.012547733263734e-06,
      "loss": 0.0672,
      "step": 225
    },
    {
      "epoch": 0.7302100161550888,
      "grad_norm": 0.6987533622613603,
      "learning_rate": 4.001276478500127e-06,
      "loss": 0.0606,
      "step": 226
    },
    {
      "epoch": 0.7334410339256866,
      "grad_norm": 0.7583800796325042,
      "learning_rate": 3.989957293487314e-06,
      "loss": 0.048,
      "step": 227
    },
    {
      "epoch": 0.7366720516962844,
      "grad_norm": 0.8822810810943023,
      "learning_rate": 3.978590539605338e-06,
      "loss": 0.0604,
      "step": 228
    },
    {
      "epoch": 0.7399030694668821,
      "grad_norm": 0.8557818511714256,
      "learning_rate": 3.967176579752943e-06,
      "loss": 0.0589,
      "step": 229
    },
    {
      "epoch": 0.7431340872374798,
      "grad_norm": 0.5503873101604418,
      "learning_rate": 3.955715778335984e-06,
      "loss": 0.0528,
      "step": 230
    },
    {
      "epoch": 0.7463651050080775,
      "grad_norm": 0.8045709480783542,
      "learning_rate": 3.944208501255796e-06,
      "loss": 0.0821,
      "step": 231
    },
    {
      "epoch": 0.7495961227786753,
      "grad_norm": 0.752743452245411,
      "learning_rate": 3.932655115897513e-06,
      "loss": 0.0916,
      "step": 232
    },
    {
      "epoch": 0.752827140549273,
      "grad_norm": 0.9726656057832658,
      "learning_rate": 3.9210559911183345e-06,
      "loss": 0.0826,
      "step": 233
    },
    {
      "epoch": 0.7560581583198708,
      "grad_norm": 0.8247240601878854,
      "learning_rate": 3.909411497235752e-06,
      "loss": 0.0712,
      "step": 234
    },
    {
      "epoch": 0.7592891760904685,
      "grad_norm": 0.5846761529324903,
      "learning_rate": 3.89772200601573e-06,
      "loss": 0.0497,
      "step": 235
    },
    {
      "epoch": 0.7625201938610663,
      "grad_norm": 1.215906602352585,
      "learning_rate": 3.885987890660828e-06,
      "loss": 0.096,
      "step": 236
    },
    {
      "epoch": 0.7657512116316639,
      "grad_norm": 0.7228133512999668,
      "learning_rate": 3.874209525798293e-06,
      "loss": 0.0676,
      "step": 237
    },
    {
      "epoch": 0.7689822294022617,
      "grad_norm": 0.628874981721426,
      "learning_rate": 3.862387287468095e-06,
      "loss": 0.0638,
      "step": 238
    },
    {
      "epoch": 0.7722132471728594,
      "grad_norm": 0.8665871369668172,
      "learning_rate": 3.850521553110924e-06,
      "loss": 0.0676,
      "step": 239
    },
    {
      "epoch": 0.7754442649434572,
      "grad_norm": 0.7726368504889838,
      "learning_rate": 3.838612701556138e-06,
      "loss": 0.0617,
      "step": 240
    },
    {
      "epoch": 0.778675282714055,
      "grad_norm": 0.8259876486738208,
      "learning_rate": 3.826661113009671e-06,
      "loss": 0.0827,
      "step": 241
    },
    {
      "epoch": 0.7819063004846527,
      "grad_norm": 0.5825223238179703,
      "learning_rate": 3.814667169041887e-06,
      "loss": 0.0536,
      "step": 242
    },
    {
      "epoch": 0.7851373182552503,
      "grad_norm": 0.8337660440150237,
      "learning_rate": 3.8026312525754095e-06,
      "loss": 0.0805,
      "step": 243
    },
    {
      "epoch": 0.7883683360258481,
      "grad_norm": 1.2051840693103852,
      "learning_rate": 3.790553747872885e-06,
      "loss": 0.0876,
      "step": 244
    },
    {
      "epoch": 0.7915993537964459,
      "grad_norm": 1.0177651978897424,
      "learning_rate": 3.778435040524722e-06,
      "loss": 0.088,
      "step": 245
    },
    {
      "epoch": 0.7948303715670436,
      "grad_norm": 0.5088711298667009,
      "learning_rate": 3.766275517436779e-06,
      "loss": 0.0436,
      "step": 246
    },
    {
      "epoch": 0.7980613893376414,
      "grad_norm": 0.7256417368432645,
      "learning_rate": 3.75407556681801e-06,
      "loss": 0.0567,
      "step": 247
    },
    {
      "epoch": 0.8012924071082391,
      "grad_norm": 0.6647991839006098,
      "learning_rate": 3.741835578168071e-06,
      "loss": 0.0682,
      "step": 248
    },
    {
      "epoch": 0.8045234248788369,
      "grad_norm": 0.6160939622053359,
      "learning_rate": 3.7295559422648874e-06,
      "loss": 0.0471,
      "step": 249
    },
    {
      "epoch": 0.8077544426494345,
      "grad_norm": 0.9179464461903228,
      "learning_rate": 3.717237051152175e-06,
      "loss": 0.0774,
      "step": 250
    },
    {
      "epoch": 0.8109854604200323,
      "grad_norm": 1.0639899619677535,
      "learning_rate": 3.7048792981269245e-06,
      "loss": 0.076,
      "step": 251
    },
    {
      "epoch": 0.81421647819063,
      "grad_norm": 0.6225213021803089,
      "learning_rate": 3.692483077726843e-06,
      "loss": 0.0499,
      "step": 252
    },
    {
      "epoch": 0.8174474959612278,
      "grad_norm": 0.6507281120193775,
      "learning_rate": 3.6800487857177636e-06,
      "loss": 0.0542,
      "step": 253
    },
    {
      "epoch": 0.8206785137318255,
      "grad_norm": 0.8342956252285098,
      "learning_rate": 3.6675768190810023e-06,
      "loss": 0.0563,
      "step": 254
    },
    {
      "epoch": 0.8239095315024233,
      "grad_norm": 0.8643206085017306,
      "learning_rate": 3.6550675760006904e-06,
      "loss": 0.0739,
      "step": 255
    },
    {
      "epoch": 0.827140549273021,
      "grad_norm": 0.7946001867850625,
      "learning_rate": 3.642521455851058e-06,
      "loss": 0.0714,
      "step": 256
    },
    {
      "epoch": 0.8303715670436187,
      "grad_norm": 0.8926582386596207,
      "learning_rate": 3.629938859183686e-06,
      "loss": 0.0713,
      "step": 257
    },
    {
      "epoch": 0.8336025848142165,
      "grad_norm": 0.7491907200888189,
      "learning_rate": 3.6173201877147134e-06,
      "loss": 0.068,
      "step": 258
    },
    {
      "epoch": 0.8368336025848142,
      "grad_norm": 1.1183078136862983,
      "learning_rate": 3.6046658443120196e-06,
      "loss": 0.1055,
      "step": 259
    },
    {
      "epoch": 0.840064620355412,
      "grad_norm": 0.7909225982481742,
      "learning_rate": 3.5919762329823556e-06,
      "loss": 0.0695,
      "step": 260
    },
    {
      "epoch": 0.8432956381260097,
      "grad_norm": 0.8739075343916631,
      "learning_rate": 3.579251758858447e-06,
      "loss": 0.069,
      "step": 261
    },
    {
      "epoch": 0.8465266558966075,
      "grad_norm": 0.7730576890810616,
      "learning_rate": 3.566492828186063e-06,
      "loss": 0.0666,
      "step": 262
    },
    {
      "epoch": 0.8497576736672051,
      "grad_norm": 0.9066328657599784,
      "learning_rate": 3.5536998483110418e-06,
      "loss": 0.0707,
      "step": 263
    },
    {
      "epoch": 0.8529886914378029,
      "grad_norm": 0.713312105314591,
      "learning_rate": 3.5408732276662882e-06,
      "loss": 0.0913,
      "step": 264
    },
    {
      "epoch": 0.8562197092084006,
      "grad_norm": 0.7057871040611122,
      "learning_rate": 3.5280133757587343e-06,
      "loss": 0.0591,
      "step": 265
    },
    {
      "epoch": 0.8594507269789984,
      "grad_norm": 1.2427283512085103,
      "learning_rate": 3.515120703156264e-06,
      "loss": 0.0839,
      "step": 266
    },
    {
      "epoch": 0.8626817447495961,
      "grad_norm": 0.7653071583973694,
      "learning_rate": 3.5021956214746046e-06,
      "loss": 0.0645,
      "step": 267
    },
    {
      "epoch": 0.8659127625201939,
      "grad_norm": 0.6634481800817704,
      "learning_rate": 3.4892385433641875e-06,
      "loss": 0.0459,
      "step": 268
    },
    {
      "epoch": 0.8691437802907916,
      "grad_norm": 0.6953364982312946,
      "learning_rate": 3.4762498824969726e-06,
      "loss": 0.0632,
      "step": 269
    },
    {
      "epoch": 0.8723747980613893,
      "grad_norm": 0.6532166155768556,
      "learning_rate": 3.4632300535532415e-06,
      "loss": 0.053,
      "step": 270
    },
    {
      "epoch": 0.875605815831987,
      "grad_norm": 0.685089930364244,
      "learning_rate": 3.450179472208356e-06,
      "loss": 0.0586,
      "step": 271
    },
    {
      "epoch": 0.8788368336025848,
      "grad_norm": 0.79809625541097,
      "learning_rate": 3.437098555119493e-06,
      "loss": 0.053,
      "step": 272
    },
    {
      "epoch": 0.8820678513731826,
      "grad_norm": 1.001360047718671,
      "learning_rate": 3.4239877199123343e-06,
      "loss": 0.0856,
      "step": 273
    },
    {
      "epoch": 0.8852988691437803,
      "grad_norm": 0.9793506266219179,
      "learning_rate": 3.4108473851677408e-06,
      "loss": 0.0603,
      "step": 274
    },
    {
      "epoch": 0.8885298869143781,
      "grad_norm": 1.1216710753897985,
      "learning_rate": 3.397677970408384e-06,
      "loss": 0.091,
      "step": 275
    },
    {
      "epoch": 0.8917609046849758,
      "grad_norm": 1.0349146688296054,
      "learning_rate": 3.3844798960853533e-06,
      "loss": 0.0712,
      "step": 276
    },
    {
      "epoch": 0.8949919224555735,
      "grad_norm": 1.3307977354919367,
      "learning_rate": 3.3712535835647326e-06,
      "loss": 0.0617,
      "step": 277
    },
    {
      "epoch": 0.8982229402261712,
      "grad_norm": 0.7132616053518929,
      "learning_rate": 3.357999455114148e-06,
      "loss": 0.0659,
      "step": 278
    },
    {
      "epoch": 0.901453957996769,
      "grad_norm": 0.9273582042526977,
      "learning_rate": 3.344717933889289e-06,
      "loss": 0.0641,
      "step": 279
    },
    {
      "epoch": 0.9046849757673667,
      "grad_norm": 0.9302335920003337,
      "learning_rate": 3.3314094439203903e-06,
      "loss": 0.0626,
      "step": 280
    },
    {
      "epoch": 0.9079159935379645,
      "grad_norm": 0.45357719437194755,
      "learning_rate": 3.3180744100987045e-06,
      "loss": 0.0431,
      "step": 281
    },
    {
      "epoch": 0.9111470113085622,
      "grad_norm": 0.8331062557625248,
      "learning_rate": 3.3047132581629297e-06,
      "loss": 0.0755,
      "step": 282
    },
    {
      "epoch": 0.9143780290791599,
      "grad_norm": 0.7394449396136501,
      "learning_rate": 3.29132641468562e-06,
      "loss": 0.0566,
      "step": 283
    },
    {
      "epoch": 0.9176090468497576,
      "grad_norm": 0.9737240770489647,
      "learning_rate": 3.277914307059566e-06,
      "loss": 0.0801,
      "step": 284
    },
    {
      "epoch": 0.9208400646203554,
      "grad_norm": 0.7702804759383343,
      "learning_rate": 3.264477363484151e-06,
      "loss": 0.0573,
      "step": 285
    },
    {
      "epoch": 0.9240710823909531,
      "grad_norm": 0.5835013641966551,
      "learning_rate": 3.251016012951678e-06,
      "loss": 0.0562,
      "step": 286
    },
    {
      "epoch": 0.9273021001615509,
      "grad_norm": 0.5371255642799613,
      "learning_rate": 3.237530685233673e-06,
      "loss": 0.0413,
      "step": 287
    },
    {
      "epoch": 0.9305331179321487,
      "grad_norm": 0.816748025986031,
      "learning_rate": 3.2240218108671683e-06,
      "loss": 0.0534,
      "step": 288
    },
    {
      "epoch": 0.9337641357027464,
      "grad_norm": 0.712574290987934,
      "learning_rate": 3.2104898211409546e-06,
      "loss": 0.0635,
      "step": 289
    },
    {
      "epoch": 0.9369951534733441,
      "grad_norm": 0.7596552800433577,
      "learning_rate": 3.196935148081808e-06,
      "loss": 0.0562,
      "step": 290
    },
    {
      "epoch": 0.9402261712439418,
      "grad_norm": 0.6394269281528583,
      "learning_rate": 3.1833582244407036e-06,
      "loss": 0.0569,
      "step": 291
    },
    {
      "epoch": 0.9434571890145396,
      "grad_norm": 0.8847550945975203,
      "learning_rate": 3.1697594836789924e-06,
      "loss": 0.0854,
      "step": 292
    },
    {
      "epoch": 0.9466882067851373,
      "grad_norm": 0.6380616258246674,
      "learning_rate": 3.156139359954569e-06,
      "loss": 0.0603,
      "step": 293
    },
    {
      "epoch": 0.9499192245557351,
      "grad_norm": 0.8054064646081707,
      "learning_rate": 3.142498288108007e-06,
      "loss": 0.0811,
      "step": 294
    },
    {
      "epoch": 0.9531502423263328,
      "grad_norm": 0.6206438760553665,
      "learning_rate": 3.128836703648676e-06,
      "loss": 0.0702,
      "step": 295
    },
    {
      "epoch": 0.9563812600969306,
      "grad_norm": 0.925371456511262,
      "learning_rate": 3.1151550427408383e-06,
      "loss": 0.0663,
      "step": 296
    },
    {
      "epoch": 0.9596122778675282,
      "grad_norm": 0.8737367079669791,
      "learning_rate": 3.1014537421897222e-06,
      "loss": 0.078,
      "step": 297
    },
    {
      "epoch": 0.962843295638126,
      "grad_norm": 0.8790452308237618,
      "learning_rate": 3.0877332394275806e-06,
      "loss": 0.0796,
      "step": 298
    },
    {
      "epoch": 0.9660743134087237,
      "grad_norm": 0.8200476689531796,
      "learning_rate": 3.0739939724997205e-06,
      "loss": 0.0777,
      "step": 299
    },
    {
      "epoch": 0.9693053311793215,
      "grad_norm": 0.91054817725634,
      "learning_rate": 3.0602363800505198e-06,
      "loss": 0.0823,
      "step": 300
    },
    {
      "epoch": 0.9725363489499192,
      "grad_norm": 1.0055625552384646,
      "learning_rate": 3.0464609013094244e-06,
      "loss": 0.0723,
      "step": 301
    },
    {
      "epoch": 0.975767366720517,
      "grad_norm": 0.8948435648737145,
      "learning_rate": 3.032667976076923e-06,
      "loss": 0.0776,
      "step": 302
    },
    {
      "epoch": 0.9789983844911146,
      "grad_norm": 0.6947875633713175,
      "learning_rate": 3.0188580447105055e-06,
      "loss": 0.0576,
      "step": 303
    },
    {
      "epoch": 0.9822294022617124,
      "grad_norm": 0.710748833707824,
      "learning_rate": 3.0050315481106074e-06,
      "loss": 0.0606,
      "step": 304
    },
    {
      "epoch": 0.9854604200323102,
      "grad_norm": 0.4721520789494047,
      "learning_rate": 2.9911889277065314e-06,
      "loss": 0.043,
      "step": 305
    },
    {
      "epoch": 0.9886914378029079,
      "grad_norm": 0.7878237728564794,
      "learning_rate": 2.977330625442352e-06,
      "loss": 0.0832,
      "step": 306
    },
    {
      "epoch": 0.9919224555735057,
      "grad_norm": 0.818350538492891,
      "learning_rate": 2.963457083762809e-06,
      "loss": 0.0727,
      "step": 307
    },
    {
      "epoch": 0.9951534733441034,
      "grad_norm": 0.8240104583346066,
      "learning_rate": 2.949568745599182e-06,
      "loss": 0.0698,
      "step": 308
    },
    {
      "epoch": 0.9983844911147012,
      "grad_norm": 0.6820819240767714,
      "learning_rate": 2.935666054355146e-06,
      "loss": 0.0604,
      "step": 309
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.6820819240767714,
      "learning_rate": 2.921749453892618e-06,
      "loss": 0.0453,
      "step": 310
    },
    {
      "epoch": 1.0032310177705976,
      "grad_norm": 0.8574995005049099,
      "learning_rate": 2.9078193885175875e-06,
      "loss": 0.0401,
      "step": 311
    },
    {
      "epoch": 1.0064620355411955,
      "grad_norm": 0.7517375531460555,
      "learning_rate": 2.893876302965925e-06,
      "loss": 0.0646,
      "step": 312
    },
    {
      "epoch": 1.0096930533117932,
      "grad_norm": 0.7030688450315203,
      "learning_rate": 2.8799206423891895e-06,
      "loss": 0.0428,
      "step": 313
    },
    {
      "epoch": 1.012924071082391,
      "grad_norm": 0.6810197305641746,
      "learning_rate": 2.865952852340417e-06,
      "loss": 0.0491,
      "step": 314
    },
    {
      "epoch": 1.0161550888529887,
      "grad_norm": 0.6457251139138377,
      "learning_rate": 2.8519733787598887e-06,
      "loss": 0.0527,
      "step": 315
    },
    {
      "epoch": 1.0193861066235865,
      "grad_norm": 0.6750520782194651,
      "learning_rate": 2.8379826679609e-06,
      "loss": 0.0456,
      "step": 316
    },
    {
      "epoch": 1.0226171243941842,
      "grad_norm": 0.7287551036819017,
      "learning_rate": 2.8239811666155105e-06,
      "loss": 0.0703,
      "step": 317
    },
    {
      "epoch": 1.0258481421647818,
      "grad_norm": 0.5506534914240341,
      "learning_rate": 2.8099693217402807e-06,
      "loss": 0.0461,
      "step": 318
    },
    {
      "epoch": 1.0290791599353797,
      "grad_norm": 0.6716945269140621,
      "learning_rate": 2.795947580682003e-06,
      "loss": 0.0424,
      "step": 319
    },
    {
      "epoch": 1.0323101777059773,
      "grad_norm": 0.6029067986975312,
      "learning_rate": 2.7819163911034175e-06,
      "loss": 0.032,
      "step": 320
    },
    {
      "epoch": 1.0355411954765752,
      "grad_norm": 0.5754461947410837,
      "learning_rate": 2.767876200968923e-06,
      "loss": 0.0417,
      "step": 321
    },
    {
      "epoch": 1.0387722132471728,
      "grad_norm": 0.5670203427921292,
      "learning_rate": 2.7538274585302707e-06,
      "loss": 0.0676,
      "step": 322
    },
    {
      "epoch": 1.0420032310177707,
      "grad_norm": 0.6371474674240052,
      "learning_rate": 2.7397706123122563e-06,
      "loss": 0.0401,
      "step": 323
    },
    {
      "epoch": 1.0452342487883683,
      "grad_norm": 0.633563817351995,
      "learning_rate": 2.7257061110984005e-06,
      "loss": 0.0507,
      "step": 324
    },
    {
      "epoch": 1.048465266558966,
      "grad_norm": 0.455254402627076,
      "learning_rate": 2.7116344039166192e-06,
      "loss": 0.0346,
      "step": 325
    },
    {
      "epoch": 1.0516962843295639,
      "grad_norm": 0.46808216871215624,
      "learning_rate": 2.6975559400248876e-06,
      "loss": 0.0447,
      "step": 326
    },
    {
      "epoch": 1.0549273021001615,
      "grad_norm": 0.5421960059682478,
      "learning_rate": 2.683471168896899e-06,
      "loss": 0.0412,
      "step": 327
    },
    {
      "epoch": 1.0581583198707594,
      "grad_norm": 0.6365474014428788,
      "learning_rate": 2.6693805402077123e-06,
      "loss": 0.0482,
      "step": 328
    },
    {
      "epoch": 1.061389337641357,
      "grad_norm": 0.5544336402631056,
      "learning_rate": 2.6552845038193977e-06,
      "loss": 0.0392,
      "step": 329
    },
    {
      "epoch": 1.0646203554119547,
      "grad_norm": 0.49713090653008446,
      "learning_rate": 2.641183509766675e-06,
      "loss": 0.028,
      "step": 330
    },
    {
      "epoch": 1.0678513731825525,
      "grad_norm": 0.42542651226656086,
      "learning_rate": 2.627078008242541e-06,
      "loss": 0.0361,
      "step": 331
    },
    {
      "epoch": 1.0710823909531502,
      "grad_norm": 0.5295471778809274,
      "learning_rate": 2.6129684495839013e-06,
      "loss": 0.0657,
      "step": 332
    },
    {
      "epoch": 1.074313408723748,
      "grad_norm": 0.5910271655601097,
      "learning_rate": 2.5988552842571903e-06,
      "loss": 0.0447,
      "step": 333
    },
    {
      "epoch": 1.0775444264943457,
      "grad_norm": 0.5391865465723893,
      "learning_rate": 2.5847389628439905e-06,
      "loss": 0.0468,
      "step": 334
    },
    {
      "epoch": 1.0807754442649435,
      "grad_norm": 0.7945092160241262,
      "learning_rate": 2.570619936026647e-06,
      "loss": 0.0526,
      "step": 335
    },
    {
      "epoch": 1.0840064620355412,
      "grad_norm": 0.6384237762747144,
      "learning_rate": 2.5564986545738767e-06,
      "loss": 0.0435,
      "step": 336
    },
    {
      "epoch": 1.0872374798061388,
      "grad_norm": 0.6166409813836169,
      "learning_rate": 2.542375569326382e-06,
      "loss": 0.0405,
      "step": 337
    },
    {
      "epoch": 1.0904684975767367,
      "grad_norm": 0.5773480639364901,
      "learning_rate": 2.52825113118245e-06,
      "loss": 0.0534,
      "step": 338
    },
    {
      "epoch": 1.0936995153473343,
      "grad_norm": 0.5202558074438228,
      "learning_rate": 2.514125791083563e-06,
      "loss": 0.0507,
      "step": 339
    },
    {
      "epoch": 1.0969305331179322,
      "grad_norm": 0.6263121460011797,
      "learning_rate": 2.5e-06,
      "loss": 0.0512,
      "step": 340
    },
    {
      "epoch": 1.1001615508885298,
      "grad_norm": 0.7123374131643287,
      "learning_rate": 2.485874208916438e
| "loss": 0.0332, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.1033925686591277, | |
| "grad_norm": 0.4202479639460712, | |
| "learning_rate": 2.4717488688175513e-06, | |
| "loss": 0.0407, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.1066235864297254, | |
| "grad_norm": 0.642912112013506, | |
| "learning_rate": 2.4576244306736193e-06, | |
| "loss": 0.0454, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.109854604200323, | |
| "grad_norm": 0.5278433369392868, | |
| "learning_rate": 2.4435013454261246e-06, | |
| "loss": 0.0454, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.1130856219709209, | |
| "grad_norm": 0.49367590472244555, | |
| "learning_rate": 2.4293800639733537e-06, | |
| "loss": 0.0425, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.1163166397415185, | |
| "grad_norm": 0.663577683371772, | |
| "learning_rate": 2.4152610371560095e-06, | |
| "loss": 0.0502, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.1195476575121164, | |
| "grad_norm": 0.6258554932784628, | |
| "learning_rate": 2.40114471574281e-06, | |
| "loss": 0.0503, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.122778675282714, | |
| "grad_norm": 0.6117692229579965, | |
| "learning_rate": 2.3870315504160995e-06, | |
| "loss": 0.0357, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.1260096930533119, | |
| "grad_norm": 0.698709357123754, | |
| "learning_rate": 2.3729219917574597e-06, | |
| "loss": 0.0398, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.1292407108239095, | |
| "grad_norm": 0.5509108322599596, | |
| "learning_rate": 2.358816490233326e-06, | |
| "loss": 0.0504, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1324717285945072, | |
| "grad_norm": 0.5944772383426618, | |
| "learning_rate": 2.3447154961806027e-06, | |
| "loss": 0.0417, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.135702746365105, | |
| "grad_norm": 0.7597386477794142, | |
| "learning_rate": 2.330619459792289e-06, | |
| "loss": 0.0469, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1389337641357027, | |
| "grad_norm": 0.5719437487716487, | |
| "learning_rate": 2.3165288311031024e-06, | |
| "loss": 0.0294, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.1421647819063006, | |
| "grad_norm": 0.6213273588952939, | |
| "learning_rate": 2.3024440599751132e-06, | |
| "loss": 0.0384, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.1453957996768982, | |
| "grad_norm": 0.6354291435615703, | |
| "learning_rate": 2.288365596083381e-06, | |
| "loss": 0.0349, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.148626817447496, | |
| "grad_norm": 0.7486710035630336, | |
| "learning_rate": 2.274293888901599e-06, | |
| "loss": 0.0406, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.1518578352180937, | |
| "grad_norm": 0.46539243165120686, | |
| "learning_rate": 2.260229387687744e-06, | |
| "loss": 0.0406, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.1550888529886914, | |
| "grad_norm": 0.8582980219163947, | |
| "learning_rate": 2.24617254146973e-06, | |
| "loss": 0.0751, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.1583198707592892, | |
| "grad_norm": 0.5134947351957787, | |
| "learning_rate": 2.232123799031078e-06, | |
| "loss": 0.0257, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.1615508885298869, | |
| "grad_norm": 0.6414286514892072, | |
| "learning_rate": 2.2180836088965833e-06, | |
| "loss": 0.053, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.1647819063004847, | |
| "grad_norm": 0.6893327762289948, | |
| "learning_rate": 2.204052419317998e-06, | |
| "loss": 0.0518, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.1680129240710824, | |
| "grad_norm": 0.6315851441021518, | |
| "learning_rate": 2.19003067825972e-06, | |
| "loss": 0.0311, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.1712439418416802, | |
| "grad_norm": 0.506389589217495, | |
| "learning_rate": 2.1760188333844907e-06, | |
| "loss": 0.03, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.1744749596122779, | |
| "grad_norm": 0.48074703533770824, | |
| "learning_rate": 2.1620173320391007e-06, | |
| "loss": 0.0575, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.1777059773828755, | |
| "grad_norm": 0.7338858093688776, | |
| "learning_rate": 2.1480266212401117e-06, | |
| "loss": 0.0652, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.1809369951534734, | |
| "grad_norm": 0.5215250199335827, | |
| "learning_rate": 2.1340471476595836e-06, | |
| "loss": 0.0404, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.184168012924071, | |
| "grad_norm": 0.5101940305735346, | |
| "learning_rate": 2.1200793576108105e-06, | |
| "loss": 0.04, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.187399030694669, | |
| "grad_norm": 0.6072945357387388, | |
| "learning_rate": 2.1061236970340756e-06, | |
| "loss": 0.0471, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.1906300484652665, | |
| "grad_norm": 0.6485948720338923, | |
| "learning_rate": 2.0921806114824134e-06, | |
| "loss": 0.0402, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.1938610662358644, | |
| "grad_norm": 0.5618119097496929, | |
| "learning_rate": 2.0782505461073822e-06, | |
| "loss": 0.0309, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.197092084006462, | |
| "grad_norm": 0.5710364970799129, | |
| "learning_rate": 2.0643339456448547e-06, | |
| "loss": 0.0534, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.2003231017770597, | |
| "grad_norm": 0.5166825620149433, | |
| "learning_rate": 2.0504312544008193e-06, | |
| "loss": 0.0443, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.2035541195476576, | |
| "grad_norm": 0.6582279195349342, | |
| "learning_rate": 2.0365429162371922e-06, | |
| "loss": 0.042, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.2067851373182552, | |
| "grad_norm": 0.5628923740662667, | |
| "learning_rate": 2.0226693745576494e-06, | |
| "loss": 0.0338, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.210016155088853, | |
| "grad_norm": 0.5269299240421488, | |
| "learning_rate": 2.008811072293469e-06, | |
| "loss": 0.0333, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.2132471728594507, | |
| "grad_norm": 0.37904232204217875, | |
| "learning_rate": 1.9949684518893926e-06, | |
| "loss": 0.0321, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.2164781906300486, | |
| "grad_norm": 0.5084850707267878, | |
| "learning_rate": 1.9811419552894953e-06, | |
| "loss": 0.0408, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.2197092084006462, | |
| "grad_norm": 0.4746627443509293, | |
| "learning_rate": 1.9673320239230783e-06, | |
| "loss": 0.0359, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.2229402261712439, | |
| "grad_norm": 0.6346489110095358, | |
| "learning_rate": 1.9535390986905764e-06, | |
| "loss": 0.0505, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.2261712439418417, | |
| "grad_norm": 0.6162762793044799, | |
| "learning_rate": 1.939763619949481e-06, | |
| "loss": 0.0327, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2294022617124394, | |
| "grad_norm": 0.7282286592219928, | |
| "learning_rate": 1.92600602750028e-06, | |
| "loss": 0.0664, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.2326332794830372, | |
| "grad_norm": 0.687246463772339, | |
| "learning_rate": 1.9122667605724202e-06, | |
| "loss": 0.0485, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.235864297253635, | |
| "grad_norm": 0.6284714240874483, | |
| "learning_rate": 1.8985462578102786e-06, | |
| "loss": 0.0505, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2390953150242328, | |
| "grad_norm": 0.6820536891979064, | |
| "learning_rate": 1.884844957259163e-06, | |
| "loss": 0.0311, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.2423263327948304, | |
| "grad_norm": 0.549881437893592, | |
| "learning_rate": 1.8711632963513237e-06, | |
| "loss": 0.0375, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.245557350565428, | |
| "grad_norm": 0.6650268250589539, | |
| "learning_rate": 1.857501711891993e-06, | |
| "loss": 0.054, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.248788368336026, | |
| "grad_norm": 0.5113954409802595, | |
| "learning_rate": 1.8438606400454312e-06, | |
| "loss": 0.031, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.2520193861066236, | |
| "grad_norm": 0.5332778481364839, | |
| "learning_rate": 1.830240516321008e-06, | |
| "loss": 0.0463, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.2552504038772212, | |
| "grad_norm": 0.5928101900055703, | |
| "learning_rate": 1.8166417755592975e-06, | |
| "loss": 0.0353, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.258481421647819, | |
| "grad_norm": 0.4719442237818212, | |
| "learning_rate": 1.8030648519181926e-06, | |
| "loss": 0.037, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.261712439418417, | |
| "grad_norm": 0.6879135991203472, | |
| "learning_rate": 1.789510178859046e-06, | |
| "loss": 0.0523, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.2649434571890146, | |
| "grad_norm": 0.6862575200798275, | |
| "learning_rate": 1.7759781891328321e-06, | |
| "loss": 0.0344, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.2681744749596122, | |
| "grad_norm": 0.4157263515958759, | |
| "learning_rate": 1.762469314766328e-06, | |
| "loss": 0.0373, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.27140549273021, | |
| "grad_norm": 0.6888877331536797, | |
| "learning_rate": 1.7489839870483236e-06, | |
| "loss": 0.0493, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.2746365105008077, | |
| "grad_norm": 0.7225121433676083, | |
| "learning_rate": 1.7355226365158489e-06, | |
| "loss": 0.0685, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.2778675282714054, | |
| "grad_norm": 0.7380932165691942, | |
| "learning_rate": 1.7220856929404342e-06, | |
| "loss": 0.068, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.2810985460420032, | |
| "grad_norm": 0.8373452926500001, | |
| "learning_rate": 1.7086735853143803e-06, | |
| "loss": 0.0263, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.284329563812601, | |
| "grad_norm": 0.4057578810529795, | |
| "learning_rate": 1.6952867418370707e-06, | |
| "loss": 0.0435, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.2875605815831987, | |
| "grad_norm": 0.8172156898788309, | |
| "learning_rate": 1.6819255899012963e-06, | |
| "loss": 0.0521, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.2907915993537964, | |
| "grad_norm": 0.5540166018870007, | |
| "learning_rate": 1.6685905560796101e-06, | |
| "loss": 0.0301, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.2940226171243943, | |
| "grad_norm": 0.5152650366438487, | |
| "learning_rate": 1.6552820661107119e-06, | |
| "loss": 0.0357, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.297253634894992, | |
| "grad_norm": 0.5108482274841849, | |
| "learning_rate": 1.6420005448858522e-06, | |
| "loss": 0.0431, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.3004846526655895, | |
| "grad_norm": 0.4240316233127095, | |
| "learning_rate": 1.6287464164352684e-06, | |
| "loss": 0.0342, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.3037156704361874, | |
| "grad_norm": 0.5910340203210401, | |
| "learning_rate": 1.6155201039146478e-06, | |
| "loss": 0.0466, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.306946688206785, | |
| "grad_norm": 0.4716663558161469, | |
| "learning_rate": 1.6023220295916162e-06, | |
| "loss": 0.0247, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.310177705977383, | |
| "grad_norm": 0.5348908739290964, | |
| "learning_rate": 1.5891526148322594e-06, | |
| "loss": 0.0448, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.3134087237479806, | |
| "grad_norm": 0.7405657033049722, | |
| "learning_rate": 1.576012280087666e-06, | |
| "loss": 0.0425, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.3166397415185784, | |
| "grad_norm": 0.5749817118431164, | |
| "learning_rate": 1.562901444880508e-06, | |
| "loss": 0.0415, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.319870759289176, | |
| "grad_norm": 0.659084728824521, | |
| "learning_rate": 1.5498205277916444e-06, | |
| "loss": 0.0525, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.3231017770597737, | |
| "grad_norm": 0.4660035971953124, | |
| "learning_rate": 1.5367699464467596e-06, | |
| "loss": 0.0352, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.3263327948303716, | |
| "grad_norm": 0.6397348475129173, | |
| "learning_rate": 1.523750117503028e-06, | |
| "loss": 0.0456, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.3295638126009692, | |
| "grad_norm": 0.6385407976586193, | |
| "learning_rate": 1.5107614566358136e-06, | |
| "loss": 0.0562, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.332794830371567, | |
| "grad_norm": 0.5922977287717205, | |
| "learning_rate": 1.4978043785253964e-06, | |
| "loss": 0.0333, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.3360258481421647, | |
| "grad_norm": 0.47241930141595956, | |
| "learning_rate": 1.4848792968437376e-06, | |
| "loss": 0.0311, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.3392568659127626, | |
| "grad_norm": 0.43287945232089914, | |
| "learning_rate": 1.4719866242412661e-06, | |
| "loss": 0.0384, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.3424878836833603, | |
| "grad_norm": 0.5476421812276494, | |
| "learning_rate": 1.4591267723337122e-06, | |
| "loss": 0.0285, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.345718901453958, | |
| "grad_norm": 0.6654675392000585, | |
| "learning_rate": 1.4463001516889597e-06, | |
| "loss": 0.0546, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.3489499192245558, | |
| "grad_norm": 0.5265404309231011, | |
| "learning_rate": 1.4335071718139379e-06, | |
| "loss": 0.0563, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.3521809369951534, | |
| "grad_norm": 0.7273168490119778, | |
| "learning_rate": 1.4207482411415532e-06, | |
| "loss": 0.0319, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.3554119547657513, | |
| "grad_norm": 0.5515284785247014, | |
| "learning_rate": 1.4080237670176456e-06, | |
| "loss": 0.0465, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.358642972536349, | |
| "grad_norm": 0.6815587580127385, | |
| "learning_rate": 1.395334155687981e-06, | |
| "loss": 0.0396, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.3618739903069468, | |
| "grad_norm": 0.5046530996438253, | |
| "learning_rate": 1.382679812285287e-06, | |
| "loss": 0.0359, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.3651050080775444, | |
| "grad_norm": 0.636616018716514, | |
| "learning_rate": 1.3700611408163158e-06, | |
| "loss": 0.0364, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.368336025848142, | |
| "grad_norm": 0.8016482324734947, | |
| "learning_rate": 1.357478544148943e-06, | |
| "loss": 0.0548, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.37156704361874, | |
| "grad_norm": 0.7335385001009505, | |
| "learning_rate": 1.3449324239993094e-06, | |
| "loss": 0.0307, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.3747980613893376, | |
| "grad_norm": 0.5405071350061603, | |
| "learning_rate": 1.3324231809189985e-06, | |
| "loss": 0.0435, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.3780290791599354, | |
| "grad_norm": 0.5968950584538801, | |
| "learning_rate": 1.3199512142822374e-06, | |
| "loss": 0.0442, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.381260096930533, | |
| "grad_norm": 0.5586370191823509, | |
| "learning_rate": 1.3075169222731573e-06, | |
| "loss": 0.034, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.384491114701131, | |
| "grad_norm": 0.6462203526051947, | |
| "learning_rate": 1.2951207018730772e-06, | |
| "loss": 0.0591, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.3877221324717286, | |
| "grad_norm": 0.4892957539044203, | |
| "learning_rate": 1.2827629488478254e-06, | |
| "loss": 0.0345, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.3909531502423262, | |
| "grad_norm": 0.543453607383533, | |
| "learning_rate": 1.270444057735113e-06, | |
| "loss": 0.0275, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.394184168012924, | |
| "grad_norm": 0.6522770107038732, | |
| "learning_rate": 1.25816442183193e-06, | |
| "loss": 0.0446, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.3974151857835218, | |
| "grad_norm": 0.54367348046309, | |
| "learning_rate": 1.2459244331819912e-06, | |
| "loss": 0.0381, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.4006462035541196, | |
| "grad_norm": 0.6775444066593574, | |
| "learning_rate": 1.2337244825632217e-06, | |
| "loss": 0.0507, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.4038772213247173, | |
| "grad_norm": 0.7125046807656238, | |
| "learning_rate": 1.2215649594752782e-06, | |
| "loss": 0.0564, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.4071082390953151, | |
| "grad_norm": 0.6675281925628074, | |
| "learning_rate": 1.2094462521271156e-06, | |
| "loss": 0.0337, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.4103392568659128, | |
| "grad_norm": 0.5401273336581622, | |
| "learning_rate": 1.197368747424592e-06, | |
| "loss": 0.0398, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.4135702746365104, | |
| "grad_norm": 0.7065350638753161, | |
| "learning_rate": 1.1853328309581139e-06, | |
| "loss": 0.0435, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.4168012924071083, | |
| "grad_norm": 0.7095730881777876, | |
| "learning_rate": 1.17333888699033e-06, | |
| "loss": 0.0297, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.420032310177706, | |
| "grad_norm": 0.8419216282460489, | |
| "learning_rate": 1.161387298443863e-06, | |
| "loss": 0.0516, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.4232633279483038, | |
| "grad_norm": 0.6570671193798376, | |
| "learning_rate": 1.149478446889077e-06, | |
| "loss": 0.0499, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.4264943457189014, | |
| "grad_norm": 0.4516949531869673, | |
| "learning_rate": 1.1376127125319065e-06, | |
| "loss": 0.0332, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.4297253634894993, | |
| "grad_norm": 0.6804270748390296, | |
| "learning_rate": 1.125790474201708e-06, | |
| "loss": 0.0554, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.432956381260097, | |
| "grad_norm": 0.6237302316612817, | |
| "learning_rate": 1.1140121093391736e-06, | |
| "loss": 0.0406, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.4361873990306946, | |
| "grad_norm": 0.5492979077070937, | |
| "learning_rate": 1.1022779939842704e-06, | |
| "loss": 0.0359, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.4394184168012925, | |
| "grad_norm": 0.5435741772086434, | |
| "learning_rate": 1.0905885027642484e-06, | |
| "loss": 0.0524, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.44264943457189, | |
| "grad_norm": 0.6801306673227177, | |
| "learning_rate": 1.0789440088816666e-06, | |
| "loss": 0.0385, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.445880452342488, | |
| "grad_norm": 0.4434075942240935, | |
| "learning_rate": 1.0673448841024875e-06, | |
| "loss": 0.048, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.4491114701130856, | |
| "grad_norm": 0.6540515881701509, | |
| "learning_rate": 1.0557914987442048e-06, | |
| "loss": 0.0471, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.4523424878836835, | |
| "grad_norm": 0.6361305209643866, | |
| "learning_rate": 1.0442842216640168e-06, | |
| "loss": 0.0388, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.4555735056542811, | |
| "grad_norm": 0.5896519670655204, | |
| "learning_rate": 1.0328234202470574e-06, | |
| "loss": 0.0466, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.4588045234248788, | |
| "grad_norm": 0.49886003535682916, | |
| "learning_rate": 1.021409460394663e-06, | |
| "loss": 0.024, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.4620355411954766, | |
| "grad_norm": 0.4865979088073283, | |
| "learning_rate": 1.0100427065126874e-06, | |
| "loss": 0.0461, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.4652665589660743, | |
| "grad_norm": 0.6304447558055624, | |
| "learning_rate": 9.987235214998741e-07, | |
| "loss": 0.0303, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.468497576736672, | |
| "grad_norm": 0.5998572755132743, | |
| "learning_rate": 9.87452266736266e-07, | |
| "loss": 0.0413, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.4717285945072698, | |
| "grad_norm": 0.475261837629553, | |
| "learning_rate": 9.762293020716696e-07, | |
| "loss": 0.051, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.4749596122778676, | |
| "grad_norm": 0.5717008410694144, | |
| "learning_rate": 9.650549858141646e-07, | |
| "loss": 0.0325, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.4781906300484653, | |
| "grad_norm": 0.5180031308985593, | |
| "learning_rate": 9.53929674718668e-07, | |
| "loss": 0.0296, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.481421647819063, | |
| "grad_norm": 0.5815736580452745, | |
| "learning_rate": 9.428537239755381e-07, | |
| "loss": 0.0462, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.4846526655896608, | |
| "grad_norm": 0.6313068151131468, | |
| "learning_rate": 9.318274871992408e-07, | |
| "loss": 0.0726, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.4878836833602584, | |
| "grad_norm": 1.221847458822884, | |
| "learning_rate": 9.208513164170579e-07, | |
| "loss": 0.0405, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.491114701130856, | |
| "grad_norm": 0.5310642493149078, | |
| "learning_rate": 9.099255620578451e-07, | |
| "loss": 0.0296, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.494345718901454, | |
| "grad_norm": 0.5177627615454572, | |
| "learning_rate": 8.990505729408494e-07, | |
| "loss": 0.0651, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.4975767366720518, | |
| "grad_norm": 0.6991644226087359, | |
| "learning_rate": 8.882266962645695e-07, | |
| "loss": 0.0403, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.5008077544426495, | |
| "grad_norm": 0.5744181895807364, | |
| "learning_rate": 8.774542775956679e-07, | |
| "loss": 0.0512, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.504038772213247, | |
| "grad_norm": 0.5233586593423929, | |
| "learning_rate": 8.667336608579488e-07, | |
| "loss": 0.0408, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.507269789983845, | |
| "grad_norm": 0.569083608134158, | |
| "learning_rate": 8.560651883213633e-07, | |
| "loss": 0.0325, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.5105008077544426, | |
| "grad_norm": 0.5065403358211885, | |
| "learning_rate": 8.454492005910942e-07, | |
| "loss": 0.039, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.5137318255250403, | |
| "grad_norm": 0.5538367181328561, | |
| "learning_rate": 8.34886036596676e-07, | |
| "loss": 0.0461, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.5169628432956381, | |
| "grad_norm": 0.5214647871849324, | |
| "learning_rate": 8.243760335811734e-07, | |
| "loss": 0.0284, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.520193861066236, | |
| "grad_norm": 0.6202345630142632, | |
| "learning_rate": 8.139195270904182e-07, | |
| "loss": 0.0448, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.5234248788368336, | |
| "grad_norm": 0.596159859282425, | |
| "learning_rate": 8.035168509622948e-07, | |
| "loss": 0.0743, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.5266558966074313, | |
| "grad_norm": 0.5888141535757658, | |
| "learning_rate": 7.931683373160789e-07, | |
| "loss": 0.0379, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.5298869143780292, | |
| "grad_norm": 0.7088883925452755, | |
| "learning_rate": 7.828743165418393e-07, | |
| "loss": 0.0676, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.5331179321486268, | |
| "grad_norm": 0.7275077844858919, | |
| "learning_rate": 7.726351172898869e-07, | |
| "loss": 0.0372, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.5363489499192244, | |
| "grad_norm": 0.4568256055898612, | |
| "learning_rate": 7.624510664602819e-07, | |
| "loss": 0.037, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.5395799676898223, | |
| "grad_norm": 0.5900479097481395, | |
| "learning_rate": 7.523224891923983e-07, | |
| "loss": 0.0335, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.5428109854604202, | |
| "grad_norm": 0.5945624497711766, | |
| "learning_rate": 7.422497088545436e-07, | |
| "loss": 0.0473, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.5460420032310178, | |
| "grad_norm": 0.6053475731541442, | |
| "learning_rate": 7.322330470336314e-07, | |
| "loss": 0.0355, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.5492730210016155, | |
| "grad_norm": 0.5924482508513196, | |
| "learning_rate": 7.222728235249196e-07, | |
| "loss": 0.0395, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.5525040387722133, | |
| "grad_norm": 0.6450291567303781, | |
| "learning_rate": 7.123693563217978e-07, | |
| "loss": 0.0597, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.555735056542811, | |
| "grad_norm": 0.8328909187805514, | |
| "learning_rate": 7.025229616056326e-07, | |
| "loss": 0.0472, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.5589660743134086, | |
| "grad_norm": 0.6806153349540757, | |
| "learning_rate": 6.927339537356778e-07, | |
| "loss": 0.0484, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.5621970920840065, | |
| "grad_norm": 0.6602101596852485, | |
| "learning_rate": 6.830026452390354e-07, | |
| "loss": 0.0477, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.5654281098546043, | |
| "grad_norm": 0.5937543973198223, | |
| "learning_rate": 6.733293468006774e-07, | |
| "loss": 0.042, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.568659127625202, | |
| "grad_norm": 0.6210912711298314, | |
| "learning_rate": 6.637143672535282e-07, | |
| "loss": 0.0411, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.5718901453957996, | |
| "grad_norm": 0.563220202424066, | |
| "learning_rate": 6.541580135686046e-07, | |
| "loss": 0.0287, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.5751211631663975, | |
| "grad_norm": 0.4909721841694075, | |
| "learning_rate": 6.446605908452122e-07, | |
| "loss": 0.0341, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.5783521809369951, | |
| "grad_norm": 0.6948201563174986, | |
| "learning_rate": 6.352224023012096e-07, | |
| "loss": 0.046, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.5815831987075928, | |
| "grad_norm": 0.46668729207255394, | |
| "learning_rate": 6.258437492633254e-07, | |
| "loss": 0.0597, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.5848142164781907, | |
| "grad_norm": 0.7349838782964371, | |
| "learning_rate": 6.165249311575361e-07, | |
| "loss": 0.047, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.5880452342487885, | |
| "grad_norm": 0.6329932642072914, | |
| "learning_rate": 6.072662454995101e-07, | |
| "loss": 0.0402, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.5912762520193862, | |
| "grad_norm": 1.1075017060647618, | |
| "learning_rate": 5.980679878851076e-07, | |
| "loss": 0.0644, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.5945072697899838, | |
| "grad_norm": 0.6709210038908787, | |
| "learning_rate": 5.889304519809402e-07, | |
| "loss": 0.0405, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.5977382875605817, | |
| "grad_norm": 0.6039830090199084, | |
| "learning_rate": 5.798539295150027e-07, | |
| "loss": 0.0515, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.6009693053311793, | |
| "grad_norm": 0.47306097411821496, | |
| "learning_rate": 5.708387102673507e-07, | |
| "loss": 0.0452, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.604200323101777, | |
| "grad_norm": 0.7299041964334542, | |
| "learning_rate": 5.618850820608548e-07, | |
| "loss": 0.0386, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.6074313408723748, | |
| "grad_norm": 0.527708763111458, | |
| "learning_rate": 5.529933307520102e-07, | |
| "loss": 0.037, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.6106623586429727, | |
| "grad_norm": 0.6276271955216034, | |
| "learning_rate": 5.441637402218077e-07, | |
| "loss": 0.0566, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.6138933764135701, | |
| "grad_norm": 0.5498853546865076, | |
| "learning_rate": 5.353965923666743e-07, | |
| "loss": 0.0373, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.617124394184168, | |
| "grad_norm": 0.6058328263346706, | |
| "learning_rate": 5.26692167089472e-07, | |
| "loss": 0.0269, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.6203554119547658, | |
| "grad_norm": 0.4158541379346223, | |
| "learning_rate": 5.180507422905585e-07, | |
| "loss": 0.0393, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.6235864297253635, | |
| "grad_norm": 0.5063280186973653, | |
| "learning_rate": 5.094725938589193e-07, | |
| "loss": 0.0299, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.6268174474959611, | |
| "grad_norm": 0.42841992008265245, | |
| "learning_rate": 5.009579956633578e-07, | |
| "loss": 0.0301, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.630048465266559, | |
| "grad_norm": 0.7945718206145054, | |
| "learning_rate": 4.925072195437511e-07, | |
| "loss": 0.0488, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.6332794830371569, | |
| "grad_norm": 0.507794598401741, | |
| "learning_rate": 4.841205353023715e-07, | |
| "loss": 0.0617, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.6365105008077543, | |
| "grad_norm": 0.5915868711150505, | |
| "learning_rate": 4.757982106952735e-07, | |
| "loss": 0.0357, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.6397415185783522, | |
| "grad_norm": 0.5886571451007635, | |
| "learning_rate": 4.6754051142374275e-07, | |
| "loss": 0.0383, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.64297253634895, | |
| "grad_norm": 0.7114424474339083, | |
| "learning_rate": 4.5934770112581713e-07, | |
| "loss": 0.0299, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.6462035541195477, | |
| "grad_norm": 0.4631064089704488, | |
| "learning_rate": 4.512200413678672e-07, | |
| "loss": 0.04, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.6494345718901453, | |
| "grad_norm": 0.4403449444039548, | |
| "learning_rate": 4.4315779163624476e-07, | |
| "loss": 0.0329, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.6526655896607432, | |
| "grad_norm": 0.5420749756119222, | |
| "learning_rate": 4.351612093290006e-07, | |
| "loss": 0.0423, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.655896607431341, | |
| "grad_norm": 0.6010914237823689, | |
| "learning_rate": 4.2723054974766585e-07, | |
| "loss": 0.0381, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.6591276252019385, | |
| "grad_norm": 0.7377468114121554, | |
| "learning_rate": 4.1936606608909887e-07, | |
| "loss": 0.0515, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.6623586429725363, | |
| "grad_norm": 0.3875573660249705, | |
| "learning_rate": 4.115680094374075e-07, | |
| "loss": 0.0375, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.6655896607431342, | |
| "grad_norm": 0.605903732271668, | |
| "learning_rate": 4.038366287559245e-07, | |
| "loss": 0.0359, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.6688206785137318, | |
| "grad_norm": 0.5676552999273701, | |
| "learning_rate": 3.961721708792662e-07, | |
| "loss": 0.0352, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.6720516962843295, | |
| "grad_norm": 0.49258629677672283, | |
| "learning_rate": 3.8857488050544903e-07, | |
| "loss": 0.0427, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.6752827140549273, | |
| "grad_norm": 0.6179633227247718, | |
| "learning_rate": 3.8104500018807806e-07, | |
| "loss": 0.0436, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.678513731825525, | |
| "grad_norm": 0.6093007583811607, | |
| "learning_rate": 3.7358277032860016e-07, | |
| "loss": 0.0285, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.6817447495961226, | |
| "grad_norm": 0.5650933346174037, | |
| "learning_rate": 3.6618842916863377e-07, | |
| "loss": 0.062, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.6849757673667205, | |
| "grad_norm": 0.938334017983649, | |
| "learning_rate": 3.5886221278236045e-07, | |
| "loss": 0.0533, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.6882067851373184, | |
| "grad_norm": 0.5004878719352018, | |
| "learning_rate": 3.5160435506898514e-07, | |
| "loss": 0.0357, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.691437802907916, | |
| "grad_norm": 0.5331645289198377, | |
| "learning_rate": 3.4441508774527345e-07, | |
| "loss": 0.049, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.6946688206785137, | |
| "grad_norm": 0.6457764842086132, | |
| "learning_rate": 3.3729464033815077e-07, | |
| "loss": 0.0428, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.6978998384491115, | |
| "grad_norm": 0.5850353162840484, | |
| "learning_rate": 3.3024324017737555e-07, | |
| "loss": 0.0353, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.7011308562197092, | |
| "grad_norm": 0.5309296730801445, | |
| "learning_rate": 3.232611123882809e-07, | |
| "loss": 0.0423, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.7043618739903068, | |
| "grad_norm": 0.5673210523979683, | |
| "learning_rate": 3.163484798845862e-07, | |
| "loss": 0.0426, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.7075928917609047, | |
| "grad_norm": 0.6324074485656012, | |
| "learning_rate": 3.0950556336128255e-07, | |
| "loss": 0.0306, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.7108239095315025, | |
| "grad_norm": 0.44362940233898746, | |
| "learning_rate": 3.0273258128758585e-07, | |
| "loss": 0.0356, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7140549273021002, | |
| "grad_norm": 0.6204310781031833, | |
| "learning_rate": 2.960297498999601e-07, | |
| "loss": 0.0375, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.7172859450726978, | |
| "grad_norm": 0.5062348215221578, | |
| "learning_rate": 2.893972831952166e-07, | |
| "loss": 0.0345, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.7205169628432957, | |
| "grad_norm": 0.5125908206118261, | |
| "learning_rate": 2.82835392923681e-07, | |
| "loss": 0.0284, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.7237479806138933, | |
| "grad_norm": 0.4768669694675723, | |
| "learning_rate": 2.7634428858242995e-07, | |
| "loss": 0.0466, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.726978998384491, | |
| "grad_norm": 0.6399021669002876, | |
| "learning_rate": 2.699241774086081e-07, | |
| "loss": 0.0561, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.7302100161550888, | |
| "grad_norm": 0.6404329572646896, | |
| "learning_rate": 2.6357526437280764e-07, | |
| "loss": 0.0322, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.7334410339256867, | |
| "grad_norm": 0.594233151602921, | |
| "learning_rate": 2.572977521725242e-07, | |
| "loss": 0.0343, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.7366720516962844, | |
| "grad_norm": 0.6970207221495152, | |
| "learning_rate": 2.5109184122568797e-07, | |
| "loss": 0.0676, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.739903069466882, | |
| "grad_norm": 0.5196706248162594, | |
| "learning_rate": 2.449577296642647e-07, | |
| "loss": 0.0306, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.7431340872374799, | |
| "grad_norm": 0.4727031774493694, | |
| "learning_rate": 2.388956133279266e-07, | |
| "loss": 0.0318, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.7463651050080775, | |
| "grad_norm": 0.5101584744111383, | |
| "learning_rate": 2.329056857578049e-07, | |
| "loss": 0.0475, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.7495961227786752, | |
| "grad_norm": 0.47288331779975473, | |
| "learning_rate": 2.2698813819030802e-07, | |
| "loss": 0.0349, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.752827140549273, | |
| "grad_norm": 0.5306522527872187, | |
| "learning_rate": 2.2114315955101495e-07, | |
| "loss": 0.0404, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.7560581583198709, | |
| "grad_norm": 0.6700784665030379, | |
| "learning_rate": 2.153709364486467e-07, | |
| "loss": 0.0535, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.7592891760904685, | |
| "grad_norm": 0.5532660459489857, | |
| "learning_rate": 2.0967165316910675e-07, | |
| "loss": 0.0362, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.7625201938610662, | |
| "grad_norm": 0.5690415151498087, | |
| "learning_rate": 2.040454916695972e-07, | |
| "loss": 0.0416, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.765751211631664, | |
| "grad_norm": 0.581032791692991, | |
| "learning_rate": 1.9849263157281057e-07, | |
| "loss": 0.0489, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.7689822294022617, | |
| "grad_norm": 0.5121050632247067, | |
| "learning_rate": 1.9301325016119338e-07, | |
| "loss": 0.0326, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.7722132471728593, | |
| "grad_norm": 0.4844742553742546, | |
| "learning_rate": 1.8760752237128864e-07, | |
| "loss": 0.0495, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.7754442649434572, | |
| "grad_norm": 0.6114242878876927, | |
| "learning_rate": 1.8227562078814903e-07, | |
| "loss": 0.0332, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.778675282714055, | |
| "grad_norm": 0.5118426883532984, | |
| "learning_rate": 1.7701771563982757e-07, | |
| "loss": 0.0369, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.7819063004846527, | |
| "grad_norm": 0.4094343968094117, | |
| "learning_rate": 1.7183397479194175e-07, | |
| "loss": 0.0449, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.7851373182552503, | |
| "grad_norm": 1.1161654981261413, | |
| "learning_rate": 1.667245637423162e-07, | |
| "loss": 0.0461, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.7883683360258482, | |
| "grad_norm": 0.5663414514839669, | |
| "learning_rate": 1.6168964561569716e-07, | |
| "loss": 0.0341, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.7915993537964459, | |
| "grad_norm": 0.5593424866536396, | |
| "learning_rate": 1.5672938115854546e-07, | |
| "loss": 0.0415, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.7948303715670435, | |
| "grad_norm": 0.6317941412989588, | |
| "learning_rate": 1.5184392873390463e-07, | |
| "loss": 0.0393, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.7980613893376414, | |
| "grad_norm": 0.4921503369201142, | |
| "learning_rate": 1.470334443163432e-07, | |
| "loss": 0.0338, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.8012924071082392, | |
| "grad_norm": 0.45350637218576434, | |
| "learning_rate": 1.4229808148697732e-07, | |
| "loss": 0.0307, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.8045234248788369, | |
| "grad_norm": 0.5558616504683307, | |
| "learning_rate": 1.3763799142856693e-07, | |
| "loss": 0.0394, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.8077544426494345, | |
| "grad_norm": 0.5602513622205471, | |
| "learning_rate": 1.3305332292068706e-07, | |
| "loss": 0.0461, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8109854604200324, | |
| "grad_norm": 0.4909817316983247, | |
| "learning_rate": 1.285442223349806e-07, | |
| "loss": 0.0391, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.81421647819063, | |
| "grad_norm": 0.6251726656596086, | |
| "learning_rate": 1.2411083363048386e-07, | |
| "loss": 0.0384, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.8174474959612277, | |
| "grad_norm": 0.4604846376779599, | |
| "learning_rate": 1.1975329834903017e-07, | |
| "loss": 0.0271, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.8206785137318255, | |
| "grad_norm": 0.5538018269789088, | |
| "learning_rate": 1.1547175561073154e-07, | |
| "loss": 0.0606, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.8239095315024234, | |
| "grad_norm": 0.716319643356168, | |
| "learning_rate": 1.1126634210953751e-07, | |
| "loss": 0.0455, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.827140549273021, | |
| "grad_norm": 0.5640740061628162, | |
| "learning_rate": 1.071371921088693e-07, | |
| "loss": 0.0243, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.8303715670436187, | |
| "grad_norm": 0.31893744573237565, | |
| "learning_rate": 1.0308443743733548e-07, | |
| "loss": 0.0239, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.8336025848142166, | |
| "grad_norm": 0.5743187176077259, | |
| "learning_rate": 9.91082074845215e-08, | |
| "loss": 0.0399, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.8368336025848142, | |
| "grad_norm": 0.555173849824351, | |
| "learning_rate": 9.520862919685903e-08, | |
| "loss": 0.0565, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.8400646203554119, | |
| "grad_norm": 0.5247191749483705, | |
| "learning_rate": 9.138582707357429e-08, | |
| "loss": 0.0317, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.8432956381260097, | |
| "grad_norm": 0.6462019405718445, | |
| "learning_rate": 8.763992316271175e-08, | |
| "loss": 0.0443, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.8465266558966076, | |
| "grad_norm": 0.5199310882556472, | |
| "learning_rate": 8.397103705723774e-08, | |
| "loss": 0.0496, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.849757673667205, | |
| "grad_norm": 0.5797413283647508, | |
| "learning_rate": 8.037928589122306e-08, | |
| "loss": 0.0362, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.8529886914378029, | |
| "grad_norm": 0.5916717165607657, | |
| "learning_rate": 7.686478433610339e-08, | |
| "loss": 0.036, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.8562197092084007, | |
| "grad_norm": 0.6872719525671662, | |
| "learning_rate": 7.342764459701723e-08, | |
| "loss": 0.0433, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.8594507269789984, | |
| "grad_norm": 0.5719774164295217, | |
| "learning_rate": 7.006797640922436e-08, | |
| "loss": 0.045, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.862681744749596, | |
| "grad_norm": 0.4607000516397503, | |
| "learning_rate": 6.678588703460165e-08, | |
| "loss": 0.0308, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.865912762520194, | |
| "grad_norm": 0.47224203890648525, | |
| "learning_rate": 6.358148125822e-08, | |
| "loss": 0.0263, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.8691437802907918, | |
| "grad_norm": 0.4659556647523011, | |
| "learning_rate": 6.045486138499756e-08, | |
| "loss": 0.0331, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.8723747980613892, | |
| "grad_norm": 0.48850435913378176, | |
| "learning_rate": 5.7406127236434016e-08, | |
| "loss": 0.0385, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.875605815831987, | |
| "grad_norm": 0.527755833512547, | |
| "learning_rate": 5.4435376147423945e-08, | |
| "loss": 0.0361, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.878836833602585, | |
| "grad_norm": 0.5906290141554674, | |
| "learning_rate": 5.154270296314878e-08, | |
| "loss": 0.0341, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.8820678513731826, | |
| "grad_norm": 0.5773632179611871, | |
| "learning_rate": 4.872820003604922e-08, | |
| "loss": 0.0436, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.8852988691437802, | |
| "grad_norm": 0.5457918200937637, | |
| "learning_rate": 4.599195722287536e-08, | |
| "loss": 0.0316, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.888529886914378, | |
| "grad_norm": 0.48490285444923714, | |
| "learning_rate": 4.3334061881820934e-08, | |
| "loss": 0.0367, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.891760904684976, | |
| "grad_norm": 0.6250593429748459, | |
| "learning_rate": 4.0754598869730824e-08, | |
| "loss": 0.048, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.8949919224555734, | |
| "grad_norm": 0.7932835837622026, | |
| "learning_rate": 3.825365053939406e-08, | |
| "loss": 0.0444, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.8982229402261712, | |
| "grad_norm": 0.694824220848745, | |
| "learning_rate": 3.583129673691427e-08, | |
| "loss": 0.046, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.901453957996769, | |
| "grad_norm": 0.6400347943270668, | |
| "learning_rate": 3.3487614799159186e-08, | |
| "loss": 0.0447, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.9046849757673667, | |
| "grad_norm": 0.6428387216930922, | |
| "learning_rate": 3.1222679551293486e-08, | |
| "loss": 0.0378, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.9079159935379644, | |
| "grad_norm": 0.5181562999964454, | |
| "learning_rate": 2.9036563304389032e-08, | |
| "loss": 0.0447, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.9111470113085622, | |
| "grad_norm": 0.626996440091958, | |
| "learning_rate": 2.6929335853115302e-08, | |
| "loss": 0.0549, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.9143780290791599, | |
| "grad_norm": 0.46884001067775155, | |
| "learning_rate": 2.490106447351315e-08, | |
| "loss": 0.0277, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.9176090468497575, | |
| "grad_norm": 0.6158232854353491, | |
| "learning_rate": 2.295181392084511e-08, | |
| "loss": 0.0601, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.9208400646203554, | |
| "grad_norm": 0.9359381094535966, | |
| "learning_rate": 2.1081646427528468e-08, | |
| "loss": 0.0318, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.9240710823909533, | |
| "grad_norm": 0.49020001937141494, | |
| "learning_rate": 1.9290621701149315e-08, | |
| "loss": 0.0426, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.927302100161551, | |
| "grad_norm": 0.6612576122540997, | |
| "learning_rate": 1.757879692255493e-08, | |
| "loss": 0.0425, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.9305331179321485, | |
| "grad_norm": 0.6686158894140516, | |
| "learning_rate": 1.5946226744029402e-08, | |
| "loss": 0.0513, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.9337641357027464, | |
| "grad_norm": 0.570200319629059, | |
| "learning_rate": 1.43929632875478e-08, | |
| "loss": 0.0433, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.936995153473344, | |
| "grad_norm": 0.7735949555352855, | |
| "learning_rate": 1.2919056143113062e-08, | |
| "loss": 0.0626, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9402261712439417, | |
| "grad_norm": 0.870645903135215, | |
| "learning_rate": 1.1524552367171705e-08, | |
| "loss": 0.0325, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.9434571890145396, | |
| "grad_norm": 0.7457181182787564, | |
| "learning_rate": 1.0209496481112247e-08, | |
| "loss": 0.0429, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.9466882067851374, | |
| "grad_norm": 0.4589602142690698, | |
| "learning_rate": 8.973930469844127e-09, | |
| "loss": 0.0456, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.949919224555735, | |
| "grad_norm": 0.6365759321501641, | |
| "learning_rate": 7.81789378045572e-09, | |
| "loss": 0.0444, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.9531502423263327, | |
| "grad_norm": 0.5685501801373803, | |
| "learning_rate": 6.741423320957286e-09, | |
| "loss": 0.0389, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.9563812600969306, | |
| "grad_norm": 0.6248328079624079, | |
| "learning_rate": 5.7445534591002435e-09, | |
| "loss": 0.0359, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.9596122778675282, | |
| "grad_norm": 0.5081816835827963, | |
| "learning_rate": 4.8273160212811145e-09, | |
| "loss": 0.0372, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.9628432956381259, | |
| "grad_norm": 0.6052821334268623, | |
| "learning_rate": 3.989740291526212e-09, | |
| "loss": 0.0342, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.9660743134087237, | |
| "grad_norm": 0.6069914651621039, | |
| "learning_rate": 3.2318530105546198e-09, | |
| "loss": 0.0562, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.9693053311793216, | |
| "grad_norm": 0.6247816362826526, | |
| "learning_rate": 2.553678374926649e-09, | |
| "loss": 0.0345, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.9725363489499192, | |
| "grad_norm": 0.4803333487137274, | |
| "learning_rate": 1.9552380362697355e-09, | |
| "loss": 0.0389, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.975767366720517, | |
| "grad_norm": 0.4064456733315355, | |
| "learning_rate": 1.4365511005878796e-09, | |
| "loss": 0.038, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.9789983844911148, | |
| "grad_norm": 0.6627884418012588, | |
| "learning_rate": 9.976341276521361e-10, | |
| "loss": 0.0499, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.9822294022617124, | |
| "grad_norm": 0.5188386174906152, | |
| "learning_rate": 6.385011304704814e-10, | |
| "loss": 0.0504, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.98546042003231, | |
| "grad_norm": 0.4923517024917629, | |
| "learning_rate": 3.59163574841781e-10, | |
| "loss": 0.0313, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.988691437802908, | |
| "grad_norm": 1.0015315278245285, | |
| "learning_rate": 1.5963037898913957e-10, | |
| "loss": 0.0512, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.9919224555735058, | |
| "grad_norm": 0.6617805237667193, | |
| "learning_rate": 3.9907913275683e-11, | |
| "loss": 0.0487, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.9951534733441034, | |
| "grad_norm": 0.5338181066973476, | |
| "learning_rate": 0.0, | |
| "loss": 0.0393, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.9951534733441034, | |
| "step": 618, | |
| "total_flos": 52320870432768.0, | |
| "train_loss": 0.06418457114014135, | |
| "train_runtime": 2400.3631, | |
| "train_samples_per_second": 4.126, | |
| "train_steps_per_second": 0.257 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 618, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 52320870432768.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
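
The JSON above is the `trainer_state.json` that the Hugging Face Transformers `Trainer` writes alongside a checkpoint: `log_history` holds one entry per logged step (here `logging_steps` is 1) with `loss`, `learning_rate`, and `grad_norm`, followed by a final summary entry carrying `train_loss` and runtime statistics. Below is a minimal sketch of how one might load and summarize this state, assuming the file is saved as `trainer_state.json`; the filename and the printed summary are illustrative, not part of the original run.

```python
import json

# Hypothetical path; adjust to wherever the checkpoint directory lives.
with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step entries carry a "loss" key; the trailing summary entry does not,
# so filtering on "loss" separates the loss curve from the run totals.
steps = [e["step"] for e in state["log_history"] if "loss" in e]
losses = [e["loss"] for e in state["log_history"] if "loss" in e]

print(f"logged steps: {len(steps)} of max_steps={state['max_steps']}")
print(f"loss: first={losses[0]:.4f}, last={losses[-1]:.4f}")
print(f"reported mean train_loss: {state['log_history'][-1]['train_loss']:.4f}")
```

Against the values logged above, this would report 618 steps and a mean train loss of roughly 0.0642, matching the `train_loss` field in the summary entry.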