{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9997159897756319,
  "eval_steps": 500,
  "global_step": 880,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.001136040897472309,
      "grad_norm": 6.713736329359423,
      "learning_rate": 1.1363636363636364e-07,
      "loss": 0.2307,
      "step": 1
    },
    {
      "epoch": 0.002272081794944618,
      "grad_norm": 6.425061477822205,
      "learning_rate": 2.2727272727272729e-07,
      "loss": 0.2478,
      "step": 2
    },
    {
      "epoch": 0.003408122692416927,
      "grad_norm": 6.49997931766097,
      "learning_rate": 3.409090909090909e-07,
      "loss": 0.2494,
      "step": 3
    },
    {
      "epoch": 0.004544163589889236,
      "grad_norm": 6.438878495171851,
      "learning_rate": 4.5454545454545457e-07,
      "loss": 0.2252,
      "step": 4
    },
    {
      "epoch": 0.005680204487361545,
      "grad_norm": 6.0504803408240395,
      "learning_rate": 5.681818181818182e-07,
      "loss": 0.2356,
      "step": 5
    },
    {
      "epoch": 0.006816245384833854,
      "grad_norm": 4.3484407974076635,
      "learning_rate": 6.818181818181818e-07,
      "loss": 0.2422,
      "step": 6
    },
    {
      "epoch": 0.007952286282306162,
      "grad_norm": 4.171396401412328,
      "learning_rate": 7.954545454545455e-07,
      "loss": 0.2181,
      "step": 7
    },
    {
      "epoch": 0.009088327179778472,
      "grad_norm": 3.4177520767635032,
      "learning_rate": 9.090909090909091e-07,
      "loss": 0.2267,
      "step": 8
    },
    {
      "epoch": 0.010224368077250781,
      "grad_norm": 3.2064592077436522,
      "learning_rate": 1.0227272727272729e-06,
      "loss": 0.2149,
      "step": 9
    },
    {
      "epoch": 0.01136040897472309,
      "grad_norm": 2.430289501917744,
      "learning_rate": 1.1363636363636364e-06,
      "loss": 0.1955,
      "step": 10
    },
    {
      "epoch": 0.012496449872195399,
      "grad_norm": 2.3200048473028634,
      "learning_rate": 1.25e-06,
      "loss": 0.1946,
      "step": 11
    },
    {
      "epoch": 0.013632490769667709,
      "grad_norm": 2.0881837538285373,
      "learning_rate": 1.3636363636363636e-06,
      "loss": 0.1971,
      "step": 12
    },
    {
      "epoch": 0.014768531667140017,
      "grad_norm": 1.9540608830360986,
      "learning_rate": 1.4772727272727275e-06,
      "loss": 0.186,
      "step": 13
    },
    {
      "epoch": 0.015904572564612324,
      "grad_norm": 2.4445533261203884,
      "learning_rate": 1.590909090909091e-06,
      "loss": 0.1605,
      "step": 14
    },
    {
      "epoch": 0.017040613462084634,
      "grad_norm": 2.288800657192205,
      "learning_rate": 1.7045454545454546e-06,
      "loss": 0.1659,
      "step": 15
    },
    {
      "epoch": 0.018176654359556944,
      "grad_norm": 1.9509519762495295,
      "learning_rate": 1.8181818181818183e-06,
      "loss": 0.1618,
      "step": 16
    },
    {
      "epoch": 0.019312695257029253,
      "grad_norm": 1.592909275487469,
      "learning_rate": 1.931818181818182e-06,
      "loss": 0.1642,
      "step": 17
    },
    {
      "epoch": 0.020448736154501563,
      "grad_norm": 1.400794979169675,
      "learning_rate": 2.0454545454545457e-06,
      "loss": 0.1545,
      "step": 18
    },
    {
      "epoch": 0.021584777051973873,
      "grad_norm": 1.3975579360742723,
      "learning_rate": 2.1590909090909092e-06,
      "loss": 0.1451,
      "step": 19
    },
    {
      "epoch": 0.02272081794944618,
      "grad_norm": 1.6759990274286927,
      "learning_rate": 2.2727272727272728e-06,
      "loss": 0.1531,
      "step": 20
    },
    {
      "epoch": 0.02385685884691849,
      "grad_norm": 1.6129060518400373,
      "learning_rate": 2.3863636363636367e-06,
      "loss": 0.1777,
      "step": 21
    },
    {
      "epoch": 0.024992899744390798,
      "grad_norm": 1.4028499504088532,
      "learning_rate": 2.5e-06,
      "loss": 0.147,
      "step": 22
    },
    {
      "epoch": 0.026128940641863108,
      "grad_norm": 1.319887691895735,
      "learning_rate": 2.6136363636363637e-06,
      "loss": 0.1418,
      "step": 23
    },
    {
      "epoch": 0.027264981539335417,
      "grad_norm": 1.2184018909974619,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 0.1493,
      "step": 24
    },
    {
      "epoch": 0.028401022436807723,
      "grad_norm": 1.202619579827782,
      "learning_rate": 2.8409090909090916e-06,
      "loss": 0.1379,
      "step": 25
    },
    {
      "epoch": 0.029537063334280033,
      "grad_norm": 1.0605303626457392,
      "learning_rate": 2.954545454545455e-06,
      "loss": 0.1293,
      "step": 26
    },
    {
      "epoch": 0.030673104231752343,
      "grad_norm": 1.054431016591944,
      "learning_rate": 3.0681818181818186e-06,
      "loss": 0.118,
      "step": 27
    },
    {
      "epoch": 0.03180914512922465,
      "grad_norm": 1.0923689246123935,
      "learning_rate": 3.181818181818182e-06,
      "loss": 0.1244,
      "step": 28
    },
    {
      "epoch": 0.03294518602669696,
      "grad_norm": 1.1788052159109477,
      "learning_rate": 3.2954545454545456e-06,
      "loss": 0.133,
      "step": 29
    },
    {
      "epoch": 0.03408122692416927,
      "grad_norm": 1.273315281082378,
      "learning_rate": 3.409090909090909e-06,
      "loss": 0.1443,
      "step": 30
    },
    {
      "epoch": 0.03521726782164158,
      "grad_norm": 1.1149014713257173,
      "learning_rate": 3.522727272727273e-06,
      "loss": 0.1259,
      "step": 31
    },
    {
      "epoch": 0.03635330871911389,
      "grad_norm": 1.1519957043635232,
      "learning_rate": 3.6363636363636366e-06,
      "loss": 0.1254,
      "step": 32
    },
    {
      "epoch": 0.0374893496165862,
      "grad_norm": 1.0308432957952973,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.13,
      "step": 33
    },
    {
      "epoch": 0.03862539051405851,
      "grad_norm": 1.100380112079228,
      "learning_rate": 3.863636363636364e-06,
      "loss": 0.126,
      "step": 34
    },
    {
      "epoch": 0.039761431411530816,
      "grad_norm": 1.206728692480485,
      "learning_rate": 3.9772727272727275e-06,
      "loss": 0.1377,
      "step": 35
    },
    {
      "epoch": 0.040897472309003126,
      "grad_norm": 1.0754244396472252,
      "learning_rate": 4.0909090909090915e-06,
      "loss": 0.1239,
      "step": 36
    },
    {
      "epoch": 0.042033513206475435,
      "grad_norm": 0.991175124338083,
      "learning_rate": 4.204545454545455e-06,
      "loss": 0.1208,
      "step": 37
    },
    {
      "epoch": 0.043169554103947745,
      "grad_norm": 0.9863230162147564,
      "learning_rate": 4.3181818181818185e-06,
      "loss": 0.1236,
      "step": 38
    },
    {
      "epoch": 0.04430559500142005,
      "grad_norm": 0.9747917351402464,
      "learning_rate": 4.4318181818181824e-06,
      "loss": 0.1141,
      "step": 39
    },
    {
      "epoch": 0.04544163589889236,
      "grad_norm": 0.9526188744768924,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.1123,
      "step": 40
    },
    {
      "epoch": 0.04657767679636467,
      "grad_norm": 1.1038102555895053,
      "learning_rate": 4.6590909090909095e-06,
      "loss": 0.1127,
      "step": 41
    },
    {
      "epoch": 0.04771371769383698,
      "grad_norm": 0.9623246058987326,
      "learning_rate": 4.772727272727273e-06,
      "loss": 0.1095,
      "step": 42
    },
    {
      "epoch": 0.048849758591309286,
      "grad_norm": 0.995620380472554,
      "learning_rate": 4.8863636363636365e-06,
      "loss": 0.1136,
      "step": 43
    },
    {
      "epoch": 0.049985799488781596,
      "grad_norm": 1.1922367869339325,
      "learning_rate": 5e-06,
      "loss": 0.1111,
      "step": 44
    },
    {
      "epoch": 0.051121840386253906,
      "grad_norm": 1.0650960783331873,
      "learning_rate": 4.999982347887264e-06,
      "loss": 0.1182,
      "step": 45
    },
    {
      "epoch": 0.052257881283726215,
      "grad_norm": 1.097485598148884,
      "learning_rate": 4.9999293917983325e-06,
      "loss": 0.1166,
      "step": 46
    },
    {
      "epoch": 0.053393922181198525,
      "grad_norm": 1.223556692747266,
      "learning_rate": 4.999841132481035e-06,
      "loss": 0.1135,
      "step": 47
    },
    {
      "epoch": 0.054529963078670834,
      "grad_norm": 0.8885931835747594,
      "learning_rate": 4.999717571181742e-06,
      "loss": 0.1067,
      "step": 48
    },
    {
      "epoch": 0.055666003976143144,
      "grad_norm": 0.8978133832333574,
      "learning_rate": 4.999558709645349e-06,
      "loss": 0.1062,
      "step": 49
    },
    {
      "epoch": 0.05680204487361545,
      "grad_norm": 1.167148679651936,
      "learning_rate": 4.9993645501152485e-06,
      "loss": 0.1126,
      "step": 50
    },
    {
      "epoch": 0.057938085771087756,
      "grad_norm": 0.8981796373126424,
      "learning_rate": 4.999135095333301e-06,
      "loss": 0.1059,
      "step": 51
    },
    {
      "epoch": 0.059074126668560066,
      "grad_norm": 0.9930855230943375,
      "learning_rate": 4.998870348539797e-06,
      "loss": 0.1274,
      "step": 52
    },
    {
      "epoch": 0.060210167566032376,
      "grad_norm": 1.098950668279268,
      "learning_rate": 4.998570313473408e-06,
      "loss": 0.1116,
      "step": 53
    },
    {
      "epoch": 0.061346208463504685,
      "grad_norm": 0.9697083904957693,
      "learning_rate": 4.998234994371135e-06,
      "loss": 0.1051,
      "step": 54
    },
    {
      "epoch": 0.062482249360976995,
      "grad_norm": 1.0880364629521353,
      "learning_rate": 4.997864395968252e-06,
      "loss": 0.112,
      "step": 55
    },
    {
      "epoch": 0.0636182902584493,
      "grad_norm": 1.054159789623682,
      "learning_rate": 4.997458523498236e-06,
      "loss": 0.1146,
      "step": 56
    },
    {
      "epoch": 0.06475433115592161,
      "grad_norm": 1.0291021807953078,
      "learning_rate": 4.99701738269269e-06,
      "loss": 0.1042,
      "step": 57
    },
    {
      "epoch": 0.06589037205339392,
      "grad_norm": 1.0281626755590443,
      "learning_rate": 4.996540979781269e-06,
      "loss": 0.1219,
      "step": 58
    },
    {
      "epoch": 0.06702641295086623,
      "grad_norm": 1.1105171034316133,
      "learning_rate": 4.996029321491587e-06,
      "loss": 0.1216,
      "step": 59
    },
    {
      "epoch": 0.06816245384833854,
      "grad_norm": 1.034096902228184,
      "learning_rate": 4.995482415049123e-06,
      "loss": 0.1051,
      "step": 60
    },
    {
      "epoch": 0.06929849474581085,
      "grad_norm": 0.889629538614507,
      "learning_rate": 4.994900268177121e-06,
      "loss": 0.1046,
      "step": 61
    },
    {
      "epoch": 0.07043453564328316,
      "grad_norm": 0.9495477142186882,
      "learning_rate": 4.99428288909648e-06,
      "loss": 0.1018,
      "step": 62
    },
    {
      "epoch": 0.07157057654075547,
      "grad_norm": 0.8696989475429725,
      "learning_rate": 4.993630286525634e-06,
      "loss": 0.1062,
      "step": 63
    },
    {
      "epoch": 0.07270661743822777,
      "grad_norm": 0.9504849462967013,
      "learning_rate": 4.992942469680437e-06,
      "loss": 0.1099,
      "step": 64
    },
    {
      "epoch": 0.07384265833570008,
      "grad_norm": 0.9799914580295991,
      "learning_rate": 4.992219448274022e-06,
      "loss": 0.1076,
      "step": 65
    },
    {
      "epoch": 0.0749786992331724,
      "grad_norm": 0.9059465840472486,
      "learning_rate": 4.991461232516675e-06,
      "loss": 0.0946,
      "step": 66
    },
    {
      "epoch": 0.0761147401306447,
      "grad_norm": 0.8918398442966295,
      "learning_rate": 4.990667833115684e-06,
      "loss": 0.1118,
      "step": 67
    },
    {
      "epoch": 0.07725078102811701,
      "grad_norm": 1.00335150985273,
      "learning_rate": 4.989839261275191e-06,
      "loss": 0.1126,
      "step": 68
    },
    {
      "epoch": 0.07838682192558932,
      "grad_norm": 1.019420038485551,
      "learning_rate": 4.988975528696028e-06,
      "loss": 0.1072,
      "step": 69
    },
    {
      "epoch": 0.07952286282306163,
      "grad_norm": 0.9291595432428256,
      "learning_rate": 4.988076647575562e-06,
      "loss": 0.1045,
      "step": 70
    },
    {
      "epoch": 0.08065890372053394,
      "grad_norm": 1.0634383716539064,
      "learning_rate": 4.98714263060751e-06,
      "loss": 0.1089,
      "step": 71
    },
    {
      "epoch": 0.08179494461800625,
      "grad_norm": 1.0060661248475429,
      "learning_rate": 4.986173490981773e-06,
      "loss": 0.0987,
      "step": 72
    },
    {
      "epoch": 0.08293098551547856,
      "grad_norm": 1.106124562082597,
      "learning_rate": 4.9851692423842406e-06,
      "loss": 0.1157,
      "step": 73
    },
    {
      "epoch": 0.08406702641295087,
      "grad_norm": 0.9260108438954108,
      "learning_rate": 4.984129898996599e-06,
      "loss": 0.0975,
      "step": 74
    },
    {
      "epoch": 0.08520306731042318,
      "grad_norm": 1.0266696611897657,
      "learning_rate": 4.983055475496134e-06,
      "loss": 0.0987,
      "step": 75
    },
    {
      "epoch": 0.08633910820789549,
      "grad_norm": 0.8699860068340203,
      "learning_rate": 4.981945987055521e-06,
      "loss": 0.101,
      "step": 76
    },
    {
      "epoch": 0.0874751491053678,
      "grad_norm": 1.0778844945975843,
      "learning_rate": 4.9808014493426124e-06,
      "loss": 0.1122,
      "step": 77
    },
    {
      "epoch": 0.0886111900028401,
      "grad_norm": 0.9013095921740779,
      "learning_rate": 4.979621878520217e-06,
      "loss": 0.111,
      "step": 78
    },
    {
      "epoch": 0.0897472309003124,
      "grad_norm": 0.9663627554637914,
      "learning_rate": 4.978407291245866e-06,
      "loss": 0.0949,
      "step": 79
    },
    {
      "epoch": 0.09088327179778471,
      "grad_norm": 0.8666375684207491,
      "learning_rate": 4.977157704671585e-06,
      "loss": 0.0945,
      "step": 80
    },
    {
      "epoch": 0.09201931269525702,
      "grad_norm": 0.9535753414488521,
      "learning_rate": 4.975873136443649e-06,
      "loss": 0.1015,
      "step": 81
    },
    {
      "epoch": 0.09315535359272933,
      "grad_norm": 1.0742330242309999,
      "learning_rate": 4.974553604702332e-06,
      "loss": 0.0949,
      "step": 82
    },
    {
      "epoch": 0.09429139449020164,
      "grad_norm": 1.0596668250713723,
      "learning_rate": 4.9731991280816534e-06,
      "loss": 0.1159,
      "step": 83
    },
    {
      "epoch": 0.09542743538767395,
      "grad_norm": 0.9500662434392095,
      "learning_rate": 4.971809725709112e-06,
      "loss": 0.1006,
      "step": 84
    },
    {
      "epoch": 0.09656347628514626,
      "grad_norm": 1.0736109125994615,
      "learning_rate": 4.970385417205418e-06,
      "loss": 0.1024,
      "step": 85
    },
    {
      "epoch": 0.09769951718261857,
      "grad_norm": 0.8026706228357128,
      "learning_rate": 4.968926222684213e-06,
      "loss": 0.0908,
      "step": 86
    },
    {
      "epoch": 0.09883555808009088,
      "grad_norm": 0.9508589273601138,
      "learning_rate": 4.967432162751792e-06,
      "loss": 0.0919,
      "step": 87
    },
    {
      "epoch": 0.09997159897756319,
      "grad_norm": 0.927289333391325,
      "learning_rate": 4.965903258506806e-06,
      "loss": 0.0953,
      "step": 88
    },
    {
      "epoch": 0.1011076398750355,
      "grad_norm": 0.9403610446156513,
      "learning_rate": 4.964339531539967e-06,
      "loss": 0.0976,
      "step": 89
    },
    {
      "epoch": 0.10224368077250781,
      "grad_norm": 0.9342072851864264,
      "learning_rate": 4.9627410039337426e-06,
      "loss": 0.0967,
      "step": 90
    },
    {
      "epoch": 0.10337972166998012,
      "grad_norm": 0.8774156748861563,
      "learning_rate": 4.9611076982620445e-06,
      "loss": 0.1033,
      "step": 91
    },
    {
      "epoch": 0.10451576256745243,
      "grad_norm": 0.8318162500391886,
      "learning_rate": 4.959439637589909e-06,
      "loss": 0.1018,
      "step": 92
    },
    {
      "epoch": 0.10565180346492474,
      "grad_norm": 0.8899888321299209,
      "learning_rate": 4.957736845473173e-06,
      "loss": 0.0941,
      "step": 93
    },
    {
      "epoch": 0.10678784436239705,
      "grad_norm": 0.789140699658634,
      "learning_rate": 4.9559993459581375e-06,
      "loss": 0.1047,
      "step": 94
    },
    {
      "epoch": 0.10792388525986936,
      "grad_norm": 0.8800492952518975,
      "learning_rate": 4.954227163581234e-06,
      "loss": 0.1028,
      "step": 95
    },
    {
      "epoch": 0.10905992615734167,
      "grad_norm": 0.796766656234688,
      "learning_rate": 4.952420323368673e-06,
      "loss": 0.0942,
      "step": 96
    },
    {
      "epoch": 0.11019596705481398,
      "grad_norm": 0.8613544374379629,
      "learning_rate": 4.950578850836092e-06,
      "loss": 0.1073,
      "step": 97
    },
    {
      "epoch": 0.11133200795228629,
      "grad_norm": 0.8623738224609795,
      "learning_rate": 4.948702771988195e-06,
      "loss": 0.1004,
      "step": 98
    },
    {
      "epoch": 0.1124680488497586,
      "grad_norm": 0.8419305666625628,
      "learning_rate": 4.9467921133183864e-06,
      "loss": 0.1007,
      "step": 99
    },
    {
      "epoch": 0.1136040897472309,
      "grad_norm": 0.8733459281176695,
      "learning_rate": 4.944846901808397e-06,
      "loss": 0.0964,
      "step": 100
    },
    {
      "epoch": 0.1147401306447032,
      "grad_norm": 0.8126576547036249,
      "learning_rate": 4.942867164927899e-06,
      "loss": 0.1115,
      "step": 101
    },
    {
      "epoch": 0.11587617154217551,
      "grad_norm": 0.912151238889085,
      "learning_rate": 4.940852930634126e-06,
      "loss": 0.0974,
      "step": 102
    },
    {
      "epoch": 0.11701221243964782,
      "grad_norm": 0.8379629358991061,
      "learning_rate": 4.938804227371467e-06,
      "loss": 0.0949,
      "step": 103
    },
    {
      "epoch": 0.11814825333712013,
      "grad_norm": 0.8825318553017338,
      "learning_rate": 4.936721084071079e-06,
      "loss": 0.0995,
      "step": 104
    },
    {
      "epoch": 0.11928429423459244,
      "grad_norm": 0.7887136184663985,
      "learning_rate": 4.9346035301504644e-06,
      "loss": 0.0856,
      "step": 105
    },
    {
      "epoch": 0.12042033513206475,
      "grad_norm": 0.8694351555924742,
      "learning_rate": 4.932451595513063e-06,
      "loss": 0.0972,
      "step": 106
    },
    {
      "epoch": 0.12155637602953706,
      "grad_norm": 0.8953649088172692,
      "learning_rate": 4.930265310547829e-06,
      "loss": 0.0997,
      "step": 107
    },
    {
      "epoch": 0.12269241692700937,
      "grad_norm": 0.8607731259738107,
      "learning_rate": 4.928044706128803e-06,
      "loss": 0.0977,
      "step": 108
    },
    {
      "epoch": 0.12382845782448168,
      "grad_norm": 0.8777824068539876,
      "learning_rate": 4.92578981361467e-06,
      "loss": 0.1021,
      "step": 109
    },
    {
      "epoch": 0.12496449872195399,
      "grad_norm": 0.8266603261517322,
      "learning_rate": 4.923500664848327e-06,
      "loss": 0.1006,
      "step": 110
    },
    {
      "epoch": 0.1261005396194263,
      "grad_norm": 0.8118026763757383,
      "learning_rate": 4.9211772921564205e-06,
      "loss": 0.0924,
      "step": 111
    },
    {
      "epoch": 0.1272365805168986,
      "grad_norm": 0.8366614115506429,
      "learning_rate": 4.918819728348901e-06,
      "loss": 0.0823,
      "step": 112
    },
    {
      "epoch": 0.12837262141437092,
      "grad_norm": 0.898715874913154,
      "learning_rate": 4.916428006718555e-06,
      "loss": 0.1027,
      "step": 113
    },
    {
      "epoch": 0.12950866231184321,
      "grad_norm": 0.8937215198706769,
      "learning_rate": 4.9140021610405335e-06,
      "loss": 0.0783,
      "step": 114
    },
    {
      "epoch": 0.13064470320931554,
      "grad_norm": 0.8093914274815599,
      "learning_rate": 4.911542225571877e-06,
      "loss": 0.1034,
      "step": 115
    },
    {
      "epoch": 0.13178074410678783,
      "grad_norm": 0.9187764615250489,
      "learning_rate": 4.909048235051033e-06,
      "loss": 0.0929,
      "step": 116
    },
    {
      "epoch": 0.13291678500426016,
      "grad_norm": 0.9645110159367966,
      "learning_rate": 4.906520224697364e-06,
      "loss": 0.1025,
      "step": 117
    },
    {
      "epoch": 0.13405282590173245,
      "grad_norm": 0.8397494780778292,
      "learning_rate": 4.903958230210647e-06,
      "loss": 0.0902,
      "step": 118
    },
    {
      "epoch": 0.13518886679920478,
      "grad_norm": 0.9810615634775558,
      "learning_rate": 4.901362287770576e-06,
      "loss": 0.1,
      "step": 119
    },
    {
      "epoch": 0.13632490769667707,
      "grad_norm": 0.8518935095057704,
      "learning_rate": 4.8987324340362445e-06,
      "loss": 0.0932,
      "step": 120
    },
    {
      "epoch": 0.1374609485941494,
      "grad_norm": 0.8809729982496561,
      "learning_rate": 4.896068706145632e-06,
      "loss": 0.0915,
      "step": 121
    },
    {
      "epoch": 0.1385969894916217,
      "grad_norm": 1.0731776458211226,
      "learning_rate": 4.89337114171508e-06,
      "loss": 0.098,
      "step": 122
    },
    {
      "epoch": 0.13973303038909402,
      "grad_norm": 0.887795496144111,
      "learning_rate": 4.890639778838757e-06,
      "loss": 0.0927,
      "step": 123
    },
    {
      "epoch": 0.1408690712865663,
      "grad_norm": 0.8194626039786459,
      "learning_rate": 4.887874656088124e-06,
      "loss": 0.1027,
      "step": 124
    },
    {
      "epoch": 0.14200511218403863,
      "grad_norm": 0.8442429261448717,
      "learning_rate": 4.885075812511386e-06,
      "loss": 0.0961,
      "step": 125
    },
    {
      "epoch": 0.14314115308151093,
      "grad_norm": 0.8395001231574464,
      "learning_rate": 4.882243287632947e-06,
      "loss": 0.1039,
      "step": 126
    },
    {
      "epoch": 0.14427719397898325,
      "grad_norm": 0.8363673989585937,
      "learning_rate": 4.879377121452844e-06,
      "loss": 0.1006,
      "step": 127
    },
    {
      "epoch": 0.14541323487645555,
      "grad_norm": 0.7397826384078707,
      "learning_rate": 4.8764773544461895e-06,
      "loss": 0.087,
      "step": 128
    },
    {
      "epoch": 0.14654927577392787,
      "grad_norm": 0.8414577640896602,
      "learning_rate": 4.873544027562593e-06,
      "loss": 0.0891,
      "step": 129
    },
    {
      "epoch": 0.14768531667140017,
      "grad_norm": 0.8972297713907886,
      "learning_rate": 4.8705771822255895e-06,
      "loss": 0.102,
      "step": 130
    },
    {
      "epoch": 0.1488213575688725,
      "grad_norm": 0.7843924774512786,
      "learning_rate": 4.867576860332048e-06,
      "loss": 0.0945,
      "step": 131
    },
    {
      "epoch": 0.1499573984663448,
      "grad_norm": 0.8740179298084251,
      "learning_rate": 4.864543104251587e-06,
      "loss": 0.097,
      "step": 132
    },
    {
      "epoch": 0.15109343936381708,
      "grad_norm": 0.9423786129930344,
      "learning_rate": 4.8614759568259685e-06,
      "loss": 0.0975,
      "step": 133
    },
    {
      "epoch": 0.1522294802612894,
      "grad_norm": 0.7928906420896884,
      "learning_rate": 4.858375461368499e-06,
      "loss": 0.0906,
      "step": 134
    },
    {
      "epoch": 0.1533655211587617,
      "grad_norm": 0.9074949015198895,
      "learning_rate": 4.855241661663413e-06,
      "loss": 0.0959,
      "step": 135
    },
    {
      "epoch": 0.15450156205623403,
      "grad_norm": 0.8127866420512039,
      "learning_rate": 4.852074601965261e-06,
      "loss": 0.0939,
      "step": 136
    },
    {
      "epoch": 0.15563760295370632,
      "grad_norm": 0.8076873863138903,
      "learning_rate": 4.848874326998279e-06,
      "loss": 0.0977,
      "step": 137
    },
    {
      "epoch": 0.15677364385117865,
      "grad_norm": 0.8129270738261459,
      "learning_rate": 4.845640881955757e-06,
      "loss": 0.0978,
      "step": 138
    },
    {
      "epoch": 0.15790968474865094,
      "grad_norm": 0.8098881301085323,
      "learning_rate": 4.842374312499405e-06,
      "loss": 0.0886,
      "step": 139
    },
    {
      "epoch": 0.15904572564612326,
      "grad_norm": 0.7756679586652417,
      "learning_rate": 4.839074664758705e-06,
      "loss": 0.0894,
      "step": 140
    },
    {
      "epoch": 0.16018176654359556,
      "grad_norm": 0.8739902401352373,
      "learning_rate": 4.835741985330259e-06,
      "loss": 0.0915,
      "step": 141
    },
    {
      "epoch": 0.16131780744106788,
      "grad_norm": 0.9169987881610346,
      "learning_rate": 4.832376321277136e-06,
      "loss": 0.0914,
      "step": 142
    },
    {
      "epoch": 0.16245384833854018,
      "grad_norm": 0.8037543620615127,
      "learning_rate": 4.828977720128198e-06,
      "loss": 0.0876,
      "step": 143
    },
    {
      "epoch": 0.1635898892360125,
      "grad_norm": 0.7992352843679545,
      "learning_rate": 4.825546229877439e-06,
      "loss": 0.0877,
      "step": 144
    },
    {
      "epoch": 0.1647259301334848,
      "grad_norm": 1.0614030660867448,
      "learning_rate": 4.822081898983302e-06,
      "loss": 0.0941,
      "step": 145
    },
    {
      "epoch": 0.16586197103095712,
      "grad_norm": 0.7970505919422197,
      "learning_rate": 4.818584776367992e-06,
      "loss": 0.0837,
      "step": 146
    },
    {
      "epoch": 0.16699801192842942,
      "grad_norm": 0.8532009414479299,
      "learning_rate": 4.815054911416795e-06,
      "loss": 0.0966,
      "step": 147
    },
    {
      "epoch": 0.16813405282590174,
      "grad_norm": 0.8955310251194117,
      "learning_rate": 4.811492353977366e-06,
      "loss": 0.0969,
      "step": 148
    },
    {
      "epoch": 0.16927009372337404,
      "grad_norm": 0.8081962759766569,
      "learning_rate": 4.80789715435904e-06,
      "loss": 0.0953,
      "step": 149
    },
    {
      "epoch": 0.17040613462084636,
      "grad_norm": 0.9554413544294639,
      "learning_rate": 4.804269363332112e-06,
      "loss": 0.0937,
      "step": 150
    },
    {
      "epoch": 0.17154217551831866,
      "grad_norm": 0.8121987703021468,
      "learning_rate": 4.800609032127123e-06,
      "loss": 0.0853,
      "step": 151
    },
    {
      "epoch": 0.17267821641579098,
      "grad_norm": 0.8917282328521219,
      "learning_rate": 4.7969162124341354e-06,
      "loss": 0.0934,
      "step": 152
    },
    {
      "epoch": 0.17381425731326328,
      "grad_norm": 0.8287357539581232,
      "learning_rate": 4.793190956402005e-06,
      "loss": 0.0991,
      "step": 153
    },
    {
      "epoch": 0.1749502982107356,
      "grad_norm": 0.7980156222346841,
      "learning_rate": 4.789433316637644e-06,
      "loss": 0.0876,
      "step": 154
    },
    {
      "epoch": 0.1760863391082079,
      "grad_norm": 0.9256751077111557,
      "learning_rate": 4.785643346205277e-06,
      "loss": 0.0959,
      "step": 155
    },
    {
      "epoch": 0.1772223800056802,
      "grad_norm": 0.8519817551622527,
      "learning_rate": 4.781821098625691e-06,
      "loss": 0.0885,
      "step": 156
    },
    {
      "epoch": 0.17835842090315251,
      "grad_norm": 1.029546823822168,
      "learning_rate": 4.777966627875484e-06,
      "loss": 0.0979,
      "step": 157
    },
    {
      "epoch": 0.1794944618006248,
      "grad_norm": 0.8471825516389808,
      "learning_rate": 4.7740799883862966e-06,
      "loss": 0.1109,
      "step": 158
    },
    {
      "epoch": 0.18063050269809713,
      "grad_norm": 0.8232766246509707,
      "learning_rate": 4.770161235044047e-06,
      "loss": 0.0906,
      "step": 159
    },
    {
      "epoch": 0.18176654359556943,
      "grad_norm": 0.7744605824026991,
      "learning_rate": 4.766210423188158e-06,
      "loss": 0.0865,
      "step": 160
    },
    {
      "epoch": 0.18290258449304175,
      "grad_norm": 0.8240753157263881,
      "learning_rate": 4.7622276086107685e-06,
      "loss": 0.0929,
      "step": 161
    },
    {
      "epoch": 0.18403862539051405,
      "grad_norm": 1.0242521571922516,
      "learning_rate": 4.758212847555953e-06,
      "loss": 0.0868,
      "step": 162
    },
    {
      "epoch": 0.18517466628798637,
      "grad_norm": 0.7246225269895837,
      "learning_rate": 4.7541661967189225e-06,
      "loss": 0.0828,
      "step": 163
    },
    {
      "epoch": 0.18631070718545867,
      "grad_norm": 0.8504247119017272,
      "learning_rate": 4.750087713245227e-06,
      "loss": 0.0925,
      "step": 164
    },
    {
      "epoch": 0.187446748082931,
      "grad_norm": 1.0186333734231328,
      "learning_rate": 4.745977454729947e-06,
      "loss": 0.0858,
      "step": 165
    },
    {
      "epoch": 0.1885827889804033,
      "grad_norm": 0.847067525360349,
      "learning_rate": 4.74183547921688e-06,
      "loss": 0.092,
      "step": 166
    },
    {
      "epoch": 0.1897188298778756,
      "grad_norm": 0.8536347476054195,
      "learning_rate": 4.7376618451977195e-06,
      "loss": 0.0972,
      "step": 167
    },
    {
      "epoch": 0.1908548707753479,
      "grad_norm": 0.7658131109439913,
      "learning_rate": 4.733456611611233e-06,
      "loss": 0.0915,
      "step": 168
    },
    {
      "epoch": 0.19199091167282023,
      "grad_norm": 0.8219276405969249,
      "learning_rate": 4.729219837842427e-06,
      "loss": 0.095,
      "step": 169
    },
    {
      "epoch": 0.19312695257029253,
      "grad_norm": 0.7164304793044781,
      "learning_rate": 4.7249515837217075e-06,
      "loss": 0.083,
      "step": 170
    },
    {
      "epoch": 0.19426299346776485,
      "grad_norm": 0.6962094522936668,
      "learning_rate": 4.720651909524037e-06,
      "loss": 0.0813,
      "step": 171
    },
    {
      "epoch": 0.19539903436523715,
      "grad_norm": 0.7811734598092144,
      "learning_rate": 4.716320875968081e-06,
      "loss": 0.0914,
      "step": 172
    },
    {
      "epoch": 0.19653507526270947,
      "grad_norm": 0.8570333273830322,
      "learning_rate": 4.711958544215355e-06,
      "loss": 0.0915,
      "step": 173
    },
    {
      "epoch": 0.19767111616018176,
      "grad_norm": 0.8036691820553741,
      "learning_rate": 4.707564975869357e-06,
      "loss": 0.0986,
      "step": 174
    },
    {
      "epoch": 0.1988071570576541,
      "grad_norm": 0.7930663707472128,
      "learning_rate": 4.703140232974697e-06,
      "loss": 0.088,
      "step": 175
    },
    {
      "epoch": 0.19994319795512638,
      "grad_norm": 0.7944858503238109,
      "learning_rate": 4.698684378016223e-06,
      "loss": 0.0797,
      "step": 176
    },
    {
      "epoch": 0.2010792388525987,
      "grad_norm": 0.7708906880680786,
      "learning_rate": 4.694197473918139e-06,
      "loss": 0.0891,
      "step": 177
    },
    {
      "epoch": 0.202215279750071,
      "grad_norm": 0.7294391552603106,
      "learning_rate": 4.6896795840431155e-06,
      "loss": 0.0836,
      "step": 178
    },
    {
      "epoch": 0.2033513206475433,
      "grad_norm": 0.7440970510492587,
      "learning_rate": 4.685130772191392e-06,
      "loss": 0.0975,
      "step": 179
    },
    {
      "epoch": 0.20448736154501562,
      "grad_norm": 0.7924536059191131,
      "learning_rate": 4.680551102599881e-06,
      "loss": 0.0885,
      "step": 180
    },
    {
      "epoch": 0.20562340244248792,
      "grad_norm": 0.7596047990400485,
      "learning_rate": 4.675940639941256e-06,
      "loss": 0.1072,
      "step": 181
    },
    {
      "epoch": 0.20675944333996024,
      "grad_norm": 0.9330680024460848,
      "learning_rate": 4.671299449323045e-06,
      "loss": 0.086,
      "step": 182
    },
    {
      "epoch": 0.20789548423743254,
      "grad_norm": 0.8217560932912764,
      "learning_rate": 4.666627596286702e-06,
      "loss": 0.0969,
      "step": 183
    },
    {
      "epoch": 0.20903152513490486,
      "grad_norm": 0.8223504953929077,
      "learning_rate": 4.66192514680669e-06,
      "loss": 0.0875,
      "step": 184
    },
    {
      "epoch": 0.21016756603237716,
      "grad_norm": 0.8436510077788845,
      "learning_rate": 4.657192167289542e-06,
      "loss": 0.1025,
      "step": 185
    },
    {
      "epoch": 0.21130360692984948,
      "grad_norm": 0.7432485961316236,
      "learning_rate": 4.652428724572929e-06,
      "loss": 0.0898,
      "step": 186
    },
    {
      "epoch": 0.21243964782732178,
      "grad_norm": 0.8263428490118125,
      "learning_rate": 4.647634885924713e-06,
      "loss": 0.0914,
      "step": 187
    },
    {
      "epoch": 0.2135756887247941,
      "grad_norm": 0.7006252126044292,
      "learning_rate": 4.642810719041999e-06,
      "loss": 0.0769,
      "step": 188
    },
    {
      "epoch": 0.2147117296222664,
      "grad_norm": 0.7046589326705855,
      "learning_rate": 4.637956292050176e-06,
      "loss": 0.084,
      "step": 189
    },
    {
      "epoch": 0.21584777051973872,
      "grad_norm": 0.7611962581820568,
      "learning_rate": 4.63307167350196e-06,
      "loss": 0.0876,
      "step": 190
    },
    {
      "epoch": 0.21698381141721101,
      "grad_norm": 0.7833240772094479,
      "learning_rate": 4.628156932376419e-06,
      "loss": 0.0873,
      "step": 191
    },
    {
      "epoch": 0.21811985231468334,
      "grad_norm": 0.7377511037071964,
      "learning_rate": 4.623212138078004e-06,
      "loss": 0.0945,
      "step": 192
    },
    {
      "epoch": 0.21925589321215563,
      "grad_norm": 0.8377382456599733,
      "learning_rate": 4.61823736043557e-06,
      "loss": 0.0887,
      "step": 193
    },
    {
      "epoch": 0.22039193410962796,
      "grad_norm": 0.8116009894693861,
      "learning_rate": 4.613232669701384e-06,
      "loss": 0.1012,
      "step": 194
    },
    {
      "epoch": 0.22152797500710025,
      "grad_norm": 0.7422452860486192,
      "learning_rate": 4.60819813655014e-06,
      "loss": 0.0838,
      "step": 195
    },
    {
      "epoch": 0.22266401590457258,
      "grad_norm": 0.7828779245377948,
      "learning_rate": 4.603133832077953e-06,
      "loss": 0.09,
      "step": 196
    },
    {
      "epoch": 0.22380005680204487,
      "grad_norm": 0.7827054714406323,
      "learning_rate": 4.598039827801364e-06,
      "loss": 0.0826,
      "step": 197
    },
    {
      "epoch": 0.2249360976995172,
      "grad_norm": 0.7486696522988175,
      "learning_rate": 4.592916195656322e-06,
      "loss": 0.0776,
      "step": 198
    },
    {
      "epoch": 0.2260721385969895,
      "grad_norm": 0.86990777525941,
      "learning_rate": 4.587763007997173e-06,
      "loss": 0.0862,
      "step": 199
    },
    {
      "epoch": 0.2272081794944618,
      "grad_norm": 0.8814024837126304,
      "learning_rate": 4.582580337595636e-06,
      "loss": 0.0916,
      "step": 200
    },
    {
      "epoch": 0.2283442203919341,
      "grad_norm": 0.7612514997934728,
      "learning_rate": 4.577368257639778e-06,
      "loss": 0.0883,
      "step": 201
    },
    {
      "epoch": 0.2294802612894064,
      "grad_norm": 0.8819736494589635,
      "learning_rate": 4.572126841732977e-06,
      "loss": 0.0906,
      "step": 202
    },
    {
      "epoch": 0.23061630218687873,
      "grad_norm": 0.8685465339651437,
      "learning_rate": 4.566856163892884e-06,
      "loss": 0.0888,
      "step": 203
    },
    {
      "epoch": 0.23175234308435103,
      "grad_norm": 0.8923434811809764,
      "learning_rate": 4.561556298550379e-06,
      "loss": 0.0841,
      "step": 204
    },
    {
      "epoch": 0.23288838398182335,
      "grad_norm": 0.8683618736939278,
      "learning_rate": 4.556227320548519e-06,
      "loss": 0.0848,
      "step": 205
    },
    {
      "epoch": 0.23402442487929564,
      "grad_norm": 0.8133017920049119,
      "learning_rate": 4.550869305141478e-06,
      "loss": 0.1032,
      "step": 206
    },
    {
      "epoch": 0.23516046577676797,
      "grad_norm": 1.0077751828505999,
      "learning_rate": 4.5454823279934924e-06,
      "loss": 0.0939,
      "step": 207
    },
    {
      "epoch": 0.23629650667424026,
      "grad_norm": 0.8941773669569191,
      "learning_rate": 4.5400664651777835e-06,
      "loss": 0.0736,
      "step": 208
    },
    {
      "epoch": 0.2374325475717126,
      "grad_norm": 0.9625846478259737,
      "learning_rate": 4.534621793175488e-06,
      "loss": 0.0786,
      "step": 209
    },
    {
      "epoch": 0.23856858846918488,
      "grad_norm": 0.8867810392749393,
      "learning_rate": 4.529148388874577e-06,
      "loss": 0.0882,
      "step": 210
    },
    {
      "epoch": 0.2397046293666572,
      "grad_norm": 0.8856076921546335,
      "learning_rate": 4.523646329568771e-06,
      "loss": 0.084,
      "step": 211
    },
    {
      "epoch": 0.2408406702641295,
      "grad_norm": 0.9900335853980446,
      "learning_rate": 4.518115692956445e-06,
      "loss": 0.0882,
      "step": 212
    },
    {
      "epoch": 0.24197671116160183,
      "grad_norm": 0.8154742650873136,
      "learning_rate": 4.512556557139538e-06,
      "loss": 0.0999,
      "step": 213
    },
    {
      "epoch": 0.24311275205907412,
      "grad_norm": 1.0349253631685067,
      "learning_rate": 4.506969000622443e-06,
      "loss": 0.0914,
      "step": 214
    },
    {
      "epoch": 0.24424879295654645,
      "grad_norm": 0.8592000952917487,
      "learning_rate": 4.501353102310901e-06,
      "loss": 0.0892,
      "step": 215
    },
    {
      "epoch": 0.24538483385401874,
      "grad_norm": 0.7760366725602739,
      "learning_rate": 4.49570894151089e-06,
      "loss": 0.0854,
      "step": 216
    },
    {
      "epoch": 0.24652087475149106,
      "grad_norm": 0.8943917514409343,
      "learning_rate": 4.490036597927499e-06,
      "loss": 0.0894,
      "step": 217
    },
    {
      "epoch": 0.24765691564896336,
      "grad_norm": 0.8522941541639423,
      "learning_rate": 4.484336151663807e-06,
      "loss": 0.1003,
      "step": 218
    },
    {
      "epoch": 0.24879295654643568,
      "grad_norm": 0.8967087502728155,
      "learning_rate": 4.47860768321975e-06,
      "loss": 0.0948,
      "step": 219
    },
    {
      "epoch": 0.24992899744390798,
      "grad_norm": 0.7189838300322855,
      "learning_rate": 4.472851273490985e-06,
      "loss": 0.098,
      "step": 220
    },
    {
      "epoch": 0.2510650383413803,
      "grad_norm": 0.7078624951530635,
      "learning_rate": 4.467067003767745e-06,
      "loss": 0.0829,
      "step": 221
    },
    {
      "epoch": 0.2522010792388526,
      "grad_norm": 0.7348078666330551,
      "learning_rate": 4.4612549557336975e-06,
      "loss": 0.0854,
      "step": 222
    },
    {
      "epoch": 0.2533371201363249,
      "grad_norm": 0.7586832543393554,
      "learning_rate": 4.455415211464783e-06,
      "loss": 0.0871,
      "step": 223
    },
    {
      "epoch": 0.2544731610337972,
      "grad_norm": 0.7930770514967493,
      "learning_rate": 4.449547853428061e-06,
      "loss": 0.0953,
      "step": 224
    },
    {
      "epoch": 0.2556092019312695,
      "grad_norm": 0.7534958358029722,
      "learning_rate": 4.443652964480544e-06,
      "loss": 0.0917,
      "step": 225
    },
    {
      "epoch": 0.25674524282874184,
      "grad_norm": 0.760763727741596,
      "learning_rate": 4.437730627868028e-06,
      "loss": 0.0842,
      "step": 226
    },
    {
      "epoch": 0.25788128372621416,
      "grad_norm": 0.6899506937858009,
      "learning_rate": 4.4317809272239145e-06,
      "loss": 0.0835,
      "step": 227
    },
    {
      "epoch": 0.25901732462368643,
      "grad_norm": 0.8301938179486986,
      "learning_rate": 4.425803946568033e-06,
      "loss": 0.0837,
      "step": 228
    },
    {
      "epoch": 0.26015336552115875,
      "grad_norm": 0.806195952391347,
      "learning_rate": 4.419799770305453e-06,
      "loss": 0.0939,
      "step": 229
    },
    {
      "epoch": 0.2612894064186311,
      "grad_norm": 0.7874803780113002,
      "learning_rate": 4.413768483225292e-06,
      "loss": 0.0927,
      "step": 230
    },
    {
      "epoch": 0.2624254473161034,
      "grad_norm": 0.778443263649366,
      "learning_rate": 4.407710170499517e-06,
      "loss": 0.0842,
      "step": 231
    },
    {
      "epoch": 0.26356148821357567,
      "grad_norm": 0.7837747021199533,
      "learning_rate": 4.401624917681743e-06,
      "loss": 0.0932,
      "step": 232
    },
    {
      "epoch": 0.264697529111048,
      "grad_norm": 0.8282894247426811,
      "learning_rate": 4.395512810706026e-06,
      "loss": 0.0869,
      "step": 233
    },
    {
      "epoch": 0.2658335700085203,
      "grad_norm": 0.829901206986068,
      "learning_rate": 4.3893739358856465e-06,
      "loss": 0.0865,
      "step": 234
    },
    {
      "epoch": 0.26696961090599264,
      "grad_norm": 0.7444682225944242,
      "learning_rate": 4.383208379911893e-06,
      "loss": 0.0897,
      "step": 235
    },
    {
      "epoch": 0.2681056518034649,
      "grad_norm": 0.7012792726547673,
      "learning_rate": 4.377016229852836e-06,
      "loss": 0.086,
      "step": 236
    },
    {
      "epoch": 0.26924169270093723,
      "grad_norm": 0.8500432213095706,
      "learning_rate": 4.370797573152101e-06,
      "loss": 0.0813,
      "step": 237
    },
    {
      "epoch": 0.27037773359840955,
      "grad_norm": 0.7570382330116664,
      "learning_rate": 4.364552497627632e-06,
      "loss": 0.089,
      "step": 238
    },
    {
      "epoch": 0.2715137744958819,
      "grad_norm": 0.8335036015037836,
      "learning_rate": 4.35828109147045e-06,
      "loss": 0.0851,
      "step": 239
    },
    {
      "epoch": 0.27264981539335414,
      "grad_norm": 0.8054974855210708,
      "learning_rate": 4.3519834432434095e-06,
      "loss": 0.0896,
      "step": 240
    },
    {
      "epoch": 0.27378585629082647,
      "grad_norm": 0.7073705410015672,
      "learning_rate": 4.345659641879948e-06,
      "loss": 0.0909,
      "step": 241
    },
    {
      "epoch": 0.2749218971882988,
      "grad_norm": 0.8852333441267097,
      "learning_rate": 4.33930977668283e-06,
      "loss": 0.0927,
      "step": 242
    },
    {
      "epoch": 0.2760579380857711,
      "grad_norm": 0.8425230580400942,
      "learning_rate": 4.332933937322883e-06,
      "loss": 0.0845,
      "step": 243
    },
    {
      "epoch": 0.2771939789832434,
      "grad_norm": 0.8758257547967765,
      "learning_rate": 4.326532213837735e-06,
      "loss": 0.0956,
      "step": 244
    },
    {
      "epoch": 0.2783300198807157,
      "grad_norm": 0.816120585942606,
      "learning_rate": 4.320104696630544e-06,
      "loss": 0.0847,
      "step": 245
    },
    {
      "epoch": 0.27946606077818803,
      "grad_norm": 0.7280485699649355,
      "learning_rate": 4.3136514764687155e-06,
      "loss": 0.0835,
      "step": 246
    },
    {
      "epoch": 0.2806021016756603,
      "grad_norm": 0.7641526018539088,
      "learning_rate": 4.3071726444826244e-06,
      "loss": 0.0737,
      "step": 247
    },
    {
      "epoch": 0.2817381425731326,
      "grad_norm": 0.696074958902299,
      "learning_rate": 4.300668292164329e-06,
      "loss": 0.0849,
      "step": 248
    },
    {
      "epoch": 0.28287418347060495,
      "grad_norm": 0.7670647555778207,
      "learning_rate": 4.29413851136628e-06,
      "loss": 0.0923,
      "step": 249
    },
    {
      "epoch": 0.28401022436807727,
      "grad_norm": 0.7773991448116256,
      "learning_rate": 4.287583394300016e-06,
      "loss": 0.09,
      "step": 250
    },
    {
      "epoch": 0.28514626526554954,
      "grad_norm": 0.7573657225369796,
      "learning_rate": 4.28100303353487e-06,
      "loss": 0.0841,
      "step": 251
    },
    {
      "epoch": 0.28628230616302186,
      "grad_norm": 0.7452647302721194,
      "learning_rate": 4.274397521996658e-06,
      "loss": 0.0822,
      "step": 252
    },
    {
      "epoch": 0.2874183470604942,
      "grad_norm": 0.677199297450376,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.0786,
      "step": 253
    },
    {
      "epoch": 0.2885543879579665,
      "grad_norm": 0.8159060891220854,
      "learning_rate": 4.261111420078844e-06,
      "loss": 0.0855,
      "step": 254
    },
    {
      "epoch": 0.2896904288554388,
      "grad_norm": 0.7560132821786525,
      "learning_rate": 4.2544310173214546e-06,
      "loss": 0.0896,
      "step": 255
    },
    {
      "epoch": 0.2908264697529111,
      "grad_norm": 0.73318307350379,
      "learning_rate": 4.247725839032781e-06,
      "loss": 0.0768,
      "step": 256
    },
    {
      "epoch": 0.2919625106503834,
      "grad_norm": 0.6654523142661359,
      "learning_rate": 4.240995979901273e-06,
      "loss": 0.0788,
      "step": 257
    },
    {
      "epoch": 0.29309855154785575,
      "grad_norm": 0.6869374811575061,
      "learning_rate": 4.234241534963916e-06,
      "loss": 0.0842,
      "step": 258
    },
    {
      "epoch": 0.294234592445328,
      "grad_norm": 0.7030551454665738,
      "learning_rate": 4.227462599604889e-06,
      "loss": 0.0771,
      "step": 259
    },
    {
      "epoch": 0.29537063334280034,
      "grad_norm": 0.758359859205596,
      "learning_rate": 4.220659269554217e-06,
      "loss": 0.0865,
      "step": 260
    },
    {
      "epoch": 0.29650667424027266,
      "grad_norm": 0.7754592301638612,
      "learning_rate": 4.21383164088642e-06,
      "loss": 0.0871,
      "step": 261
    },
    {
      "epoch": 0.297642715137745,
      "grad_norm": 0.6840112409540218,
      "learning_rate": 4.206979810019153e-06,
      "loss": 0.0773,
      "step": 262
    },
    {
      "epoch": 0.29877875603521725,
      "grad_norm": 0.6848481267287457,
      "learning_rate": 4.20010387371185e-06,
      "loss": 0.0782,
      "step": 263
    },
    {
      "epoch": 0.2999147969326896,
      "grad_norm": 0.7061222716573403,
      "learning_rate": 4.1932039290643534e-06,
      "loss": 0.0777,
      "step": 264
    },
    {
      "epoch": 0.3010508378301619,
      "grad_norm": 0.7448997259987848,
      "learning_rate": 4.186280073515543e-06,
      "loss": 0.0847,
      "step": 265
    },
    {
      "epoch": 0.30218687872763417,
      "grad_norm": 0.7380145268386641,
      "learning_rate": 4.179332404841963e-06,
      "loss": 0.0713,
      "step": 266
    },
    {
      "epoch": 0.3033229196251065,
      "grad_norm": 0.7441635416413879,
      "learning_rate": 4.172361021156436e-06,
      "loss": 0.0827,
      "step": 267
    },
    {
      "epoch": 0.3044589605225788,
      "grad_norm": 0.7528609351049764,
      "learning_rate": 4.1653660209066835e-06,
      "loss": 0.0877,
      "step": 268
    },
    {
      "epoch": 0.30559500142005114,
      "grad_norm": 0.8275998852209175,
      "learning_rate": 4.158347502873933e-06,
      "loss": 0.0804,
      "step": 269
    },
    {
      "epoch": 0.3067310423175234,
      "grad_norm": 0.7437237554431169,
      "learning_rate": 4.151305566171521e-06,
      "loss": 0.0797,
      "step": 270
    },
    {
      "epoch": 0.30786708321499573,
      "grad_norm": 0.8019081711906108,
      "learning_rate": 4.144240310243496e-06,
      "loss": 0.0841,
      "step": 271
    },
    {
      "epoch": 0.30900312411246805,
      "grad_norm": 0.7291775823706078,
      "learning_rate": 4.137151834863213e-06,
      "loss": 0.0873,
      "step": 272
    },
    {
      "epoch": 0.3101391650099404,
      "grad_norm": 0.7571411166672053,
      "learning_rate": 4.130040240131925e-06,
      "loss": 0.0845,
      "step": 273
    },
    {
      "epoch": 0.31127520590741264,
      "grad_norm": 0.8067283136621284,
      "learning_rate": 4.122905626477371e-06,
      "loss": 0.0965,
      "step": 274
    },
    {
      "epoch": 0.31241124680488497,
      "grad_norm": 0.7733213314503384,
      "learning_rate": 4.115748094652352e-06,
      "loss": 0.0783,
      "step": 275
    },
    {
      "epoch": 0.3135472877023573,
      "grad_norm": 0.8024692635641963,
      "learning_rate": 4.108567745733318e-06,
      "loss": 0.0814,
      "step": 276
    },
    {
      "epoch": 0.3146833285998296,
      "grad_norm": 0.9972625903967001,
      "learning_rate": 4.10136468111893e-06,
      "loss": 0.0814,
      "step": 277
    },
    {
      "epoch": 0.3158193694973019,
      "grad_norm": 0.8366578826907879,
      "learning_rate": 4.094139002528635e-06,
      "loss": 0.0868,
      "step": 278
    },
    {
      "epoch": 0.3169554103947742,
      "grad_norm": 0.6756245861915039,
      "learning_rate": 4.086890812001228e-06,
      "loss": 0.0815,
      "step": 279
    },
    {
      "epoch": 0.31809145129224653,
      "grad_norm": 0.865443271304209,
      "learning_rate": 4.07962021189341e-06,
      "loss": 0.1015,
      "step": 280
    },
    {
      "epoch": 0.31922749218971885,
      "grad_norm": 0.7942215693194814,
      "learning_rate": 4.0723273048783426e-06,
      "loss": 0.0824,
      "step": 281
    },
    {
      "epoch": 0.3203635330871911,
      "grad_norm": 0.7941199716368488,
      "learning_rate": 4.065012193944201e-06,
      "loss": 0.0846,
      "step": 282
    },
    {
      "epoch": 0.32149957398466344,
      "grad_norm": 0.7194857749286357,
      "learning_rate": 4.057674982392713e-06,
      "loss": 0.0851,
      "step": 283
    },
    {
      "epoch": 0.32263561488213577,
      "grad_norm": 0.7058130985999059,
      "learning_rate": 4.050315773837708e-06,
      "loss": 0.0828,
      "step": 284
    },
    {
      "epoch": 0.3237716557796081,
      "grad_norm": 0.8582057260744234,
      "learning_rate": 4.042934672203651e-06,
      "loss": 0.089,
      "step": 285
    },
    {
      "epoch": 0.32490769667708036,
      "grad_norm": 0.8183999060210416,
      "learning_rate": 4.0355317817241705e-06,
      "loss": 0.0866,
      "step": 286
    },
    {
      "epoch": 0.3260437375745527,
      "grad_norm": 0.8343617132623691,
      "learning_rate": 4.028107206940592e-06,
      "loss": 0.0853,
      "step": 287
    },
    {
      "epoch": 0.327179778472025,
      "grad_norm": 0.718035241670308,
      "learning_rate": 4.020661052700462e-06,
      "loss": 0.0757,
      "step": 288
    },
    {
      "epoch": 0.3283158193694973,
      "grad_norm": 0.736559511530064,
      "learning_rate": 4.013193424156062e-06,
      "loss": 0.0846,
      "step": 289
    },
    {
      "epoch": 0.3294518602669696,
      "grad_norm": 0.7881644130060897,
      "learning_rate": 4.00570442676293e-06,
      "loss": 0.0801,
      "step": 290
    },
    {
      "epoch": 0.3305879011644419,
      "grad_norm": 0.8339054711607803,
      "learning_rate": 3.9981941662783675e-06,
      "loss": 0.0832,
      "step": 291
    },
    {
      "epoch": 0.33172394206191425,
      "grad_norm": 0.6576881392840723,
      "learning_rate": 3.990662748759946e-06,
      "loss": 0.0748,
      "step": 292
    },
    {
      "epoch": 0.3328599829593865,
      "grad_norm": 0.7151497791042993,
      "learning_rate": 3.983110280564009e-06,
      "loss": 0.0788,
      "step": 293
    },
    {
      "epoch": 0.33399602385685884,
      "grad_norm": 0.803862729134459,
      "learning_rate": 3.975536868344174e-06,
      "loss": 0.0856,
      "step": 294
    },
    {
      "epoch": 0.33513206475433116,
      "grad_norm": 0.797700895368388,
      "learning_rate": 3.96794261904982e-06,
      "loss": 0.085,
      "step": 295
    },
    {
      "epoch": 0.3362681056518035,
      "grad_norm": 0.7063931073253098,
      "learning_rate": 3.9603276399245864e-06,
      "loss": 0.0874,
      "step": 296
    },
    {
      "epoch": 0.33740414654927575,
      "grad_norm": 0.8196874744289926,
      "learning_rate": 3.9526920385048465e-06,
      "loss": 0.0858,
      "step": 297
    },
    {
      "epoch": 0.3385401874467481,
      "grad_norm": 0.6616725384562748,
      "learning_rate": 3.945035922618198e-06,
      "loss": 0.073,
      "step": 298
    },
    {
      "epoch": 0.3396762283442204,
      "grad_norm": 0.7164040094642763,
      "learning_rate": 3.937359400381938e-06,
      "loss": 0.0756,
      "step": 299
    },
    {
      "epoch": 0.3408122692416927,
      "grad_norm": 0.7262888078379157,
      "learning_rate": 3.929662580201536e-06,
      "loss": 0.0865,
      "step": 300
    },
    {
      "epoch": 0.341948310139165,
      "grad_norm": 0.7363762038223817,
      "learning_rate": 3.9219455707691004e-06,
      "loss": 0.0811,
      "step": 301
    },
    {
      "epoch": 0.3430843510366373,
      "grad_norm": 0.7072534911733872,
      "learning_rate": 3.91420848106185e-06,
      "loss": 0.0756,
      "step": 302
    },
    {
      "epoch": 0.34422039193410964,
      "grad_norm": 0.6760504100426291,
      "learning_rate": 3.906451420340566e-06,
      "loss": 0.0768,
      "step": 303
    },
    {
      "epoch": 0.34535643283158196,
      "grad_norm": 0.6941199007978212,
      "learning_rate": 3.898674498148058e-06,
      "loss": 0.0831,
      "step": 304
    },
    {
      "epoch": 0.34649247372905423,
      "grad_norm": 0.7638852373461658,
      "learning_rate": 3.890877824307611e-06,
      "loss": 0.0837,
      "step": 305
    },
    {
      "epoch": 0.34762851462652655,
      "grad_norm": 0.8602638611119576,
      "learning_rate": 3.883061508921439e-06,
      "loss": 0.0758,
      "step": 306
    },
    {
      "epoch": 0.3487645555239989,
      "grad_norm": 0.7902801585559203,
      "learning_rate": 3.875225662369125e-06,
      "loss": 0.0852,
      "step": 307
    },
    {
      "epoch": 0.3499005964214712,
      "grad_norm": 0.7206101025736867,
      "learning_rate": 3.8673703953060685e-06,
      "loss": 0.0805,
      "step": 308
    },
    {
      "epoch": 0.35103663731894347,
      "grad_norm": 0.8588244629042325,
      "learning_rate": 3.859495818661914e-06,
      "loss": 0.0759,
      "step": 309
    },
    {
      "epoch": 0.3521726782164158,
      "grad_norm": 0.8040786942739016,
      "learning_rate": 3.8516020436389945e-06,
      "loss": 0.0834,
      "step": 310
    },
    {
      "epoch": 0.3533087191138881,
      "grad_norm": 0.7672683122350898,
      "learning_rate": 3.843689181710756e-06,
      "loss": 0.0826,
      "step": 311
    },
    {
      "epoch": 0.3544447600113604,
      "grad_norm": 0.7239437740106832,
      "learning_rate": 3.835757344620183e-06,
      "loss": 0.0726,
      "step": 312
    },
    {
      "epoch": 0.3555808009088327,
      "grad_norm": 0.707817772964158,
      "learning_rate": 3.827806644378221e-06,
      "loss": 0.0774,
      "step": 313
    },
    {
      "epoch": 0.35671684180630503,
      "grad_norm": 0.7814771723692865,
      "learning_rate": 3.819837193262197e-06,
      "loss": 0.0816,
      "step": 314
    },
    {
      "epoch": 0.35785288270377735,
      "grad_norm": 0.7018548527687214,
      "learning_rate": 3.811849103814229e-06,
      "loss": 0.0834,
      "step": 315
    },
    {
      "epoch": 0.3589889236012496,
      "grad_norm": 0.7411725676109678,
      "learning_rate": 3.803842488839642e-06,
      "loss": 0.0745,
      "step": 316
    },
    {
      "epoch": 0.36012496449872194,
      "grad_norm": 0.7575659597309948,
      "learning_rate": 3.795817461405372e-06,
      "loss": 0.0758,
      "step": 317
    },
    {
      "epoch": 0.36126100539619427,
      "grad_norm": 0.7126296926711662,
      "learning_rate": 3.7877741348383703e-06,
      "loss": 0.0871,
      "step": 318
    },
    {
      "epoch": 0.3623970462936666,
      "grad_norm": 0.7647606176666816,
      "learning_rate": 3.779712622724003e-06,
      "loss": 0.0813,
      "step": 319
    },
    {
      "epoch": 0.36353308719113886,
      "grad_norm": 0.7394294466894481,
      "learning_rate": 3.7716330389044463e-06,
      "loss": 0.0781,
      "step": 320
    },
    {
      "epoch": 0.3646691280886112,
      "grad_norm": 0.7442083160908898,
      "learning_rate": 3.76353549747708e-06,
      "loss": 0.0868,
      "step": 321
    },
    {
      "epoch": 0.3658051689860835,
      "grad_norm": 0.7080436398478851,
      "learning_rate": 3.7554201127928747e-06,
      "loss": 0.078,
      "step": 322
    },
    {
      "epoch": 0.36694120988355583,
      "grad_norm": 0.6967853094410319,
      "learning_rate": 3.74728699945478e-06,
      "loss": 0.0803,
      "step": 323
    },
    {
      "epoch": 0.3680772507810281,
      "grad_norm": 0.7828060588699257,
      "learning_rate": 3.739136272316102e-06,
      "loss": 0.0845,
      "step": 324
    },
    {
      "epoch": 0.3692132916785004,
      "grad_norm": 0.7825472362344883,
      "learning_rate": 3.7309680464788835e-06,
      "loss": 0.0741,
      "step": 325
    },
    {
      "epoch": 0.37034933257597275,
      "grad_norm": 0.7968905209840789,
      "learning_rate": 3.72278243729228e-06,
      "loss": 0.0801,
      "step": 326
    },
    {
      "epoch": 0.37148537347344507,
      "grad_norm": 0.7412746573627451,
      "learning_rate": 3.7145795603509282e-06,
      "loss": 0.0814,
      "step": 327
    },
    {
      "epoch": 0.37262141437091734,
      "grad_norm": 0.8150346697048992,
      "learning_rate": 3.706359531493316e-06,
      "loss": 0.0788,
      "step": 328
    },
    {
      "epoch": 0.37375745526838966,
      "grad_norm": 0.7931274032845024,
      "learning_rate": 3.6981224668001427e-06,
      "loss": 0.0827,
      "step": 329
    },
    {
      "epoch": 0.374893496165862,
      "grad_norm": 0.7843395757066731,
      "learning_rate": 3.6898684825926845e-06,
      "loss": 0.0743,
      "step": 330
    },
    {
      "epoch": 0.3760295370633343,
      "grad_norm": 0.7346940737464841,
      "learning_rate": 3.681597695431149e-06,
      "loss": 0.0786,
      "step": 331
    },
    {
      "epoch": 0.3771655779608066,
      "grad_norm": 0.8796304428964914,
      "learning_rate": 3.6733102221130303e-06,
      "loss": 0.0812,
      "step": 332
    },
    {
      "epoch": 0.3783016188582789,
      "grad_norm": 0.6688582439986495,
      "learning_rate": 3.6650061796714597e-06,
      "loss": 0.073,
      "step": 333
    },
    {
      "epoch": 0.3794376597557512,
      "grad_norm": 0.7754423124852141,
      "learning_rate": 3.656685685373552e-06,
      "loss": 0.0789,
      "step": 334
    },
    {
      "epoch": 0.3805737006532235,
      "grad_norm": 0.7590512262588398,
      "learning_rate": 3.6483488567187473e-06,
      "loss": 0.0849,
      "step": 335
    },
    {
      "epoch": 0.3817097415506958,
      "grad_norm": 0.781321406354808,
      "learning_rate": 3.6399958114371597e-06,
      "loss": 0.083,
      "step": 336
    },
    {
      "epoch": 0.38284578244816814,
      "grad_norm": 0.692794052076511,
      "learning_rate": 3.631626667487906e-06,
      "loss": 0.0652,
      "step": 337
    },
    {
      "epoch": 0.38398182334564046,
      "grad_norm": 0.7897147258296024,
      "learning_rate": 3.623241543057445e-06,
      "loss": 0.0802,
      "step": 338
    },
    {
      "epoch": 0.38511786424311273,
      "grad_norm": 0.7425771430389372,
      "learning_rate": 3.614840556557905e-06,
      "loss": 0.0809,
      "step": 339
    },
    {
      "epoch": 0.38625390514058505,
      "grad_norm": 0.7333094111793276,
      "learning_rate": 3.606423826625414e-06,
| "loss": 0.0674, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.3873899460380574, | |
| "grad_norm": 0.7352398600810227, | |
| "learning_rate": 3.5979914721184263e-06, | |
| "loss": 0.0837, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.3885259869355297, | |
| "grad_norm": 0.8231872248613965, | |
| "learning_rate": 3.5895436121160388e-06, | |
| "loss": 0.0772, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.38966202783300197, | |
| "grad_norm": 0.7367149834483241, | |
| "learning_rate": 3.5810803659163136e-06, | |
| "loss": 0.0775, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.3907980687304743, | |
| "grad_norm": 0.7712185528181968, | |
| "learning_rate": 3.5726018530345913e-06, | |
| "loss": 0.0771, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.3919341096279466, | |
| "grad_norm": 0.7715161299771612, | |
| "learning_rate": 3.564108193201804e-06, | |
| "loss": 0.0727, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.39307015052541894, | |
| "grad_norm": 0.8012727862178366, | |
| "learning_rate": 3.5555995063627842e-06, | |
| "loss": 0.0797, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.3942061914228912, | |
| "grad_norm": 0.7337481236349752, | |
| "learning_rate": 3.5470759126745726e-06, | |
| "loss": 0.0798, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.39534223232036353, | |
| "grad_norm": 0.7350551910786791, | |
| "learning_rate": 3.5385375325047167e-06, | |
| "loss": 0.0875, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.39647827321783585, | |
| "grad_norm": 0.7048639304094766, | |
| "learning_rate": 3.5299844864295773e-06, | |
| "loss": 0.0754, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.3976143141153082, | |
| "grad_norm": 1.1334161441853856, | |
| "learning_rate": 3.5214168952326205e-06, | |
| "loss": 0.0746, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.39875035501278044, | |
| "grad_norm": 0.6989505797799814, | |
| "learning_rate": 3.5128348799027157e-06, | |
| "loss": 0.0747, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.39988639591025277, | |
| "grad_norm": 0.7187861112920556, | |
| "learning_rate": 3.5042385616324243e-06, | |
| "loss": 0.0825, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.4010224368077251, | |
| "grad_norm": 0.6972138154142447, | |
| "learning_rate": 3.4956280618162887e-06, | |
| "loss": 0.0779, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.4021584777051974, | |
| "grad_norm": 0.8929740592234533, | |
| "learning_rate": 3.4870035020491216e-06, | |
| "loss": 0.0887, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.4032945186026697, | |
| "grad_norm": 0.734887534070193, | |
| "learning_rate": 3.4783650041242823e-06, | |
| "loss": 0.0787, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.404430559500142, | |
| "grad_norm": 0.6976813270054868, | |
| "learning_rate": 3.469712690031962e-06, | |
| "loss": 0.0745, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.40556660039761433, | |
| "grad_norm": 0.8271656519707578, | |
| "learning_rate": 3.4610466819574617e-06, | |
| "loss": 0.0762, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.4067026412950866, | |
| "grad_norm": 0.7662647730775376, | |
| "learning_rate": 3.4523671022794612e-06, | |
| "loss": 0.0764, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.4078386821925589, | |
| "grad_norm": 0.8034388047927019, | |
| "learning_rate": 3.443674073568296e-06, | |
| "loss": 0.0851, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.40897472309003124, | |
| "grad_norm": 0.8132216242516986, | |
| "learning_rate": 3.4349677185842246e-06, | |
| "loss": 0.0717, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.41011076398750357, | |
| "grad_norm": 0.8537641846408835, | |
| "learning_rate": 3.4262481602756937e-06, | |
| "loss": 0.0687, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.41124680488497584, | |
| "grad_norm": 0.7776722907105323, | |
| "learning_rate": 3.4175155217776057e-06, | |
| "loss": 0.086, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.41238284578244816, | |
| "grad_norm": 0.7639319056207042, | |
| "learning_rate": 3.4087699264095746e-06, | |
| "loss": 0.0792, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.4135188866799205, | |
| "grad_norm": 0.9498236411990728, | |
| "learning_rate": 3.4000114976741905e-06, | |
| "loss": 0.0735, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.4146549275773928, | |
| "grad_norm": 0.8078922681417617, | |
| "learning_rate": 3.391240359255269e-06, | |
| "loss": 0.0859, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.4157909684748651, | |
| "grad_norm": 0.701305404529687, | |
| "learning_rate": 3.38245663501611e-06, | |
| "loss": 0.0754, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.4169270093723374, | |
| "grad_norm": 0.9535861270195332, | |
| "learning_rate": 3.3736604489977465e-06, | |
| "loss": 0.0844, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.4180630502698097, | |
| "grad_norm": 0.7631817995714191, | |
| "learning_rate": 3.3648519254171906e-06, | |
| "loss": 0.0685, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.41919909116728205, | |
| "grad_norm": 0.8104822580614385, | |
| "learning_rate": 3.3560311886656855e-06, | |
| "loss": 0.0855, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.4203351320647543, | |
| "grad_norm": 0.8298476446641734, | |
| "learning_rate": 3.3471983633069414e-06, | |
| "loss": 0.0795, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.42147117296222664, | |
| "grad_norm": 0.7256692328377535, | |
| "learning_rate": 3.3383535740753813e-06, | |
| "loss": 0.0812, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.42260721385969896, | |
| "grad_norm": 0.7210661181409769, | |
| "learning_rate": 3.32949694587438e-06, | |
| "loss": 0.0918, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.4237432547571713, | |
| "grad_norm": 0.7174750225509191, | |
| "learning_rate": 3.320628603774496e-06, | |
| "loss": 0.0815, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.42487929565464355, | |
| "grad_norm": 0.7537467160289135, | |
| "learning_rate": 3.3117486730117092e-06, | |
| "loss": 0.0829, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.4260153365521159, | |
| "grad_norm": 0.7277497535392837, | |
| "learning_rate": 3.3028572789856507e-06, | |
| "loss": 0.0696, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.4271513774495882, | |
| "grad_norm": 0.6443734430583794, | |
| "learning_rate": 3.2939545472578314e-06, | |
| "loss": 0.069, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.42828741834706047, | |
| "grad_norm": 0.7695280037358156, | |
| "learning_rate": 3.285040603549872e-06, | |
| "loss": 0.0747, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.4294234592445328, | |
| "grad_norm": 0.7294107932414593, | |
| "learning_rate": 3.276115573741724e-06, | |
| "loss": 0.0699, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.4305595001420051, | |
| "grad_norm": 0.6901445691629573, | |
| "learning_rate": 3.267179583869892e-06, | |
| "loss": 0.0653, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.43169554103947744, | |
| "grad_norm": 0.7479692537488529, | |
| "learning_rate": 3.2582327601256567e-06, | |
| "loss": 0.0729, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4328315819369497, | |
| "grad_norm": 0.8489891426282298, | |
| "learning_rate": 3.249275228853292e-06, | |
| "loss": 0.0671, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.43396762283442203, | |
| "grad_norm": 0.8342540424379653, | |
| "learning_rate": 3.240307116548279e-06, | |
| "loss": 0.0857, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.43510366373189435, | |
| "grad_norm": 0.855952003925093, | |
| "learning_rate": 3.231328549855522e-06, | |
| "loss": 0.0841, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.4362397046293667, | |
| "grad_norm": 0.8171052821070877, | |
| "learning_rate": 3.222339655567556e-06, | |
| "loss": 0.0724, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.43737574552683894, | |
| "grad_norm": 0.7191540982661123, | |
| "learning_rate": 3.2133405606227636e-06, | |
| "loss": 0.0705, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.43851178642431127, | |
| "grad_norm": 0.8227659710377888, | |
| "learning_rate": 3.2043313921035747e-06, | |
| "loss": 0.0816, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.4396478273217836, | |
| "grad_norm": 0.7965981656056659, | |
| "learning_rate": 3.1953122772346757e-06, | |
| "loss": 0.0716, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.4407838682192559, | |
| "grad_norm": 0.7306236376054158, | |
| "learning_rate": 3.1862833433812137e-06, | |
| "loss": 0.07, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.4419199091167282, | |
| "grad_norm": 0.7687699723670527, | |
| "learning_rate": 3.1772447180469934e-06, | |
| "loss": 0.0787, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.4430559500142005, | |
| "grad_norm": 0.8313217943349993, | |
| "learning_rate": 3.1681965288726825e-06, | |
| "loss": 0.0716, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.44419199091167283, | |
| "grad_norm": 0.7173908314094287, | |
| "learning_rate": 3.1591389036340064e-06, | |
| "loss": 0.0742, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.44532803180914515, | |
| "grad_norm": 0.8407757996115441, | |
| "learning_rate": 3.1500719702399406e-06, | |
| "loss": 0.0734, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.4464640727066174, | |
| "grad_norm": 0.7682407487902735, | |
| "learning_rate": 3.1409958567309114e-06, | |
| "loss": 0.0895, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.44760011360408974, | |
| "grad_norm": 0.7226606623749933, | |
| "learning_rate": 3.1319106912769797e-06, | |
| "loss": 0.0757, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.44873615450156207, | |
| "grad_norm": 0.6935224939852427, | |
| "learning_rate": 3.122816602176039e-06, | |
| "loss": 0.0715, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.4498721953990344, | |
| "grad_norm": 0.7857347629017143, | |
| "learning_rate": 3.1137137178519983e-06, | |
| "loss": 0.0796, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.45100823629650666, | |
| "grad_norm": 0.8462391224537944, | |
| "learning_rate": 3.1046021668529684e-06, | |
| "loss": 0.0778, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.452144277193979, | |
| "grad_norm": 0.7803762723068312, | |
| "learning_rate": 3.0954820778494516e-06, | |
| "loss": 0.0792, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.4532803180914513, | |
| "grad_norm": 0.8208303464346856, | |
| "learning_rate": 3.0863535796325173e-06, | |
| "loss": 0.0822, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.4544163589889236, | |
| "grad_norm": 0.7406949289191436, | |
| "learning_rate": 3.0772168011119894e-06, | |
| "loss": 0.0722, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4555523998863959, | |
| "grad_norm": 0.7232653457612589, | |
| "learning_rate": 3.068071871314626e-06, | |
| "loss": 0.0754, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.4566884407838682, | |
| "grad_norm": 0.6527667928894365, | |
| "learning_rate": 3.0589189193822894e-06, | |
| "loss": 0.0711, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.45782448168134054, | |
| "grad_norm": 0.767420451370671, | |
| "learning_rate": 3.0497580745701334e-06, | |
| "loss": 0.0834, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.4589605225788128, | |
| "grad_norm": 0.7645556615921287, | |
| "learning_rate": 3.0405894662447682e-06, | |
| "loss": 0.0789, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.46009656347628514, | |
| "grad_norm": 0.7810648726320831, | |
| "learning_rate": 3.0314132238824416e-06, | |
| "loss": 0.0818, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.46123260437375746, | |
| "grad_norm": 0.7115760882284773, | |
| "learning_rate": 3.0222294770672054e-06, | |
| "loss": 0.0705, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.4623686452712298, | |
| "grad_norm": 0.649480771602918, | |
| "learning_rate": 3.013038355489086e-06, | |
| "loss": 0.0689, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.46350468616870205, | |
| "grad_norm": 0.7592672186320847, | |
| "learning_rate": 3.0038399889422553e-06, | |
| "loss": 0.0777, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.4646407270661744, | |
| "grad_norm": 0.8172328817027344, | |
| "learning_rate": 2.9946345073231964e-06, | |
| "loss": 0.074, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.4657767679636467, | |
| "grad_norm": 0.7859879017666365, | |
| "learning_rate": 2.985422040628867e-06, | |
| "loss": 0.0769, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.466912808861119, | |
| "grad_norm": 0.7177354483721196, | |
| "learning_rate": 2.976202718954869e-06, | |
| "loss": 0.0752, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.4680488497585913, | |
| "grad_norm": 0.6542212761323943, | |
| "learning_rate": 2.9669766724936074e-06, | |
| "loss": 0.0699, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.4691848906560636, | |
| "grad_norm": 0.7320537820608451, | |
| "learning_rate": 2.957744031532451e-06, | |
| "loss": 0.0698, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.47032093155353594, | |
| "grad_norm": 0.7284037776279783, | |
| "learning_rate": 2.948504926451896e-06, | |
| "loss": 0.0735, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.47145697245100826, | |
| "grad_norm": 0.702970854905207, | |
| "learning_rate": 2.9392594877237194e-06, | |
| "loss": 0.065, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.47259301334848053, | |
| "grad_norm": 0.6558338370706804, | |
| "learning_rate": 2.930007845909146e-06, | |
| "loss": 0.0711, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.47372905424595285, | |
| "grad_norm": 0.667963355409311, | |
| "learning_rate": 2.9207501316569936e-06, | |
| "loss": 0.0669, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.4748650951434252, | |
| "grad_norm": 0.6856434352453329, | |
| "learning_rate": 2.911486475701835e-06, | |
| "loss": 0.0612, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4760011360408975, | |
| "grad_norm": 0.7150424311335496, | |
| "learning_rate": 2.9022170088621497e-06, | |
| "loss": 0.0708, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.47713717693836977, | |
| "grad_norm": 0.7191644625060256, | |
| "learning_rate": 2.892941862038475e-06, | |
| "loss": 0.0792, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4782732178358421, | |
| "grad_norm": 0.740499459461677, | |
| "learning_rate": 2.883661166211564e-06, | |
| "loss": 0.0729, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4794092587333144, | |
| "grad_norm": 0.8211073931673529, | |
| "learning_rate": 2.8743750524405254e-06, | |
| "loss": 0.0759, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4805452996307867, | |
| "grad_norm": 0.7278022551256821, | |
| "learning_rate": 2.8650836518609814e-06, | |
| "loss": 0.0649, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.481681340528259, | |
| "grad_norm": 0.7105414452804, | |
| "learning_rate": 2.8557870956832135e-06, | |
| "loss": 0.0719, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.48281738142573133, | |
| "grad_norm": 0.7882490477262348, | |
| "learning_rate": 2.8464855151903065e-06, | |
| "loss": 0.0752, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.48395342232320365, | |
| "grad_norm": 0.7614124126399766, | |
| "learning_rate": 2.837179041736299e-06, | |
| "loss": 0.0656, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.4850894632206759, | |
| "grad_norm": 0.7482163459741915, | |
| "learning_rate": 2.8278678067443255e-06, | |
| "loss": 0.0811, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.48622550411814824, | |
| "grad_norm": 0.7408052511491365, | |
| "learning_rate": 2.8185519417047624e-06, | |
| "loss": 0.0711, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.48736154501562057, | |
| "grad_norm": 0.7402789101662246, | |
| "learning_rate": 2.80923157817337e-06, | |
| "loss": 0.0691, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4884975859130929, | |
| "grad_norm": 0.7071544319741676, | |
| "learning_rate": 2.799906847769433e-06, | |
| "loss": 0.0729, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.48963362681056516, | |
| "grad_norm": 0.7353065824078855, | |
| "learning_rate": 2.790577882173906e-06, | |
| "loss": 0.0732, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.4907696677080375, | |
| "grad_norm": 0.7209493364089367, | |
| "learning_rate": 2.781244813127552e-06, | |
| "loss": 0.0794, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.4919057086055098, | |
| "grad_norm": 0.7468157532688993, | |
| "learning_rate": 2.7719077724290793e-06, | |
| "loss": 0.0718, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.49304174950298213, | |
| "grad_norm": 0.6890756859527285, | |
| "learning_rate": 2.762566891933285e-06, | |
| "loss": 0.0753, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.4941777904004544, | |
| "grad_norm": 0.7788104679693914, | |
| "learning_rate": 2.7532223035491877e-06, | |
| "loss": 0.0716, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.4953138312979267, | |
| "grad_norm": 0.7723914173864724, | |
| "learning_rate": 2.743874139238171e-06, | |
| "loss": 0.0775, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.49644987219539904, | |
| "grad_norm": 0.743928252813702, | |
| "learning_rate": 2.7345225310121155e-06, | |
| "loss": 0.0748, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.49758591309287137, | |
| "grad_norm": 0.709937023384648, | |
| "learning_rate": 2.725167610931534e-06, | |
| "loss": 0.0731, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.49872195399034364, | |
| "grad_norm": 0.7257850662776226, | |
| "learning_rate": 2.715809511103711e-06, | |
| "loss": 0.0739, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.49985799488781596, | |
| "grad_norm": 0.7244919657561524, | |
| "learning_rate": 2.7064483636808314e-06, | |
| "loss": 0.0626, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.5009940357852882, | |
| "grad_norm": 0.6763467559532471, | |
| "learning_rate": 2.69708430085812e-06, | |
| "loss": 0.0672, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.5021300766827606, | |
| "grad_norm": 0.6998161361615932, | |
| "learning_rate": 2.687717454871971e-06, | |
| "loss": 0.066, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.5032661175802329, | |
| "grad_norm": 0.7167142257204601, | |
| "learning_rate": 2.678347957998081e-06, | |
| "loss": 0.0675, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.5044021584777052, | |
| "grad_norm": 0.8043133103525217, | |
| "learning_rate": 2.6689759425495833e-06, | |
| "loss": 0.0736, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.5055381993751775, | |
| "grad_norm": 0.6878575755122527, | |
| "learning_rate": 2.659601540875174e-06, | |
| "loss": 0.0746, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.5066742402726498, | |
| "grad_norm": 0.7525753170166554, | |
| "learning_rate": 2.650224885357251e-06, | |
| "loss": 0.0801, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.5078102811701222, | |
| "grad_norm": 0.7941324249115771, | |
| "learning_rate": 2.640846108410039e-06, | |
| "loss": 0.0685, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.5089463220675944, | |
| "grad_norm": 0.6913302757604484, | |
| "learning_rate": 2.6314653424777194e-06, | |
| "loss": 0.0705, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.5100823629650667, | |
| "grad_norm": 0.7174450522921005, | |
| "learning_rate": 2.6220827200325628e-06, | |
| "loss": 0.0759, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.511218403862539, | |
| "grad_norm": 0.7973535459102329, | |
| "learning_rate": 2.612698373573056e-06, | |
| "loss": 0.0736, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.5123544447600114, | |
| "grad_norm": 0.8074198728981284, | |
| "learning_rate": 2.603312435622033e-06, | |
| "loss": 0.0758, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.5134904856574837, | |
| "grad_norm": 0.7604997282120738, | |
| "learning_rate": 2.593925038724802e-06, | |
| "loss": 0.0737, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.514626526554956, | |
| "grad_norm": 0.7476276338719816, | |
| "learning_rate": 2.5845363154472725e-06, | |
| "loss": 0.0722, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.5157625674524283, | |
| "grad_norm": 0.735936044826126, | |
| "learning_rate": 2.575146398374087e-06, | |
| "loss": 0.0722, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.5168986083499006, | |
| "grad_norm": 0.7379849599595595, | |
| "learning_rate": 2.565755420106744e-06, | |
| "loss": 0.0861, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.5180346492473729, | |
| "grad_norm": 0.7802069151430526, | |
| "learning_rate": 2.5563635132617305e-06, | |
| "loss": 0.0699, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.5191706901448452, | |
| "grad_norm": 0.7850066911880272, | |
| "learning_rate": 2.5469708104686452e-06, | |
| "loss": 0.0786, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.5203067310423175, | |
| "grad_norm": 0.7173680507506982, | |
| "learning_rate": 2.5375774443683263e-06, | |
| "loss": 0.0662, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.5214427719397898, | |
| "grad_norm": 0.7215484657257588, | |
| "learning_rate": 2.5281835476109796e-06, | |
| "loss": 0.0691, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.5225788128372622, | |
| "grad_norm": 0.6809593038075415, | |
| "learning_rate": 2.518789252854305e-06, | |
| "loss": 0.0697, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.5237148537347345, | |
| "grad_norm": 0.8606317588371037, | |
| "learning_rate": 2.5093946927616227e-06, | |
| "loss": 0.0714, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.5248508946322068, | |
| "grad_norm": 0.7678392851383928, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0727, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.5259869355296791, | |
| "grad_norm": 0.7979050686259359, | |
| "learning_rate": 2.4906053072383773e-06, | |
| "loss": 0.0731, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.5271229764271513, | |
| "grad_norm": 0.6681326332305534, | |
| "learning_rate": 2.4812107471456958e-06, | |
| "loss": 0.0626, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.5282590173246237, | |
| "grad_norm": 0.7627768324930073, | |
| "learning_rate": 2.4718164523890212e-06, | |
| "loss": 0.0712, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.529395058222096, | |
| "grad_norm": 0.7904275908906134, | |
| "learning_rate": 2.4624225556316745e-06, | |
| "loss": 0.0637, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.5305310991195683, | |
| "grad_norm": 0.7155499492449455, | |
| "learning_rate": 2.453029189531356e-06, | |
| "loss": 0.0703, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.5316671400170406, | |
| "grad_norm": 0.7250622453734636, | |
| "learning_rate": 2.44363648673827e-06, | |
| "loss": 0.069, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.532803180914513, | |
| "grad_norm": 0.7249989036503823, | |
| "learning_rate": 2.4342445798932563e-06, | |
| "loss": 0.063, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.5339392218119853, | |
| "grad_norm": 0.743744157976681, | |
| "learning_rate": 2.4248536016259137e-06, | |
| "loss": 0.077, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.5350752627094575, | |
| "grad_norm": 0.7582738981974823, | |
| "learning_rate": 2.4154636845527284e-06, | |
| "loss": 0.0732, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.5362113036069298, | |
| "grad_norm": 0.7328578027294108, | |
| "learning_rate": 2.4060749612751987e-06, | |
| "loss": 0.071, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.5373473445044021, | |
| "grad_norm": 0.7002570282552395, | |
| "learning_rate": 2.396687564377967e-06, | |
| "loss": 0.0659, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.5384833854018745, | |
| "grad_norm": 0.7359719709178979, | |
| "learning_rate": 2.3873016264269446e-06, | |
| "loss": 0.065, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.5396194262993468, | |
| "grad_norm": 0.7105844692533961, | |
| "learning_rate": 2.3779172799674377e-06, | |
| "loss": 0.0696, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.5407554671968191, | |
| "grad_norm": 0.6506193455984971, | |
| "learning_rate": 2.368534657522281e-06, | |
| "loss": 0.0676, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.5418915080942914, | |
| "grad_norm": 0.7291302216363645, | |
| "learning_rate": 2.359153891589962e-06, | |
| "loss": 0.0801, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.5430275489917638, | |
| "grad_norm": 0.7447378296478038, | |
| "learning_rate": 2.3497751146427494e-06, | |
| "loss": 0.0728, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.544163589889236, | |
| "grad_norm": 0.7252193208819527, | |
| "learning_rate": 2.3403984591248265e-06, | |
| "loss": 0.0754, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.5452996307867083, | |
| "grad_norm": 0.7138381216438716, | |
| "learning_rate": 2.3310240574504184e-06, | |
| "loss": 0.0722, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5464356716841806, | |
| "grad_norm": 0.6530875177989481, | |
| "learning_rate": 2.3216520420019194e-06, | |
| "loss": 0.0638, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.5475717125816529, | |
| "grad_norm": 0.8473481511613769, | |
| "learning_rate": 2.3122825451280294e-06, | |
| "loss": 0.081, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.5487077534791253, | |
| "grad_norm": 0.7553901465399981, | |
| "learning_rate": 2.30291569914188e-06, | |
| "loss": 0.0599, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.5498437943765976, | |
| "grad_norm": 0.7885325178229121, | |
| "learning_rate": 2.2935516363191695e-06, | |
| "loss": 0.0848, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.5509798352740699, | |
| "grad_norm": 0.7822535293047203, | |
| "learning_rate": 2.2841904888962903e-06, | |
| "loss": 0.0746, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.5521158761715422, | |
| "grad_norm": 0.7752683853284668, | |
| "learning_rate": 2.2748323890684664e-06, | |
| "loss": 0.0745, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.5532519170690144, | |
| "grad_norm": 0.7713264960564677, | |
| "learning_rate": 2.2654774689878862e-06, | |
| "loss": 0.076, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.5543879579664868, | |
| "grad_norm": 0.629660342616562, | |
| "learning_rate": 2.2561258607618296e-06, | |
| "loss": 0.0712, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.5555239988639591, | |
| "grad_norm": 0.7243836087765395, | |
| "learning_rate": 2.246777696450813e-06, | |
| "loss": 0.0819, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.5566600397614314, | |
| "grad_norm": 0.6376006175785872, | |
| "learning_rate": 2.2374331080667168e-06, | |
| "loss": 0.0635, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5577960806589037, | |
| "grad_norm": 0.6720742104662947, | |
| "learning_rate": 2.2280922275709216e-06, | |
| "loss": 0.0655, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.5589321215563761, | |
| "grad_norm": 0.7322632128140908, | |
| "learning_rate": 2.2187551868724487e-06, | |
| "loss": 0.0733, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.5600681624538484, | |
| "grad_norm": 0.7942425591087324, | |
| "learning_rate": 2.209422117826094e-06, | |
| "loss": 0.0708, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.5612042033513206, | |
| "grad_norm": 0.6415846065352803, | |
| "learning_rate": 2.200093152230568e-06, | |
| "loss": 0.0625, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.5623402442487929, | |
| "grad_norm": 0.791673886780985, | |
| "learning_rate": 2.190768421826631e-06, | |
| "loss": 0.0712, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.5634762851462652, | |
| "grad_norm": 0.8434695674081621, | |
| "learning_rate": 2.1814480582952376e-06, | |
| "loss": 0.0679, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.5646123260437376, | |
| "grad_norm": 0.7110416920872518, | |
| "learning_rate": 2.1721321932556753e-06, | |
| "loss": 0.0636, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.5657483669412099, | |
| "grad_norm": 0.8221680308072801, | |
| "learning_rate": 2.1628209582637024e-06, | |
| "loss": 0.0613, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.5668844078386822, | |
| "grad_norm": 0.7622218183878603, | |
| "learning_rate": 2.1535144848096943e-06, | |
| "loss": 0.0708, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.5680204487361545, | |
| "grad_norm": 0.7782722183282532, | |
| "learning_rate": 2.1442129043167877e-06, | |
| "loss": 0.0757, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5680204487361545, | |
| "eval_loss": 0.06981126964092255, | |
| "eval_runtime": 11.0092, | |
| "eval_samples_per_second": 51.775, | |
| "eval_steps_per_second": 6.54, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5691564896336269, | |
| "grad_norm": 1.0163760342592565, | |
| "learning_rate": 2.134916348139019e-06, | |
| "loss": 0.0686, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.5702925305310991, | |
| "grad_norm": 0.7322989198729203, | |
| "learning_rate": 2.125624947559475e-06, | |
| "loss": 0.0634, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.6931871597884127, | |
| "learning_rate": 2.116338833788437e-06, | |
| "loss": 0.0616, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.5725646123260437, | |
| "grad_norm": 0.7359829849611855, | |
| "learning_rate": 2.1070581379615253e-06, | |
| "loss": 0.0671, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.573700653223516, | |
| "grad_norm": 0.6657442150083017, | |
| "learning_rate": 2.0977829911378507e-06, | |
| "loss": 0.0643, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.5748366941209884, | |
| "grad_norm": 0.6926944123781523, | |
| "learning_rate": 2.088513524298165e-06, | |
| "loss": 0.0735, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.5759727350184607, | |
| "grad_norm": 0.6648038490491777, | |
| "learning_rate": 2.0792498683430072e-06, | |
| "loss": 0.0642, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.577108775915933, | |
| "grad_norm": 0.7126968053240703, | |
| "learning_rate": 2.0699921540908542e-06, | |
| "loss": 0.0628, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.5782448168134053, | |
| "grad_norm": 0.6620429659025622, | |
| "learning_rate": 2.0607405122762806e-06, | |
| "loss": 0.0659, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.5793808577108776, | |
| "grad_norm": 0.7959716020355202, | |
| "learning_rate": 2.0514950735481053e-06, | |
| "loss": 0.0812, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5805168986083499, | |
| "grad_norm": 0.7170097707725962, | |
| "learning_rate": 2.0422559684675498e-06, | |
| "loss": 0.0677, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.5816529395058222, | |
| "grad_norm": 0.6830917403644267, | |
| "learning_rate": 2.033023327506393e-06, | |
| "loss": 0.0694, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.5827889804032945, | |
| "grad_norm": 0.7191751035977458, | |
| "learning_rate": 2.023797281045132e-06, | |
| "loss": 0.0679, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.5839250213007668, | |
| "grad_norm": 0.7346106234856803, | |
| "learning_rate": 2.014577959371134e-06, | |
| "loss": 0.0718, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.5850610621982392, | |
| "grad_norm": 0.6813644114828487, | |
| "learning_rate": 2.0053654926768044e-06, | |
| "loss": 0.0593, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.5861971030957115, | |
| "grad_norm": 0.80336979808499, | |
| "learning_rate": 1.996160011057746e-06, | |
| "loss": 0.0729, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.5873331439931837, | |
| "grad_norm": 0.7514984326231717, | |
| "learning_rate": 1.9869616445109146e-06, | |
| "loss": 0.0673, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.588469184890656, | |
| "grad_norm": 0.7606405433072686, | |
| "learning_rate": 1.9777705229327954e-06, | |
| "loss": 0.0664, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.5896052257881284, | |
| "grad_norm": 0.7583627182956107, | |
| "learning_rate": 1.9685867761175584e-06, | |
| "loss": 0.0716, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.5907412666856007, | |
| "grad_norm": 0.700178818885243, | |
| "learning_rate": 1.959410533755232e-06, | |
| "loss": 0.067, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.591877307583073, | |
| "grad_norm": 0.7422231248355664, | |
| "learning_rate": 1.9502419254298674e-06, | |
| "loss": 0.0608, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5930133484805453, | |
| "grad_norm": 0.6555516297292772, | |
| "learning_rate": 1.9410810806177105e-06, | |
| "loss": 0.0652, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5941493893780176, | |
| "grad_norm": 0.8479046695169662, | |
| "learning_rate": 1.931928128685375e-06, | |
| "loss": 0.0778, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.59528543027549, | |
| "grad_norm": 0.8290532060534326, | |
| "learning_rate": 1.922783198888011e-06, | |
| "loss": 0.0741, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5964214711729622, | |
| "grad_norm": 0.7494299014958242, | |
| "learning_rate": 1.913646420367483e-06, | |
| "loss": 0.0784, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5975575120704345, | |
| "grad_norm": 0.7799356992873004, | |
| "learning_rate": 1.9045179221505497e-06, | |
| "loss": 0.0679, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5986935529679068, | |
| "grad_norm": 0.669194670284561, | |
| "learning_rate": 1.8953978331470322e-06, | |
| "loss": 0.0667, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5998295938653792, | |
| "grad_norm": 0.6788295538214086, | |
| "learning_rate": 1.8862862821480023e-06, | |
| "loss": 0.0727, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.6009656347628515, | |
| "grad_norm": 0.6939565933254241, | |
| "learning_rate": 1.8771833978239615e-06, | |
| "loss": 0.0694, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.6021016756603238, | |
| "grad_norm": 0.6537870877173654, | |
| "learning_rate": 1.8680893087230207e-06, | |
| "loss": 0.0653, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.6032377165577961, | |
| "grad_norm": 0.6637094837876996, | |
| "learning_rate": 1.8590041432690895e-06, | |
| "loss": 0.0614, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.6043737574552683, | |
| "grad_norm": 0.7093390779006853, | |
| "learning_rate": 1.8499280297600594e-06, | |
| "loss": 0.0617, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.6055097983527407, | |
| "grad_norm": 0.6755471903725316, | |
| "learning_rate": 1.840861096365995e-06, | |
| "loss": 0.064, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.606645839250213, | |
| "grad_norm": 0.774838419360385, | |
| "learning_rate": 1.8318034711273181e-06, | |
| "loss": 0.0637, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.6077818801476853, | |
| "grad_norm": 0.8065627164393625, | |
| "learning_rate": 1.822755281953007e-06, | |
| "loss": 0.0646, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.6089179210451576, | |
| "grad_norm": 0.7275113641331161, | |
| "learning_rate": 1.813716656618788e-06, | |
| "loss": 0.0582, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.61005396194263, | |
| "grad_norm": 0.7398892395016525, | |
| "learning_rate": 1.8046877227653248e-06, | |
| "loss": 0.0743, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.6111900028401023, | |
| "grad_norm": 0.7497489776755677, | |
| "learning_rate": 1.7956686078964257e-06, | |
| "loss": 0.07, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.6123260437375746, | |
| "grad_norm": 0.8216722715800929, | |
| "learning_rate": 1.7866594393772375e-06, | |
| "loss": 0.0686, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.6134620846350468, | |
| "grad_norm": 0.730615553098809, | |
| "learning_rate": 1.7776603444324445e-06, | |
| "loss": 0.0647, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.6145981255325191, | |
| "grad_norm": 0.7520686987142878, | |
| "learning_rate": 1.7686714501444791e-06, | |
| "loss": 0.0627, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.6157341664299915, | |
| "grad_norm": 0.7034476625476764, | |
| "learning_rate": 1.759692883451721e-06, | |
| "loss": 0.0718, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.6168702073274638, | |
| "grad_norm": 0.6589075115743108, | |
| "learning_rate": 1.750724771146709e-06, | |
| "loss": 0.0576, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.6180062482249361, | |
| "grad_norm": 0.7840335260584612, | |
| "learning_rate": 1.741767239874344e-06, | |
| "loss": 0.0628, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.6191422891224084, | |
| "grad_norm": 0.81310922260959, | |
| "learning_rate": 1.7328204161301084e-06, | |
| "loss": 0.078, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.6202783300198808, | |
| "grad_norm": 0.6591918365661746, | |
| "learning_rate": 1.723884426258277e-06, | |
| "loss": 0.0667, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.6214143709173531, | |
| "grad_norm": 0.7690500250019386, | |
| "learning_rate": 1.7149593964501285e-06, | |
| "loss": 0.0737, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.6225504118148253, | |
| "grad_norm": 0.708482385265233, | |
| "learning_rate": 1.7060454527421688e-06, | |
| "loss": 0.0659, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.6236864527122976, | |
| "grad_norm": 0.7262622089914782, | |
| "learning_rate": 1.6971427210143503e-06, | |
| "loss": 0.0649, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.6248224936097699, | |
| "grad_norm": 0.6756807736567372, | |
| "learning_rate": 1.6882513269882916e-06, | |
| "loss": 0.056, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.6259585345072423, | |
| "grad_norm": 0.7073460144734544, | |
| "learning_rate": 1.6793713962255043e-06, | |
| "loss": 0.0653, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.6270945754047146, | |
| "grad_norm": 0.7142037806342341, | |
| "learning_rate": 1.6705030541256211e-06, | |
| "loss": 0.0571, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.6282306163021869, | |
| "grad_norm": 0.7850076042825073, | |
| "learning_rate": 1.661646425924619e-06, | |
| "loss": 0.0619, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.6293666571996592, | |
| "grad_norm": 0.7204376620154082, | |
| "learning_rate": 1.6528016366930594e-06, | |
| "loss": 0.0732, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.6305026980971314, | |
| "grad_norm": 0.7608581987391684, | |
| "learning_rate": 1.643968811334315e-06, | |
| "loss": 0.0616, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.6316387389946038, | |
| "grad_norm": 0.8576111862705111, | |
| "learning_rate": 1.6351480745828098e-06, | |
| "loss": 0.0705, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.6327747798920761, | |
| "grad_norm": 0.6680467044643426, | |
| "learning_rate": 1.6263395510022546e-06, | |
| "loss": 0.0622, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.6339108207895484, | |
| "grad_norm": 0.8068649968829243, | |
| "learning_rate": 1.6175433649838901e-06, | |
| "loss": 0.0646, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.6350468616870207, | |
| "grad_norm": 0.6817820680144767, | |
| "learning_rate": 1.6087596407447314e-06, | |
| "loss": 0.0632, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.6361829025844931, | |
| "grad_norm": 0.7221690001860933, | |
| "learning_rate": 1.5999885023258099e-06, | |
| "loss": 0.0675, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.6373189434819654, | |
| "grad_norm": 0.7186532155986466, | |
| "learning_rate": 1.5912300735904252e-06, | |
| "loss": 0.0642, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.6384549843794377, | |
| "grad_norm": 0.6768918962633247, | |
| "learning_rate": 1.5824844782223956e-06, | |
| "loss": 0.065, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.6395910252769099, | |
| "grad_norm": 0.7455736530308621, | |
| "learning_rate": 1.5737518397243074e-06, | |
| "loss": 0.0606, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.6407270661743822, | |
| "grad_norm": 0.6548968684524288, | |
| "learning_rate": 1.5650322814157764e-06, | |
| "loss": 0.0636, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.6418631070718546, | |
| "grad_norm": 0.7155782582180678, | |
| "learning_rate": 1.5563259264317048e-06, | |
| "loss": 0.0717, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.6429991479693269, | |
| "grad_norm": 0.7240624483076444, | |
| "learning_rate": 1.5476328977205396e-06, | |
| "loss": 0.0673, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.6441351888667992, | |
| "grad_norm": 0.6718198739936723, | |
| "learning_rate": 1.5389533180425387e-06, | |
| "loss": 0.0663, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.6452712297642715, | |
| "grad_norm": 0.7380737125658104, | |
| "learning_rate": 1.5302873099680378e-06, | |
| "loss": 0.0703, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.6464072706617439, | |
| "grad_norm": 0.6948227028870951, | |
| "learning_rate": 1.5216349958757187e-06, | |
| "loss": 0.0643, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.6475433115592162, | |
| "grad_norm": 0.6963304517404344, | |
| "learning_rate": 1.5129964979508792e-06, | |
| "loss": 0.0671, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.6486793524566884, | |
| "grad_norm": 0.6972931207318768, | |
| "learning_rate": 1.5043719381837113e-06, | |
| "loss": 0.063, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.6498153933541607, | |
| "grad_norm": 0.6485613523241838, | |
| "learning_rate": 1.495761438367577e-06, | |
| "loss": 0.0596, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.650951434251633, | |
| "grad_norm": 0.7001853980454908, | |
| "learning_rate": 1.4871651200972854e-06, | |
| "loss": 0.067, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.6520874751491054, | |
| "grad_norm": 0.7272042380715532, | |
| "learning_rate": 1.47858310476738e-06, | |
| "loss": 0.0598, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.6532235160465777, | |
| "grad_norm": 0.7187945090994169, | |
| "learning_rate": 1.470015513570424e-06, | |
| "loss": 0.0694, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.65435955694405, | |
| "grad_norm": 0.7007096317128729, | |
| "learning_rate": 1.4614624674952843e-06, | |
| "loss": 0.0669, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.6554955978415223, | |
| "grad_norm": 0.7244179388243616, | |
| "learning_rate": 1.452924087325428e-06, | |
| "loss": 0.067, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.6566316387389945, | |
| "grad_norm": 0.7033512029008061, | |
| "learning_rate": 1.4444004936372166e-06, | |
| "loss": 0.0621, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.6577676796364669, | |
| "grad_norm": 0.6990155654636586, | |
| "learning_rate": 1.4358918067981969e-06, | |
| "loss": 0.0596, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.6589037205339392, | |
| "grad_norm": 0.760212990286638, | |
| "learning_rate": 1.4273981469654093e-06, | |
| "loss": 0.0624, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.6600397614314115, | |
| "grad_norm": 0.6977342847520188, | |
| "learning_rate": 1.4189196340836866e-06, | |
| "loss": 0.0568, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.6611758023288838, | |
| "grad_norm": 0.6780400957221138, | |
| "learning_rate": 1.4104563878839623e-06, | |
| "loss": 0.0586, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.6623118432263562, | |
| "grad_norm": 0.6673962911333318, | |
| "learning_rate": 1.4020085278815745e-06, | |
| "loss": 0.0653, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.6634478841238285, | |
| "grad_norm": 0.7198642911954184, | |
| "learning_rate": 1.3935761733745865e-06, | |
| "loss": 0.0635, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.6645839250213008, | |
| "grad_norm": 0.7115497850987673, | |
| "learning_rate": 1.3851594434420968e-06, | |
| "loss": 0.0622, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.665719965918773, | |
| "grad_norm": 0.6864557026455775, | |
| "learning_rate": 1.3767584569425562e-06, | |
| "loss": 0.0605, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.6668560068162454, | |
| "grad_norm": 0.7558879903480649, | |
| "learning_rate": 1.3683733325120934e-06, | |
| "loss": 0.0629, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.6679920477137177, | |
| "grad_norm": 0.6666337739350352, | |
| "learning_rate": 1.360004188562841e-06, | |
| "loss": 0.0645, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.66912808861119, | |
| "grad_norm": 0.7137577962203598, | |
| "learning_rate": 1.351651143281253e-06, | |
| "loss": 0.0643, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.6702641295086623, | |
| "grad_norm": 0.6319040066279115, | |
| "learning_rate": 1.3433143146264494e-06, | |
| "loss": 0.0508, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6714001704061346, | |
| "grad_norm": 0.6909411839212513, | |
| "learning_rate": 1.3349938203285412e-06, | |
| "loss": 0.0545, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.672536211303607, | |
| "grad_norm": 0.7930681686093434, | |
| "learning_rate": 1.3266897778869704e-06, | |
| "loss": 0.0743, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.6736722522010793, | |
| "grad_norm": 0.7524271746971427, | |
| "learning_rate": 1.3184023045688515e-06, | |
| "loss": 0.065, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.6748082930985515, | |
| "grad_norm": 0.7593574572396692, | |
| "learning_rate": 1.3101315174073162e-06, | |
| "loss": 0.071, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.6759443339960238, | |
| "grad_norm": 0.7307972692757313, | |
| "learning_rate": 1.301877533199859e-06, | |
| "loss": 0.063, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.6770803748934962, | |
| "grad_norm": 0.71668422907411, | |
| "learning_rate": 1.2936404685066852e-06, | |
| "loss": 0.0642, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.6782164157909685, | |
| "grad_norm": 0.6457093799654868, | |
| "learning_rate": 1.2854204396490722e-06, | |
| "loss": 0.0551, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.6793524566884408, | |
| "grad_norm": 0.719388282295858, | |
| "learning_rate": 1.2772175627077204e-06, | |
| "loss": 0.0611, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.6804884975859131, | |
| "grad_norm": 0.6880869590502872, | |
| "learning_rate": 1.2690319535211171e-06, | |
| "loss": 0.0609, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.6816245384833854, | |
| "grad_norm": 0.6516612119128604, | |
| "learning_rate": 1.2608637276838987e-06, | |
| "loss": 0.054, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6827605793808577, | |
| "grad_norm": 0.7511572391468219, | |
| "learning_rate": 1.2527130005452212e-06, | |
| "loss": 0.0663, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.68389662027833, | |
| "grad_norm": 0.7651843581040089, | |
| "learning_rate": 1.244579887207126e-06, | |
| "loss": 0.0699, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.6850326611758023, | |
| "grad_norm": 0.7373402269865651, | |
| "learning_rate": 1.236464502522921e-06, | |
| "loss": 0.0684, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.6861687020732746, | |
| "grad_norm": 0.6786418211884958, | |
| "learning_rate": 1.2283669610955543e-06, | |
| "loss": 0.0671, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.687304742970747, | |
| "grad_norm": 0.698068956478052, | |
| "learning_rate": 1.2202873772759983e-06, | |
| "loss": 0.0618, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.6884407838682193, | |
| "grad_norm": 0.6851653623325352, | |
| "learning_rate": 1.2122258651616305e-06, | |
| "loss": 0.059, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.6895768247656916, | |
| "grad_norm": 0.6974766060397587, | |
| "learning_rate": 1.2041825385946288e-06, | |
| "loss": 0.0659, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.6907128656631639, | |
| "grad_norm": 0.7519282757382589, | |
| "learning_rate": 1.1961575111603588e-06, | |
| "loss": 0.067, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.6918489065606361, | |
| "grad_norm": 0.8372671010396519, | |
| "learning_rate": 1.1881508961857716e-06, | |
| "loss": 0.0726, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.6929849474581085, | |
| "grad_norm": 0.7072832731190405, | |
| "learning_rate": 1.1801628067378033e-06, | |
| "loss": 0.0656, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6941209883555808, | |
| "grad_norm": 0.7352876648138319, | |
| "learning_rate": 1.1721933556217793e-06, | |
| "loss": 0.0733, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.6952570292530531, | |
| "grad_norm": 0.686736638297048, | |
| "learning_rate": 1.1642426553798175e-06, | |
| "loss": 0.062, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.6963930701505254, | |
| "grad_norm": 0.6804914622631124, | |
| "learning_rate": 1.1563108182892447e-06, | |
| "loss": 0.0558, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.6975291110479978, | |
| "grad_norm": 0.7283887982191483, | |
| "learning_rate": 1.148397956361007e-06, | |
| "loss": 0.0644, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.6986651519454701, | |
| "grad_norm": 0.6925134320068936, | |
| "learning_rate": 1.1405041813380879e-06, | |
| "loss": 0.0679, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.6998011928429424, | |
| "grad_norm": 0.7087695259064026, | |
| "learning_rate": 1.1326296046939334e-06, | |
| "loss": 0.0562, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.7009372337404146, | |
| "grad_norm": 0.6799067610660522, | |
| "learning_rate": 1.1247743376308754e-06, | |
| "loss": 0.0554, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.7020732746378869, | |
| "grad_norm": 0.650598765409007, | |
| "learning_rate": 1.1169384910785613e-06, | |
| "loss": 0.0512, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.7032093155353593, | |
| "grad_norm": 0.7180760489339335, | |
| "learning_rate": 1.1091221756923888e-06, | |
| "loss": 0.0676, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.7043453564328316, | |
| "grad_norm": 0.771129010138558, | |
| "learning_rate": 1.1013255018519426e-06, | |
| "loss": 0.0744, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.7054813973303039, | |
| "grad_norm": 0.7354640458010835, | |
| "learning_rate": 1.0935485796594352e-06, | |
| "loss": 0.069, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.7066174382277762, | |
| "grad_norm": 0.7116533320646369, | |
| "learning_rate": 1.0857915189381512e-06, | |
| "loss": 0.0625, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.7077534791252486, | |
| "grad_norm": 0.7329261029533441, | |
| "learning_rate": 1.0780544292308998e-06, | |
| "loss": 0.0607, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.7088895200227208, | |
| "grad_norm": 0.7486549384519606, | |
| "learning_rate": 1.0703374197984654e-06, | |
| "loss": 0.0682, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.7100255609201931, | |
| "grad_norm": 0.7165004116848969, | |
| "learning_rate": 1.0626405996180628e-06, | |
| "loss": 0.0613, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.7111616018176654, | |
| "grad_norm": 0.7262023853060127, | |
| "learning_rate": 1.054964077381803e-06, | |
| "loss": 0.0645, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.7122976427151377, | |
| "grad_norm": 0.7113638076418349, | |
| "learning_rate": 1.0473079614951546e-06, | |
| "loss": 0.0655, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.7134336836126101, | |
| "grad_norm": 0.7029593419874601, | |
| "learning_rate": 1.0396723600754144e-06, | |
| "loss": 0.0596, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.7145697245100824, | |
| "grad_norm": 0.6926854888222683, | |
| "learning_rate": 1.0320573809501796e-06, | |
| "loss": 0.0592, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.7157057654075547, | |
| "grad_norm": 0.677936567695018, | |
| "learning_rate": 1.0244631316558268e-06, | |
| "loss": 0.0575, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.716841806305027, | |
| "grad_norm": 0.6830938530740739, | |
| "learning_rate": 1.0168897194359922e-06, | |
| "loss": 0.0561, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.7179778472024992, | |
| "grad_norm": 0.7493058249527598, | |
| "learning_rate": 1.009337251240055e-06, | |
| "loss": 0.0643, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.7191138880999716, | |
| "grad_norm": 0.6948071010614205, | |
| "learning_rate": 1.0018058337216327e-06, | |
| "loss": 0.0598, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.7202499289974439, | |
| "grad_norm": 0.6969420266325782, | |
| "learning_rate": 9.942955732370706e-07, | |
| "loss": 0.0609, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.7213859698949162, | |
| "grad_norm": 0.7578662249147158, | |
| "learning_rate": 9.868065758439388e-07, | |
| "loss": 0.0665, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.7225220107923885, | |
| "grad_norm": 0.6911037252532237, | |
| "learning_rate": 9.793389472995393e-07, | |
| "loss": 0.0576, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.7236580516898609, | |
| "grad_norm": 0.6739642804854075, | |
| "learning_rate": 9.718927930594087e-07, | |
| "loss": 0.0553, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.7247940925873332, | |
| "grad_norm": 0.7079769503814837, | |
| "learning_rate": 9.644682182758305e-07, | |
| "loss": 0.0619, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.7259301334848055, | |
| "grad_norm": 0.7111145505806309, | |
| "learning_rate": 9.570653277963493e-07, | |
| "loss": 0.0641, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.7270661743822777, | |
| "grad_norm": 0.6878608709979896, | |
| "learning_rate": 9.496842261622921e-07, | |
| "loss": 0.0556, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.72820221527975, | |
| "grad_norm": 0.708762829460775, | |
| "learning_rate": 9.423250176072877e-07, | |
| "loss": 0.0615, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.7293382561772224, | |
| "grad_norm": 0.7077145100832107, | |
| "learning_rate": 9.349878060557998e-07, | |
| "loss": 0.0567, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.7304742970746947, | |
| "grad_norm": 0.6814892554720623, | |
| "learning_rate": 9.276726951216572e-07, | |
| "loss": 0.0543, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.731610337972167, | |
| "grad_norm": 0.6926696110783388, | |
| "learning_rate": 9.203797881065907e-07, | |
| "loss": 0.0562, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.7327463788696393, | |
| "grad_norm": 0.7485805520199306, | |
| "learning_rate": 9.131091879987725e-07, | |
| "loss": 0.0653, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.7338824197671117, | |
| "grad_norm": 0.7149870147688834, | |
| "learning_rate": 9.058609974713655e-07, | |
| "loss": 0.0557, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.7350184606645839, | |
| "grad_norm": 0.6706533054717475, | |
| "learning_rate": 8.986353188810706e-07, | |
| "loss": 0.0504, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.7361545015620562, | |
| "grad_norm": 0.6570525705060476, | |
| "learning_rate": 8.914322542666822e-07, | |
| "loss": 0.057, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.7372905424595285, | |
| "grad_norm": 0.8225152469323989, | |
| "learning_rate": 8.842519053476476e-07, | |
| "loss": 0.0756, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.7384265833570008, | |
| "grad_norm": 0.7687986898101944, | |
| "learning_rate": 8.770943735226303e-07, | |
| "loss": 0.077, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.7395626242544732, | |
| "grad_norm": 0.6760083488102284, | |
| "learning_rate": 8.699597598680753e-07, | |
| "loss": 0.0616, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.7406986651519455, | |
| "grad_norm": 0.7111445133059799, | |
| "learning_rate": 8.628481651367876e-07, | |
| "loss": 0.0609, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.7418347060494178, | |
| "grad_norm": 0.715680620040158, | |
| "learning_rate": 8.557596897565043e-07, | |
| "loss": 0.0638, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.7429707469468901, | |
| "grad_norm": 0.7033591520489146, | |
| "learning_rate": 8.486944338284797e-07, | |
| "loss": 0.0581, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.7441067878443623, | |
| "grad_norm": 0.8037728386903387, | |
| "learning_rate": 8.416524971260673e-07, | |
| "loss": 0.0612, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.7452428287418347, | |
| "grad_norm": 0.6785930994355049, | |
| "learning_rate": 8.346339790933167e-07, | |
| "loss": 0.0581, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.746378869639307, | |
| "grad_norm": 0.606320813922669, | |
| "learning_rate": 8.276389788435648e-07, | |
| "loss": 0.0498, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.7475149105367793, | |
| "grad_norm": 0.74087511760928, | |
| "learning_rate": 8.206675951580382e-07, | |
| "loss": 0.066, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.7486509514342516, | |
| "grad_norm": 0.7997387497978912, | |
| "learning_rate": 8.137199264844572e-07, | |
| "loss": 0.0611, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.749786992331724, | |
| "grad_norm": 0.6899912005637121, | |
| "learning_rate": 8.067960709356479e-07, | |
| "loss": 0.0598, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.7509230332291963, | |
| "grad_norm": 0.681907855332953, | |
| "learning_rate": 7.998961262881507e-07, | |
| "loss": 0.0585, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.7520590741266686, | |
| "grad_norm": 0.7119725078654311, | |
| "learning_rate": 7.930201899808476e-07, | |
| "loss": 0.0656, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.7531951150241408, | |
| "grad_norm": 0.6825623566073593, | |
| "learning_rate": 7.861683591135816e-07, | |
| "loss": 0.057, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.7543311559216132, | |
| "grad_norm": 0.7481916867742439, | |
| "learning_rate": 7.793407304457836e-07, | |
| "loss": 0.0672, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.7554671968190855, | |
| "grad_norm": 0.7492624248158564, | |
| "learning_rate": 7.725374003951117e-07, | |
| "loss": 0.0621, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.7566032377165578, | |
| "grad_norm": 0.7248814424445004, | |
| "learning_rate": 7.657584650360847e-07, | |
| "loss": 0.0584, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.7577392786140301, | |
| "grad_norm": 0.7043497561761057, | |
| "learning_rate": 7.590040200987275e-07, | |
| "loss": 0.0648, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.7588753195115024, | |
| "grad_norm": 0.7143918824523554, | |
| "learning_rate": 7.522741609672194e-07, | |
| "loss": 0.0633, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.7600113604089748, | |
| "grad_norm": 0.7755831116052598, | |
| "learning_rate": 7.455689826785456e-07, | |
| "loss": 0.069, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.761147401306447, | |
| "grad_norm": 0.8501109028445187, | |
| "learning_rate": 7.388885799211573e-07, | |
| "loss": 0.0645, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.7622834422039193, | |
| "grad_norm": 0.7699716418147975, | |
| "learning_rate": 7.322330470336314e-07, | |
| "loss": 0.0679, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.7634194831013916, | |
| "grad_norm": 0.6784245576570962, | |
| "learning_rate": 7.256024780033418e-07, | |
| "loss": 0.0564, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.764555523998864, | |
| "grad_norm": 0.6756586477723381, | |
| "learning_rate": 7.189969664651314e-07, | |
| "loss": 0.0561, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.7656915648963363, | |
| "grad_norm": 0.6751139607665853, | |
| "learning_rate": 7.124166056999854e-07, | |
| "loss": 0.0597, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.7668276057938086, | |
| "grad_norm": 0.7239702050922359, | |
| "learning_rate": 7.058614886337212e-07, | |
| "loss": 0.0669, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.7679636466912809, | |
| "grad_norm": 0.8123814274934885, | |
| "learning_rate": 6.993317078356709e-07, | |
| "loss": 0.0645, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.7690996875887532, | |
| "grad_norm": 0.6566260710318882, | |
| "learning_rate": 6.928273555173762e-07, | |
| "loss": 0.0612, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.7702357284862255, | |
| "grad_norm": 0.7245239279676755, | |
| "learning_rate": 6.863485235312853e-07, | |
| "loss": 0.0632, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.7713717693836978, | |
| "grad_norm": 0.7311867799854507, | |
| "learning_rate": 6.798953033694558e-07, | |
| "loss": 0.0635, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.7725078102811701, | |
| "grad_norm": 0.6374305064275428, | |
| "learning_rate": 6.734677861622652e-07, | |
| "loss": 0.0506, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.7736438511786424, | |
| "grad_norm": 0.6990838725244343, | |
| "learning_rate": 6.67066062677118e-07, | |
| "loss": 0.0643, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.7747798920761148, | |
| "grad_norm": 0.6830770597944112, | |
| "learning_rate": 6.60690223317171e-07, | |
| "loss": 0.0592, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.7759159329735871, | |
| "grad_norm": 0.7193721168009378, | |
| "learning_rate": 6.54340358120053e-07, | |
| "loss": 0.0543, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.7770519738710594, | |
| "grad_norm": 0.6929443175270855, | |
| "learning_rate": 6.480165567565913e-07, | |
| "loss": 0.0632, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.7781880147685317, | |
| "grad_norm": 0.7544691021614793, | |
| "learning_rate": 6.417189085295508e-07, | |
| "loss": 0.0643, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.7793240556660039, | |
| "grad_norm": 0.701467040161313, | |
| "learning_rate": 6.354475023723685e-07, | |
| "loss": 0.0667, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.7804600965634763, | |
| "grad_norm": 0.7755055024328538, | |
| "learning_rate": 6.292024268478991e-07, | |
| "loss": 0.0637, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.7815961374609486, | |
| "grad_norm": 0.7413535405232983, | |
| "learning_rate": 6.229837701471645e-07, | |
| "loss": 0.0667, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.7827321783584209, | |
| "grad_norm": 0.7579433832862532, | |
| "learning_rate": 6.167916200881085e-07, | |
| "loss": 0.0639, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.7838682192558932, | |
| "grad_norm": 0.7058994563449785, | |
| "learning_rate": 6.106260641143547e-07, | |
| "loss": 0.0566, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7850042601533656, | |
| "grad_norm": 0.7474580592638266, | |
| "learning_rate": 6.044871892939746e-07, | |
| "loss": 0.0585, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.7861403010508379, | |
| "grad_norm": 0.7095617754477044, | |
| "learning_rate": 5.983750823182574e-07, | |
| "loss": 0.0604, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.7872763419483101, | |
| "grad_norm": 0.6270813452574567, | |
| "learning_rate": 5.922898295004842e-07, | |
| "loss": 0.0569, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.7884123828457824, | |
| "grad_norm": 0.7683343290015114, | |
| "learning_rate": 5.86231516774709e-07, | |
| "loss": 0.0641, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.7895484237432547, | |
| "grad_norm": 0.6205544191304064, | |
| "learning_rate": 5.802002296945475e-07, | |
| "loss": 0.0521, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.7906844646407271, | |
| "grad_norm": 0.6855169824530946, | |
| "learning_rate": 5.741960534319677e-07, | |
| "loss": 0.0541, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.7918205055381994, | |
| "grad_norm": 0.677963115259813, | |
| "learning_rate": 5.682190727760864e-07, | |
| "loss": 0.061, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.7929565464356717, | |
| "grad_norm": 0.7157962748647665, | |
| "learning_rate": 5.622693721319728e-07, | |
| "loss": 0.0602, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.794092587333144, | |
| "grad_norm": 0.7021846226730226, | |
| "learning_rate": 5.563470355194564e-07, | |
| "loss": 0.0689, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.7952286282306164, | |
| "grad_norm": 0.6730377226852833, | |
| "learning_rate": 5.504521465719392e-07, | |
| "loss": 0.0624, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7963646691280886, | |
| "grad_norm": 0.7117566338537665, | |
| "learning_rate": 5.445847885352171e-07, | |
| "loss": 0.0544, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.7975007100255609, | |
| "grad_norm": 0.6514495814244172, | |
| "learning_rate": 5.387450442663026e-07, | |
| "loss": 0.0547, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.7986367509230332, | |
| "grad_norm": 0.7041482161296513, | |
| "learning_rate": 5.329329962322554e-07, | |
| "loss": 0.0637, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.7997727918205055, | |
| "grad_norm": 0.662553486771611, | |
| "learning_rate": 5.271487265090163e-07, | |
| "loss": 0.0617, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.8009088327179779, | |
| "grad_norm": 0.771571552061494, | |
| "learning_rate": 5.213923167802506e-07, | |
| "loss": 0.068, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.8020448736154502, | |
| "grad_norm": 0.7413005333218508, | |
| "learning_rate": 5.156638483361933e-07, | |
| "loss": 0.0592, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.8031809145129225, | |
| "grad_norm": 0.7075709987524001, | |
| "learning_rate": 5.099634020725012e-07, | |
| "loss": 0.0559, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.8043169554103948, | |
| "grad_norm": 0.7275257681800995, | |
| "learning_rate": 5.0429105848911e-07, | |
| "loss": 0.0696, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.805452996307867, | |
| "grad_norm": 0.6828000250150597, | |
| "learning_rate": 4.986468976890993e-07, | |
| "loss": 0.0659, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.8065890372053394, | |
| "grad_norm": 0.7757907559807298, | |
| "learning_rate": 4.930309993775578e-07, | |
| "loss": 0.064, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.8077250781028117, | |
| "grad_norm": 0.6173272258086845, | |
| "learning_rate": 4.874434428604625e-07, | |
| "loss": 0.0437, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.808861119000284, | |
| "grad_norm": 0.7177886268777128, | |
| "learning_rate": 4.818843070435561e-07, | |
| "loss": 0.0577, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.8099971598977563, | |
| "grad_norm": 0.68285187824597, | |
| "learning_rate": 4.763536704312305e-07, | |
| "loss": 0.0567, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.8111332007952287, | |
| "grad_norm": 0.771227288945189, | |
| "learning_rate": 4.708516111254238e-07, | |
| "loss": 0.071, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.812269241692701, | |
| "grad_norm": 0.7283356464831592, | |
| "learning_rate": 4.6537820682451273e-07, | |
| "loss": 0.0641, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.8134052825901732, | |
| "grad_norm": 0.7110559383473583, | |
| "learning_rate": 4.5993353482221697e-07, | |
| "loss": 0.0663, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.8145413234876455, | |
| "grad_norm": 0.6511668989401792, | |
| "learning_rate": 4.545176720065078e-07, | |
| "loss": 0.0574, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.8156773643851178, | |
| "grad_norm": 0.7393080837168933, | |
| "learning_rate": 4.4913069485852197e-07, | |
| "loss": 0.0604, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.8168134052825902, | |
| "grad_norm": 0.7436931187552859, | |
| "learning_rate": 4.437726794514824e-07, | |
| "loss": 0.0562, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.8179494461800625, | |
| "grad_norm": 0.7077518355152435, | |
| "learning_rate": 4.3844370144962153e-07, | |
| "loss": 0.0595, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.8190854870775348, | |
| "grad_norm": 0.6832077000007746, | |
| "learning_rate": 4.3314383610711633e-07, | |
| "loss": 0.0688, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.8202215279750071, | |
| "grad_norm": 0.7105084721185602, | |
| "learning_rate": 4.2787315826702396e-07, | |
| "loss": 0.0656, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.8213575688724795, | |
| "grad_norm": 0.6614284120793984, | |
| "learning_rate": 4.2263174236022245e-07, | |
| "loss": 0.0589, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.8224936097699517, | |
| "grad_norm": 0.6765730172646925, | |
| "learning_rate": 4.1741966240436446e-07, | |
| "loss": 0.0586, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.823629650667424, | |
| "grad_norm": 0.7338387674973738, | |
| "learning_rate": 4.122369920028277e-07, | |
| "loss": 0.0659, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.8247656915648963, | |
| "grad_norm": 0.7223755947250412, | |
| "learning_rate": 4.070838043436787e-07, | |
| "loss": 0.0608, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.8259017324623686, | |
| "grad_norm": 0.678695491674745, | |
| "learning_rate": 4.019601721986363e-07, | |
| "loss": 0.0667, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.827037773359841, | |
| "grad_norm": 0.6696005070496583, | |
| "learning_rate": 3.9686616792204677e-07, | |
| "loss": 0.0584, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.8281738142573133, | |
| "grad_norm": 0.6789148636755675, | |
| "learning_rate": 3.9180186344986103e-07, | |
| "loss": 0.0604, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.8293098551547856, | |
| "grad_norm": 0.7335609599059305, | |
| "learning_rate": 3.867673302986161e-07, | |
| "loss": 0.0585, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.8304458960522578, | |
| "grad_norm": 0.739136862580604, | |
| "learning_rate": 3.8176263956443056e-07, | |
| "loss": 0.0623, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.8315819369497301, | |
| "grad_norm": 0.7130221754855517, | |
| "learning_rate": 3.7678786192199695e-07, | |
| "loss": 0.059, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.8327179778472025, | |
| "grad_norm": 0.7513521882454077, | |
| "learning_rate": 3.7184306762358235e-07, | |
| "loss": 0.0595, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.8338540187446748, | |
| "grad_norm": 0.716760774039175, | |
| "learning_rate": 3.6692832649804085e-07, | |
| "loss": 0.0586, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.8349900596421471, | |
| "grad_norm": 0.6871206927436988, | |
| "learning_rate": 3.6204370794982376e-07, | |
| "loss": 0.0674, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.8361261005396194, | |
| "grad_norm": 0.692975693286346, | |
| "learning_rate": 3.571892809580013e-07, | |
| "loss": 0.0537, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.8372621414370918, | |
| "grad_norm": 0.680048611874437, | |
| "learning_rate": 3.5236511407528676e-07, | |
| "loss": 0.0607, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.8383981823345641, | |
| "grad_norm": 0.7149557473910435, | |
| "learning_rate": 3.475712754270716e-07, | |
| "loss": 0.0581, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.8395342232320363, | |
| "grad_norm": 0.6836636678136013, | |
| "learning_rate": 3.4280783271045863e-07, | |
| "loss": 0.0563, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.8406702641295086, | |
| "grad_norm": 0.6678612129832644, | |
| "learning_rate": 3.3807485319331037e-07, | |
| "loss": 0.0637, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.841806305026981, | |
| "grad_norm": 0.7068459150295527, | |
| "learning_rate": 3.333724037132977e-07, | |
| "loss": 0.061, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.8429423459244533, | |
| "grad_norm": 0.6444850358901095, | |
| "learning_rate": 3.2870055067695557e-07, | |
| "loss": 0.0479, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.8440783868219256, | |
| "grad_norm": 0.6551043511985852, | |
| "learning_rate": 3.240593600587444e-07, | |
| "loss": 0.0459, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.8452144277193979, | |
| "grad_norm": 0.7533706586347682, | |
| "learning_rate": 3.194488974001203e-07, | |
| "loss": 0.0727, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.8463504686168702, | |
| "grad_norm": 0.7193017131365196, | |
| "learning_rate": 3.148692278086088e-07, | |
| "loss": 0.0631, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.8474865095143426, | |
| "grad_norm": 0.6482741683877096, | |
| "learning_rate": 3.1032041595688514e-07, | |
| "loss": 0.0475, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.8486225504118148, | |
| "grad_norm": 0.6625125560890016, | |
| "learning_rate": 3.058025260818609e-07, | |
| "loss": 0.0625, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.8497585913092871, | |
| "grad_norm": 0.6797786911558783, | |
| "learning_rate": 3.0131562198377763e-07, | |
| "loss": 0.061, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.8508946322067594, | |
| "grad_norm": 0.7285783384018145, | |
| "learning_rate": 2.96859767025304e-07, | |
| "loss": 0.0652, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.8520306731042318, | |
| "grad_norm": 0.6758942124222337, | |
| "learning_rate": 2.9243502413064365e-07, | |
| "loss": 0.0601, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.8531667140017041, | |
| "grad_norm": 0.6569659983478026, | |
| "learning_rate": 2.8804145578464533e-07, | |
| "loss": 0.0549, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.8543027548991764, | |
| "grad_norm": 0.6561252291916106, | |
| "learning_rate": 2.8367912403191976e-07, | |
| "loss": 0.06, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.8554387957966487, | |
| "grad_norm": 0.6900748230193315, | |
| "learning_rate": 2.7934809047596436e-07, | |
| "loss": 0.0533, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.8565748366941209, | |
| "grad_norm": 0.6719565408280884, | |
| "learning_rate": 2.7504841627829293e-07, | |
| "loss": 0.0614, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.8577108775915933, | |
| "grad_norm": 0.6697878517856197, | |
| "learning_rate": 2.7078016215757343e-07, | |
| "loss": 0.0592, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.8588469184890656, | |
| "grad_norm": 0.6903304793443591, | |
| "learning_rate": 2.6654338838876664e-07, | |
| "loss": 0.0586, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.8599829593865379, | |
| "grad_norm": 0.6357831598645269, | |
| "learning_rate": 2.623381548022802e-07, | |
| "loss": 0.0542, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.8611190002840102, | |
| "grad_norm": 0.7011758996549884, | |
| "learning_rate": 2.581645207831204e-07, | |
| "loss": 0.0595, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.8622550411814826, | |
| "grad_norm": 0.7179364439414111, | |
| "learning_rate": 2.5402254527005286e-07, | |
| "loss": 0.0634, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.8633910820789549, | |
| "grad_norm": 0.7201197976310817, | |
| "learning_rate": 2.4991228675477293e-07, | |
| "loss": 0.0629, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.8645271229764272, | |
| "grad_norm": 0.702925778220309, | |
| "learning_rate": 2.458338032810781e-07, | |
| "loss": 0.0621, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.8656631638738994, | |
| "grad_norm": 0.6556798289743799, | |
| "learning_rate": 2.4178715244404796e-07, | |
| "loss": 0.0614, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.8667992047713717, | |
| "grad_norm": 0.6606948072301375, | |
| "learning_rate": 2.3777239138923214e-07, | |
| "loss": 0.057, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.8679352456688441, | |
| "grad_norm": 0.6761035461766024, | |
| "learning_rate": 2.3378957681184283e-07, | |
| "loss": 0.0523, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.8690712865663164, | |
| "grad_norm": 0.644129680660782, | |
| "learning_rate": 2.298387649559533e-07, | |
| "loss": 0.0553, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.8702073274637887, | |
| "grad_norm": 0.6581351644159102, | |
| "learning_rate": 2.2592001161370392e-07, | |
| "loss": 0.0544, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.871343368361261, | |
| "grad_norm": 0.6811793665508726, | |
| "learning_rate": 2.2203337212451632e-07, | |
| "loss": 0.0567, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.8724794092587334, | |
| "grad_norm": 0.6377187494528523, | |
| "learning_rate": 2.1817890137430936e-07, | |
| "loss": 0.0503, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.8736154501562057, | |
| "grad_norm": 0.6913242736410589, | |
| "learning_rate": 2.1435665379472393e-07, | |
| "loss": 0.0526, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.8747514910536779, | |
| "grad_norm": 0.7004250425383008, | |
| "learning_rate": 2.1056668336235624e-07, | |
| "loss": 0.0545, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.8758875319511502, | |
| "grad_norm": 0.7005696669465531, | |
| "learning_rate": 2.0680904359799582e-07, | |
| "loss": 0.0592, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.8770235728486225, | |
| "grad_norm": 0.6799341594364471, | |
| "learning_rate": 2.0308378756586562e-07, | |
| "loss": 0.0577, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.8781596137460949, | |
| "grad_norm": 0.664142756885439, | |
| "learning_rate": 1.9939096787287783e-07, | |
| "loss": 0.0539, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.8792956546435672, | |
| "grad_norm": 0.6412395921611368, | |
| "learning_rate": 1.9573063666788878e-07, | |
| "loss": 0.0559, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.8804316955410395, | |
| "grad_norm": 0.6613285197923775, | |
| "learning_rate": 1.9210284564096042e-07, | |
| "loss": 0.055, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.8815677364385118, | |
| "grad_norm": 0.6140474369508341, | |
| "learning_rate": 1.8850764602263428e-07, | |
| "loss": 0.0531, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.882703777335984, | |
| "grad_norm": 0.6692649568862223, | |
| "learning_rate": 1.8494508858320603e-07, | |
| "loss": 0.0571, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.8838398182334564, | |
| "grad_norm": 0.6425303533870481, | |
| "learning_rate": 1.8141522363200797e-07, | |
| "loss": 0.0561, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.8849758591309287, | |
| "grad_norm": 0.6768491095903121, | |
| "learning_rate": 1.7791810101669887e-07, | |
| "loss": 0.0648, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.886111900028401, | |
| "grad_norm": 0.6641297355215765, | |
| "learning_rate": 1.7445377012256127e-07, | |
| "loss": 0.0525, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8872479409258733, | |
| "grad_norm": 0.6968334370818641, | |
| "learning_rate": 1.710222798718028e-07, | |
| "loss": 0.056, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.8883839818233457, | |
| "grad_norm": 0.6500257846017748, | |
| "learning_rate": 1.676236787228652e-07, | |
| "loss": 0.0599, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.889520022720818, | |
| "grad_norm": 0.6996758140756307, | |
| "learning_rate": 1.6425801466974118e-07, | |
| "loss": 0.064, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.8906560636182903, | |
| "grad_norm": 0.6965589508578391, | |
| "learning_rate": 1.6092533524129623e-07, | |
| "loss": 0.0611, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.8917921045157625, | |
| "grad_norm": 0.7093030606255738, | |
| "learning_rate": 1.5762568750059604e-07, | |
| "loss": 0.0614, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.8929281454132348, | |
| "grad_norm": 0.6563496411557368, | |
| "learning_rate": 1.543591180442436e-07, | |
| "loss": 0.0549, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.8940641863107072, | |
| "grad_norm": 0.6667029761797618, | |
| "learning_rate": 1.5112567300172186e-07, | |
| "loss": 0.0589, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.8952002272081795, | |
| "grad_norm": 0.6828229184240027, | |
| "learning_rate": 1.4792539803473921e-07, | |
| "loss": 0.0557, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.8963362681056518, | |
| "grad_norm": 0.650678331393596, | |
| "learning_rate": 1.447583383365872e-07, | |
| "loss": 0.056, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.8974723090031241, | |
| "grad_norm": 0.7815935941115113, | |
| "learning_rate": 1.4162453863150183e-07, | |
| "loss": 0.0578, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8986083499005965, | |
| "grad_norm": 0.6868768630237868, | |
| "learning_rate": 1.38524043174032e-07, | |
| "loss": 0.0552, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.8997443907980688, | |
| "grad_norm": 0.6840956771248708, | |
| "learning_rate": 1.3545689574841341e-07, | |
| "loss": 0.065, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.900880431695541, | |
| "grad_norm": 0.6760255459454898, | |
| "learning_rate": 1.3242313966795207e-07, | |
| "loss": 0.0565, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.9020164725930133, | |
| "grad_norm": 0.7016923239975965, | |
| "learning_rate": 1.2942281777441168e-07, | |
| "loss": 0.0582, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.9031525134904856, | |
| "grad_norm": 0.7053617949532103, | |
| "learning_rate": 1.2645597243740788e-07, | |
| "loss": 0.0612, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.904288554387958, | |
| "grad_norm": 0.6802180277198182, | |
| "learning_rate": 1.2352264555381134e-07, | |
| "loss": 0.0613, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.9054245952854303, | |
| "grad_norm": 0.6967777911681594, | |
| "learning_rate": 1.2062287854715638e-07, | |
| "loss": 0.0538, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.9065606361829026, | |
| "grad_norm": 0.6496482286765165, | |
| "learning_rate": 1.1775671236705366e-07, | |
| "loss": 0.0529, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.9076966770803749, | |
| "grad_norm": 0.6523563127293543, | |
| "learning_rate": 1.1492418748861422e-07, | |
| "loss": 0.0579, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.9088327179778471, | |
| "grad_norm": 0.6666269373861077, | |
| "learning_rate": 1.121253439118769e-07, | |
| "loss": 0.0549, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.9099687588753195, | |
| "grad_norm": 0.6219590734666158, | |
| "learning_rate": 1.0936022116124323e-07, | |
| "loss": 0.0577, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.9111047997727918, | |
| "grad_norm": 0.6646183084981874, | |
| "learning_rate": 1.0662885828492037e-07, | |
| "loss": 0.0507, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.9122408406702641, | |
| "grad_norm": 0.667226418096009, | |
| "learning_rate": 1.0393129385436824e-07, | |
| "loss": 0.0554, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.9133768815677364, | |
| "grad_norm": 0.6177821185317497, | |
| "learning_rate": 1.0126756596375687e-07, | |
| "loss": 0.0594, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.9145129224652088, | |
| "grad_norm": 0.6988859872273063, | |
| "learning_rate": 9.86377122294252e-08, | |
| "loss": 0.0563, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.9156489633626811, | |
| "grad_norm": 0.638466441774376, | |
| "learning_rate": 9.604176978935342e-08, | |
| "loss": 0.0516, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.9167850042601534, | |
| "grad_norm": 0.6355833397748155, | |
| "learning_rate": 9.347977530263646e-08, | |
| "loss": 0.0518, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.9179210451576256, | |
| "grad_norm": 0.6615633189629551, | |
| "learning_rate": 9.095176494896662e-08, | |
| "loss": 0.0551, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.919057086055098, | |
| "grad_norm": 0.6539242681970885, | |
| "learning_rate": 8.845777442812314e-08, | |
| "loss": 0.0558, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.9201931269525703, | |
| "grad_norm": 0.646172270844248, | |
| "learning_rate": 8.599783895946762e-08, | |
| "loss": 0.054, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.9213291678500426, | |
| "grad_norm": 0.6785332746212462, | |
| "learning_rate": 8.357199328144577e-08, | |
| "loss": 0.0599, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.9224652087475149, | |
| "grad_norm": 0.6495019596955803, | |
| "learning_rate": 8.118027165109926e-08, | |
| "loss": 0.0539, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.9236012496449872, | |
| "grad_norm": 0.6522218309663713, | |
| "learning_rate": 7.88227078435802e-08, | |
| "loss": 0.0545, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.9247372905424596, | |
| "grad_norm": 0.7053726256275266, | |
| "learning_rate": 7.649933515167407e-08, | |
| "loss": 0.0593, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.9258733314399319, | |
| "grad_norm": 0.6581258921923674, | |
| "learning_rate": 7.421018638533006e-08, | |
| "loss": 0.0554, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.9270093723374041, | |
| "grad_norm": 0.7110467493048188, | |
| "learning_rate": 7.195529387119815e-08, | |
| "loss": 0.0629, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.9281454132348764, | |
| "grad_norm": 0.61576971414751, | |
| "learning_rate": 6.973468945217138e-08, | |
| "loss": 0.0535, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.9292814541323487, | |
| "grad_norm": 0.6228873518276579, | |
| "learning_rate": 6.75484044869379e-08, | |
| "loss": 0.0529, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.9304174950298211, | |
| "grad_norm": 0.6884984566218579, | |
| "learning_rate": 6.539646984953629e-08, | |
| "loss": 0.0599, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.9315535359272934, | |
| "grad_norm": 0.6351812720863683, | |
| "learning_rate": 6.327891592892126e-08, | |
| "loss": 0.0532, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.9326895768247657, | |
| "grad_norm": 0.6412648558002986, | |
| "learning_rate": 6.119577262853255e-08, | |
| "loss": 0.0497, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.933825617722238, | |
| "grad_norm": 0.6250020595841635, | |
| "learning_rate": 5.914706936587494e-08, | |
| "loss": 0.0539, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.9349616586197103, | |
| "grad_norm": 0.7251758286477061, | |
| "learning_rate": 5.7132835072101486e-08, | |
| "loss": 0.0588, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.9360976995171826, | |
| "grad_norm": 0.6450110691416201, | |
| "learning_rate": 5.515309819160402e-08, | |
| "loss": 0.0563, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.9372337404146549, | |
| "grad_norm": 0.6976176454161298, | |
| "learning_rate": 5.3207886681613804e-08, | |
| "loss": 0.0568, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.9383697813121272, | |
| "grad_norm": 0.6806662590375777, | |
| "learning_rate": 5.129722801180542e-08, | |
| "loss": 0.0522, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.9395058222095996, | |
| "grad_norm": 0.6627861317533252, | |
| "learning_rate": 4.942114916390822e-08, | |
| "loss": 0.0569, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.9406418631070719, | |
| "grad_norm": 0.6921036678056119, | |
| "learning_rate": 4.75796766313269e-08, | |
| "loss": 0.0541, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.9417779040045442, | |
| "grad_norm": 0.685394799824403, | |
| "learning_rate": 4.5772836418765674e-08, | |
| "loss": 0.0585, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.9429139449020165, | |
| "grad_norm": 0.6764406432665999, | |
| "learning_rate": 4.4000654041862764e-08, | |
| "loss": 0.0608, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.9440499857994887, | |
| "grad_norm": 0.6631270624595927, | |
| "learning_rate": 4.2263154526828164e-08, | |
| "loss": 0.0574, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.9451860266969611, | |
| "grad_norm": 0.7131017565421759, | |
| "learning_rate": 4.05603624100917e-08, | |
| "loss": 0.0617, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.9463220675944334, | |
| "grad_norm": 0.641827746966638, | |
| "learning_rate": 3.889230173795639e-08, | |
| "loss": 0.0598, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.9474581084919057, | |
| "grad_norm": 0.6845999909211324, | |
| "learning_rate": 3.72589960662581e-08, | |
| "loss": 0.0624, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.948594149389378, | |
| "grad_norm": 0.6808398137416478, | |
| "learning_rate": 3.56604684600334e-08, | |
| "loss": 0.0597, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.9497301902868504, | |
| "grad_norm": 0.6766075948113582, | |
| "learning_rate": 3.4096741493194196e-08, | |
| "loss": 0.0664, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.9508662311843227, | |
| "grad_norm": 0.6608092476063419, | |
| "learning_rate": 3.2567837248208e-08, | |
| "loss": 0.0552, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.952002272081795, | |
| "grad_norm": 0.6801239599671042, | |
| "learning_rate": 3.107377731578709e-08, | |
| "loss": 0.0555, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.9531383129792672, | |
| "grad_norm": 0.6921131575508155, | |
| "learning_rate": 2.9614582794582904e-08, | |
| "loss": 0.062, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.9542743538767395, | |
| "grad_norm": 0.6832717944066543, | |
| "learning_rate": 2.819027429088822e-08, | |
| "loss": 0.0664, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.9554103947742119, | |
| "grad_norm": 0.7103866012859991, | |
| "learning_rate": 2.680087191834685e-08, | |
| "loss": 0.0607, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.9565464356716842, | |
| "grad_norm": 0.7120565073244881, | |
| "learning_rate": 2.544639529766829e-08, | |
| "loss": 0.0532, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.9576824765691565, | |
| "grad_norm": 0.6746499491772139, | |
| "learning_rate": 2.4126863556351854e-08, | |
| "loss": 0.0628, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.9588185174666288, | |
| "grad_norm": 0.6744896412450383, | |
| "learning_rate": 2.284229532841603e-08, | |
| "loss": 0.0597, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.9599545583641012, | |
| "grad_norm": 0.6566929119765946, | |
| "learning_rate": 2.1592708754135105e-08, | |
| "loss": 0.0568, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.9610905992615734, | |
| "grad_norm": 0.6458104835743707, | |
| "learning_rate": 2.0378121479783798e-08, | |
| "loss": 0.0557, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.9622266401590457, | |
| "grad_norm": 0.6716209159702464, | |
| "learning_rate": 1.919855065738746e-08, | |
| "loss": 0.0562, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.963362681056518, | |
| "grad_norm": 0.6679933147801366, | |
| "learning_rate": 1.8054012944479225e-08, | |
| "loss": 0.0554, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.9644987219539903, | |
| "grad_norm": 0.7113182719515749, | |
| "learning_rate": 1.6944524503866854e-08, | |
| "loss": 0.0564, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.9656347628514627, | |
| "grad_norm": 0.6608607329275047, | |
| "learning_rate": 1.5870101003402083e-08, | |
| "loss": 0.0548, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.966770803748935, | |
| "grad_norm": 0.6622351677702579, | |
| "learning_rate": 1.483075761576025e-08, | |
| "loss": 0.0526, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.9679068446464073, | |
| "grad_norm": 0.6927125282488752, | |
| "learning_rate": 1.382650901822713e-08, | |
| "loss": 0.0607, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.9690428855438796, | |
| "grad_norm": 0.7162152988014072, | |
| "learning_rate": 1.2857369392490493e-08, | |
| "loss": 0.0589, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.9701789264413518, | |
| "grad_norm": 0.7898973362889951, | |
| "learning_rate": 1.1923352424439149e-08, | |
| "loss": 0.0646, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.9713149673388242, | |
| "grad_norm": 0.6800027138060577, | |
| "learning_rate": 1.1024471303971995e-08, | |
| "loss": 0.0562, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.9724510082362965, | |
| "grad_norm": 0.7092009667813072, | |
| "learning_rate": 1.0160738724809549e-08, | |
| "loss": 0.0542, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.9735870491337688, | |
| "grad_norm": 0.6988482035788468, | |
| "learning_rate": 9.332166884315763e-09, | |
| "loss": 0.0664, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.9747230900312411, | |
| "grad_norm": 0.6887000678028918, | |
| "learning_rate": 8.538767483325384e-09, | |
| "loss": 0.0638, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.9758591309287135, | |
| "grad_norm": 0.6409429108166975, | |
| "learning_rate": 7.78055172597908e-09, | |
| "loss": 0.057, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.9769951718261858, | |
| "grad_norm": 0.6719107919151849, | |
| "learning_rate": 7.05753031956441e-09, | |
| "loss": 0.0557, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.9781312127236581, | |
| "grad_norm": 0.7066671846222795, | |
| "learning_rate": 6.369713474366213e-09, | |
| "loss": 0.0557, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.9792672536211303, | |
| "grad_norm": 0.6893439981594293, | |
| "learning_rate": 5.717110903520617e-09, | |
| "loss": 0.0582, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.9804032945186026, | |
| "grad_norm": 0.7127707639288884, | |
| "learning_rate": 5.09973182287904e-09, | |
| "loss": 0.0598, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.981539335416075, | |
| "grad_norm": 0.6688168935131097, | |
| "learning_rate": 4.517584950877451e-09, | |
| "loss": 0.0628, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.9826753763135473, | |
| "grad_norm": 0.6588910866128358, | |
| "learning_rate": 3.970678508413983e-09, | |
| "loss": 0.0579, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.9838114172110196, | |
| "grad_norm": 0.6434730119889486, | |
| "learning_rate": 3.4590202187315124e-09, | |
| "loss": 0.0524, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.9849474581084919, | |
| "grad_norm": 0.6675520230860594, | |
| "learning_rate": 2.982617307310254e-09, | |
| "loss": 0.0526, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.9860834990059643, | |
| "grad_norm": 0.7011452717713275, | |
| "learning_rate": 2.5414765017642285e-09, | |
| "loss": 0.0582, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.9872195399034365, | |
| "grad_norm": 0.6315815934268008, | |
| "learning_rate": 2.1356040317474512e-09, | |
| "loss": 0.0523, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.9883555808009088, | |
| "grad_norm": 0.6397196083044729, | |
| "learning_rate": 1.765005628865113e-09, | |
| "loss": 0.0534, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9894916216983811, | |
| "grad_norm": 0.6499579240718064, | |
| "learning_rate": 1.4296865265930882e-09, | |
| "loss": 0.0488, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.9906276625958534, | |
| "grad_norm": 0.7131022504272738, | |
| "learning_rate": 1.1296514602038289e-09, | |
| "loss": 0.0604, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.9917637034933258, | |
| "grad_norm": 0.6575236309958269, | |
| "learning_rate": 8.649046666994732e-10, | |
| "loss": 0.0514, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.9928997443907981, | |
| "grad_norm": 0.6876333215462411, | |
| "learning_rate": 6.354498847521706e-10, | |
| "loss": 0.0609, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.9940357852882704, | |
| "grad_norm": 0.656021262952682, | |
| "learning_rate": 4.412903546516245e-10, | |
| "loss": 0.0535, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.9951718261857427, | |
| "grad_norm": 0.6554446679692325, | |
| "learning_rate": 2.8242881825846225e-10, | |
| "loss": 0.0533, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.996307867083215, | |
| "grad_norm": 0.6892726133408795, | |
| "learning_rate": 1.5886751896565521e-10, | |
| "loss": 0.0573, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.9974439079806873, | |
| "grad_norm": 0.624707525404124, | |
| "learning_rate": 7.060820166826521e-11, | |
| "loss": 0.0489, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.9985799488781596, | |
| "grad_norm": 0.685439156837803, | |
| "learning_rate": 1.7652112736521455e-11, | |
| "loss": 0.0603, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.9997159897756319, | |
| "grad_norm": 0.6532677360538005, | |
| "learning_rate": 0.0, | |
| "loss": 0.0627, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9997159897756319, | |
| "step": 880, | |
| "total_flos": 104387643310080.0, | |
| "train_loss": 0.07837209747257558, | |
| "train_runtime": 3781.2331, | |
| "train_samples_per_second": 14.898, | |
| "train_steps_per_second": 0.233 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 880, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 104387643310080.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |