{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 10246,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0019521717911176184, "grad_norm": 3.0788934230804443, "learning_rate": 8.78048780487805e-08, "loss": 0.4941, "step": 10 },
    { "epoch": 0.003904343582235237, "grad_norm": 2.4788622856140137, "learning_rate": 1.8536585365853658e-07, "loss": 0.5202, "step": 20 },
    { "epoch": 0.005856515373352855, "grad_norm": 2.7830698490142822, "learning_rate": 2.829268292682927e-07, "loss": 0.4917, "step": 30 },
    { "epoch": 0.007808687164470474, "grad_norm": 2.2472496032714844, "learning_rate": 3.804878048780488e-07, "loss": 0.4798, "step": 40 },
    { "epoch": 0.009760858955588092, "grad_norm": 2.4603986740112305, "learning_rate": 4.780487804878049e-07, "loss": 0.507, "step": 50 },
    { "epoch": 0.01171303074670571, "grad_norm": 2.1819260120391846, "learning_rate": 5.75609756097561e-07, "loss": 0.4741, "step": 60 },
    { "epoch": 0.013665202537823329, "grad_norm": 2.283219814300537, "learning_rate": 6.731707317073172e-07, "loss": 0.491, "step": 70 },
    { "epoch": 0.015617374328940947, "grad_norm": 2.379164695739746, "learning_rate": 7.707317073170732e-07, "loss": 0.4963, "step": 80 },
    { "epoch": 0.017569546120058566, "grad_norm": 1.8835978507995605, "learning_rate": 8.682926829268293e-07, "loss": 0.4649, "step": 90 },
    { "epoch": 0.019521717911176184, "grad_norm": 2.128862142562866, "learning_rate": 9.658536585365854e-07, "loss": 0.4627, "step": 100 },
    { "epoch": 0.021473889702293802, "grad_norm": 2.55245304107666, "learning_rate": 1.0634146341463415e-06, "loss": 0.4943, "step": 110 },
    { "epoch": 0.02342606149341142, "grad_norm": 2.192894697189331, "learning_rate": 1.1609756097560977e-06, "loss": 0.477, "step": 120 },
    { "epoch": 0.02537823328452904, "grad_norm": 2.0305230617523193, "learning_rate": 1.2585365853658538e-06, "loss": 0.4636, "step": 130 },
    { "epoch": 0.027330405075646658, "grad_norm": 2.010695695877075, "learning_rate": 1.3560975609756099e-06, "loss": 0.4873, "step": 140 },
    { "epoch": 0.029282576866764276, "grad_norm": 1.8330553770065308, "learning_rate": 1.453658536585366e-06, "loss": 0.4764, "step": 150 },
    { "epoch": 0.031234748657881894, "grad_norm": 1.8834083080291748, "learning_rate": 1.551219512195122e-06, "loss": 0.4945, "step": 160 },
    { "epoch": 0.03318692044899951, "grad_norm": 1.932608962059021, "learning_rate": 1.6487804878048783e-06, "loss": 0.4486, "step": 170 },
    { "epoch": 0.03513909224011713, "grad_norm": 2.3214528560638428, "learning_rate": 1.7463414634146341e-06, "loss": 0.4464, "step": 180 },
    { "epoch": 0.03709126403123475, "grad_norm": 3.1889891624450684, "learning_rate": 1.8439024390243904e-06, "loss": 0.4858, "step": 190 },
    { "epoch": 0.03904343582235237, "grad_norm": 2.5776352882385254, "learning_rate": 1.9414634146341465e-06, "loss": 0.4713, "step": 200 },
    { "epoch": 0.040995607613469986, "grad_norm": 2.4418435096740723, "learning_rate": 2.0390243902439023e-06, "loss": 0.4838, "step": 210 },
    { "epoch": 0.042947779404587605, "grad_norm": 2.241332530975342, "learning_rate": 2.1365853658536586e-06, "loss": 0.429, "step": 220 },
    { "epoch": 0.04489995119570522, "grad_norm": 1.7465415000915527, "learning_rate": 2.234146341463415e-06, "loss": 0.4743, "step": 230 },
    { "epoch": 0.04685212298682284, "grad_norm": 2.3134076595306396, "learning_rate": 2.331707317073171e-06, "loss": 0.4751, "step": 240 },
    { "epoch": 0.04880429477794046, "grad_norm": 2.256594657897949, "learning_rate": 2.429268292682927e-06, "loss": 0.4773, "step": 250 },
    { "epoch": 0.05075646656905808, "grad_norm": 2.873642921447754, "learning_rate": 2.5268292682926833e-06, "loss": 0.4484, "step": 260 },
    { "epoch": 0.0527086383601757, "grad_norm": 2.403874635696411, "learning_rate": 2.624390243902439e-06, "loss": 0.4338, "step": 270 },
    { "epoch": 0.054660810151293315, "grad_norm": 2.0818169116973877, "learning_rate": 2.7219512195121954e-06, "loss": 0.4569, "step": 280 },
    { "epoch": 0.056612981942410934, "grad_norm": 1.8225184679031372, "learning_rate": 2.8195121951219513e-06, "loss": 0.4647, "step": 290 },
    { "epoch": 0.05856515373352855, "grad_norm": 1.9539415836334229, "learning_rate": 2.9170731707317076e-06, "loss": 0.4713, "step": 300 },
    { "epoch": 0.06051732552464617, "grad_norm": 2.6394882202148438, "learning_rate": 3.0146341463414634e-06, "loss": 0.4314, "step": 310 },
    { "epoch": 0.06246949731576379, "grad_norm": 2.3676302433013916, "learning_rate": 3.1121951219512197e-06, "loss": 0.4849, "step": 320 },
    { "epoch": 0.06442166910688141, "grad_norm": 2.2868118286132812, "learning_rate": 3.209756097560976e-06, "loss": 0.4461, "step": 330 },
    { "epoch": 0.06637384089799903, "grad_norm": 2.1315979957580566, "learning_rate": 3.3073170731707323e-06, "loss": 0.4411, "step": 340 },
    { "epoch": 0.06832601268911664, "grad_norm": 2.1080586910247803, "learning_rate": 3.404878048780488e-06, "loss": 0.4752, "step": 350 },
    { "epoch": 0.07027818448023426, "grad_norm": 2.3179149627685547, "learning_rate": 3.502439024390244e-06, "loss": 0.4904, "step": 360 },
    { "epoch": 0.07223035627135188, "grad_norm": 2.042257308959961, "learning_rate": 3.6000000000000003e-06, "loss": 0.4776, "step": 370 },
    { "epoch": 0.0741825280624695, "grad_norm": 2.1745452880859375, "learning_rate": 3.6975609756097565e-06, "loss": 0.4796, "step": 380 },
    { "epoch": 0.07613469985358712, "grad_norm": 2.0925772190093994, "learning_rate": 3.7951219512195124e-06, "loss": 0.4414, "step": 390 },
    { "epoch": 0.07808687164470474, "grad_norm": 1.9548064470291138, "learning_rate": 3.892682926829269e-06, "loss": 0.4618, "step": 400 },
    { "epoch": 0.08003904343582235, "grad_norm": 2.2613227367401123, "learning_rate": 3.9902439024390245e-06, "loss": 0.4561, "step": 410 },
    { "epoch": 0.08199121522693997, "grad_norm": 2.3353497982025146, "learning_rate": 4.087804878048781e-06, "loss": 0.4661, "step": 420 },
    { "epoch": 0.08394338701805759, "grad_norm": 2.0971009731292725, "learning_rate": 4.185365853658537e-06, "loss": 0.4257, "step": 430 },
    { "epoch": 0.08589555880917521, "grad_norm": 2.191269636154175, "learning_rate": 4.282926829268293e-06, "loss": 0.4584, "step": 440 },
    { "epoch": 0.08784773060029283, "grad_norm": 1.9917972087860107, "learning_rate": 4.380487804878049e-06, "loss": 0.4402, "step": 450 },
    { "epoch": 0.08979990239141045, "grad_norm": 2.4279751777648926, "learning_rate": 4.478048780487805e-06, "loss": 0.4521, "step": 460 },
    { "epoch": 0.09175207418252807, "grad_norm": 2.051614284515381, "learning_rate": 4.575609756097561e-06, "loss": 0.462, "step": 470 },
    { "epoch": 0.09370424597364568, "grad_norm": 1.9167184829711914, "learning_rate": 4.673170731707318e-06, "loss": 0.4484, "step": 480 },
    { "epoch": 0.0956564177647633, "grad_norm": 3.8172707557678223, "learning_rate": 4.770731707317073e-06, "loss": 0.4307, "step": 490 },
    { "epoch": 0.09760858955588092, "grad_norm": 1.8547595739364624, "learning_rate": 4.868292682926829e-06, "loss": 0.4537, "step": 500 },
    { "epoch": 0.09956076134699854, "grad_norm": 2.314936637878418, "learning_rate": 4.965853658536586e-06, "loss": 0.4425, "step": 510 },
    { "epoch": 0.10151293313811616, "grad_norm": 1.9514055252075195, "learning_rate": 5.063414634146342e-06, "loss": 0.4453, "step": 520 },
    { "epoch": 0.10346510492923378, "grad_norm": 1.7186030149459839, "learning_rate": 5.160975609756098e-06, "loss": 0.3934, "step": 530 },
    { "epoch": 0.1054172767203514, "grad_norm": 1.8911871910095215, "learning_rate": 5.2585365853658545e-06, "loss": 0.4454, "step": 540 },
    { "epoch": 0.10736944851146901, "grad_norm": 2.092548370361328, "learning_rate": 5.356097560975611e-06, "loss": 0.4937, "step": 550 },
    { "epoch": 0.10932162030258663, "grad_norm": 1.619399905204773, "learning_rate": 5.453658536585367e-06, "loss": 0.445, "step": 560 },
    { "epoch": 0.11127379209370425, "grad_norm": 2.086709976196289, "learning_rate": 5.551219512195123e-06, "loss": 0.4528, "step": 570 },
    { "epoch": 0.11322596388482187, "grad_norm": 2.0194432735443115, "learning_rate": 5.648780487804878e-06, "loss": 0.4503, "step": 580 },
    { "epoch": 0.11517813567593949, "grad_norm": 2.06130051612854, "learning_rate": 5.746341463414634e-06, "loss": 0.464, "step": 590 },
    { "epoch": 0.1171303074670571, "grad_norm": 1.8607715368270874, "learning_rate": 5.8439024390243904e-06, "loss": 0.4545, "step": 600 },
    { "epoch": 0.11908247925817472, "grad_norm": 1.8002784252166748, "learning_rate": 5.941463414634147e-06, "loss": 0.4211, "step": 610 },
    { "epoch": 0.12103465104929234, "grad_norm": 2.372277021408081, "learning_rate": 6.039024390243903e-06, "loss": 0.4757, "step": 620 },
    { "epoch": 0.12298682284040996, "grad_norm": 1.8813685178756714, "learning_rate": 6.136585365853659e-06, "loss": 0.4533, "step": 630 },
    { "epoch": 0.12493899463152758, "grad_norm": 1.8307466506958008, "learning_rate": 6.234146341463415e-06, "loss": 0.4727, "step": 640 },
    { "epoch": 0.1268911664226452, "grad_norm": 1.9979982376098633, "learning_rate": 6.331707317073171e-06, "loss": 0.4198, "step": 650 },
    { "epoch": 0.12884333821376281, "grad_norm": 2.121293544769287, "learning_rate": 6.429268292682927e-06, "loss": 0.4552, "step": 660 },
    { "epoch": 0.13079551000488043, "grad_norm": 1.8079158067703247, "learning_rate": 6.5268292682926836e-06, "loss": 0.4425, "step": 670 },
    { "epoch": 0.13274768179599805, "grad_norm": 1.8363804817199707, "learning_rate": 6.62439024390244e-06, "loss": 0.4299, "step": 680 },
    { "epoch": 0.13469985358711567, "grad_norm": 1.9010361433029175, "learning_rate": 6.721951219512196e-06, "loss": 0.4432, "step": 690 },
    { "epoch": 0.1366520253782333, "grad_norm": 2.0648021697998047, "learning_rate": 6.819512195121952e-06, "loss": 0.4498, "step": 700 },
    { "epoch": 0.1386041971693509, "grad_norm": 1.9787408113479614, "learning_rate": 6.917073170731707e-06, "loss": 0.4378, "step": 710 },
    { "epoch": 0.14055636896046853, "grad_norm": 1.7960318326950073, "learning_rate": 7.014634146341463e-06, "loss": 0.4273, "step": 720 },
    { "epoch": 0.14250854075158614, "grad_norm": 1.6109867095947266, "learning_rate": 7.1121951219512195e-06, "loss": 0.4351, "step": 730 },
    { "epoch": 0.14446071254270376, "grad_norm": 1.8068534135818481, "learning_rate": 7.209756097560976e-06, "loss": 0.4522, "step": 740 },
    { "epoch": 0.14641288433382138, "grad_norm": 2.0688509941101074, "learning_rate": 7.307317073170732e-06, "loss": 0.4367, "step": 750 },
    { "epoch": 0.148365056124939, "grad_norm": 1.9218088388442993, "learning_rate": 7.404878048780488e-06, "loss": 0.4406, "step": 760 },
    { "epoch": 0.15031722791605662, "grad_norm": 2.0230157375335693, "learning_rate": 7.502439024390245e-06, "loss": 0.4339, "step": 770 },
    { "epoch": 0.15226939970717424, "grad_norm": 1.8556506633758545, "learning_rate": 7.600000000000001e-06, "loss": 0.4219, "step": 780 },
    { "epoch": 0.15422157149829185, "grad_norm": 1.9535895586013794, "learning_rate": 7.697560975609756e-06, "loss": 0.4513, "step": 790 },
    { "epoch": 0.15617374328940947, "grad_norm": 1.6965866088867188, "learning_rate": 7.795121951219513e-06, "loss": 0.4355, "step": 800 },
    { "epoch": 0.1581259150805271, "grad_norm": 2.014047145843506, "learning_rate": 7.892682926829269e-06, "loss": 0.4535, "step": 810 },
    { "epoch": 0.1600780868716447, "grad_norm": 1.7660433053970337, "learning_rate": 7.990243902439025e-06, "loss": 0.4439, "step": 820 },
    { "epoch": 0.16203025866276233, "grad_norm": 1.8006068468093872, "learning_rate": 8.087804878048781e-06, "loss": 0.4337, "step": 830 },
    { "epoch": 0.16398243045387995, "grad_norm": 2.9495394229888916, "learning_rate": 8.185365853658536e-06, "loss": 0.4596, "step": 840 },
    { "epoch": 0.16593460224499756, "grad_norm": 2.061260938644409, "learning_rate": 8.282926829268292e-06, "loss": 0.426, "step": 850 },
    { "epoch": 0.16788677403611518, "grad_norm": 1.7372653484344482, "learning_rate": 8.380487804878049e-06, "loss": 0.4445, "step": 860 },
    { "epoch": 0.1698389458272328, "grad_norm": 1.9551905393600464, "learning_rate": 8.478048780487805e-06, "loss": 0.4328, "step": 870 },
    { "epoch": 0.17179111761835042, "grad_norm": 1.709471583366394, "learning_rate": 8.575609756097561e-06, "loss": 0.4775, "step": 880 },
    { "epoch": 0.17374328940946804, "grad_norm": 1.6745487451553345, "learning_rate": 8.673170731707317e-06, "loss": 0.4428, "step": 890 },
    { "epoch": 0.17569546120058566, "grad_norm": 1.675772786140442, "learning_rate": 8.770731707317074e-06, "loss": 0.4608, "step": 900 },
    { "epoch": 0.17764763299170327, "grad_norm": 1.7424198389053345, "learning_rate": 8.86829268292683e-06, "loss": 0.4427, "step": 910 },
    { "epoch": 0.1795998047828209, "grad_norm": 1.6522661447525024, "learning_rate": 8.965853658536586e-06, "loss": 0.4242, "step": 920 },
    { "epoch": 0.1815519765739385, "grad_norm": 1.795058012008667, "learning_rate": 9.063414634146343e-06, "loss": 0.4454, "step": 930 },
    { "epoch": 0.18350414836505613, "grad_norm": 1.9827325344085693, "learning_rate": 9.160975609756099e-06, "loss": 0.445, "step": 940 },
    { "epoch": 0.18545632015617375, "grad_norm": 1.6691913604736328, "learning_rate": 9.258536585365855e-06, "loss": 0.4269, "step": 950 },
    { "epoch": 0.18740849194729137, "grad_norm": 2.306386947631836, "learning_rate": 9.356097560975611e-06, "loss": 0.4299, "step": 960 },
    { "epoch": 0.18936066373840899, "grad_norm": 1.7081074714660645, "learning_rate": 9.453658536585366e-06, "loss": 0.3928, "step": 970 },
    { "epoch": 0.1913128355295266, "grad_norm": 1.8791937828063965, "learning_rate": 9.551219512195122e-06, "loss": 0.4513, "step": 980 },
    { "epoch": 0.19326500732064422, "grad_norm": 1.6924887895584106, "learning_rate": 9.648780487804879e-06, "loss": 0.4497, "step": 990 },
    { "epoch": 0.19521717911176184, "grad_norm": 1.8185955286026, "learning_rate": 9.746341463414635e-06, "loss": 0.4211, "step": 1000 },
    { "epoch": 0.19716935090287946, "grad_norm": 2.200645923614502, "learning_rate": 9.843902439024391e-06, "loss": 0.429, "step": 1010 },
    { "epoch": 0.19912152269399708, "grad_norm": 1.568526268005371, "learning_rate": 9.941463414634147e-06, "loss": 0.432, "step": 1020 },
    { "epoch": 0.2010736944851147, "grad_norm": 2.0274457931518555, "learning_rate": 9.999995356947167e-06, "loss": 0.4331, "step": 1030 },
    { "epoch": 0.20302586627623231, "grad_norm": 2.0021557807922363, "learning_rate": 9.999943122701835e-06, "loss": 0.4312, "step": 1040 },
    { "epoch": 0.20497803806734993, "grad_norm": 1.8168703317642212, "learning_rate": 9.999832851003463e-06, "loss": 0.4233, "step": 1050 },
    { "epoch": 0.20693020985846755, "grad_norm": 1.9876569509506226, "learning_rate": 9.999664543132046e-06, "loss": 0.4387, "step": 1060 },
    { "epoch": 0.20888238164958517, "grad_norm": 2.8725063800811768, "learning_rate": 9.999438201041236e-06, "loss": 0.4465, "step": 1070 },
    { "epoch": 0.2108345534407028, "grad_norm": 1.7831240892410278, "learning_rate": 9.999153827358329e-06, "loss": 0.4516, "step": 1080 },
    { "epoch": 0.2127867252318204, "grad_norm": 1.7374250888824463, "learning_rate": 9.998811425384225e-06, "loss": 0.4228, "step": 1090 },
    { "epoch": 0.21473889702293802, "grad_norm": 1.8771198987960815, "learning_rate": 9.998410999093401e-06, "loss": 0.4281, "step": 1100 },
    { "epoch": 0.21669106881405564, "grad_norm": 1.6397325992584229, "learning_rate": 9.99795255313385e-06, "loss": 0.4169, "step": 1110 },
    { "epoch": 0.21864324060517326, "grad_norm": 1.6200042963027954, "learning_rate": 9.997436092827042e-06, "loss": 0.4306, "step": 1120 },
    { "epoch": 0.22059541239629088, "grad_norm": 1.7567847967147827, "learning_rate": 9.996861624167853e-06, "loss": 0.44, "step": 1130 },
    { "epoch": 0.2225475841874085, "grad_norm": 2.0347070693969727, "learning_rate": 9.996229153824497e-06, "loss": 0.4122, "step": 1140 },
    { "epoch": 0.22449975597852612, "grad_norm": 1.7771193981170654, "learning_rate": 9.995538689138454e-06, "loss": 0.4577, "step": 1150 },
    { "epoch": 0.22645192776964373, "grad_norm": 1.78081214427948, "learning_rate": 9.994790238124374e-06, "loss": 0.4366, "step": 1160 },
    { "epoch": 0.22840409956076135, "grad_norm": 1.5818620920181274, "learning_rate": 9.993983809469995e-06, "loss": 0.4062, "step": 1170 },
    { "epoch": 0.23035627135187897, "grad_norm": 1.515559196472168, "learning_rate": 9.99311941253604e-06, "loss": 0.4164, "step": 1180 },
    { "epoch": 0.2323084431429966, "grad_norm": 1.9654150009155273, "learning_rate": 9.992197057356098e-06, "loss": 0.407, "step": 1190 },
    { "epoch": 0.2342606149341142, "grad_norm": 2.0451202392578125, "learning_rate": 9.991216754636522e-06, "loss": 0.4049, "step": 1200 },
    { "epoch": 0.23621278672523183, "grad_norm": 1.8495047092437744, "learning_rate": 9.990178515756294e-06, "loss": 0.4389, "step": 1210 },
    { "epoch": 0.23816495851634945, "grad_norm": 2.012528657913208, "learning_rate": 9.989082352766903e-06, "loss": 0.4244, "step": 1220 },
    { "epoch": 0.24011713030746706, "grad_norm": 1.5398080348968506, "learning_rate": 9.987928278392192e-06, "loss": 0.4449, "step": 1230 },
    { "epoch": 0.24206930209858468, "grad_norm": 2.037444591522217, "learning_rate": 9.986716306028222e-06, "loss": 0.4155, "step": 1240 },
    { "epoch": 0.2440214738897023, "grad_norm": 1.666106104850769, "learning_rate": 9.985446449743111e-06, "loss": 0.4451, "step": 1250 },
    { "epoch": 0.24597364568081992, "grad_norm": 1.8997377157211304, "learning_rate": 9.984118724276871e-06, "loss": 0.4218, "step": 1260 },
    { "epoch": 0.24792581747193754, "grad_norm": 1.8274009227752686, "learning_rate": 9.982733145041238e-06, "loss": 0.4341, "step": 1270 },
    { "epoch": 0.24987798926305516, "grad_norm": 1.8715990781784058, "learning_rate": 9.981289728119495e-06, "loss": 0.4208, "step": 1280 },
    { "epoch": 0.2518301610541728, "grad_norm": 1.65500807762146, "learning_rate": 9.97978849026628e-06, "loss": 0.473, "step": 1290 },
    { "epoch": 0.2537823328452904, "grad_norm": 1.9954007863998413, "learning_rate": 9.978229448907392e-06, "loss": 0.412, "step": 1300 },
    { "epoch": 0.255734504636408, "grad_norm": 1.6430351734161377, "learning_rate": 9.9766126221396e-06, "loss": 0.4078, "step": 1310 },
    { "epoch": 0.25768667642752563, "grad_norm": 1.9523135423660278, "learning_rate": 9.974938028730415e-06, "loss": 0.4072, "step": 1320 },
    { "epoch": 0.25963884821864325, "grad_norm": 1.6687992811203003, "learning_rate": 9.97320568811789e-06, "loss": 0.4449, "step": 1330 },
    { "epoch": 0.26159102000976087, "grad_norm": 1.4892559051513672, "learning_rate": 9.971415620410373e-06, "loss": 0.4314, "step": 1340 },
    { "epoch": 0.2635431918008785, "grad_norm": 1.774784803390503, "learning_rate": 9.969567846386305e-06, "loss": 0.412, "step": 1350 },
    { "epoch": 0.2654953635919961, "grad_norm": 1.6988633871078491, "learning_rate": 9.967662387493942e-06, "loss": 0.4225, "step": 1360 },
    { "epoch": 0.2674475353831137, "grad_norm": 1.8404074907302856, "learning_rate": 9.965699265851134e-06, "loss": 0.3972, "step": 1370 },
    { "epoch": 0.26939970717423134, "grad_norm": 1.7147365808486938, "learning_rate": 9.963678504245058e-06, "loss": 0.4168, "step": 1380 },
    { "epoch": 0.27135187896534896, "grad_norm": 1.7342804670333862, "learning_rate": 9.961600126131949e-06, "loss": 0.4308, "step": 1390 },
    { "epoch": 0.2733040507564666, "grad_norm": 1.7366749048233032, "learning_rate": 9.959464155636837e-06, "loss": 0.4362, "step": 1400 },
    { "epoch": 0.2752562225475842, "grad_norm": 1.560364007949829, "learning_rate": 9.957270617553263e-06, "loss": 0.4295, "step": 1410 },
    { "epoch": 0.2772083943387018, "grad_norm": 1.6392924785614014, "learning_rate": 9.955019537342988e-06, "loss": 0.4272, "step": 1420 },
    { "epoch": 0.27916056612981943, "grad_norm": 1.6431063413619995, "learning_rate": 9.952710941135702e-06, "loss": 0.4369, "step": 1430 },
    { "epoch": 0.28111273792093705, "grad_norm": 1.7722136974334717, "learning_rate": 9.950344855728717e-06, "loss": 0.4352, "step": 1440 },
    { "epoch": 0.28306490971205467, "grad_norm": 1.8588448762893677, "learning_rate": 9.947921308586663e-06, "loss": 0.4126, "step": 1450 },
    { "epoch": 0.2850170815031723, "grad_norm": 1.8954261541366577, "learning_rate": 9.945440327841159e-06, "loss": 0.406, "step": 1460 },
    { "epoch": 0.2869692532942899, "grad_norm": 2.013242244720459, "learning_rate": 9.942901942290493e-06, "loss": 0.4159, "step": 1470 },
    { "epoch": 0.2889214250854075, "grad_norm": 2.03349232673645, "learning_rate": 9.940306181399284e-06, "loss": 0.4206, "step": 1480 },
    { "epoch": 0.29087359687652514, "grad_norm": 2.5981340408325195, "learning_rate": 9.93765307529815e-06, "loss": 0.4358, "step": 1490 },
    { "epoch": 0.29282576866764276, "grad_norm": 1.626177191734314, "learning_rate": 9.934942654783343e-06, "loss": 0.3846, "step": 1500 },
    { "epoch": 0.2947779404587604, "grad_norm": 1.7740366458892822, "learning_rate": 9.932174951316401e-06, "loss": 0.382, "step": 1510 },
    { "epoch": 0.296730112249878, "grad_norm": 1.7357271909713745, "learning_rate": 9.929349997023782e-06, "loss": 0.3819, "step": 1520 },
    { "epoch": 0.2986822840409956, "grad_norm": 1.6888749599456787, "learning_rate": 9.92646782469649e-06, "loss": 0.4121, "step": 1530 },
    { "epoch": 0.30063445583211323, "grad_norm": 1.6334894895553589, "learning_rate": 9.923528467789694e-06, "loss": 0.4173, "step": 1540 },
    { "epoch": 0.30258662762323085, "grad_norm": 1.8393194675445557, "learning_rate": 9.920531960422337e-06, "loss": 0.3917, "step": 1550 },
    { "epoch": 0.30453879941434847, "grad_norm": 1.558082103729248, "learning_rate": 9.91747833737675e-06, "loss": 0.4068, "step": 1560 },
    { "epoch": 0.3064909712054661, "grad_norm": 1.4404140710830688, "learning_rate": 9.914367634098233e-06, "loss": 0.4181, "step": 1570 },
    { "epoch": 0.3084431429965837, "grad_norm": 1.7500289678573608, "learning_rate": 9.911199886694658e-06, "loss": 0.4064, "step": 1580 },
    { "epoch": 0.3103953147877013, "grad_norm": 1.5437930822372437, "learning_rate": 9.907975131936043e-06, "loss": 0.3951, "step": 1590 },
    { "epoch": 0.31234748657881894, "grad_norm": 2.1010377407073975, "learning_rate": 9.904693407254121e-06, "loss": 0.394, "step": 1600 },
    { "epoch": 0.31429965836993656, "grad_norm": 1.9228754043579102, "learning_rate": 9.901354750741915e-06, "loss": 0.392, "step": 1610 },
    { "epoch": 0.3162518301610542, "grad_norm": 1.8459354639053345, "learning_rate": 9.897959201153291e-06, "loss": 0.3964, "step": 1620 },
    { "epoch": 0.3182040019521718, "grad_norm": 2.0633997917175293, "learning_rate": 9.894506797902508e-06, "loss": 0.3844, "step": 1630 },
    { "epoch": 0.3201561737432894, "grad_norm": 1.7887264490127563, "learning_rate": 9.890997581063757e-06, "loss": 0.4231, "step": 1640 },
    { "epoch": 0.32210834553440704, "grad_norm": 1.6638391017913818, "learning_rate": 9.887431591370707e-06, "loss": 0.3866, "step": 1650 },
    { "epoch": 0.32406051732552466, "grad_norm": 1.5746939182281494, "learning_rate": 9.883808870216016e-06, "loss": 0.4002, "step": 1660 },
    { "epoch": 0.3260126891166423, "grad_norm": 1.737459421157837, "learning_rate": 9.880129459650867e-06, "loss": 0.4179, "step": 1670 },
    { "epoch": 0.3279648609077599, "grad_norm": 1.6839922666549683, "learning_rate": 9.87639340238447e-06, "loss": 0.3991, "step": 1680 },
    { "epoch": 0.3299170326988775, "grad_norm": 2.228264331817627, "learning_rate": 9.872600741783565e-06, "loss": 0.3815, "step": 1690 },
    { "epoch": 0.33186920448999513, "grad_norm": 1.7104638814926147, "learning_rate": 9.868751521871929e-06, "loss": 0.3833, "step": 1700 },
    { "epoch": 0.33382137628111275, "grad_norm": 2.207789182662964, "learning_rate": 9.864845787329851e-06, "loss": 0.3806, "step": 1710 },
    { "epoch": 0.33577354807223037, "grad_norm": 1.7838051319122314, "learning_rate": 9.860883583493624e-06, "loss": 0.4102, "step": 1720 },
    { "epoch": 0.337725719863348, "grad_norm": 1.4554154872894287, "learning_rate": 9.856864956355018e-06, "loss": 0.402, "step": 1730 },
    { "epoch": 0.3396778916544656, "grad_norm": 2.0781755447387695, "learning_rate": 9.852789952560737e-06, "loss": 0.395, "step": 1740 },
    { "epoch": 0.3416300634455832, "grad_norm": 1.8114964962005615, "learning_rate": 9.848658619411889e-06, "loss": 0.3727, "step": 1750 },
    { "epoch": 0.34358223523670084, "grad_norm": 1.6250026226043701, "learning_rate": 9.84447100486343e-06, "loss": 0.4073, "step": 1760 },
    { "epoch": 0.34553440702781846, "grad_norm": 2.6292777061462402, "learning_rate": 9.84022715752361e-06, "loss": 0.3897, "step": 1770 },
    { "epoch": 0.3474865788189361, "grad_norm": 1.6362555027008057, "learning_rate": 9.835927126653407e-06, "loss": 0.3935, "step": 1780 },
    { "epoch": 0.3494387506100537, "grad_norm": 1.828399658203125, "learning_rate": 9.831570962165955e-06, "loss": 0.4215, "step": 1790 },
    { "epoch": 0.3513909224011713, "grad_norm": 2.1395528316497803, "learning_rate": 9.827158714625971e-06, "loss": 0.3982, "step": 1800 },
    { "epoch": 0.35334309419228893, "grad_norm": 1.7870949506759644, "learning_rate": 9.822690435249157e-06, "loss": 0.3871, "step": 1810 },
    { "epoch": 0.35529526598340655, "grad_norm": 1.8139511346817017, "learning_rate": 9.818166175901613e-06, "loss": 0.4085, "step": 1820 },
    { "epoch": 0.35724743777452417, "grad_norm": 1.7053240537643433, "learning_rate": 9.813585989099237e-06, "loss": 0.3874, "step": 1830 },
    { "epoch": 0.3591996095656418, "grad_norm": 1.5838854312896729, "learning_rate": 9.808949928007108e-06, "loss": 0.3814, "step": 1840 },
    { "epoch": 0.3611517813567594, "grad_norm": 1.6612449884414673, "learning_rate": 9.804258046438875e-06, "loss": 0.3871, "step": 1850 },
    { "epoch": 0.363103953147877, "grad_norm": 1.915330410003662, "learning_rate": 9.799510398856125e-06, "loss": 0.4036, "step": 1860 },
    { "epoch": 0.36505612493899464, "grad_norm": 2.415217161178589, "learning_rate": 9.794707040367763e-06, "loss": 0.4082, "step": 1870 },
    { "epoch": 0.36700829673011226, "grad_norm": 2.1422359943389893, "learning_rate": 9.78984802672936e-06, "loss": 0.4177, "step": 1880 },
    { "epoch": 0.3689604685212299, "grad_norm": 1.7647225856781006, "learning_rate": 9.784933414342516e-06, "loss": 0.397, "step": 1890 },
    { "epoch": 0.3709126403123475, "grad_norm": 1.5418081283569336, "learning_rate": 9.779963260254193e-06, "loss": 0.3798, "step": 1900 },
    { "epoch": 0.3728648121034651, "grad_norm": 1.7419403791427612, "learning_rate": 9.774937622156065e-06, "loss": 0.3956, "step": 1910 },
    { "epoch": 0.37481698389458273, "grad_norm": 1.7291524410247803, "learning_rate": 9.769856558383845e-06, "loss": 0.3919, "step": 1920 },
    { "epoch": 0.37676915568570035, "grad_norm": 1.8995845317840576, "learning_rate": 9.764720127916601e-06, "loss": 0.395, "step": 1930 },
    { "epoch": 0.37872132747681797, "grad_norm": 1.7812365293502808, "learning_rate": 9.759528390376081e-06, "loss": 0.3846, "step": 1940 },
    { "epoch": 0.3806734992679356, "grad_norm": 1.9531811475753784, "learning_rate": 9.754281406026016e-06, "loss": 0.4002, "step": 1950 },
    { "epoch": 0.3826256710590532, "grad_norm": 1.832743525505066, "learning_rate": 9.74897923577142e-06, "loss": 0.3889, "step": 1960 },
    { "epoch": 0.3845778428501708, "grad_norm": 1.8087397813796997, "learning_rate": 9.743621941157884e-06, "loss": 0.3731, "step": 1970 },
    { "epoch": 0.38653001464128844, "grad_norm": 2.172013521194458, "learning_rate": 9.738209584370862e-06, "loss": 0.3859, "step": 1980 },
    { "epoch": 0.38848218643240606, "grad_norm": 1.6979947090148926, "learning_rate": 9.732742228234948e-06, "loss": 0.3776, "step": 1990 },
    { "epoch": 0.3904343582235237, "grad_norm": 1.8701212406158447, "learning_rate": 9.727219936213153e-06, "loss": 0.4074, "step": 2000 },
    { "epoch": 0.3923865300146413, "grad_norm": 1.4129152297973633, "learning_rate": 9.721642772406156e-06, "loss": 0.3775, "step": 2010 },
    { "epoch": 0.3943387018057589, "grad_norm": 1.6878793239593506, "learning_rate": 9.71601080155157e-06, "loss": 0.3941, "step": 2020 },
    { "epoch": 0.39629087359687654, "grad_norm": 2.422609806060791, "learning_rate": 9.710324089023188e-06, "loss": 0.3792, "step": 2030 },
    { "epoch": 0.39824304538799415, "grad_norm": 1.4131423234939575, "learning_rate": 9.704582700830223e-06, "loss": 0.3888, "step": 2040 },
    { "epoch": 0.4001952171791118, "grad_norm": 1.5815601348876953, "learning_rate": 9.698786703616543e-06, "loss": 0.3599, "step": 2050 },
    { "epoch": 0.4021473889702294, "grad_norm": 1.8221068382263184, "learning_rate": 9.692936164659897e-06, "loss": 0.3532, "step": 2060 },
    { "epoch": 0.404099560761347, "grad_norm": 1.6556801795959473, "learning_rate": 9.68703115187113e-06, "loss": 0.3744, "step": 2070 },
    { "epoch": 0.40605173255246463, "grad_norm": 1.666934609413147, "learning_rate": 9.681071733793408e-06, "loss": 0.3976, "step": 2080 },
    { "epoch": 0.40800390434358225, "grad_norm": 1.9403831958770752, "learning_rate": 9.675057979601404e-06, "loss": 0.3837, "step": 2090 },
    { "epoch": 0.40995607613469986, "grad_norm": 1.6218396425247192, "learning_rate": 9.668989959100505e-06, "loss": 0.4007, "step": 2100 },
    { "epoch": 0.4119082479258175, "grad_norm": 1.9031404256820679, "learning_rate": 9.662867742726008e-06, "loss": 0.3719, "step": 2110 },
    { "epoch": 0.4138604197169351, "grad_norm": 1.7567657232284546, "learning_rate": 9.656691401542288e-06, "loss": 0.3878, "step": 2120 },
    { "epoch": 0.4158125915080527, "grad_norm": 1.6076161861419678, "learning_rate": 9.650461007241983e-06, "loss": 0.3982, "step": 2130 },
    { "epoch": 0.41776476329917034, "grad_norm": 1.894406795501709, "learning_rate": 9.644176632145158e-06, "loss": 0.3462, "step": 2140 },
    { "epoch": 0.41971693509028796, "grad_norm": 1.7494549751281738, "learning_rate": 9.637838349198469e-06, "loss": 0.3829, "step": 2150 },
    { "epoch": 0.4216691068814056, "grad_norm": 2.3885393142700195, "learning_rate": 9.631446231974313e-06, "loss": 0.3997, "step": 2160 },
    { "epoch": 0.4236212786725232, "grad_norm": 1.9122508764266968, "learning_rate": 9.625000354669973e-06, "loss": 0.3661, "step": 2170 },
    { "epoch": 0.4255734504636408, "grad_norm": 1.6724847555160522, "learning_rate": 9.618500792106765e-06, "loss": 0.3662, "step": 2180 },
    { "epoch": 0.42752562225475843, "grad_norm": 1.5858006477355957, "learning_rate": 9.61194761972915e-06, "loss": 0.388, "step": 2190 },
    { "epoch": 0.42947779404587605, "grad_norm": 1.6714067459106445, "learning_rate": 9.60534091360389e-06, "loss": 0.3734, "step": 2200 },
    { "epoch": 0.43142996583699367, "grad_norm": 1.657956838607788, "learning_rate": 9.598680750419128e-06, "loss": 0.3888, "step": 2210 },
    { "epoch": 0.4333821376281113, "grad_norm": 1.7038514614105225, "learning_rate": 9.59196720748353e-06, "loss": 0.3744, "step": 2220 },
    { "epoch": 0.4353343094192289, "grad_norm": 1.6116293668746948, "learning_rate": 9.585200362725369e-06, "loss": 0.401, "step": 2230 },
    { "epoch": 0.4372864812103465, "grad_norm": 1.8921372890472412, "learning_rate": 9.578380294691626e-06, "loss": 0.3734, "step": 2240 },
    { "epoch": 0.43923865300146414, "grad_norm": 1.7467238903045654, "learning_rate": 9.571507082547071e-06, "loss": 0.364, "step": 2250 },
    { "epoch": 0.44119082479258176, "grad_norm": 1.659144639968872, "learning_rate": 9.564580806073365e-06, "loss": 0.3746, "step": 2260 },
    { "epoch": 0.4431429965836994, "grad_norm": 2.034449577331543, "learning_rate": 9.557601545668107e-06, "loss": 0.363, "step": 2270 },
    { "epoch": 0.445095168374817, "grad_norm": 1.5256868600845337, "learning_rate": 9.55056938234392e-06, "loss": 0.3709, "step": 2280 },
    { "epoch": 0.4470473401659346, "grad_norm": 1.6245321035385132, "learning_rate": 9.543484397727502e-06, "loss": 0.3791, "step": 2290 },
    { "epoch": 0.44899951195705223, "grad_norm": 1.7325410842895508, "learning_rate": 9.53634667405868e-06, "loss": 0.3925, "step": 2300 },
    { "epoch": 0.45095168374816985, "grad_norm": 1.793959379196167, "learning_rate": 9.529156294189459e-06, "loss": 0.3661, "step": 2310 },
    { "epoch": 0.45290385553928747, "grad_norm": 1.762374758720398, "learning_rate": 9.521913341583051e-06, "loss": 0.3517, "step": 2320 },
    { "epoch": 0.4548560273304051, "grad_norm": 1.7093387842178345, "learning_rate": 9.51461790031292e-06, "loss": 0.3404, "step": 2330 },
    { "epoch": 0.4568081991215227, "grad_norm": 1.7257992029190063, "learning_rate": 9.507270055061798e-06, "loss": 0.376, "step": 2340 },
    { "epoch": 0.4587603709126403, "grad_norm": 1.5308430194854736, "learning_rate": 9.499869891120694e-06, "loss": 0.3713, "step": 2350 },
    { "epoch": 0.46071254270375794, "grad_norm": 2.258150339126587, "learning_rate": 9.492417494387923e-06, "loss": 0.3804, "step": 2360 },
    { "epoch": 0.46266471449487556, "grad_norm": 1.6879998445510864, "learning_rate": 9.48491295136809e-06, "loss": 0.3572, "step": 2370 },
    { "epoch": 0.4646168862859932, "grad_norm": 1.9355443716049194, "learning_rate": 9.477356349171102e-06, "loss": 0.3513, "step": 2380 },
    { "epoch": 0.4665690580771108, "grad_norm": 1.737768292427063, "learning_rate": 9.469747775511147e-06, "loss": 0.3613, "step": 2390 },
    { "epoch": 0.4685212298682284, "grad_norm": 2.228402614593506, "learning_rate": 9.462087318705672e-06, "loss": 0.3487, "step": 2400 },
    { "epoch": 0.47047340165934604, "grad_norm": 1.715980887413025, "learning_rate": 9.454375067674374e-06, "loss": 0.3774, "step": 2410 },
    { "epoch": 0.47242557345046365, "grad_norm": 1.7476235628128052, "learning_rate": 9.446611111938149e-06, "loss": 0.3557, "step": 2420 },
    { "epoch": 0.4743777452415813, "grad_norm": 1.7379567623138428, "learning_rate": 9.438795541618067e-06, "loss": 0.3673, "step": 2430 },
    { "epoch": 0.4763299170326989, "grad_norm": 1.634992241859436, "learning_rate": 9.430928447434317e-06, "loss": 0.3786, "step": 2440 },
    { "epoch": 0.4782820888238165, "grad_norm": 1.5183148384094238, "learning_rate": 9.423009920705163e-06, "loss": 0.3593, "step": 2450 },
    { "epoch": 0.4802342606149341, "grad_norm": 1.7934198379516602, "learning_rate": 9.415040053345876e-06, "loss": 0.3666, "step": 2460 },
    { "epoch": 0.48218643240605175, "grad_norm": 2.3873658180236816, "learning_rate": 9.407018937867665e-06, "loss": 0.3771, "step": 2470 },
    { "epoch": 0.48413860419716936, "grad_norm": 1.7866123914718628, "learning_rate": 9.398946667376614e-06, "loss": 0.3659, "step": 2480 },
    { "epoch": 0.486090775988287, "grad_norm": 1.4919376373291016, "learning_rate": 9.390823335572591e-06, "loss": 0.3832, "step": 2490 },
    { "epoch": 0.4880429477794046, "grad_norm": 1.5056414604187012, "learning_rate": 9.382649036748168e-06, "loss": 0.3439, "step": 2500 },
    { "epoch": 0.4899951195705222, "grad_norm": 1.6114498376846313, "learning_rate": 9.374423865787521e-06, "loss": 0.3483, "step": 2510 },
    { "epoch": 0.49194729136163984, "grad_norm": 1.7268636226654053, "learning_rate": 9.36614791816533e-06, "loss": 0.3607, "step": 2520 },
    { "epoch": 0.49389946315275746, "grad_norm": 1.705761432647705, "learning_rate": 9.357821289945673e-06, "loss": 0.3373, "step": 2530 },
    { "epoch": 0.4958516349438751, "grad_norm": 1.5740495920181274, "learning_rate": 9.349444077780905e-06, "loss": 0.336, "step": 2540 },
    { "epoch": 0.4978038067349927, "grad_norm": 1.6093052625656128, "learning_rate": 9.34101637891055e-06, "loss": 0.3538, "step": 2550 },
    { "epoch": 0.4997559785261103, "grad_norm": 1.5724884271621704, "learning_rate": 9.33253829116015e-06, "loss": 0.368, "step": 2560 },
    { "epoch": 0.5017081503172279, "grad_norm": 2.033222198486328, "learning_rate": 9.324009912940151e-06, "loss": 0.3808, "step": 2570 },
    { "epoch": 0.5036603221083455, "grad_norm": 1.569170594215393, "learning_rate": 9.315431343244752e-06, "loss": 0.3768, "step": 2580 },
    { "epoch": 0.5056124938994632, "grad_norm": 1.5116662979125977, "learning_rate": 9.306802681650748e-06, "loss": 0.3697, "step": 2590 },
    { "epoch": 0.5075646656905808, "grad_norm": 1.8755792379379272, "learning_rate": 9.298124028316388e-06, "loss": 0.3731, "step": 2600 },
    { "epoch": 0.5095168374816984, "grad_norm": 1.7512634992599487, "learning_rate": 9.289395483980209e-06, "loss": 0.3707, "step": 2610 },
    { "epoch": 0.511469009272816, "grad_norm": 1.724428415298462, "learning_rate": 9.280617149959853e-06, "loss": 0.3623, "step": 2620 },
    { "epoch": 0.5134211810639336, "grad_norm": 2.088242530822754, "learning_rate": 9.271789128150916e-06, "loss": 0.3356, "step": 2630 },
    { "epoch": 0.5153733528550513, "grad_norm": 1.7629714012145996, "learning_rate": 9.262911521025738e-06, "loss": 0.3543, "step": 2640 },
    { "epoch": 0.5173255246461689, "grad_norm": 1.510256052017212, "learning_rate": 9.253984431632238e-06, "loss": 0.342, "step": 2650 },
    { "epoch": 0.5192776964372865, "grad_norm": 1.78757905960083, "learning_rate": 9.245007963592697e-06, "loss": 0.3785, "step": 2660 },
    { "epoch": 0.5212298682284041, "grad_norm": 1.7818865776062012, "learning_rate": 9.235982221102569e-06, "loss": 0.35, "step": 2670 },
    { "epoch": 0.5231820400195217, "grad_norm": 1.6107079982757568, "learning_rate": 9.226907308929268e-06, "loss": 0.3462, "step": 2680 },
    { "epoch": 0.5251342118106394, "grad_norm": 1.8741298913955688, "learning_rate": 9.21778333241095e-06, "loss": 0.358, "step": 2690 },
    { "epoch": 0.527086383601757, "grad_norm": 1.697345495223999, "learning_rate": 9.208610397455292e-06, "loss": 0.3539, "step": 2700 },
    { "epoch": 0.5290385553928746, "grad_norm": 1.5936979055404663, "learning_rate": 9.199388610538261e-06, "loss": 0.3656, "step": 2710 },
    { "epoch": 0.5309907271839922, "grad_norm": 1.8244935274124146, "learning_rate": 9.190118078702879e-06, "loss": 0.3791, "step": 2720 },
    { "epoch": 0.5329428989751098, "grad_norm": 1.7835068702697754, "learning_rate": 9.180798909557982e-06, "loss": 0.3642, "step": 2730 },
    { "epoch": 0.5348950707662274, "grad_norm": 1.4222677946090698, "learning_rate": 9.17143121127697e-06, "loss": 0.3969, "step": 2740 },
    { "epoch": 0.5368472425573451, "grad_norm": 1.6140766143798828, "learning_rate": 9.162015092596546e-06, "loss": 0.337, "step": 2750 },
    { "epoch": 0.5387994143484627, "grad_norm": 1.4900151491165161, "learning_rate": 9.152550662815468e-06, "loss": 0.3686, "step": 2760 },
    { "epoch": 0.5407515861395803, "grad_norm": 1.7115081548690796, "learning_rate": 9.143038031793259e-06, "loss": 0.3825, "step": 2770 },
    { "epoch": 0.5427037579306979, "grad_norm": 1.9142796993255615, "learning_rate": 9.133477309948956e-06, "loss": 0.347, "step": 2780 },
    { "epoch": 0.5446559297218155, "grad_norm": 1.6392863988876343, "learning_rate": 9.123868608259808e-06, "loss": 0.3571, "step": 2790 },
    { "epoch": 0.5466081015129332, "grad_norm": 1.9869853258132935, "learning_rate": 9.114212038259998e-06, "loss": 0.3656, "step": 2800 },
    { "epoch": 0.5485602733040508, "grad_norm": 1.797553300857544, "learning_rate": 9.104507712039348e-06, "loss": 0.3563, "step": 2810 },
    { "epoch": 0.5505124450951684, "grad_norm": 1.4315779209136963, "learning_rate": 9.094755742242014e-06, "loss": 0.3608, "step": 2820 },
    { "epoch": 0.552464616886286, "grad_norm": 1.5433530807495117, "learning_rate": 9.084956242065182e-06, "loss": 0.3531, "step": 2830 },
    { "epoch": 0.5544167886774036, "grad_norm": 1.5521483421325684, "learning_rate": 9.07510932525775e-06, "loss": 0.3431, "step": 2840 },
    { "epoch": 0.5563689604685212, "grad_norm": 1.7205086946487427, "learning_rate": 9.065215106119017e-06, "loss": 0.3506, "step": 2850 },
    { "epoch": 0.5583211322596389, "grad_norm": 1.5988975763320923, "learning_rate": 9.05527369949734e-06, "loss": 0.3519, "step": 2860 },
    { "epoch": 0.5602733040507565, "grad_norm": 1.5551445484161377, "learning_rate": 9.04528522078882e-06, "loss": 0.3413, "step": 2870 },
    { "epoch": 0.5622254758418741, "grad_norm": 1.4696311950683594, "learning_rate": 9.035249785935946e-06, "loss": 0.3604, "step": 2880 },
    { "epoch": 0.5641776476329917, "grad_norm": 1.6763559579849243, "learning_rate": 9.02516751142626e-06, "loss": 0.3624, "step": 2890 },
    { "epoch": 0.5661298194241093, "grad_norm": 1.6215537786483765, "learning_rate": 9.015038514290999e-06, "loss": 0.3401, "step": 2900 },
    { "epoch": 0.568081991215227, "grad_norm": 1.4154938459396362, "learning_rate": 9.00486291210374e-06, "loss": 0.3575, "step": 2910 },
    { "epoch": 0.5700341630063446, "grad_norm": 1.8908969163894653, "learning_rate": 8.994640822979036e-06, "loss": 0.3304, "step": 2920 },
    { "epoch": 0.5719863347974622, "grad_norm": 1.6332852840423584, "learning_rate": 8.984372365571036e-06, "loss": 0.3253, "step": 2930 },
    { "epoch": 0.5739385065885798, "grad_norm": 1.5187574625015259, "learning_rate": 8.974057659072121e-06, "loss": 0.3545, "step": 2940 },
    { "epoch": 0.5758906783796974, "grad_norm": 1.7546658515930176, "learning_rate": 8.963696823211512e-06, "loss": 0.356, "step": 2950 },
    { "epoch": 0.577842850170815, "grad_norm": 1.621563196182251, "learning_rate": 8.953289978253881e-06, "loss": 0.3129, "step": 2960 },
    { "epoch": 0.5797950219619327, "grad_norm": 1.652968406677246, "learning_rate": 8.942837244997959e-06, "loss": 0.3554, "step": 2970 },
    { "epoch": 0.5817471937530503, "grad_norm": 1.8834774494171143, "learning_rate": 8.932338744775128e-06, "loss": 0.3588, "step": 2980 },
    { "epoch": 0.5836993655441679, "grad_norm": 2.5238523483276367, "learning_rate": 8.921794599448015e-06, "loss": 0.3388, "step": 2990 },
    { "epoch": 0.5856515373352855, "grad_norm": 1.6469742059707642, "learning_rate": 8.911204931409084e-06, "loss": 0.3824, "step": 3000 },
    { "epoch": 0.5876037091264031, "grad_norm": 1.6612112522125244, "learning_rate": 8.900569863579203e-06, "loss": 0.3519, "step": 3010 },
    { "epoch": 0.5895558809175208, "grad_norm": 1.8060604333877563, "learning_rate": 8.889889519406227e-06, "loss": 0.3393, "step": 3020 },
    { "epoch": 0.5915080527086384, "grad_norm": 1.6979930400848389, "learning_rate": 8.879164022863562e-06, "loss": 0.3309, "step": 3030 },
    { "epoch": 0.593460224499756, "grad_norm": 1.366909384727478, "learning_rate": 8.868393498448724e-06, "loss": 0.3309, "step": 3040 },
    { "epoch": 0.5954123962908736, "grad_norm": 1.539758324623108, "learning_rate": 8.857578071181894e-06, "loss": 0.3379, "step": 3050 },
    { "epoch": 0.5973645680819912, "grad_norm": 1.5678811073303223, "learning_rate": 8.846717866604471e-06, "loss": 0.3607, "step": 3060 },
    { "epoch": 0.5993167398731089, "grad_norm": 1.8162411451339722, "learning_rate": 8.835813010777615e-06, "loss": 0.3653, "step": 3070 },
    { "epoch": 0.6012689116642265, "grad_norm": 1.7801045179367065, "learning_rate": 8.824863630280775e-06, "loss": 0.3545, "step": 3080 },
    { "epoch": 0.6032210834553441, "grad_norm": 1.7757545709609985, "learning_rate": 8.813869852210228e-06, "loss": 0.3383, "step": 3090 },
    { "epoch": 0.6051732552464617, "grad_norm": 1.4703953266143799, "learning_rate": 8.802831804177601e-06, "loss": 0.3305, "step": 3100 },
    { "epoch": 0.6071254270375793, "grad_norm": 1.6324414014816284, "learning_rate": 8.791749614308392e-06, "loss": 0.3386, "step": 3110 },
    { "epoch": 0.6090775988286969, "grad_norm": 2.0556728839874268, "learning_rate": 8.780623411240477e-06, "loss": 0.3473, "step": 3120 },
    { "epoch": 0.6110297706198146, "grad_norm": 1.7078546285629272, "learning_rate": 8.769453324122625e-06, "loss": 0.3279, "step": 3130 },
    { "epoch": 0.6129819424109322, "grad_norm": 1.872077226638794, "learning_rate": 8.758239482612992e-06, "loss": 0.3765, "step": 3140 },
    { "epoch": 0.6149341142020498, "grad_norm": 2.0109453201293945, "learning_rate": 8.746982016877616e-06, "loss": 0.3155, "step": 3150 },
    { "epoch": 0.6168862859931674, "grad_norm": 1.9242374897003174, "learning_rate": 8.735681057588914e-06, "loss": 0.3679, "step": 3160 },
    { "epoch": 0.618838457784285, "grad_norm": 1.6928529739379883, "learning_rate": 8.724336735924155e-06, "loss": 0.3589, "step": 3170 },
    { "epoch": 0.6207906295754027, "grad_norm": 2.123342990875244, "learning_rate": 8.712949183563945e-06, "loss": 0.3265, "step": 3180 },
    { "epoch": 0.6227428013665203, "grad_norm": 1.4975687265396118, "learning_rate": 8.701518532690696e-06, "loss": 0.3466, "step": 3190 },
    { "epoch": 0.6246949731576379, "grad_norm": 2.0426642894744873, "learning_rate": 8.690044915987091e-06, "loss": 0.3662, "step": 3200 },
    { "epoch": 0.6266471449487555, "grad_norm": 1.6315159797668457, "learning_rate": 8.678528466634537e-06, "loss": 0.3118, "step": 3210 },
    { "epoch": 0.6285993167398731, "grad_norm": 1.6052002906799316, "learning_rate": 8.666969318311638e-06, "loss": 0.3499, "step": 3220 },
    { "epoch": 0.6305514885309907, "grad_norm": 1.821866750717163, "learning_rate": 8.655367605192623e-06, "loss": 0.3231, "step": 3230 },
    { "epoch": 0.6325036603221084, "grad_norm": 1.8277753591537476, "learning_rate": 8.643723461945804e-06, "loss": 0.3369, "step": 3240 },
    { "epoch": 0.634455832113226, "grad_norm": 1.5077341794967651, "learning_rate": 8.632037023731997e-06, "loss": 0.3599, "step": 3250 },
    { "epoch": 0.6364080039043436, "grad_norm": 1.663934350013733, "learning_rate": 8.62030842620297e-06, "loss": 0.3359, "step": 3260 },
    { "epoch": 0.6383601756954612, "grad_norm": 1.6589021682739258, "learning_rate": 8.608537805499854e-06, "loss": 0.3467, "step": 3270 },
    { "epoch": 0.6403123474865788, "grad_norm": 1.6243420839309692, "learning_rate": 8.596725298251578e-06, "loss": 0.3488, "step": 3280 },
    { "epoch": 0.6422645192776965, "grad_norm": 2.027372360229492, "learning_rate": 8.584871041573263e-06, "loss": 0.3394, "step": 3290 },
    { "epoch": 0.6442166910688141, "grad_norm": 1.519249677658081, "learning_rate": 8.572975173064651e-06, "loss": 0.3445, "step": 3300 },
    { "epoch": 0.6461688628599317, "grad_norm": 1.7421610355377197, "learning_rate": 8.561037830808493e-06, "loss": 0.3308, "step": 3310 },
    { "epoch": 0.6481210346510493, "grad_norm": 1.7053309679031372, "learning_rate": 8.549059153368954e-06, "loss": 0.3176, "step": 3320 },
    { "epoch": 0.6500732064421669, "grad_norm": 1.6674582958221436, "learning_rate": 8.537039279790002e-06, "loss": 0.3247, "step": 3330 },
    { "epoch": 0.6520253782332845, "grad_norm": 1.5092159509658813, "learning_rate": 8.524978349593791e-06, "loss": 0.3505, "step": 3340 },
    { "epoch": 0.6539775500244022, "grad_norm": 1.6612021923065186, "learning_rate": 8.512876502779053e-06, "loss": 0.3256, "step": 3350 },
    { "epoch": 0.6559297218155198, "grad_norm": 1.7003108263015747, "learning_rate": 8.500733879819453e-06, "loss": 0.3453, "step": 3360 },
    { "epoch": 0.6578818936066374, "grad_norm": 1.549048662185669, "learning_rate": 8.488550621661982e-06, "loss": 0.3548, "step": 3370 },
    { "epoch": 0.659834065397755, "grad_norm": 1.461796522140503, "learning_rate": 8.476326869725297e-06, "loss": 0.3154, "step": 3380 },
| { | |
| "epoch": 0.6617862371888726, | |
| "grad_norm": 1.7047860622406006, | |
| "learning_rate": 8.464062765898104e-06, | |
| "loss": 0.3232, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.6637384089799903, | |
| "grad_norm": 1.7252593040466309, | |
| "learning_rate": 8.45175845253749e-06, | |
| "loss": 0.3207, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.6656905807711079, | |
| "grad_norm": 1.7007282972335815, | |
| "learning_rate": 8.43941407246728e-06, | |
| "loss": 0.3419, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.6676427525622255, | |
| "grad_norm": 1.7178446054458618, | |
| "learning_rate": 8.42702976897638e-06, | |
| "loss": 0.3348, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.6695949243533431, | |
| "grad_norm": 1.611234426498413, | |
| "learning_rate": 8.414605685817115e-06, | |
| "loss": 0.3095, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.6715470961444607, | |
| "grad_norm": 1.9325944185256958, | |
| "learning_rate": 8.40214196720355e-06, | |
| "loss": 0.3212, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.6734992679355783, | |
| "grad_norm": 1.4901853799819946, | |
| "learning_rate": 8.38963875780983e-06, | |
| "loss": 0.3535, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.675451439726696, | |
| "grad_norm": 1.5685844421386719, | |
| "learning_rate": 8.37709620276849e-06, | |
| "loss": 0.3426, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.6774036115178136, | |
| "grad_norm": 1.7217646837234497, | |
| "learning_rate": 8.364514447668777e-06, | |
| "loss": 0.3626, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.6793557833089312, | |
| "grad_norm": 1.7846200466156006, | |
| "learning_rate": 8.351893638554957e-06, | |
| "loss": 0.3245, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.6813079551000488, | |
| "grad_norm": 1.9821772575378418, | |
| "learning_rate": 8.339233921924619e-06, | |
| "loss": 0.338, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.6832601268911664, | |
| "grad_norm": 1.7743932008743286, | |
| "learning_rate": 8.326535444726975e-06, | |
| "loss": 0.3272, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.6852122986822841, | |
| "grad_norm": 1.653472900390625, | |
| "learning_rate": 8.31379835436116e-06, | |
| "loss": 0.3434, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.6871644704734017, | |
| "grad_norm": 1.4776761531829834, | |
| "learning_rate": 8.301022798674507e-06, | |
| "loss": 0.3148, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.6891166422645193, | |
| "grad_norm": 1.6221344470977783, | |
| "learning_rate": 8.288208925960853e-06, | |
| "loss": 0.3091, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.6910688140556369, | |
| "grad_norm": 1.911304235458374, | |
| "learning_rate": 8.27535688495879e-06, | |
| "loss": 0.3709, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.6930209858467545, | |
| "grad_norm": 1.5734984874725342, | |
| "learning_rate": 8.262466824849965e-06, | |
| "loss": 0.3239, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.6949731576378722, | |
| "grad_norm": 1.9672952890396118, | |
| "learning_rate": 8.24953889525733e-06, | |
| "loss": 0.3296, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.6969253294289898, | |
| "grad_norm": 2.76518177986145, | |
| "learning_rate": 8.236573246243414e-06, | |
| "loss": 0.3099, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.6988775012201074, | |
| "grad_norm": 1.655221700668335, | |
| "learning_rate": 8.223570028308578e-06, | |
| "loss": 0.3378, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.700829673011225, | |
| "grad_norm": 1.7827235460281372, | |
| "learning_rate": 8.210529392389268e-06, | |
| "loss": 0.321, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.7027818448023426, | |
| "grad_norm": 1.5071234703063965, | |
| "learning_rate": 8.197451489856265e-06, | |
| "loss": 0.3394, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.7047340165934602, | |
| "grad_norm": 1.7633196115493774, | |
| "learning_rate": 8.184336472512926e-06, | |
| "loss": 0.3154, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.7066861883845779, | |
| "grad_norm": 1.2931830883026123, | |
| "learning_rate": 8.171184492593427e-06, | |
| "loss": 0.3198, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.7086383601756955, | |
| "grad_norm": 1.9217031002044678, | |
| "learning_rate": 8.157995702760985e-06, | |
| "loss": 0.3144, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.7105905319668131, | |
| "grad_norm": 1.7861008644104004, | |
| "learning_rate": 8.144770256106095e-06, | |
| "loss": 0.3357, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.7125427037579307, | |
| "grad_norm": 1.4293644428253174, | |
| "learning_rate": 8.131508306144753e-06, | |
| "loss": 0.3309, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.7144948755490483, | |
| "grad_norm": 1.50441575050354, | |
| "learning_rate": 8.118210006816669e-06, | |
| "loss": 0.3406, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.716447047340166, | |
| "grad_norm": 1.6063085794448853, | |
| "learning_rate": 8.104875512483484e-06, | |
| "loss": 0.3207, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.7183992191312836, | |
| "grad_norm": 1.720033049583435, | |
| "learning_rate": 8.091504977926976e-06, | |
| "loss": 0.3316, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.7203513909224012, | |
| "grad_norm": 1.5257724523544312, | |
| "learning_rate": 8.078098558347266e-06, | |
| "loss": 0.3261, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.7223035627135188, | |
| "grad_norm": 1.4936727285385132, | |
| "learning_rate": 8.064656409361009e-06, | |
| "loss": 0.3263, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.7242557345046364, | |
| "grad_norm": 1.685624361038208, | |
| "learning_rate": 8.051178686999604e-06, | |
| "loss": 0.3366, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.726207906295754, | |
| "grad_norm": 1.7392653226852417, | |
| "learning_rate": 8.037665547707362e-06, | |
| "loss": 0.3283, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.7281600780868717, | |
| "grad_norm": 1.705322265625, | |
| "learning_rate": 8.024117148339708e-06, | |
| "loss": 0.3, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.7301122498779893, | |
| "grad_norm": 1.7675234079360962, | |
| "learning_rate": 8.010533646161345e-06, | |
| "loss": 0.3359, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.7320644216691069, | |
| "grad_norm": 1.575836181640625, | |
| "learning_rate": 7.996915198844446e-06, | |
| "loss": 0.3253, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.7340165934602245, | |
| "grad_norm": 1.9200180768966675, | |
| "learning_rate": 7.983261964466805e-06, | |
| "loss": 0.3277, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.7359687652513421, | |
| "grad_norm": 1.6772791147232056, | |
| "learning_rate": 7.969574101510014e-06, | |
| "loss": 0.3539, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.7379209370424598, | |
| "grad_norm": 1.6115095615386963, | |
| "learning_rate": 7.955851768857624e-06, | |
| "loss": 0.3128, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.7398731088335774, | |
| "grad_norm": 1.8706231117248535, | |
| "learning_rate": 7.942095125793293e-06, | |
| "loss": 0.3425, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.741825280624695, | |
| "grad_norm": 1.7618359327316284, | |
| "learning_rate": 7.928304331998942e-06, | |
| "loss": 0.3108, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.7437774524158126, | |
| "grad_norm": 1.5923467874526978, | |
| "learning_rate": 7.914479547552901e-06, | |
| "loss": 0.3205, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.7457296242069302, | |
| "grad_norm": 1.465217113494873, | |
| "learning_rate": 7.900620932928053e-06, | |
| "loss": 0.3386, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.7476817959980478, | |
| "grad_norm": 1.7743841409683228, | |
| "learning_rate": 7.886728648989965e-06, | |
| "loss": 0.3384, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.7496339677891655, | |
| "grad_norm": 2.0260348320007324, | |
| "learning_rate": 7.87280285699503e-06, | |
| "loss": 0.3561, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.7515861395802831, | |
| "grad_norm": 1.6193934679031372, | |
| "learning_rate": 7.85884371858858e-06, | |
| "loss": 0.3094, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.7535383113714007, | |
| "grad_norm": 1.3467310667037964, | |
| "learning_rate": 7.844851395803034e-06, | |
| "loss": 0.3093, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.7554904831625183, | |
| "grad_norm": 1.415460467338562, | |
| "learning_rate": 7.830826051055989e-06, | |
| "loss": 0.3311, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.7574426549536359, | |
| "grad_norm": 1.631778359413147, | |
| "learning_rate": 7.816767847148358e-06, | |
| "loss": 0.3353, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.7593948267447536, | |
| "grad_norm": 1.7144395112991333, | |
| "learning_rate": 7.802676947262466e-06, | |
| "loss": 0.3528, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.7613469985358712, | |
| "grad_norm": 1.720763921737671, | |
| "learning_rate": 7.788553514960158e-06, | |
| "loss": 0.34, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.7632991703269888, | |
| "grad_norm": 1.3365732431411743, | |
| "learning_rate": 7.774397714180913e-06, | |
| "loss": 0.3309, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.7652513421181064, | |
| "grad_norm": 1.425312876701355, | |
| "learning_rate": 7.760209709239921e-06, | |
| "loss": 0.3342, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.767203513909224, | |
| "grad_norm": 1.7869527339935303, | |
| "learning_rate": 7.74598966482619e-06, | |
| "loss": 0.3193, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.7691556857003417, | |
| "grad_norm": 1.4641571044921875, | |
| "learning_rate": 7.731737746000631e-06, | |
| "loss": 0.3061, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.7711078574914593, | |
| "grad_norm": 1.8277581930160522, | |
| "learning_rate": 7.717454118194138e-06, | |
| "loss": 0.3207, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.7730600292825769, | |
| "grad_norm": 2.149855613708496, | |
| "learning_rate": 7.703138947205672e-06, | |
| "loss": 0.3146, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.7750122010736945, | |
| "grad_norm": 10.179088592529297, | |
| "learning_rate": 7.68879239920034e-06, | |
| "loss": 0.3279, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.7769643728648121, | |
| "grad_norm": 1.7173975706100464, | |
| "learning_rate": 7.674414640707453e-06, | |
| "loss": 0.3352, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.7789165446559297, | |
| "grad_norm": 2.3102633953094482, | |
| "learning_rate": 7.660005838618607e-06, | |
| "loss": 0.3247, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.7808687164470474, | |
| "grad_norm": 1.6337876319885254, | |
| "learning_rate": 7.645566160185742e-06, | |
| "loss": 0.3013, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.782820888238165, | |
| "grad_norm": 1.7631540298461914, | |
| "learning_rate": 7.631095773019195e-06, | |
| "loss": 0.3227, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.7847730600292826, | |
| "grad_norm": 1.5217206478118896, | |
| "learning_rate": 7.616594845085759e-06, | |
| "loss": 0.3213, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.7867252318204002, | |
| "grad_norm": 1.7165250778198242, | |
| "learning_rate": 7.602063544706735e-06, | |
| "loss": 0.3075, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.7886774036115178, | |
| "grad_norm": 1.6861894130706787, | |
| "learning_rate": 7.587502040555972e-06, | |
| "loss": 0.3044, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.7906295754026355, | |
| "grad_norm": 1.8273816108703613, | |
| "learning_rate": 7.572910501657918e-06, | |
| "loss": 0.3362, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.7925817471937531, | |
| "grad_norm": 1.779876470565796, | |
| "learning_rate": 7.55828909738565e-06, | |
| "loss": 0.3297, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.7945339189848707, | |
| "grad_norm": 1.5581871271133423, | |
| "learning_rate": 7.54363799745891e-06, | |
| "loss": 0.2991, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.7964860907759883, | |
| "grad_norm": 1.8014543056488037, | |
| "learning_rate": 7.528957371942139e-06, | |
| "loss": 0.3134, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.7984382625671059, | |
| "grad_norm": 1.5220543146133423, | |
| "learning_rate": 7.5142473912424975e-06, | |
| "loss": 0.32, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.8003904343582235, | |
| "grad_norm": 1.6463323831558228, | |
| "learning_rate": 7.499508226107889e-06, | |
| "loss": 0.308, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.8023426061493412, | |
| "grad_norm": 1.795111060142517, | |
| "learning_rate": 7.484740047624983e-06, | |
| "loss": 0.311, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.8042947779404588, | |
| "grad_norm": 1.662244200706482, | |
| "learning_rate": 7.469943027217222e-06, | |
| "loss": 0.3188, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.8062469497315764, | |
| "grad_norm": 1.9081573486328125, | |
| "learning_rate": 7.4551173366428355e-06, | |
| "loss": 0.2983, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.808199121522694, | |
| "grad_norm": 1.5372672080993652, | |
| "learning_rate": 7.440263147992844e-06, | |
| "loss": 0.3156, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.8101512933138116, | |
| "grad_norm": 1.4703730344772339, | |
| "learning_rate": 7.425380633689065e-06, | |
| "loss": 0.3093, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.8121034651049293, | |
| "grad_norm": 1.5877699851989746, | |
| "learning_rate": 7.4104699664821076e-06, | |
| "loss": 0.3129, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.8140556368960469, | |
| "grad_norm": 1.7468280792236328, | |
| "learning_rate": 7.395531319449372e-06, | |
| "loss": 0.2965, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.8160078086871645, | |
| "grad_norm": 1.7475582361221313, | |
| "learning_rate": 7.380564865993034e-06, | |
| "loss": 0.3188, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.8179599804782821, | |
| "grad_norm": 1.5171892642974854, | |
| "learning_rate": 7.3655707798380385e-06, | |
| "loss": 0.3098, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.8199121522693997, | |
| "grad_norm": 1.7682745456695557, | |
| "learning_rate": 7.35054923503008e-06, | |
| "loss": 0.2925, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.8218643240605173, | |
| "grad_norm": 1.6885298490524292, | |
| "learning_rate": 7.335500405933581e-06, | |
| "loss": 0.3171, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.823816495851635, | |
| "grad_norm": 1.499431848526001, | |
| "learning_rate": 7.320424467229673e-06, | |
| "loss": 0.292, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.8257686676427526, | |
| "grad_norm": 1.9566731452941895, | |
| "learning_rate": 7.305321593914163e-06, | |
| "loss": 0.3064, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.8277208394338702, | |
| "grad_norm": 1.6873054504394531, | |
| "learning_rate": 7.290191961295503e-06, | |
| "loss": 0.3157, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.8296730112249878, | |
| "grad_norm": 1.7769254446029663, | |
| "learning_rate": 7.275035744992762e-06, | |
| "loss": 0.3138, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.8316251830161054, | |
| "grad_norm": 1.712579607963562, | |
| "learning_rate": 7.2598531209335785e-06, | |
| "loss": 0.3131, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.8335773548072231, | |
| "grad_norm": 1.3291455507278442, | |
| "learning_rate": 7.2446442653521235e-06, | |
| "loss": 0.3207, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.8355295265983407, | |
| "grad_norm": 1.687888503074646, | |
| "learning_rate": 7.229409354787053e-06, | |
| "loss": 0.3274, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.8374816983894583, | |
| "grad_norm": 1.7257566452026367, | |
| "learning_rate": 7.2141485660794605e-06, | |
| "loss": 0.3261, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.8394338701805759, | |
| "grad_norm": 3.248084545135498, | |
| "learning_rate": 7.198862076370825e-06, | |
| "loss": 0.3004, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.8413860419716935, | |
| "grad_norm": 1.6880978345870972, | |
| "learning_rate": 7.183550063100946e-06, | |
| "loss": 0.2934, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.8433382137628112, | |
| "grad_norm": 1.704007863998413, | |
| "learning_rate": 7.168212704005899e-06, | |
| "loss": 0.3134, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.8452903855539288, | |
| "grad_norm": 2.11948823928833, | |
| "learning_rate": 7.1528501771159585e-06, | |
| "loss": 0.3172, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.8472425573450464, | |
| "grad_norm": 1.5055100917816162, | |
| "learning_rate": 7.137462660753542e-06, | |
| "loss": 0.3174, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.849194729136164, | |
| "grad_norm": 1.7868834733963013, | |
| "learning_rate": 7.122050333531132e-06, | |
| "loss": 0.3129, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.8511469009272816, | |
| "grad_norm": 1.6094841957092285, | |
| "learning_rate": 7.106613374349206e-06, | |
| "loss": 0.3069, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.8530990727183992, | |
| "grad_norm": 1.7572426795959473, | |
| "learning_rate": 7.0911519623941625e-06, | |
| "loss": 0.315, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.8550512445095169, | |
| "grad_norm": 1.4624701738357544, | |
| "learning_rate": 7.075666277136235e-06, | |
| "loss": 0.3014, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.8570034163006345, | |
| "grad_norm": 1.7297512292861938, | |
| "learning_rate": 7.060156498327417e-06, | |
| "loss": 0.314, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.8589555880917521, | |
| "grad_norm": 1.429887294769287, | |
| "learning_rate": 7.0446228059993675e-06, | |
| "loss": 0.2955, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.8609077598828697, | |
| "grad_norm": 1.4165374040603638, | |
| "learning_rate": 7.029065380461324e-06, | |
| "loss": 0.3195, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.8628599316739873, | |
| "grad_norm": 1.5273165702819824, | |
| "learning_rate": 7.013484402298014e-06, | |
| "loss": 0.2905, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.864812103465105, | |
| "grad_norm": 1.7598567008972168, | |
| "learning_rate": 6.997880052367549e-06, | |
| "loss": 0.3114, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.8667642752562226, | |
| "grad_norm": 1.361842393875122, | |
| "learning_rate": 6.98225251179934e-06, | |
| "loss": 0.2928, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.8687164470473402, | |
| "grad_norm": 1.6436288356781006, | |
| "learning_rate": 6.9666019619919765e-06, | |
| "loss": 0.321, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.8706686188384578, | |
| "grad_norm": 1.7562702894210815, | |
| "learning_rate": 6.950928584611135e-06, | |
| "loss": 0.3194, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.8726207906295754, | |
| "grad_norm": 1.9722226858139038, | |
| "learning_rate": 6.935232561587468e-06, | |
| "loss": 0.3087, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.874572962420693, | |
| "grad_norm": 1.77361261844635, | |
| "learning_rate": 6.9195140751144866e-06, | |
| "loss": 0.3186, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.8765251342118107, | |
| "grad_norm": 1.6900993585586548, | |
| "learning_rate": 6.903773307646449e-06, | |
| "loss": 0.2882, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.8784773060029283, | |
| "grad_norm": 1.5208570957183838, | |
| "learning_rate": 6.888010441896249e-06, | |
| "loss": 0.292, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.8804294777940459, | |
| "grad_norm": 1.6470943689346313, | |
| "learning_rate": 6.872225660833278e-06, | |
| "loss": 0.312, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.8823816495851635, | |
| "grad_norm": 1.5475988388061523, | |
| "learning_rate": 6.856419147681322e-06, | |
| "loss": 0.3135, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.8843338213762811, | |
| "grad_norm": 1.3954654932022095, | |
| "learning_rate": 6.84059108591642e-06, | |
| "loss": 0.2928, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.8862859931673988, | |
| "grad_norm": 1.4919439554214478, | |
| "learning_rate": 6.824741659264742e-06, | |
| "loss": 0.2879, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.8882381649585164, | |
| "grad_norm": 1.4646505117416382, | |
| "learning_rate": 6.808871051700447e-06, | |
| "loss": 0.3109, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.890190336749634, | |
| "grad_norm": 1.4592580795288086, | |
| "learning_rate": 6.792979447443565e-06, | |
| "loss": 0.3197, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.8921425085407516, | |
| "grad_norm": 1.6104093790054321, | |
| "learning_rate": 6.777067030957838e-06, | |
| "loss": 0.3099, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.8940946803318692, | |
| "grad_norm": 2.3115813732147217, | |
| "learning_rate": 6.7611339869485894e-06, | |
| "loss": 0.3143, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.8960468521229868, | |
| "grad_norm": 1.6102055311203003, | |
| "learning_rate": 6.745180500360589e-06, | |
| "loss": 0.2866, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.8979990239141045, | |
| "grad_norm": 1.5225039720535278, | |
| "learning_rate": 6.729206756375883e-06, | |
| "loss": 0.2921, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.8999511957052221, | |
| "grad_norm": 1.5761200189590454, | |
| "learning_rate": 6.713212940411665e-06, | |
| "loss": 0.3304, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.9019033674963397, | |
| "grad_norm": 1.7699685096740723, | |
| "learning_rate": 6.697199238118117e-06, | |
| "loss": 0.2961, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.9038555392874573, | |
| "grad_norm": 1.9456015825271606, | |
| "learning_rate": 6.681165835376252e-06, | |
| "loss": 0.3068, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.9058077110785749, | |
| "grad_norm": 1.614859700202942, | |
| "learning_rate": 6.665112918295759e-06, | |
| "loss": 0.3192, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.9077598828696926, | |
| "grad_norm": 1.859410047531128, | |
| "learning_rate": 6.64904067321284e-06, | |
| "loss": 0.2961, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.9097120546608102, | |
| "grad_norm": 1.799694299697876, | |
| "learning_rate": 6.632949286688053e-06, | |
| "loss": 0.2969, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.9116642264519278, | |
| "grad_norm": 1.6212577819824219, | |
| "learning_rate": 6.6168389455041405e-06, | |
| "loss": 0.3118, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.9136163982430454, | |
| "grad_norm": 1.8194401264190674, | |
| "learning_rate": 6.600709836663861e-06, | |
| "loss": 0.3017, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.915568570034163, | |
| "grad_norm": 1.459394097328186, | |
| "learning_rate": 6.58456214738783e-06, | |
| "loss": 0.3209, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.9175207418252807, | |
| "grad_norm": 1.4963152408599854, | |
| "learning_rate": 6.5683960651123234e-06, | |
| "loss": 0.3062, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.9194729136163983, | |
| "grad_norm": 1.6015785932540894, | |
| "learning_rate": 6.55221177748713e-06, | |
| "loss": 0.3105, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.9214250854075159, | |
| "grad_norm": 1.656490445137024, | |
| "learning_rate": 6.536009472373351e-06, | |
| "loss": 0.3111, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.9233772571986335, | |
| "grad_norm": 1.6450612545013428, | |
| "learning_rate": 6.5197893378412295e-06, | |
| "loss": 0.3049, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.9253294289897511, | |
| "grad_norm": 1.5392154455184937, | |
| "learning_rate": 6.503551562167969e-06, | |
| "loss": 0.291, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.9272816007808687, | |
| "grad_norm": 1.703579306602478, | |
| "learning_rate": 6.4872963338355386e-06, | |
| "loss": 0.293, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.9292337725719864, | |
| "grad_norm": 1.695459246635437, | |
| "learning_rate": 6.4710238415284985e-06, | |
| "loss": 0.3142, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.931185944363104, | |
| "grad_norm": 1.4692509174346924, | |
| "learning_rate": 6.454734274131796e-06, | |
| "loss": 0.2857, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.9331381161542216, | |
| "grad_norm": 1.5995670557022095, | |
| "learning_rate": 6.438427820728584e-06, | |
| "loss": 0.3114, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.9350902879453392, | |
| "grad_norm": 1.643405795097351, | |
| "learning_rate": 6.422104670598021e-06, | |
| "loss": 0.3017, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.9370424597364568, | |
| "grad_norm": 1.4569945335388184, | |
| "learning_rate": 6.405765013213073e-06, | |
| "loss": 0.3, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.9389946315275745, | |
| "grad_norm": 1.4433624744415283, | |
| "learning_rate": 6.389409038238317e-06, | |
| "loss": 0.303, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.9409468033186921, | |
| "grad_norm": 1.7255630493164062, | |
| "learning_rate": 6.37303693552774e-06, | |
| "loss": 0.2974, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.9428989751098097, | |
| "grad_norm": 1.4573408365249634, | |
| "learning_rate": 6.35664889512253e-06, | |
| "loss": 0.3217, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.9448511469009273, | |
| "grad_norm": 2.016216278076172, | |
| "learning_rate": 6.340245107248879e-06, | |
| "loss": 0.2707, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.9468033186920449, | |
| "grad_norm": 1.9193816184997559, | |
| "learning_rate": 6.323825762315765e-06, | |
| "loss": 0.2976, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.9487554904831625, | |
| "grad_norm": 1.5443313121795654, | |
| "learning_rate": 6.307391050912748e-06, | |
| "loss": 0.3106, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.9507076622742802, | |
| "grad_norm": 1.598844289779663, | |
| "learning_rate": 6.290941163807756e-06, | |
| "loss": 0.3063, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.9526598340653978, | |
| "grad_norm": 1.764891505241394, | |
| "learning_rate": 6.27447629194487e-06, | |
| "loss": 0.3202, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.9546120058565154, | |
| "grad_norm": 1.6712101697921753, | |
| "learning_rate": 6.257996626442113e-06, | |
| "loss": 0.3073, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.956564177647633, | |
| "grad_norm": 1.5691962242126465, | |
| "learning_rate": 6.241502358589222e-06, | |
| "loss": 0.2932, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.9585163494387506, | |
| "grad_norm": 1.6402193307876587, | |
| "learning_rate": 6.224993679845434e-06, | |
| "loss": 0.2886, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.9604685212298683, | |
| "grad_norm": 1.7932533025741577, | |
| "learning_rate": 6.2084707818372604e-06, | |
| "loss": 0.3047, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.9624206930209859, | |
| "grad_norm": 1.9926279783248901, | |
| "learning_rate": 6.1919338563562705e-06, | |
| "loss": 0.3092, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.9643728648121035, | |
| "grad_norm": 1.5863287448883057, | |
| "learning_rate": 6.175383095356852e-06, | |
| "loss": 0.3038, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.9663250366032211, | |
| "grad_norm": 1.7748479843139648, | |
| "learning_rate": 6.158818690953991e-06, | |
| "loss": 0.3006, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.9682772083943387, | |
| "grad_norm": 1.546392560005188, | |
| "learning_rate": 6.142240835421049e-06, | |
| "loss": 0.3345, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.9702293801854563, | |
| "grad_norm": 1.392622470855713, | |
| "learning_rate": 6.125649721187514e-06, | |
| "loss": 0.3015, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.972181551976574, | |
| "grad_norm": 1.4012292623519897, | |
| "learning_rate": 6.109045540836779e-06, | |
| "loss": 0.2938, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.9741337237676916, | |
| "grad_norm": 1.656556248664856, | |
| "learning_rate": 6.0924284871039055e-06, | |
| "loss": 0.2907, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.9760858955588092, | |
| "grad_norm": 1.6998143196105957, | |
| "learning_rate": 6.075798752873381e-06, | |
| "loss": 0.2955, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.9780380673499268, | |
| "grad_norm": 1.7401262521743774, | |
| "learning_rate": 6.059156531176887e-06, | |
| "loss": 0.2864, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.9799902391410444, | |
| "grad_norm": 1.6069300174713135, | |
| "learning_rate": 6.042502015191052e-06, | |
| "loss": 0.2836, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.9819424109321621, | |
| "grad_norm": 2.119791269302368, | |
| "learning_rate": 6.025835398235213e-06, | |
| "loss": 0.3022, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.9838945827232797, | |
| "grad_norm": 1.3919116258621216, | |
| "learning_rate": 6.009156873769172e-06, | |
| "loss": 0.3041, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.9858467545143973, | |
| "grad_norm": 1.7489839792251587, | |
| "learning_rate": 5.992466635390945e-06, | |
| "loss": 0.2841, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.9877989263055149, | |
| "grad_norm": 1.5278687477111816, | |
| "learning_rate": 5.975764876834522e-06, | |
| "loss": 0.2791, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.9897510980966325, | |
| "grad_norm": 1.8421188592910767, | |
| "learning_rate": 5.959051791967612e-06, | |
| "loss": 0.2885, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.9917032698877501, | |
| "grad_norm": 1.8238730430603027, | |
| "learning_rate": 5.942327574789402e-06, | |
| "loss": 0.3192, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.9936554416788678, | |
| "grad_norm": 1.5909425020217896, | |
| "learning_rate": 5.925592419428291e-06, | |
| "loss": 0.2717, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.9956076134699854, | |
| "grad_norm": 1.8577344417572021, | |
| "learning_rate": 5.908846520139646e-06, | |
| "loss": 0.3013, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.997559785261103, | |
| "grad_norm": 1.6016184091567993, | |
| "learning_rate": 5.892090071303551e-06, | |
| "loss": 0.3048, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.9995119570522206, | |
| "grad_norm": 1.6936761140823364, | |
| "learning_rate": 5.875323267422538e-06, | |
| "loss": 0.2856, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.0013665202537823, | |
| "grad_norm": 1.4396748542785645, | |
| "learning_rate": 5.858546303119341e-06, | |
| "loss": 0.2494, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.0033186920449, | |
| "grad_norm": 1.4734578132629395, | |
| "learning_rate": 5.841759373134629e-06, | |
| "loss": 0.2256, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.0052708638360175, | |
| "grad_norm": 1.3114964962005615, | |
| "learning_rate": 5.8249626723247535e-06, | |
| "loss": 0.2184, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.0072230356271352, | |
| "grad_norm": 1.84878671169281, | |
| "learning_rate": 5.808156395659475e-06, | |
| "loss": 0.2266, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.0091752074182527, | |
| "grad_norm": 1.5423600673675537, | |
| "learning_rate": 5.791340738219715e-06, | |
| "loss": 0.2255, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.0111273792093705, | |
| "grad_norm": 1.4672765731811523, | |
| "learning_rate": 5.774515895195279e-06, | |
| "loss": 0.238, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.013079551000488, | |
| "grad_norm": 1.5899931192398071, | |
| "learning_rate": 5.757682061882596e-06, | |
| "loss": 0.2272, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.0150317227916057, | |
| "grad_norm": 1.8665298223495483, | |
| "learning_rate": 5.740839433682449e-06, | |
| "loss": 0.2402, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.0169838945827232, | |
| "grad_norm": 1.8546682596206665, | |
| "learning_rate": 5.723988206097712e-06, | |
| "loss": 0.2261, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.018936066373841, | |
| "grad_norm": 1.6443926095962524, | |
| "learning_rate": 5.7071285747310755e-06, | |
| "loss": 0.2357, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.0208882381649584, | |
| "grad_norm": 1.82640540599823, | |
| "learning_rate": 5.69026073528278e-06, | |
| "loss": 0.2224, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.0228404099560762, | |
| "grad_norm": 1.2243527173995972, | |
| "learning_rate": 5.673384883548339e-06, | |
| "loss": 0.2236, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.0247925817471937, | |
| "grad_norm": 1.621323823928833, | |
| "learning_rate": 5.656501215416272e-06, | |
| "loss": 0.2343, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.0267447535383114, | |
| "grad_norm": 1.4708138704299927, | |
| "learning_rate": 5.639609926865825e-06, | |
| "loss": 0.2246, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.028696925329429, | |
| "grad_norm": 1.7992093563079834, | |
| "learning_rate": 5.6227112139647065e-06, | |
| "loss": 0.2215, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.0306490971205466, | |
| "grad_norm": 1.545155644416809, | |
| "learning_rate": 5.605805272866797e-06, | |
| "loss": 0.2244, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.0326012689116641, | |
| "grad_norm": 1.6702899932861328, | |
| "learning_rate": 5.58889229980988e-06, | |
| "loss": 0.2302, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.0345534407027819, | |
| "grad_norm": 1.704221487045288, | |
| "learning_rate": 5.571972491113364e-06, | |
| "loss": 0.2337, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.0365056124938994, | |
| "grad_norm": 1.6749101877212524, | |
| "learning_rate": 5.555046043176008e-06, | |
| "loss": 0.2181, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.0384577842850171, | |
| "grad_norm": 1.77487313747406, | |
| "learning_rate": 5.538113152473628e-06, | |
| "loss": 0.2168, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.0404099560761346, | |
| "grad_norm": 1.8465723991394043, | |
| "learning_rate": 5.521174015556832e-06, | |
| "loss": 0.2228, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.0423621278672524, | |
| "grad_norm": 1.8834266662597656, | |
| "learning_rate": 5.504228829048728e-06, | |
| "loss": 0.2024, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.0443142996583699, | |
| "grad_norm": 1.3431150913238525, | |
| "learning_rate": 5.487277789642648e-06, | |
| "loss": 0.2436, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.0462664714494876, | |
| "grad_norm": 1.867727518081665, | |
| "learning_rate": 5.470321094099859e-06, | |
| "loss": 0.2316, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.048218643240605, | |
| "grad_norm": 1.5301685333251953, | |
| "learning_rate": 5.453358939247285e-06, | |
| "loss": 0.2188, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.0501708150317228, | |
| "grad_norm": 1.3902242183685303, | |
| "learning_rate": 5.4363915219752214e-06, | |
| "loss": 0.2096, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.0521229868228403, | |
| "grad_norm": 1.7120879888534546, | |
| "learning_rate": 5.419419039235042e-06, | |
| "loss": 0.2264, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.054075158613958, | |
| "grad_norm": 2.1603755950927734, | |
| "learning_rate": 5.4024416880369245e-06, | |
| "loss": 0.2179, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.0560273304050756, | |
| "grad_norm": 1.4981776475906372, | |
| "learning_rate": 5.385459665447554e-06, | |
| "loss": 0.2334, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.0579795021961933, | |
| "grad_norm": 1.601750373840332, | |
| "learning_rate": 5.368473168587838e-06, | |
| "loss": 0.2306, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.0599316739873108, | |
| "grad_norm": 1.6521871089935303, | |
| "learning_rate": 5.351482394630626e-06, | |
| "loss": 0.2512, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.0618838457784285, | |
| "grad_norm": 1.3969298601150513, | |
| "learning_rate": 5.334487540798408e-06, | |
| "loss": 0.2308, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.063836017569546, | |
| "grad_norm": 1.5212770700454712, | |
| "learning_rate": 5.317488804361035e-06, | |
| "loss": 0.2054, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.0657881893606638, | |
| "grad_norm": 1.7573391199111938, | |
| "learning_rate": 5.300486382633428e-06, | |
| "loss": 0.2058, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.0677403611517813, | |
| "grad_norm": 1.4401905536651611, | |
| "learning_rate": 5.283480472973278e-06, | |
| "loss": 0.2237, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.069692532942899, | |
| "grad_norm": 1.5984746217727661, | |
| "learning_rate": 5.26647127277877e-06, | |
| "loss": 0.2115, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.0716447047340165, | |
| "grad_norm": 1.6980221271514893, | |
| "learning_rate": 5.249458979486281e-06, | |
| "loss": 0.2152, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.0735968765251342, | |
| "grad_norm": 1.834252953529358, | |
| "learning_rate": 5.232443790568091e-06, | |
| "loss": 0.253, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.0755490483162518, | |
| "grad_norm": 1.8841309547424316, | |
| "learning_rate": 5.215425903530093e-06, | |
| "loss": 0.238, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.0775012201073695, | |
| "grad_norm": 1.3869953155517578, | |
| "learning_rate": 5.198405515909497e-06, | |
| "loss": 0.2119, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.079453391898487, | |
| "grad_norm": 1.437673807144165, | |
| "learning_rate": 5.181382825272543e-06, | |
| "loss": 0.2126, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.0814055636896047, | |
| "grad_norm": 1.5257771015167236, | |
| "learning_rate": 5.1643580292121955e-06, | |
| "loss": 0.2389, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.0833577354807222, | |
| "grad_norm": 1.3782355785369873, | |
| "learning_rate": 5.1473313253458654e-06, | |
| "loss": 0.236, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.08530990727184, | |
| "grad_norm": 1.6246196031570435, | |
| "learning_rate": 5.130302911313109e-06, | |
| "loss": 0.2055, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.0872620790629575, | |
| "grad_norm": 1.501262903213501, | |
| "learning_rate": 5.113272984773325e-06, | |
| "loss": 0.2181, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.0892142508540752, | |
| "grad_norm": 1.7623234987258911, | |
| "learning_rate": 5.09624174340348e-06, | |
| "loss": 0.2255, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.0911664226451927, | |
| "grad_norm": 1.4687215089797974, | |
| "learning_rate": 5.079209384895791e-06, | |
| "loss": 0.2182, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.0931185944363104, | |
| "grad_norm": 1.4029916524887085, | |
| "learning_rate": 5.062176106955456e-06, | |
| "loss": 0.2195, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.095070766227428, | |
| "grad_norm": 1.3234246969223022, | |
| "learning_rate": 5.04514210729833e-06, | |
| "loss": 0.2282, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.0970229380185457, | |
| "grad_norm": 1.8046473264694214, | |
| "learning_rate": 5.028107583648659e-06, | |
| "loss": 0.2275, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.0989751098096632, | |
| "grad_norm": 1.8408907651901245, | |
| "learning_rate": 5.011072733736764e-06, | |
| "loss": 0.2095, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.100927281600781, | |
| "grad_norm": 1.4698619842529297, | |
| "learning_rate": 4.994037755296751e-06, | |
| "loss": 0.2454, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.1028794533918984, | |
| "grad_norm": 1.5274852514266968, | |
| "learning_rate": 4.9770028460642274e-06, | |
| "loss": 0.2353, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.1048316251830161, | |
| "grad_norm": 1.8901482820510864, | |
| "learning_rate": 4.959968203773987e-06, | |
| "loss": 0.2271, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.1067837969741336, | |
| "grad_norm": 1.4531559944152832, | |
| "learning_rate": 4.942934026157734e-06, | |
| "loss": 0.2076, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.1087359687652514, | |
| "grad_norm": 1.6990450620651245, | |
| "learning_rate": 4.925900510941769e-06, | |
| "loss": 0.2184, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.1106881405563689, | |
| "grad_norm": 1.2921063899993896, | |
| "learning_rate": 4.908867855844709e-06, | |
| "loss": 0.235, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.1126403123474866, | |
| "grad_norm": 1.931989073753357, | |
| "learning_rate": 4.891836258575195e-06, | |
| "loss": 0.2289, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.1145924841386041, | |
| "grad_norm": 1.7392537593841553, | |
| "learning_rate": 4.874805916829573e-06, | |
| "loss": 0.231, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.1165446559297219, | |
| "grad_norm": 1.6657911539077759, | |
| "learning_rate": 4.857777028289627e-06, | |
| "loss": 0.219, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.1184968277208394, | |
| "grad_norm": 1.6863371133804321, | |
| "learning_rate": 4.840749790620268e-06, | |
| "loss": 0.2076, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.120448999511957, | |
| "grad_norm": 1.742223858833313, | |
| "learning_rate": 4.823724401467248e-06, | |
| "loss": 0.2114, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.1224011713030746, | |
| "grad_norm": 1.8562861680984497, | |
| "learning_rate": 4.806701058454856e-06, | |
| "loss": 0.2187, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.1243533430941923, | |
| "grad_norm": 1.945334553718567, | |
| "learning_rate": 4.789679959183638e-06, | |
| "loss": 0.2399, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.1263055148853098, | |
| "grad_norm": 1.8333011865615845, | |
| "learning_rate": 4.772661301228088e-06, | |
| "loss": 0.2335, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.1282576866764276, | |
| "grad_norm": 1.5760397911071777, | |
| "learning_rate": 4.755645282134368e-06, | |
| "loss": 0.243, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.130209858467545, | |
| "grad_norm": 1.642067790031433, | |
| "learning_rate": 4.738632099418004e-06, | |
| "loss": 0.2297, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.1321620302586628, | |
| "grad_norm": 1.6263489723205566, | |
| "learning_rate": 4.721621950561604e-06, | |
| "loss": 0.2145, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.1341142020497803, | |
| "grad_norm": 1.5291143655776978, | |
| "learning_rate": 4.704615033012556e-06, | |
| "loss": 0.2263, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.136066373840898, | |
| "grad_norm": 1.674197793006897, | |
| "learning_rate": 4.687611544180741e-06, | |
| "loss": 0.231, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.1380185456320155, | |
| "grad_norm": 1.3444764614105225, | |
| "learning_rate": 4.670611681436242e-06, | |
| "loss": 0.2265, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.1399707174231333, | |
| "grad_norm": 1.430079698562622, | |
| "learning_rate": 4.6536156421070484e-06, | |
| "loss": 0.1948, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.1419228892142508, | |
| "grad_norm": 1.8119804859161377, | |
| "learning_rate": 4.636623623476775e-06, | |
| "loss": 0.2292, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.1438750610053685, | |
| "grad_norm": 2.2670273780822754, | |
| "learning_rate": 4.619635822782357e-06, | |
| "loss": 0.2045, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.145827232796486, | |
| "grad_norm": 1.7421070337295532, | |
| "learning_rate": 4.602652437211781e-06, | |
| "loss": 0.2306, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.1477794045876037, | |
| "grad_norm": 1.8024625778198242, | |
| "learning_rate": 4.585673663901773e-06, | |
| "loss": 0.2136, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.1497315763787213, | |
| "grad_norm": 1.860446572303772, | |
| "learning_rate": 4.5686996999355266e-06, | |
| "loss": 0.227, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.151683748169839, | |
| "grad_norm": 1.634185552597046, | |
| "learning_rate": 4.551730742340416e-06, | |
| "loss": 0.227, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.1536359199609565, | |
| "grad_norm": 1.6622986793518066, | |
| "learning_rate": 4.5347669880856895e-06, | |
| "loss": 0.2058, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.1555880917520742, | |
| "grad_norm": 1.7402809858322144, | |
| "learning_rate": 4.517808634080213e-06, | |
| "loss": 0.2249, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.1575402635431917, | |
| "grad_norm": 1.7279547452926636, | |
| "learning_rate": 4.500855877170155e-06, | |
| "loss": 0.2289, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.1594924353343095, | |
| "grad_norm": 1.647835612297058, | |
| "learning_rate": 4.483908914136723e-06, | |
| "loss": 0.2232, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.161444607125427, | |
| "grad_norm": 1.6166253089904785, | |
| "learning_rate": 4.4669679416938685e-06, | |
| "loss": 0.2209, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.1633967789165447, | |
| "grad_norm": 1.8101551532745361, | |
| "learning_rate": 4.450033156486007e-06, | |
| "loss": 0.2251, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.1653489507076622, | |
| "grad_norm": 1.6204791069030762, | |
| "learning_rate": 4.4331047550857345e-06, | |
| "loss": 0.1969, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.16730112249878, | |
| "grad_norm": 1.601891040802002, | |
| "learning_rate": 4.416182933991548e-06, | |
| "loss": 0.2364, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.1692532942898974, | |
| "grad_norm": 1.7196043729782104, | |
| "learning_rate": 4.3992678896255595e-06, | |
| "loss": 0.2312, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.1712054660810152, | |
| "grad_norm": 1.5971014499664307, | |
| "learning_rate": 4.382359818331221e-06, | |
| "loss": 0.2291, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.1731576378721327, | |
| "grad_norm": 1.803208351135254, | |
| "learning_rate": 4.365458916371046e-06, | |
| "loss": 0.2133, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.1751098096632504, | |
| "grad_norm": 2.963254690170288, | |
| "learning_rate": 4.348565379924324e-06, | |
| "loss": 0.2279, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.177061981454368, | |
| "grad_norm": 1.800583004951477, | |
| "learning_rate": 4.331679405084853e-06, | |
| "loss": 0.2223, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.1790141532454856, | |
| "grad_norm": 1.5035797357559204, | |
| "learning_rate": 4.3148011878586576e-06, | |
| "loss": 0.2112, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.1809663250366031, | |
| "grad_norm": 1.5907450914382935, | |
| "learning_rate": 4.297930924161714e-06, | |
| "loss": 0.2127, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.1829184968277209, | |
| "grad_norm": 1.705296277999878, | |
| "learning_rate": 4.281068809817675e-06, | |
| "loss": 0.2242, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.1848706686188384, | |
| "grad_norm": 2.0136845111846924, | |
| "learning_rate": 4.264215040555605e-06, | |
| "loss": 0.2279, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.1868228404099561, | |
| "grad_norm": 1.5842479467391968, | |
| "learning_rate": 4.247369812007692e-06, | |
| "loss": 0.2281, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.1887750122010736, | |
| "grad_norm": 1.4241150617599487, | |
| "learning_rate": 4.230533319706998e-06, | |
| "loss": 0.2223, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.1907271839921914, | |
| "grad_norm": 1.6764804124832153, | |
| "learning_rate": 4.213705759085172e-06, | |
| "loss": 0.2191, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.1926793557833089, | |
| "grad_norm": 1.4387422800064087, | |
| "learning_rate": 4.196887325470183e-06, | |
| "loss": 0.2173, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.1946315275744266, | |
| "grad_norm": 1.4490535259246826, | |
| "learning_rate": 4.180078214084068e-06, | |
| "loss": 0.2182, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.196583699365544, | |
| "grad_norm": 1.8612465858459473, | |
| "learning_rate": 4.1632786200406436e-06, | |
| "loss": 0.2179, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.1985358711566618, | |
| "grad_norm": 1.6195158958435059, | |
| "learning_rate": 4.146488738343263e-06, | |
| "loss": 0.2113, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.2004880429477793, | |
| "grad_norm": 1.405368685722351, | |
| "learning_rate": 4.129708763882533e-06, | |
| "loss": 0.2209, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.202440214738897, | |
| "grad_norm": 1.5172168016433716, | |
| "learning_rate": 4.112938891434069e-06, | |
| "loss": 0.2136, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.2043923865300146, | |
| "grad_norm": 1.6718852519989014, | |
| "learning_rate": 4.096179315656219e-06, | |
| "loss": 0.2088, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.2063445583211323, | |
| "grad_norm": 1.8428574800491333, | |
| "learning_rate": 4.079430231087815e-06, | |
| "loss": 0.2199, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.2082967301122498, | |
| "grad_norm": 1.4721224308013916, | |
| "learning_rate": 4.062691832145913e-06, | |
| "loss": 0.2124, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.2102489019033675, | |
| "grad_norm": 1.5123246908187866, | |
| "learning_rate": 4.045964313123528e-06, | |
| "loss": 0.2063, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.212201073694485, | |
| "grad_norm": 1.5970547199249268, | |
| "learning_rate": 4.029247868187392e-06, | |
| "loss": 0.2037, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.2141532454856028, | |
| "grad_norm": 1.618791103363037, | |
| "learning_rate": 4.012542691375688e-06, | |
| "loss": 0.2214, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.2161054172767203, | |
| "grad_norm": 1.6315085887908936, | |
| "learning_rate": 3.995848976595806e-06, | |
| "loss": 0.2201, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.218057589067838, | |
| "grad_norm": 1.8346765041351318, | |
| "learning_rate": 3.979166917622086e-06, | |
| "loss": 0.2167, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.2200097608589555, | |
| "grad_norm": 1.7975165843963623, | |
| "learning_rate": 3.962496708093575e-06, | |
| "loss": 0.2021, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.2219619326500732, | |
| "grad_norm": 11.739858627319336, | |
| "learning_rate": 3.945838541511773e-06, | |
| "loss": 0.2218, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.2239141044411908, | |
| "grad_norm": 1.684730887413025, | |
| "learning_rate": 3.929192611238395e-06, | |
| "loss": 0.2336, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.2258662762323085, | |
| "grad_norm": 1.6722686290740967, | |
| "learning_rate": 3.912559110493115e-06, | |
| "loss": 0.1999, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.227818448023426, | |
| "grad_norm": 2.155362844467163, | |
| "learning_rate": 3.895938232351333e-06, | |
| "loss": 0.2237, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.2297706198145437, | |
| "grad_norm": 1.7131150960922241, | |
| "learning_rate": 3.879330169741934e-06, | |
| "loss": 0.2108, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.2317227916056612, | |
| "grad_norm": 1.8110287189483643, | |
| "learning_rate": 3.862735115445039e-06, | |
| "loss": 0.2111, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.233674963396779, | |
| "grad_norm": 1.29988694190979, | |
| "learning_rate": 3.846153262089777e-06, | |
| "loss": 0.2032, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.2356271351878965, | |
| "grad_norm": 1.6154941320419312, | |
| "learning_rate": 3.829584802152042e-06, | |
| "loss": 0.2116, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.2375793069790142, | |
| "grad_norm": 1.7335631847381592, | |
| "learning_rate": 3.8130299279522696e-06, | |
| "loss": 0.2237, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.2395314787701317, | |
| "grad_norm": 1.5450068712234497, | |
| "learning_rate": 3.796488831653187e-06, | |
| "loss": 0.2048, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.2414836505612494, | |
| "grad_norm": 1.7847269773483276, | |
| "learning_rate": 3.779961705257605e-06, | |
| "loss": 0.2205, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.243435822352367, | |
| "grad_norm": 1.9442840814590454, | |
| "learning_rate": 3.763448740606164e-06, | |
| "loss": 0.2263, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.2453879941434847, | |
| "grad_norm": 1.4432337284088135, | |
| "learning_rate": 3.7469501293751277e-06, | |
| "loss": 0.2264, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.2473401659346022, | |
| "grad_norm": 1.4733388423919678, | |
| "learning_rate": 3.730466063074154e-06, | |
| "loss": 0.2218, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.24929233772572, | |
| "grad_norm": 1.5178956985473633, | |
| "learning_rate": 3.713996733044059e-06, | |
| "loss": 0.2049, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.2512445095168374, | |
| "grad_norm": 1.6209615468978882, | |
| "learning_rate": 3.6975423304546142e-06, | |
| "loss": 0.2179, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.2531966813079551, | |
| "grad_norm": 1.591838002204895, | |
| "learning_rate": 3.6811030463023133e-06, | |
| "loss": 0.2227, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.2551488530990726, | |
| "grad_norm": 1.7100849151611328, | |
| "learning_rate": 3.664679071408166e-06, | |
| "loss": 0.23, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.2571010248901904, | |
| "grad_norm": 1.591233491897583, | |
| "learning_rate": 3.648270596415473e-06, | |
| "loss": 0.2248, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.2590531966813079, | |
| "grad_norm": 1.4432621002197266, | |
| "learning_rate": 3.6318778117876225e-06, | |
| "loss": 0.2202, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.2610053684724256, | |
| "grad_norm": 1.7753307819366455, | |
| "learning_rate": 3.61550090780587e-06, | |
| "loss": 0.2052, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.2629575402635431, | |
| "grad_norm": 1.6676727533340454, | |
| "learning_rate": 3.5991400745671384e-06, | |
| "loss": 0.2075, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.2649097120546609, | |
| "grad_norm": 1.4981262683868408, | |
| "learning_rate": 3.5827955019818072e-06, | |
| "loss": 0.2182, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.2668618838457784, | |
| "grad_norm": 1.5867663621902466, | |
| "learning_rate": 3.5664673797715056e-06, | |
| "loss": 0.2183, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.268814055636896, | |
| "grad_norm": 1.8115100860595703, | |
| "learning_rate": 3.550155897466917e-06, | |
| "loss": 0.2133, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.2707662274280136, | |
| "grad_norm": 1.7083834409713745, | |
| "learning_rate": 3.5338612444055697e-06, | |
| "loss": 0.2025, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.2727183992191313, | |
| "grad_norm": 1.6217999458312988, | |
| "learning_rate": 3.5175836097296504e-06, | |
| "loss": 0.2121, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.2746705710102488, | |
| "grad_norm": 1.762628197669983, | |
| "learning_rate": 3.5013231823837985e-06, | |
| "loss": 0.2122, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.2766227428013666, | |
| "grad_norm": 1.7376768589019775, | |
| "learning_rate": 3.4850801511129205e-06, | |
| "loss": 0.2168, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.278574914592484, | |
| "grad_norm": 2.347261667251587, | |
| "learning_rate": 3.468854704459991e-06, | |
| "loss": 0.2058, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.2805270863836018, | |
| "grad_norm": 1.9372010231018066, | |
| "learning_rate": 3.452647030763876e-06, | |
| "loss": 0.2009, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.2824792581747193, | |
| "grad_norm": 1.5644526481628418, | |
| "learning_rate": 3.436457318157131e-06, | |
| "loss": 0.2066, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.284431429965837, | |
| "grad_norm": 2.102677345275879, | |
| "learning_rate": 3.4202857545638346e-06, | |
| "loss": 0.2302, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.2863836017569545, | |
| "grad_norm": 1.641678810119629, | |
| "learning_rate": 3.4041325276973945e-06, | |
| "loss": 0.193, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.2883357735480723, | |
| "grad_norm": 1.6585208177566528, | |
| "learning_rate": 3.38799782505837e-06, | |
| "loss": 0.223, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.2902879453391898, | |
| "grad_norm": 1.4569475650787354, | |
| "learning_rate": 3.3718818339323058e-06, | |
| "loss": 0.2145, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.2922401171303075, | |
| "grad_norm": 1.6879363059997559, | |
| "learning_rate": 3.355784741387539e-06, | |
| "loss": 0.2171, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.294192288921425, | |
| "grad_norm": 1.4503649473190308, | |
| "learning_rate": 3.3397067342730504e-06, | |
| "loss": 0.2055, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.2961444607125427, | |
| "grad_norm": 1.8341500759124756, | |
| "learning_rate": 3.323647999216278e-06, | |
| "loss": 0.2065, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.2980966325036603, | |
| "grad_norm": 1.490210771560669, | |
| "learning_rate": 3.307608722620959e-06, | |
| "loss": 0.1857, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.300048804294778, | |
| "grad_norm": 1.4428335428237915, | |
| "learning_rate": 3.2915890906649628e-06, | |
| "loss": 0.2097, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.3020009760858955, | |
| "grad_norm": 1.6432727575302124, | |
| "learning_rate": 3.2755892892981323e-06, | |
| "loss": 0.205, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.3039531478770132, | |
| "grad_norm": 2.000216484069824, | |
| "learning_rate": 3.2596095042401256e-06, | |
| "loss": 0.2398, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.3059053196681307, | |
| "grad_norm": 1.362533450126648, | |
| "learning_rate": 3.2436499209782557e-06, | |
| "loss": 0.2083, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.3078574914592485, | |
| "grad_norm": 2.0070676803588867, | |
| "learning_rate": 3.227710724765345e-06, | |
| "loss": 0.2052, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.309809663250366, | |
| "grad_norm": 1.47205650806427, | |
| "learning_rate": 3.211792100617566e-06, | |
| "loss": 0.2149, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.3117618350414837, | |
| "grad_norm": 1.3983055353164673, | |
| "learning_rate": 3.1958942333123035e-06, | |
| "loss": 0.2337, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.3137140068326012, | |
| "grad_norm": 1.6914767026901245, | |
| "learning_rate": 3.1800173073859995e-06, | |
| "loss": 0.2068, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.315666178623719, | |
| "grad_norm": 1.351125717163086, | |
| "learning_rate": 3.164161507132021e-06, | |
| "loss": 0.1956, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.3176183504148364, | |
| "grad_norm": 1.5504491329193115, | |
| "learning_rate": 3.1483270165985124e-06, | |
| "loss": 0.2372, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.3195705222059542, | |
| "grad_norm": 1.9348682165145874, | |
| "learning_rate": 3.1325140195862664e-06, | |
| "loss": 0.2137, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.3215226939970717, | |
| "grad_norm": 1.408018708229065, | |
| "learning_rate": 3.1167226996465847e-06, | |
| "loss": 0.1905, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.3234748657881894, | |
| "grad_norm": 2.0732595920562744, | |
| "learning_rate": 3.10095324007915e-06, | |
| "loss": 0.2192, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.325427037579307, | |
| "grad_norm": 1.3288345336914062, | |
| "learning_rate": 3.085205823929899e-06, | |
| "loss": 0.2122, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.3273792093704246, | |
| "grad_norm": 1.5556180477142334, | |
| "learning_rate": 3.069480633988895e-06, | |
| "loss": 0.2007, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.3293313811615421, | |
| "grad_norm": 1.4359447956085205, | |
| "learning_rate": 3.053777852788211e-06, | |
| "loss": 0.204, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.3312835529526599, | |
| "grad_norm": 1.696527123451233, | |
| "learning_rate": 3.0380976625998014e-06, | |
| "loss": 0.2093, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.3332357247437774, | |
| "grad_norm": 1.706363320350647, | |
| "learning_rate": 3.022440245433403e-06, | |
| "loss": 0.1934, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.3351878965348951, | |
| "grad_norm": 1.193893551826477, | |
| "learning_rate": 3.0068057830343998e-06, | |
| "loss": 0.1961, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.3371400683260126, | |
| "grad_norm": 1.2589608430862427, | |
| "learning_rate": 2.991194456881737e-06, | |
| "loss": 0.2085, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.3390922401171304, | |
| "grad_norm": 1.5599446296691895, | |
| "learning_rate": 2.9756064481857937e-06, | |
| "loss": 0.2108, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.3410444119082479, | |
| "grad_norm": 1.5846322774887085, | |
| "learning_rate": 2.9600419378862925e-06, | |
| "loss": 0.1868, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.3429965836993656, | |
| "grad_norm": 1.6705174446105957, | |
| "learning_rate": 2.9445011066502015e-06, | |
| "loss": 0.2093, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.344948755490483, | |
| "grad_norm": 1.7667570114135742, | |
| "learning_rate": 2.928984134869619e-06, | |
| "loss": 0.2148, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.3469009272816008, | |
| "grad_norm": 1.7263866662979126, | |
| "learning_rate": 2.9134912026596995e-06, | |
| "loss": 0.1983, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.3488530990727183, | |
| "grad_norm": 1.8353338241577148, | |
| "learning_rate": 2.8980224898565555e-06, | |
| "loss": 0.2074, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.350805270863836, | |
| "grad_norm": 1.8815158605575562, | |
| "learning_rate": 2.8825781760151693e-06, | |
| "loss": 0.2097, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.3527574426549536, | |
| "grad_norm": 1.6498256921768188, | |
| "learning_rate": 2.8671584404073037e-06, | |
| "loss": 0.1941, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.3547096144460713, | |
| "grad_norm": 1.5063817501068115, | |
| "learning_rate": 2.8517634620194358e-06, | |
| "loss": 0.1977, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.3566617862371888, | |
| "grad_norm": 2.3448615074157715, | |
| "learning_rate": 2.836393419550661e-06, | |
| "loss": 0.1946, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.3586139580283065, | |
| "grad_norm": 1.4969979524612427, | |
| "learning_rate": 2.821048491410632e-06, | |
| "loss": 0.2039, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.360566129819424, | |
| "grad_norm": 1.774389386177063, | |
| "learning_rate": 2.8057288557174905e-06, | |
| "loss": 0.1783, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.3625183016105418, | |
| "grad_norm": 1.4157977104187012, | |
| "learning_rate": 2.790434690295781e-06, | |
| "loss": 0.2256, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.3644704734016593, | |
| "grad_norm": 1.6641570329666138, | |
| "learning_rate": 2.7751661726744083e-06, | |
| "loss": 0.2009, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.366422645192777, | |
| "grad_norm": 1.6484465599060059, | |
| "learning_rate": 2.75992348008456e-06, | |
| "loss": 0.2023, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.3683748169838945, | |
| "grad_norm": 1.6529380083084106, | |
| "learning_rate": 2.74470678945766e-06, | |
| "loss": 0.204, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.3703269887750122, | |
| "grad_norm": 1.4806594848632812, | |
| "learning_rate": 2.729516277423313e-06, | |
| "loss": 0.2004, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.3722791605661298, | |
| "grad_norm": 1.3869143724441528, | |
| "learning_rate": 2.714352120307252e-06, | |
| "loss": 0.2119, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.3742313323572475, | |
| "grad_norm": 1.3637787103652954, | |
| "learning_rate": 2.699214494129286e-06, | |
| "loss": 0.2128, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.376183504148365, | |
| "grad_norm": 1.4802008867263794, | |
| "learning_rate": 2.68410357460127e-06, | |
| "loss": 0.2053, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.3781356759394827, | |
| "grad_norm": 1.6326429843902588, | |
| "learning_rate": 2.669019537125056e-06, | |
| "loss": 0.2159, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.3800878477306002, | |
| "grad_norm": 1.4315476417541504, | |
| "learning_rate": 2.653962556790458e-06, | |
| "loss": 0.2104, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.382040019521718, | |
| "grad_norm": 1.6744425296783447, | |
| "learning_rate": 2.638932808373226e-06, | |
| "loss": 0.2073, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.3839921913128355, | |
| "grad_norm": 1.5166488885879517, | |
| "learning_rate": 2.623930466333002e-06, | |
| "loss": 0.2004, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.3859443631039532, | |
| "grad_norm": 1.4280526638031006, | |
| "learning_rate": 2.608955704811314e-06, | |
| "loss": 0.1921, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.3878965348950707, | |
| "grad_norm": 1.697077751159668, | |
| "learning_rate": 2.594008697629543e-06, | |
| "loss": 0.2176, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.3898487066861884, | |
| "grad_norm": 1.7205731868743896, | |
| "learning_rate": 2.5790896182869106e-06, | |
| "loss": 0.2108, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.391800878477306, | |
| "grad_norm": 1.5982770919799805, | |
| "learning_rate": 2.564198639958456e-06, | |
| "loss": 0.1982, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.3937530502684237, | |
| "grad_norm": 1.3759297132492065, | |
| "learning_rate": 2.5493359354930404e-06, | |
| "loss": 0.1954, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.3957052220595412, | |
| "grad_norm": 1.6432647705078125, | |
| "learning_rate": 2.5345016774113223e-06, | |
| "loss": 0.2006, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.397657393850659, | |
| "grad_norm": 1.5497634410858154, | |
| "learning_rate": 2.5196960379037783e-06, | |
| "loss": 0.1987, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.3996095656417764, | |
| "grad_norm": 1.5066088438034058, | |
| "learning_rate": 2.5049191888286846e-06, | |
| "loss": 0.1854, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.4015617374328941, | |
| "grad_norm": 1.6009175777435303, | |
| "learning_rate": 2.490171301710125e-06, | |
| "loss": 0.2093, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.4035139092240116, | |
| "grad_norm": 1.9166560173034668, | |
| "learning_rate": 2.475452547736013e-06, | |
| "loss": 0.2036, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.4054660810151294, | |
| "grad_norm": 1.4790300130844116, | |
| "learning_rate": 2.460763097756086e-06, | |
| "loss": 0.1901, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.4074182528062469, | |
| "grad_norm": 1.6410057544708252, | |
| "learning_rate": 2.446103122279938e-06, | |
| "loss": 0.1968, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.4093704245973646, | |
| "grad_norm": 1.4774519205093384, | |
| "learning_rate": 2.431472791475033e-06, | |
| "loss": 0.1826, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.4113225963884821, | |
| "grad_norm": 1.4628077745437622, | |
| "learning_rate": 2.416872275164732e-06, | |
| "loss": 0.1876, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.4132747681795998, | |
| "grad_norm": 1.589036464691162, | |
| "learning_rate": 2.402301742826314e-06, | |
| "loss": 0.2032, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.4152269399707174, | |
| "grad_norm": 1.8331555128097534, | |
| "learning_rate": 2.3877613635890233e-06, | |
| "loss": 0.2018, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.417179111761835, | |
| "grad_norm": 1.4858099222183228, | |
| "learning_rate": 2.373251306232095e-06, | |
| "loss": 0.1809, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.4191312835529526, | |
| "grad_norm": 1.6922942399978638, | |
| "learning_rate": 2.3587717391827997e-06, | |
| "loss": 0.2195, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.4210834553440703, | |
| "grad_norm": 1.8440519571304321, | |
| "learning_rate": 2.344322830514489e-06, | |
| "loss": 0.1992, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.4230356271351878, | |
| "grad_norm": 1.589641809463501, | |
| "learning_rate": 2.329904747944639e-06, | |
| "loss": 0.1952, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.4249877989263056, | |
| "grad_norm": 2.5631532669067383, | |
| "learning_rate": 2.315517658832914e-06, | |
| "loss": 0.2036, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.426939970717423, | |
| "grad_norm": 1.4761751890182495, | |
| "learning_rate": 2.3011617301792144e-06, | |
| "loss": 0.2157, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.4288921425085408, | |
| "grad_norm": 1.3230587244033813, | |
| "learning_rate": 2.2868371286217458e-06, | |
| "loss": 0.2074, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.4308443142996583, | |
| "grad_norm": 1.5246955156326294, | |
| "learning_rate": 2.272544020435073e-06, | |
| "loss": 0.1976, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.432796486090776, | |
| "grad_norm": 1.3427962064743042, | |
| "learning_rate": 2.2582825715282043e-06, | |
| "loss": 0.199, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.4347486578818935, | |
| "grad_norm": 1.5608947277069092, | |
| "learning_rate": 2.2440529474426576e-06, | |
| "loss": 0.2091, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.4367008296730113, | |
| "grad_norm": 1.4694558382034302, | |
| "learning_rate": 2.229855313350539e-06, | |
| "loss": 0.2016, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.4386530014641288, | |
| "grad_norm": 1.3749107122421265, | |
| "learning_rate": 2.21568983405263e-06, | |
| "loss": 0.1969, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.4406051732552465, | |
| "grad_norm": 1.803125262260437, | |
| "learning_rate": 2.2015566739764647e-06, | |
| "loss": 0.1825, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.442557345046364, | |
| "grad_norm": 1.7458932399749756, | |
| "learning_rate": 2.187455997174437e-06, | |
| "loss": 0.2274, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.4445095168374817, | |
| "grad_norm": 1.9437659978866577, | |
| "learning_rate": 2.1733879673218754e-06, | |
| "loss": 0.1892, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.4464616886285993, | |
| "grad_norm": 1.3329747915267944, | |
| "learning_rate": 2.15935274771517e-06, | |
| "loss": 0.1908, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.448413860419717, | |
| "grad_norm": 1.2210984230041504, | |
| "learning_rate": 2.145350501269848e-06, | |
| "loss": 0.2083, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.4503660322108345, | |
| "grad_norm": 1.6458992958068848, | |
| "learning_rate": 2.1313813905187057e-06, | |
| "loss": 0.1919, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.4523182040019522, | |
| "grad_norm": 1.724516749382019, | |
| "learning_rate": 2.117445577609907e-06, | |
| "loss": 0.2122, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.4542703757930697, | |
| "grad_norm": 1.9065065383911133, | |
| "learning_rate": 2.103543224305108e-06, | |
| "loss": 0.1766, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.4562225475841875, | |
| "grad_norm": 1.6535025835037231, | |
| "learning_rate": 2.0896744919775857e-06, | |
| "loss": 0.2011, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.458174719375305, | |
| "grad_norm": 1.4374114274978638, | |
| "learning_rate": 2.075839541610347e-06, | |
| "loss": 0.1874, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.4601268911664227, | |
| "grad_norm": 1.3221102952957153, | |
| "learning_rate": 2.062038533794278e-06, | |
| "loss": 0.1931, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.4620790629575402, | |
| "grad_norm": 1.524937391281128, | |
| "learning_rate": 2.0482716287262655e-06, | |
| "loss": 0.2148, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.464031234748658, | |
| "grad_norm": 1.4761067628860474, | |
| "learning_rate": 2.0345389862073515e-06, | |
| "loss": 0.1927, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.4659834065397754, | |
| "grad_norm": 1.3123400211334229, | |
| "learning_rate": 2.020840765640868e-06, | |
| "loss": 0.2027, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.4679355783308932, | |
| "grad_norm": 1.5982189178466797, | |
| "learning_rate": 2.0071771260305917e-06, | |
| "loss": 0.2187, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.4698877501220107, | |
| "grad_norm": 1.5544073581695557, | |
| "learning_rate": 1.993548225978892e-06, | |
| "loss": 0.1969, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.4718399219131284, | |
| "grad_norm": 1.753106951713562, | |
| "learning_rate": 1.9799542236848996e-06, | |
| "loss": 0.204, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.473792093704246, | |
| "grad_norm": 1.5062081813812256, | |
| "learning_rate": 1.966395276942663e-06, | |
| "loss": 0.1828, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.4757442654953636, | |
| "grad_norm": 1.4779527187347412, | |
| "learning_rate": 1.95287154313932e-06, | |
| "loss": 0.2011, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.4776964372864811, | |
| "grad_norm": 1.6511406898498535, | |
| "learning_rate": 1.9393831792532714e-06, | |
| "loss": 0.1944, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.4796486090775989, | |
| "grad_norm": 2.017911672592163, | |
| "learning_rate": 1.9259303418523505e-06, | |
| "loss": 0.1881, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.4816007808687164, | |
| "grad_norm": 1.4355378150939941, | |
| "learning_rate": 1.9125131870920212e-06, | |
| "loss": 0.1973, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.4835529526598341, | |
| "grad_norm": 1.6479721069335938, | |
| "learning_rate": 1.8991318707135515e-06, | |
| "loss": 0.1911, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.4855051244509516, | |
| "grad_norm": 1.593674659729004, | |
| "learning_rate": 1.8857865480422143e-06, | |
| "loss": 0.1977, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.4874572962420693, | |
| "grad_norm": 1.2697453498840332, | |
| "learning_rate": 1.8724773739854763e-06, | |
| "loss": 0.1861, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.4894094680331869, | |
| "grad_norm": 1.7640104293823242, | |
| "learning_rate": 1.8592045030312094e-06, | |
| "loss": 0.2057, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.4913616398243046, | |
| "grad_norm": 1.9129244089126587, | |
| "learning_rate": 1.8459680892458932e-06, | |
| "loss": 0.1847, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.493313811615422, | |
| "grad_norm": 1.7413532733917236, | |
| "learning_rate": 1.8327682862728174e-06, | |
| "loss": 0.217, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.4952659834065398, | |
| "grad_norm": 1.9197639226913452, | |
| "learning_rate": 1.8196052473303227e-06, | |
| "loss": 0.2086, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.4972181551976573, | |
| "grad_norm": 1.4320746660232544, | |
| "learning_rate": 1.8064791252099923e-06, | |
| "loss": 0.2005, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.499170326988775, | |
| "grad_norm": 1.49395751953125, | |
| "learning_rate": 1.793390072274902e-06, | |
| "loss": 0.1783, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.5011224987798926, | |
| "grad_norm": 1.5681458711624146, | |
| "learning_rate": 1.7803382404578356e-06, | |
| "loss": 0.1857, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.5030746705710103, | |
| "grad_norm": 1.4596507549285889, | |
| "learning_rate": 1.7673237812595334e-06, | |
| "loss": 0.2048, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.505026842362128, | |
| "grad_norm": 1.6496530771255493, | |
| "learning_rate": 1.7543468457469264e-06, | |
| "loss": 0.2015, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.5069790141532455, | |
| "grad_norm": 1.67415452003479, | |
| "learning_rate": 1.741407584551388e-06, | |
| "loss": 0.1899, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.508931185944363, | |
| "grad_norm": 1.4106497764587402, | |
| "learning_rate": 1.728506147866975e-06, | |
| "loss": 0.1966, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.5108833577354808, | |
| "grad_norm": 1.4033384323120117, | |
| "learning_rate": 1.715642685448698e-06, | |
| "loss": 0.1923, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.5128355295265985, | |
| "grad_norm": 1.6058449745178223, | |
| "learning_rate": 1.7028173466107756e-06, | |
| "loss": 0.1808, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.514787701317716, | |
| "grad_norm": 1.6106737852096558, | |
| "learning_rate": 1.6900302802249002e-06, | |
| "loss": 0.1939, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.5167398731088335, | |
| "grad_norm": 1.6454293727874756, | |
| "learning_rate": 1.6772816347185155e-06, | |
| "loss": 0.2005, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.5186920448999512, | |
| "grad_norm": 1.386349081993103, | |
| "learning_rate": 1.6645715580730842e-06, | |
| "loss": 0.1964, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.520644216691069, | |
| "grad_norm": 1.464933156967163, | |
| "learning_rate": 1.651900197822382e-06, | |
| "loss": 0.1882, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.5225963884821865, | |
| "grad_norm": 1.5540884733200073, | |
| "learning_rate": 1.6392677010507768e-06, | |
| "loss": 0.1997, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.524548560273304, | |
| "grad_norm": 1.8291113376617432, | |
| "learning_rate": 1.626674214391526e-06, | |
| "loss": 0.2043, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.5265007320644217, | |
| "grad_norm": 1.2215909957885742, | |
| "learning_rate": 1.6141198840250672e-06, | |
| "loss": 0.1876, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.5284529038555394, | |
| "grad_norm": 1.7053108215332031, | |
| "learning_rate": 1.6016048556773318e-06, | |
| "loss": 0.1926, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.530405075646657, | |
| "grad_norm": 2.2207212448120117, | |
| "learning_rate": 1.5891292746180453e-06, | |
| "loss": 0.1906, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.5323572474377745, | |
| "grad_norm": 1.6570477485656738, | |
| "learning_rate": 1.5766932856590467e-06, | |
| "loss": 0.1955, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.5343094192288922, | |
| "grad_norm": 3.652426242828369, | |
| "learning_rate": 1.564297033152603e-06, | |
| "loss": 0.1795, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.53626159102001, | |
| "grad_norm": 1.6970198154449463, | |
| "learning_rate": 1.5519406609897337e-06, | |
| "loss": 0.2021, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.5382137628111274, | |
| "grad_norm": 1.788756251335144, | |
| "learning_rate": 1.5396243125985467e-06, | |
| "loss": 0.2041, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.540165934602245, | |
| "grad_norm": 1.713059902191162, | |
| "learning_rate": 1.5273481309425614e-06, | |
| "loss": 0.2039, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.5421181063933627, | |
| "grad_norm": 1.5806763172149658, | |
| "learning_rate": 1.5151122585190697e-06, | |
| "loss": 0.1787, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.5440702781844804, | |
| "grad_norm": 1.6198954582214355, | |
| "learning_rate": 1.5029168373574553e-06, | |
| "loss": 0.2025, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.546022449975598, | |
| "grad_norm": 1.5160976648330688, | |
| "learning_rate": 1.4907620090175678e-06, | |
| "loss": 0.1842, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.5479746217667154, | |
| "grad_norm": 1.5395866632461548, | |
| "learning_rate": 1.4786479145880684e-06, | |
| "loss": 0.1876, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.5499267935578331, | |
| "grad_norm": 2.082068681716919, | |
| "learning_rate": 1.466574694684792e-06, | |
| "loss": 0.1865, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.5518789653489509, | |
| "grad_norm": 1.5307166576385498, | |
| "learning_rate": 1.45454248944912e-06, | |
| "loss": 0.2111, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.5538311371400684, | |
| "grad_norm": 1.3423751592636108, | |
| "learning_rate": 1.4425514385463513e-06, | |
| "loss": 0.2022, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.5557833089311859, | |
| "grad_norm": 1.5550248622894287, | |
| "learning_rate": 1.4306016811640804e-06, | |
| "loss": 0.1882, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.5577354807223036, | |
| "grad_norm": 1.5491834878921509, | |
| "learning_rate": 1.4186933560105798e-06, | |
| "loss": 0.1936, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.5596876525134213, | |
| "grad_norm": 1.489673137664795, | |
| "learning_rate": 1.4068266013131954e-06, | |
| "loss": 0.1961, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.5616398243045388, | |
| "grad_norm": 1.7777575254440308, | |
| "learning_rate": 1.3950015548167372e-06, | |
| "loss": 0.1868, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.5635919960956564, | |
| "grad_norm": 1.5120434761047363, | |
| "learning_rate": 1.383218353781885e-06, | |
| "loss": 0.1861, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.565544167886774, | |
| "grad_norm": 1.5217742919921875, | |
| "learning_rate": 1.3714771349835871e-06, | |
| "loss": 0.1991, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.5674963396778918, | |
| "grad_norm": 1.8366824388504028, | |
| "learning_rate": 1.3597780347094814e-06, | |
| "loss": 0.1924, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.5694485114690093, | |
| "grad_norm": 1.8791022300720215, | |
| "learning_rate": 1.3481211887583101e-06, | |
| "loss": 0.1798, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.5714006832601268, | |
| "grad_norm": 1.4695504903793335, | |
| "learning_rate": 1.3365067324383418e-06, | |
| "loss": 0.1861, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.5733528550512446, | |
| "grad_norm": 1.619231939315796, | |
| "learning_rate": 1.3249348005658047e-06, | |
| "loss": 0.2067, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.5753050268423623, | |
| "grad_norm": 1.5700544118881226, | |
| "learning_rate": 1.3134055274633135e-06, | |
| "loss": 0.177, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.5772571986334798, | |
| "grad_norm": 1.4808869361877441, | |
| "learning_rate": 1.3019190469583238e-06, | |
| "loss": 0.1878, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.5792093704245973, | |
| "grad_norm": 1.8071080446243286, | |
| "learning_rate": 1.2904754923815615e-06, | |
| "loss": 0.1895, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.581161542215715, | |
| "grad_norm": 1.6094269752502441, | |
| "learning_rate": 1.2790749965654964e-06, | |
| "loss": 0.2031, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.5831137140068328, | |
| "grad_norm": 1.8464001417160034, | |
| "learning_rate": 1.2677176918427769e-06, | |
| "loss": 0.1908, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.5850658857979503, | |
| "grad_norm": 1.3580669164657593, | |
| "learning_rate": 1.256403710044713e-06, | |
| "loss": 0.1877, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.5870180575890678, | |
| "grad_norm": 1.6515955924987793, | |
| "learning_rate": 1.245133182499737e-06, | |
| "loss": 0.1926, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.5889702293801855, | |
| "grad_norm": 1.3067126274108887, | |
| "learning_rate": 1.2339062400318746e-06, | |
| "loss": 0.1841, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.5909224011713032, | |
| "grad_norm": 1.7133537530899048, | |
| "learning_rate": 1.222723012959245e-06, | |
| "loss": 0.192, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.5928745729624207, | |
| "grad_norm": 1.7153127193450928, | |
| "learning_rate": 1.2115836310925222e-06, | |
| "loss": 0.1875, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.5948267447535383, | |
| "grad_norm": 1.5071845054626465, | |
| "learning_rate": 1.2004882237334508e-06, | |
| "loss": 0.1881, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.596778916544656, | |
| "grad_norm": 1.383007526397705, | |
| "learning_rate": 1.1894369196733296e-06, | |
| "loss": 0.1963, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.5987310883357737, | |
| "grad_norm": 2.016113042831421, | |
| "learning_rate": 1.1784298471915279e-06, | |
| "loss": 0.192, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.6006832601268912, | |
| "grad_norm": 1.7588931322097778, | |
| "learning_rate": 1.1674671340539895e-06, | |
| "loss": 0.1763, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.6026354319180087, | |
| "grad_norm": 1.6065738201141357, | |
| "learning_rate": 1.156548907511751e-06, | |
| "loss": 0.1782, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.6045876037091265, | |
| "grad_norm": 1.385704517364502, | |
| "learning_rate": 1.1456752942994675e-06, | |
| "loss": 0.1895, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.6065397755002442, | |
| "grad_norm": 1.7642816305160522, | |
| "learning_rate": 1.134846420633936e-06, | |
| "loss": 0.1863, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.6084919472913617, | |
| "grad_norm": 1.86152982711792, | |
| "learning_rate": 1.1240624122126364e-06, | |
| "loss": 0.1973, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.6104441190824792, | |
| "grad_norm": 1.391201376914978, | |
| "learning_rate": 1.11332339421227e-06, | |
| "loss": 0.2125, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.612396290873597, | |
| "grad_norm": 1.69306218624115, | |
| "learning_rate": 1.102629491287306e-06, | |
| "loss": 0.183, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.6143484626647147, | |
| "grad_norm": 1.454978585243225, | |
| "learning_rate": 1.0919808275685312e-06, | |
| "loss": 0.1756, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.6163006344558322, | |
| "grad_norm": 2.039059638977051, | |
| "learning_rate": 1.0813775266616178e-06, | |
| "loss": 0.2104, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.6182528062469497, | |
| "grad_norm": 1.7883342504501343, | |
| "learning_rate": 1.0708197116456814e-06, | |
| "loss": 0.186, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.6202049780380674, | |
| "grad_norm": 1.4627658128738403, | |
| "learning_rate": 1.060307505071856e-06, | |
| "loss": 0.1895, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.6221571498291851, | |
| "grad_norm": 1.8400914669036865, | |
| "learning_rate": 1.0498410289618661e-06, | |
| "loss": 0.2073, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.6241093216203026, | |
| "grad_norm": 1.562545657157898, | |
| "learning_rate": 1.039420404806618e-06, | |
| "loss": 0.1762, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.6260614934114201, | |
| "grad_norm": 1.5337347984313965, | |
| "learning_rate": 1.0290457535647851e-06, | |
| "loss": 0.185, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.6280136652025379, | |
| "grad_norm": 1.2989600896835327, | |
| "learning_rate": 1.0187171956614034e-06, | |
| "loss": 0.1979, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.6299658369936556, | |
| "grad_norm": 1.4280548095703125, | |
| "learning_rate": 1.0084348509864778e-06, | |
| "loss": 0.1819, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.631918008784773, | |
| "grad_norm": 1.5403130054473877, | |
| "learning_rate": 9.981988388935815e-07, | |
| "loss": 0.1998, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.6338701805758906, | |
| "grad_norm": 1.5721741914749146, | |
| "learning_rate": 9.88009278198484e-07, | |
| "loss": 0.1946, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.6358223523670083, | |
| "grad_norm": 1.484516978263855, | |
| "learning_rate": 9.778662871777577e-07, | |
| "loss": 0.1869, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.637774524158126, | |
| "grad_norm": 1.5680903196334839, | |
| "learning_rate": 9.677699835674165e-07, | |
| "loss": 0.1902, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.6397266959492436, | |
| "grad_norm": 2.306062936782837, | |
| "learning_rate": 9.577204845615423e-07, | |
| "loss": 0.1792, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.641678867740361, | |
| "grad_norm": 1.3779124021530151, | |
| "learning_rate": 9.477179068109276e-07, | |
| "loss": 0.1749, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.6436310395314788, | |
| "grad_norm": 1.7077033519744873, | |
| "learning_rate": 9.377623664217223e-07, | |
| "loss": 0.1803, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.6455832113225966, | |
| "grad_norm": 1.6295830011367798, | |
| "learning_rate": 9.278539789540791e-07, | |
| "loss": 0.1791, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.647535383113714, | |
| "grad_norm": 1.654883861541748, | |
| "learning_rate": 9.179928594208226e-07, | |
| "loss": 0.1897, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.6494875549048316, | |
| "grad_norm": 1.6051833629608154, | |
| "learning_rate": 9.08179122286107e-07, | |
| "loss": 0.1815, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.6514397266959493, | |
| "grad_norm": 1.543062448501587, | |
| "learning_rate": 8.984128814640913e-07, | |
| "loss": 0.1948, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.653391898487067, | |
| "grad_norm": 1.3002710342407227, | |
| "learning_rate": 8.886942503176111e-07, | |
| "loss": 0.1682, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.6553440702781845, | |
| "grad_norm": 1.279937982559204, | |
| "learning_rate": 8.790233416568705e-07, | |
| "loss": 0.1865, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.657296242069302, | |
| "grad_norm": 1.4807121753692627, | |
| "learning_rate": 8.694002677381275e-07, | |
| "loss": 0.1794, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.6592484138604198, | |
| "grad_norm": 1.7030433416366577, | |
| "learning_rate": 8.598251402623936e-07, | |
| "loss": 0.2162, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.6612005856515375, | |
| "grad_norm": 1.3917113542556763, | |
| "learning_rate": 8.502980703741365e-07, | |
| "loss": 0.2069, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.663152757442655, | |
| "grad_norm": 1.782596468925476, | |
| "learning_rate": 8.408191686599859e-07, | |
| "loss": 0.1957, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.6651049292337725, | |
| "grad_norm": 1.685548186302185, | |
| "learning_rate": 8.313885451474568e-07, | |
| "loss": 0.1814, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.6670571010248902, | |
| "grad_norm": 1.159106969833374, | |
| "learning_rate": 8.22006309303669e-07, | |
| "loss": 0.1897, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.669009272816008, | |
| "grad_norm": 1.6422431468963623, | |
| "learning_rate": 8.126725700340765e-07, | |
| "loss": 0.1749, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.6709614446071255, | |
| "grad_norm": 1.769709825515747, | |
| "learning_rate": 8.033874356811999e-07, | |
| "loss": 0.1887, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.672913616398243, | |
| "grad_norm": 1.5047273635864258, | |
| "learning_rate": 7.941510140233782e-07, | |
| "loss": 0.1972, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.6748657881893607, | |
| "grad_norm": 1.2721914052963257, | |
| "learning_rate": 7.849634122735051e-07, | |
| "loss": 0.1889, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.6768179599804784, | |
| "grad_norm": 1.4038310050964355, | |
| "learning_rate": 7.758247370777988e-07, | |
| "loss": 0.1846, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.678770131771596, | |
| "grad_norm": 2.210256576538086, | |
| "learning_rate": 7.66735094514549e-07, | |
| "loss": 0.1937, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.6807223035627135, | |
| "grad_norm": 2.027031183242798, | |
| "learning_rate": 7.576945900928989e-07, | |
| "loss": 0.1819, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.6826744753538312, | |
| "grad_norm": 2.145761251449585, | |
| "learning_rate": 7.487033287516121e-07, | |
| "loss": 0.1791, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.684626647144949, | |
| "grad_norm": 1.670264720916748, | |
| "learning_rate": 7.397614148578546e-07, | |
| "loss": 0.1909, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.6865788189360664, | |
| "grad_norm": 1.315961241722107, | |
| "learning_rate": 7.308689522059936e-07, | |
| "loss": 0.1618, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.688530990727184, | |
| "grad_norm": 1.3299071788787842, | |
| "learning_rate": 7.220260440163756e-07, | |
| "loss": 0.1865, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.6904831625183017, | |
| "grad_norm": 1.5446456670761108, | |
| "learning_rate": 7.132327929341448e-07, | |
| "loss": 0.1981, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.6924353343094194, | |
| "grad_norm": 1.5094953775405884, | |
| "learning_rate": 7.044893010280401e-07, | |
| "loss": 0.1758, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.694387506100537, | |
| "grad_norm": 1.5475033521652222, | |
| "learning_rate": 6.95795669789216e-07, | |
| "loss": 0.1774, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.6963396778916544, | |
| "grad_norm": 1.6808737516403198, | |
| "learning_rate": 6.871520001300641e-07, | |
| "loss": 0.1817, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.6982918496827721, | |
| "grad_norm": 1.6497918367385864, | |
| "learning_rate": 6.785583923830403e-07, | |
| "loss": 0.2037, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.7002440214738899, | |
| "grad_norm": 1.6453701257705688, | |
| "learning_rate": 6.70014946299501e-07, | |
| "loss": 0.1922, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.7021961932650074, | |
| "grad_norm": 1.8417551517486572, | |
| "learning_rate": 6.615217610485425e-07, | |
| "loss": 0.1957, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.7041483650561249, | |
| "grad_norm": 1.4647294282913208, | |
| "learning_rate": 6.530789352158556e-07, | |
| "loss": 0.177, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.7061005368472426, | |
| "grad_norm": 1.6255731582641602, | |
| "learning_rate": 6.446865668025764e-07, | |
| "loss": 0.1863, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.7080527086383603, | |
| "grad_norm": 1.3874322175979614, | |
| "learning_rate": 6.363447532241518e-07, | |
| "loss": 0.1787, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.7100048804294778, | |
| "grad_norm": 1.8756142854690552, | |
| "learning_rate": 6.280535913092039e-07, | |
| "loss": 0.2087, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.7119570522205954, | |
| "grad_norm": 1.6572879552841187, | |
| "learning_rate": 6.198131772984123e-07, | |
| "loss": 0.1874, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.713909224011713, | |
| "grad_norm": 1.7034530639648438, | |
| "learning_rate": 6.11623606843394e-07, | |
| "loss": 0.1744, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.7158613958028308, | |
| "grad_norm": 2.1638829708099365, | |
| "learning_rate": 6.034849750055922e-07, | |
| "loss": 0.1873, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.7178135675939483, | |
| "grad_norm": 1.5946378707885742, | |
| "learning_rate": 5.953973762551746e-07, | |
| "loss": 0.1879, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.7197657393850658, | |
| "grad_norm": 1.7199867963790894, | |
| "learning_rate": 5.873609044699347e-07, | |
| "loss": 0.1881, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.7217179111761836, | |
| "grad_norm": 1.6096932888031006, | |
| "learning_rate": 5.793756529342054e-07, | |
| "loss": 0.195, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.7236700829673013, | |
| "grad_norm": 1.6395195722579956, | |
| "learning_rate": 5.714417143377704e-07, | |
| "loss": 0.1899, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.7256222547584188, | |
| "grad_norm": 1.9110219478607178, | |
| "learning_rate": 5.635591807747997e-07, | |
| "loss": 0.2085, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.7275744265495363, | |
| "grad_norm": 1.688049554824829, | |
| "learning_rate": 5.557281437427647e-07, | |
| "loss": 0.1835, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.729526598340654, | |
| "grad_norm": 1.2737325429916382, | |
| "learning_rate": 5.479486941413914e-07, | |
| "loss": 0.1772, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.7314787701317718, | |
| "grad_norm": 1.3557264804840088, | |
| "learning_rate": 5.402209222715915e-07, | |
| "loss": 0.1809, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.7334309419228893, | |
| "grad_norm": 1.7743897438049316, | |
| "learning_rate": 5.325449178344272e-07, | |
| "loss": 0.1842, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.7353831137140068, | |
| "grad_norm": 1.759393572807312, | |
| "learning_rate": 5.249207699300607e-07, | |
| "loss": 0.1829, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.7373352855051245, | |
| "grad_norm": 1.3803874254226685, | |
| "learning_rate": 5.173485670567241e-07, | |
| "loss": 0.198, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.7392874572962422, | |
| "grad_norm": 1.1466678380966187, | |
| "learning_rate": 5.098283971096923e-07, | |
| "loss": 0.1861, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.7412396290873597, | |
| "grad_norm": 1.2843297719955444, | |
| "learning_rate": 5.02360347380258e-07, | |
| "loss": 0.1835, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.7431918008784772, | |
| "grad_norm": 1.6574630737304688, | |
| "learning_rate": 4.949445045547253e-07, | |
| "loss": 0.1957, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.745143972669595, | |
| "grad_norm": 1.6570775508880615, | |
| "learning_rate": 4.875809547133991e-07, | |
| "loss": 0.173, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.7470961444607127, | |
| "grad_norm": 1.4477870464324951, | |
| "learning_rate": 4.802697833295888e-07, | |
| "loss": 0.1908, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.7490483162518302, | |
| "grad_norm": 1.4322397708892822, | |
| "learning_rate": 4.7301107526861125e-07, | |
| "loss": 0.1843, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.7510004880429477, | |
| "grad_norm": 1.7200080156326294, | |
| "learning_rate": 4.65804914786811e-07, | |
| "loss": 0.1844, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.7529526598340655, | |
| "grad_norm": 1.5689972639083862, | |
| "learning_rate": 4.5865138553057963e-07, | |
| "loss": 0.1751, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.7549048316251832, | |
| "grad_norm": 2.6789133548736572, | |
| "learning_rate": 4.5155057053538564e-07, | |
| "loss": 0.1781, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.7568570034163007, | |
| "grad_norm": 1.8074640035629272, | |
| "learning_rate": 4.445025522248109e-07, | |
| "loss": 0.1972, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.7588091752074182, | |
| "grad_norm": 1.4728846549987793, | |
| "learning_rate": 4.375074124095902e-07, | |
| "loss": 0.1837, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.760761346998536, | |
| "grad_norm": 1.4168732166290283, | |
| "learning_rate": 4.3056523228666823e-07, | |
| "loss": 0.1889, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.7627135187896537, | |
| "grad_norm": 1.5875383615493774, | |
| "learning_rate": 4.2367609243825215e-07, | |
| "loss": 0.1811, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.7646656905807712, | |
| "grad_norm": 1.8759644031524658, | |
| "learning_rate": 4.1684007283087803e-07, | |
| "loss": 0.1887, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.7666178623718887, | |
| "grad_norm": 1.457709789276123, | |
| "learning_rate": 4.1005725281448083e-07, | |
| "loss": 0.1766, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.7685700341630064, | |
| "grad_norm": 1.5108357667922974, | |
| "learning_rate": 4.033277111214778e-07, | |
| "loss": 0.1781, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.7705222059541241, | |
| "grad_norm": 1.6905869245529175, | |
| "learning_rate": 3.966515258658465e-07, | |
| "loss": 0.206, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.7724743777452416, | |
| "grad_norm": 1.4353312253952026, | |
| "learning_rate": 3.9002877454222767e-07, | |
| "loss": 0.1712, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.7744265495363591, | |
| "grad_norm": 1.5684956312179565, | |
| "learning_rate": 3.834595340250208e-07, | |
| "loss": 0.1836, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.7763787213274769, | |
| "grad_norm": 1.705480933189392, | |
| "learning_rate": 3.7694388056748966e-07, | |
| "loss": 0.2001, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.7783308931185946, | |
| "grad_norm": 2.7086293697357178, | |
| "learning_rate": 3.704818898008811e-07, | |
| "loss": 0.194, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.780283064909712, | |
| "grad_norm": 1.3837997913360596, | |
| "learning_rate": 3.640736367335451e-07, | |
| "loss": 0.1961, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.7822352367008296, | |
| "grad_norm": 1.6206564903259277, | |
| "learning_rate": 3.577191957500653e-07, | |
| "loss": 0.1808, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.7841874084919473, | |
| "grad_norm": 1.6568931341171265, | |
| "learning_rate": 3.5141864061039534e-07, | |
| "loss": 0.1884, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.786139580283065, | |
| "grad_norm": 1.385784387588501, | |
| "learning_rate": 3.4517204444900143e-07, | |
| "loss": 0.1764, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.7880917520741826, | |
| "grad_norm": 1.6895047426223755, | |
| "learning_rate": 3.3897947977401426e-07, | |
| "loss": 0.1935, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.7900439238653, | |
| "grad_norm": 1.6801025867462158, | |
| "learning_rate": 3.328410184663883e-07, | |
| "loss": 0.1953, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.7919960956564178, | |
| "grad_norm": 1.7120310068130493, | |
| "learning_rate": 3.2675673177906543e-07, | |
| "loss": 0.1813, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 1.7939482674475355, | |
| "grad_norm": 1.6116238832473755, | |
| "learning_rate": 3.207266903361506e-07, | |
| "loss": 0.1741, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 1.795900439238653, | |
| "grad_norm": 1.5066558122634888, | |
| "learning_rate": 3.1475096413208895e-07, | |
| "loss": 0.1795, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.7978526110297706, | |
| "grad_norm": 1.6561325788497925, | |
| "learning_rate": 3.0882962253085513e-07, | |
| "loss": 0.179, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 1.7998047828208883, | |
| "grad_norm": 1.5498188734054565, | |
| "learning_rate": 3.029627342651481e-07, | |
| "loss": 0.1898, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 1.8017569546120058, | |
| "grad_norm": 1.6732233762741089, | |
| "learning_rate": 2.97150367435593e-07, | |
| "loss": 0.1996, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 1.8037091264031235, | |
| "grad_norm": 1.2501994371414185, | |
| "learning_rate": 2.913925895099512e-07, | |
| "loss": 0.1855, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 1.805661298194241, | |
| "grad_norm": 1.7852662801742554, | |
| "learning_rate": 2.8568946732233536e-07, | |
| "loss": 0.1868, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 1.8076134699853585, | |
| "grad_norm": 1.6470123529434204, | |
| "learning_rate": 2.8004106707243685e-07, | |
| "loss": 0.1752, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 1.8095656417764763, | |
| "grad_norm": 1.574867606163025, | |
| "learning_rate": 2.7444745432475217e-07, | |
| "loss": 0.1875, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 1.811517813567594, | |
| "grad_norm": 1.4707486629486084, | |
| "learning_rate": 2.6890869400782893e-07, | |
| "loss": 0.1852, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 1.8134699853587115, | |
| "grad_norm": 1.4332098960876465, | |
| "learning_rate": 2.6342485041350786e-07, | |
| "loss": 0.2055, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 1.815422157149829, | |
| "grad_norm": 1.5197958946228027, | |
| "learning_rate": 2.579959871961746e-07, | |
| "loss": 0.1957, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 1.8173743289409467, | |
| "grad_norm": 1.361507773399353, | |
| "learning_rate": 2.5262216737202526e-07, | |
| "loss": 0.1793, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 1.8193265007320645, | |
| "grad_norm": 1.4850951433181763, | |
| "learning_rate": 2.4730345331833105e-07, | |
| "loss": 0.1759, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 1.821278672523182, | |
| "grad_norm": 1.5868844985961914, | |
| "learning_rate": 2.4203990677272025e-07, | |
| "loss": 0.1862, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 1.8232308443142995, | |
| "grad_norm": 1.2263453006744385, | |
| "learning_rate": 2.3683158883245294e-07, | |
| "loss": 0.1712, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 1.8251830161054172, | |
| "grad_norm": 1.3057667016983032, | |
| "learning_rate": 2.3167855995372025e-07, | |
| "loss": 0.1921, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 1.827135187896535, | |
| "grad_norm": 1.3871904611587524, | |
| "learning_rate": 2.2658087995093503e-07, | |
| "loss": 0.1763, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 1.8290873596876525, | |
| "grad_norm": 1.5762321949005127, | |
| "learning_rate": 2.215386079960441e-07, | |
| "loss": 0.1763, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 1.83103953147877, | |
| "grad_norm": 1.703233242034912, | |
| "learning_rate": 2.1655180261783704e-07, | |
| "loss": 0.1844, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 1.8329917032698877, | |
| "grad_norm": 1.7117513418197632, | |
| "learning_rate": 2.1162052170126956e-07, | |
| "loss": 0.1853, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 1.8349438750610054, | |
| "grad_norm": 1.9015542268753052, | |
| "learning_rate": 2.0674482248679018e-07, | |
| "loss": 0.1795, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.836896046852123, | |
| "grad_norm": 1.317375659942627, | |
| "learning_rate": 2.0192476156967456e-07, | |
| "loss": 0.1635, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 1.8388482186432404, | |
| "grad_norm": 1.704931616783142, | |
| "learning_rate": 1.9716039489937056e-07, | |
| "loss": 0.1863, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 1.8408003904343582, | |
| "grad_norm": 1.7894163131713867, | |
| "learning_rate": 1.9245177777884983e-07, | |
| "loss": 0.1647, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 1.842752562225476, | |
| "grad_norm": 1.5365114212036133, | |
| "learning_rate": 1.877989648639633e-07, | |
| "loss": 0.1836, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 1.8447047340165934, | |
| "grad_norm": 1.510296106338501, | |
| "learning_rate": 1.8320201016280626e-07, | |
| "loss": 0.181, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 1.846656905807711, | |
| "grad_norm": 1.6183487176895142, | |
| "learning_rate": 1.7866096703509472e-07, | |
| "loss": 0.1735, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 1.8486090775988286, | |
| "grad_norm": 1.6009883880615234, | |
| "learning_rate": 1.741758881915434e-07, | |
| "loss": 0.1989, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 1.8505612493899464, | |
| "grad_norm": 1.6394355297088623, | |
| "learning_rate": 1.6974682569325607e-07, | |
| "loss": 0.1928, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 1.8525134211810639, | |
| "grad_norm": 1.5997397899627686, | |
| "learning_rate": 1.6537383095111882e-07, | |
| "loss": 0.1839, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 1.8544655929721814, | |
| "grad_norm": 1.5100394487380981, | |
| "learning_rate": 1.6105695472520333e-07, | |
| "loss": 0.1794, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.8564177647632991, | |
| "grad_norm": 1.6228221654891968, | |
| "learning_rate": 1.567962471241813e-07, | |
| "loss": 0.19, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 1.8583699365544168, | |
| "grad_norm": 1.5703927278518677, | |
| "learning_rate": 1.52591757604737e-07, | |
| "loss": 0.1941, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 1.8603221083455344, | |
| "grad_norm": 1.9678072929382324, | |
| "learning_rate": 1.4844353497100006e-07, | |
| "loss": 0.1897, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 1.8622742801366519, | |
| "grad_norm": 1.8721849918365479, | |
| "learning_rate": 1.4435162737397203e-07, | |
| "loss": 0.2055, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 1.8642264519277696, | |
| "grad_norm": 1.1459565162658691, | |
| "learning_rate": 1.4031608231097394e-07, | |
| "loss": 0.2025, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 1.8661786237188873, | |
| "grad_norm": 1.595178246498108, | |
| "learning_rate": 1.3633694662508745e-07, | |
| "loss": 0.1656, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 1.8681307955100048, | |
| "grad_norm": 1.6833758354187012, | |
| "learning_rate": 1.3241426650461964e-07, | |
| "loss": 0.2007, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 1.8700829673011223, | |
| "grad_norm": 2.375056505203247, | |
| "learning_rate": 1.285480874825623e-07, | |
| "loss": 0.1792, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 1.87203513909224, | |
| "grad_norm": 1.5321182012557983, | |
| "learning_rate": 1.2473845443606081e-07, | |
| "loss": 0.1926, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 1.8739873108833578, | |
| "grad_norm": 1.4411838054656982, | |
| "learning_rate": 1.2098541158589883e-07, | |
| "loss": 0.1895, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.8759394826744753, | |
| "grad_norm": 2.2660744190216064, | |
| "learning_rate": 1.1728900249598052e-07, | |
| "loss": 0.1813, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 1.8778916544655928, | |
| "grad_norm": 1.5801745653152466, | |
| "learning_rate": 1.1364927007282866e-07, | |
| "loss": 0.185, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 1.8798438262567105, | |
| "grad_norm": 1.7089074850082397, | |
| "learning_rate": 1.1006625656508397e-07, | |
| "loss": 0.1918, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 1.8817959980478283, | |
| "grad_norm": 1.8887181282043457, | |
| "learning_rate": 1.0654000356301541e-07, | |
| "loss": 0.1834, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 1.8837481698389458, | |
| "grad_norm": 1.394489049911499, | |
| "learning_rate": 1.0307055199803573e-07, | |
| "loss": 0.1751, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 1.8857003416300633, | |
| "grad_norm": 1.383232593536377, | |
| "learning_rate": 9.965794214223056e-08, | |
| "loss": 0.1878, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 1.887652513421181, | |
| "grad_norm": 1.9068111181259155, | |
| "learning_rate": 9.630221360788728e-08, | |
| "loss": 0.2025, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 1.8896046852122987, | |
| "grad_norm": 1.5991621017456055, | |
| "learning_rate": 9.300340534703634e-08, | |
| "loss": 0.1905, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 1.8915568570034162, | |
| "grad_norm": 2.5463223457336426, | |
| "learning_rate": 8.976155565099953e-08, | |
| "loss": 0.1821, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 1.8935090287945338, | |
| "grad_norm": 1.5754520893096924, | |
| "learning_rate": 8.657670214994418e-08, | |
| "loss": 0.1762, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 1.8954612005856515, | |
| "grad_norm": 1.4129728078842163, | |
| "learning_rate": 8.344888181244847e-08, | |
| "loss": 0.191, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 1.8974133723767692, | |
| "grad_norm": 2.057875633239746, | |
| "learning_rate": 8.037813094507018e-08, | |
| "loss": 0.1849, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 1.8993655441678867, | |
| "grad_norm": 1.4810922145843506, | |
| "learning_rate": 7.736448519192752e-08, | |
| "loss": 0.1987, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 1.9013177159590042, | |
| "grad_norm": 1.797695517539978, | |
| "learning_rate": 7.440797953428169e-08, | |
| "loss": 0.169, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 1.903269887750122, | |
| "grad_norm": 1.7072638273239136, | |
| "learning_rate": 7.150864829013616e-08, | |
| "loss": 0.189, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 1.9052220595412397, | |
| "grad_norm": 2.731884002685547, | |
| "learning_rate": 6.866652511383298e-08, | |
| "loss": 0.207, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 1.9071742313323572, | |
| "grad_norm": 1.4508967399597168, | |
| "learning_rate": 6.588164299566546e-08, | |
| "loss": 0.1822, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 1.9091264031234747, | |
| "grad_norm": 1.656947374343872, | |
| "learning_rate": 6.315403426149558e-08, | |
| "loss": 0.1787, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 1.9110785749145924, | |
| "grad_norm": 1.576837182044983, | |
| "learning_rate": 6.048373057237489e-08, | |
| "loss": 0.1699, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 1.9130307467057102, | |
| "grad_norm": 1.460389494895935, | |
| "learning_rate": 5.787076292418203e-08, | |
| "loss": 0.196, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.9149829184968277, | |
| "grad_norm": 1.6283118724822998, | |
| "learning_rate": 5.531516164725858e-08, | |
| "loss": 0.1989, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 1.9169350902879452, | |
| "grad_norm": 1.3542603254318237, | |
| "learning_rate": 5.281695640605988e-08, | |
| "loss": 0.1888, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 1.918887262079063, | |
| "grad_norm": 1.46894109249115, | |
| "learning_rate": 5.03761761988103e-08, | |
| "loss": 0.1874, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 1.9208394338701806, | |
| "grad_norm": 1.651439905166626, | |
| "learning_rate": 4.799284935716519e-08, | |
| "loss": 0.1935, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 1.9227916056612981, | |
| "grad_norm": 1.5114413499832153, | |
| "learning_rate": 4.566700354588283e-08, | |
| "loss": 0.1844, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 1.9247437774524156, | |
| "grad_norm": 1.7399928569793701, | |
| "learning_rate": 4.339866576250407e-08, | |
| "loss": 0.1715, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 1.9266959492435334, | |
| "grad_norm": 1.6063965559005737, | |
| "learning_rate": 4.1187862337038195e-08, | |
| "loss": 0.1751, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 1.928648121034651, | |
| "grad_norm": 1.688035011291504, | |
| "learning_rate": 3.90346189316565e-08, | |
| "loss": 0.1775, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 1.9306002928257686, | |
| "grad_norm": 1.5691899061203003, | |
| "learning_rate": 3.6938960540396364e-08, | |
| "loss": 0.1785, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 1.9325524646168861, | |
| "grad_norm": 1.4794323444366455, | |
| "learning_rate": 3.490091148886932e-08, | |
| "loss": 0.1911, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 1.9345046364080039, | |
| "grad_norm": 1.5015360116958618, | |
| "learning_rate": 3.2920495433980125e-08, | |
| "loss": 0.1772, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 1.9364568081991216, | |
| "grad_norm": 1.6606435775756836, | |
| "learning_rate": 3.099773536365036e-08, | |
| "loss": 0.1876, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 1.938408979990239, | |
| "grad_norm": 1.2969989776611328, | |
| "learning_rate": 2.913265359655415e-08, | |
| "loss": 0.1781, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 1.9403611517813566, | |
| "grad_norm": 1.765059232711792, | |
| "learning_rate": 2.7325271781856176e-08, | |
| "loss": 0.1958, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 1.9423133235724743, | |
| "grad_norm": 1.4548059701919556, | |
| "learning_rate": 2.5575610898962987e-08, | |
| "loss": 0.1799, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 1.944265495363592, | |
| "grad_norm": 1.309770107269287, | |
| "learning_rate": 2.3883691257277074e-08, | |
| "loss": 0.1742, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 1.9462176671547096, | |
| "grad_norm": 1.6486570835113525, | |
| "learning_rate": 2.2249532495964287e-08, | |
| "loss": 0.1692, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 1.948169838945827, | |
| "grad_norm": 1.840489387512207, | |
| "learning_rate": 2.0673153583722904e-08, | |
| "loss": 0.1858, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 1.9501220107369448, | |
| "grad_norm": 1.6692792177200317, | |
| "learning_rate": 1.9154572818563254e-08, | |
| "loss": 0.1904, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 1.9520741825280625, | |
| "grad_norm": 1.500740647315979, | |
| "learning_rate": 1.7693807827598998e-08, | |
| "loss": 0.1858, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.95402635431918, | |
| "grad_norm": 1.4872006177902222, | |
| "learning_rate": 1.629087556683784e-08, | |
| "loss": 0.1755, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 1.9559785261102975, | |
| "grad_norm": 1.3963922262191772, | |
| "learning_rate": 1.4945792320989472e-08, | |
| "loss": 0.1785, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 1.9579306979014153, | |
| "grad_norm": 1.4054490327835083, | |
| "learning_rate": 1.3658573703271282e-08, | |
| "loss": 0.1828, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 1.959882869692533, | |
| "grad_norm": 1.5541080236434937, | |
| "learning_rate": 1.242923465523238e-08, | |
| "loss": 0.184, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 1.9618350414836505, | |
| "grad_norm": 1.6898419857025146, | |
| "learning_rate": 1.1257789446575407e-08, | |
| "loss": 0.1759, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 1.963787213274768, | |
| "grad_norm": 1.7505004405975342, | |
| "learning_rate": 1.0144251674995553e-08, | |
| "loss": 0.1915, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 1.9657393850658857, | |
| "grad_norm": 1.5951542854309082, | |
| "learning_rate": 9.088634266017915e-09, | |
| "loss": 0.1736, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 1.9676915568570035, | |
| "grad_norm": 1.5528618097305298, | |
| "learning_rate": 8.09094947285205e-09, | |
| "loss": 0.1866, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 1.969643728648121, | |
| "grad_norm": 1.4379768371582031, | |
| "learning_rate": 7.151208876245985e-09, | |
| "loss": 0.1918, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 1.9715959004392385, | |
| "grad_norm": 1.6501480340957642, | |
| "learning_rate": 6.269423384353546e-09, | |
| "loss": 0.1802, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 1.9735480722303562, | |
| "grad_norm": 2.3685152530670166, | |
| "learning_rate": 5.445603232608898e-09, | |
| "loss": 0.1853, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 1.975500244021474, | |
| "grad_norm": 1.6372160911560059, | |
| "learning_rate": 4.679757983604983e-09, | |
| "loss": 0.167, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 1.9774524158125915, | |
| "grad_norm": 1.8266721963882446, | |
| "learning_rate": 3.971896526984709e-09, | |
| "loss": 0.1874, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 1.979404587603709, | |
| "grad_norm": 1.66588294506073, | |
| "learning_rate": 3.322027079336043e-09, | |
| "loss": 0.1794, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 1.9813567593948267, | |
| "grad_norm": 1.63362455368042, | |
| "learning_rate": 2.7301571840993022e-09, | |
| "loss": 0.1829, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 1.9833089311859444, | |
| "grad_norm": 1.626795768737793, | |
| "learning_rate": 2.1962937114766715e-09, | |
| "loss": 0.1969, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 1.985261102977062, | |
| "grad_norm": 1.3988165855407715, | |
| "learning_rate": 1.7204428583533773e-09, | |
| "loss": 0.1714, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 1.9872132747681794, | |
| "grad_norm": 1.5400702953338623, | |
| "learning_rate": 1.3026101482266352e-09, | |
| "loss": 0.1771, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 1.9891654465592972, | |
| "grad_norm": 1.419124960899353, | |
| "learning_rate": 9.428004311412552e-10, | |
| "loss": 0.1691, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 1.991117618350415, | |
| "grad_norm": 1.6745810508728027, | |
| "learning_rate": 6.410178836324666e-10, | |
| "loss": 0.1791, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.9930697901415324, | |
| "grad_norm": 1.6668099164962769, | |
| "learning_rate": 3.9726600867817657e-10, | |
| "loss": 0.1849, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 1.99502196193265, | |
| "grad_norm": 1.4950345754623413, | |
| "learning_rate": 2.1154763565844894e-10, | |
| "loss": 0.2081, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 1.9969741337237676, | |
| "grad_norm": 2.625748872756958, | |
| "learning_rate": 8.386492032164129e-11, | |
| "loss": 0.1656, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 1.9989263055148854, | |
| "grad_norm": 1.790960431098938, | |
| "learning_rate": 1.4219344760535436e-11, | |
| "loss": 0.1974, | |
| "step": 10240 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10246, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.2355982926700924e+20, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |