{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.9853300733496333,
  "eval_steps": 500,
  "global_step": 1024,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0019559902200488996,
      "grad_norm": 16.282888412475586,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.0979,
      "step": 1
    },
    {
      "epoch": 0.003911980440097799,
      "grad_norm": 12.96766471862793,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.075,
      "step": 2
    },
    {
      "epoch": 0.0058679706601467,
      "grad_norm": 8.62896728515625,
      "learning_rate": 6e-06,
      "loss": 1.084,
      "step": 3
    },
    {
      "epoch": 0.007823960880195598,
      "grad_norm": 28.233434677124023,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.9951,
      "step": 4
    },
    {
      "epoch": 0.009779951100244499,
      "grad_norm": 20.809677124023438,
      "learning_rate": 1e-05,
      "loss": 1.0048,
      "step": 5
    },
    {
      "epoch": 0.0117359413202934,
      "grad_norm": 18.27992820739746,
      "learning_rate": 9.99998943199786e-06,
      "loss": 0.9456,
      "step": 6
    },
    {
      "epoch": 0.013691931540342298,
      "grad_norm": 7.64124870300293,
      "learning_rate": 9.999957728036109e-06,
      "loss": 0.984,
      "step": 7
    },
    {
      "epoch": 0.015647921760391197,
      "grad_norm": 6.329475402832031,
      "learning_rate": 9.99990488824877e-06,
      "loss": 0.9117,
      "step": 8
    },
    {
      "epoch": 0.017603911980440097,
      "grad_norm": 3.9235880374908447,
      "learning_rate": 9.999830912859204e-06,
      "loss": 0.9326,
      "step": 9
    },
    {
      "epoch": 0.019559902200488997,
      "grad_norm": 3.593456268310547,
      "learning_rate": 9.999735802180121e-06,
      "loss": 0.9041,
      "step": 10
    },
    {
      "epoch": 0.021515892420537898,
      "grad_norm": 2.6196131706237793,
      "learning_rate": 9.999619556613573e-06,
      "loss": 0.8991,
      "step": 11
    },
    {
      "epoch": 0.0234718826405868,
      "grad_norm": 3.5464529991149902,
      "learning_rate": 9.999482176650956e-06,
      "loss": 0.8731,
      "step": 12
    },
    {
      "epoch": 0.025427872860635695,
      "grad_norm": 2.584135055541992,
      "learning_rate": 9.999323662872998e-06,
      "loss": 0.8496,
      "step": 13
    },
    {
      "epoch": 0.027383863080684596,
      "grad_norm": 2.4647350311279297,
      "learning_rate": 9.99914401594977e-06,
      "loss": 0.8545,
      "step": 14
    },
    {
      "epoch": 0.029339853300733496,
      "grad_norm": 2.52474308013916,
      "learning_rate": 9.998943236640678e-06,
      "loss": 0.8293,
      "step": 15
    },
    {
      "epoch": 0.03129584352078239,
      "grad_norm": 2.1536269187927246,
      "learning_rate": 9.998721325794454e-06,
      "loss": 0.8203,
      "step": 16
    },
    {
      "epoch": 0.033251833740831294,
      "grad_norm": 2.681109666824341,
      "learning_rate": 9.998478284349163e-06,
      "loss": 0.8426,
      "step": 17
    },
    {
      "epoch": 0.035207823960880194,
      "grad_norm": 2.7063651084899902,
      "learning_rate": 9.998214113332184e-06,
      "loss": 0.8517,
      "step": 18
    },
    {
      "epoch": 0.037163814180929094,
      "grad_norm": 2.0724613666534424,
      "learning_rate": 9.997928813860228e-06,
      "loss": 0.8007,
      "step": 19
    },
    {
      "epoch": 0.039119804400977995,
      "grad_norm": 2.826772928237915,
      "learning_rate": 9.997622387139306e-06,
      "loss": 0.7952,
      "step": 20
    },
    {
      "epoch": 0.041075794621026895,
      "grad_norm": 2.115604877471924,
      "learning_rate": 9.99729483446475e-06,
      "loss": 0.8614,
      "step": 21
    },
    {
      "epoch": 0.043031784841075796,
      "grad_norm": 2.8321192264556885,
      "learning_rate": 9.996946157221192e-06,
      "loss": 0.8484,
      "step": 22
    },
    {
      "epoch": 0.044987775061124696,
      "grad_norm": 2.0314548015594482,
      "learning_rate": 9.996576356882558e-06,
      "loss": 0.8133,
      "step": 23
    },
    {
      "epoch": 0.0469437652811736,
      "grad_norm": 2.4443204402923584,
      "learning_rate": 9.99618543501207e-06,
      "loss": 0.7858,
      "step": 24
    },
    {
      "epoch": 0.0488997555012225,
      "grad_norm": 2.3830316066741943,
      "learning_rate": 9.99577339326223e-06,
      "loss": 0.8126,
      "step": 25
    },
    {
      "epoch": 0.05085574572127139,
      "grad_norm": 2.355252504348755,
      "learning_rate": 9.995340233374824e-06,
      "loss": 0.7963,
      "step": 26
    },
    {
      "epoch": 0.05281173594132029,
      "grad_norm": 2.411180019378662,
      "learning_rate": 9.994885957180905e-06,
      "loss": 0.8081,
      "step": 27
    },
    {
      "epoch": 0.05476772616136919,
      "grad_norm": 2.8028552532196045,
      "learning_rate": 9.994410566600792e-06,
      "loss": 0.8201,
      "step": 28
    },
    {
      "epoch": 0.05672371638141809,
      "grad_norm": 2.396615505218506,
      "learning_rate": 9.993914063644053e-06,
      "loss": 0.7734,
      "step": 29
    },
    {
      "epoch": 0.05867970660146699,
      "grad_norm": 1.850614309310913,
      "learning_rate": 9.993396450409508e-06,
      "loss": 0.7778,
      "step": 30
    },
    {
      "epoch": 0.06063569682151589,
      "grad_norm": 2.12536883354187,
      "learning_rate": 9.99285772908521e-06,
      "loss": 0.7897,
      "step": 31
    },
    {
      "epoch": 0.06259168704156479,
      "grad_norm": 2.718092203140259,
      "learning_rate": 9.992297901948446e-06,
      "loss": 0.7499,
      "step": 32
    },
    {
      "epoch": 0.06454767726161369,
      "grad_norm": 2.013169288635254,
      "learning_rate": 9.991716971365713e-06,
      "loss": 0.7705,
      "step": 33
    },
    {
      "epoch": 0.06650366748166259,
      "grad_norm": 2.0360498428344727,
      "learning_rate": 9.991114939792725e-06,
      "loss": 0.7935,
      "step": 34
    },
    {
      "epoch": 0.06845965770171149,
      "grad_norm": 1.9136534929275513,
      "learning_rate": 9.99049180977439e-06,
      "loss": 0.7495,
      "step": 35
    },
    {
      "epoch": 0.07041564792176039,
      "grad_norm": 1.7010070085525513,
      "learning_rate": 9.989847583944801e-06,
      "loss": 0.7608,
      "step": 36
    },
    {
      "epoch": 0.07237163814180929,
      "grad_norm": 1.7865066528320312,
      "learning_rate": 9.989182265027232e-06,
      "loss": 0.7994,
      "step": 37
    },
    {
      "epoch": 0.07432762836185819,
      "grad_norm": 1.774584412574768,
      "learning_rate": 9.98849585583412e-06,
      "loss": 0.7754,
      "step": 38
    },
    {
      "epoch": 0.07628361858190709,
      "grad_norm": 1.7736550569534302,
      "learning_rate": 9.987788359267053e-06,
      "loss": 0.7688,
      "step": 39
    },
    {
      "epoch": 0.07823960880195599,
      "grad_norm": 2.0538413524627686,
      "learning_rate": 9.987059778316763e-06,
      "loss": 0.7722,
      "step": 40
    },
    {
      "epoch": 0.08019559902200489,
      "grad_norm": 1.742871880531311,
      "learning_rate": 9.986310116063108e-06,
      "loss": 0.782,
      "step": 41
    },
    {
      "epoch": 0.08215158924205379,
      "grad_norm": 1.8255136013031006,
      "learning_rate": 9.985539375675058e-06,
      "loss": 0.7879,
      "step": 42
    },
    {
      "epoch": 0.08410757946210269,
      "grad_norm": 1.8212188482284546,
      "learning_rate": 9.98474756041069e-06,
      "loss": 0.7656,
      "step": 43
    },
    {
      "epoch": 0.08606356968215159,
      "grad_norm": 1.741162896156311,
      "learning_rate": 9.983934673617165e-06,
      "loss": 0.7877,
      "step": 44
    },
    {
      "epoch": 0.08801955990220049,
      "grad_norm": 1.810950517654419,
      "learning_rate": 9.98310071873072e-06,
      "loss": 0.7854,
      "step": 45
    },
    {
      "epoch": 0.08997555012224939,
      "grad_norm": 1.8254064321517944,
      "learning_rate": 9.982245699276651e-06,
      "loss": 0.784,
      "step": 46
    },
    {
      "epoch": 0.09193154034229829,
      "grad_norm": 1.6075822114944458,
      "learning_rate": 9.981369618869293e-06,
      "loss": 0.7326,
      "step": 47
    },
    {
      "epoch": 0.0938875305623472,
      "grad_norm": 1.767863392829895,
      "learning_rate": 9.980472481212015e-06,
      "loss": 0.7719,
      "step": 48
    },
    {
      "epoch": 0.0958435207823961,
      "grad_norm": 1.6684534549713135,
      "learning_rate": 9.979554290097201e-06,
      "loss": 0.7841,
      "step": 49
    },
    {
      "epoch": 0.097799511002445,
      "grad_norm": 1.8684815168380737,
      "learning_rate": 9.978615049406228e-06,
      "loss": 0.7519,
      "step": 50
    },
    {
      "epoch": 0.09975550122249388,
      "grad_norm": 1.5016576051712036,
      "learning_rate": 9.977654763109452e-06,
      "loss": 0.7075,
      "step": 51
    },
    {
      "epoch": 0.10171149144254278,
      "grad_norm": 1.8114155530929565,
      "learning_rate": 9.9766734352662e-06,
      "loss": 0.7666,
      "step": 52
    },
    {
      "epoch": 0.10366748166259168,
      "grad_norm": 1.9040465354919434,
      "learning_rate": 9.975671070024741e-06,
      "loss": 0.7444,
      "step": 53
    },
    {
      "epoch": 0.10562347188264058,
      "grad_norm": 1.6450563669204712,
      "learning_rate": 9.974647671622271e-06,
      "loss": 0.7803,
      "step": 54
    },
    {
      "epoch": 0.10757946210268948,
      "grad_norm": 1.5921177864074707,
      "learning_rate": 9.973603244384906e-06,
      "loss": 0.7692,
      "step": 55
    },
    {
      "epoch": 0.10953545232273838,
      "grad_norm": 1.5766173601150513,
      "learning_rate": 9.972537792727645e-06,
      "loss": 0.7744,
      "step": 56
    },
    {
      "epoch": 0.11149144254278728,
      "grad_norm": 1.4984322786331177,
      "learning_rate": 9.971451321154368e-06,
      "loss": 0.7854,
      "step": 57
    },
    {
      "epoch": 0.11344743276283618,
      "grad_norm": 1.6171250343322754,
      "learning_rate": 9.97034383425781e-06,
      "loss": 0.761,
      "step": 58
    },
    {
      "epoch": 0.11540342298288508,
      "grad_norm": 1.7444490194320679,
      "learning_rate": 9.969215336719537e-06,
      "loss": 0.7397,
      "step": 59
    },
    {
      "epoch": 0.11735941320293398,
      "grad_norm": 1.6317099332809448,
      "learning_rate": 9.968065833309939e-06,
      "loss": 0.7624,
      "step": 60
    },
    {
      "epoch": 0.11931540342298289,
      "grad_norm": 1.6640548706054688,
      "learning_rate": 9.966895328888195e-06,
      "loss": 0.7247,
      "step": 61
    },
    {
      "epoch": 0.12127139364303179,
      "grad_norm": 1.9074345827102661,
      "learning_rate": 9.965703828402263e-06,
      "loss": 0.7631,
      "step": 62
    },
    {
      "epoch": 0.12322738386308069,
      "grad_norm": 1.5792888402938843,
      "learning_rate": 9.964491336888853e-06,
      "loss": 0.773,
      "step": 63
    },
    {
      "epoch": 0.12518337408312957,
      "grad_norm": 1.8201286792755127,
      "learning_rate": 9.963257859473414e-06,
      "loss": 0.7659,
      "step": 64
    },
    {
      "epoch": 0.1271393643031785,
      "grad_norm": 1.6881448030471802,
      "learning_rate": 9.962003401370101e-06,
      "loss": 0.7607,
      "step": 65
    },
    {
      "epoch": 0.12909535452322737,
      "grad_norm": 1.5919877290725708,
      "learning_rate": 9.960727967881758e-06,
      "loss": 0.7369,
      "step": 66
    },
    {
      "epoch": 0.1310513447432763,
      "grad_norm": 1.5349136590957642,
      "learning_rate": 9.959431564399902e-06,
      "loss": 0.7594,
      "step": 67
    },
    {
      "epoch": 0.13300733496332517,
      "grad_norm": 1.640322208404541,
      "learning_rate": 9.958114196404691e-06,
      "loss": 0.7888,
      "step": 68
    },
    {
      "epoch": 0.1349633251833741,
      "grad_norm": 1.7044633626937866,
      "learning_rate": 9.956775869464901e-06,
      "loss": 0.7486,
      "step": 69
    },
    {
      "epoch": 0.13691931540342298,
      "grad_norm": 1.6809159517288208,
      "learning_rate": 9.955416589237912e-06,
      "loss": 0.7695,
      "step": 70
    },
    {
      "epoch": 0.1388753056234719,
      "grad_norm": 1.6983811855316162,
      "learning_rate": 9.954036361469672e-06,
      "loss": 0.7524,
      "step": 71
    },
    {
      "epoch": 0.14083129584352078,
      "grad_norm": 1.7974449396133423,
      "learning_rate": 9.952635191994682e-06,
      "loss": 0.7789,
      "step": 72
    },
    {
      "epoch": 0.1427872860635697,
      "grad_norm": 1.8526934385299683,
      "learning_rate": 9.951213086735967e-06,
      "loss": 0.7605,
      "step": 73
    },
    {
      "epoch": 0.14474327628361858,
      "grad_norm": 1.6601364612579346,
      "learning_rate": 9.949770051705051e-06,
      "loss": 0.7568,
      "step": 74
    },
    {
      "epoch": 0.1466992665036675,
      "grad_norm": 1.9272420406341553,
      "learning_rate": 9.948306093001933e-06,
      "loss": 0.7335,
      "step": 75
    },
    {
      "epoch": 0.14865525672371638,
      "grad_norm": 1.4759540557861328,
      "learning_rate": 9.946821216815064e-06,
      "loss": 0.7732,
      "step": 76
    },
    {
      "epoch": 0.1506112469437653,
      "grad_norm": 1.5570530891418457,
      "learning_rate": 9.945315429421307e-06,
      "loss": 0.7545,
      "step": 77
    },
    {
      "epoch": 0.15256723716381418,
      "grad_norm": 1.8461523056030273,
      "learning_rate": 9.943788737185934e-06,
      "loss": 0.7874,
      "step": 78
    },
    {
      "epoch": 0.1545232273838631,
      "grad_norm": 1.7286914587020874,
      "learning_rate": 9.942241146562575e-06,
      "loss": 0.7571,
      "step": 79
    },
    {
      "epoch": 0.15647921760391198,
      "grad_norm": 1.7849379777908325,
      "learning_rate": 9.940672664093209e-06,
      "loss": 0.7565,
      "step": 80
    },
    {
      "epoch": 0.15843520782396087,
      "grad_norm": 1.6959819793701172,
      "learning_rate": 9.939083296408127e-06,
      "loss": 0.7777,
      "step": 81
    },
    {
      "epoch": 0.16039119804400978,
      "grad_norm": 1.646620512008667,
      "learning_rate": 9.937473050225905e-06,
      "loss": 0.7579,
      "step": 82
    },
    {
      "epoch": 0.16234718826405867,
      "grad_norm": 1.5833187103271484,
      "learning_rate": 9.935841932353376e-06,
      "loss": 0.7409,
      "step": 83
    },
    {
      "epoch": 0.16430317848410758,
      "grad_norm": 1.6932284832000732,
      "learning_rate": 9.934189949685602e-06,
      "loss": 0.7684,
      "step": 84
    },
    {
      "epoch": 0.16625916870415647,
      "grad_norm": 1.7056066989898682,
      "learning_rate": 9.932517109205849e-06,
      "loss": 0.7224,
      "step": 85
    },
    {
      "epoch": 0.16821515892420538,
      "grad_norm": 1.7733569145202637,
      "learning_rate": 9.930823417985546e-06,
      "loss": 0.7517,
      "step": 86
    },
    {
      "epoch": 0.17017114914425427,
      "grad_norm": 1.5914546251296997,
      "learning_rate": 9.929108883184269e-06,
      "loss": 0.7477,
      "step": 87
    },
    {
      "epoch": 0.17212713936430318,
      "grad_norm": 1.7451390027999878,
      "learning_rate": 9.9273735120497e-06,
      "loss": 0.7354,
      "step": 88
    },
    {
      "epoch": 0.17408312958435207,
      "grad_norm": 1.8151520490646362,
      "learning_rate": 9.9256173119176e-06,
      "loss": 0.769,
      "step": 89
    },
    {
      "epoch": 0.17603911980440098,
      "grad_norm": 1.8201167583465576,
      "learning_rate": 9.923840290211781e-06,
      "loss": 0.7496,
      "step": 90
    },
    {
      "epoch": 0.17799511002444987,
      "grad_norm": 1.7559887170791626,
      "learning_rate": 9.92204245444407e-06,
      "loss": 0.7174,
      "step": 91
    },
    {
      "epoch": 0.17995110024449879,
      "grad_norm": 1.4874770641326904,
      "learning_rate": 9.92022381221428e-06,
      "loss": 0.7332,
      "step": 92
    },
    {
      "epoch": 0.18190709046454767,
      "grad_norm": 1.624958872795105,
      "learning_rate": 9.918384371210178e-06,
      "loss": 0.7484,
      "step": 93
    },
    {
      "epoch": 0.18386308068459659,
      "grad_norm": 1.7472434043884277,
      "learning_rate": 9.916524139207449e-06,
      "loss": 0.73,
      "step": 94
    },
    {
      "epoch": 0.18581907090464547,
      "grad_norm": 1.5148112773895264,
      "learning_rate": 9.914643124069667e-06,
      "loss": 0.736,
      "step": 95
    },
    {
      "epoch": 0.1877750611246944,
      "grad_norm": 1.5377767086029053,
      "learning_rate": 9.912741333748264e-06,
      "loss": 0.7635,
      "step": 96
    },
    {
      "epoch": 0.18973105134474327,
      "grad_norm": 1.5089975595474243,
      "learning_rate": 9.910818776282487e-06,
      "loss": 0.7669,
      "step": 97
    },
    {
      "epoch": 0.1916870415647922,
      "grad_norm": 1.7104604244232178,
      "learning_rate": 9.908875459799373e-06,
      "loss": 0.7411,
      "step": 98
    },
    {
      "epoch": 0.19364303178484107,
      "grad_norm": 1.5175243616104126,
      "learning_rate": 9.906911392513711e-06,
      "loss": 0.7507,
      "step": 99
    },
    {
      "epoch": 0.19559902200489,
      "grad_norm": 1.4044206142425537,
      "learning_rate": 9.904926582728009e-06,
      "loss": 0.7526,
      "step": 100
    },
    {
      "epoch": 0.19755501222493888,
      "grad_norm": 1.5137357711791992,
      "learning_rate": 9.902921038832456e-06,
      "loss": 0.732,
      "step": 101
    },
    {
      "epoch": 0.19951100244498776,
      "grad_norm": 1.5008641481399536,
      "learning_rate": 9.900894769304888e-06,
      "loss": 0.7577,
      "step": 102
    },
    {
      "epoch": 0.20146699266503668,
      "grad_norm": 1.4948675632476807,
      "learning_rate": 9.898847782710754e-06,
      "loss": 0.7408,
      "step": 103
    },
    {
      "epoch": 0.20342298288508556,
      "grad_norm": 1.6185336112976074,
      "learning_rate": 9.896780087703077e-06,
      "loss": 0.7127,
      "step": 104
    },
    {
      "epoch": 0.20537897310513448,
      "grad_norm": 1.3796682357788086,
      "learning_rate": 9.89469169302242e-06,
      "loss": 0.7412,
      "step": 105
    },
    {
      "epoch": 0.20733496332518336,
      "grad_norm": 1.80031156539917,
      "learning_rate": 9.892582607496848e-06,
      "loss": 0.7283,
      "step": 106
    },
    {
      "epoch": 0.20929095354523228,
      "grad_norm": 1.3735039234161377,
      "learning_rate": 9.890452840041885e-06,
      "loss": 0.7252,
      "step": 107
    },
    {
      "epoch": 0.21124694376528116,
      "grad_norm": 1.6003905534744263,
      "learning_rate": 9.88830239966049e-06,
      "loss": 0.7808,
      "step": 108
    },
    {
      "epoch": 0.21320293398533008,
      "grad_norm": 1.4597164392471313,
      "learning_rate": 9.886131295443003e-06,
      "loss": 0.7477,
      "step": 109
    },
    {
      "epoch": 0.21515892420537897,
      "grad_norm": 1.5035536289215088,
      "learning_rate": 9.88393953656712e-06,
      "loss": 0.7423,
      "step": 110
    },
    {
      "epoch": 0.21711491442542788,
      "grad_norm": 1.7422672510147095,
      "learning_rate": 9.881727132297847e-06,
      "loss": 0.7254,
      "step": 111
    },
    {
      "epoch": 0.21907090464547677,
      "grad_norm": 2.035033941268921,
      "learning_rate": 9.879494091987459e-06,
      "loss": 0.7576,
      "step": 112
    },
    {
      "epoch": 0.22102689486552568,
      "grad_norm": 1.4937026500701904,
      "learning_rate": 9.877240425075465e-06,
      "loss": 0.7404,
      "step": 113
    },
    {
      "epoch": 0.22298288508557457,
      "grad_norm": 1.6916754245758057,
      "learning_rate": 9.874966141088569e-06,
      "loss": 0.7392,
      "step": 114
    },
    {
      "epoch": 0.22493887530562348,
      "grad_norm": 1.435537576675415,
      "learning_rate": 9.872671249640627e-06,
      "loss": 0.7465,
      "step": 115
    },
    {
      "epoch": 0.22689486552567237,
      "grad_norm": 1.833910346031189,
      "learning_rate": 9.870355760432607e-06,
      "loss": 0.7427,
      "step": 116
    },
    {
      "epoch": 0.22885085574572128,
      "grad_norm": 1.6560982465744019,
      "learning_rate": 9.868019683252543e-06,
      "loss": 0.7057,
      "step": 117
    },
    {
      "epoch": 0.23080684596577017,
      "grad_norm": 1.4378702640533447,
      "learning_rate": 9.865663027975504e-06,
      "loss": 0.7727,
      "step": 118
    },
    {
      "epoch": 0.23276283618581908,
      "grad_norm": 1.6583086252212524,
      "learning_rate": 9.863285804563549e-06,
      "loss": 0.7531,
      "step": 119
    },
    {
      "epoch": 0.23471882640586797,
      "grad_norm": 1.5125632286071777,
      "learning_rate": 9.860888023065676e-06,
      "loss": 0.719,
      "step": 120
    },
    {
      "epoch": 0.23667481662591688,
      "grad_norm": 1.8075984716415405,
      "learning_rate": 9.858469693617787e-06,
      "loss": 0.7474,
      "step": 121
    },
    {
      "epoch": 0.23863080684596577,
      "grad_norm": 1.4673876762390137,
      "learning_rate": 9.85603082644265e-06,
      "loss": 0.737,
      "step": 122
    },
    {
      "epoch": 0.24058679706601466,
      "grad_norm": 1.622917890548706,
      "learning_rate": 9.853571431849844e-06,
      "loss": 0.7213,
      "step": 123
    },
    {
      "epoch": 0.24254278728606357,
      "grad_norm": 1.4568711519241333,
      "learning_rate": 9.851091520235724e-06,
      "loss": 0.7316,
      "step": 124
    },
    {
      "epoch": 0.24449877750611246,
      "grad_norm": 1.7095931768417358,
      "learning_rate": 9.848591102083375e-06,
      "loss": 0.7003,
      "step": 125
    },
    {
      "epoch": 0.24645476772616137,
      "grad_norm": 1.5975903272628784,
      "learning_rate": 9.846070187962569e-06,
      "loss": 0.724,
      "step": 126
    },
    {
      "epoch": 0.24841075794621026,
      "grad_norm": 1.4630956649780273,
      "learning_rate": 9.843528788529711e-06,
      "loss": 0.726,
      "step": 127
    },
    {
      "epoch": 0.25036674816625915,
      "grad_norm": 1.6113767623901367,
      "learning_rate": 9.840966914527812e-06,
      "loss": 0.7451,
      "step": 128
    },
    {
      "epoch": 0.2523227383863081,
      "grad_norm": 1.5766589641571045,
      "learning_rate": 9.838384576786427e-06,
      "loss": 0.7455,
      "step": 129
    },
    {
      "epoch": 0.254278728606357,
      "grad_norm": 1.3475068807601929,
      "learning_rate": 9.835781786221612e-06,
      "loss": 0.7102,
      "step": 130
    },
    {
      "epoch": 0.25623471882640586,
      "grad_norm": 1.8732693195343018,
      "learning_rate": 9.83315855383589e-06,
      "loss": 0.74,
      "step": 131
    },
    {
      "epoch": 0.25819070904645475,
      "grad_norm": 1.7925317287445068,
      "learning_rate": 9.830514890718192e-06,
      "loss": 0.7395,
      "step": 132
    },
    {
      "epoch": 0.2601466992665037,
      "grad_norm": 1.5492051839828491,
      "learning_rate": 9.82785080804381e-06,
      "loss": 0.7333,
      "step": 133
    },
    {
      "epoch": 0.2621026894865526,
      "grad_norm": 1.4603351354599,
      "learning_rate": 9.825166317074357e-06,
      "loss": 0.7409,
      "step": 134
    },
    {
      "epoch": 0.26405867970660146,
      "grad_norm": 1.4673221111297607,
      "learning_rate": 9.822461429157716e-06,
      "loss": 0.7433,
      "step": 135
    },
    {
      "epoch": 0.26601466992665035,
      "grad_norm": 1.5018287897109985,
      "learning_rate": 9.819736155727992e-06,
      "loss": 0.7505,
      "step": 136
    },
    {
      "epoch": 0.2679706601466993,
      "grad_norm": 1.4042693376541138,
      "learning_rate": 9.816990508305463e-06,
      "loss": 0.6874,
      "step": 137
    },
    {
      "epoch": 0.2699266503667482,
      "grad_norm": 1.86225426197052,
      "learning_rate": 9.814224498496532e-06,
      "loss": 0.7126,
      "step": 138
    },
    {
      "epoch": 0.27188264058679706,
      "grad_norm": 1.697844386100769,
      "learning_rate": 9.811438137993678e-06,
      "loss": 0.7172,
      "step": 139
    },
    {
      "epoch": 0.27383863080684595,
      "grad_norm": 1.8233097791671753,
      "learning_rate": 9.808631438575404e-06,
      "loss": 0.7504,
      "step": 140
    },
    {
      "epoch": 0.2757946210268949,
      "grad_norm": 1.6390475034713745,
      "learning_rate": 9.805804412106197e-06,
      "loss": 0.7417,
      "step": 141
    },
    {
      "epoch": 0.2777506112469438,
      "grad_norm": 1.8348608016967773,
      "learning_rate": 9.802957070536464e-06,
      "loss": 0.7371,
      "step": 142
    },
    {
      "epoch": 0.27970660146699267,
      "grad_norm": 1.6390647888183594,
      "learning_rate": 9.800089425902489e-06,
      "loss": 0.7249,
      "step": 143
    },
    {
      "epoch": 0.28166259168704155,
      "grad_norm": 1.775242805480957,
      "learning_rate": 9.79720149032638e-06,
      "loss": 0.7015,
      "step": 144
    },
    {
      "epoch": 0.28361858190709044,
      "grad_norm": 1.653192162513733,
      "learning_rate": 9.794293276016024e-06,
      "loss": 0.7542,
      "step": 145
    },
    {
      "epoch": 0.2855745721271394,
      "grad_norm": 1.8253936767578125,
      "learning_rate": 9.791364795265027e-06,
      "loss": 0.7097,
      "step": 146
    },
    {
      "epoch": 0.28753056234718827,
      "grad_norm": 1.5707979202270508,
      "learning_rate": 9.788416060452662e-06,
      "loss": 0.7317,
      "step": 147
    },
    {
      "epoch": 0.28948655256723715,
      "grad_norm": 2.0345988273620605,
      "learning_rate": 9.785447084043825e-06,
      "loss": 0.7368,
      "step": 148
    },
    {
      "epoch": 0.29144254278728604,
      "grad_norm": 1.4038718938827515,
      "learning_rate": 9.782457878588977e-06,
      "loss": 0.7092,
      "step": 149
    },
    {
      "epoch": 0.293398533007335,
      "grad_norm": 1.8474197387695312,
      "learning_rate": 9.779448456724088e-06,
      "loss": 0.7147,
      "step": 150
    },
    {
      "epoch": 0.29535452322738387,
      "grad_norm": 1.4272691011428833,
      "learning_rate": 9.776418831170591e-06,
      "loss": 0.742,
      "step": 151
    },
    {
      "epoch": 0.29731051344743276,
      "grad_norm": 1.7123950719833374,
      "learning_rate": 9.77336901473532e-06,
      "loss": 0.7593,
      "step": 152
    },
    {
      "epoch": 0.29926650366748164,
      "grad_norm": 1.4355639219284058,
      "learning_rate": 9.77029902031046e-06,
      "loss": 0.7345,
      "step": 153
    },
    {
      "epoch": 0.3012224938875306,
      "grad_norm": 2.2433369159698486,
      "learning_rate": 9.767208860873498e-06,
      "loss": 0.7319,
      "step": 154
    },
    {
      "epoch": 0.30317848410757947,
      "grad_norm": 1.5576993227005005,
      "learning_rate": 9.764098549487156e-06,
      "loss": 0.7128,
      "step": 155
    },
    {
      "epoch": 0.30513447432762836,
      "grad_norm": 1.508055567741394,
      "learning_rate": 9.760968099299345e-06,
      "loss": 0.7616,
      "step": 156
    },
    {
      "epoch": 0.30709046454767724,
      "grad_norm": 1.7950801849365234,
      "learning_rate": 9.75781752354311e-06,
      "loss": 0.7136,
      "step": 157
    },
    {
      "epoch": 0.3090464547677262,
      "grad_norm": 1.509214997291565,
      "learning_rate": 9.754646835536561e-06,
      "loss": 0.7316,
      "step": 158
    },
    {
      "epoch": 0.3110024449877751,
      "grad_norm": 1.6463536024093628,
      "learning_rate": 9.751456048682839e-06,
      "loss": 0.7599,
      "step": 159
    },
    {
      "epoch": 0.31295843520782396,
      "grad_norm": 1.566895842552185,
      "learning_rate": 9.748245176470037e-06,
      "loss": 0.7353,
      "step": 160
    },
    {
      "epoch": 0.31491442542787285,
      "grad_norm": 1.568455696105957,
      "learning_rate": 9.745014232471161e-06,
      "loss": 0.7286,
      "step": 161
    },
    {
      "epoch": 0.31687041564792173,
      "grad_norm": 1.6639809608459473,
      "learning_rate": 9.741763230344055e-06,
      "loss": 0.7405,
      "step": 162
    },
    {
      "epoch": 0.3188264058679707,
      "grad_norm": 1.5624911785125732,
      "learning_rate": 9.738492183831362e-06,
      "loss": 0.7247,
      "step": 163
    },
    {
      "epoch": 0.32078239608801956,
      "grad_norm": 1.589717149734497,
      "learning_rate": 9.735201106760452e-06,
      "loss": 0.7204,
      "step": 164
    },
    {
      "epoch": 0.32273838630806845,
      "grad_norm": 1.5873618125915527,
      "learning_rate": 9.731890013043367e-06,
      "loss": 0.7329,
      "step": 165
    },
    {
      "epoch": 0.32469437652811733,
      "grad_norm": 1.546643853187561,
      "learning_rate": 9.728558916676769e-06,
      "loss": 0.7195,
      "step": 166
    },
    {
      "epoch": 0.3266503667481663,
      "grad_norm": 1.6865900754928589,
      "learning_rate": 9.725207831741867e-06,
      "loss": 0.7196,
      "step": 167
    },
    {
      "epoch": 0.32860635696821516,
      "grad_norm": 1.6543104648590088,
      "learning_rate": 9.721836772404372e-06,
      "loss": 0.7536,
      "step": 168
    },
    {
      "epoch": 0.33056234718826405,
      "grad_norm": 1.8691520690917969,
      "learning_rate": 9.718445752914427e-06,
      "loss": 0.7192,
      "step": 169
    },
    {
      "epoch": 0.33251833740831294,
      "grad_norm": 1.7233076095581055,
      "learning_rate": 9.715034787606556e-06,
      "loss": 0.7166,
      "step": 170
    },
    {
      "epoch": 0.3344743276283619,
      "grad_norm": 1.70209801197052,
      "learning_rate": 9.711603890899593e-06,
      "loss": 0.6976,
      "step": 171
    },
    {
      "epoch": 0.33643031784841076,
      "grad_norm": 1.8248488903045654,
      "learning_rate": 9.708153077296626e-06,
      "loss": 0.743,
      "step": 172
    },
    {
      "epoch": 0.33838630806845965,
      "grad_norm": 1.5105550289154053,
      "learning_rate": 9.704682361384941e-06,
      "loss": 0.6894,
      "step": 173
    },
    {
      "epoch": 0.34034229828850854,
      "grad_norm": 1.6972191333770752,
      "learning_rate": 9.701191757835948e-06,
      "loss": 0.7204,
      "step": 174
    },
    {
      "epoch": 0.3422982885085575,
      "grad_norm": 1.4798142910003662,
      "learning_rate": 9.69768128140513e-06,
      "loss": 0.7027,
      "step": 175
    },
    {
      "epoch": 0.34425427872860637,
      "grad_norm": 1.7019851207733154,
      "learning_rate": 9.694150946931973e-06,
      "loss": 0.7172,
      "step": 176
    },
    {
      "epoch": 0.34621026894865525,
      "grad_norm": 1.3944963216781616,
      "learning_rate": 9.690600769339916e-06,
      "loss": 0.7152,
      "step": 177
    },
    {
      "epoch": 0.34816625916870414,
      "grad_norm": 1.5284974575042725,
      "learning_rate": 9.68703076363627e-06,
      "loss": 0.7266,
      "step": 178
    },
    {
      "epoch": 0.3501222493887531,
      "grad_norm": 1.4864403009414673,
      "learning_rate": 9.683440944912165e-06,
      "loss": 0.6945,
      "step": 179
    },
    {
      "epoch": 0.35207823960880197,
      "grad_norm": 1.5325855016708374,
      "learning_rate": 9.679831328342486e-06,
      "loss": 0.7228,
      "step": 180
    },
    {
      "epoch": 0.35403422982885085,
      "grad_norm": 1.3994523286819458,
      "learning_rate": 9.676201929185809e-06,
      "loss": 0.7267,
      "step": 181
    },
    {
      "epoch": 0.35599022004889974,
      "grad_norm": 1.4828211069107056,
      "learning_rate": 9.672552762784331e-06,
      "loss": 0.717,
      "step": 182
    },
    {
      "epoch": 0.35794621026894863,
      "grad_norm": 1.4578241109848022,
      "learning_rate": 9.668883844563814e-06,
      "loss": 0.7107,
      "step": 183
    },
    {
      "epoch": 0.35990220048899757,
      "grad_norm": 1.4730778932571411,
      "learning_rate": 9.66519519003351e-06,
      "loss": 0.6941,
      "step": 184
    },
    {
      "epoch": 0.36185819070904646,
      "grad_norm": 1.375962257385254,
      "learning_rate": 9.661486814786104e-06,
      "loss": 0.7205,
      "step": 185
    },
    {
      "epoch": 0.36381418092909534,
      "grad_norm": 1.5559496879577637,
      "learning_rate": 9.657758734497642e-06,
      "loss": 0.7394,
      "step": 186
    },
    {
      "epoch": 0.36577017114914423,
      "grad_norm": 1.2486817836761475,
      "learning_rate": 9.654010964927467e-06,
      "loss": 0.7529,
      "step": 187
    },
    {
      "epoch": 0.36772616136919317,
      "grad_norm": 1.6362264156341553,
      "learning_rate": 9.650243521918157e-06,
      "loss": 0.7205,
      "step": 188
    },
    {
      "epoch": 0.36968215158924206,
      "grad_norm": 1.4043997526168823,
      "learning_rate": 9.646456421395447e-06,
      "loss": 0.7304,
      "step": 189
    },
    {
      "epoch": 0.37163814180929094,
      "grad_norm": 1.4461724758148193,
      "learning_rate": 9.642649679368175e-06,
      "loss": 0.7138,
      "step": 190
    },
    {
      "epoch": 0.37359413202933983,
      "grad_norm": 1.543683409690857,
      "learning_rate": 9.6388233119282e-06,
      "loss": 0.6915,
      "step": 191
    },
    {
      "epoch": 0.3755501222493888,
      "grad_norm": 1.4317554235458374,
      "learning_rate": 9.63497733525035e-06,
      "loss": 0.7329,
      "step": 192
    },
    {
      "epoch": 0.37750611246943766,
      "grad_norm": 1.5005362033843994,
      "learning_rate": 9.631111765592339e-06,
      "loss": 0.712,
      "step": 193
    },
    {
      "epoch": 0.37946210268948655,
      "grad_norm": 1.4482682943344116,
      "learning_rate": 9.627226619294706e-06,
      "loss": 0.7119,
      "step": 194
    },
    {
      "epoch": 0.38141809290953543,
      "grad_norm": 1.5683493614196777,
      "learning_rate": 9.623321912780745e-06,
      "loss": 0.7545,
      "step": 195
    },
    {
      "epoch": 0.3833740831295844,
      "grad_norm": 1.335252046585083,
      "learning_rate": 9.619397662556434e-06,
      "loss": 0.7137,
      "step": 196
    },
    {
      "epoch": 0.38533007334963326,
      "grad_norm": 1.6119199991226196,
      "learning_rate": 9.615453885210368e-06,
      "loss": 0.709,
      "step": 197
    },
    {
      "epoch": 0.38728606356968215,
      "grad_norm": 1.2701472043991089,
      "learning_rate": 9.611490597413687e-06,
      "loss": 0.724,
      "step": 198
    },
    {
      "epoch": 0.38924205378973104,
      "grad_norm": 1.5385199785232544,
      "learning_rate": 9.607507815920002e-06,
      "loss": 0.7067,
      "step": 199
    },
    {
      "epoch": 0.39119804400978,
      "grad_norm": 1.324340581893921,
      "learning_rate": 9.603505557565332e-06,
      "loss": 0.6998,
      "step": 200
    },
    {
      "epoch": 0.39315403422982886,
      "grad_norm": 1.5457934141159058,
      "learning_rate": 9.599483839268027e-06,
      "loss": 0.7429,
      "step": 201
    },
    {
      "epoch": 0.39511002444987775,
      "grad_norm": 1.4941277503967285,
      "learning_rate": 9.595442678028696e-06,
      "loss": 0.727,
      "step": 202
    },
    {
      "epoch": 0.39706601466992664,
      "grad_norm": 1.4549589157104492,
      "learning_rate": 9.59138209093014e-06,
      "loss": 0.7041,
      "step": 203
    },
    {
      "epoch": 0.3990220048899755,
      "grad_norm": 1.423126459121704,
      "learning_rate": 9.587302095137281e-06,
      "loss": 0.7094,
      "step": 204
    },
    {
      "epoch": 0.40097799511002447,
      "grad_norm": 1.347517967224121,
      "learning_rate": 9.583202707897075e-06,
      "loss": 0.7119,
      "step": 205
    },
    {
      "epoch": 0.40293398533007335,
      "grad_norm": 1.3380125761032104,
      "learning_rate": 9.579083946538457e-06,
      "loss": 0.7291,
      "step": 206
    },
    {
      "epoch": 0.40488997555012224,
      "grad_norm": 1.460777759552002,
      "learning_rate": 9.574945828472257e-06,
      "loss": 0.7008,
      "step": 207
    },
    {
      "epoch": 0.4068459657701711,
      "grad_norm": 1.5004523992538452,
      "learning_rate": 9.570788371191134e-06,
      "loss": 0.6705,
      "step": 208
    },
    {
      "epoch": 0.40880195599022007,
      "grad_norm": 1.3935741186141968,
      "learning_rate": 9.566611592269495e-06,
      "loss": 0.6983,
      "step": 209
    },
    {
      "epoch": 0.41075794621026895,
      "grad_norm": 1.6443208456039429,
      "learning_rate": 9.562415509363422e-06,
      "loss": 0.7226,
      "step": 210
    },
    {
      "epoch": 0.41271393643031784,
      "grad_norm": 1.3152751922607422,
      "learning_rate": 9.558200140210598e-06,
      "loss": 0.7476,
      "step": 211
    },
    {
      "epoch": 0.4146699266503667,
      "grad_norm": 1.6871126890182495,
      "learning_rate": 9.55396550263024e-06,
      "loss": 0.7614,
      "step": 212
    },
    {
      "epoch": 0.41662591687041567,
      "grad_norm": 1.5741229057312012,
      "learning_rate": 9.549711614523007e-06,
      "loss": 0.7494,
      "step": 213
    },
    {
      "epoch": 0.41858190709046456,
      "grad_norm": 1.5038225650787354,
      "learning_rate": 9.545438493870943e-06,
      "loss": 0.6948,
      "step": 214
    },
    {
      "epoch": 0.42053789731051344,
      "grad_norm": 1.4876679182052612,
      "learning_rate": 9.541146158737383e-06,
      "loss": 0.7081,
      "step": 215
    },
    {
      "epoch": 0.42249388753056233,
      "grad_norm": 1.32923424243927,
      "learning_rate": 9.536834627266893e-06,
      "loss": 0.7197,
      "step": 216
    },
    {
      "epoch": 0.42444987775061127,
      "grad_norm": 1.468302845954895,
      "learning_rate": 9.532503917685179e-06,
      "loss": 0.7222,
      "step": 217
    },
    {
      "epoch": 0.42640586797066016,
      "grad_norm": 1.3885116577148438,
      "learning_rate": 9.528154048299025e-06,
      "loss": 0.7217,
      "step": 218
    },
    {
      "epoch": 0.42836185819070904,
      "grad_norm": 1.516148328781128,
      "learning_rate": 9.5237850374962e-06,
      "loss": 0.7117,
      "step": 219
    },
    {
      "epoch": 0.43031784841075793,
      "grad_norm": 1.317048192024231,
      "learning_rate": 9.519396903745387e-06,
      "loss": 0.7269,
      "step": 220
    },
    {
      "epoch": 0.4322738386308069,
      "grad_norm": 1.3124562501907349,
      "learning_rate": 9.514989665596114e-06,
      "loss": 0.7323,
      "step": 221
    },
    {
      "epoch": 0.43422982885085576,
      "grad_norm": 1.4711933135986328,
      "learning_rate": 9.510563341678663e-06,
      "loss": 0.7135,
      "step": 222
    },
    {
      "epoch": 0.43618581907090465,
      "grad_norm": 1.4237111806869507,
      "learning_rate": 9.506117950703988e-06,
      "loss": 0.7042,
      "step": 223
    },
    {
      "epoch": 0.43814180929095353,
      "grad_norm": 1.3235597610473633,
      "learning_rate": 9.501653511463653e-06,
      "loss": 0.7348,
      "step": 224
    },
    {
      "epoch": 0.4400977995110024,
      "grad_norm": 1.3544509410858154,
      "learning_rate": 9.497170042829737e-06,
      "loss": 0.7138,
      "step": 225
    },
    {
      "epoch": 0.44205378973105136,
      "grad_norm": 1.2997663021087646,
      "learning_rate": 9.492667563754766e-06,
      "loss": 0.728,
      "step": 226
    },
    {
      "epoch": 0.44400977995110025,
      "grad_norm": 1.409828782081604,
      "learning_rate": 9.488146093271625e-06,
      "loss": 0.6897,
      "step": 227
    },
    {
      "epoch": 0.44596577017114913,
      "grad_norm": 1.3433566093444824,
      "learning_rate": 9.48360565049347e-06,
      "loss": 0.7191,
      "step": 228
    },
    {
      "epoch": 0.447921760391198,
      "grad_norm": 1.37416410446167,
      "learning_rate": 9.479046254613673e-06,
      "loss": 0.7203,
      "step": 229
    },
    {
      "epoch": 0.44987775061124696,
      "grad_norm": 1.3755804300308228,
      "learning_rate": 9.474467924905711e-06,
      "loss": 0.7165,
      "step": 230
    },
    {
      "epoch": 0.45183374083129585,
      "grad_norm": 1.2708872556686401,
      "learning_rate": 9.469870680723104e-06,
      "loss": 0.7014,
      "step": 231
    },
    {
      "epoch": 0.45378973105134474,
      "grad_norm": 1.3506673574447632,
      "learning_rate": 9.465254541499328e-06,
      "loss": 0.7031,
      "step": 232
    },
    {
      "epoch": 0.4557457212713936,
      "grad_norm": 1.3668538331985474,
      "learning_rate": 9.460619526747732e-06,
      "loss": 0.6983,
      "step": 233
    },
    {
      "epoch": 0.45770171149144256,
      "grad_norm": 1.4770394563674927,
      "learning_rate": 9.45596565606145e-06,
      "loss": 0.6995,
      "step": 234
    },
    {
      "epoch": 0.45965770171149145,
      "grad_norm": 1.4407926797866821,
      "learning_rate": 9.451292949113332e-06,
      "loss": 0.7108,
      "step": 235
    },
    {
      "epoch": 0.46161369193154034,
      "grad_norm": 1.6618622541427612,
      "learning_rate": 9.446601425655846e-06,
      "loss": 0.7059,
      "step": 236
    },
    {
      "epoch": 0.4635696821515892,
      "grad_norm": 1.3920097351074219,
      "learning_rate": 9.441891105521005e-06,
      "loss": 0.7289,
      "step": 237
    },
    {
      "epoch": 0.46552567237163817,
      "grad_norm": 1.3779282569885254,
      "learning_rate": 9.437162008620279e-06,
      "loss": 0.6999,
      "step": 238
    },
    {
      "epoch": 0.46748166259168705,
      "grad_norm": 1.389256238937378,
      "learning_rate": 9.432414154944511e-06,
      "loss": 0.6981,
      "step": 239
    },
    {
      "epoch": 0.46943765281173594,
      "grad_norm": 1.5415723323822021,
      "learning_rate": 9.42764756456383e-06,
      "loss": 0.7239,
      "step": 240
    },
    {
      "epoch": 0.4713936430317848,
      "grad_norm": 1.3362054824829102,
      "learning_rate": 9.422862257627573e-06,
      "loss": 0.719,
      "step": 241
    },
    {
      "epoch": 0.47334963325183377,
      "grad_norm": 1.4279307126998901,
      "learning_rate": 9.418058254364195e-06,
      "loss": 0.7069,
      "step": 242
    },
    {
      "epoch": 0.47530562347188265,
      "grad_norm": 1.4915199279785156,
      "learning_rate": 9.413235575081177e-06,
      "loss": 0.7379,
      "step": 243
    },
    {
      "epoch": 0.47726161369193154,
      "grad_norm": 1.3085819482803345,
      "learning_rate": 9.408394240164957e-06,
      "loss": 0.7389,
      "step": 244
    },
    {
      "epoch": 0.4792176039119804,
      "grad_norm": 1.5228544473648071,
      "learning_rate": 9.40353427008083e-06,
      "loss": 0.7209,
      "step": 245
    },
    {
      "epoch": 0.4811735941320293,
      "grad_norm": 1.2931418418884277,
      "learning_rate": 9.398655685372866e-06,
      "loss": 0.715,
      "step": 246
    },
    {
      "epoch": 0.48312958435207826,
      "grad_norm": 1.3800779581069946,
      "learning_rate": 9.393758506663821e-06,
      "loss": 0.7037,
      "step": 247
    },
    {
      "epoch": 0.48508557457212714,
      "grad_norm": 1.42329740524292,
      "learning_rate": 9.388842754655053e-06,
      "loss": 0.7293,
      "step": 248
    },
    {
      "epoch": 0.48704156479217603,
      "grad_norm": 1.424433708190918,
      "learning_rate": 9.383908450126436e-06,
      "loss": 0.7183,
      "step": 249
    },
    {
      "epoch": 0.4889975550122249,
      "grad_norm": 1.3259403705596924,
      "learning_rate": 9.378955613936261e-06,
      "loss": 0.6899,
      "step": 250
    },
    {
      "epoch": 0.49095354523227386,
      "grad_norm": 1.391248106956482,
      "learning_rate": 9.373984267021167e-06,
      "loss": 0.7169,
      "step": 251
    },
    {
      "epoch": 0.49290953545232274,
      "grad_norm": 1.3917707204818726,
      "learning_rate": 9.368994430396033e-06,
      "loss": 0.7185,
      "step": 252
    },
    {
      "epoch": 0.49486552567237163,
      "grad_norm": 1.3821173906326294,
      "learning_rate": 9.3639861251539e-06,
      "loss": 0.7217,
      "step": 253
    },
    {
      "epoch": 0.4968215158924205,
      "grad_norm": 1.5511088371276855,
      "learning_rate": 9.358959372465883e-06,
      "loss": 0.7239,
      "step": 254
    },
    {
      "epoch": 0.49877750611246946,
      "grad_norm": 1.2932369709014893,
      "learning_rate": 9.353914193581073e-06,
      "loss": 0.751,
      "step": 255
    },
    {
      "epoch": 0.5007334963325183,
      "grad_norm": 1.3697410821914673,
      "learning_rate": 9.348850609826454e-06,
      "loss": 0.7072,
      "step": 256
    },
    {
      "epoch": 0.5026894865525673,
      "grad_norm": 1.2831662893295288,
      "learning_rate": 9.343768642606813e-06,
      "loss": 0.6909,
      "step": 257
    },
    {
      "epoch": 0.5046454767726162,
      "grad_norm": 1.2629653215408325,
      "learning_rate": 9.338668313404647e-06,
      "loss": 0.7182,
      "step": 258
    },
    {
      "epoch": 0.5066014669926651,
      "grad_norm": 1.352612853050232,
      "learning_rate": 9.33354964378007e-06,
      "loss": 0.7159,
      "step": 259
    },
    {
      "epoch": 0.508557457212714,
      "grad_norm": 1.2814078330993652,
      "learning_rate": 9.32841265537073e-06,
      "loss": 0.6925,
      "step": 260
    },
    {
      "epoch": 0.5105134474327628,
      "grad_norm": 1.2791004180908203,
      "learning_rate": 9.323257369891702e-06,
      "loss": 0.7319,
      "step": 261
    },
    {
      "epoch": 0.5124694376528117,
      "grad_norm": 1.4273337125778198,
      "learning_rate": 9.318083809135421e-06,
      "loss": 0.6868,
      "step": 262
    },
    {
      "epoch": 0.5144254278728606,
      "grad_norm": 1.3843871355056763,
      "learning_rate": 9.312891994971562e-06,
      "loss": 0.7103,
      "step": 263
    },
    {
      "epoch": 0.5163814180929095,
      "grad_norm": 1.3229299783706665,
      "learning_rate": 9.307681949346969e-06,
      "loss": 0.6953,
      "step": 264
    },
    {
      "epoch": 0.5183374083129584,
      "grad_norm": 1.2690023183822632,
      "learning_rate": 9.302453694285549e-06,
      "loss": 0.7201,
      "step": 265
    },
    {
      "epoch": 0.5202933985330074,
      "grad_norm": 1.544927954673767,
      "learning_rate": 9.29720725188819e-06,
      "loss": 0.7206,
      "step": 266
    },
    {
      "epoch": 0.5222493887530563,
      "grad_norm": 1.2711515426635742,
      "learning_rate": 9.291942644332654e-06,
      "loss": 0.6915,
      "step": 267
    },
    {
      "epoch": 0.5242053789731052,
      "grad_norm": 1.4151374101638794,
      "learning_rate": 9.286659893873498e-06,
      "loss": 0.7194,
      "step": 268
    },
    {
      "epoch": 0.526161369193154,
      "grad_norm": 1.3871374130249023,
      "learning_rate": 9.281359022841966e-06,
      "loss": 0.7086,
      "step": 269
    },
    {
      "epoch": 0.5281173594132029,
      "grad_norm": 1.3970173597335815,
      "learning_rate": 9.276040053645907e-06,
      "loss": 0.7111,
      "step": 270
    },
    {
      "epoch": 0.5300733496332518,
      "grad_norm": 1.287168264389038,
      "learning_rate": 9.27070300876967e-06,
      "loss": 0.7032,
      "step": 271
    },
    {
      "epoch": 0.5320293398533007,
      "grad_norm": 1.3781298398971558,
      "learning_rate": 9.265347910774016e-06,
      "loss": 0.7055,
      "step": 272
    },
    {
      "epoch": 0.5339853300733496,
      "grad_norm": 1.4398114681243896,
      "learning_rate": 9.259974782296023e-06,
      "loss": 0.6926,
      "step": 273
    },
    {
      "epoch": 0.5359413202933986,
      "grad_norm": 1.506633996963501,
      "learning_rate": 9.254583646048981e-06,
      "loss": 0.7201,
      "step": 274
    },
    {
      "epoch": 0.5378973105134475,
      "grad_norm": 1.508634328842163,
      "learning_rate": 9.249174524822307e-06,
      "loss": 0.699,
      "step": 275
    },
    {
      "epoch": 0.5398533007334964,
      "grad_norm": 1.469885230064392,
      "learning_rate": 9.24374744148144e-06,
      "loss": 0.728,
      "step": 276
    },
    {
      "epoch": 0.5418092909535452,
      "grad_norm": 1.5133376121520996,
      "learning_rate": 9.238302418967757e-06,
      "loss": 0.698,
      "step": 277
    },
    {
      "epoch": 0.5437652811735941,
      "grad_norm": 1.3522744178771973,
      "learning_rate": 9.23283948029846e-06,
      "loss": 0.7287,
      "step": 278
    },
    {
      "epoch": 0.545721271393643,
      "grad_norm": 1.4715549945831299,
      "learning_rate": 9.227358648566483e-06,
      "loss": 0.7171,
      "step": 279
    },
    {
      "epoch": 0.5476772616136919,
      "grad_norm": 1.37788987159729,
      "learning_rate": 9.221859946940407e-06,
      "loss": 0.7438,
      "step": 280
    },
    {
      "epoch": 0.5496332518337408,
      "grad_norm": 1.3700509071350098,
      "learning_rate": 9.216343398664349e-06,
      "loss": 0.6981,
      "step": 281
    },
    {
      "epoch": 0.5515892420537898,
      "grad_norm": 1.318377137184143,
      "learning_rate": 9.210809027057866e-06,
      "loss": 0.7176,
      "step": 282
    },
    {
      "epoch": 0.5535452322738387,
      "grad_norm": 1.5471018552780151,
      "learning_rate": 9.205256855515856e-06,
      "loss": 0.6932,
      "step": 283
    },
    {
      "epoch": 0.5555012224938876,
      "grad_norm": 1.4040894508361816,
      "learning_rate": 9.199686907508465e-06,
      "loss": 0.6905,
      "step": 284
    },
    {
      "epoch": 0.5574572127139364,
      "grad_norm": 1.4304442405700684,
      "learning_rate": 9.194099206580981e-06,
      "loss": 0.7147,
      "step": 285
    },
    {
      "epoch": 0.5594132029339853,
      "grad_norm": 1.401181697845459,
      "learning_rate": 9.188493776353743e-06,
      "loss": 0.6774,
      "step": 286
    },
    {
      "epoch": 0.5613691931540342,
      "grad_norm": 1.4256865978240967,
      "learning_rate": 9.182870640522023e-06,
      "loss": 0.6909,
      "step": 287
    },
    {
      "epoch": 0.5633251833740831,
      "grad_norm": 1.3554214239120483,
      "learning_rate": 9.177229822855949e-06,
      "loss": 0.7164,
      "step": 288
    },
    {
      "epoch": 0.565281173594132,
      "grad_norm": 1.3488880395889282,
      "learning_rate": 9.171571347200392e-06,
      "loss": 0.6852,
      "step": 289
    },
    {
      "epoch": 0.5672371638141809,
      "grad_norm": 1.3619459867477417,
      "learning_rate": 9.165895237474863e-06,
      "loss": 0.7171,
      "step": 290
    },
    {
      "epoch": 0.5691931540342299,
      "grad_norm": 1.3099799156188965,
      "learning_rate": 9.160201517673417e-06,
      "loss": 0.6947,
      "step": 291
    },
    {
      "epoch": 0.5711491442542788,
      "grad_norm": 1.4156692028045654,
      "learning_rate": 9.154490211864554e-06,
      "loss": 0.7026,
      "step": 292
    },
    {
      "epoch": 0.5731051344743276,
      "grad_norm": 1.3235222101211548,
      "learning_rate": 9.14876134419111e-06,
      "loss": 0.6966,
      "step": 293
    },
    {
      "epoch": 0.5750611246943765,
      "grad_norm": 1.3630427122116089,
      "learning_rate": 9.143014938870157e-06,
      "loss": 0.7155,
      "step": 294
    },
    {
      "epoch": 0.5770171149144254,
      "grad_norm": 1.2701759338378906,
      "learning_rate": 9.137251020192907e-06,
      "loss": 0.7074,
      "step": 295
    },
    {
      "epoch": 0.5789731051344743,
      "grad_norm": 1.2779312133789062,
      "learning_rate": 9.131469612524602e-06,
      "loss": 0.7118,
      "step": 296
    },
    {
      "epoch": 0.5809290953545232,
      "grad_norm": 1.37440824508667,
      "learning_rate": 9.125670740304412e-06,
      "loss": 0.6779,
      "step": 297
    },
    {
      "epoch": 0.5828850855745721,
      "grad_norm": 1.3591920137405396,
      "learning_rate": 9.119854428045335e-06,
      "loss": 0.722,
      "step": 298
    },
    {
      "epoch": 0.5848410757946211,
      "grad_norm": 1.311881422996521,
      "learning_rate": 9.114020700334092e-06,
      "loss": 0.6914,
      "step": 299
    },
    {
      "epoch": 0.58679706601467,
      "grad_norm": 1.415059208869934,
      "learning_rate": 9.108169581831021e-06,
      "loss": 0.6986,
      "step": 300
    },
    {
      "epoch": 0.5887530562347189,
      "grad_norm": 1.5263363122940063,
      "learning_rate": 9.102301097269974e-06,
      "loss": 0.7066,
      "step": 301
    },
    {
      "epoch": 0.5907090464547677,
      "grad_norm": 1.3773906230926514,
      "learning_rate": 9.096415271458218e-06,
      "loss": 0.6651,
      "step": 302
    },
    {
      "epoch": 0.5926650366748166,
      "grad_norm": 1.4495782852172852,
      "learning_rate": 9.090512129276316e-06,
      "loss": 0.7033,
      "step": 303
    },
    {
      "epoch": 0.5946210268948655,
      "grad_norm": 1.2831271886825562,
      "learning_rate": 9.08459169567804e-06,
      "loss": 0.7199,
      "step": 304
    },
    {
      "epoch": 0.5965770171149144,
      "grad_norm": 1.3329228162765503,
      "learning_rate": 9.078653995690248e-06,
      "loss": 0.7101,
      "step": 305
    },
    {
      "epoch": 0.5985330073349633,
      "grad_norm": 1.4745112657546997,
      "learning_rate": 9.072699054412793e-06,
      "loss": 0.6837,
      "step": 306
    },
    {
      "epoch": 0.6004889975550122,
      "grad_norm": 1.4286351203918457,
      "learning_rate": 9.066726897018408e-06,
      "loss": 0.7067,
      "step": 307
    },
    {
      "epoch": 0.6024449877750612,
      "grad_norm": 1.4778072834014893,
      "learning_rate": 9.060737548752601e-06,
      "loss": 0.6937,
      "step": 308
    },
    {
      "epoch": 0.6044009779951101,
      "grad_norm": 1.3873180150985718,
      "learning_rate": 9.05473103493355e-06,
      "loss": 0.6972,
      "step": 309
    },
    {
      "epoch": 0.6063569682151589,
      "grad_norm": 1.4970178604125977,
      "learning_rate": 9.048707380951993e-06,
      "loss": 0.7295,
      "step": 310
    },
    {
      "epoch": 0.6083129584352078,
      "grad_norm": 1.4127295017242432,
      "learning_rate": 9.042666612271131e-06,
      "loss": 0.6977,
      "step": 311
    },
    {
      "epoch": 0.6102689486552567,
      "grad_norm": 1.5276705026626587,
      "learning_rate": 9.036608754426504e-06,
      "loss": 0.6867,
      "step": 312
    },
    {
      "epoch": 0.6122249388753056,
      "grad_norm": 1.4647181034088135,
      "learning_rate": 9.03053383302589e-06,
      "loss": 0.7076,
      "step": 313
    },
    {
      "epoch": 0.6141809290953545,
      "grad_norm": 1.553812861442566,
      "learning_rate": 9.024441873749208e-06,
      "loss": 0.7213,
      "step": 314
    },
    {
      "epoch": 0.6161369193154034,
      "grad_norm": 1.3352826833724976,
      "learning_rate": 9.018332902348389e-06,
      "loss": 0.6929,
      "step": 315
    },
    {
      "epoch": 0.6180929095354524,
      "grad_norm": 1.5422905683517456,
      "learning_rate": 9.012206944647284e-06,
      "loss": 0.7556,
      "step": 316
    },
    {
      "epoch": 0.6200488997555013,
      "grad_norm": 1.3888792991638184,
      "learning_rate": 9.006064026541549e-06,
      "loss": 0.7233,
      "step": 317
    },
    {
      "epoch": 0.6220048899755501,
      "grad_norm": 1.4934391975402832,
      "learning_rate": 8.999904173998525e-06,
      "loss": 0.6935,
      "step": 318
    },
    {
      "epoch": 0.623960880195599,
      "grad_norm": 1.393879771232605,
      "learning_rate": 8.993727413057155e-06,
      "loss": 0.6695,
      "step": 319
    },
    {
      "epoch": 0.6259168704156479,
      "grad_norm": 1.422959566116333,
      "learning_rate": 8.987533769827842e-06,
      "loss": 0.7058,
      "step": 320
    },
    {
      "epoch": 0.6278728606356968,
      "grad_norm": 1.3607128858566284,
      "learning_rate": 8.981323270492367e-06,
      "loss": 0.6757,
      "step": 321
    },
    {
      "epoch": 0.6298288508557457,
      "grad_norm": 1.2379283905029297,
      "learning_rate": 8.975095941303748e-06,
      "loss": 0.698,
      "step": 322
    },
    {
      "epoch": 0.6317848410757946,
      "grad_norm": 1.3851107358932495,
      "learning_rate": 8.968851808586163e-06,
      "loss": 0.7143,
      "step": 323
    },
    {
      "epoch": 0.6337408312958435,
      "grad_norm": 1.3429324626922607,
      "learning_rate": 8.962590898734814e-06,
      "loss": 0.7217,
      "step": 324
    },
    {
      "epoch": 0.6356968215158925,
      "grad_norm": 1.202528476715088,
      "learning_rate": 8.956313238215824e-06,
      "loss": 0.7085,
      "step": 325
    },
    {
      "epoch": 0.6376528117359413,
      "grad_norm": 1.4195929765701294,
      "learning_rate": 8.950018853566128e-06,
      "loss": 0.7069,
      "step": 326
    },
    {
      "epoch": 0.6396088019559902,
      "grad_norm": 1.2432851791381836,
      "learning_rate": 8.943707771393347e-06,
      "loss": 0.7148,
      "step": 327
    },
    {
      "epoch": 0.6415647921760391,
      "grad_norm": 1.3695796728134155,
      "learning_rate": 8.9373800183757e-06,
      "loss": 0.7414,
      "step": 328
    },
    {
      "epoch": 0.643520782396088,
      "grad_norm": 1.32993745803833,
      "learning_rate": 8.931035621261865e-06,
      "loss": 0.6864,
      "step": 329
    },
    {
      "epoch": 0.6454767726161369,
      "grad_norm": 1.3239011764526367,
      "learning_rate": 8.924674606870887e-06,
      "loss": 0.6929,
      "step": 330
    },
    {
      "epoch": 0.6474327628361858,
      "grad_norm": 1.267788290977478,
      "learning_rate": 8.918297002092048e-06,
      "loss": 0.6877,
      "step": 331
    },
    {
      "epoch": 0.6493887530562347,
      "grad_norm": 1.2890385389328003,
      "learning_rate": 8.911902833884769e-06,
      "loss": 0.7101,
      "step": 332
    },
    {
      "epoch": 0.6513447432762837,
      "grad_norm": 1.2896732091903687,
      "learning_rate": 8.905492129278478e-06,
      "loss": 0.6929,
      "step": 333
    },
    {
      "epoch": 0.6533007334963326,
      "grad_norm": 1.2869383096694946,
      "learning_rate": 8.899064915372513e-06,
      "loss": 0.7076,
      "step": 334
    },
    {
      "epoch": 0.6552567237163814,
      "grad_norm": 1.2590776681900024,
      "learning_rate": 8.892621219336001e-06,
      "loss": 0.7104,
      "step": 335
    },
    {
      "epoch": 0.6572127139364303,
      "grad_norm": 1.4171830415725708,
      "learning_rate": 8.886161068407734e-06,
      "loss": 0.6914,
      "step": 336
    },
    {
      "epoch": 0.6591687041564792,
      "grad_norm": 1.2935525178909302,
      "learning_rate": 8.879684489896073e-06,
      "loss": 0.6931,
      "step": 337
    },
    {
      "epoch": 0.6611246943765281,
      "grad_norm": 1.3660129308700562,
      "learning_rate": 8.873191511178812e-06,
      "loss": 0.7125,
      "step": 338
    },
    {
      "epoch": 0.663080684596577,
      "grad_norm": 1.2660232782363892,
      "learning_rate": 8.86668215970308e-06,
      "loss": 0.698,
      "step": 339
    },
    {
      "epoch": 0.6650366748166259,
      "grad_norm": 1.313506841659546,
      "learning_rate": 8.860156462985207e-06,
      "loss": 0.6944,
      "step": 340
    },
    {
      "epoch": 0.6669926650366749,
      "grad_norm": 1.371457815170288,
      "learning_rate": 8.85361444861063e-06,
| "loss": 0.6942, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.6689486552567238, | |
| "grad_norm": 1.3678666353225708, | |
| "learning_rate": 8.847056144233756e-06, | |
| "loss": 0.6804, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.6709046454767726, | |
| "grad_norm": 1.537421464920044, | |
| "learning_rate": 8.840481577577856e-06, | |
| "loss": 0.7163, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.6728606356968215, | |
| "grad_norm": 1.311813473701477, | |
| "learning_rate": 8.83389077643494e-06, | |
| "loss": 0.6952, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.6748166259168704, | |
| "grad_norm": 1.392874836921692, | |
| "learning_rate": 8.82728376866565e-06, | |
| "loss": 0.7025, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.6767726161369193, | |
| "grad_norm": 1.3374087810516357, | |
| "learning_rate": 8.820660582199137e-06, | |
| "loss": 0.718, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.6787286063569682, | |
| "grad_norm": 1.3735182285308838, | |
| "learning_rate": 8.814021245032938e-06, | |
| "loss": 0.6913, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.6806845965770171, | |
| "grad_norm": 1.4343420267105103, | |
| "learning_rate": 8.807365785232865e-06, | |
| "loss": 0.6984, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.682640586797066, | |
| "grad_norm": 1.3286428451538086, | |
| "learning_rate": 8.800694230932885e-06, | |
| "loss": 0.6441, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.684596577017115, | |
| "grad_norm": 1.3408572673797607, | |
| "learning_rate": 8.794006610334995e-06, | |
| "loss": 0.7161, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6865525672371638, | |
| "grad_norm": 1.422874927520752, | |
| "learning_rate": 8.787302951709113e-06, | |
| "loss": 0.6852, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.6885085574572127, | |
| "grad_norm": 1.374058723449707, | |
| "learning_rate": 8.780583283392952e-06, | |
| "loss": 0.6915, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.6904645476772616, | |
| "grad_norm": 1.323232889175415, | |
| "learning_rate": 8.773847633791897e-06, | |
| "loss": 0.7256, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.6924205378973105, | |
| "grad_norm": 1.3007184267044067, | |
| "learning_rate": 8.767096031378891e-06, | |
| "loss": 0.7001, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.6943765281173594, | |
| "grad_norm": 1.3835595846176147, | |
| "learning_rate": 8.760328504694317e-06, | |
| "loss": 0.7094, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.6963325183374083, | |
| "grad_norm": 1.2669827938079834, | |
| "learning_rate": 8.753545082345866e-06, | |
| "loss": 0.6707, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.6982885085574572, | |
| "grad_norm": 1.3541840314865112, | |
| "learning_rate": 8.74674579300843e-06, | |
| "loss": 0.6948, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.7002444987775062, | |
| "grad_norm": 1.350557565689087, | |
| "learning_rate": 8.739930665423968e-06, | |
| "loss": 0.703, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.702200488997555, | |
| "grad_norm": 1.3906623125076294, | |
| "learning_rate": 8.733099728401392e-06, | |
| "loss": 0.6959, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.7041564792176039, | |
| "grad_norm": 1.3461555242538452, | |
| "learning_rate": 8.72625301081645e-06, | |
| "loss": 0.6823, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7061124694376528, | |
| "grad_norm": 1.3507471084594727, | |
| "learning_rate": 8.71939054161159e-06, | |
| "loss": 0.707, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.7080684596577017, | |
| "grad_norm": 1.3323566913604736, | |
| "learning_rate": 8.712512349795845e-06, | |
| "loss": 0.7087, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.7100244498777506, | |
| "grad_norm": 1.3335025310516357, | |
| "learning_rate": 8.705618464444715e-06, | |
| "loss": 0.7094, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.7119804400977995, | |
| "grad_norm": 1.300357699394226, | |
| "learning_rate": 8.69870891470004e-06, | |
| "loss": 0.6792, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.7139364303178484, | |
| "grad_norm": 1.409981369972229, | |
| "learning_rate": 8.691783729769874e-06, | |
| "loss": 0.6814, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7158924205378973, | |
| "grad_norm": 1.2382073402404785, | |
| "learning_rate": 8.684842938928362e-06, | |
| "loss": 0.7074, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.7178484107579463, | |
| "grad_norm": 1.3354192972183228, | |
| "learning_rate": 8.677886571515624e-06, | |
| "loss": 0.7045, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.7198044009779951, | |
| "grad_norm": 1.3605114221572876, | |
| "learning_rate": 8.670914656937619e-06, | |
| "loss": 0.6841, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.721760391198044, | |
| "grad_norm": 1.3090858459472656, | |
| "learning_rate": 8.663927224666034e-06, | |
| "loss": 0.7118, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.7237163814180929, | |
| "grad_norm": 1.4557610750198364, | |
| "learning_rate": 8.656924304238149e-06, | |
| "loss": 0.6813, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.7256723716381418, | |
| "grad_norm": 1.2979029417037964, | |
| "learning_rate": 8.64990592525671e-06, | |
| "loss": 0.6909, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.7276283618581907, | |
| "grad_norm": 1.3519365787506104, | |
| "learning_rate": 8.642872117389818e-06, | |
| "loss": 0.7083, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.7295843520782396, | |
| "grad_norm": 1.3600205183029175, | |
| "learning_rate": 8.635822910370793e-06, | |
| "loss": 0.7012, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.7315403422982885, | |
| "grad_norm": 1.3810431957244873, | |
| "learning_rate": 8.628758333998047e-06, | |
| "loss": 0.6888, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.7334963325183375, | |
| "grad_norm": 1.4093194007873535, | |
| "learning_rate": 8.621678418134964e-06, | |
| "loss": 0.6932, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.7354523227383863, | |
| "grad_norm": 1.3567487001419067, | |
| "learning_rate": 8.61458319270977e-06, | |
| "loss": 0.6693, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.7374083129584352, | |
| "grad_norm": 1.2569823265075684, | |
| "learning_rate": 8.607472687715408e-06, | |
| "loss": 0.701, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.7393643031784841, | |
| "grad_norm": 1.379348874092102, | |
| "learning_rate": 8.60034693320941e-06, | |
| "loss": 0.6779, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.741320293398533, | |
| "grad_norm": 1.288761854171753, | |
| "learning_rate": 8.593205959313774e-06, | |
| "loss": 0.7077, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.7432762836185819, | |
| "grad_norm": 1.3518379926681519, | |
| "learning_rate": 8.58604979621483e-06, | |
| "loss": 0.6698, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.7452322738386308, | |
| "grad_norm": 1.3594914674758911, | |
| "learning_rate": 8.578878474163115e-06, | |
| "loss": 0.699, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.7471882640586797, | |
| "grad_norm": 1.3028552532196045, | |
| "learning_rate": 8.57169202347325e-06, | |
| "loss": 0.6914, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.7491442542787287, | |
| "grad_norm": 1.4065419435501099, | |
| "learning_rate": 8.564490474523803e-06, | |
| "loss": 0.6663, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.7511002444987775, | |
| "grad_norm": 1.31602942943573, | |
| "learning_rate": 8.557273857757172e-06, | |
| "loss": 0.6854, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.7530562347188264, | |
| "grad_norm": 1.6082147359848022, | |
| "learning_rate": 8.550042203679441e-06, | |
| "loss": 0.6861, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.7550122249388753, | |
| "grad_norm": 1.2542592287063599, | |
| "learning_rate": 8.542795542860265e-06, | |
| "loss": 0.6934, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.7569682151589242, | |
| "grad_norm": 1.5243535041809082, | |
| "learning_rate": 8.535533905932739e-06, | |
| "loss": 0.686, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.7589242053789731, | |
| "grad_norm": 1.364704966545105, | |
| "learning_rate": 8.528257323593257e-06, | |
| "loss": 0.6709, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.760880195599022, | |
| "grad_norm": 1.4337162971496582, | |
| "learning_rate": 8.520965826601394e-06, | |
| "loss": 0.7024, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.7628361858190709, | |
| "grad_norm": 1.4389928579330444, | |
| "learning_rate": 8.513659445779773e-06, | |
| "loss": 0.7034, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.7647921760391198, | |
| "grad_norm": 1.3389860391616821, | |
| "learning_rate": 8.506338212013937e-06, | |
| "loss": 0.6864, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.7667481662591688, | |
| "grad_norm": 1.2770016193389893, | |
| "learning_rate": 8.499002156252205e-06, | |
| "loss": 0.6864, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.7687041564792176, | |
| "grad_norm": 1.3530707359313965, | |
| "learning_rate": 8.491651309505562e-06, | |
| "loss": 0.6982, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.7706601466992665, | |
| "grad_norm": 1.3368823528289795, | |
| "learning_rate": 8.484285702847513e-06, | |
| "loss": 0.6942, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.7726161369193154, | |
| "grad_norm": 1.242735743522644, | |
| "learning_rate": 8.476905367413958e-06, | |
| "loss": 0.6499, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.7745721271393643, | |
| "grad_norm": 1.307515025138855, | |
| "learning_rate": 8.469510334403054e-06, | |
| "loss": 0.688, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.7765281173594132, | |
| "grad_norm": 1.2474210262298584, | |
| "learning_rate": 8.462100635075097e-06, | |
| "loss": 0.7273, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.7784841075794621, | |
| "grad_norm": 1.3087382316589355, | |
| "learning_rate": 8.454676300752367e-06, | |
| "loss": 0.6874, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.780440097799511, | |
| "grad_norm": 1.3236266374588013, | |
| "learning_rate": 8.447237362819022e-06, | |
| "loss": 0.701, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.78239608801956, | |
| "grad_norm": 1.3533902168273926, | |
| "learning_rate": 8.439783852720941e-06, | |
| "loss": 0.6639, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.7843520782396088, | |
| "grad_norm": 1.3087717294692993, | |
| "learning_rate": 8.432315801965616e-06, | |
| "loss": 0.6982, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.7863080684596577, | |
| "grad_norm": 1.4343737363815308, | |
| "learning_rate": 8.424833242121993e-06, | |
| "loss": 0.718, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.7882640586797066, | |
| "grad_norm": 1.3762623071670532, | |
| "learning_rate": 8.417336204820353e-06, | |
| "loss": 0.6884, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.7902200488997555, | |
| "grad_norm": 1.2853654623031616, | |
| "learning_rate": 8.409824721752183e-06, | |
| "loss": 0.67, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.7921760391198044, | |
| "grad_norm": 1.2903891801834106, | |
| "learning_rate": 8.40229882467003e-06, | |
| "loss": 0.6781, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.7941320293398533, | |
| "grad_norm": 1.3627846240997314, | |
| "learning_rate": 8.394758545387369e-06, | |
| "loss": 0.6932, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.7960880195599022, | |
| "grad_norm": 1.298510193824768, | |
| "learning_rate": 8.38720391577848e-06, | |
| "loss": 0.6856, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.798044009779951, | |
| "grad_norm": 1.180311918258667, | |
| "learning_rate": 8.379634967778297e-06, | |
| "loss": 0.684, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.1478567123413086, | |
| "learning_rate": 8.372051733382283e-06, | |
| "loss": 0.7289, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.8019559902200489, | |
| "grad_norm": 1.3660131692886353, | |
| "learning_rate": 8.364454244646294e-06, | |
| "loss": 0.6792, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8039119804400978, | |
| "grad_norm": 1.2295174598693848, | |
| "learning_rate": 8.35684253368644e-06, | |
| "loss": 0.6725, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.8058679706601467, | |
| "grad_norm": 1.2348603010177612, | |
| "learning_rate": 8.349216632678954e-06, | |
| "loss": 0.6629, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.8078239608801956, | |
| "grad_norm": 1.32919180393219, | |
| "learning_rate": 8.341576573860049e-06, | |
| "loss": 0.6994, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.8097799511002445, | |
| "grad_norm": 1.2249308824539185, | |
| "learning_rate": 8.333922389525789e-06, | |
| "loss": 0.6935, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.8117359413202934, | |
| "grad_norm": 1.288668155670166, | |
| "learning_rate": 8.32625411203195e-06, | |
| "loss": 0.6852, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8136919315403423, | |
| "grad_norm": 1.252305507659912, | |
| "learning_rate": 8.318571773793879e-06, | |
| "loss": 0.6985, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.8156479217603912, | |
| "grad_norm": 1.2717095613479614, | |
| "learning_rate": 8.310875407286364e-06, | |
| "loss": 0.6902, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.8176039119804401, | |
| "grad_norm": 1.2788197994232178, | |
| "learning_rate": 8.30316504504349e-06, | |
| "loss": 0.6828, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.819559902200489, | |
| "grad_norm": 1.186546802520752, | |
| "learning_rate": 8.295440719658512e-06, | |
| "loss": 0.6842, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.8215158924205379, | |
| "grad_norm": 1.2258790731430054, | |
| "learning_rate": 8.2877024637837e-06, | |
| "loss": 0.7062, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.8234718826405868, | |
| "grad_norm": 1.313541054725647, | |
| "learning_rate": 8.279950310130218e-06, | |
| "loss": 0.6903, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.8254278728606357, | |
| "grad_norm": 1.2598004341125488, | |
| "learning_rate": 8.272184291467976e-06, | |
| "loss": 0.7117, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.8273838630806846, | |
| "grad_norm": 1.2314584255218506, | |
| "learning_rate": 8.264404440625493e-06, | |
| "loss": 0.7166, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.8293398533007335, | |
| "grad_norm": 1.3035787343978882, | |
| "learning_rate": 8.256610790489765e-06, | |
| "loss": 0.6667, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.8312958435207825, | |
| "grad_norm": 1.3895798921585083, | |
| "learning_rate": 8.248803374006113e-06, | |
| "loss": 0.6708, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.8332518337408313, | |
| "grad_norm": 1.2241226434707642, | |
| "learning_rate": 8.240982224178058e-06, | |
| "loss": 0.6889, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.8352078239608802, | |
| "grad_norm": 1.1916440725326538, | |
| "learning_rate": 8.233147374067166e-06, | |
| "loss": 0.6858, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.8371638141809291, | |
| "grad_norm": 1.2678576707839966, | |
| "learning_rate": 8.225298856792929e-06, | |
| "loss": 0.695, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.839119804400978, | |
| "grad_norm": 1.2337770462036133, | |
| "learning_rate": 8.2174367055326e-06, | |
| "loss": 0.702, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.8410757946210269, | |
| "grad_norm": 1.171006202697754, | |
| "learning_rate": 8.209560953521075e-06, | |
| "loss": 0.7058, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.8430317848410758, | |
| "grad_norm": 1.2427887916564941, | |
| "learning_rate": 8.201671634050738e-06, | |
| "loss": 0.695, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.8449877750611247, | |
| "grad_norm": 1.2962589263916016, | |
| "learning_rate": 8.193768780471326e-06, | |
| "loss": 0.7152, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.8469437652811735, | |
| "grad_norm": 1.3337562084197998, | |
| "learning_rate": 8.185852426189794e-06, | |
| "loss": 0.7168, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.8488997555012225, | |
| "grad_norm": 1.352417230606079, | |
| "learning_rate": 8.177922604670155e-06, | |
| "loss": 0.6958, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.8508557457212714, | |
| "grad_norm": 1.186039924621582, | |
| "learning_rate": 8.169979349433358e-06, | |
| "loss": 0.6631, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.8528117359413203, | |
| "grad_norm": 1.1488475799560547, | |
| "learning_rate": 8.162022694057143e-06, | |
| "loss": 0.6991, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.8547677261613692, | |
| "grad_norm": 1.2668110132217407, | |
| "learning_rate": 8.154052672175888e-06, | |
| "loss": 0.6866, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.8567237163814181, | |
| "grad_norm": 1.266473412513733, | |
| "learning_rate": 8.146069317480475e-06, | |
| "loss": 0.6932, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.858679706601467, | |
| "grad_norm": 1.241896152496338, | |
| "learning_rate": 8.138072663718149e-06, | |
| "loss": 0.6788, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.8606356968215159, | |
| "grad_norm": 1.3747978210449219, | |
| "learning_rate": 8.130062744692371e-06, | |
| "loss": 0.6899, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.8625916870415647, | |
| "grad_norm": 1.393964171409607, | |
| "learning_rate": 8.122039594262679e-06, | |
| "loss": 0.6886, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.8645476772616137, | |
| "grad_norm": 1.1397488117218018, | |
| "learning_rate": 8.114003246344539e-06, | |
| "loss": 0.6797, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.8665036674816626, | |
| "grad_norm": 1.2788703441619873, | |
| "learning_rate": 8.10595373490921e-06, | |
| "loss": 0.6767, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.8684596577017115, | |
| "grad_norm": 1.2286616563796997, | |
| "learning_rate": 8.097891093983592e-06, | |
| "loss": 0.6616, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.8704156479217604, | |
| "grad_norm": 1.294381022453308, | |
| "learning_rate": 8.08981535765009e-06, | |
| "loss": 0.7023, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.8723716381418093, | |
| "grad_norm": 1.421319842338562, | |
| "learning_rate": 8.08172656004646e-06, | |
| "loss": 0.7068, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.8743276283618582, | |
| "grad_norm": 1.3619354963302612, | |
| "learning_rate": 8.073624735365677e-06, | |
| "loss": 0.6629, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.8762836185819071, | |
| "grad_norm": 1.2635352611541748, | |
| "learning_rate": 8.06550991785578e-06, | |
| "loss": 0.6986, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.878239608801956, | |
| "grad_norm": 1.3464255332946777, | |
| "learning_rate": 8.057382141819734e-06, | |
| "loss": 0.7125, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.8801955990220048, | |
| "grad_norm": 1.241179347038269, | |
| "learning_rate": 8.049241441615279e-06, | |
| "loss": 0.6943, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.8821515892420538, | |
| "grad_norm": 1.343956708908081, | |
| "learning_rate": 8.04108785165479e-06, | |
| "loss": 0.74, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.8841075794621027, | |
| "grad_norm": 1.3890380859375, | |
| "learning_rate": 8.032921406405132e-06, | |
| "loss": 0.6938, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.8860635696821516, | |
| "grad_norm": 1.2220715284347534, | |
| "learning_rate": 8.024742140387506e-06, | |
| "loss": 0.6873, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.8880195599022005, | |
| "grad_norm": 1.322405219078064, | |
| "learning_rate": 8.016550088177313e-06, | |
| "loss": 0.6721, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.8899755501222494, | |
| "grad_norm": 1.2938501834869385, | |
| "learning_rate": 8.008345284404005e-06, | |
| "loss": 0.722, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.8919315403422983, | |
| "grad_norm": 1.2954498529434204, | |
| "learning_rate": 8.000127763750934e-06, | |
| "loss": 0.6788, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.8938875305623472, | |
| "grad_norm": 1.3062814474105835, | |
| "learning_rate": 7.99189756095521e-06, | |
| "loss": 0.6423, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.895843520782396, | |
| "grad_norm": 1.2911349534988403, | |
| "learning_rate": 7.983654710807556e-06, | |
| "loss": 0.7107, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.897799511002445, | |
| "grad_norm": 1.4652104377746582, | |
| "learning_rate": 7.975399248152151e-06, | |
| "loss": 0.6706, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.8997555012224939, | |
| "grad_norm": 1.2224496603012085, | |
| "learning_rate": 7.967131207886497e-06, | |
| "loss": 0.687, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9017114914425428, | |
| "grad_norm": 1.3361140489578247, | |
| "learning_rate": 7.95885062496126e-06, | |
| "loss": 0.6558, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.9036674816625917, | |
| "grad_norm": 1.3264832496643066, | |
| "learning_rate": 7.950557534380126e-06, | |
| "loss": 0.6966, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.9056234718826406, | |
| "grad_norm": 1.3627907037734985, | |
| "learning_rate": 7.942251971199657e-06, | |
| "loss": 0.694, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.9075794621026895, | |
| "grad_norm": 1.2352824211120605, | |
| "learning_rate": 7.933933970529135e-06, | |
| "loss": 0.7287, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.9095354523227384, | |
| "grad_norm": 1.3050729036331177, | |
| "learning_rate": 7.92560356753042e-06, | |
| "loss": 0.6898, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9114914425427872, | |
| "grad_norm": 1.3547155857086182, | |
| "learning_rate": 7.917260797417801e-06, | |
| "loss": 0.6589, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.9134474327628361, | |
| "grad_norm": 1.2764095067977905, | |
| "learning_rate": 7.908905695457838e-06, | |
| "loss": 0.6777, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.9154034229828851, | |
| "grad_norm": 1.2692286968231201, | |
| "learning_rate": 7.900538296969228e-06, | |
| "loss": 0.675, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.917359413202934, | |
| "grad_norm": 1.2980645895004272, | |
| "learning_rate": 7.892158637322647e-06, | |
| "loss": 0.7241, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.9193154034229829, | |
| "grad_norm": 1.285950779914856, | |
| "learning_rate": 7.883766751940595e-06, | |
| "loss": 0.6724, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.9212713936430318, | |
| "grad_norm": 1.2841320037841797, | |
| "learning_rate": 7.87536267629726e-06, | |
| "loss": 0.6553, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.9232273838630807, | |
| "grad_norm": 1.2985844612121582, | |
| "learning_rate": 7.866946445918359e-06, | |
| "loss": 0.6767, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.9251833740831296, | |
| "grad_norm": 1.2050354480743408, | |
| "learning_rate": 7.858518096380984e-06, | |
| "loss": 0.6954, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.9271393643031784, | |
| "grad_norm": 1.1968133449554443, | |
| "learning_rate": 7.850077663313466e-06, | |
| "loss": 0.6921, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.9290953545232273, | |
| "grad_norm": 1.2390793561935425, | |
| "learning_rate": 7.841625182395207e-06, | |
| "loss": 0.7176, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.9310513447432763, | |
| "grad_norm": 1.2606759071350098, | |
| "learning_rate": 7.833160689356545e-06, | |
| "loss": 0.7002, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.9330073349633252, | |
| "grad_norm": 1.265933632850647, | |
| "learning_rate": 7.824684219978591e-06, | |
| "loss": 0.6572, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.9349633251833741, | |
| "grad_norm": 1.3236520290374756, | |
| "learning_rate": 7.816195810093081e-06, | |
| "loss": 0.7176, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.936919315403423, | |
| "grad_norm": 1.1737425327301025, | |
| "learning_rate": 7.807695495582233e-06, | |
| "loss": 0.6785, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.9388753056234719, | |
| "grad_norm": 1.1625640392303467, | |
| "learning_rate": 7.79918331237858e-06, | |
| "loss": 0.6656, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.9408312958435208, | |
| "grad_norm": 1.2079232931137085, | |
| "learning_rate": 7.790659296464833e-06, | |
| "loss": 0.6991, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.9427872860635697, | |
| "grad_norm": 1.1734462976455688, | |
| "learning_rate": 7.782123483873716e-06, | |
| "loss": 0.6793, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.9447432762836185, | |
| "grad_norm": 1.4000263214111328, | |
| "learning_rate": 7.773575910687827e-06, | |
| "loss": 0.6928, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.9466992665036675, | |
| "grad_norm": 1.368007779121399, | |
| "learning_rate": 7.76501661303947e-06, | |
| "loss": 0.676, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.9486552567237164, | |
| "grad_norm": 1.211277961730957, | |
| "learning_rate": 7.756445627110523e-06, | |
| "loss": 0.666, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.9506112469437653, | |
| "grad_norm": 1.3525391817092896, | |
| "learning_rate": 7.747862989132257e-06, | |
| "loss": 0.6932, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.9525672371638142, | |
| "grad_norm": 1.30064058303833, | |
| "learning_rate": 7.739268735385212e-06, | |
| "loss": 0.6976, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.9545232273838631, | |
| "grad_norm": 1.3304917812347412, | |
| "learning_rate": 7.730662902199022e-06, | |
| "loss": 0.6721, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.956479217603912, | |
| "grad_norm": 1.5623983144760132, | |
| "learning_rate": 7.722045525952272e-06, | |
| "loss": 0.7155, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.9584352078239609, | |
| "grad_norm": 1.258493423461914, | |
| "learning_rate": 7.713416643072345e-06, | |
| "loss": 0.6694, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.9603911980440097, | |
| "grad_norm": 1.3739190101623535, | |
| "learning_rate": 7.70477629003526e-06, | |
| "loss": 0.7087, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.9623471882640586, | |
| "grad_norm": 1.3824055194854736, | |
| "learning_rate": 7.696124503365526e-06, | |
| "loss": 0.6822, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.9643031784841076, | |
| "grad_norm": 1.2122122049331665, | |
| "learning_rate": 7.68746131963598e-06, | |
| "loss": 0.6785, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.9662591687041565, | |
| "grad_norm": 1.250258445739746, | |
| "learning_rate": 7.678786775467645e-06, | |
| "loss": 0.705, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.9682151589242054, | |
| "grad_norm": 1.287483811378479, | |
| "learning_rate": 7.670100907529558e-06, | |
| "loss": 0.6785, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.9701711491442543, | |
| "grad_norm": 1.213124394416809, | |
| "learning_rate": 7.661403752538628e-06, | |
| "loss": 0.7136, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.9721271393643032, | |
| "grad_norm": 1.2306246757507324, | |
| "learning_rate": 7.652695347259476e-06, | |
| "loss": 0.6748, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.9740831295843521, | |
| "grad_norm": 1.2954891920089722, | |
| "learning_rate": 7.64397572850428e-06, | |
| "loss": 0.6732, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.976039119804401, | |
| "grad_norm": 1.207684874534607, | |
| "learning_rate": 7.635244933132618e-06, | |
| "loss": 0.671, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.9779951100244498, | |
| "grad_norm": 1.3937175273895264, | |
| "learning_rate": 7.626502998051321e-06, | |
| "loss": 0.6884, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.9799511002444988, | |
| "grad_norm": 1.3250900506973267, | |
| "learning_rate": 7.6177499602143e-06, | |
| "loss": 0.6701, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.9819070904645477, | |
| "grad_norm": 1.2760993242263794, | |
| "learning_rate": 7.608985856622405e-06, | |
| "loss": 0.6703, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.9838630806845966, | |
| "grad_norm": 1.2552558183670044, | |
| "learning_rate": 7.6002107243232625e-06, | |
| "loss": 0.7043, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.9858190709046455, | |
| "grad_norm": 1.3188880681991577, | |
| "learning_rate": 7.5914246004111195e-06, | |
| "loss": 0.6893, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.9877750611246944, | |
| "grad_norm": 1.3230962753295898, | |
| "learning_rate": 7.582627522026686e-06, | |
| "loss": 0.6458, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.9897310513447433, | |
| "grad_norm": 1.212945818901062, | |
| "learning_rate": 7.573819526356979e-06, | |
| "loss": 0.684, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.9916870415647921, | |
| "grad_norm": 1.265489935874939, | |
| "learning_rate": 7.565000650635167e-06, | |
| "loss": 0.6697, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.993643031784841, | |
| "grad_norm": 1.2421671152114868, | |
| "learning_rate": 7.556170932140407e-06, | |
| "loss": 0.6875, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.9955990220048899, | |
| "grad_norm": 1.2173808813095093, | |
| "learning_rate": 7.547330408197695e-06, | |
| "loss": 0.6872, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.9975550122249389, | |
| "grad_norm": 1.2676969766616821, | |
| "learning_rate": 7.538479116177699e-06, | |
| "loss": 0.6543, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.9995110024449878, | |
| "grad_norm": 1.2565542459487915, | |
| "learning_rate": 7.529617093496609e-06, | |
| "loss": 0.7153, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.0014669926650366, | |
| "grad_norm": 1.333875298500061, | |
| "learning_rate": 7.520744377615975e-06, | |
| "loss": 0.6843, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.0034229828850856, | |
| "grad_norm": 1.2860937118530273, | |
| "learning_rate": 7.511861006042549e-06, | |
| "loss": 0.684, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.0053789731051346, | |
| "grad_norm": 1.1821390390396118, | |
| "learning_rate": 7.502967016328128e-06, | |
| "loss": 0.6788, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.0073349633251834, | |
| "grad_norm": 1.285091757774353, | |
| "learning_rate": 7.494062446069391e-06, | |
| "loss": 0.6679, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.0092909535452323, | |
| "grad_norm": 1.298203468322754, | |
| "learning_rate": 7.485147332907745e-06, | |
| "loss": 0.6622, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.0112469437652811, | |
| "grad_norm": 1.230342149734497, | |
| "learning_rate": 7.476221714529167e-06, | |
| "loss": 0.6829, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.0132029339853301, | |
| "grad_norm": 1.2091991901397705, | |
| "learning_rate": 7.467285628664036e-06, | |
| "loss": 0.6977, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.015158924205379, | |
| "grad_norm": 1.2483344078063965, | |
| "learning_rate": 7.458339113086983e-06, | |
| "loss": 0.6874, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.017114914425428, | |
| "grad_norm": 1.2287633419036865, | |
| "learning_rate": 7.4493822056167255e-06, | |
| "loss": 0.6984, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.0014669926650366, | |
| "grad_norm": 1.9640307426452637, | |
| "learning_rate": 7.440414944115909e-06, | |
| "loss": 0.5247, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.0034229828850856, | |
| "grad_norm": 1.8829586505889893, | |
| "learning_rate": 7.431437366490952e-06, | |
| "loss": 0.4741, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.0053789731051346, | |
| "grad_norm": 1.5505880117416382, | |
| "learning_rate": 7.422449510691878e-06, | |
| "loss": 0.4706, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.0073349633251834, | |
| "grad_norm": 1.6880980730056763, | |
| "learning_rate": 7.413451414712156e-06, | |
| "loss": 0.4542, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.0092909535452323, | |
| "grad_norm": 1.934731125831604, | |
| "learning_rate": 7.404443116588548e-06, | |
| "loss": 0.4819, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.0112469437652811, | |
| "grad_norm": 1.9934979677200317, | |
| "learning_rate": 7.395424654400938e-06, | |
| "loss": 0.4653, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.0132029339853301, | |
| "grad_norm": 1.781922698020935, | |
| "learning_rate": 7.386396066272177e-06, | |
| "loss": 0.4566, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.015158924205379, | |
| "grad_norm": 1.3940856456756592, | |
| "learning_rate": 7.377357390367922e-06, | |
| "loss": 0.4616, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.017114914425428, | |
| "grad_norm": 1.4322494268417358, | |
| "learning_rate": 7.368308664896471e-06, | |
| "loss": 0.4535, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.0190709046454767, | |
| "grad_norm": 1.6584018468856812, | |
| "learning_rate": 7.3592499281086e-06, | |
| "loss": 0.466, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.0210268948655257, | |
| "grad_norm": 1.39903724193573, | |
| "learning_rate": 7.350181218297417e-06, | |
| "loss": 0.4639, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.0229828850855747, | |
| "grad_norm": 1.3164935111999512, | |
| "learning_rate": 7.341102573798171e-06, | |
| "loss": 0.4354, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.0249388753056234, | |
| "grad_norm": 1.417481541633606, | |
| "learning_rate": 7.332014032988123e-06, | |
| "loss": 0.4512, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.0268948655256724, | |
| "grad_norm": 1.36899995803833, | |
| "learning_rate": 7.322915634286356e-06, | |
| "loss": 0.4386, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.0288508557457212, | |
| "grad_norm": 1.4433590173721313, | |
| "learning_rate": 7.3138074161536306e-06, | |
| "loss": 0.4639, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.0308068459657702, | |
| "grad_norm": 1.6212917566299438, | |
| "learning_rate": 7.304689417092215e-06, | |
| "loss": 0.43, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.032762836185819, | |
| "grad_norm": 1.329058051109314, | |
| "learning_rate": 7.29556167564572e-06, | |
| "loss": 0.4361, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.034718826405868, | |
| "grad_norm": 1.3723002672195435, | |
| "learning_rate": 7.286424230398946e-06, | |
| "loss": 0.4312, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.0366748166259168, | |
| "grad_norm": 1.408575177192688, | |
| "learning_rate": 7.277277119977706e-06, | |
| "loss": 0.4326, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.0386308068459658, | |
| "grad_norm": 1.2816498279571533, | |
| "learning_rate": 7.268120383048674e-06, | |
| "loss": 0.452, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.0405867970660148, | |
| "grad_norm": 1.340277910232544, | |
| "learning_rate": 7.2589540583192165e-06, | |
| "loss": 0.4617, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.0425427872860635, | |
| "grad_norm": 1.253343105316162, | |
| "learning_rate": 7.249778184537228e-06, | |
| "loss": 0.4422, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.0444987775061125, | |
| "grad_norm": 1.3268951177597046, | |
| "learning_rate": 7.240592800490972e-06, | |
| "loss": 0.4406, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.0464547677261613, | |
| "grad_norm": 1.3422157764434814, | |
| "learning_rate": 7.231397945008912e-06, | |
| "loss": 0.4434, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.0484107579462103, | |
| "grad_norm": 1.4334876537322998, | |
| "learning_rate": 7.222193656959546e-06, | |
| "loss": 0.4359, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.050366748166259, | |
| "grad_norm": 1.4176690578460693, | |
| "learning_rate": 7.212979975251252e-06, | |
| "loss": 0.4595, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.052322738386308, | |
| "grad_norm": 1.4059594869613647, | |
| "learning_rate": 7.20375693883211e-06, | |
| "loss": 0.4699, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.054278728606357, | |
| "grad_norm": 1.381970763206482, | |
| "learning_rate": 7.194524586689749e-06, | |
| "loss": 0.4439, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.0562347188264058, | |
| "grad_norm": 1.5071953535079956, | |
| "learning_rate": 7.185282957851175e-06, | |
| "loss": 0.4403, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.0581907090464548, | |
| "grad_norm": 1.374406337738037, | |
| "learning_rate": 7.176032091382611e-06, | |
| "loss": 0.4309, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.0601466992665036, | |
| "grad_norm": 1.5083845853805542, | |
| "learning_rate": 7.166772026389327e-06, | |
| "loss": 0.4333, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.0621026894865526, | |
| "grad_norm": 1.3600280284881592, | |
| "learning_rate": 7.157502802015477e-06, | |
| "loss": 0.4173, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.0640586797066014, | |
| "grad_norm": 1.3899999856948853, | |
| "learning_rate": 7.148224457443933e-06, | |
| "loss": 0.4414, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.0660146699266504, | |
| "grad_norm": 1.32930326461792, | |
| "learning_rate": 7.138937031896125e-06, | |
| "loss": 0.4489, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.0679706601466992, | |
| "grad_norm": 1.2142601013183594, | |
| "learning_rate": 7.129640564631863e-06, | |
| "loss": 0.4372, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.0699266503667482, | |
| "grad_norm": 1.2110955715179443, | |
| "learning_rate": 7.1203350949491824e-06, | |
| "loss": 0.4396, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.0718826405867972, | |
| "grad_norm": 1.2478694915771484, | |
| "learning_rate": 7.111020662184174e-06, | |
| "loss": 0.4467, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.073838630806846, | |
| "grad_norm": 1.2674793004989624, | |
| "learning_rate": 7.101697305710812e-06, | |
| "loss": 0.4595, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.075794621026895, | |
| "grad_norm": 1.280213475227356, | |
| "learning_rate": 7.092365064940801e-06, | |
| "loss": 0.4367, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.0777506112469437, | |
| "grad_norm": 1.2007982730865479, | |
| "learning_rate": 7.083023979323396e-06, | |
| "loss": 0.4328, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.0797066014669927, | |
| "grad_norm": 1.2380990982055664, | |
| "learning_rate": 7.073674088345239e-06, | |
| "loss": 0.4263, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.0816625916870415, | |
| "grad_norm": 1.1389713287353516, | |
| "learning_rate": 7.064315431530202e-06, | |
| "loss": 0.453, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.0836185819070905, | |
| "grad_norm": 1.2735354900360107, | |
| "learning_rate": 7.054948048439204e-06, | |
| "loss": 0.4342, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.0855745721271393, | |
| "grad_norm": 1.2365597486495972, | |
| "learning_rate": 7.045571978670057e-06, | |
| "loss": 0.4549, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.0875305623471883, | |
| "grad_norm": 1.2870897054672241, | |
| "learning_rate": 7.036187261857289e-06, | |
| "loss": 0.4347, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.0894865525672373, | |
| "grad_norm": 1.2662715911865234, | |
| "learning_rate": 7.026793937671984e-06, | |
| "loss": 0.4244, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.091442542787286, | |
| "grad_norm": 1.3069432973861694, | |
| "learning_rate": 7.017392045821609e-06, | |
| "loss": 0.4442, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.093398533007335, | |
| "grad_norm": 1.2944248914718628, | |
| "learning_rate": 7.007981626049851e-06, | |
| "loss": 0.4261, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.0953545232273838, | |
| "grad_norm": 1.271885633468628, | |
| "learning_rate": 6.998562718136445e-06, | |
| "loss": 0.4423, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.0973105134474328, | |
| "grad_norm": 1.2481279373168945, | |
| "learning_rate": 6.989135361897002e-06, | |
| "loss": 0.4146, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.0992665036674816, | |
| "grad_norm": 1.229149341583252, | |
| "learning_rate": 6.979699597182856e-06, | |
| "loss": 0.4413, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.1012224938875306, | |
| "grad_norm": 1.3400832414627075, | |
| "learning_rate": 6.970255463880879e-06, | |
| "loss": 0.4481, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.1031784841075796, | |
| "grad_norm": 1.1787781715393066, | |
| "learning_rate": 6.960803001913315e-06, | |
| "loss": 0.4434, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.1051344743276283, | |
| "grad_norm": 1.3244268894195557, | |
| "learning_rate": 6.9513422512376214e-06, | |
| "loss": 0.4567, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.1070904645476773, | |
| "grad_norm": 1.2756062746047974, | |
| "learning_rate": 6.9418732518462935e-06, | |
| "loss": 0.4472, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.1090464547677261, | |
| "grad_norm": 1.2949943542480469, | |
| "learning_rate": 6.932396043766694e-06, | |
| "loss": 0.4545, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.1110024449877751, | |
| "grad_norm": 1.2295341491699219, | |
| "learning_rate": 6.922910667060881e-06, | |
| "loss": 0.4519, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.112958435207824, | |
| "grad_norm": 1.3025312423706055, | |
| "learning_rate": 6.913417161825449e-06, | |
| "loss": 0.4649, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.114914425427873, | |
| "grad_norm": 1.303686261177063, | |
| "learning_rate": 6.903915568191353e-06, | |
| "loss": 0.4484, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.1168704156479217, | |
| "grad_norm": 1.2657026052474976, | |
| "learning_rate": 6.894405926323737e-06, | |
| "loss": 0.4664, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.1188264058679707, | |
| "grad_norm": 1.9421648979187012, | |
| "learning_rate": 6.884888276421766e-06, | |
| "loss": 0.4503, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.1207823960880197, | |
| "grad_norm": 1.2472649812698364, | |
| "learning_rate": 6.875362658718459e-06, | |
| "loss": 0.4636, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.1227383863080684, | |
| "grad_norm": 1.337275505065918, | |
| "learning_rate": 6.8658291134805155e-06, | |
| "loss": 0.4575, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.1246943765281174, | |
| "grad_norm": 1.438635230064392, | |
| "learning_rate": 6.856287681008145e-06, | |
| "loss": 0.45, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.1266503667481662, | |
| "grad_norm": 1.2741761207580566, | |
| "learning_rate": 6.846738401634899e-06, | |
| "loss": 0.4443, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.1286063569682152, | |
| "grad_norm": 1.4363293647766113, | |
| "learning_rate": 6.837181315727501e-06, | |
| "loss": 0.432, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.130562347188264, | |
| "grad_norm": 1.398519515991211, | |
| "learning_rate": 6.827616463685671e-06, | |
| "loss": 0.443, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.132518337408313, | |
| "grad_norm": 1.3236083984375, | |
| "learning_rate": 6.818043885941962e-06, | |
| "loss": 0.465, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.1344743276283618, | |
| "grad_norm": 1.2787197828292847, | |
| "learning_rate": 6.8084636229615786e-06, | |
| "loss": 0.4429, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.1364303178484108, | |
| "grad_norm": 1.1890900135040283, | |
| "learning_rate": 6.798875715242221e-06, | |
| "loss": 0.4437, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.1383863080684598, | |
| "grad_norm": 1.3473844528198242, | |
| "learning_rate": 6.789280203313899e-06, | |
| "loss": 0.4475, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.1403422982885085, | |
| "grad_norm": 1.2148065567016602, | |
| "learning_rate": 6.7796771277387705e-06, | |
| "loss": 0.4421, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.1422982885085575, | |
| "grad_norm": 1.3973183631896973, | |
| "learning_rate": 6.770066529110964e-06, | |
| "loss": 0.4344, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.1442542787286063, | |
| "grad_norm": 1.1743921041488647, | |
| "learning_rate": 6.760448448056407e-06, | |
| "loss": 0.4249, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 1.1462102689486553, | |
| "grad_norm": 1.3278478384017944, | |
| "learning_rate": 6.750822925232664e-06, | |
| "loss": 0.4542, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 1.148166259168704, | |
| "grad_norm": 1.2853714227676392, | |
| "learning_rate": 6.741190001328751e-06, | |
| "loss": 0.4457, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 1.150122249388753, | |
| "grad_norm": 1.217772126197815, | |
| "learning_rate": 6.731549717064975e-06, | |
| "loss": 0.4587, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 1.152078239608802, | |
| "grad_norm": 1.3278999328613281, | |
| "learning_rate": 6.721902113192752e-06, | |
| "loss": 0.4607, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 1.1540342298288508, | |
| "grad_norm": 1.2875677347183228, | |
| "learning_rate": 6.71224723049444e-06, | |
| "loss": 0.4445, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 1.1559902200488998, | |
| "grad_norm": 1.2930001020431519, | |
| "learning_rate": 6.702585109783169e-06, | |
| "loss": 0.4308, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.1579462102689486, | |
| "grad_norm": 1.2925857305526733, | |
| "learning_rate": 6.6929157919026645e-06, | |
| "loss": 0.4179, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 1.1599022004889976, | |
| "grad_norm": 1.2512215375900269, | |
| "learning_rate": 6.683239317727075e-06, | |
| "loss": 0.428, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 1.1618581907090464, | |
| "grad_norm": 1.1987147331237793, | |
| "learning_rate": 6.6735557281608e-06, | |
| "loss": 0.4581, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 1.1638141809290954, | |
| "grad_norm": 1.2744587659835815, | |
| "learning_rate": 6.663865064138316e-06, | |
| "loss": 0.4489, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 1.1657701711491442, | |
| "grad_norm": 1.248136043548584, | |
| "learning_rate": 6.654167366624009e-06, | |
| "loss": 0.4415, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.1677261613691932, | |
| "grad_norm": 1.239186406135559, | |
| "learning_rate": 6.644462676611993e-06, | |
| "loss": 0.4552, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 1.169682151589242, | |
| "grad_norm": 1.2165157794952393, | |
| "learning_rate": 6.634751035125943e-06, | |
| "loss": 0.4383, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 1.171638141809291, | |
| "grad_norm": 1.3180150985717773, | |
| "learning_rate": 6.625032483218917e-06, | |
| "loss": 0.4427, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 1.17359413202934, | |
| "grad_norm": 1.3215945959091187, | |
| "learning_rate": 6.615307061973185e-06, | |
| "loss": 0.4434, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 1.1755501222493887, | |
| "grad_norm": 1.2843338251113892, | |
| "learning_rate": 6.605574812500057e-06, | |
| "loss": 0.4563, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.1775061124694377, | |
| "grad_norm": 1.2404955625534058, | |
| "learning_rate": 6.595835775939709e-06, | |
| "loss": 0.4446, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 1.1794621026894865, | |
| "grad_norm": 1.203095555305481, | |
| "learning_rate": 6.586089993461e-06, | |
| "loss": 0.4288, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 1.1814180929095355, | |
| "grad_norm": 1.2812986373901367, | |
| "learning_rate": 6.576337506261314e-06, | |
| "loss": 0.4431, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 1.1833740831295843, | |
| "grad_norm": 1.2452881336212158, | |
| "learning_rate": 6.566578355566371e-06, | |
| "loss": 0.4602, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 1.1853300733496333, | |
| "grad_norm": 1.2354931831359863, | |
| "learning_rate": 6.55681258263006e-06, | |
| "loss": 0.4319, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.1872860635696822, | |
| "grad_norm": 1.230417251586914, | |
| "learning_rate": 6.547040228734268e-06, | |
| "loss": 0.4493, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 1.189242053789731, | |
| "grad_norm": 1.218149185180664, | |
| "learning_rate": 6.537261335188696e-06, | |
| "loss": 0.4361, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 1.19119804400978, | |
| "grad_norm": 1.2415145635604858, | |
| "learning_rate": 6.527475943330691e-06, | |
| "loss": 0.4396, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 1.1931540342298288, | |
| "grad_norm": 1.282718300819397, | |
| "learning_rate": 6.517684094525071e-06, | |
| "loss": 0.4397, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 1.1951100244498778, | |
| "grad_norm": 1.1451632976531982, | |
| "learning_rate": 6.507885830163946e-06, | |
| "loss": 0.4564, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.1970660146699266, | |
| "grad_norm": 1.249971628189087, | |
| "learning_rate": 6.498081191666549e-06, | |
| "loss": 0.4565, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 1.1990220048899756, | |
| "grad_norm": 1.2496131658554077, | |
| "learning_rate": 6.488270220479055e-06, | |
| "loss": 0.4351, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 1.2009779951100246, | |
| "grad_norm": 1.2109962701797485, | |
| "learning_rate": 6.478452958074411e-06, | |
| "loss": 0.4459, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 1.2029339853300733, | |
| "grad_norm": 1.2337491512298584, | |
| "learning_rate": 6.468629445952156e-06, | |
| "loss": 0.4373, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 1.2048899755501223, | |
| "grad_norm": 1.3091925382614136, | |
| "learning_rate": 6.458799725638249e-06, | |
| "loss": 0.4352, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.2068459657701711, | |
| "grad_norm": 1.2573388814926147, | |
| "learning_rate": 6.448963838684893e-06, | |
| "loss": 0.4419, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 1.2088019559902201, | |
| "grad_norm": 1.2169891595840454, | |
| "learning_rate": 6.439121826670357e-06, | |
| "loss": 0.4326, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 1.2107579462102689, | |
| "grad_norm": 1.1968295574188232, | |
| "learning_rate": 6.429273731198803e-06, | |
| "loss": 0.4814, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 1.2127139364303179, | |
| "grad_norm": 1.1939140558242798, | |
| "learning_rate": 6.419419593900109e-06, | |
| "loss": 0.4449, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 1.2146699266503667, | |
| "grad_norm": 1.280173420906067, | |
| "learning_rate": 6.40955945642969e-06, | |
| "loss": 0.4637, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.2166259168704157, | |
| "grad_norm": 1.2060580253601074, | |
| "learning_rate": 6.399693360468332e-06, | |
| "loss": 0.4399, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 1.2185819070904644, | |
| "grad_norm": 1.2972716093063354, | |
| "learning_rate": 6.3898213477220005e-06, | |
| "loss": 0.4381, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 1.2205378973105134, | |
| "grad_norm": 1.238663911819458, | |
| "learning_rate": 6.379943459921677e-06, | |
| "loss": 0.4497, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 1.2224938875305624, | |
| "grad_norm": 1.3543317317962646, | |
| "learning_rate": 6.37005973882318e-06, | |
| "loss": 0.4278, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 1.2244498777506112, | |
| "grad_norm": 1.2317306995391846, | |
| "learning_rate": 6.360170226206981e-06, | |
| "loss": 0.4512, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.2264058679706602, | |
| "grad_norm": 1.3280153274536133, | |
| "learning_rate": 6.350274963878035e-06, | |
| "loss": 0.4476, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 1.228361858190709, | |
| "grad_norm": 1.1947249174118042, | |
| "learning_rate": 6.340373993665607e-06, | |
| "loss": 0.4531, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 1.230317848410758, | |
| "grad_norm": 1.2678272724151611, | |
| "learning_rate": 6.330467357423084e-06, | |
| "loss": 0.4426, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 1.2322738386308068, | |
| "grad_norm": 1.2685966491699219, | |
| "learning_rate": 6.32055509702781e-06, | |
| "loss": 0.4241, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 1.2342298288508557, | |
| "grad_norm": 1.3568520545959473, | |
| "learning_rate": 6.310637254380898e-06, | |
| "loss": 0.4347, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.2361858190709047, | |
| "grad_norm": 1.3837932348251343, | |
| "learning_rate": 6.300713871407062e-06, | |
| "loss": 0.4571, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 1.2381418092909535, | |
| "grad_norm": 1.2901196479797363, | |
| "learning_rate": 6.2907849900544345e-06, | |
| "loss": 0.4476, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 1.2400977995110025, | |
| "grad_norm": 1.3630965948104858, | |
| "learning_rate": 6.280850652294391e-06, | |
| "loss": 0.4545, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 1.2420537897310513, | |
| "grad_norm": 1.279327392578125, | |
| "learning_rate": 6.2709109001213744e-06, | |
| "loss": 0.4747, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 1.2440097799511003, | |
| "grad_norm": 1.284744381904602, | |
| "learning_rate": 6.2609657755527135e-06, | |
| "loss": 0.4349, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.245965770171149, | |
| "grad_norm": 1.3701122999191284, | |
| "learning_rate": 6.251015320628443e-06, | |
| "loss": 0.4491, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 1.247921760391198, | |
| "grad_norm": 1.430738091468811, | |
| "learning_rate": 6.24105957741114e-06, | |
| "loss": 0.4472, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 1.249877750611247, | |
| "grad_norm": 1.243706226348877, | |
| "learning_rate": 6.231098587985727e-06, | |
| "loss": 0.4539, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 1.2518337408312958, | |
| "grad_norm": 1.4228640794754028, | |
| "learning_rate": 6.22113239445931e-06, | |
| "loss": 0.4669, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 1.2537897310513446, | |
| "grad_norm": 1.194962501525879, | |
| "learning_rate": 6.211161038960989e-06, | |
| "loss": 0.4454, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.2557457212713936, | |
| "grad_norm": 1.2327982187271118, | |
| "learning_rate": 6.201184563641687e-06, | |
| "loss": 0.4355, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 1.2577017114914426, | |
| "grad_norm": 1.328300952911377, | |
| "learning_rate": 6.191203010673969e-06, | |
| "loss": 0.4593, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 1.2596577017114914, | |
| "grad_norm": 1.146906852722168, | |
| "learning_rate": 6.1812164222518626e-06, | |
| "loss": 0.4655, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 1.2616136919315404, | |
| "grad_norm": 1.2471123933792114, | |
| "learning_rate": 6.171224840590684e-06, | |
| "loss": 0.4396, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 1.2635696821515894, | |
| "grad_norm": 1.2813622951507568, | |
| "learning_rate": 6.161228307926859e-06, | |
| "loss": 0.4462, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.2655256723716382, | |
| "grad_norm": 1.3747153282165527, | |
| "learning_rate": 6.151226866517734e-06, | |
| "loss": 0.4441, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 1.267481662591687, | |
| "grad_norm": 1.335888385772705, | |
| "learning_rate": 6.141220558641416e-06, | |
| "loss": 0.4454, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 1.269437652811736, | |
| "grad_norm": 1.3105989694595337, | |
| "learning_rate": 6.131209426596571e-06, | |
| "loss": 0.4362, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 1.271393643031785, | |
| "grad_norm": 1.3146666288375854, | |
| "learning_rate": 6.12119351270227e-06, | |
| "loss": 0.4589, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 1.2733496332518337, | |
| "grad_norm": 1.1910253763198853, | |
| "learning_rate": 6.111172859297794e-06, | |
| "loss": 0.4445, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.2753056234718827, | |
| "grad_norm": 1.3139874935150146, | |
| "learning_rate": 6.101147508742456e-06, | |
| "loss": 0.4617, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 1.2772616136919315, | |
| "grad_norm": 1.2148922681808472, | |
| "learning_rate": 6.0911175034154236e-06, | |
| "loss": 0.4349, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 1.2792176039119805, | |
| "grad_norm": 1.3743414878845215, | |
| "learning_rate": 6.081082885715547e-06, | |
| "loss": 0.4399, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 1.2811735941320292, | |
| "grad_norm": 1.2060234546661377, | |
| "learning_rate": 6.07104369806117e-06, | |
| "loss": 0.422, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 1.2831295843520782, | |
| "grad_norm": 1.2871215343475342, | |
| "learning_rate": 6.060999982889955e-06, | |
| "loss": 0.4484, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.2850855745721272, | |
| "grad_norm": 1.739700436592102, | |
| "learning_rate": 6.050951782658705e-06, | |
| "loss": 0.4488, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 1.287041564792176, | |
| "grad_norm": 1.2554281949996948, | |
| "learning_rate": 6.040899139843177e-06, | |
| "loss": 0.4337, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 1.288997555012225, | |
| "grad_norm": 1.2270786762237549, | |
| "learning_rate": 6.030842096937916e-06, | |
| "loss": 0.4533, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 1.2909535452322738, | |
| "grad_norm": 1.1629031896591187, | |
| "learning_rate": 6.020780696456059e-06, | |
| "loss": 0.461, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 1.2929095354523228, | |
| "grad_norm": 1.3253370523452759, | |
| "learning_rate": 6.010714980929168e-06, | |
| "loss": 0.4264, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.2948655256723716, | |
| "grad_norm": 1.1790844202041626, | |
| "learning_rate": 6.000644992907044e-06, | |
| "loss": 0.4556, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 1.2968215158924206, | |
| "grad_norm": 1.265358328819275, | |
| "learning_rate": 5.990570774957548e-06, | |
| "loss": 0.4357, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 1.2987775061124696, | |
| "grad_norm": 1.7498961687088013, | |
| "learning_rate": 5.9804923696664255e-06, | |
| "loss": 0.4429, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 1.3007334963325183, | |
| "grad_norm": 1.264091968536377, | |
| "learning_rate": 5.970409819637116e-06, | |
| "loss": 0.448, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 1.302689486552567, | |
| "grad_norm": 1.2229095697402954, | |
| "learning_rate": 5.960323167490588e-06, | |
| "loss": 0.4601, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.304645476772616, | |
| "grad_norm": 1.2520877122879028, | |
| "learning_rate": 5.950232455865142e-06, | |
| "loss": 0.4421, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 1.306601466992665, | |
| "grad_norm": 1.170745849609375, | |
| "learning_rate": 5.940137727416247e-06, | |
| "loss": 0.4475, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 1.3085574572127139, | |
| "grad_norm": 1.2222089767456055, | |
| "learning_rate": 5.930039024816344e-06, | |
| "loss": 0.4657, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 1.3105134474327629, | |
| "grad_norm": 1.185941457748413, | |
| "learning_rate": 5.919936390754679e-06, | |
| "loss": 0.4552, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 1.3124694376528117, | |
| "grad_norm": 1.2470475435256958, | |
| "learning_rate": 5.9098298679371155e-06, | |
| "loss": 0.4536, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.3144254278728607, | |
| "grad_norm": 1.2581121921539307, | |
| "learning_rate": 5.8997194990859545e-06, | |
| "loss": 0.4358, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 1.3163814180929094, | |
| "grad_norm": 1.1885335445404053, | |
| "learning_rate": 5.889605326939757e-06, | |
| "loss": 0.4403, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.3183374083129584, | |
| "grad_norm": 1.1669498682022095, | |
| "learning_rate": 5.87948739425316e-06, | |
| "loss": 0.4673, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 1.3202933985330074, | |
| "grad_norm": 1.2828259468078613, | |
| "learning_rate": 5.8693657437966955e-06, | |
| "loss": 0.4404, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.3222493887530562, | |
| "grad_norm": 1.2900339365005493, | |
| "learning_rate": 5.859240418356614e-06, | |
| "loss": 0.4643, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.3242053789731052, | |
| "grad_norm": 1.2165307998657227, | |
| "learning_rate": 5.849111460734702e-06, | |
| "loss": 0.4378, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.326161369193154, | |
| "grad_norm": 1.3515350818634033, | |
| "learning_rate": 5.838978913748096e-06, | |
| "loss": 0.4428, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 1.328117359413203, | |
| "grad_norm": 1.233166217803955, | |
| "learning_rate": 5.828842820229106e-06, | |
| "loss": 0.4541, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.3300733496332517, | |
| "grad_norm": 1.1359091997146606, | |
| "learning_rate": 5.818703223025036e-06, | |
| "loss": 0.4565, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 1.3320293398533007, | |
| "grad_norm": 1.3372867107391357, | |
| "learning_rate": 5.808560164998002e-06, | |
| "loss": 0.4324, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.3339853300733497, | |
| "grad_norm": 1.1790531873703003, | |
| "learning_rate": 5.7984136890247455e-06, | |
| "loss": 0.4347, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 1.3359413202933985, | |
| "grad_norm": 1.2366808652877808, | |
| "learning_rate": 5.78826383799646e-06, | |
| "loss": 0.4502, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.3378973105134475, | |
| "grad_norm": 1.2719463109970093, | |
| "learning_rate": 5.778110654818602e-06, | |
| "loss": 0.4473, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 1.3398533007334963, | |
| "grad_norm": 1.146131157875061, | |
| "learning_rate": 5.767954182410717e-06, | |
| "loss": 0.4467, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.3418092909535453, | |
| "grad_norm": 1.1366889476776123, | |
| "learning_rate": 5.7577944637062545e-06, | |
| "loss": 0.4312, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.343765281173594, | |
| "grad_norm": 1.2757666110992432, | |
| "learning_rate": 5.747631541652388e-06, | |
| "loss": 0.4372, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.345721271393643, | |
| "grad_norm": 1.239727258682251, | |
| "learning_rate": 5.737465459209825e-06, | |
| "loss": 0.4536, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 1.347677261613692, | |
| "grad_norm": 1.815891146659851, | |
| "learning_rate": 5.727296259352645e-06, | |
| "loss": 0.4632, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.3496332518337408, | |
| "grad_norm": 1.2289308309555054, | |
| "learning_rate": 5.717123985068094e-06, | |
| "loss": 0.457, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 1.3515892420537896, | |
| "grad_norm": 1.3102158308029175, | |
| "learning_rate": 5.706948679356417e-06, | |
| "loss": 0.4296, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.3535452322738386, | |
| "grad_norm": 1.2064359188079834, | |
| "learning_rate": 5.696770385230679e-06, | |
| "loss": 0.4511, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 1.3555012224938876, | |
| "grad_norm": 1.2940292358398438, | |
| "learning_rate": 5.68658914571657e-06, | |
| "loss": 0.4729, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.3574572127139364, | |
| "grad_norm": 1.1980189085006714, | |
| "learning_rate": 5.676405003852238e-06, | |
| "loss": 0.429, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 1.3594132029339854, | |
| "grad_norm": 1.2084240913391113, | |
| "learning_rate": 5.666218002688094e-06, | |
| "loss": 0.4438, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.3613691931540342, | |
| "grad_norm": 1.3325614929199219, | |
| "learning_rate": 5.656028185286638e-06, | |
| "loss": 0.4286, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.3633251833740831, | |
| "grad_norm": 1.2942312955856323, | |
| "learning_rate": 5.645835594722276e-06, | |
| "loss": 0.4467, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.365281173594132, | |
| "grad_norm": 1.263159990310669, | |
| "learning_rate": 5.635640274081135e-06, | |
| "loss": 0.4596, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 1.367237163814181, | |
| "grad_norm": 1.2454265356063843, | |
| "learning_rate": 5.625442266460882e-06, | |
| "loss": 0.4175, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.36919315403423, | |
| "grad_norm": 1.3009308576583862, | |
| "learning_rate": 5.615241614970546e-06, | |
| "loss": 0.4525, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 1.3711491442542787, | |
| "grad_norm": 1.222086787223816, | |
| "learning_rate": 5.605038362730326e-06, | |
| "loss": 0.45, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.3731051344743277, | |
| "grad_norm": 1.1456713676452637, | |
| "learning_rate": 5.594832552871423e-06, | |
| "loss": 0.4653, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 1.3750611246943765, | |
| "grad_norm": 1.2631255388259888, | |
| "learning_rate": 5.5846242285358424e-06, | |
| "loss": 0.4392, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.3770171149144255, | |
| "grad_norm": 1.1547008752822876, | |
| "learning_rate": 5.5744134328762225e-06, | |
| "loss": 0.4483, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 1.3789731051344742, | |
| "grad_norm": 1.2225359678268433, | |
| "learning_rate": 5.564200209055647e-06, | |
| "loss": 0.4491, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.3809290953545232, | |
| "grad_norm": 1.2074551582336426, | |
| "learning_rate": 5.553984600247464e-06, | |
| "loss": 0.4555, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.3828850855745722, | |
| "grad_norm": 1.287567138671875, | |
| "learning_rate": 5.543766649635104e-06, | |
| "loss": 0.4539, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.384841075794621, | |
| "grad_norm": 1.195177674293518, | |
| "learning_rate": 5.533546400411899e-06, | |
| "loss": 0.4323, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 1.3867970660146698, | |
| "grad_norm": 1.2475037574768066, | |
| "learning_rate": 5.523323895780891e-06, | |
| "loss": 0.4428, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.3887530562347188, | |
| "grad_norm": 1.2410409450531006, | |
| "learning_rate": 5.513099178954664e-06, | |
| "loss": 0.431, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 1.3907090464547678, | |
| "grad_norm": 1.2587379217147827, | |
| "learning_rate": 5.502872293155148e-06, | |
| "loss": 0.4299, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.3926650366748166, | |
| "grad_norm": 1.306764006614685, | |
| "learning_rate": 5.492643281613444e-06, | |
| "loss": 0.4626, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 1.3946210268948656, | |
| "grad_norm": 1.2155743837356567, | |
| "learning_rate": 5.482412187569638e-06, | |
| "loss": 0.4547, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.3965770171149146, | |
| "grad_norm": 1.238143801689148, | |
| "learning_rate": 5.472179054272618e-06, | |
| "loss": 0.4338, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 1.3985330073349633, | |
| "grad_norm": 1.276924729347229, | |
| "learning_rate": 5.4619439249798975e-06, | |
| "loss": 0.4446, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.400488997555012, | |
| "grad_norm": 1.1909351348876953, | |
| "learning_rate": 5.4517068429574215e-06, | |
| "loss": 0.461, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.402444987775061, | |
| "grad_norm": 1.248591661453247, | |
| "learning_rate": 5.441467851479391e-06, | |
| "loss": 0.4404, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.40440097799511, | |
| "grad_norm": 1.197350025177002, | |
| "learning_rate": 5.431226993828081e-06, | |
| "loss": 0.4338, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 1.4063569682151589, | |
| "grad_norm": 1.2777246236801147, | |
| "learning_rate": 5.420984313293653e-06, | |
| "loss": 0.4535, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.4083129584352079, | |
| "grad_norm": 1.214824914932251, | |
| "learning_rate": 5.4107398531739765e-06, | |
| "loss": 0.4535, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 1.4102689486552566, | |
| "grad_norm": 1.1792421340942383, | |
| "learning_rate": 5.400493656774441e-06, | |
| "loss": 0.4493, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.4122249388753056, | |
| "grad_norm": 1.18449068069458, | |
| "learning_rate": 5.3902457674077746e-06, | |
| "loss": 0.4536, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 1.4141809290953544, | |
| "grad_norm": 1.2407759428024292, | |
| "learning_rate": 5.379996228393868e-06, | |
| "loss": 0.454, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.4161369193154034, | |
| "grad_norm": 1.1889723539352417, | |
| "learning_rate": 5.3697450830595775e-06, | |
| "loss": 0.4459, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 1.4180929095354524, | |
| "grad_norm": 1.1876051425933838, | |
| "learning_rate": 5.359492374738557e-06, | |
| "loss": 0.4635, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.4200488997555012, | |
| "grad_norm": 1.1337804794311523, | |
| "learning_rate": 5.349238146771062e-06, | |
| "loss": 0.4414, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.4220048899755502, | |
| "grad_norm": 1.1479378938674927, | |
| "learning_rate": 5.3389824425037725e-06, | |
| "loss": 0.4517, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.423960880195599, | |
| "grad_norm": 1.1429476737976074, | |
| "learning_rate": 5.3287253052896125e-06, | |
| "loss": 0.453, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 1.425916870415648, | |
| "grad_norm": 1.1652535200119019, | |
| "learning_rate": 5.31846677848756e-06, | |
| "loss": 0.4603, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.4278728606356967, | |
| "grad_norm": 1.2222468852996826, | |
| "learning_rate": 5.308206905462468e-06, | |
| "loss": 0.4297, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 1.4298288508557457, | |
| "grad_norm": 1.1538606882095337, | |
| "learning_rate": 5.297945729584884e-06, | |
| "loss": 0.4306, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.4317848410757947, | |
| "grad_norm": 1.150429606437683, | |
| "learning_rate": 5.287683294230855e-06, | |
| "loss": 0.4585, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 1.4337408312958435, | |
| "grad_norm": 1.1533079147338867, | |
| "learning_rate": 5.277419642781759e-06, | |
| "loss": 0.4561, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.4356968215158923, | |
| "grad_norm": 1.2049838304519653, | |
| "learning_rate": 5.26715481862411e-06, | |
| "loss": 0.4361, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 1.4376528117359413, | |
| "grad_norm": 1.1533024311065674, | |
| "learning_rate": 5.256888865149383e-06, | |
| "loss": 0.438, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.4396088019559903, | |
| "grad_norm": 1.23749577999115, | |
| "learning_rate": 5.246621825753827e-06, | |
| "loss": 0.4632, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.441564792176039, | |
| "grad_norm": 1.206140160560608, | |
| "learning_rate": 5.236353743838277e-06, | |
| "loss": 0.4426, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.443520782396088, | |
| "grad_norm": 1.162009835243225, | |
| "learning_rate": 5.226084662807978e-06, | |
| "loss": 0.4433, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 1.445476772616137, | |
| "grad_norm": 1.247118353843689, | |
| "learning_rate": 5.2158146260724006e-06, | |
| "loss": 0.4787, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.4474327628361858, | |
| "grad_norm": 1.2741855382919312, | |
| "learning_rate": 5.20554367704505e-06, | |
| "loss": 0.4552, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 1.4493887530562346, | |
| "grad_norm": 1.235175609588623, | |
| "learning_rate": 5.1952718591432914e-06, | |
| "loss": 0.4606, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.4513447432762836, | |
| "grad_norm": 1.1611201763153076, | |
| "learning_rate": 5.184999215788164e-06, | |
| "loss": 0.4549, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 1.4533007334963326, | |
| "grad_norm": 1.2934192419052124, | |
| "learning_rate": 5.17472579040419e-06, | |
| "loss": 0.4549, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.4552567237163814, | |
| "grad_norm": 1.1462373733520508, | |
| "learning_rate": 5.1644516264192075e-06, | |
| "loss": 0.4494, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 1.4572127139364304, | |
| "grad_norm": 1.1914169788360596, | |
| "learning_rate": 5.154176767264168e-06, | |
| "loss": 0.4581, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.4591687041564791, | |
| "grad_norm": 1.1715911626815796, | |
| "learning_rate": 5.1439012563729676e-06, | |
| "loss": 0.4401, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.4611246943765281, | |
| "grad_norm": 1.205413579940796, | |
| "learning_rate": 5.133625137182252e-06, | |
| "loss": 0.4524, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.463080684596577, | |
| "grad_norm": 1.3116756677627563, | |
| "learning_rate": 5.123348453131242e-06, | |
| "loss": 0.4542, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 1.465036674816626, | |
| "grad_norm": 1.2234801054000854, | |
| "learning_rate": 5.113071247661545e-06, | |
| "loss": 0.4622, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.466992665036675, | |
| "grad_norm": 1.1696038246154785, | |
| "learning_rate": 5.102793564216972e-06, | |
| "loss": 0.4622, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 1.4689486552567237, | |
| "grad_norm": 1.1506364345550537, | |
| "learning_rate": 5.092515446243359e-06, | |
| "loss": 0.4483, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.4709046454767727, | |
| "grad_norm": 1.2517993450164795, | |
| "learning_rate": 5.08223693718837e-06, | |
| "loss": 0.4265, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 1.4728606356968215, | |
| "grad_norm": 1.1876658201217651, | |
| "learning_rate": 5.071958080501331e-06, | |
| "loss": 0.4341, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.4748166259168705, | |
| "grad_norm": 1.1794910430908203, | |
| "learning_rate": 5.061678919633033e-06, | |
| "loss": 0.4442, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 1.4767726161369192, | |
| "grad_norm": 1.1299569606781006, | |
| "learning_rate": 5.051399498035552e-06, | |
| "loss": 0.4679, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.4787286063569682, | |
| "grad_norm": 1.1907482147216797, | |
| "learning_rate": 5.041119859162068e-06, | |
| "loss": 0.4547, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.4806845965770172, | |
| "grad_norm": 1.182114839553833, | |
| "learning_rate": 5.030840046466682e-06, | |
| "loss": 0.4497, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.482640586797066, | |
| "grad_norm": 1.119644045829773, | |
| "learning_rate": 5.020560103404225e-06, | |
| "loss": 0.4554, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 1.4845965770171148, | |
| "grad_norm": 1.1454432010650635, | |
| "learning_rate": 5.01028007343008e-06, | |
| "loss": 0.4172, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.4865525672371638, | |
| "grad_norm": 1.2797613143920898, | |
| "learning_rate": 5e-06, | |
| "loss": 0.449, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 1.4885085574572128, | |
| "grad_norm": 1.1603306531906128, | |
| "learning_rate": 4.9897199265699205e-06, | |
| "loss": 0.4649, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.4904645476772616, | |
| "grad_norm": 1.2365909814834595, | |
| "learning_rate": 4.979439896595777e-06, | |
| "loss": 0.4654, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 1.4924205378973106, | |
| "grad_norm": 1.1844136714935303, | |
| "learning_rate": 4.9691599535333196e-06, | |
| "loss": 0.4438, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.4943765281173595, | |
| "grad_norm": 1.1799577474594116, | |
| "learning_rate": 4.958880140837934e-06, | |
| "loss": 0.4445, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 1.4963325183374083, | |
| "grad_norm": 1.2819160223007202, | |
| "learning_rate": 4.94860050196445e-06, | |
| "loss": 0.4355, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.498288508557457, | |
| "grad_norm": 1.1956461668014526, | |
| "learning_rate": 4.938321080366969e-06, | |
| "loss": 0.4289, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.500244498777506, | |
| "grad_norm": 1.2148059606552124, | |
| "learning_rate": 4.928041919498669e-06, | |
| "loss": 0.4284, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.502200488997555, | |
| "grad_norm": 1.1130567789077759, | |
| "learning_rate": 4.917763062811631e-06, | |
| "loss": 0.445, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 1.5041564792176039, | |
| "grad_norm": 1.1467176675796509, | |
| "learning_rate": 4.907484553756644e-06, | |
| "loss": 0.4366, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.5061124694376526, | |
| "grad_norm": 1.1647167205810547, | |
| "learning_rate": 4.897206435783029e-06, | |
| "loss": 0.4455, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.5080684596577019, | |
| "grad_norm": 1.1800556182861328, | |
| "learning_rate": 4.8869287523384564e-06, | |
| "loss": 0.4619, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.5100244498777506, | |
| "grad_norm": 1.2697025537490845, | |
| "learning_rate": 4.876651546868759e-06, | |
| "loss": 0.4578, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 1.5119804400977994, | |
| "grad_norm": 1.1891041994094849, | |
| "learning_rate": 4.86637486281775e-06, | |
| "loss": 0.4418, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.5139364303178484, | |
| "grad_norm": 1.183430552482605, | |
| "learning_rate": 4.856098743627035e-06, | |
| "loss": 0.4305, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 1.5158924205378974, | |
| "grad_norm": 1.1615206003189087, | |
| "learning_rate": 4.845823232735833e-06, | |
| "loss": 0.4285, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.5178484107579462, | |
| "grad_norm": 1.1649718284606934, | |
| "learning_rate": 4.835548373580793e-06, | |
| "loss": 0.4363, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.519804400977995, | |
| "grad_norm": 1.166445255279541, | |
| "learning_rate": 4.82527420959581e-06, | |
| "loss": 0.4441, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.521760391198044, | |
| "grad_norm": 1.2622777223587036, | |
| "learning_rate": 4.815000784211839e-06, | |
| "loss": 0.4278, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 1.523716381418093, | |
| "grad_norm": 1.223970890045166, | |
| "learning_rate": 4.80472814085671e-06, | |
| "loss": 0.4214, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.5256723716381417, | |
| "grad_norm": 1.2236289978027344, | |
| "learning_rate": 4.794456322954953e-06, | |
| "loss": 0.4461, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 1.5276283618581907, | |
| "grad_norm": 1.2569670677185059, | |
| "learning_rate": 4.784185373927601e-06, | |
| "loss": 0.4604, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.5295843520782397, | |
| "grad_norm": 1.1970974206924438, | |
| "learning_rate": 4.773915337192022e-06, | |
| "loss": 0.4328, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 1.5315403422982885, | |
| "grad_norm": 1.2718005180358887, | |
| "learning_rate": 4.763646256161724e-06, | |
| "loss": 0.4356, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.5334963325183373, | |
| "grad_norm": 1.260063648223877, | |
| "learning_rate": 4.753378174246174e-06, | |
| "loss": 0.4635, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 1.5354523227383863, | |
| "grad_norm": 1.3000822067260742, | |
| "learning_rate": 4.743111134850618e-06, | |
| "loss": 0.4376, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.5374083129584353, | |
| "grad_norm": 1.159167766571045, | |
| "learning_rate": 4.7328451813758905e-06, | |
| "loss": 0.4489, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.539364303178484, | |
| "grad_norm": 1.2092034816741943, | |
| "learning_rate": 4.722580357218242e-06, | |
| "loss": 0.4416, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.541320293398533, | |
| "grad_norm": 1.2961796522140503, | |
| "learning_rate": 4.7123167057691446e-06, | |
| "loss": 0.4428, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 1.543276283618582, | |
| "grad_norm": 1.1921921968460083, | |
| "learning_rate": 4.702054270415118e-06, | |
| "loss": 0.4429, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.5452322738386308, | |
| "grad_norm": 1.2297002077102661, | |
| "learning_rate": 4.6917930945375325e-06, | |
| "loss": 0.4415, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 1.5471882640586796, | |
| "grad_norm": 1.1777838468551636, | |
| "learning_rate": 4.681533221512441e-06, | |
| "loss": 0.457, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.5491442542787286, | |
| "grad_norm": 1.2187058925628662, | |
| "learning_rate": 4.671274694710388e-06, | |
| "loss": 0.4543, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 1.5511002444987776, | |
| "grad_norm": 1.138017177581787, | |
| "learning_rate": 4.661017557496228e-06, | |
| "loss": 0.4923, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.5530562347188264, | |
| "grad_norm": 1.2310172319412231, | |
| "learning_rate": 4.65076185322894e-06, | |
| "loss": 0.4381, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 1.5550122249388751, | |
| "grad_norm": 1.2169889211654663, | |
| "learning_rate": 4.640507625261446e-06, | |
| "loss": 0.4541, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.5569682151589244, | |
| "grad_norm": 1.1405876874923706, | |
| "learning_rate": 4.630254916940424e-06, | |
| "loss": 0.4409, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.5589242053789731, | |
| "grad_norm": 1.1320923566818237, | |
| "learning_rate": 4.6200037716061334e-06, | |
| "loss": 0.4141, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.560880195599022, | |
| "grad_norm": 1.2325752973556519, | |
| "learning_rate": 4.609754232592225e-06, | |
| "loss": 0.4283, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 1.562836185819071, | |
| "grad_norm": 1.3088810443878174, | |
| "learning_rate": 4.599506343225562e-06, | |
| "loss": 0.4247, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.56479217603912, | |
| "grad_norm": 1.162494421005249, | |
| "learning_rate": 4.589260146826025e-06, | |
| "loss": 0.4396, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 1.5667481662591687, | |
| "grad_norm": 1.2321451902389526, | |
| "learning_rate": 4.579015686706348e-06, | |
| "loss": 0.4313, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.5687041564792175, | |
| "grad_norm": 1.2772425413131714, | |
| "learning_rate": 4.56877300617192e-06, | |
| "loss": 0.4392, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 1.5706601466992665, | |
| "grad_norm": 1.1612458229064941, | |
| "learning_rate": 4.55853214852061e-06, | |
| "loss": 0.4758, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.5726161369193155, | |
| "grad_norm": 1.1374866962432861, | |
| "learning_rate": 4.548293157042581e-06, | |
| "loss": 0.4502, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 1.5745721271393642, | |
| "grad_norm": 1.2353960275650024, | |
| "learning_rate": 4.538056075020104e-06, | |
| "loss": 0.4193, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.5765281173594132, | |
| "grad_norm": 1.155655026435852, | |
| "learning_rate": 4.527820945727383e-06, | |
| "loss": 0.4514, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.5784841075794622, | |
| "grad_norm": 1.2046425342559814, | |
| "learning_rate": 4.517587812430364e-06, | |
| "loss": 0.4365, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.580440097799511, | |
| "grad_norm": 1.2539176940917969, | |
| "learning_rate": 4.507356718386557e-06, | |
| "loss": 0.4466, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 1.5823960880195598, | |
| "grad_norm": 1.1303858757019043, | |
| "learning_rate": 4.497127706844852e-06, | |
| "loss": 0.4547, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.5843520782396088, | |
| "grad_norm": 1.285823106765747, | |
| "learning_rate": 4.486900821045337e-06, | |
| "loss": 0.4502, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 1.5863080684596578, | |
| "grad_norm": 1.2383862733840942, | |
| "learning_rate": 4.47667610421911e-06, | |
| "loss": 0.4685, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.5882640586797065, | |
| "grad_norm": 1.2859257459640503, | |
| "learning_rate": 4.466453599588103e-06, | |
| "loss": 0.4407, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 1.5902200488997555, | |
| "grad_norm": 1.242066502571106, | |
| "learning_rate": 4.4562333503648966e-06, | |
| "loss": 0.4465, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.5921760391198045, | |
| "grad_norm": 1.121509313583374, | |
| "learning_rate": 4.446015399752536e-06, | |
| "loss": 0.4516, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 1.5941320293398533, | |
| "grad_norm": 1.2806360721588135, | |
| "learning_rate": 4.435799790944356e-06, | |
| "loss": 0.455, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.596088019559902, | |
| "grad_norm": 1.2540717124938965, | |
| "learning_rate": 4.42558656712378e-06, | |
| "loss": 0.447, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.598044009779951, | |
| "grad_norm": 1.1869229078292847, | |
| "learning_rate": 4.41537577146416e-06, | |
| "loss": 0.4575, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.2167538404464722, | |
| "learning_rate": 4.405167447128578e-06, | |
| "loss": 0.4326, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 1.6019559902200489, | |
| "grad_norm": 1.1809440851211548, | |
| "learning_rate": 4.394961637269674e-06, | |
| "loss": 0.4367, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 1.6039119804400976, | |
| "grad_norm": 1.223663568496704, | |
| "learning_rate": 4.384758385029457e-06, | |
| "loss": 0.454, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 1.6058679706601469, | |
| "grad_norm": 1.1459578275680542, | |
| "learning_rate": 4.374557733539119e-06, | |
| "loss": 0.4498, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.6078239608801956, | |
| "grad_norm": 1.174612283706665, | |
| "learning_rate": 4.364359725918868e-06, | |
| "loss": 0.449, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 1.6097799511002444, | |
| "grad_norm": 1.1853433847427368, | |
| "learning_rate": 4.354164405277725e-06, | |
| "loss": 0.4354, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 1.6117359413202934, | |
| "grad_norm": 1.134567141532898, | |
| "learning_rate": 4.3439718147133625e-06, | |
| "loss": 0.4581, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 1.6136919315403424, | |
| "grad_norm": 1.2028684616088867, | |
| "learning_rate": 4.333781997311909e-06, | |
| "loss": 0.4814, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 1.6156479217603912, | |
| "grad_norm": 1.2520511150360107, | |
| "learning_rate": 4.323594996147763e-06, | |
| "loss": 0.4519, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.61760391198044, | |
| "grad_norm": 1.163762092590332, | |
| "learning_rate": 4.3134108542834315e-06, | |
| "loss": 0.4534, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 1.619559902200489, | |
| "grad_norm": 1.194406270980835, | |
| "learning_rate": 4.3032296147693225e-06, | |
| "loss": 0.4575, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 1.621515892420538, | |
| "grad_norm": 1.2662689685821533, | |
| "learning_rate": 4.293051320643583e-06, | |
| "loss": 0.439, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 1.6234718826405867, | |
| "grad_norm": 1.2627875804901123, | |
| "learning_rate": 4.28287601493191e-06, | |
| "loss": 0.4275, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 1.6254278728606357, | |
| "grad_norm": 1.2528843879699707, | |
| "learning_rate": 4.272703740647356e-06, | |
| "loss": 0.4659, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.6273838630806847, | |
| "grad_norm": 1.188919186592102, | |
| "learning_rate": 4.262534540790176e-06, | |
| "loss": 0.4387, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 1.6293398533007335, | |
| "grad_norm": 1.3087656497955322, | |
| "learning_rate": 4.252368458347614e-06, | |
| "loss": 0.4281, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 1.6312958435207823, | |
| "grad_norm": 1.19478440284729, | |
| "learning_rate": 4.2422055362937455e-06, | |
| "loss": 0.4403, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 1.6332518337408313, | |
| "grad_norm": 1.2115154266357422, | |
| "learning_rate": 4.232045817589285e-06, | |
| "loss": 0.4712, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 1.6352078239608803, | |
| "grad_norm": 1.2290632724761963, | |
| "learning_rate": 4.2218893451814005e-06, | |
| "loss": 0.4436, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.637163814180929, | |
| "grad_norm": 1.1975177526474, | |
| "learning_rate": 4.211736162003543e-06, | |
| "loss": 0.4334, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 1.639119804400978, | |
| "grad_norm": 1.1870671510696411, | |
| "learning_rate": 4.201586310975256e-06, | |
| "loss": 0.4428, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 1.641075794621027, | |
| "grad_norm": 1.3159699440002441, | |
| "learning_rate": 4.191439835001999e-06, | |
| "loss": 0.4347, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 1.6430317848410758, | |
| "grad_norm": 1.3310630321502686, | |
| "learning_rate": 4.181296776974963e-06, | |
| "loss": 0.4486, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 1.6449877750611246, | |
| "grad_norm": 1.192111849784851, | |
| "learning_rate": 4.171157179770896e-06, | |
| "loss": 0.4456, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.6469437652811736, | |
| "grad_norm": 1.2716262340545654, | |
| "learning_rate": 4.161021086251906e-06, | |
| "loss": 0.4547, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 1.6488997555012226, | |
| "grad_norm": 1.2495301961898804, | |
| "learning_rate": 4.1508885392653e-06, | |
| "loss": 0.4401, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 1.6508557457212714, | |
| "grad_norm": 1.1762745380401611, | |
| "learning_rate": 4.140759581643386e-06, | |
| "loss": 0.4634, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 1.6528117359413201, | |
| "grad_norm": 1.2814241647720337, | |
| "learning_rate": 4.1306342562033045e-06, | |
| "loss": 0.4472, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 1.6547677261613694, | |
| "grad_norm": 1.3218616247177124, | |
| "learning_rate": 4.120512605746842e-06, | |
| "loss": 0.4415, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.6567237163814181, | |
| "grad_norm": 1.1885900497436523, | |
| "learning_rate": 4.110394673060244e-06, | |
| "loss": 0.4274, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 1.658679706601467, | |
| "grad_norm": 1.1911470890045166, | |
| "learning_rate": 4.100280500914046e-06, | |
| "loss": 0.4321, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 1.660635696821516, | |
| "grad_norm": 2.320434808731079, | |
| "learning_rate": 4.090170132062885e-06, | |
| "loss": 0.4374, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 1.662591687041565, | |
| "grad_norm": 1.2307498455047607, | |
| "learning_rate": 4.080063609245322e-06, | |
| "loss": 0.4508, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 1.6645476772616137, | |
| "grad_norm": 1.2727370262145996, | |
| "learning_rate": 4.0699609751836575e-06, | |
| "loss": 0.4483, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.6665036674816625, | |
| "grad_norm": 1.1779570579528809, | |
| "learning_rate": 4.059862272583755e-06, | |
| "loss": 0.4548, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 1.6684596577017115, | |
| "grad_norm": 1.2141393423080444, | |
| "learning_rate": 4.049767544134859e-06, | |
| "loss": 0.4404, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 1.6704156479217604, | |
| "grad_norm": 1.2709059715270996, | |
| "learning_rate": 4.0396768325094135e-06, | |
| "loss": 0.4378, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 1.6723716381418092, | |
| "grad_norm": 1.163887619972229, | |
| "learning_rate": 4.029590180362884e-06, | |
| "loss": 0.4399, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 1.6743276283618582, | |
| "grad_norm": 1.1120645999908447, | |
| "learning_rate": 4.019507630333577e-06, | |
| "loss": 0.4359, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.6762836185819072, | |
| "grad_norm": 1.220585584640503, | |
| "learning_rate": 4.009429225042453e-06, | |
| "loss": 0.4357, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 1.678239608801956, | |
| "grad_norm": 1.2459168434143066, | |
| "learning_rate": 3.999355007092958e-06, | |
| "loss": 0.4301, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 1.6801955990220048, | |
| "grad_norm": 1.132286548614502, | |
| "learning_rate": 3.989285019070834e-06, | |
| "loss": 0.4194, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 1.6821515892420538, | |
| "grad_norm": 1.3133598566055298, | |
| "learning_rate": 3.979219303543942e-06, | |
| "loss": 0.4526, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 1.6841075794621028, | |
| "grad_norm": 1.2260628938674927, | |
| "learning_rate": 3.969157903062086e-06, | |
| "loss": 0.4506, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.6860635696821515, | |
| "grad_norm": 1.1542868614196777, | |
| "learning_rate": 3.959100860156824e-06, | |
| "loss": 0.4433, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 1.6880195599022005, | |
| "grad_norm": 1.1528064012527466, | |
| "learning_rate": 3.949048217341297e-06, | |
| "loss": 0.4328, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 1.6899755501222495, | |
| "grad_norm": 1.2643218040466309, | |
| "learning_rate": 3.939000017110046e-06, | |
| "loss": 0.4186, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 1.6919315403422983, | |
| "grad_norm": 1.1736092567443848, | |
| "learning_rate": 3.928956301938831e-06, | |
| "loss": 0.4105, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 1.693887530562347, | |
| "grad_norm": 1.2194820642471313, | |
| "learning_rate": 3.918917114284456e-06, | |
| "loss": 0.4274, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.695843520782396, | |
| "grad_norm": 1.1990725994110107, | |
| "learning_rate": 3.908882496584578e-06, | |
| "loss": 0.4539, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 1.697799511002445, | |
| "grad_norm": 1.1804009675979614, | |
| "learning_rate": 3.898852491257547e-06, | |
| "loss": 0.4599, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 1.6997555012224939, | |
| "grad_norm": 1.1332404613494873, | |
| "learning_rate": 3.888827140702207e-06, | |
| "loss": 0.4666, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 1.7017114914425426, | |
| "grad_norm": 1.2241950035095215, | |
| "learning_rate": 3.87880648729773e-06, | |
| "loss": 0.44, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 1.7036674816625916, | |
| "grad_norm": 1.224329948425293, | |
| "learning_rate": 3.868790573403431e-06, | |
| "loss": 0.4587, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.7056234718826406, | |
| "grad_norm": 1.2016890048980713, | |
| "learning_rate": 3.858779441358588e-06, | |
| "loss": 0.4683, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 1.7075794621026894, | |
| "grad_norm": 1.190882682800293, | |
| "learning_rate": 3.848773133482267e-06, | |
| "loss": 0.4436, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 1.7095354523227384, | |
| "grad_norm": 1.2475522756576538, | |
| "learning_rate": 3.838771692073144e-06, | |
| "loss": 0.4367, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 1.7114914425427874, | |
| "grad_norm": 1.1946569681167603, | |
| "learning_rate": 3.828775159409316e-06, | |
| "loss": 0.4514, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 1.7134474327628362, | |
| "grad_norm": 1.1988422870635986, | |
| "learning_rate": 3.818783577748138e-06, | |
| "loss": 0.4411, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.715403422982885, | |
| "grad_norm": 1.2769694328308105, | |
| "learning_rate": 3.808796989326034e-06, | |
| "loss": 0.4413, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 1.717359413202934, | |
| "grad_norm": 1.108680009841919, | |
| "learning_rate": 3.7988154363583153e-06, | |
| "loss": 0.4479, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 1.719315403422983, | |
| "grad_norm": 1.19581937789917, | |
| "learning_rate": 3.788838961039012e-06, | |
| "loss": 0.446, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 1.7212713936430317, | |
| "grad_norm": 1.181363821029663, | |
| "learning_rate": 3.7788676055406913e-06, | |
| "loss": 0.4359, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 1.7232273838630807, | |
| "grad_norm": 1.1199826002120972, | |
| "learning_rate": 3.768901412014273e-06, | |
| "loss": 0.4236, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.7251833740831297, | |
| "grad_norm": 1.1632542610168457, | |
| "learning_rate": 3.7589404225888624e-06, | |
| "loss": 0.4402, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 1.7271393643031785, | |
| "grad_norm": 1.205407738685608, | |
| "learning_rate": 3.748984679371558e-06, | |
| "loss": 0.4678, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 1.7290953545232273, | |
| "grad_norm": 1.1697795391082764, | |
| "learning_rate": 3.7390342244472886e-06, | |
| "loss": 0.4114, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 1.7310513447432763, | |
| "grad_norm": 1.1678640842437744, | |
| "learning_rate": 3.729089099878627e-06, | |
| "loss": 0.4443, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 1.7330073349633253, | |
| "grad_norm": 1.1855502128601074, | |
| "learning_rate": 3.719149347705609e-06, | |
| "loss": 0.4461, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.734963325183374, | |
| "grad_norm": 1.1547439098358154, | |
| "learning_rate": 3.7092150099455676e-06, | |
| "loss": 0.4517, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 1.736919315403423, | |
| "grad_norm": 1.163944959640503, | |
| "learning_rate": 3.6992861285929395e-06, | |
| "loss": 0.4456, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 1.738875305623472, | |
| "grad_norm": 1.1487215757369995, | |
| "learning_rate": 3.689362745619103e-06, | |
| "loss": 0.4514, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 1.7408312958435208, | |
| "grad_norm": 1.2750896215438843, | |
| "learning_rate": 3.6794449029721913e-06, | |
| "loss": 0.4227, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 1.7427872860635696, | |
| "grad_norm": 1.1833292245864868, | |
| "learning_rate": 3.6695326425769156e-06, | |
| "loss": 0.473, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.7447432762836186, | |
| "grad_norm": 1.2018202543258667, | |
| "learning_rate": 3.659626006334395e-06, | |
| "loss": 0.4332, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 1.7466992665036676, | |
| "grad_norm": 1.1667495965957642, | |
| "learning_rate": 3.649725036121966e-06, | |
| "loss": 0.4529, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 1.7486552567237164, | |
| "grad_norm": 1.1951237916946411, | |
| "learning_rate": 3.6398297737930212e-06, | |
| "loss": 0.4355, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 1.7506112469437651, | |
| "grad_norm": 1.1440187692642212, | |
| "learning_rate": 3.629940261176822e-06, | |
| "loss": 0.423, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 1.7525672371638141, | |
| "grad_norm": 1.2192459106445312, | |
| "learning_rate": 3.620056540078323e-06, | |
| "loss": 0.4473, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.7545232273838631, | |
| "grad_norm": 1.1615161895751953, | |
| "learning_rate": 3.6101786522780024e-06, | |
| "loss": 0.4584, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 1.756479217603912, | |
| "grad_norm": 1.1762903928756714, | |
| "learning_rate": 3.6003066395316704e-06, | |
| "loss": 0.4569, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 1.758435207823961, | |
| "grad_norm": 1.2474490404129028, | |
| "learning_rate": 3.590440543570311e-06, | |
| "loss": 0.4491, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 1.76039119804401, | |
| "grad_norm": 1.166661024093628, | |
| "learning_rate": 3.5805804060998926e-06, | |
| "loss": 0.4195, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 1.7623471882640587, | |
| "grad_norm": 1.1908670663833618, | |
| "learning_rate": 3.5707262688011983e-06, | |
| "loss": 0.4402, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.7643031784841074, | |
| "grad_norm": 1.1561764478683472, | |
| "learning_rate": 3.560878173329646e-06, | |
| "loss": 0.4321, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 1.7662591687041564, | |
| "grad_norm": 1.2901417016983032, | |
| "learning_rate": 3.551036161315109e-06, | |
| "loss": 0.4435, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 1.7682151589242054, | |
| "grad_norm": 1.0962755680084229, | |
| "learning_rate": 3.5412002743617525e-06, | |
| "loss": 0.4285, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 1.7701711491442542, | |
| "grad_norm": 1.1892595291137695, | |
| "learning_rate": 3.531370554047845e-06, | |
| "loss": 0.4598, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 1.7721271393643032, | |
| "grad_norm": 1.151934266090393, | |
| "learning_rate": 3.5215470419255905e-06, | |
| "loss": 0.4365, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.7740831295843522, | |
| "grad_norm": 1.1396538019180298, | |
| "learning_rate": 3.511729779520946e-06, | |
| "loss": 0.4396, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 1.776039119804401, | |
| "grad_norm": 1.131398320198059, | |
| "learning_rate": 3.501918808333453e-06, | |
| "loss": 0.4312, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 1.7779951100244498, | |
| "grad_norm": 1.106647253036499, | |
| "learning_rate": 3.4921141698360554e-06, | |
| "loss": 0.4439, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 1.7799511002444988, | |
| "grad_norm": 1.2006912231445312, | |
| "learning_rate": 3.48231590547493e-06, | |
| "loss": 0.44, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 1.7819070904645478, | |
| "grad_norm": 1.1762027740478516, | |
| "learning_rate": 3.4725240566693104e-06, | |
| "loss": 0.4496, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.7838630806845965, | |
| "grad_norm": 1.1079798936843872, | |
| "learning_rate": 3.4627386648113046e-06, | |
| "loss": 0.4402, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 1.7858190709046453, | |
| "grad_norm": 1.1334391832351685, | |
| "learning_rate": 3.4529597712657342e-06, | |
| "loss": 0.4793, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 1.7877750611246945, | |
| "grad_norm": 1.139093279838562, | |
| "learning_rate": 3.4431874173699415e-06, | |
| "loss": 0.429, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 1.7897310513447433, | |
| "grad_norm": 1.0803550481796265, | |
| "learning_rate": 3.433421644433631e-06, | |
| "loss": 0.4133, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 1.791687041564792, | |
| "grad_norm": 1.0991169214248657, | |
| "learning_rate": 3.4236624937386874e-06, | |
| "loss": 0.419, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.793643031784841, | |
| "grad_norm": 1.1221754550933838, | |
| "learning_rate": 3.4139100065390007e-06, | |
| "loss": 0.4391, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 1.79559902200489, | |
| "grad_norm": 1.1930972337722778, | |
| "learning_rate": 3.404164224060294e-06, | |
| "loss": 0.423, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 1.7975550122249389, | |
| "grad_norm": 1.2077200412750244, | |
| "learning_rate": 3.394425187499944e-06, | |
| "loss": 0.4282, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 1.7995110024449876, | |
| "grad_norm": 1.159081220626831, | |
| "learning_rate": 3.384692938026816e-06, | |
| "loss": 0.4221, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 1.8014669926650366, | |
| "grad_norm": 1.1197729110717773, | |
| "learning_rate": 3.374967516781085e-06, | |
| "loss": 0.4441, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.8034229828850856, | |
| "grad_norm": 1.1913528442382812, | |
| "learning_rate": 3.365248964874058e-06, | |
| "loss": 0.4472, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 1.8053789731051344, | |
| "grad_norm": 1.1669801473617554, | |
| "learning_rate": 3.3555373233880096e-06, | |
| "loss": 0.4434, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 1.8073349633251834, | |
| "grad_norm": 1.1273599863052368, | |
| "learning_rate": 3.3458326333759927e-06, | |
| "loss": 0.4467, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 1.8092909535452324, | |
| "grad_norm": 1.144005298614502, | |
| "learning_rate": 3.3361349358616853e-06, | |
| "loss": 0.4387, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 1.8112469437652812, | |
| "grad_norm": 1.1881077289581299, | |
| "learning_rate": 3.326444271839202e-06, | |
| "loss": 0.4177, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.81320293398533, | |
| "grad_norm": 1.1392303705215454, | |
| "learning_rate": 3.316760682272927e-06, | |
| "loss": 0.4339, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 1.815158924205379, | |
| "grad_norm": 1.19828200340271, | |
| "learning_rate": 3.307084208097337e-06, | |
| "loss": 0.4247, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 1.817114914425428, | |
| "grad_norm": 1.1880228519439697, | |
| "learning_rate": 3.297414890216833e-06, | |
| "loss": 0.4519, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 1.8190709046454767, | |
| "grad_norm": 1.124159812927246, | |
| "learning_rate": 3.2877527695055615e-06, | |
| "loss": 0.4305, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 1.8210268948655257, | |
| "grad_norm": 1.1801859140396118, | |
| "learning_rate": 3.27809788680725e-06, | |
| "loss": 0.4734, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.8229828850855747, | |
| "grad_norm": 1.1997686624526978, | |
| "learning_rate": 3.268450282935026e-06, | |
| "loss": 0.4653, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 1.8249388753056235, | |
| "grad_norm": 1.1670591831207275, | |
| "learning_rate": 3.2588099986712496e-06, | |
| "loss": 0.4208, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 1.8268948655256723, | |
| "grad_norm": 1.1300679445266724, | |
| "learning_rate": 3.2491770747673384e-06, | |
| "loss": 0.4505, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 1.8288508557457213, | |
| "grad_norm": 1.1710411310195923, | |
| "learning_rate": 3.239551551943595e-06, | |
| "loss": 0.4439, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 1.8308068459657703, | |
| "grad_norm": 1.1152379512786865, | |
| "learning_rate": 3.2299334708890384e-06, | |
| "loss": 0.445, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.832762836185819, | |
| "grad_norm": 1.1259523630142212, | |
| "learning_rate": 3.22032287226123e-06, | |
| "loss": 0.4336, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 1.8347188264058678, | |
| "grad_norm": 1.1431976556777954, | |
| "learning_rate": 3.2107197966861003e-06, | |
| "loss": 0.4196, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 1.836674816625917, | |
| "grad_norm": 1.2426306009292603, | |
| "learning_rate": 3.2011242847577804e-06, | |
| "loss": 0.435, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 1.8386308068459658, | |
| "grad_norm": 1.17112135887146, | |
| "learning_rate": 3.1915363770384223e-06, | |
| "loss": 0.4609, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 1.8405867970660146, | |
| "grad_norm": 1.1593024730682373, | |
| "learning_rate": 3.18195611405804e-06, | |
| "loss": 0.4384, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.8425427872860636, | |
| "grad_norm": 1.166476845741272, | |
| "learning_rate": 3.1723835363143296e-06, | |
| "loss": 0.4238, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 1.8444987775061126, | |
| "grad_norm": 1.0797832012176514, | |
| "learning_rate": 3.1628186842725e-06, | |
| "loss": 0.4433, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 1.8464547677261614, | |
| "grad_norm": 1.215981364250183, | |
| "learning_rate": 3.1532615983651027e-06, | |
| "loss": 0.4326, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 1.8484107579462101, | |
| "grad_norm": 1.095432162284851, | |
| "learning_rate": 3.1437123189918574e-06, | |
| "loss": 0.4485, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 1.8503667481662591, | |
| "grad_norm": 1.2197959423065186, | |
| "learning_rate": 3.1341708865194866e-06, | |
| "loss": 0.4572, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.8523227383863081, | |
| "grad_norm": 1.2195861339569092, | |
| "learning_rate": 3.124637341281541e-06, | |
| "loss": 0.4407, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 1.854278728606357, | |
| "grad_norm": 1.1470884084701538, | |
| "learning_rate": 3.1151117235782346e-06, | |
| "loss": 0.4136, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 1.856234718826406, | |
| "grad_norm": 1.132288932800293, | |
| "learning_rate": 3.1055940736762647e-06, | |
| "loss": 0.4199, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 1.858190709046455, | |
| "grad_norm": 1.128574013710022, | |
| "learning_rate": 3.0960844318086482e-06, | |
| "loss": 0.4661, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 1.8601466992665037, | |
| "grad_norm": 1.1992732286453247, | |
| "learning_rate": 3.0865828381745515e-06, | |
| "loss": 0.4415, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.8621026894865524, | |
| "grad_norm": 1.1346315145492554, | |
| "learning_rate": 3.0770893329391207e-06, | |
| "loss": 0.421, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 1.8640586797066014, | |
| "grad_norm": 1.160053014755249, | |
| "learning_rate": 3.067603956233308e-06, | |
| "loss": 0.4625, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 1.8660146699266504, | |
| "grad_norm": 1.204193115234375, | |
| "learning_rate": 3.0581267481537073e-06, | |
| "loss": 0.4367, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 1.8679706601466992, | |
| "grad_norm": 1.2072075605392456, | |
| "learning_rate": 3.0486577487623802e-06, | |
| "loss": 0.4574, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 1.8699266503667482, | |
| "grad_norm": 1.0959627628326416, | |
| "learning_rate": 3.0391969980866874e-06, | |
| "loss": 0.4288, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.8718826405867972, | |
| "grad_norm": 1.1430832147598267, | |
| "learning_rate": 3.0297445361191235e-06, | |
| "loss": 0.445, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 1.873838630806846, | |
| "grad_norm": 1.256406307220459, | |
| "learning_rate": 3.0203004028171434e-06, | |
| "loss": 0.4315, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 1.8757946210268948, | |
| "grad_norm": 1.163475751876831, | |
| "learning_rate": 3.010864638102998e-06, | |
| "loss": 0.4357, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 1.8777506112469438, | |
| "grad_norm": 1.1195828914642334, | |
| "learning_rate": 3.001437281863558e-06, | |
| "loss": 0.4656, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 1.8797066014669928, | |
| "grad_norm": 1.169587254524231, | |
| "learning_rate": 2.9920183739501503e-06, | |
| "loss": 0.4269, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.8816625916870415, | |
| "grad_norm": 1.112953782081604, | |
| "learning_rate": 2.9826079541783914e-06, | |
| "loss": 0.4408, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 1.8836185819070903, | |
| "grad_norm": 1.107088327407837, | |
| "learning_rate": 2.973206062328017e-06, | |
| "loss": 0.4311, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 1.8855745721271395, | |
| "grad_norm": 1.1068124771118164, | |
| "learning_rate": 2.963812738142713e-06, | |
| "loss": 0.4286, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 1.8875305623471883, | |
| "grad_norm": 1.0876468420028687, | |
| "learning_rate": 2.954428021329946e-06, | |
| "loss": 0.429, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 1.889486552567237, | |
| "grad_norm": 1.160590410232544, | |
| "learning_rate": 2.9450519515607966e-06, | |
| "loss": 0.4167, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.891442542787286, | |
| "grad_norm": 1.0903960466384888, | |
| "learning_rate": 2.935684568469799e-06, | |
| "loss": 0.4385, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 1.893398533007335, | |
| "grad_norm": 1.1685900688171387, | |
| "learning_rate": 2.9263259116547606e-06, | |
| "loss": 0.4521, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 1.8953545232273838, | |
| "grad_norm": 1.1978154182434082, | |
| "learning_rate": 2.916976020676606e-06, | |
| "loss": 0.4342, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 1.8973105134474326, | |
| "grad_norm": 1.1818468570709229, | |
| "learning_rate": 2.9076349350592014e-06, | |
| "loss": 0.439, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 1.8992665036674816, | |
| "grad_norm": 1.194822072982788, | |
| "learning_rate": 2.898302694289189e-06, | |
| "loss": 0.444, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.9012224938875306, | |
| "grad_norm": 1.2437156438827515, | |
| "learning_rate": 2.8889793378158284e-06, | |
| "loss": 0.4409, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 1.9031784841075794, | |
| "grad_norm": 1.2146315574645996, | |
| "learning_rate": 2.8796649050508175e-06, | |
| "loss": 0.4173, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 1.9051344743276284, | |
| "grad_norm": 1.228637456893921, | |
| "learning_rate": 2.870359435368136e-06, | |
| "loss": 0.447, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 1.9070904645476774, | |
| "grad_norm": 1.2419463396072388, | |
| "learning_rate": 2.861062968103876e-06, | |
| "loss": 0.4208, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 1.9090464547677262, | |
| "grad_norm": 1.2601884603500366, | |
| "learning_rate": 2.8517755425560665e-06, | |
| "loss": 0.4361, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.911002444987775, | |
| "grad_norm": 1.2036101818084717, | |
| "learning_rate": 2.842497197984524e-06, | |
| "loss": 0.435, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 1.912958435207824, | |
| "grad_norm": 1.1465144157409668, | |
| "learning_rate": 2.8332279736106747e-06, | |
| "loss": 0.4325, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 1.914914425427873, | |
| "grad_norm": 1.1453237533569336, | |
| "learning_rate": 2.82396790861739e-06, | |
| "loss": 0.4444, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 1.9168704156479217, | |
| "grad_norm": 1.1577181816101074, | |
| "learning_rate": 2.814717042148827e-06, | |
| "loss": 0.4625, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 1.9188264058679707, | |
| "grad_norm": 1.2406984567642212, | |
| "learning_rate": 2.8054754133102535e-06, | |
| "loss": 0.4326, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.9207823960880197, | |
| "grad_norm": 1.2444988489151, | |
| "learning_rate": 2.796243061167892e-06, | |
| "loss": 0.4323, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 1.9227383863080685, | |
| "grad_norm": 1.1628625392913818, | |
| "learning_rate": 2.78702002474875e-06, | |
| "loss": 0.4349, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 1.9246943765281173, | |
| "grad_norm": 1.1444227695465088, | |
| "learning_rate": 2.7778063430404544e-06, | |
| "loss": 0.4552, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 1.9266503667481663, | |
| "grad_norm": 1.1746978759765625, | |
| "learning_rate": 2.76860205499109e-06, | |
| "loss": 0.4119, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 1.9286063569682153, | |
| "grad_norm": 1.1781418323516846, | |
| "learning_rate": 2.759407199509029e-06, | |
| "loss": 0.4105, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.930562347188264, | |
| "grad_norm": 1.1440287828445435, | |
| "learning_rate": 2.7502218154627718e-06, | |
| "loss": 0.4351, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 1.9325183374083128, | |
| "grad_norm": 1.1332827806472778, | |
| "learning_rate": 2.7410459416807856e-06, | |
| "loss": 0.4525, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 1.934474327628362, | |
| "grad_norm": 1.6635149717330933, | |
| "learning_rate": 2.7318796169513275e-06, | |
| "loss": 0.4511, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 1.9364303178484108, | |
| "grad_norm": 1.176467776298523, | |
| "learning_rate": 2.722722880022297e-06, | |
| "loss": 0.4331, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 1.9383863080684596, | |
| "grad_norm": 1.1627734899520874, | |
| "learning_rate": 2.7135757696010565e-06, | |
| "loss": 0.4403, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.9403422982885086, | |
| "grad_norm": 1.1340796947479248, | |
| "learning_rate": 2.7044383243542804e-06, | |
| "loss": 0.4188, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 1.9422982885085576, | |
| "grad_norm": 1.1349642276763916, | |
| "learning_rate": 2.6953105829077863e-06, | |
| "loss": 0.4385, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 1.9442542787286063, | |
| "grad_norm": 1.1615030765533447, | |
| "learning_rate": 2.6861925838463694e-06, | |
| "loss": 0.4272, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 1.9462102689486551, | |
| "grad_norm": 1.1460556983947754, | |
| "learning_rate": 2.6770843657136457e-06, | |
| "loss": 0.4321, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 1.9481662591687041, | |
| "grad_norm": 1.1438319683074951, | |
| "learning_rate": 2.6679859670118785e-06, | |
| "loss": 0.461, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.9501222493887531, | |
| "grad_norm": 1.1441649198532104, | |
| "learning_rate": 2.658897426201829e-06, | |
| "loss": 0.4531, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 1.952078239608802, | |
| "grad_norm": 1.1989426612854004, | |
| "learning_rate": 2.6498187817025845e-06, | |
| "loss": 0.4326, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 1.9540342298288509, | |
| "grad_norm": 1.1358593702316284, | |
| "learning_rate": 2.6407500718914e-06, | |
| "loss": 0.4549, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 1.9559902200488999, | |
| "grad_norm": 1.2670133113861084, | |
| "learning_rate": 2.6316913351035313e-06, | |
| "loss": 0.4377, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 1.9579462102689487, | |
| "grad_norm": 1.186421513557434, | |
| "learning_rate": 2.62264260963208e-06, | |
| "loss": 0.4507, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.9599022004889974, | |
| "grad_norm": 1.122685432434082, | |
| "learning_rate": 2.613603933727824e-06, | |
| "loss": 0.4506, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 1.9618581907090464, | |
| "grad_norm": 1.1497164964675903, | |
| "learning_rate": 2.604575345599063e-06, | |
| "loss": 0.4369, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 1.9638141809290954, | |
| "grad_norm": 1.1628155708312988, | |
| "learning_rate": 2.5955568834114523e-06, | |
| "loss": 0.4508, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 1.9657701711491442, | |
| "grad_norm": 1.1360890865325928, | |
| "learning_rate": 2.5865485852878435e-06, | |
| "loss": 0.4286, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 1.9677261613691932, | |
| "grad_norm": 1.121633529663086, | |
| "learning_rate": 2.577550489308124e-06, | |
| "loss": 0.4265, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.9696821515892422, | |
| "grad_norm": 1.1679707765579224, | |
| "learning_rate": 2.5685626335090487e-06, | |
| "loss": 0.4341, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 1.971638141809291, | |
| "grad_norm": 1.1223803758621216, | |
| "learning_rate": 2.5595850558840908e-06, | |
| "loss": 0.4364, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 1.9735941320293398, | |
| "grad_norm": 1.2146944999694824, | |
| "learning_rate": 2.550617794383278e-06, | |
| "loss": 0.4591, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 1.9755501222493888, | |
| "grad_norm": 1.1352821588516235, | |
| "learning_rate": 2.541660886913019e-06, | |
| "loss": 0.4316, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 1.9775061124694377, | |
| "grad_norm": 1.1358625888824463, | |
| "learning_rate": 2.5327143713359668e-06, | |
| "loss": 0.438, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.9794621026894865, | |
| "grad_norm": 1.1737252473831177, | |
| "learning_rate": 2.523778285470835e-06, | |
| "loss": 0.4313, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 1.9814180929095353, | |
| "grad_norm": 1.1773995161056519, | |
| "learning_rate": 2.5148526670922556e-06, | |
| "loss": 0.4359, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 1.9833740831295843, | |
| "grad_norm": 1.132659912109375, | |
| "learning_rate": 2.5059375539306103e-06, | |
| "loss": 0.4517, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 1.9853300733496333, | |
| "grad_norm": 1.1384680271148682, | |
| "learning_rate": 2.497032983671873e-06, | |
| "loss": 0.4323, | |
| "step": 1024 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 1533, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 256, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.8180416233323626e+19, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |