| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.999679630934837, | |
| "eval_steps": 500, | |
| "global_step": 39015, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 0.049, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.997937713868709e-05, | |
| "loss": 0.0574, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.991721127523219e-05, | |
| "loss": 0.0621, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.981360546534587e-05, | |
| "loss": 0.0639, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.966873201640091e-05, | |
| "loss": 0.0679, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 4.948283186819981e-05, | |
| "loss": 0.0746, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 4.9256214192266534e-05, | |
| "loss": 0.0778, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 4.898982977024972e-05, | |
| "loss": 0.0752, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.868305410275411e-05, | |
| "loss": 0.0763, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 4.833689102217645e-05, | |
| "loss": 0.0769, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 4.795191623417485e-05, | |
| "loss": 0.0755, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 4.752965392266396e-05, | |
| "loss": 0.0751, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.706911415528901e-05, | |
| "loss": 0.0783, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.6571871126931825e-05, | |
| "loss": 0.078, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.6038751805114225e-05, | |
| "loss": 0.0785, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.547064282341224e-05, | |
| "loss": 0.0719, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 4.486848900689111e-05, | |
| "loss": 0.0593, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 4.423329180076173e-05, | |
| "loss": 0.0636, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 4.356747316744135e-05, | |
| "loss": 0.0639, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.286947219232885e-05, | |
| "loss": 0.0614, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "learning_rate": 4.214175240309113e-05, | |
| "loss": 0.0657, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 4.138552407437527e-05, | |
| "loss": 0.0625, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 4.0602044893531846e-05, | |
| "loss": 0.065, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 3.979426173115841e-05, | |
| "loss": 0.0625, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 3.89619724591825e-05, | |
| "loss": 0.0635, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 3.81048191928653e-05, | |
| "loss": 0.0627, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 3.7225871232136e-05, | |
| "loss": 0.0669, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 3.6326590360017014e-05, | |
| "loss": 0.0638, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.540847217530374e-05, | |
| "loss": 0.0632, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.447304360522618e-05, | |
| "loss": 0.0666, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "learning_rate": 3.352186036600821e-05, | |
| "loss": 0.0626, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "learning_rate": 3.2556504375547626e-05, | |
| "loss": 0.0514, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "learning_rate": 3.158054843335624e-05, | |
| "loss": 0.0463, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 3.0591704553180736e-05, | |
| "loss": 0.0467, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 2.959356107945986e-05, | |
| "loss": 0.0476, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 2.858979611033543e-05, | |
| "loss": 0.0505, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 2.75780564653496e-05, | |
| "loss": 0.0502, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 2.656202924096166e-05, | |
| "loss": 0.047, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 2.554340419744563e-05, | |
| "loss": 0.0461, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 2.4523875415522277e-05, | |
| "loss": 0.0465, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "learning_rate": 2.350513847892453e-05, | |
| "loss": 0.05, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 2.2488887654463146e-05, | |
| "loss": 0.0471, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.1478831935354516e-05, | |
| "loss": 0.0478, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.047260339339565e-05, | |
| "loss": 0.0467, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 1.9473904388782642e-05, | |
| "loss": 0.0458, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.8484395863135113e-05, | |
| "loss": 0.0451, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "learning_rate": 1.750572347333834e-05, | |
| "loss": 0.0443, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 1.6541433761257506e-05, | |
| "loss": 0.0307, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.14, | |
| "learning_rate": 1.558926609041298e-05, | |
| "loss": 0.0313, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "learning_rate": 1.4652749461123058e-05, | |
| "loss": 0.0362, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.3733441399168673e-05, | |
| "loss": 0.035, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 1.2832870810670993e-05, | |
| "loss": 0.0324, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.1952535439359563e-05, | |
| "loss": 0.0335, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 1.1095594036489903e-05, | |
| "loss": 0.0322, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 1.0260037628024472e-05, | |
| "loss": 0.0332, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 9.448995329278178e-06, | |
| "loss": 0.033, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 8.66381598900628e-06, | |
| "loss": 0.035, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 7.905805443140457e-06, | |
| "loss": 0.035, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 7.177654318094604e-06, | |
| "loss": 0.0349, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 6.4776555698389505e-06, | |
| "loss": 0.0333, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 5.808461430995301e-06, | |
| "loss": 0.0313, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 5.171184841890728e-06, | |
| "loss": 0.0358, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 4.568060670213628e-06, | |
| "loss": 0.0301, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.99767497937713e-06, | |
| "loss": 0.028, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.4622183668462316e-06, | |
| "loss": 0.028, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 2.962581353357577e-06, | |
| "loss": 0.0269, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 2.5004837614951537e-06, | |
| "loss": 0.0273, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.36, | |
| "learning_rate": 2.0748422709038623e-06, | |
| "loss": 0.0277, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.6873277319096742e-06, | |
| "loss": 0.027, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 1.3385846220008548e-06, | |
| "loss": 0.0271, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.55, | |
| "learning_rate": 1.0297720953505841e-06, | |
| "loss": 0.0297, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 4.61, | |
| "learning_rate": 7.601661860652421e-07, | |
| "loss": 0.0276, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.68, | |
| "learning_rate": 5.308736731572172e-07, | |
| "loss": 0.0283, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 4.74, | |
| "learning_rate": 3.4227589422272943e-07, | |
| "loss": 0.0305, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.9468650722975211e-07, | |
| "loss": 0.0302, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 8.852233960627565e-08, | |
| "loss": 0.028, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 2.3534504957314906e-08, | |
| "loss": 0.0344, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 9.056858269462787e-11, | |
| "loss": 0.0306, | |
| "step": 39000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 39015, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 3.307647360088965e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |