{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.999679630934837, "eval_steps": 500, "global_step": 39015, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 4.9800000000000004e-05, "loss": 0.049, "step": 500 }, { "epoch": 0.13, "learning_rate": 4.997937713868709e-05, "loss": 0.0574, "step": 1000 }, { "epoch": 0.19, "learning_rate": 4.991721127523219e-05, "loss": 0.0621, "step": 1500 }, { "epoch": 0.26, "learning_rate": 4.981360546534587e-05, "loss": 0.0639, "step": 2000 }, { "epoch": 0.32, "learning_rate": 4.966873201640091e-05, "loss": 0.0679, "step": 2500 }, { "epoch": 0.38, "learning_rate": 4.948283186819981e-05, "loss": 0.0746, "step": 3000 }, { "epoch": 0.45, "learning_rate": 4.9256214192266534e-05, "loss": 0.0778, "step": 3500 }, { "epoch": 0.51, "learning_rate": 4.898982977024972e-05, "loss": 0.0752, "step": 4000 }, { "epoch": 0.58, "learning_rate": 4.868305410275411e-05, "loss": 0.0763, "step": 4500 }, { "epoch": 0.64, "learning_rate": 4.833689102217645e-05, "loss": 0.0769, "step": 5000 }, { "epoch": 0.7, "learning_rate": 4.795191623417485e-05, "loss": 0.0755, "step": 5500 }, { "epoch": 0.77, "learning_rate": 4.752965392266396e-05, "loss": 0.0751, "step": 6000 }, { "epoch": 0.83, "learning_rate": 4.706911415528901e-05, "loss": 0.0783, "step": 6500 }, { "epoch": 0.9, "learning_rate": 4.6571871126931825e-05, "loss": 0.078, "step": 7000 }, { "epoch": 0.96, "learning_rate": 4.6038751805114225e-05, "loss": 0.0785, "step": 7500 }, { "epoch": 1.03, "learning_rate": 4.547064282341224e-05, "loss": 0.0719, "step": 8000 }, { "epoch": 1.09, "learning_rate": 4.486848900689111e-05, "loss": 0.0593, "step": 8500 }, { "epoch": 1.15, "learning_rate": 4.423329180076173e-05, "loss": 0.0636, "step": 9000 }, { "epoch": 1.22, "learning_rate": 4.356747316744135e-05, "loss": 0.0639, "step": 9500 }, { "epoch": 1.28, "learning_rate": 4.286947219232885e-05, "loss": 0.0614, "step": 10000 }, { "epoch": 1.35, "learning_rate": 4.214175240309113e-05, "loss": 0.0657, "step": 10500 }, { "epoch": 1.41, "learning_rate": 4.138552407437527e-05, "loss": 0.0625, "step": 11000 }, { "epoch": 1.47, "learning_rate": 4.0602044893531846e-05, "loss": 0.065, "step": 11500 }, { "epoch": 1.54, "learning_rate": 3.979426173115841e-05, "loss": 0.0625, "step": 12000 }, { "epoch": 1.6, "learning_rate": 3.89619724591825e-05, "loss": 0.0635, "step": 12500 }, { "epoch": 1.67, "learning_rate": 3.81048191928653e-05, "loss": 0.0627, "step": 13000 }, { "epoch": 1.73, "learning_rate": 3.7225871232136e-05, "loss": 0.0669, "step": 13500 }, { "epoch": 1.79, "learning_rate": 3.6326590360017014e-05, "loss": 0.0638, "step": 14000 }, { "epoch": 1.86, "learning_rate": 3.540847217530374e-05, "loss": 0.0632, "step": 14500 }, { "epoch": 1.92, "learning_rate": 3.447304360522618e-05, "loss": 0.0666, "step": 15000 }, { "epoch": 1.99, "learning_rate": 3.352186036600821e-05, "loss": 0.0626, "step": 15500 }, { "epoch": 2.05, "learning_rate": 3.2556504375547626e-05, "loss": 0.0514, "step": 16000 }, { "epoch": 2.11, "learning_rate": 3.158054843335624e-05, "loss": 0.0463, "step": 16500 }, { "epoch": 2.18, "learning_rate": 3.0591704553180736e-05, "loss": 0.0467, "step": 17000 }, { "epoch": 2.24, "learning_rate": 2.959356107945986e-05, "loss": 0.0476, "step": 17500 }, { "epoch": 2.31, "learning_rate": 2.858979611033543e-05, "loss": 0.0505, "step": 18000 }, { "epoch": 2.37, "learning_rate": 2.75780564653496e-05, "loss": 0.0502, "step": 18500 }, { "epoch": 2.43, "learning_rate": 2.656202924096166e-05, "loss": 0.047, "step": 19000 }, { "epoch": 2.5, "learning_rate": 2.554340419744563e-05, "loss": 0.0461, "step": 19500 }, { "epoch": 2.56, "learning_rate": 2.4523875415522277e-05, "loss": 0.0465, "step": 20000 }, { "epoch": 2.63, "learning_rate": 2.350513847892453e-05, "loss": 0.05, "step": 20500 }, { "epoch": 2.69, "learning_rate": 2.2488887654463146e-05, "loss": 0.0471, "step": 21000 }, { "epoch": 2.76, "learning_rate": 2.1478831935354516e-05, "loss": 0.0478, "step": 21500 }, { "epoch": 2.82, "learning_rate": 2.047260339339565e-05, "loss": 0.0467, "step": 22000 }, { "epoch": 2.88, "learning_rate": 1.9473904388782642e-05, "loss": 0.0458, "step": 22500 }, { "epoch": 2.95, "learning_rate": 1.8484395863135113e-05, "loss": 0.0451, "step": 23000 }, { "epoch": 3.01, "learning_rate": 1.750572347333834e-05, "loss": 0.0443, "step": 23500 }, { "epoch": 3.08, "learning_rate": 1.6541433761257506e-05, "loss": 0.0307, "step": 24000 }, { "epoch": 3.14, "learning_rate": 1.558926609041298e-05, "loss": 0.0313, "step": 24500 }, { "epoch": 3.2, "learning_rate": 1.4652749461123058e-05, "loss": 0.0362, "step": 25000 }, { "epoch": 3.27, "learning_rate": 1.3733441399168673e-05, "loss": 0.035, "step": 25500 }, { "epoch": 3.33, "learning_rate": 1.2832870810670993e-05, "loss": 0.0324, "step": 26000 }, { "epoch": 3.4, "learning_rate": 1.1952535439359563e-05, "loss": 0.0335, "step": 26500 }, { "epoch": 3.46, "learning_rate": 1.1095594036489903e-05, "loss": 0.0322, "step": 27000 }, { "epoch": 3.52, "learning_rate": 1.0260037628024472e-05, "loss": 0.0332, "step": 27500 }, { "epoch": 3.59, "learning_rate": 9.448995329278178e-06, "loss": 0.033, "step": 28000 }, { "epoch": 3.65, "learning_rate": 8.66381598900628e-06, "loss": 0.035, "step": 28500 }, { "epoch": 3.72, "learning_rate": 7.905805443140457e-06, "loss": 0.035, "step": 29000 }, { "epoch": 3.78, "learning_rate": 7.177654318094604e-06, "loss": 0.0349, "step": 29500 }, { "epoch": 3.84, "learning_rate": 6.4776555698389505e-06, "loss": 0.0333, "step": 30000 }, { "epoch": 3.91, "learning_rate": 5.808461430995301e-06, "loss": 0.0313, "step": 30500 }, { "epoch": 3.97, "learning_rate": 5.171184841890728e-06, "loss": 0.0358, "step": 31000 }, { "epoch": 4.04, "learning_rate": 4.568060670213628e-06, "loss": 0.0301, "step": 31500 }, { "epoch": 4.1, "learning_rate": 3.99767497937713e-06, "loss": 0.028, "step": 32000 }, { "epoch": 4.16, "learning_rate": 3.4622183668462316e-06, "loss": 0.028, "step": 32500 }, { "epoch": 4.23, "learning_rate": 2.962581353357577e-06, "loss": 0.0269, "step": 33000 }, { "epoch": 4.29, "learning_rate": 2.5004837614951537e-06, "loss": 0.0273, "step": 33500 }, { "epoch": 4.36, "learning_rate": 2.0748422709038623e-06, "loss": 0.0277, "step": 34000 }, { "epoch": 4.42, "learning_rate": 1.6873277319096742e-06, "loss": 0.027, "step": 34500 }, { "epoch": 4.49, "learning_rate": 1.3385846220008548e-06, "loss": 0.0271, "step": 35000 }, { "epoch": 4.55, "learning_rate": 1.0297720953505841e-06, "loss": 0.0297, "step": 35500 }, { "epoch": 4.61, "learning_rate": 7.601661860652421e-07, "loss": 0.0276, "step": 36000 }, { "epoch": 4.68, "learning_rate": 5.308736731572172e-07, "loss": 0.0283, "step": 36500 }, { "epoch": 4.74, "learning_rate": 3.4227589422272943e-07, "loss": 0.0305, "step": 37000 }, { "epoch": 4.81, "learning_rate": 1.9468650722975211e-07, "loss": 0.0302, "step": 37500 }, { "epoch": 4.87, "learning_rate": 8.852233960627565e-08, "loss": 0.028, "step": 38000 }, { "epoch": 4.93, "learning_rate": 2.3534504957314906e-08, "loss": 0.0344, "step": 38500 }, { "epoch": 5.0, "learning_rate": 9.056858269462787e-11, "loss": 0.0306, "step": 39000 } ], "logging_steps": 500, "max_steps": 39015, "num_train_epochs": 5, "save_steps": 500, "total_flos": 3.307647360088965e+17, "trial_name": null, "trial_params": null }