{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.999138673557278, "eval_steps": 500, "global_step": 14510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "learning_rate": 4.9800000000000004e-05, "loss": 0.0562, "step": 500 }, { "epoch": 0.34, "learning_rate": 4.984428128696318e-05, "loss": 0.0703, "step": 1000 }, { "epoch": 0.52, "learning_rate": 4.9376579221363204e-05, "loss": 0.0744, "step": 1500 }, { "epoch": 0.69, "learning_rate": 4.8602764529345966e-05, "loss": 0.0807, "step": 2000 }, { "epoch": 0.86, "learning_rate": 4.7534982401683416e-05, "loss": 0.082, "step": 2500 }, { "epoch": 1.03, "learning_rate": 4.618236648123212e-05, "loss": 0.072, "step": 3000 }, { "epoch": 1.21, "learning_rate": 4.4563749749985335e-05, "loss": 0.0638, "step": 3500 }, { "epoch": 1.38, "learning_rate": 4.269945823540843e-05, "loss": 0.0627, "step": 4000 }, { "epoch": 1.55, "learning_rate": 4.061290306367919e-05, "loss": 0.0638, "step": 4500 }, { "epoch": 1.72, "learning_rate": 3.8330286470885907e-05, "loss": 0.0652, "step": 5000 }, { "epoch": 1.89, "learning_rate": 3.58802727644352e-05, "loss": 0.0622, "step": 5500 }, { "epoch": 2.07, "learning_rate": 3.3298916668553426e-05, "loss": 0.0555, "step": 6000 }, { "epoch": 2.24, "learning_rate": 3.0608298703277996e-05, "loss": 0.0467, "step": 6500 }, { "epoch": 2.41, "learning_rate": 2.7847253668223704e-05, "loss": 0.0428, "step": 7000 }, { "epoch": 2.58, "learning_rate": 2.5050453809223793e-05, "loss": 0.0473, "step": 7500 }, { "epoch": 2.76, "learning_rate": 2.2253020368788445e-05, "loss": 0.0452, "step": 8000 }, { "epoch": 2.93, "learning_rate": 1.9495550817504742e-05, "loss": 0.0412, "step": 8500 }, { "epoch": 3.1, "learning_rate": 1.6801632122617095e-05, "loss": 0.0385, "step": 9000 }, { "epoch": 3.27, "learning_rate": 1.4210665687785734e-05, "loss": 0.0311, "step": 9500 }, { "epoch": 3.45, "learning_rate": 1.1755187970777065e-05, "loss": 0.0317, "step": 10000 }, { "epoch": 3.62, "learning_rate": 9.470426834280832e-06, "loss": 0.0309, "step": 10500 }, { "epoch": 3.79, "learning_rate": 7.3759257710121985e-06, "loss": 0.0296, "step": 11000 }, { "epoch": 3.96, "learning_rate": 5.5027417186915175e-06, "loss": 0.0261, "step": 11500 }, { "epoch": 4.13, "learning_rate": 3.877395681105919e-06, "loss": 0.0268, "step": 12000 }, { "epoch": 4.31, "learning_rate": 2.513790616397402e-06, "loss": 0.0261, "step": 12500 }, { "epoch": 4.48, "learning_rate": 1.4325595693915228e-06, "loss": 0.022, "step": 13000 }, { "epoch": 4.65, "learning_rate": 6.472802645326154e-07, "loss": 0.0205, "step": 13500 }, { "epoch": 4.82, "learning_rate": 1.6846304762653532e-07, "loss": 0.025, "step": 14000 }, { "epoch": 5.0, "learning_rate": 2.0364694921504168e-10, "loss": 0.0274, "step": 14500 } ], "logging_steps": 500, "max_steps": 14510, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.2300127834804224e+17, "trial_name": null, "trial_params": null }