| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.999138673557278, | |
| "eval_steps": 500, | |
| "global_step": 14510, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.9800000000000004e-05, | |
| "loss": 0.0562, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 4.984428128696318e-05, | |
| "loss": 0.0703, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 4.9376579221363204e-05, | |
| "loss": 0.0744, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.8602764529345966e-05, | |
| "loss": 0.0807, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.7534982401683416e-05, | |
| "loss": 0.082, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 4.618236648123212e-05, | |
| "loss": 0.072, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.4563749749985335e-05, | |
| "loss": 0.0638, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.269945823540843e-05, | |
| "loss": 0.0627, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.061290306367919e-05, | |
| "loss": 0.0638, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 3.8330286470885907e-05, | |
| "loss": 0.0652, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "learning_rate": 3.58802727644352e-05, | |
| "loss": 0.0622, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 3.3298916668553426e-05, | |
| "loss": 0.0555, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 3.0608298703277996e-05, | |
| "loss": 0.0467, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "learning_rate": 2.7847253668223704e-05, | |
| "loss": 0.0428, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 2.5050453809223793e-05, | |
| "loss": 0.0473, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 2.2253020368788445e-05, | |
| "loss": 0.0452, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 1.9495550817504742e-05, | |
| "loss": 0.0412, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 1.6801632122617095e-05, | |
| "loss": 0.0385, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 1.4210665687785734e-05, | |
| "loss": 0.0311, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 1.1755187970777065e-05, | |
| "loss": 0.0317, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.62, | |
| "learning_rate": 9.470426834280832e-06, | |
| "loss": 0.0309, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "learning_rate": 7.3759257710121985e-06, | |
| "loss": 0.0296, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 5.5027417186915175e-06, | |
| "loss": 0.0261, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.877395681105919e-06, | |
| "loss": 0.0268, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 2.513790616397402e-06, | |
| "loss": 0.0261, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 1.4325595693915228e-06, | |
| "loss": 0.022, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 6.472802645326154e-07, | |
| "loss": 0.0205, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.82, | |
| "learning_rate": 1.6846304762653532e-07, | |
| "loss": 0.025, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 2.0364694921504168e-10, | |
| "loss": 0.0274, | |
| "step": 14500 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 14510, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 1.2300127834804224e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |