{ "best_global_step": 200, "best_metric": 0.7568597793579102, "best_model_checkpoint": "j05hr3d/SFT-Qwen2.5-Coder-3B_long_v1/checkpoint-200", "epoch": 3.0, "eval_steps": 20, "global_step": 216, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2807017543859649, "grad_norm": 0.47658494114875793, "learning_rate": 9.425837320574164e-05, "loss": 0.9906, "step": 20 }, { "epoch": 0.2807017543859649, "eval_loss": 0.9487136006355286, "eval_runtime": 14.7059, "eval_samples_per_second": 4.352, "eval_steps_per_second": 4.352, "step": 20 }, { "epoch": 0.5614035087719298, "grad_norm": 0.4057779908180237, "learning_rate": 8.468899521531101e-05, "loss": 0.8528, "step": 40 }, { "epoch": 0.5614035087719298, "eval_loss": 0.8620332479476929, "eval_runtime": 14.5037, "eval_samples_per_second": 4.413, "eval_steps_per_second": 4.413, "step": 40 }, { "epoch": 0.8421052631578947, "grad_norm": 0.5583078861236572, "learning_rate": 7.511961722488039e-05, "loss": 0.8721, "step": 60 }, { "epoch": 0.8421052631578947, "eval_loss": 0.8238226175308228, "eval_runtime": 14.7369, "eval_samples_per_second": 4.343, "eval_steps_per_second": 4.343, "step": 60 }, { "epoch": 1.1122807017543859, "grad_norm": 0.4012566804885864, "learning_rate": 6.555023923444976e-05, "loss": 0.8059, "step": 80 }, { "epoch": 1.1122807017543859, "eval_loss": 0.8017938137054443, "eval_runtime": 14.6894, "eval_samples_per_second": 4.357, "eval_steps_per_second": 4.357, "step": 80 }, { "epoch": 1.3929824561403508, "grad_norm": 0.7860776782035828, "learning_rate": 5.5980861244019145e-05, "loss": 0.8141, "step": 100 }, { "epoch": 1.3929824561403508, "eval_loss": 0.7868363857269287, "eval_runtime": 14.7, "eval_samples_per_second": 4.354, "eval_steps_per_second": 4.354, "step": 100 }, { "epoch": 1.6736842105263157, "grad_norm": 0.48762819170951843, "learning_rate": 4.641148325358852e-05, "loss": 0.7353, "step": 120 }, { "epoch": 1.6736842105263157, "eval_loss": 0.7766555547714233, "eval_runtime": 14.7914, "eval_samples_per_second": 4.327, "eval_steps_per_second": 4.327, "step": 120 }, { "epoch": 1.9543859649122806, "grad_norm": 0.5413577556610107, "learning_rate": 3.6842105263157895e-05, "loss": 0.6779, "step": 140 }, { "epoch": 1.9543859649122806, "eval_loss": 0.7647180557250977, "eval_runtime": 14.7626, "eval_samples_per_second": 4.335, "eval_steps_per_second": 4.335, "step": 140 }, { "epoch": 2.2245614035087717, "grad_norm": 0.8345220685005188, "learning_rate": 2.7272727272727273e-05, "loss": 0.6273, "step": 160 }, { "epoch": 2.2245614035087717, "eval_loss": 0.7629338502883911, "eval_runtime": 14.7511, "eval_samples_per_second": 4.339, "eval_steps_per_second": 4.339, "step": 160 }, { "epoch": 2.5052631578947366, "grad_norm": 0.5914349555969238, "learning_rate": 1.770334928229665e-05, "loss": 0.6983, "step": 180 }, { "epoch": 2.5052631578947366, "eval_loss": 0.7597119808197021, "eval_runtime": 14.7516, "eval_samples_per_second": 4.339, "eval_steps_per_second": 4.339, "step": 180 }, { "epoch": 2.7859649122807015, "grad_norm": 0.5911722183227539, "learning_rate": 8.133971291866028e-06, "loss": 0.6958, "step": 200 }, { "epoch": 2.7859649122807015, "eval_loss": 0.7568597793579102, "eval_runtime": 14.6958, "eval_samples_per_second": 4.355, "eval_steps_per_second": 4.355, "step": 200 }, { "epoch": 3.0, "step": 216, "total_flos": 2.684496203948851e+16, "train_loss": 0.7727184339805886, "train_runtime": 1493.2791, "train_samples_per_second": 1.145, "train_steps_per_second": 0.145 }, { "epoch": 3.0, "eval_loss": 0.7568597793579102, "eval_runtime": 14.794, "eval_samples_per_second": 4.326, "eval_steps_per_second": 4.326, "step": 216 } ], "logging_steps": 20, "max_steps": 216, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.684496203948851e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }