| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5, | |
| "eval_steps": 3, | |
| "global_step": 57, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008771929824561403, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.0802, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02631578947368421, | |
| "grad_norm": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": 2.0494, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.05263157894736842, | |
| "grad_norm": 2.4723047601512564, | |
| "learning_rate": 6.000000000000001e-07, | |
| "loss": 2.0771, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.07894736842105263, | |
| "grad_norm": 2.106898891910819, | |
| "learning_rate": 1.8e-06, | |
| "loss": 2.0245, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.8386924455230856, | |
| "learning_rate": 2.9432692307692307e-06, | |
| "loss": 2.0568, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.13157894736842105, | |
| "grad_norm": 1.1653346026055935, | |
| "learning_rate": 2.7730769230769233e-06, | |
| "loss": 2.0326, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.15789473684210525, | |
| "grad_norm": 1.5574035133942843, | |
| "learning_rate": 2.6028846153846155e-06, | |
| "loss": 2.0396, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.18421052631578946, | |
| "grad_norm": 0.9621595083981531, | |
| "learning_rate": 2.4326923076923077e-06, | |
| "loss": 2.0336, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 0.9955527535333841, | |
| "learning_rate": 2.2625e-06, | |
| "loss": 1.9571, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.23684210526315788, | |
| "grad_norm": 0.7832834940985813, | |
| "learning_rate": 2.092307692307692e-06, | |
| "loss": 1.9792, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.2631578947368421, | |
| "grad_norm": 0.7516778470264893, | |
| "learning_rate": 1.9221153846153848e-06, | |
| "loss": 2.015, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.2894736842105263, | |
| "grad_norm": 1.4307615046721256, | |
| "learning_rate": 1.7519230769230768e-06, | |
| "loss": 1.9845, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 0.7110172377205767, | |
| "learning_rate": 1.581730769230769e-06, | |
| "loss": 1.974, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.34210526315789475, | |
| "grad_norm": 0.9472807779995442, | |
| "learning_rate": 1.4115384615384616e-06, | |
| "loss": 1.9848, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.3684210526315789, | |
| "grad_norm": 0.6928271721519345, | |
| "learning_rate": 1.2413461538461538e-06, | |
| "loss": 1.9453, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.39473684210526316, | |
| "grad_norm": 0.9136097540650397, | |
| "learning_rate": 1.071153846153846e-06, | |
| "loss": 1.9987, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 0.6671789202988747, | |
| "learning_rate": 9.009615384615385e-07, | |
| "loss": 2.0054, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.4473684210526316, | |
| "grad_norm": 1.2888605208856772, | |
| "learning_rate": 7.307692307692307e-07, | |
| "loss": 1.9706, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.47368421052631576, | |
| "grad_norm": 0.9597150120115726, | |
| "learning_rate": 6.740384615384617e-07, | |
| "loss": 1.9874, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.7428542672176522, | |
| "learning_rate": 5.038461538461539e-07, | |
| "loss": 2.0058, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "step": 57, | |
| "total_flos": 227154377834496.0, | |
| "train_loss": 2.006924344782244, | |
| "train_runtime": 17818.5287, | |
| "train_samples_per_second": 0.409, | |
| "train_steps_per_second": 0.003 | |
| } | |
| ], | |
| "logging_steps": 3, | |
| "max_steps": 57, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 12, | |
| "total_flos": 227154377834496.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |