| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.5, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 6.0, | |
| "learning_rate": 3.8000000000000005e-06, | |
| "loss": 1.4753, | |
| "mean_token_accuracy": 0.6514762923121452, | |
| "num_tokens": 9165.0, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 7.800000000000002e-06, | |
| "loss": 1.3604, | |
| "mean_token_accuracy": 0.6717274159193038, | |
| "num_tokens": 19130.0, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 1.18e-05, | |
| "loss": 1.2917, | |
| "mean_token_accuracy": 0.691003431379795, | |
| "num_tokens": 30279.0, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 30.625, | |
| "learning_rate": 1.58e-05, | |
| "loss": 0.9447, | |
| "mean_token_accuracy": 0.7457822680473327, | |
| "num_tokens": 42533.0, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 9.875, | |
| "learning_rate": 1.98e-05, | |
| "loss": 1.3137, | |
| "mean_token_accuracy": 0.6690018624067307, | |
| "num_tokens": 48242.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 18.0, | |
| "learning_rate": 1.988886498744505e-05, | |
| "loss": 1.0214, | |
| "mean_token_accuracy": 0.7285905957221985, | |
| "num_tokens": 59331.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 3.515625, | |
| "learning_rate": 1.953454172319001e-05, | |
| "loss": 1.0517, | |
| "mean_token_accuracy": 0.7213401407003402, | |
| "num_tokens": 68979.0, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 1.894544639838025e-05, | |
| "loss": 1.1587, | |
| "mean_token_accuracy": 0.7021817207336426, | |
| "num_tokens": 76070.0, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 7.0625, | |
| "learning_rate": 1.8136084495007874e-05, | |
| "loss": 1.0255, | |
| "mean_token_accuracy": 0.7250557497143746, | |
| "num_tokens": 85738.0, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 18.875, | |
| "learning_rate": 1.7126385189252055e-05, | |
| "loss": 1.1949, | |
| "mean_token_accuracy": 0.6837209314107895, | |
| "num_tokens": 94928.0, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 7.5, | |
| "learning_rate": 1.594121062902039e-05, | |
| "loss": 1.0857, | |
| "mean_token_accuracy": 0.7113691180944443, | |
| "num_tokens": 104773.0, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 6.625, | |
| "learning_rate": 1.4609743745354625e-05, | |
| "loss": 0.9951, | |
| "mean_token_accuracy": 0.7261969804763794, | |
| "num_tokens": 114064.0, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 19.125, | |
| "learning_rate": 1.3164769671815862e-05, | |
| "loss": 1.0099, | |
| "mean_token_accuracy": 0.7223263427615165, | |
| "num_tokens": 123805.0, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 5.03125, | |
| "learning_rate": 1.164186846568863e-05, | |
| "loss": 1.0713, | |
| "mean_token_accuracy": 0.7073337942361831, | |
| "num_tokens": 134324.0, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 11.5, | |
| "learning_rate": 1.0078539008887114e-05, | |
| "loss": 0.9601, | |
| "mean_token_accuracy": 0.7360554546117782, | |
| "num_tokens": 143153.0, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 7.9375, | |
| "learning_rate": 8.51327566103077e-06, | |
| "loss": 0.9354, | |
| "mean_token_accuracy": 0.7383034735918045, | |
| "num_tokens": 154603.0, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 6.875, | |
| "learning_rate": 6.984620400555044e-06, | |
| "loss": 0.9676, | |
| "mean_token_accuracy": 0.7273150086402893, | |
| "num_tokens": 167175.0, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 17.625, | |
| "learning_rate": 5.53021379328879e-06, | |
| "loss": 1.085, | |
| "mean_token_accuracy": 0.715969854593277, | |
| "num_tokens": 176751.0, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 6.0625, | |
| "learning_rate": 4.185868156801695e-06, | |
| "loss": 1.0635, | |
| "mean_token_accuracy": 0.7170247107744216, | |
| "num_tokens": 185233.0, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 7.25, | |
| "learning_rate": 2.9846857422914434e-06, | |
| "loss": 1.0838, | |
| "mean_token_accuracy": 0.7044813245534897, | |
| "num_tokens": 193072.0, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 6.21875, | |
| "learning_rate": 1.956243647299155e-06, | |
| "loss": 1.006, | |
| "mean_token_accuracy": 0.7172566086053849, | |
| "num_tokens": 204372.0, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 1.1258655294071686e-06, | |
| "loss": 0.9791, | |
| "mean_token_accuracy": 0.727970740199089, | |
| "num_tokens": 216681.0, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 5.5, | |
| "learning_rate": 5.13998053744954e-07, | |
| "loss": 1.0866, | |
| "mean_token_accuracy": 0.7147636383771896, | |
| "num_tokens": 226708.0, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 11.5, | |
| "learning_rate": 1.357074282350457e-07, | |
| "loss": 1.01, | |
| "mean_token_accuracy": 0.7146593451499939, | |
| "num_tokens": 237866.0, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 6.90625, | |
| "learning_rate": 3.0842355210336515e-10, | |
| "loss": 1.1541, | |
| "mean_token_accuracy": 0.68363136947155, | |
| "num_tokens": 245487.0, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.0414470479035392e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |