| { | |
| "best_metric": 0.052040886133909225, | |
| "best_model_checkpoint": "ckpt/checkpoint-200", | |
| "epoch": 2.824858757062147, | |
| "eval_steps": 200, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.2824858757062147, | |
| "grad_norm": 0.08298324048519135, | |
| "learning_rate": 0.00029152542372881354, | |
| "loss": 0.1901, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5649717514124294, | |
| "grad_norm": 0.11693409830331802, | |
| "learning_rate": 0.0002830508474576271, | |
| "loss": 0.0624, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5649717514124294, | |
| "eval_accuracy": 0.9860834990059643, | |
| "eval_f1": 0.9891354958870091, | |
| "eval_loss": 0.052040886133909225, | |
| "eval_precision": 0.9869908626297041, | |
| "eval_recall": 0.9912894695909161, | |
| "eval_runtime": 12.1769, | |
| "eval_samples_per_second": 826.154, | |
| "eval_steps_per_second": 6.488, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.847457627118644, | |
| "grad_norm": 0.10722041875123978, | |
| "learning_rate": 0.00027457627118644066, | |
| "loss": 0.0562, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.1299435028248588, | |
| "grad_norm": 0.08397164940834045, | |
| "learning_rate": 0.0002661016949152542, | |
| "loss": 0.0524, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.1299435028248588, | |
| "eval_accuracy": 0.9874751491053678, | |
| "eval_f1": 0.9902219462983083, | |
| "eval_loss": 0.04475295916199684, | |
| "eval_precision": 0.9880749574105622, | |
| "eval_recall": 0.9923782858920517, | |
| "eval_runtime": 11.6899, | |
| "eval_samples_per_second": 860.571, | |
| "eval_steps_per_second": 6.758, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.4124293785310735, | |
| "grad_norm": 0.16527259349822998, | |
| "learning_rate": 0.0002576271186440678, | |
| "loss": 0.0492, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "grad_norm": 0.07748957723379135, | |
| "learning_rate": 0.00024915254237288135, | |
| "loss": 0.0518, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.694915254237288, | |
| "eval_accuracy": 0.9872763419483102, | |
| "eval_f1": 0.9900482040118178, | |
| "eval_loss": 0.04457252100110054, | |
| "eval_precision": 0.9897404010570496, | |
| "eval_recall": 0.9903561984756571, | |
| "eval_runtime": 12.8001, | |
| "eval_samples_per_second": 785.934, | |
| "eval_steps_per_second": 6.172, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.9774011299435028, | |
| "grad_norm": 0.07877205312252045, | |
| "learning_rate": 0.0002406779661016949, | |
| "loss": 0.0489, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.2598870056497176, | |
| "grad_norm": 0.09426571428775787, | |
| "learning_rate": 0.00023220338983050845, | |
| "loss": 0.0477, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.2598870056497176, | |
| "eval_accuracy": 0.9879721669980119, | |
| "eval_f1": 0.9906339499961297, | |
| "eval_loss": 0.04242447018623352, | |
| "eval_precision": 0.9859784283513097, | |
| "eval_recall": 0.9953336444237051, | |
| "eval_runtime": 13.2576, | |
| "eval_samples_per_second": 758.811, | |
| "eval_steps_per_second": 5.959, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.542372881355932, | |
| "grad_norm": 0.09674050658941269, | |
| "learning_rate": 0.000223728813559322, | |
| "loss": 0.0463, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.824858757062147, | |
| "grad_norm": 0.08737868070602417, | |
| "learning_rate": 0.0002152542372881356, | |
| "loss": 0.0465, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.824858757062147, | |
| "eval_accuracy": 0.9887673956262425, | |
| "eval_f1": 0.9912369135323769, | |
| "eval_loss": 0.04115341976284981, | |
| "eval_precision": 0.9884008660686668, | |
| "eval_recall": 0.9940892829366931, | |
| "eval_runtime": 11.8282, | |
| "eval_samples_per_second": 850.507, | |
| "eval_steps_per_second": 6.679, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 3540, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 8484875417960448.0, | |
| "train_batch_size": 256, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |