| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.005340453938585, | |
| "global_step": 33000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 1.2135922330097088e-05, | |
| "loss": 2.9902, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_HasAns_exact": 58.41767881241565, | |
| "eval_HasAns_f1": 67.0415484792148, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 32.38015138772077, | |
| "eval_NoAns_f1": 32.38015138772077, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 50.11370336056599, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 51.236780700905136, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 45.380274572559586, | |
| "eval_f1": 49.686035491012206, | |
| "eval_total": 11873, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 2.4271844660194176e-05, | |
| "loss": 1.5894, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_HasAns_exact": 69.28137651821862, | |
| "eval_HasAns_f1": 77.89405016908765, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 46.913372582001685, | |
| "eval_NoAns_f1": 46.913372582001685, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 58.08978354249137, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 62.389954468319395, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 58.08136107133833, | |
| "eval_f1": 62.38153199716619, | |
| "eval_total": 11873, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 2.959094078454665e-05, | |
| "loss": 1.3761, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_HasAns_exact": 71.87921727395411, | |
| "eval_HasAns_f1": 80.58982724903632, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 51.42136248948697, | |
| "eval_NoAns_f1": 51.42136248948697, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 61.65248884022572, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 66.00155781456188, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 61.63564389791965, | |
| "eval_f1": 65.98471287225554, | |
| "eval_total": 11873, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 2.8816207421945613e-05, | |
| "loss": 1.249, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_HasAns_exact": 68.47165991902834, | |
| "eval_HasAns_f1": 75.13575702386423, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 74.3313708999159, | |
| "eval_NoAns_f1": 74.3313708999159, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 71.40571043544176, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 74.7329880937815, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 71.40571043544176, | |
| "eval_f1": 74.73298809378153, | |
| "eval_total": 11873, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.8041474059344576e-05, | |
| "loss": 1.1796, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_HasAns_exact": 67.62820512820512, | |
| "eval_HasAns_f1": 73.71252998774504, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.26745164003364, | |
| "eval_NoAns_f1": 78.26745164003364, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 72.95544512760044, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 75.99325172806816, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 72.95544512760044, | |
| "eval_f1": 75.99325172806819, | |
| "eval_total": 11873, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 2.726674069674354e-05, | |
| "loss": 1.1097, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_HasAns_exact": 73.80229419703105, | |
| "eval_HasAns_f1": 80.14574039018072, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 70.21026072329688, | |
| "eval_NoAns_f1": 70.21026072329688, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 72.01212835846037, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 75.17931011816671, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 72.00370588730733, | |
| "eval_f1": 75.17088764701359, | |
| "eval_total": 11873, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 2.64920073341425e-05, | |
| "loss": 1.0804, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_HasAns_exact": 75.60728744939271, | |
| "eval_HasAns_f1": 82.66989971492067, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 68.56181665264928, | |
| "eval_NoAns_f1": 68.56181665264928, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 72.0879305988377, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 75.61418053651563, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 72.07950812768466, | |
| "eval_f1": 75.60575806536264, | |
| "eval_total": 11873, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 2.5717273971541462e-05, | |
| "loss": 1.0272, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_HasAns_exact": 72.85762483130904, | |
| "eval_HasAns_f1": 79.23824809473913, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.01345668629101, | |
| "eval_NoAns_f1": 76.01345668629101, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.43780005053483, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 77.62354372994311, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.43780005053483, | |
| "eval_f1": 77.62354372994308, | |
| "eval_total": 11873, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 2.4942540608940426e-05, | |
| "loss": 0.8905, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_HasAns_exact": 76.417004048583, | |
| "eval_HasAns_f1": 83.82699462168138, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 66.27417998317914, | |
| "eval_NoAns_f1": 66.27417998317914, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 71.3467531373705, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 75.04644353721282, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 71.33833066621747, | |
| "eval_f1": 75.03802106605983, | |
| "eval_total": 11873, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 2.4167807246339385e-05, | |
| "loss": 0.8348, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_HasAns_exact": 74.00472334682861, | |
| "eval_HasAns_f1": 80.47538325931156, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 75.55929352396973, | |
| "eval_NoAns_f1": 75.55929352396973, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.78312136780931, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.01381891360221, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.78312136780931, | |
| "eval_f1": 78.01381891360226, | |
| "eval_total": 11873, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 2.3393073883738345e-05, | |
| "loss": 0.8345, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_HasAns_exact": 73.21187584345479, | |
| "eval_HasAns_f1": 79.61538989647931, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.91421362489487, | |
| "eval_NoAns_f1": 77.91421362489487, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.56641118504169, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.76358387150074, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.56641118504169, | |
| "eval_f1": 78.76358387150083, | |
| "eval_total": 11873, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 2.2618340521137308e-05, | |
| "loss": 0.8372, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_HasAns_exact": 74.12280701754386, | |
| "eval_HasAns_f1": 80.2215790589714, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.6955424726661, | |
| "eval_NoAns_f1": 77.6955424726661, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.91173250231618, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.95675235084504, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.91173250231618, | |
| "eval_f1": 78.95675235084498, | |
| "eval_total": 11873, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 2.184360715853627e-05, | |
| "loss": 0.8396, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_HasAns_exact": 77.74966261808368, | |
| "eval_HasAns_f1": 84.34526051529392, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 70.68124474348191, | |
| "eval_NoAns_f1": 70.68124474348191, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.21881580055589, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 77.51189289435379, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.21039332940285, | |
| "eval_f1": 77.50347042320072, | |
| "eval_total": 11873, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 2.1068873795935234e-05, | |
| "loss": 0.8164, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "eval_HasAns_exact": 75.65789473684211, | |
| "eval_HasAns_f1": 82.25660585284308, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 73.59125315391086, | |
| "eval_NoAns_f1": 73.59125315391086, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.62309441590162, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 77.9177258903103, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.62309441590162, | |
| "eval_f1": 77.91772589031022, | |
| "eval_total": 11873, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 2.0294140433334194e-05, | |
| "loss": 0.8052, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_HasAns_exact": 78.71120107962213, | |
| "eval_HasAns_f1": 85.01212257884671, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 68.78048780487805, | |
| "eval_NoAns_f1": 68.78048780487805, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 73.74715741598585, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 76.89310727258507, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 73.73873494483281, | |
| "eval_f1": 76.88468480143192, | |
| "eval_total": 11873, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 1.9519407070733157e-05, | |
| "loss": 0.8066, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_HasAns_exact": 74.69635627530364, | |
| "eval_HasAns_f1": 80.71107846446266, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.89739276703112, | |
| "eval_NoAns_f1": 77.89739276703112, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.29916617535585, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.30222126988423, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.29916617535585, | |
| "eval_f1": 79.30222126988414, | |
| "eval_total": 11873, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 1.874467370813212e-05, | |
| "loss": 0.7057, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_HasAns_exact": 77.47975708502024, | |
| "eval_HasAns_f1": 84.97990004810745, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 68.71320437342304, | |
| "eval_NoAns_f1": 68.71320437342304, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 73.09020466604902, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 76.8349067198839, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 73.09020466604902, | |
| "eval_f1": 76.83490671988388, | |
| "eval_total": 11873, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "learning_rate": 1.796994034553108e-05, | |
| "loss": 0.6376, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_HasAns_exact": 74.25775978407557, | |
| "eval_HasAns_f1": 81.15876336598458, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.37594617325483, | |
| "eval_NoAns_f1": 77.37594617325483, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.81908531963278, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.26464661278166, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.81908531963278, | |
| "eval_f1": 79.26464661278159, | |
| "eval_total": 11873, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.719520698293004e-05, | |
| "loss": 0.6315, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_HasAns_exact": 73.3468286099865, | |
| "eval_HasAns_f1": 79.65372011100962, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 80.43734230445753, | |
| "eval_NoAns_f1": 80.43734230445753, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.89716162722142, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.04609221073564, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.89716162722142, | |
| "eval_f1": 80.0460922107357, | |
| "eval_total": 11873, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 1.6420473620329003e-05, | |
| "loss": 0.6323, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_HasAns_exact": 75.21929824561404, | |
| "eval_HasAns_f1": 81.79839130722176, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.85449957947856, | |
| "eval_NoAns_f1": 76.85449957947856, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.03806956961172, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.32290606158605, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.03806956961172, | |
| "eval_f1": 79.322906061586, | |
| "eval_total": 11873, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 1.5645740257727966e-05, | |
| "loss": 0.6125, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "eval_HasAns_exact": 75.82658569500674, | |
| "eval_HasAns_f1": 82.54925380201965, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.36669470142978, | |
| "eval_NoAns_f1": 76.36669470142978, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.09702686768298, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.45354809554222, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.09702686768298, | |
| "eval_f1": 79.45354809554216, | |
| "eval_total": 11873, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 1.4871006895126928e-05, | |
| "loss": 0.6095, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_HasAns_exact": 77.36167341430499, | |
| "eval_HasAns_f1": 84.06251850374095, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 73.92767031118586, | |
| "eval_NoAns_f1": 73.92767031118586, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 75.65063589657206, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.99626123896034, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 75.64221342541902, | |
| "eval_f1": 78.98783876780718, | |
| "eval_total": 11873, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 1.4096273532525889e-05, | |
| "loss": 0.5988, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_HasAns_exact": 77.17611336032388, | |
| "eval_HasAns_f1": 83.48725009021588, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 74.95374264087468, | |
| "eval_NoAns_f1": 74.95374264087468, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.07175945422387, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.22280961297066, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.06333698307083, | |
| "eval_f1": 79.21438714181748, | |
| "eval_total": 11873, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 1.3321540169924852e-05, | |
| "loss": 0.5968, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "eval_HasAns_exact": 74.13967611336032, | |
| "eval_HasAns_f1": 80.3794525038334, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 81.22792262405383, | |
| "eval_NoAns_f1": 81.22792262405383, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.68887391560683, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.80429499222815, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.68887391560683, | |
| "eval_f1": 80.80429499222812, | |
| "eval_total": 11873, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "learning_rate": 1.2546806807323812e-05, | |
| "loss": 0.5722, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.03, | |
| "eval_HasAns_exact": 75.2867746288799, | |
| "eval_HasAns_f1": 81.508728103177, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.30866274179984, | |
| "eval_NoAns_f1": 77.30866274179984, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.29916617535585, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.40568855349397, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.29916617535585, | |
| "eval_f1": 79.4056885534939, | |
| "eval_total": 11873, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "learning_rate": 1.1772073444722775e-05, | |
| "loss": 0.4844, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.16, | |
| "eval_HasAns_exact": 76.78812415654521, | |
| "eval_HasAns_f1": 84.07498836042032, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 73.10344827586206, | |
| "eval_NoAns_f1": 73.10344827586206, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 74.95157079087004, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 78.58978615350551, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 74.94314831971701, | |
| "eval_f1": 78.58136368235249, | |
| "eval_total": 11873, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "learning_rate": 1.0997340082121736e-05, | |
| "loss": 0.4664, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.28, | |
| "eval_HasAns_exact": 76.40013495276654, | |
| "eval_HasAns_f1": 83.06591024545988, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 78.04878048780488, | |
| "eval_NoAns_f1": 78.04878048780488, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.23406047334288, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.56217602417964, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.22563800218984, | |
| "eval_f1": 80.55375355302654, | |
| "eval_total": 11873, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.02226067195207e-05, | |
| "loss": 0.4914, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_HasAns_exact": 76.38326585695006, | |
| "eval_HasAns_f1": 83.19767154572087, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.01345668629101, | |
| "eval_NoAns_f1": 76.01345668629101, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.20651899267246, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.60884333555407, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.19809652151942, | |
| "eval_f1": 79.60042086440092, | |
| "eval_total": 11873, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "learning_rate": 9.44787335691966e-06, | |
| "loss": 0.4696, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "eval_HasAns_exact": 74.91565452091768, | |
| "eval_HasAns_f1": 81.5931788001518, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 79.37762825904122, | |
| "eval_NoAns_f1": 79.37762825904122, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.14983576181251, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.48381739470226, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.14983576181251, | |
| "eval_f1": 80.48381739470223, | |
| "eval_total": 11873, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 8.673139994318622e-06, | |
| "loss": 0.483, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_HasAns_exact": 76.6025641025641, | |
| "eval_HasAns_f1": 83.549417980174, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 76.13120269133726, | |
| "eval_NoAns_f1": 76.13120269133726, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.36654594458014, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 79.83499956089204, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.36654594458014, | |
| "eval_f1": 79.83499956089204, | |
| "eval_total": 11873, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "learning_rate": 7.898406631717584e-06, | |
| "loss": 0.4602, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.76, | |
| "eval_HasAns_exact": 73.61673414304994, | |
| "eval_HasAns_f1": 79.59590506841275, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 81.69890664423886, | |
| "eval_NoAns_f1": 81.69890664423886, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.66360650214773, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.64891141628488, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.66360650214773, | |
| "eval_f1": 80.64891141628489, | |
| "eval_total": 11873, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 7.123673269116546e-06, | |
| "loss": 0.4706, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "eval_HasAns_exact": 76.1302294197031, | |
| "eval_HasAns_f1": 83.02595617673859, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 77.14045416316232, | |
| "eval_NoAns_f1": 77.14045416316232, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 76.63606502147731, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.07899167992126, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 76.63606502147731, | |
| "eval_f1": 80.07899167992143, | |
| "eval_total": 11873, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "learning_rate": 6.348939906515508e-06, | |
| "loss": 0.4633, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.01, | |
| "eval_HasAns_exact": 75.38798920377867, | |
| "eval_HasAns_f1": 82.20772908314846, | |
| "eval_HasAns_total": 5928, | |
| "eval_NoAns_exact": 79.42809083263246, | |
| "eval_NoAns_f1": 79.42809083263246, | |
| "eval_NoAns_total": 5945, | |
| "eval_best_exact": 77.41935483870968, | |
| "eval_best_exact_thresh": 0.0, | |
| "eval_best_f1": 80.82434245809002, | |
| "eval_best_f1_thresh": 0.0, | |
| "eval_exact": 77.41093236755664, | |
| "eval_f1": 80.81591998693705, | |
| "eval_total": 11873, | |
| "step": 33000 | |
| } | |
| ], | |
| "max_steps": 41195, | |
| "num_train_epochs": 5, | |
| "total_flos": 1.2072877510811443e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |