| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2421, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 61.79818261705088, | |
| "learning_rate": 2.05761316872428e-09, | |
| "logits/chosen": -3.5, | |
| "logits/rejected": -1.4140625, | |
| "logps/chosen": -262.0, | |
| "logps/rejected": -788.0, | |
| "loss": 0.6914, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 71.93619140446947, | |
| "learning_rate": 2.0576131687242796e-08, | |
| "logits/chosen": -1.9140625, | |
| "logits/rejected": -3.0625, | |
| "logps/chosen": -648.0, | |
| "logps/rejected": -760.0, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": -0.033447265625, | |
| "rewards/margins": -0.033447265625, | |
| "rewards/rejected": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 95.51795034006538, | |
| "learning_rate": 4.115226337448559e-08, | |
| "logits/chosen": -1.828125, | |
| "logits/rejected": -2.671875, | |
| "logps/chosen": -484.0, | |
| "logps/rejected": -352.0, | |
| "loss": 0.69, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": 0.02001953125, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.02001953125, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 83.15780879075649, | |
| "learning_rate": 6.172839506172839e-08, | |
| "logits/chosen": -1.8671875, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -404.0, | |
| "logps/rejected": -540.0, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.0150146484375, | |
| "rewards/margins": -0.02001953125, | |
| "rewards/rejected": 0.0050048828125, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 78.74493611311031, | |
| "learning_rate": 8.230452674897118e-08, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -448.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0250244140625, | |
| "rewards/margins": 0.06005859375, | |
| "rewards/rejected": -0.03515625, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 84.22871977974292, | |
| "learning_rate": 1.02880658436214e-07, | |
| "logits/chosen": -1.6640625, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -592.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.6922, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.0400390625, | |
| "rewards/margins": -0.030029296875, | |
| "rewards/rejected": -0.010009765625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 57.8386192430028, | |
| "learning_rate": 1.2345679012345677e-07, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -362.0, | |
| "logps/rejected": -368.0, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.10000000149011612, | |
| "rewards/chosen": -0.0050048828125, | |
| "rewards/margins": -0.044921875, | |
| "rewards/rejected": 0.0400390625, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 108.85645337456651, | |
| "learning_rate": 1.4403292181069958e-07, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -460.0, | |
| "logps/rejected": -468.0, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.05517578125, | |
| "rewards/margins": 0.0751953125, | |
| "rewards/rejected": -0.02001953125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 70.2187244330304, | |
| "learning_rate": 1.6460905349794237e-07, | |
| "logits/chosen": -1.8984375, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -516.0, | |
| "logps/rejected": -506.0, | |
| "loss": 0.687, | |
| "rewards/accuracies": 0.20000000298023224, | |
| "rewards/chosen": -0.050048828125, | |
| "rewards/margins": -0.03515625, | |
| "rewards/rejected": -0.0150146484375, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 54.744834672425526, | |
| "learning_rate": 1.8518518518518516e-07, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.677, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": 0.050048828125, | |
| "rewards/margins": 0.06494140625, | |
| "rewards/rejected": -0.0150146484375, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 98.04764626666417, | |
| "learning_rate": 2.05761316872428e-07, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -560.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.6657, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": 0.03515625, | |
| "rewards/margins": -0.0301513671875, | |
| "rewards/rejected": 0.06494140625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 78.67800061099335, | |
| "learning_rate": 2.2633744855967078e-07, | |
| "logits/chosen": -2.03125, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -484.0, | |
| "logps/rejected": -428.0, | |
| "loss": 0.6618, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.054931640625, | |
| "rewards/margins": 0.06982421875, | |
| "rewards/rejected": -0.01507568359375, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 98.90783538853094, | |
| "learning_rate": 2.4691358024691354e-07, | |
| "logits/chosen": -1.8984375, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -752.0, | |
| "loss": 0.6576, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.25, | |
| "rewards/margins": 0.205078125, | |
| "rewards/rejected": 0.045166015625, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 183.377786164052, | |
| "learning_rate": 2.6748971193415635e-07, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.78125, | |
| "logps/chosen": -408.0, | |
| "logps/rejected": -358.0, | |
| "loss": 0.6568, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.09521484375, | |
| "rewards/margins": 0.02001953125, | |
| "rewards/rejected": -0.115234375, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 75.16806808811697, | |
| "learning_rate": 2.8806584362139917e-07, | |
| "logits/chosen": -1.359375, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -556.0, | |
| "loss": 0.6513, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.0703125, | |
| "rewards/margins": 0.0400390625, | |
| "rewards/rejected": -0.1103515625, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 59.26706314390388, | |
| "learning_rate": 3.086419753086419e-07, | |
| "logits/chosen": -2.390625, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -356.0, | |
| "logps/rejected": -382.0, | |
| "loss": 0.6289, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.125, | |
| "rewards/margins": 0.1357421875, | |
| "rewards/rejected": -0.010009765625, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 103.75204061254972, | |
| "learning_rate": 3.2921810699588474e-07, | |
| "logits/chosen": -2.46875, | |
| "logits/rejected": -1.6171875, | |
| "logps/chosen": -398.0, | |
| "logps/rejected": -486.0, | |
| "loss": 0.6381, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0703125, | |
| "rewards/margins": 0.08056640625, | |
| "rewards/rejected": -0.150390625, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 113.86192442502461, | |
| "learning_rate": 3.4979423868312755e-07, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -1.9140625, | |
| "logps/chosen": -450.0, | |
| "logps/rejected": -464.0, | |
| "loss": 0.6266, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.205078125, | |
| "rewards/margins": 0.1455078125, | |
| "rewards/rejected": -0.3515625, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 63.20529708150695, | |
| "learning_rate": 3.703703703703703e-07, | |
| "logits/chosen": -1.8046875, | |
| "logits/rejected": -2.8125, | |
| "logps/chosen": -744.0, | |
| "logps/rejected": -584.0, | |
| "loss": 0.6384, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.0849609375, | |
| "rewards/margins": 0.1806640625, | |
| "rewards/rejected": -0.265625, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 83.60084870315862, | |
| "learning_rate": 3.909465020576131e-07, | |
| "logits/chosen": -1.7578125, | |
| "logits/rejected": -1.828125, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -848.0, | |
| "loss": 0.5983, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.1201171875, | |
| "rewards/margins": 0.400390625, | |
| "rewards/rejected": -0.51953125, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 62.24290659916015, | |
| "learning_rate": 4.11522633744856e-07, | |
| "logits/chosen": -2.125, | |
| "logits/rejected": -2.28125, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.5939, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.23046875, | |
| "rewards/margins": 0.49609375, | |
| "rewards/rejected": -0.265625, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 54.79737482658103, | |
| "learning_rate": 4.320987654320987e-07, | |
| "logits/chosen": -1.9765625, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -494.0, | |
| "logps/rejected": -648.0, | |
| "loss": 0.6176, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.1103515625, | |
| "rewards/margins": 0.56640625, | |
| "rewards/rejected": -0.455078125, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 75.36398911732266, | |
| "learning_rate": 4.5267489711934156e-07, | |
| "logits/chosen": -2.046875, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -512.0, | |
| "logps/rejected": -390.0, | |
| "loss": 0.6214, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.1806640625, | |
| "rewards/margins": -0.0103759765625, | |
| "rewards/rejected": -0.169921875, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 65.42502577887264, | |
| "learning_rate": 4.732510288065844e-07, | |
| "logits/chosen": -2.40625, | |
| "logits/rejected": -2.3125, | |
| "logps/chosen": -652.0, | |
| "logps/rejected": -504.0, | |
| "loss": 0.5634, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.134765625, | |
| "rewards/margins": 0.359375, | |
| "rewards/rejected": -0.49609375, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 101.59685524071844, | |
| "learning_rate": 4.938271604938271e-07, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -704.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.5768, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.37109375, | |
| "rewards/margins": 0.1103515625, | |
| "rewards/rejected": -0.48046875, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 56.985215974106104, | |
| "learning_rate": 4.999872565682321e-07, | |
| "logits/chosen": -1.4921875, | |
| "logits/rejected": -2.109375, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -496.0, | |
| "loss": 0.5665, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.205078125, | |
| "rewards/margins": 0.6328125, | |
| "rewards/rejected": -0.42578125, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 60.61672615933723, | |
| "learning_rate": 4.999248428870611e-07, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -3.453125, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -430.0, | |
| "loss": 0.5575, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.1396484375, | |
| "rewards/margins": 0.37109375, | |
| "rewards/rejected": -0.51171875, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 171.89078996410038, | |
| "learning_rate": 4.99810431295357e-07, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -488.0, | |
| "loss": 0.5472, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.828125, | |
| "rewards/margins": 0.005462646484375, | |
| "rewards/rejected": -0.8359375, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 132.22262423790198, | |
| "learning_rate": 4.99644045596931e-07, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -2.875, | |
| "logps/chosen": -462.0, | |
| "logps/rejected": -356.0, | |
| "loss": 0.5785, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.2451171875, | |
| "rewards/margins": 0.474609375, | |
| "rewards/rejected": -0.71875, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 123.26679640601527, | |
| "learning_rate": 4.994257204090243e-07, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -3.0, | |
| "logps/chosen": -580.0, | |
| "logps/rejected": -476.0, | |
| "loss": 0.5198, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.42578125, | |
| "rewards/margins": 0.5703125, | |
| "rewards/rejected": -0.99609375, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 47.55857001285771, | |
| "learning_rate": 4.991555011551073e-07, | |
| "logits/chosen": -1.53125, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -624.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.5123, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.07470703125, | |
| "rewards/margins": 0.88671875, | |
| "rewards/rejected": -0.9609375, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 57.83726580735741, | |
| "learning_rate": 4.988334440554274e-07, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -2.0, | |
| "logps/chosen": -800.0, | |
| "logps/rejected": -632.0, | |
| "loss": 0.5449, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.10498046875, | |
| "rewards/margins": 0.5234375, | |
| "rewards/rejected": -0.6328125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 63.9187247476568, | |
| "learning_rate": 4.984596161153135e-07, | |
| "logits/chosen": -1.96875, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -664.0, | |
| "loss": 0.5372, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.8515625, | |
| "rewards/margins": 0.33984375, | |
| "rewards/rejected": -1.1875, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 59.66658958585498, | |
| "learning_rate": 4.980340951112345e-07, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -2.4375, | |
| "logps/chosen": -652.0, | |
| "logps/rejected": -612.0, | |
| "loss": 0.5528, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.65625, | |
| "rewards/margins": 0.58984375, | |
| "rewards/rejected": -1.25, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 51.91101244482548, | |
| "learning_rate": 4.975569695746179e-07, | |
| "logits/chosen": -1.4140625, | |
| "logits/rejected": -2.703125, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -496.0, | |
| "loss": 0.5169, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.259765625, | |
| "rewards/margins": 0.4140625, | |
| "rewards/rejected": -0.671875, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 72.46042088468953, | |
| "learning_rate": 4.970283387734303e-07, | |
| "logits/chosen": -2.34375, | |
| "logits/rejected": -1.96875, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -494.0, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.546875, | |
| "rewards/margins": 0.1943359375, | |
| "rewards/rejected": -0.7421875, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 98.36392274253446, | |
| "learning_rate": 4.964483126915245e-07, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.5237, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.375, | |
| "rewards/margins": 0.62109375, | |
| "rewards/rejected": -0.99609375, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 35.92451209539994, | |
| "learning_rate": 4.958170120057565e-07, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -3.46875, | |
| "logps/chosen": -438.0, | |
| "logps/rejected": -400.0, | |
| "loss": 0.514, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.39453125, | |
| "rewards/margins": 0.75390625, | |
| "rewards/rejected": -1.1484375, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 43.212042789239504, | |
| "learning_rate": 4.951345680608787e-07, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -2.65625, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -616.0, | |
| "loss": 0.5045, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.140625, | |
| "rewards/margins": 0.201171875, | |
| "rewards/rejected": -1.34375, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 55.92901896058768, | |
| "learning_rate": 4.944011228422125e-07, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -508.0, | |
| "loss": 0.5521, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.6953125, | |
| "rewards/margins": 0.427734375, | |
| "rewards/rejected": -1.125, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 48.27467634078171, | |
| "learning_rate": 4.936168289461084e-07, | |
| "logits/chosen": -2.25, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -390.0, | |
| "logps/rejected": -452.0, | |
| "loss": 0.5116, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.76953125, | |
| "rewards/margins": 0.2353515625, | |
| "rewards/rejected": -1.0078125, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 43.39520482895247, | |
| "learning_rate": 4.92781849548197e-07, | |
| "logits/chosen": -1.6640625, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -580.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.5259, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.0234375, | |
| "rewards/margins": 0.5859375, | |
| "rewards/rejected": -1.609375, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 53.58876818695228, | |
| "learning_rate": 4.918963583694396e-07, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.5234375, | |
| "logps/chosen": -548.0, | |
| "logps/rejected": -596.0, | |
| "loss": 0.516, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.93359375, | |
| "rewards/margins": 0.59765625, | |
| "rewards/rejected": -1.53125, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 195.21265227218643, | |
| "learning_rate": 4.909605396399855e-07, | |
| "logits/chosen": -1.9375, | |
| "logits/rejected": -1.625, | |
| "logps/chosen": -414.0, | |
| "logps/rejected": -756.0, | |
| "loss": 0.5513, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.64453125, | |
| "rewards/margins": 1.6640625, | |
| "rewards/rejected": -2.3125, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 46.48860995329084, | |
| "learning_rate": 4.899745880608417e-07, | |
| "logits/chosen": -1.5, | |
| "logits/rejected": -1.921875, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -664.0, | |
| "loss": 0.7219, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.921875, | |
| "rewards/margins": 0.462890625, | |
| "rewards/rejected": -1.3828125, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 42.31847046504421, | |
| "learning_rate": 4.889387087633647e-07, | |
| "logits/chosen": -1.4296875, | |
| "logits/rejected": -2.84375, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.5171, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.69921875, | |
| "rewards/margins": 0.6328125, | |
| "rewards/rejected": -1.328125, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 43.44689835885094, | |
| "learning_rate": 4.878531172665815e-07, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -2.75, | |
| "logps/chosen": -608.0, | |
| "logps/rejected": -544.0, | |
| "loss": 0.4904, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.169921875, | |
| "rewards/margins": 0.9453125, | |
| "rewards/rejected": -1.1171875, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 47.14528734842805, | |
| "learning_rate": 4.867180394323509e-07, | |
| "logits/chosen": -1.5546875, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -732.0, | |
| "loss": 0.5632, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.0, | |
| "rewards/margins": 1.3359375, | |
| "rewards/rejected": -2.328125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 43.11262095050651, | |
| "learning_rate": 4.855337114183711e-07, | |
| "logits/chosen": -1.421875, | |
| "logits/rejected": -1.5859375, | |
| "logps/chosen": -644.0, | |
| "logps/rejected": -692.0, | |
| "loss": 0.4951, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.330078125, | |
| "rewards/margins": 1.53125, | |
| "rewards/rejected": -1.859375, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 45.38075851262632, | |
| "learning_rate": 4.843003796290469e-07, | |
| "logits/chosen": -1.8046875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.5214, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.375, | |
| "rewards/margins": 0.71484375, | |
| "rewards/rejected": -1.09375, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 50.08648685280834, | |
| "learning_rate": 4.830183006642236e-07, | |
| "logits/chosen": -2.40625, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -444.0, | |
| "logps/rejected": -536.0, | |
| "loss": 0.5388, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.83203125, | |
| "rewards/margins": 0.34765625, | |
| "rewards/rejected": -1.1796875, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_logits/chosen": -1.0859375, | |
| "eval_logits/rejected": -1.0078125, | |
| "eval_logps/chosen": -540.0, | |
| "eval_logps/rejected": -620.0, | |
| "eval_loss": 0.49058592319488525, | |
| "eval_rewards/accuracies": 0.8055555820465088, | |
| "eval_rewards/chosen": -0.7578125, | |
| "eval_rewards/margins": 0.86328125, | |
| "eval_rewards/rejected": -1.625, | |
| "eval_runtime": 50.5034, | |
| "eval_samples_per_second": 20.791, | |
| "eval_steps_per_second": 0.178, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 45.830329324613906, | |
| "learning_rate": 4.816877412658007e-07, | |
| "logits/chosen": -1.5546875, | |
| "logits/rejected": -1.84375, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -616.0, | |
| "loss": 0.4923, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.6171875, | |
| "rewards/margins": 0.921875, | |
| "rewards/rejected": -1.5390625, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 53.190960939965166, | |
| "learning_rate": 4.80308978262235e-07, | |
| "logits/chosen": -1.515625, | |
| "logits/rejected": -2.609375, | |
| "logps/chosen": -548.0, | |
| "logps/rejected": -502.0, | |
| "loss": 0.4409, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.88671875, | |
| "rewards/margins": 0.84375, | |
| "rewards/rejected": -1.7265625, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 45.04181332411229, | |
| "learning_rate": 4.788822985109449e-07, | |
| "logits/chosen": -1.875, | |
| "logits/rejected": -1.9140625, | |
| "logps/chosen": -432.0, | |
| "logps/rejected": -448.0, | |
| "loss": 0.5368, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.78515625, | |
| "rewards/margins": 0.50390625, | |
| "rewards/rejected": -1.2890625, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 40.85074772821745, | |
| "learning_rate": 4.774079988386296e-07, | |
| "logits/chosen": -1.9765625, | |
| "logits/rejected": -1.640625, | |
| "logps/chosen": -436.0, | |
| "logps/rejected": -464.0, | |
| "loss": 0.5083, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.94140625, | |
| "rewards/margins": 0.2197265625, | |
| "rewards/rejected": -1.15625, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 45.70832159940248, | |
| "learning_rate": 4.7588638597951173e-07, | |
| "logits/chosen": -1.1171875, | |
| "logits/rejected": -2.265625, | |
| "logps/chosen": -748.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.4754, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.287109375, | |
| "rewards/margins": 1.640625, | |
| "rewards/rejected": -1.3515625, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 59.530448462467945, | |
| "learning_rate": 4.7431777651152103e-07, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -2.09375, | |
| "logps/chosen": -464.0, | |
| "logps/rejected": -452.0, | |
| "loss": 0.5154, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.890625, | |
| "rewards/margins": 0.380859375, | |
| "rewards/rejected": -1.2734375, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 60.197735477558666, | |
| "learning_rate": 4.727024967904284e-07, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -1.4375, | |
| "logps/chosen": -414.0, | |
| "logps/rejected": -564.0, | |
| "loss": 0.5178, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.7421875, | |
| "rewards/margins": -0.025634765625, | |
| "rewards/rejected": -0.71484375, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 42.26765729774977, | |
| "learning_rate": 4.710408828819463e-07, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -2.359375, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.5026, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.2890625, | |
| "rewards/margins": 0.6484375, | |
| "rewards/rejected": -1.9375, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 50.89266004351078, | |
| "learning_rate": 4.6933328049180937e-07, | |
| "logits/chosen": -1.890625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -496.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.4856, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.734375, | |
| "rewards/margins": 0.421875, | |
| "rewards/rejected": -1.15625, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 47.93987624657981, | |
| "learning_rate": 4.6758004489384815e-07, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -408.0, | |
| "logps/rejected": -544.0, | |
| "loss": 0.4713, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -1.2109375, | |
| "rewards/margins": 0.23828125, | |
| "rewards/rejected": -1.4453125, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 48.72108104567496, | |
| "learning_rate": 4.6578154085607323e-07, | |
| "logits/chosen": -2.8125, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -580.0, | |
| "logps/rejected": -824.0, | |
| "loss": 0.488, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.51953125, | |
| "rewards/margins": 1.4609375, | |
| "rewards/rejected": -1.984375, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 55.85530255600864, | |
| "learning_rate": 4.639381425647841e-07, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -1.5703125, | |
| "logps/chosen": -502.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.5025, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.64453125, | |
| "rewards/margins": 1.0, | |
| "rewards/rejected": -1.6484375, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 59.68466050286291, | |
| "learning_rate": 4.6205023354671735e-07, | |
| "logits/chosen": -1.5390625, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -460.0, | |
| "logps/rejected": -446.0, | |
| "loss": 0.4818, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.77734375, | |
| "rewards/margins": 0.875, | |
| "rewards/rejected": -1.6484375, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 44.610719658440864, | |
| "learning_rate": 4.601182065892529e-07, | |
| "logits/chosen": -1.5625, | |
| "logits/rejected": -1.453125, | |
| "logps/chosen": -556.0, | |
| "logps/rejected": -460.0, | |
| "loss": 0.476, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.91015625, | |
| "rewards/margins": 0.53515625, | |
| "rewards/rejected": -1.4453125, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 49.65621035961898, | |
| "learning_rate": 4.581424636586928e-07, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -456.0, | |
| "logps/rejected": -536.0, | |
| "loss": 0.5334, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.83984375, | |
| "rewards/margins": 0.2060546875, | |
| "rewards/rejected": -1.046875, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 37.22285340163814, | |
| "learning_rate": 4.561234158166305e-07, | |
| "logits/chosen": -1.265625, | |
| "logits/rejected": -1.3046875, | |
| "logps/chosen": -540.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.4929, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.87109375, | |
| "rewards/margins": 0.75, | |
| "rewards/rejected": -1.6171875, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 72.63982166912706, | |
| "learning_rate": 4.5406148313442753e-07, | |
| "logits/chosen": -2.140625, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -354.0, | |
| "logps/rejected": -372.0, | |
| "loss": 0.4682, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.98046875, | |
| "rewards/margins": 0.7109375, | |
| "rewards/rejected": -1.6875, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 49.88245474539256, | |
| "learning_rate": 4.519570946058162e-07, | |
| "logits/chosen": -1.3046875, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -482.0, | |
| "logps/rejected": -556.0, | |
| "loss": 0.4673, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.84375, | |
| "rewards/margins": 1.0234375, | |
| "rewards/rejected": -1.8671875, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 36.17988110201782, | |
| "learning_rate": 4.4981068805764545e-07, | |
| "logits/chosen": -1.2109375, | |
| "logits/rejected": -2.125, | |
| "logps/chosen": -700.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.4494, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -1.4765625, | |
| "rewards/margins": -0.234375, | |
| "rewards/rejected": -1.2421875, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 61.21171348805659, | |
| "learning_rate": 4.4762271005878913e-07, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.9765625, | |
| "logps/chosen": -388.0, | |
| "logps/rejected": -446.0, | |
| "loss": 0.4665, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.2578125, | |
| "rewards/margins": 0.55859375, | |
| "rewards/rejected": -1.8203125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 52.27627457515467, | |
| "learning_rate": 4.4539361582723586e-07, | |
| "logits/chosen": -1.3515625, | |
| "logits/rejected": -1.453125, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -892.0, | |
| "loss": 0.4806, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.8828125, | |
| "rewards/margins": 1.46875, | |
| "rewards/rejected": -3.34375, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 65.21759338991524, | |
| "learning_rate": 4.431238691353784e-07, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -486.0, | |
| "logps/rejected": -536.0, | |
| "loss": 0.4816, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.125, | |
| "rewards/margins": 1.75, | |
| "rewards/rejected": -2.875, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 51.27261353179044, | |
| "learning_rate": 4.408139422135241e-07, | |
| "logits/chosen": -1.4140625, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -462.0, | |
| "logps/rejected": -428.0, | |
| "loss": 0.4583, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2890625, | |
| "rewards/margins": 1.859375, | |
| "rewards/rejected": -3.15625, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 64.31719063334275, | |
| "learning_rate": 4.3846431565164596e-07, | |
| "logits/chosen": -1.390625, | |
| "logits/rejected": -1.3515625, | |
| "logps/chosen": -584.0, | |
| "logps/rejected": -608.0, | |
| "loss": 1.897, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.828125, | |
| "rewards/margins": 1.9453125, | |
| "rewards/rejected": -3.78125, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 46.19657074878751, | |
| "learning_rate": 4.360754782993929e-07, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -484.0, | |
| "logps/rejected": -434.0, | |
| "loss": 0.465, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.328125, | |
| "rewards/margins": 0.7578125, | |
| "rewards/rejected": -2.078125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 55.55089778313834, | |
| "learning_rate": 4.336479271643833e-07, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -1.4609375, | |
| "logps/chosen": -488.0, | |
| "logps/rejected": -462.0, | |
| "loss": 0.49, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.2265625, | |
| "rewards/margins": 0.5390625, | |
| "rewards/rejected": -1.765625, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 77.27127183474478, | |
| "learning_rate": 4.3118216730880015e-07, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -2.515625, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -520.0, | |
| "loss": 0.4607, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.0703125, | |
| "rewards/margins": 1.0859375, | |
| "rewards/rejected": -2.15625, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 61.141458979311125, | |
| "learning_rate": 4.286787117443108e-07, | |
| "logits/chosen": -1.203125, | |
| "logits/rejected": -1.3984375, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.4711, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.1796875, | |
| "rewards/margins": 1.1796875, | |
| "rewards/rejected": -2.359375, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 48.22890637233016, | |
| "learning_rate": 4.261380813253328e-07, | |
| "logits/chosen": -1.828125, | |
| "logits/rejected": -2.578125, | |
| "logps/chosen": -448.0, | |
| "logps/rejected": -418.0, | |
| "loss": 0.4414, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.1640625, | |
| "rewards/margins": 0.4375, | |
| "rewards/rejected": -1.6015625, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 34.09372820587962, | |
| "learning_rate": 4.2356080464066784e-07, | |
| "logits/chosen": -1.3671875, | |
| "logits/rejected": -3.171875, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -464.0, | |
| "loss": 0.4354, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.140625, | |
| "rewards/margins": 1.5390625, | |
| "rewards/rejected": -2.6875, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 40.13298242319205, | |
| "learning_rate": 4.2094741790352673e-07, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -2.203125, | |
| "logps/chosen": -516.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.3855, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.03125, | |
| "rewards/margins": 1.203125, | |
| "rewards/rejected": -2.234375, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 24.15795246510711, | |
| "learning_rate": 4.1829846483996813e-07, | |
| "logits/chosen": -1.265625, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -884.0, | |
| "logps/rejected": -696.0, | |
| "loss": 0.2332, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.154296875, | |
| "rewards/margins": 2.125, | |
| "rewards/rejected": -2.28125, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 40.12239181322821, | |
| "learning_rate": 4.156144965757735e-07, | |
| "logits/chosen": -2.828125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -416.0, | |
| "logps/rejected": -464.0, | |
| "loss": 0.264, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.484375, | |
| "rewards/margins": 1.7734375, | |
| "rewards/rejected": -3.25, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 22.033239175244603, | |
| "learning_rate": 4.128960715217839e-07, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.2482, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.7109375, | |
| "rewards/margins": 2.125, | |
| "rewards/rejected": -2.828125, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 24.492268202299986, | |
| "learning_rate": 4.1014375525771963e-07, | |
| "logits/chosen": -1.21875, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.2447, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": 0.0361328125, | |
| "rewards/margins": 2.453125, | |
| "rewards/rejected": -2.421875, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 31.641742017850387, | |
| "learning_rate": 4.0735812041450926e-07, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.6484375, | |
| "logps/chosen": -596.0, | |
| "logps/rejected": -596.0, | |
| "loss": 0.2428, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5390625, | |
| "rewards/margins": 2.5625, | |
| "rewards/rejected": -3.09375, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 44.35625008242433, | |
| "learning_rate": 4.045397465551513e-07, | |
| "logits/chosen": -1.5234375, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -520.0, | |
| "logps/rejected": -596.0, | |
| "loss": 0.2393, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.0859375, | |
| "rewards/margins": 1.171875, | |
| "rewards/rejected": -2.265625, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 47.415350341880725, | |
| "learning_rate": 4.0168922005413384e-07, | |
| "logits/chosen": -1.1171875, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -692.0, | |
| "logps/rejected": -612.0, | |
| "loss": 0.2804, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.65625, | |
| "rewards/margins": 1.5, | |
| "rewards/rejected": -2.15625, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 31.910462711815494, | |
| "learning_rate": 3.988071339754366e-07, | |
| "logits/chosen": -1.296875, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -632.0, | |
| "logps/rejected": -736.0, | |
| "loss": 0.2293, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21875, | |
| "rewards/margins": 2.296875, | |
| "rewards/rejected": -2.515625, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 27.7996029933054, | |
| "learning_rate": 3.958940879491418e-07, | |
| "logits/chosen": -1.4296875, | |
| "logits/rejected": -1.984375, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -498.0, | |
| "loss": 0.2693, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.84765625, | |
| "rewards/margins": 1.734375, | |
| "rewards/rejected": -2.578125, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 24.76648851906884, | |
| "learning_rate": 3.9295068804667823e-07, | |
| "logits/chosen": -1.3515625, | |
| "logits/rejected": -2.015625, | |
| "logps/chosen": -498.0, | |
| "logps/rejected": -494.0, | |
| "loss": 0.2089, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.73046875, | |
| "rewards/margins": 2.203125, | |
| "rewards/rejected": -2.9375, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 23.922743861511947, | |
| "learning_rate": 3.899775466547261e-07, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -556.0, | |
| "logps/rejected": -532.0, | |
| "loss": 0.231, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.86328125, | |
| "rewards/margins": 1.8046875, | |
| "rewards/rejected": -2.671875, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 51.66225882393323, | |
| "learning_rate": 3.8697528234780674e-07, | |
| "logits/chosen": -2.015625, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -400.0, | |
| "logps/rejected": -612.0, | |
| "loss": 0.9229, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.515625, | |
| "rewards/margins": 2.140625, | |
| "rewards/rejected": -3.640625, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 38.82610683477862, | |
| "learning_rate": 3.839445197595863e-07, | |
| "logits/chosen": -1.8671875, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -418.0, | |
| "logps/rejected": -378.0, | |
| "loss": 0.231, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.71484375, | |
| "rewards/margins": 2.03125, | |
| "rewards/rejected": -2.75, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 22.389795986878887, | |
| "learning_rate": 3.8088588945291734e-07, | |
| "logits/chosen": -1.3515625, | |
| "logits/rejected": -2.453125, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -496.0, | |
| "loss": 0.2115, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.90625, | |
| "rewards/margins": 1.8203125, | |
| "rewards/rejected": -2.734375, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 22.583198913209376, | |
| "learning_rate": 3.778000277886483e-07, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -676.0, | |
| "loss": 0.2358, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.326171875, | |
| "rewards/margins": 2.78125, | |
| "rewards/rejected": -3.109375, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 21.507764548315773, | |
| "learning_rate": 3.746875767932255e-07, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.2342, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.453125, | |
| "rewards/margins": 2.078125, | |
| "rewards/rejected": -3.53125, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 23.64514805939643, | |
| "learning_rate": 3.7154918402511714e-07, | |
| "logits/chosen": -1.5078125, | |
| "logits/rejected": -3.703125, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -436.0, | |
| "loss": 0.2427, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.66015625, | |
| "rewards/margins": 2.46875, | |
| "rewards/rejected": -3.125, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 25.279762808321895, | |
| "learning_rate": 3.6838550244008573e-07, | |
| "logits/chosen": -1.9765625, | |
| "logits/rejected": -1.5234375, | |
| "logps/chosen": -556.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.2198, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.875, | |
| "rewards/margins": 1.6015625, | |
| "rewards/rejected": -2.484375, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 23.158055403708982, | |
| "learning_rate": 3.651971902553381e-07, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -2.609375, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -486.0, | |
| "loss": 0.2367, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.63671875, | |
| "rewards/margins": 2.515625, | |
| "rewards/rejected": -3.140625, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_logits/chosen": -1.0859375, | |
| "eval_logits/rejected": -0.9921875, | |
| "eval_logps/chosen": -564.0, | |
| "eval_logps/rejected": -656.0, | |
| "eval_loss": 0.4058724045753479, | |
| "eval_rewards/accuracies": 0.8888888955116272, | |
| "eval_rewards/chosen": -1.90625, | |
| "eval_rewards/margins": 1.5625, | |
| "eval_rewards/rejected": -3.453125, | |
| "eval_runtime": 50.8495, | |
| "eval_samples_per_second": 20.649, | |
| "eval_steps_per_second": 0.177, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 39.231752515284, | |
| "learning_rate": 3.6198491081258066e-07, | |
| "logits/chosen": -1.1484375, | |
| "logits/rejected": -1.8828125, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -494.0, | |
| "loss": 0.2308, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.265625, | |
| "rewards/margins": 1.734375, | |
| "rewards/rejected": -3.0, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 29.796507962550486, | |
| "learning_rate": 3.58749332440008e-07, | |
| "logits/chosen": -1.359375, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -584.0, | |
| "logps/rejected": -556.0, | |
| "loss": 0.2362, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.2421875, | |
| "rewards/margins": 1.640625, | |
| "rewards/rejected": -2.875, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 21.63597550992159, | |
| "learning_rate": 3.55491128313255e-07, | |
| "logits/chosen": -2.3125, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -496.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.2107, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.5703125, | |
| "rewards/margins": 2.5, | |
| "rewards/rejected": -4.0625, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 14.702295094535923, | |
| "learning_rate": 3.522109763153392e-07, | |
| "logits/chosen": -1.546875, | |
| "logits/rejected": -1.6328125, | |
| "logps/chosen": -442.0, | |
| "logps/rejected": -520.0, | |
| "loss": 0.2199, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.3984375, | |
| "rewards/margins": 1.796875, | |
| "rewards/rejected": -3.203125, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 21.29596218246951, | |
| "learning_rate": 3.489095588956249e-07, | |
| "logits/chosen": -1.21875, | |
| "logits/rejected": -1.890625, | |
| "logps/chosen": -648.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.2325, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7890625, | |
| "rewards/margins": 2.53125, | |
| "rewards/rejected": -3.328125, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 52.09643118168795, | |
| "learning_rate": 3.455875629278363e-07, | |
| "logits/chosen": -1.5390625, | |
| "logits/rejected": -2.140625, | |
| "logps/chosen": -540.0, | |
| "logps/rejected": -536.0, | |
| "loss": 0.2484, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2578125, | |
| "rewards/margins": 2.265625, | |
| "rewards/rejected": -3.515625, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 35.57618950847242, | |
| "learning_rate": 3.4224567956715085e-07, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -564.0, | |
| "loss": 0.2062, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.703125, | |
| "rewards/margins": 1.8203125, | |
| "rewards/rejected": -2.515625, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 20.490288705612254, | |
| "learning_rate": 3.388846041064012e-07, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -2.25, | |
| "logps/chosen": -796.0, | |
| "logps/rejected": -604.0, | |
| "loss": 0.2027, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0078125, | |
| "rewards/margins": 1.8125, | |
| "rewards/rejected": -2.828125, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 25.587126609494646, | |
| "learning_rate": 3.355050358314172e-07, | |
| "logits/chosen": -1.125, | |
| "logits/rejected": -1.078125, | |
| "logps/chosen": -728.0, | |
| "logps/rejected": -732.0, | |
| "loss": 0.2549, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3203125, | |
| "rewards/margins": 2.125, | |
| "rewards/rejected": -3.4375, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 32.538109074719216, | |
| "learning_rate": 3.321076778755358e-07, | |
| "logits/chosen": -1.8359375, | |
| "logits/rejected": -1.3125, | |
| "logps/chosen": -572.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.2117, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.359375, | |
| "rewards/margins": 1.90625, | |
| "rewards/rejected": -3.265625, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 19.634746831169892, | |
| "learning_rate": 3.2869323707331176e-07, | |
| "logits/chosen": -1.453125, | |
| "logits/rejected": -1.96875, | |
| "logps/chosen": -490.0, | |
| "logps/rejected": -536.0, | |
| "loss": 0.217, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.390625, | |
| "rewards/margins": 2.171875, | |
| "rewards/rejected": -3.5625, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 52.58964934069639, | |
| "learning_rate": 3.2526242381345766e-07, | |
| "logits/chosen": -2.21875, | |
| "logits/rejected": -2.390625, | |
| "logps/chosen": -484.0, | |
| "logps/rejected": -652.0, | |
| "loss": 0.2209, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.1875, | |
| "rewards/margins": 3.359375, | |
| "rewards/rejected": -4.53125, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 26.692041535833823, | |
| "learning_rate": 3.218159518910443e-07, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -540.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.2288, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.6328125, | |
| "rewards/margins": 2.015625, | |
| "rewards/rejected": -3.640625, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 36.803960598284185, | |
| "learning_rate": 3.183545383589927e-07, | |
| "logits/chosen": -1.1484375, | |
| "logits/rejected": -1.5078125, | |
| "logps/chosen": -628.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.2016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4375, | |
| "rewards/margins": 2.0625, | |
| "rewards/rejected": -3.5, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 62.57539343068982, | |
| "learning_rate": 3.148789033788889e-07, | |
| "logits/chosen": -1.3359375, | |
| "logits/rejected": -1.5, | |
| "logps/chosen": -504.0, | |
| "logps/rejected": -524.0, | |
| "loss": 0.1919, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0546875, | |
| "rewards/margins": 2.234375, | |
| "rewards/rejected": -3.28125, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 27.545095580039987, | |
| "learning_rate": 3.113897700711502e-07, | |
| "logits/chosen": -0.9375, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -412.0, | |
| "logps/rejected": -728.0, | |
| "loss": 0.2339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.484375, | |
| "rewards/margins": 2.71875, | |
| "rewards/rejected": -4.1875, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 20.69956429305614, | |
| "learning_rate": 3.078878643645778e-07, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.4296875, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -628.0, | |
| "loss": 0.2147, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.921875, | |
| "rewards/margins": 2.25, | |
| "rewards/rejected": -4.1875, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 30.998852642526103, | |
| "learning_rate": 3.0437391484532403e-07, | |
| "logits/chosen": -1.1328125, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -804.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.2046, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.203125, | |
| "rewards/margins": 2.390625, | |
| "rewards/rejected": -3.59375, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 20.204732181232373, | |
| "learning_rate": 3.0084865260530666e-07, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.1875, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -780.0, | |
| "loss": 0.2196, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.92578125, | |
| "rewards/margins": 3.203125, | |
| "rewards/rejected": -4.125, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 38.16020262352693, | |
| "learning_rate": 2.9731281109010253e-07, | |
| "logits/chosen": -1.8203125, | |
| "logits/rejected": -1.8359375, | |
| "logps/chosen": -446.0, | |
| "logps/rejected": -484.0, | |
| "loss": 0.233, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.640625, | |
| "rewards/margins": 2.40625, | |
| "rewards/rejected": -4.0625, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 29.06302627643078, | |
| "learning_rate": 2.937671259463512e-07, | |
| "logits/chosen": -1.8515625, | |
| "logits/rejected": -1.625, | |
| "logps/chosen": -536.0, | |
| "logps/rejected": -648.0, | |
| "loss": 0.2302, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.296875, | |
| "rewards/margins": 3.484375, | |
| "rewards/rejected": -4.78125, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 91.98272670818265, | |
| "learning_rate": 2.9021233486869994e-07, | |
| "logits/chosen": -1.4375, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -560.0, | |
| "loss": 0.2402, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.484375, | |
| "rewards/margins": 1.6953125, | |
| "rewards/rejected": -3.1875, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 30.373071126803815, | |
| "learning_rate": 2.8664917744632423e-07, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -544.0, | |
| "logps/rejected": -640.0, | |
| "loss": 0.2211, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.90625, | |
| "rewards/margins": 2.40625, | |
| "rewards/rejected": -4.3125, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 24.064215386247714, | |
| "learning_rate": 2.830783950090522e-07, | |
| "logits/chosen": -1.2265625, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -528.0, | |
| "logps/rejected": -462.0, | |
| "loss": 0.2278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.875, | |
| "rewards/margins": 1.96875, | |
| "rewards/rejected": -3.828125, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 38.77040858523863, | |
| "learning_rate": 2.7950073047312855e-07, | |
| "logits/chosen": -1.2109375, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -640.0, | |
| "logps/rejected": -752.0, | |
| "loss": 0.2113, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.8125, | |
| "rewards/margins": 2.40625, | |
| "rewards/rejected": -4.21875, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 36.115163088163186, | |
| "learning_rate": 2.759169281866472e-07, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -460.0, | |
| "logps/rejected": -430.0, | |
| "loss": 0.2048, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.953125, | |
| "rewards/margins": 1.7734375, | |
| "rewards/rejected": -3.71875, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 21.74767552298878, | |
| "learning_rate": 2.72327733774687e-07, | |
| "logits/chosen": -1.28125, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -752.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.2159, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8125, | |
| "rewards/margins": 2.40625, | |
| "rewards/rejected": -3.21875, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 29.90116775368825, | |
| "learning_rate": 2.6873389398418085e-07, | |
| "logits/chosen": -1.4453125, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -420.0, | |
| "logps/rejected": -490.0, | |
| "loss": 0.2191, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.390625, | |
| "rewards/margins": 1.59375, | |
| "rewards/rejected": -3.984375, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 23.793756365026436, | |
| "learning_rate": 2.6513615652855246e-07, | |
| "logits/chosen": -1.5546875, | |
| "logits/rejected": -1.3046875, | |
| "logps/chosen": -572.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.2011, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.84375, | |
| "rewards/margins": 1.53125, | |
| "rewards/rejected": -3.375, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 43.491509201941334, | |
| "learning_rate": 2.6153526993215085e-07, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.6171875, | |
| "logps/chosen": -500.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.1999, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.375, | |
| "rewards/margins": 2.34375, | |
| "rewards/rejected": -4.71875, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 43.19768159653058, | |
| "learning_rate": 2.579319833745169e-07, | |
| "logits/chosen": -1.546875, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -460.0, | |
| "logps/rejected": -406.0, | |
| "loss": 0.2023, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.65625, | |
| "rewards/margins": 2.171875, | |
| "rewards/rejected": -3.828125, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 35.267489669162956, | |
| "learning_rate": 2.5432704653451374e-07, | |
| "logits/chosen": -1.3515625, | |
| "logits/rejected": -1.234375, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -728.0, | |
| "loss": 0.1962, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.046875, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -5.15625, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 17.971641900070907, | |
| "learning_rate": 2.5072120943435246e-07, | |
| "logits/chosen": -1.1953125, | |
| "logits/rejected": -1.4765625, | |
| "logps/chosen": -784.0, | |
| "logps/rejected": -808.0, | |
| "loss": 0.1927, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.453125, | |
| "rewards/margins": 2.765625, | |
| "rewards/rejected": -4.21875, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 26.236832847315178, | |
| "learning_rate": 2.471152222835471e-07, | |
| "logits/chosen": -2.1875, | |
| "logits/rejected": -1.5859375, | |
| "logps/chosen": -684.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.2161, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.078125, | |
| "rewards/margins": 3.125, | |
| "rewards/rejected": -4.1875, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 25.147006316460192, | |
| "learning_rate": 2.4350983532283043e-07, | |
| "logits/chosen": -1.4296875, | |
| "logits/rejected": -1.1484375, | |
| "logps/chosen": -472.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.189, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -1.34375, | |
| "rewards/margins": 2.125, | |
| "rewards/rejected": -3.46875, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 54.14713597288178, | |
| "learning_rate": 2.39905798668063e-07, | |
| "logits/chosen": -1.34375, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -510.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.2336, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8359375, | |
| "rewards/margins": 2.484375, | |
| "rewards/rejected": -4.3125, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 21.942609458104677, | |
| "learning_rate": 2.3630386215416878e-07, | |
| "logits/chosen": -1.5390625, | |
| "logits/rejected": -1.8671875, | |
| "logps/chosen": -620.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.1966, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1484375, | |
| "rewards/margins": 3.0, | |
| "rewards/rejected": -4.15625, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 72.72707544492208, | |
| "learning_rate": 2.3270477517912835e-07, | |
| "logits/chosen": -1.1953125, | |
| "logits/rejected": -1.1171875, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -708.0, | |
| "loss": 0.2211, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.1875, | |
| "rewards/margins": 1.84375, | |
| "rewards/rejected": -4.03125, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 22.411175858752106, | |
| "learning_rate": 2.291092865480641e-07, | |
| "logits/chosen": -1.4375, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.2027, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.7109375, | |
| "rewards/margins": 3.09375, | |
| "rewards/rejected": -4.8125, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 124.47455963689552, | |
| "learning_rate": 2.2551814431744758e-07, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -1.5, | |
| "logps/chosen": -592.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.3274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9765625, | |
| "rewards/margins": 2.40625, | |
| "rewards/rejected": -4.375, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 40.10464388387894, | |
| "learning_rate": 2.2193209563946382e-07, | |
| "logits/chosen": -1.1875, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -740.0, | |
| "logps/rejected": -604.0, | |
| "loss": 0.2608, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.34375, | |
| "rewards/margins": 2.984375, | |
| "rewards/rejected": -5.3125, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 139.91739653913288, | |
| "learning_rate": 2.1835188660656265e-07, | |
| "logits/chosen": -1.25, | |
| "logits/rejected": -1.4921875, | |
| "logps/chosen": -600.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.1985, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.546875, | |
| "rewards/margins": 2.125, | |
| "rewards/rejected": -3.671875, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 45.52923168908793, | |
| "learning_rate": 2.147782620962314e-07, | |
| "logits/chosen": -1.5, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -544.0, | |
| "logps/rejected": -540.0, | |
| "loss": 0.2292, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4375, | |
| "rewards/margins": 2.140625, | |
| "rewards/rejected": -3.578125, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 55.0384600231688, | |
| "learning_rate": 2.112119656160199e-07, | |
| "logits/chosen": -1.15625, | |
| "logits/rejected": -1.25, | |
| "logps/chosen": -620.0, | |
| "logps/rejected": -700.0, | |
| "loss": 0.2028, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.8046875, | |
| "rewards/margins": 1.6953125, | |
| "rewards/rejected": -3.484375, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 18.297415510730342, | |
| "learning_rate": 2.0765373914885047e-07, | |
| "logits/chosen": -1.8203125, | |
| "logits/rejected": -1.5234375, | |
| "logps/chosen": -418.0, | |
| "logps/rejected": -508.0, | |
| "loss": 0.2187, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.3828125, | |
| "rewards/margins": 1.5234375, | |
| "rewards/rejected": -2.90625, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 25.14760440570739, | |
| "learning_rate": 2.0410432299864556e-07, | |
| "logits/chosen": -1.46875, | |
| "logits/rejected": -1.3828125, | |
| "logps/chosen": -584.0, | |
| "logps/rejected": -816.0, | |
| "loss": 0.2162, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.125, | |
| "rewards/margins": 2.9375, | |
| "rewards/rejected": -5.0625, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 25.279928980019168, | |
| "learning_rate": 2.0056445563630423e-07, | |
| "logits/chosen": -1.9609375, | |
| "logits/rejected": -1.5625, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -604.0, | |
| "loss": 0.217, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.40625, | |
| "rewards/margins": 2.046875, | |
| "rewards/rejected": -3.46875, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 31.005511813027777, | |
| "learning_rate": 1.9703487354606018e-07, | |
| "logits/chosen": -2.640625, | |
| "logits/rejected": -1.96875, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -628.0, | |
| "loss": 0.2051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.25, | |
| "rewards/margins": 3.265625, | |
| "rewards/rejected": -4.53125, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 17.907410467346242, | |
| "learning_rate": 1.935163110722533e-07, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -528.0, | |
| "loss": 0.2019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.82421875, | |
| "rewards/margins": 2.359375, | |
| "rewards/rejected": -3.171875, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 37.56302682379733, | |
| "learning_rate": 1.900095002665459e-07, | |
| "logits/chosen": -1.375, | |
| "logits/rejected": -1.5859375, | |
| "logps/chosen": -544.0, | |
| "logps/rejected": -668.0, | |
| "loss": 0.2247, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4609375, | |
| "rewards/margins": 2.109375, | |
| "rewards/rejected": -3.5625, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_logits/chosen": -1.0546875, | |
| "eval_logits/rejected": -0.9765625, | |
| "eval_logps/chosen": -568.0, | |
| "eval_logps/rejected": -668.0, | |
| "eval_loss": 0.3828948140144348, | |
| "eval_rewards/accuracies": 0.7777777910232544, | |
| "eval_rewards/chosen": -2.203125, | |
| "eval_rewards/margins": 1.8125, | |
| "eval_rewards/rejected": -4.03125, | |
| "eval_runtime": 49.0823, | |
| "eval_samples_per_second": 21.393, | |
| "eval_steps_per_second": 0.183, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 142.03485499088688, | |
| "learning_rate": 1.8651517073561673e-07, | |
| "logits/chosen": -1.9140625, | |
| "logits/rejected": -1.84375, | |
| "logps/chosen": -516.0, | |
| "logps/rejected": -444.0, | |
| "loss": 0.2354, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.0546875, | |
| "rewards/margins": 1.6640625, | |
| "rewards/rejected": -2.71875, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 20.904872000144213, | |
| "learning_rate": 1.8303404948936285e-07, | |
| "logits/chosen": -1.5625, | |
| "logits/rejected": -1.3828125, | |
| "logps/chosen": -466.0, | |
| "logps/rejected": -492.0, | |
| "loss": 0.2063, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.98046875, | |
| "rewards/margins": 2.203125, | |
| "rewards/rejected": -3.171875, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 23.082974237096174, | |
| "learning_rate": 1.7956686078964255e-07, | |
| "logits/chosen": -1.375, | |
| "logits/rejected": -1.4375, | |
| "logps/chosen": -528.0, | |
| "logps/rejected": -656.0, | |
| "loss": 0.2083, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.421875, | |
| "rewards/margins": 3.09375, | |
| "rewards/rejected": -4.5, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 65.20385510486626, | |
| "learning_rate": 1.7611432599958924e-07, | |
| "logits/chosen": -1.9140625, | |
| "logits/rejected": -2.234375, | |
| "logps/chosen": -352.0, | |
| "logps/rejected": -392.0, | |
| "loss": 0.2083, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3515625, | |
| "rewards/margins": 1.9609375, | |
| "rewards/rejected": -3.3125, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 26.23633528972487, | |
| "learning_rate": 1.726771634335293e-07, | |
| "logits/chosen": -1.4609375, | |
| "logits/rejected": -2.0625, | |
| "logps/chosen": -492.0, | |
| "logps/rejected": -456.0, | |
| "loss": 0.2321, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.0703125, | |
| "rewards/margins": 1.6015625, | |
| "rewards/rejected": -2.671875, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 49.10798542538174, | |
| "learning_rate": 1.6925608820753325e-07, | |
| "logits/chosen": -0.83203125, | |
| "logits/rejected": -1.1953125, | |
| "logps/chosen": -708.0, | |
| "logps/rejected": -880.0, | |
| "loss": 0.2232, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.86328125, | |
| "rewards/margins": 2.625, | |
| "rewards/rejected": -3.484375, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 52.60647313486629, | |
| "learning_rate": 1.6585181209063321e-07, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.4921875, | |
| "logps/chosen": -472.0, | |
| "logps/rejected": -704.0, | |
| "loss": 0.1907, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.5546875, | |
| "rewards/margins": 2.578125, | |
| "rewards/rejected": -4.125, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 24.213468426964006, | |
| "learning_rate": 1.6246504335673625e-07, | |
| "logits/chosen": -1.0390625, | |
| "logits/rejected": -1.4453125, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -856.0, | |
| "loss": 0.2086, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9375, | |
| "rewards/margins": 2.15625, | |
| "rewards/rejected": -3.09375, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 47.733763616697836, | |
| "learning_rate": 1.590964866372652e-07, | |
| "logits/chosen": -1.09375, | |
| "logits/rejected": -1.2734375, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -784.0, | |
| "loss": 0.2083, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3515625, | |
| "rewards/margins": 2.828125, | |
| "rewards/rejected": -4.1875, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 37.63668160123638, | |
| "learning_rate": 1.5574684277455685e-07, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.1953125, | |
| "logps/chosen": -464.0, | |
| "logps/rejected": -640.0, | |
| "loss": 0.22, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -1.5234375, | |
| "rewards/margins": 1.765625, | |
| "rewards/rejected": -3.28125, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 20.84423028894674, | |
| "learning_rate": 1.5241680867604905e-07, | |
| "logits/chosen": -1.0078125, | |
| "logits/rejected": -2.34375, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.2062, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.73046875, | |
| "rewards/margins": 2.265625, | |
| "rewards/rejected": -3.0, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 15.238180752697565, | |
| "learning_rate": 1.4910707716928586e-07, | |
| "logits/chosen": -1.75, | |
| "logits/rejected": -2.375, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -696.0, | |
| "loss": 0.1306, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.828125, | |
| "rewards/margins": 3.359375, | |
| "rewards/rejected": -5.1875, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 13.583277201205796, | |
| "learning_rate": 1.4581833685777228e-07, | |
| "logits/chosen": -1.34375, | |
| "logits/rejected": -1.578125, | |
| "logps/chosen": -552.0, | |
| "logps/rejected": -640.0, | |
| "loss": 0.1173, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5234375, | |
| "rewards/margins": 2.65625, | |
| "rewards/rejected": -4.1875, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 14.86440122341942, | |
| "learning_rate": 1.4255127197770707e-07, | |
| "logits/chosen": -1.4609375, | |
| "logits/rejected": -1.3828125, | |
| "logps/chosen": -434.0, | |
| "logps/rejected": -552.0, | |
| "loss": 0.1149, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.65625, | |
| "rewards/margins": 2.515625, | |
| "rewards/rejected": -4.1875, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 15.578800057924948, | |
| "learning_rate": 1.3930656225562474e-07, | |
| "logits/chosen": -1.6640625, | |
| "logits/rejected": -1.515625, | |
| "logps/chosen": -540.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.1074, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.90625, | |
| "rewards/margins": 3.28125, | |
| "rewards/rejected": -5.1875, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 13.991553452696552, | |
| "learning_rate": 1.360848827669756e-07, | |
| "logits/chosen": -1.421875, | |
| "logits/rejected": -1.2265625, | |
| "logps/chosen": -524.0, | |
| "logps/rejected": -520.0, | |
| "loss": 0.1255, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7578125, | |
| "rewards/margins": 2.734375, | |
| "rewards/rejected": -4.5, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 21.745298822673373, | |
| "learning_rate": 1.3288690379567314e-07, | |
| "logits/chosen": -1.4140625, | |
| "logits/rejected": -1.84375, | |
| "logps/chosen": -506.0, | |
| "logps/rejected": -544.0, | |
| "loss": 0.123, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.78125, | |
| "rewards/margins": 2.171875, | |
| "rewards/rejected": -3.953125, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 15.334862616251963, | |
| "learning_rate": 1.2971329069463932e-07, | |
| "logits/chosen": -1.328125, | |
| "logits/rejected": -1.8984375, | |
| "logps/chosen": -632.0, | |
| "logps/rejected": -672.0, | |
| "loss": 0.1169, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7421875, | |
| "rewards/margins": 3.03125, | |
| "rewards/rejected": -4.78125, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 36.5802518789977, | |
| "learning_rate": 1.2656470374737434e-07, | |
| "logits/chosen": -1.1875, | |
| "logits/rejected": -1.3671875, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -1024.0, | |
| "loss": 0.1232, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.859375, | |
| "rewards/margins": 4.25, | |
| "rewards/rejected": -6.125, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 35.20242961161644, | |
| "learning_rate": 1.2344179803058264e-07, | |
| "logits/chosen": -1.2578125, | |
| "logits/rejected": -1.9921875, | |
| "logps/chosen": -528.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.1247, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.1875, | |
| "rewards/margins": 2.25, | |
| "rewards/rejected": -4.4375, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 20.682912146389263, | |
| "learning_rate": 1.203452232778807e-07, | |
| "logits/chosen": -1.4375, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -748.0, | |
| "logps/rejected": -824.0, | |
| "loss": 0.1213, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.78125, | |
| "rewards/margins": 3.921875, | |
| "rewards/rejected": -5.71875, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 21.01075482943445, | |
| "learning_rate": 1.1727562374461788e-07, | |
| "logits/chosen": -1.9765625, | |
| "logits/rejected": -1.515625, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.1279, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.5859375, | |
| "rewards/margins": 3.53125, | |
| "rewards/rejected": -5.125, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 29.30233670676864, | |
| "learning_rate": 1.142336380738361e-07, | |
| "logits/chosen": -1.3203125, | |
| "logits/rejected": -1.109375, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -544.0, | |
| "loss": 0.1133, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.3125, | |
| "rewards/margins": 2.90625, | |
| "rewards/rejected": -5.21875, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 16.664591107532367, | |
| "learning_rate": 1.1121989916339756e-07, | |
| "logits/chosen": -1.203125, | |
| "logits/rejected": -2.9375, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -624.0, | |
| "loss": 0.1121, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.46875, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -5.59375, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 83.44488397290417, | |
| "learning_rate": 1.0823503403430734e-07, | |
| "logits/chosen": -1.25, | |
| "logits/rejected": -1.5546875, | |
| "logps/chosen": -648.0, | |
| "logps/rejected": -508.0, | |
| "loss": 0.1218, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.859375, | |
| "rewards/margins": 2.90625, | |
| "rewards/rejected": -4.78125, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 15.585689114051172, | |
| "learning_rate": 1.0527966370025964e-07, | |
| "logits/chosen": -1.125, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -716.0, | |
| "logps/rejected": -692.0, | |
| "loss": 0.1205, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.015625, | |
| "rewards/margins": 2.875, | |
| "rewards/rejected": -4.875, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 10.765461249613185, | |
| "learning_rate": 1.0235440303843302e-07, | |
| "logits/chosen": -1.2109375, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -500.0, | |
| "logps/rejected": -636.0, | |
| "loss": 0.1099, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.59375, | |
| "rewards/margins": 2.515625, | |
| "rewards/rejected": -5.125, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 12.386913795936541, | |
| "learning_rate": 9.945986066156248e-08, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.8828125, | |
| "logps/chosen": -498.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.108, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5703125, | |
| "rewards/margins": 3.5625, | |
| "rewards/rejected": -5.125, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 16.61091563337375, | |
| "learning_rate": 9.659663879131503e-08, | |
| "logits/chosen": -1.265625, | |
| "logits/rejected": -1.3125, | |
| "logps/chosen": -560.0, | |
| "logps/rejected": -528.0, | |
| "loss": 0.125, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.40625, | |
| "rewards/margins": 2.71875, | |
| "rewards/rejected": -5.125, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 24.411403141380244, | |
| "learning_rate": 9.376533313299542e-08, | |
| "logits/chosen": -1.2265625, | |
| "logits/rejected": -2.21875, | |
| "logps/chosen": -772.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.111, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.078125, | |
| "rewards/margins": 2.9375, | |
| "rewards/rejected": -5.0, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 15.505538034971874, | |
| "learning_rate": 9.096653275160641e-08, | |
| "logits/chosen": -1.5390625, | |
| "logits/rejected": -1.59375, | |
| "logps/chosen": -492.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.1229, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7109375, | |
| "rewards/margins": 2.765625, | |
| "rewards/rejected": -4.46875, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 15.710939806805685, | |
| "learning_rate": 8.820081994929207e-08, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -724.0, | |
| "logps/rejected": -656.0, | |
| "loss": 0.1194, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.4375, | |
| "rewards/margins": 2.4375, | |
| "rewards/rejected": -4.875, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 15.364495322714388, | |
| "learning_rate": 8.546877014418671e-08, | |
| "logits/chosen": -1.9296875, | |
| "logits/rejected": -2.15625, | |
| "logps/chosen": -496.0, | |
| "logps/rejected": -532.0, | |
| "loss": 0.1282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.046875, | |
| "rewards/margins": 2.484375, | |
| "rewards/rejected": -4.53125, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 13.756725707803474, | |
| "learning_rate": 8.277095175069738e-08, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -1.4609375, | |
| "logps/chosen": -532.0, | |
| "logps/rejected": -552.0, | |
| "loss": 0.1072, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.390625, | |
| "rewards/margins": 2.9375, | |
| "rewards/rejected": -5.3125, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 23.80983088717624, | |
| "learning_rate": 8.010792606124228e-08, | |
| "logits/chosen": -1.0703125, | |
| "logits/rejected": -1.0546875, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -680.0, | |
| "loss": 0.1081, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.890625, | |
| "rewards/margins": 3.53125, | |
| "rewards/rejected": -5.40625, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 17.774551031970322, | |
| "learning_rate": 7.748024712947204e-08, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -1.2421875, | |
| "logps/chosen": -636.0, | |
| "logps/rejected": -652.0, | |
| "loss": 0.1291, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.28125, | |
| "rewards/margins": 2.34375, | |
| "rewards/rejected": -4.625, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 14.943619749566544, | |
| "learning_rate": 7.488846165499596e-08, | |
| "logits/chosen": -1.3984375, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -572.0, | |
| "logps/rejected": -684.0, | |
| "loss": 0.1282, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.734375, | |
| "rewards/margins": 3.515625, | |
| "rewards/rejected": -6.25, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 16.562697765445648, | |
| "learning_rate": 7.233310886963942e-08, | |
| "logits/chosen": -1.375, | |
| "logits/rejected": -1.3984375, | |
| "logps/chosen": -474.0, | |
| "logps/rejected": -544.0, | |
| "loss": 0.1229, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.28125, | |
| "rewards/margins": 2.5, | |
| "rewards/rejected": -4.78125, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 16.237370125481036, | |
| "learning_rate": 6.981472042525416e-08, | |
| "logits/chosen": -1.515625, | |
| "logits/rejected": -1.765625, | |
| "logps/chosen": -640.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.1077, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.078125, | |
| "rewards/margins": 2.890625, | |
| "rewards/rejected": -4.96875, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 12.54574310017106, | |
| "learning_rate": 6.7333820283106e-08, | |
| "logits/chosen": -0.94921875, | |
| "logits/rejected": -1.3828125, | |
| "logps/chosen": -696.0, | |
| "logps/rejected": -840.0, | |
| "loss": 0.1192, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.578125, | |
| "rewards/margins": 4.125, | |
| "rewards/rejected": -5.71875, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 17.067698975214256, | |
| "learning_rate": 6.48909246048622e-08, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -490.0, | |
| "logps/rejected": -560.0, | |
| "loss": 0.1259, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.078125, | |
| "rewards/margins": 3.03125, | |
| "rewards/rejected": -5.09375, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 17.315279196446202, | |
| "learning_rate": 6.248654164520237e-08, | |
| "logits/chosen": -1.2890625, | |
| "logits/rejected": -1.4609375, | |
| "logps/chosen": -458.0, | |
| "logps/rejected": -426.0, | |
| "loss": 0.1221, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.515625, | |
| "rewards/margins": 2.09375, | |
| "rewards/rejected": -4.625, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 15.397715588828959, | |
| "learning_rate": 6.012117164607347e-08, | |
| "logits/chosen": -0.90625, | |
| "logits/rejected": -1.4921875, | |
| "logps/chosen": -796.0, | |
| "logps/rejected": -708.0, | |
| "loss": 0.109, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5078125, | |
| "rewards/margins": 3.78125, | |
| "rewards/rejected": -5.3125, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 45.120668890615434, | |
| "learning_rate": 5.779530673261279e-08, | |
| "logits/chosen": -1.0703125, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -612.0, | |
| "logps/rejected": -820.0, | |
| "loss": 0.0907, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7265625, | |
| "rewards/margins": 3.5625, | |
| "rewards/rejected": -5.3125, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 12.405391170841106, | |
| "learning_rate": 5.5509430810758817e-08, | |
| "logits/chosen": -1.0234375, | |
| "logits/rejected": -1.5703125, | |
| "logps/chosen": -800.0, | |
| "logps/rejected": -848.0, | |
| "loss": 0.1012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.28125, | |
| "rewards/margins": 3.375, | |
| "rewards/rejected": -5.65625, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 13.29830997717489, | |
| "learning_rate": 5.3264019466573053e-08, | |
| "logits/chosen": -1.03125, | |
| "logits/rejected": -1.8203125, | |
| "logps/chosen": -660.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.0953, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.859375, | |
| "rewards/margins": 3.640625, | |
| "rewards/rejected": -5.5, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 15.306039857091942, | |
| "learning_rate": 5.105953986729195e-08, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.2109375, | |
| "logps/chosen": -576.0, | |
| "logps/rejected": -732.0, | |
| "loss": 0.1057, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4375, | |
| "rewards/margins": 3.140625, | |
| "rewards/rejected": -5.5625, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 17.90216325537495, | |
| "learning_rate": 4.889645066413112e-08, | |
| "logits/chosen": -1.125, | |
| "logits/rejected": -1.5546875, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -612.0, | |
| "loss": 0.1165, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.203125, | |
| "rewards/margins": 2.4375, | |
| "rewards/rejected": -4.625, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 13.665308623510128, | |
| "learning_rate": 4.67752018968606e-08, | |
| "logits/chosen": -1.1328125, | |
| "logits/rejected": -1.4765625, | |
| "logps/chosen": -624.0, | |
| "logps/rejected": -592.0, | |
| "loss": 0.0942, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.1875, | |
| "rewards/margins": 3.3125, | |
| "rewards/rejected": -5.5, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 14.773793343841884, | |
| "learning_rate": 4.4696234900172744e-08, | |
| "logits/chosen": -1.5703125, | |
| "logits/rejected": -1.0546875, | |
| "logps/chosen": -540.0, | |
| "logps/rejected": -824.0, | |
| "loss": 0.1132, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.4375, | |
| "rewards/margins": 3.46875, | |
| "rewards/rejected": -5.90625, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_logits/chosen": -1.0625, | |
| "eval_logits/rejected": -0.96484375, | |
| "eval_logps/chosen": -592.0, | |
| "eval_logps/rejected": -696.0, | |
| "eval_loss": 0.37350770831108093, | |
| "eval_rewards/accuracies": 0.75, | |
| "eval_rewards/chosen": -3.3125, | |
| "eval_rewards/margins": 2.09375, | |
| "eval_rewards/rejected": -5.40625, | |
| "eval_runtime": 49.8427, | |
| "eval_samples_per_second": 21.066, | |
| "eval_steps_per_second": 0.181, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 15.779889705391266, | |
| "learning_rate": 4.265998221186023e-08, | |
| "logits/chosen": -1.2421875, | |
| "logits/rejected": -1.1640625, | |
| "logps/chosen": -592.0, | |
| "logps/rejected": -560.0, | |
| "loss": 0.1073, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.546875, | |
| "rewards/margins": 3.03125, | |
| "rewards/rejected": -5.5625, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 27.097026355265392, | |
| "learning_rate": 4.0666867482825135e-08, | |
| "logits/chosen": -1.0859375, | |
| "logits/rejected": -1.0859375, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -548.0, | |
| "loss": 0.117, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.953125, | |
| "rewards/margins": 3.359375, | |
| "rewards/rejected": -5.3125, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 13.442767252473281, | |
| "learning_rate": 3.871730538893611e-08, | |
| "logits/chosen": -1.3515625, | |
| "logits/rejected": -1.9609375, | |
| "logps/chosen": -736.0, | |
| "logps/rejected": -740.0, | |
| "loss": 0.1108, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.421875, | |
| "rewards/margins": 3.328125, | |
| "rewards/rejected": -4.75, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 17.818418586870905, | |
| "learning_rate": 3.681170154475391e-08, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.546875, | |
| "logps/chosen": -442.0, | |
| "logps/rejected": -620.0, | |
| "loss": 0.1236, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.90625, | |
| "rewards/margins": 2.5625, | |
| "rewards/rejected": -4.46875, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 16.487242563455915, | |
| "learning_rate": 3.495045241914105e-08, | |
| "logits/chosen": -1.09375, | |
| "logits/rejected": -2.546875, | |
| "logps/chosen": -584.0, | |
| "logps/rejected": -672.0, | |
| "loss": 0.1079, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.25, | |
| "rewards/margins": 4.09375, | |
| "rewards/rejected": -6.34375, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 15.802897986414916, | |
| "learning_rate": 3.313394525277527e-08, | |
| "logits/chosen": -1.4609375, | |
| "logits/rejected": -1.3125, | |
| "logps/chosen": -482.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.0979, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.265625, | |
| "rewards/margins": 3.078125, | |
| "rewards/rejected": -5.34375, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 11.010895733144364, | |
| "learning_rate": 3.1362557977582e-08, | |
| "logits/chosen": -1.1953125, | |
| "logits/rejected": -1.2109375, | |
| "logps/chosen": -482.0, | |
| "logps/rejected": -506.0, | |
| "loss": 0.1009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.65625, | |
| "rewards/margins": 1.8515625, | |
| "rewards/rejected": -4.5, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 19.922568800825218, | |
| "learning_rate": 2.963665913810451e-08, | |
| "logits/chosen": -1.0078125, | |
| "logits/rejected": -2.40625, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -588.0, | |
| "loss": 0.1016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.34375, | |
| "rewards/margins": 2.796875, | |
| "rewards/rejected": -5.125, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 14.614675732714645, | |
| "learning_rate": 2.7956607814826366e-08, | |
| "logits/chosen": -1.2109375, | |
| "logits/rejected": -1.078125, | |
| "logps/chosen": -732.0, | |
| "logps/rejected": -712.0, | |
| "loss": 0.1244, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.15625, | |
| "rewards/margins": 3.171875, | |
| "rewards/rejected": -5.34375, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 13.459903874571827, | |
| "learning_rate": 2.632275354946342e-08, | |
| "logits/chosen": -0.9375, | |
| "logits/rejected": -2.328125, | |
| "logps/chosen": -470.0, | |
| "logps/rejected": -386.0, | |
| "loss": 0.1195, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.8359375, | |
| "rewards/margins": 2.0625, | |
| "rewards/rejected": -3.90625, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 11.287103509304053, | |
| "learning_rate": 2.4735436272239922e-08, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -2.484375, | |
| "logps/chosen": -500.0, | |
| "logps/rejected": -612.0, | |
| "loss": 0.096, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.03125, | |
| "rewards/margins": 3.71875, | |
| "rewards/rejected": -5.75, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 9.246071212037243, | |
| "learning_rate": 2.319498623116492e-08, | |
| "logits/chosen": -2.203125, | |
| "logits/rejected": -1.578125, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -796.0, | |
| "loss": 0.1098, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.125, | |
| "rewards/margins": 4.15625, | |
| "rewards/rejected": -7.28125, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 25.87859572275051, | |
| "learning_rate": 2.1701723923322673e-08, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.8984375, | |
| "logps/chosen": -516.0, | |
| "logps/rejected": -644.0, | |
| "loss": 0.1225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.484375, | |
| "rewards/margins": 3.4375, | |
| "rewards/rejected": -5.9375, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 22.24550487415097, | |
| "learning_rate": 2.0255960028191798e-08, | |
| "logits/chosen": -1.5703125, | |
| "logits/rejected": -1.703125, | |
| "logps/chosen": -502.0, | |
| "logps/rejected": -552.0, | |
| "loss": 0.1103, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.375, | |
| "rewards/margins": 3.0625, | |
| "rewards/rejected": -5.4375, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 22.983007966397018, | |
| "learning_rate": 1.8857995343007167e-08, | |
| "logits/chosen": -1.8125, | |
| "logits/rejected": -1.3828125, | |
| "logps/chosen": -728.0, | |
| "logps/rejected": -900.0, | |
| "loss": 0.1094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.96875, | |
| "rewards/margins": 3.96875, | |
| "rewards/rejected": -5.9375, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 25.097623669642626, | |
| "learning_rate": 1.7508120720177795e-08, | |
| "logits/chosen": -1.1796875, | |
| "logits/rejected": -1.03125, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -604.0, | |
| "loss": 0.1184, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.78125, | |
| "rewards/margins": 2.078125, | |
| "rewards/rejected": -4.875, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 32.20516783572916, | |
| "learning_rate": 1.6206617006773753e-08, | |
| "logits/chosen": -0.78515625, | |
| "logits/rejected": -2.078125, | |
| "logps/chosen": -736.0, | |
| "logps/rejected": -556.0, | |
| "loss": 0.1038, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.5078125, | |
| "rewards/margins": 2.828125, | |
| "rewards/rejected": -4.34375, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 11.93984865185982, | |
| "learning_rate": 1.4953754986094886e-08, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.6328125, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -580.0, | |
| "loss": 0.1095, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7109375, | |
| "rewards/margins": 2.640625, | |
| "rewards/rejected": -4.34375, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 30.015474257847476, | |
| "learning_rate": 1.3749795321332885e-08, | |
| "logits/chosen": -1.265625, | |
| "logits/rejected": -1.5390625, | |
| "logps/chosen": -664.0, | |
| "logps/rejected": -804.0, | |
| "loss": 0.1274, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.96875, | |
| "rewards/margins": 3.421875, | |
| "rewards/rejected": -5.40625, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 20.828265625017977, | |
| "learning_rate": 1.2594988501339665e-08, | |
| "logits/chosen": -1.1796875, | |
| "logits/rejected": -1.796875, | |
| "logps/chosen": -628.0, | |
| "logps/rejected": -684.0, | |
| "loss": 0.1094, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.71875, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -5.8125, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 22.34873903931498, | |
| "learning_rate": 1.148957478851173e-08, | |
| "logits/chosen": -1.515625, | |
| "logits/rejected": -1.375, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.1136, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.328125, | |
| "rewards/margins": 2.515625, | |
| "rewards/rejected": -4.84375, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 15.346930767785905, | |
| "learning_rate": 1.0433784168802805e-08, | |
| "logits/chosen": -1.3125, | |
| "logits/rejected": -1.578125, | |
| "logps/chosen": -624.0, | |
| "logps/rejected": -820.0, | |
| "loss": 0.1239, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.03125, | |
| "rewards/margins": 3.015625, | |
| "rewards/rejected": -6.0625, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 16.847862725204944, | |
| "learning_rate": 9.427836303874115e-09, | |
| "logits/chosen": -1.1640625, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -568.0, | |
| "logps/rejected": -648.0, | |
| "loss": 0.1139, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.921875, | |
| "rewards/margins": 4.0, | |
| "rewards/rejected": -6.9375, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 11.648381674685714, | |
| "learning_rate": 8.47194048539307e-09, | |
| "logits/chosen": -1.015625, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -880.0, | |
| "logps/rejected": -708.0, | |
| "loss": 0.114, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.515625, | |
| "rewards/margins": 3.515625, | |
| "rewards/rejected": -6.03125, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 18.35912272257459, | |
| "learning_rate": 7.566295591489052e-09, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -1.421875, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -676.0, | |
| "loss": 0.1191, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9140625, | |
| "rewards/margins": 3.015625, | |
| "rewards/rejected": -4.9375, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 13.681536454598728, | |
| "learning_rate": 6.71109004537615e-09, | |
| "logits/chosen": -1.125, | |
| "logits/rejected": -1.1875, | |
| "logps/chosen": -604.0, | |
| "logps/rejected": -664.0, | |
| "loss": 0.0951, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.46875, | |
| "rewards/margins": 3.0625, | |
| "rewards/rejected": -5.53125, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 9.950566904078942, | |
| "learning_rate": 5.906501776150763e-09, | |
| "logits/chosen": -1.0, | |
| "logits/rejected": -2.921875, | |
| "logps/chosen": -712.0, | |
| "logps/rejected": -600.0, | |
| "loss": 0.1083, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8515625, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -4.96875, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 22.085690818869246, | |
| "learning_rate": 5.152698181772857e-09, | |
| "logits/chosen": -1.140625, | |
| "logits/rejected": -1.546875, | |
| "logps/chosen": -572.0, | |
| "logps/rejected": -760.0, | |
| "loss": 0.1072, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.125, | |
| "rewards/margins": 2.71875, | |
| "rewards/rejected": -4.84375, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 21.353543333604414, | |
| "learning_rate": 4.449836094238019e-09, | |
| "logits/chosen": -1.171875, | |
| "logits/rejected": -1.890625, | |
| "logps/chosen": -620.0, | |
| "logps/rejected": -486.0, | |
| "loss": 0.0966, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9453125, | |
| "rewards/margins": 3.21875, | |
| "rewards/rejected": -5.15625, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 11.392655664834834, | |
| "learning_rate": 3.798061746947995e-09, | |
| "logits/chosen": -1.09375, | |
| "logits/rejected": -1.546875, | |
| "logps/chosen": -500.0, | |
| "logps/rejected": -568.0, | |
| "loss": 0.1108, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.40625, | |
| "rewards/margins": 2.078125, | |
| "rewards/rejected": -4.5, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 16.899400188846528, | |
| "learning_rate": 3.1975107442860637e-09, | |
| "logits/chosen": -1.953125, | |
| "logits/rejected": -1.453125, | |
| "logps/chosen": -494.0, | |
| "logps/rejected": -928.0, | |
| "loss": 0.6379, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.453125, | |
| "rewards/margins": 4.59375, | |
| "rewards/rejected": -7.0625, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 18.0993042603957, | |
| "learning_rate": 2.6483080334041287e-09, | |
| "logits/chosen": -1.328125, | |
| "logits/rejected": -1.453125, | |
| "logps/chosen": -652.0, | |
| "logps/rejected": -816.0, | |
| "loss": 0.0927, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.078125, | |
| "rewards/margins": 3.625, | |
| "rewards/rejected": -5.6875, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 18.326061508758997, | |
| "learning_rate": 2.1505678782269e-09, | |
| "logits/chosen": -1.3125, | |
| "logits/rejected": -2.1875, | |
| "logps/chosen": -668.0, | |
| "logps/rejected": -572.0, | |
| "loss": 0.1068, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.125, | |
| "rewards/margins": 3.1875, | |
| "rewards/rejected": -5.3125, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 20.00896083752618, | |
| "learning_rate": 1.7043938356787467e-09, | |
| "logits/chosen": -1.4921875, | |
| "logits/rejected": -1.3125, | |
| "logps/chosen": -354.0, | |
| "logps/rejected": -556.0, | |
| "loss": 0.1215, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.109375, | |
| "rewards/margins": 2.765625, | |
| "rewards/rejected": -4.875, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 33.844584510123674, | |
| "learning_rate": 1.30987873413832e-09, | |
| "logits/chosen": -0.94140625, | |
| "logits/rejected": -1.109375, | |
| "logps/chosen": -588.0, | |
| "logps/rejected": -660.0, | |
| "loss": 0.1078, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.375, | |
| "rewards/margins": 2.859375, | |
| "rewards/rejected": -5.21875, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 13.322271542703083, | |
| "learning_rate": 9.671046541251393e-10, | |
| "logits/chosen": -1.09375, | |
| "logits/rejected": -2.046875, | |
| "logps/chosen": -672.0, | |
| "logps/rejected": -576.0, | |
| "loss": 0.1159, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7890625, | |
| "rewards/margins": 3.765625, | |
| "rewards/rejected": -5.5625, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 28.085054792653725, | |
| "learning_rate": 6.761429112225326e-10, | |
| "logits/chosen": -1.1484375, | |
| "logits/rejected": -0.7734375, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -908.0, | |
| "loss": 0.0952, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8203125, | |
| "rewards/margins": 3.71875, | |
| "rewards/rejected": -5.53125, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 20.97872609812394, | |
| "learning_rate": 4.370540412399759e-10, | |
| "logits/chosen": -1.859375, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -564.0, | |
| "logps/rejected": -684.0, | |
| "loss": 0.1135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.328125, | |
| "rewards/margins": 3.609375, | |
| "rewards/rejected": -5.9375, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 19.210485937449167, | |
| "learning_rate": 2.498877876184191e-10, | |
| "logits/chosen": -1.4765625, | |
| "logits/rejected": -1.046875, | |
| "logps/chosen": -688.0, | |
| "logps/rejected": -664.0, | |
| "loss": 0.1025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.6015625, | |
| "rewards/margins": 3.125, | |
| "rewards/rejected": -4.71875, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 12.682964834422256, | |
| "learning_rate": 1.1468309108100816e-10, | |
| "logits/chosen": -1.1171875, | |
| "logits/rejected": -1.3125, | |
| "logps/chosen": -456.0, | |
| "logps/rejected": -540.0, | |
| "loss": 0.1049, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -3.0, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -6.125, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 16.080027717160323, | |
| "learning_rate": 3.146808153123293e-11, | |
| "logits/chosen": -1.234375, | |
| "logits/rejected": -2.03125, | |
| "logps/chosen": -492.0, | |
| "logps/rejected": -532.0, | |
| "loss": 0.1196, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.109375, | |
| "rewards/margins": 2.046875, | |
| "rewards/rejected": -4.15625, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 13.95546245482141, | |
| "learning_rate": 2.60072200469752e-13, | |
| "logits/chosen": -1.2265625, | |
| "logits/rejected": -2.421875, | |
| "logps/chosen": -624.0, | |
| "logps/rejected": -482.0, | |
| "loss": 0.1097, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.34375, | |
| "rewards/margins": 2.84375, | |
| "rewards/rejected": -5.1875, | |
| "step": 2420 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2421, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |