| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.008502949460594144, | |
| "eval_steps": 500, | |
| "global_step": 40, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.796875, | |
| "epoch": 0.00021257373651485358, | |
| "grad_norm": 0.4854881763458252, | |
| "kl": 9.614229202270508e-05, | |
| "learning_rate": 9.997874149659865e-07, | |
| "loss": 0.0, | |
| "reward": 2.732285737991333, | |
| "reward_std": 0.02619727296405472, | |
| "rewards/format_reward_hoi_key": 0.9139583259820938, | |
| "rewards/format_reward_hoi_object_label": 0.8222222253680229, | |
| "rewards/format_reward_hoi_verb_label": 0.3161458373069763, | |
| "rewards/hoi_iou_reward": 0.6799592822790146, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 173.3125, | |
| "epoch": 0.00042514747302970716, | |
| "grad_norm": 0.6831408739089966, | |
| "kl": 1.3329088687896729e-05, | |
| "learning_rate": 9.995748299319728e-07, | |
| "loss": 0.0, | |
| "reward": 2.8274163007736206, | |
| "reward_std": 0.03815040903282352, | |
| "rewards/format_reward_hoi_key": 0.8166666775941849, | |
| "rewards/format_reward_hoi_object_label": 0.7916666567325592, | |
| "rewards/format_reward_hoi_verb_label": 0.5974702388048172, | |
| "rewards/hoi_iou_reward": 0.6216127127408981, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 166.03125, | |
| "epoch": 0.0006377212095445608, | |
| "grad_norm": 0.8407193422317505, | |
| "kl": 0.00014454126358032227, | |
| "learning_rate": 9.99362244897959e-07, | |
| "loss": 0.0, | |
| "reward": 2.986231029033661, | |
| "reward_std": 0.0052611194987548515, | |
| "rewards/format_reward_hoi_key": 0.8208333402872086, | |
| "rewards/format_reward_hoi_object_label": 0.84375, | |
| "rewards/format_reward_hoi_verb_label": 0.6927083432674408, | |
| "rewards/hoi_iou_reward": 0.6289393231272697, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 185.703125, | |
| "epoch": 0.0008502949460594143, | |
| "grad_norm": 2.3168516159057617, | |
| "kl": 0.00014531612396240234, | |
| "learning_rate": 9.991496598639456e-07, | |
| "loss": 0.0, | |
| "reward": 2.3956105709075928, | |
| "reward_std": 0.045728508091997355, | |
| "rewards/format_reward_hoi_key": 0.7395220696926117, | |
| "rewards/format_reward_hoi_object_label": 0.59375, | |
| "rewards/format_reward_hoi_verb_label": 0.5073784738779068, | |
| "rewards/hoi_iou_reward": 0.5549599975347519, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 180.421875, | |
| "epoch": 0.001062868682574268, | |
| "grad_norm": 0.5881515741348267, | |
| "kl": 0.00014841556549072266, | |
| "learning_rate": 9.989370748299319e-07, | |
| "loss": 0.0, | |
| "reward": 2.2462641298770905, | |
| "reward_std": 0.14320564700756222, | |
| "rewards/format_reward_hoi_key": 0.7350446432828903, | |
| "rewards/format_reward_hoi_object_label": 0.4899553433060646, | |
| "rewards/format_reward_hoi_verb_label": 0.5563345961272717, | |
| "rewards/hoi_iou_reward": 0.46492957696318626, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 208.28125, | |
| "epoch": 0.0012754424190891216, | |
| "grad_norm": 0.29585161805152893, | |
| "kl": 0.0001379847526550293, | |
| "learning_rate": 9.987244897959182e-07, | |
| "loss": 0.0, | |
| "reward": 2.1843446791172028, | |
| "reward_std": 0.005820542646688409, | |
| "rewards/format_reward_hoi_key": 0.8457291722297668, | |
| "rewards/format_reward_hoi_object_label": 0.6000000089406967, | |
| "rewards/format_reward_hoi_verb_label": 0.1180555634200573, | |
| "rewards/hoi_iou_reward": 0.6205599009990692, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 164.84375, | |
| "epoch": 0.0014880161556039752, | |
| "grad_norm": 0.5830075144767761, | |
| "kl": 0.00010955333709716797, | |
| "learning_rate": 9.985119047619047e-07, | |
| "loss": -0.0, | |
| "reward": 2.5442887246608734, | |
| "reward_std": 0.11149050580570474, | |
| "rewards/format_reward_hoi_key": 0.7979166656732559, | |
| "rewards/format_reward_hoi_object_label": 0.7083333358168602, | |
| "rewards/format_reward_hoi_verb_label": 0.4583333358168602, | |
| "rewards/hoi_iou_reward": 0.5797053650021553, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 167.234375, | |
| "epoch": 0.0017005898921188286, | |
| "grad_norm": 0.35756170749664307, | |
| "kl": 8.910894393920898e-05, | |
| "learning_rate": 9.982993197278912e-07, | |
| "loss": 0.0, | |
| "reward": 2.5064347982406616, | |
| "reward_std": 0.0026310062530683354, | |
| "rewards/format_reward_hoi_key": 0.7702381014823914, | |
| "rewards/format_reward_hoi_object_label": 0.595362103311345, | |
| "rewards/format_reward_hoi_verb_label": 0.5941220238455571, | |
| "rewards/hoi_iou_reward": 0.546712551265955, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.484375, | |
| "epoch": 0.0019131636286336823, | |
| "grad_norm": 1.1468232870101929, | |
| "kl": 0.00023877620697021484, | |
| "learning_rate": 9.980867346938775e-07, | |
| "loss": 0.0, | |
| "reward": 2.93448406457901, | |
| "reward_std": 0.07516021025367081, | |
| "rewards/format_reward_hoi_key": 0.90625, | |
| "rewards/format_reward_hoi_object_label": 0.79296875, | |
| "rewards/format_reward_hoi_verb_label": 0.447916679084301, | |
| "rewards/hoi_iou_reward": 0.7873486280441284, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 188.921875, | |
| "epoch": 0.002125737365148536, | |
| "grad_norm": 0.608834445476532, | |
| "kl": 0.0003757476806640625, | |
| "learning_rate": 9.97874149659864e-07, | |
| "loss": -0.0, | |
| "reward": 2.309541165828705, | |
| "reward_std": 0.04332686646375805, | |
| "rewards/format_reward_hoi_key": 0.7756249904632568, | |
| "rewards/format_reward_hoi_object_label": 0.5166666656732559, | |
| "rewards/format_reward_hoi_verb_label": 0.46510415710508823, | |
| "rewards/hoi_iou_reward": 0.5521453768014908, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 156.34375, | |
| "epoch": 0.0023383111016633895, | |
| "grad_norm": 1.079801321029663, | |
| "kl": 0.0002917051315307617, | |
| "learning_rate": 9.976615646258503e-07, | |
| "loss": -0.0, | |
| "reward": 2.9021179378032684, | |
| "reward_std": 0.06573383091017604, | |
| "rewards/format_reward_hoi_key": 0.9125000089406967, | |
| "rewards/format_reward_hoi_object_label": 0.75, | |
| "rewards/format_reward_hoi_verb_label": 0.5, | |
| "rewards/hoi_iou_reward": 0.7396180182695389, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.46875, | |
| "epoch": 0.002550884838178243, | |
| "grad_norm": 0.6156663298606873, | |
| "kl": 0.0005776882171630859, | |
| "learning_rate": 9.974489795918366e-07, | |
| "loss": 0.0, | |
| "reward": 2.3791774213314056, | |
| "reward_std": 0.0850577435339801, | |
| "rewards/format_reward_hoi_key": 0.7312500178813934, | |
| "rewards/format_reward_hoi_object_label": 0.5208333358168602, | |
| "rewards/format_reward_hoi_verb_label": 0.5911458358168602, | |
| "rewards/hoi_iou_reward": 0.535948283970356, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 167.578125, | |
| "epoch": 0.002763458574693097, | |
| "grad_norm": 3.2466673851013184, | |
| "kl": 0.0002658367156982422, | |
| "learning_rate": 9.972363945578231e-07, | |
| "loss": 0.0, | |
| "reward": 3.0418315529823303, | |
| "reward_std": 0.013055827002972364, | |
| "rewards/format_reward_hoi_key": 0.9000000059604645, | |
| "rewards/format_reward_hoi_object_label": 0.8125, | |
| "rewards/format_reward_hoi_verb_label": 0.625, | |
| "rewards/hoi_iou_reward": 0.704331636428833, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 167.4375, | |
| "epoch": 0.0029760323112079505, | |
| "grad_norm": 0.5669279098510742, | |
| "kl": 0.0004019737243652344, | |
| "learning_rate": 9.970238095238094e-07, | |
| "loss": 0.0, | |
| "reward": 2.4804917573928833, | |
| "reward_std": 0.08349880830792245, | |
| "rewards/format_reward_hoi_key": 0.7427083253860474, | |
| "rewards/format_reward_hoi_object_label": 0.697916679084301, | |
| "rewards/format_reward_hoi_verb_label": 0.483333345502615, | |
| "rewards/hoi_iou_reward": 0.5565334260463715, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 198.625, | |
| "epoch": 0.0031886060477228037, | |
| "grad_norm": 0.27379515767097473, | |
| "kl": 0.00033855438232421875, | |
| "learning_rate": 9.968112244897957e-07, | |
| "loss": 0.0, | |
| "reward": 2.1904609203338623, | |
| "reward_std": 0.06255148959462531, | |
| "rewards/format_reward_hoi_key": 0.7820312678813934, | |
| "rewards/format_reward_hoi_object_label": 0.6083984375, | |
| "rewards/format_reward_hoi_verb_label": 0.3639322891831398, | |
| "rewards/hoi_iou_reward": 0.4360988959670067, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 242.234375, | |
| "epoch": 0.0034011797842376573, | |
| "grad_norm": 0.2515924870967865, | |
| "kl": 0.0006353855133056641, | |
| "learning_rate": 9.965986394557822e-07, | |
| "loss": 0.0, | |
| "reward": 2.716467797756195, | |
| "reward_std": 0.07810639549279585, | |
| "rewards/format_reward_hoi_key": 0.7664583474397659, | |
| "rewards/format_reward_hoi_object_label": 0.6187500059604645, | |
| "rewards/format_reward_hoi_verb_label": 0.6677083224058151, | |
| "rewards/hoi_iou_reward": 0.663551077246666, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 226.859375, | |
| "epoch": 0.003613753520752511, | |
| "grad_norm": 0.5136963725090027, | |
| "kl": 0.0003933906555175781, | |
| "learning_rate": 9.963860544217688e-07, | |
| "loss": 0.0, | |
| "reward": 2.071069449186325, | |
| "reward_std": 0.06459418445592746, | |
| "rewards/format_reward_hoi_key": 0.6252120807766914, | |
| "rewards/format_reward_hoi_object_label": 0.5837053582072258, | |
| "rewards/format_reward_hoi_verb_label": 0.4394965320825577, | |
| "rewards/hoi_iou_reward": 0.4226554408669472, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 277.625, | |
| "epoch": 0.0038263272572673646, | |
| "grad_norm": 0.6188202500343323, | |
| "kl": 0.0002378225326538086, | |
| "learning_rate": 9.96173469387755e-07, | |
| "loss": 0.0, | |
| "reward": 3.0354496240615845, | |
| "reward_std": 0.30482952669262886, | |
| "rewards/format_reward_hoi_key": 0.8430059552192688, | |
| "rewards/format_reward_hoi_object_label": 0.8227306753396988, | |
| "rewards/format_reward_hoi_verb_label": 0.5986328125, | |
| "rewards/hoi_iou_reward": 0.7710802108049393, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 151.75, | |
| "epoch": 0.004038900993782219, | |
| "grad_norm": 0.27549490332603455, | |
| "kl": 0.0006313323974609375, | |
| "learning_rate": 9.959608843537416e-07, | |
| "loss": -0.0, | |
| "reward": 2.0183950662612915, | |
| "reward_std": 0.015180108457570896, | |
| "rewards/format_reward_hoi_key": 0.6604166775941849, | |
| "rewards/format_reward_hoi_object_label": 0.5416666716337204, | |
| "rewards/format_reward_hoi_verb_label": 0.3524305671453476, | |
| "rewards/hoi_iou_reward": 0.4638812467455864, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 224.078125, | |
| "epoch": 0.004251474730297072, | |
| "grad_norm": 0.56353759765625, | |
| "kl": 0.0008664131164550781, | |
| "learning_rate": 9.957482993197279e-07, | |
| "loss": 0.0, | |
| "reward": 2.617310881614685, | |
| "reward_std": 0.185114907566458, | |
| "rewards/format_reward_hoi_key": 0.7604167088866234, | |
| "rewards/format_reward_hoi_object_label": 0.6744791641831398, | |
| "rewards/format_reward_hoi_verb_label": 0.5677083283662796, | |
| "rewards/hoi_iou_reward": 0.6147066801786423, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 174.09375, | |
| "epoch": 0.004464048466811925, | |
| "grad_norm": 0.31322988867759705, | |
| "kl": 0.0007352828979492188, | |
| "learning_rate": 9.955357142857142e-07, | |
| "loss": 0.0, | |
| "reward": 2.9305796921253204, | |
| "reward_std": 0.01013911364134401, | |
| "rewards/format_reward_hoi_key": 0.8696428686380386, | |
| "rewards/format_reward_hoi_object_label": 0.7857142835855484, | |
| "rewards/format_reward_hoi_verb_label": 0.552300363779068, | |
| "rewards/hoi_iou_reward": 0.7229221612215042, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 162.0, | |
| "epoch": 0.004676622203326779, | |
| "grad_norm": 0.4496309161186218, | |
| "kl": 0.001049041748046875, | |
| "learning_rate": 9.953231292517007e-07, | |
| "loss": 0.0, | |
| "reward": 2.2096868455410004, | |
| "reward_std": 0.0113821976701729, | |
| "rewards/format_reward_hoi_key": 0.7333928644657135, | |
| "rewards/format_reward_hoi_object_label": 0.6169642880558968, | |
| "rewards/format_reward_hoi_verb_label": 0.2777777761220932, | |
| "rewards/hoi_iou_reward": 0.5815519690513611, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 154.609375, | |
| "epoch": 0.004889195939841632, | |
| "grad_norm": 0.8822689652442932, | |
| "kl": 0.0013833045959472656, | |
| "learning_rate": 9.95110544217687e-07, | |
| "loss": 0.0, | |
| "reward": 3.247895896434784, | |
| "reward_std": 0.04373934442992322, | |
| "rewards/format_reward_hoi_key": 0.9250000268220901, | |
| "rewards/format_reward_hoi_object_label": 0.9583333283662796, | |
| "rewards/format_reward_hoi_verb_label": 0.6562499850988388, | |
| "rewards/hoi_iou_reward": 0.708312600851059, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 410.328125, | |
| "epoch": 0.005101769676356486, | |
| "grad_norm": 0.7035483121871948, | |
| "kl": 0.0004572868347167969, | |
| "learning_rate": 9.948979591836735e-07, | |
| "loss": 0.0, | |
| "reward": 2.264761805534363, | |
| "reward_std": 0.28715356811881065, | |
| "rewards/format_reward_hoi_key": 0.6794504672288895, | |
| "rewards/format_reward_hoi_object_label": 0.5326923131942749, | |
| "rewards/format_reward_hoi_verb_label": 0.6280448734760284, | |
| "rewards/hoi_iou_reward": 0.42457417771220207, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 227.59375, | |
| "epoch": 0.0053143434128713396, | |
| "grad_norm": 0.31094542145729065, | |
| "kl": 0.0009341239929199219, | |
| "learning_rate": 9.946853741496598e-07, | |
| "loss": 0.0, | |
| "reward": 2.356251895427704, | |
| "reward_std": 0.003799198704655282, | |
| "rewards/format_reward_hoi_key": 0.767708346247673, | |
| "rewards/format_reward_hoi_object_label": 0.4895833432674408, | |
| "rewards/format_reward_hoi_verb_label": 0.5043560639023781, | |
| "rewards/hoi_iou_reward": 0.5946041345596313, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 181.375, | |
| "epoch": 0.005526917149386194, | |
| "grad_norm": 0.5995525121688843, | |
| "kl": 0.00150299072265625, | |
| "learning_rate": 9.944727891156463e-07, | |
| "loss": 0.0001, | |
| "reward": 2.6978970766067505, | |
| "reward_std": 0.13544296027976088, | |
| "rewards/format_reward_hoi_key": 0.8333333432674408, | |
| "rewards/format_reward_hoi_object_label": 0.6158854141831398, | |
| "rewards/format_reward_hoi_verb_label": 0.5898437350988388, | |
| "rewards/hoi_iou_reward": 0.6588345021009445, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 189.03125, | |
| "epoch": 0.005739490885901047, | |
| "grad_norm": 0.5540001392364502, | |
| "kl": 0.000858306884765625, | |
| "learning_rate": 9.942602040816326e-07, | |
| "loss": 0.0001, | |
| "reward": 3.355882227420807, | |
| "reward_std": 0.005877207615412772, | |
| "rewards/format_reward_hoi_key": 0.9535714238882065, | |
| "rewards/format_reward_hoi_object_label": 0.9017857313156128, | |
| "rewards/format_reward_hoi_verb_label": 0.7232142835855484, | |
| "rewards/hoi_iou_reward": 0.7773108184337616, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 211.90625, | |
| "epoch": 0.005952064622415901, | |
| "grad_norm": 2.0975677967071533, | |
| "kl": 0.001495361328125, | |
| "learning_rate": 9.940476190476191e-07, | |
| "loss": 0.0001, | |
| "reward": 2.007324628531933, | |
| "reward_std": 0.03993106237612665, | |
| "rewards/format_reward_hoi_key": 0.5873221457004547, | |
| "rewards/format_reward_hoi_object_label": 0.44114159047603607, | |
| "rewards/format_reward_hoi_verb_label": 0.5036415904760361, | |
| "rewards/hoi_iou_reward": 0.47521928139030933, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 165.1875, | |
| "epoch": 0.006164638358930754, | |
| "grad_norm": 0.37749311327934265, | |
| "kl": 0.0019092559814453125, | |
| "learning_rate": 9.938350340136054e-07, | |
| "loss": 0.0001, | |
| "reward": 2.2582033574581146, | |
| "reward_std": 0.08065436300239526, | |
| "rewards/format_reward_hoi_key": 0.6932291835546494, | |
| "rewards/format_reward_hoi_object_label": 0.59375, | |
| "rewards/format_reward_hoi_verb_label": 0.3541666641831398, | |
| "rewards/hoi_iou_reward": 0.6170575618743896, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 184.59375, | |
| "epoch": 0.006377212095445607, | |
| "grad_norm": 0.3330162763595581, | |
| "kl": 0.0014448165893554688, | |
| "learning_rate": 9.936224489795917e-07, | |
| "loss": 0.0, | |
| "reward": 2.6335054636001587, | |
| "reward_std": 0.0012341497422312386, | |
| "rewards/format_reward_hoi_key": 0.8750000149011612, | |
| "rewards/format_reward_hoi_object_label": 0.6875, | |
| "rewards/format_reward_hoi_verb_label": 0.3880208358168602, | |
| "rewards/hoi_iou_reward": 0.6829846650362015, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 172.125, | |
| "epoch": 0.006589785831960461, | |
| "grad_norm": 0.8369670510292053, | |
| "kl": 0.0013284683227539062, | |
| "learning_rate": 9.934098639455782e-07, | |
| "loss": 0.0001, | |
| "reward": 2.4850784838199615, | |
| "reward_std": 0.02788396377582103, | |
| "rewards/format_reward_hoi_key": 0.8687500208616257, | |
| "rewards/format_reward_hoi_object_label": 0.4687500074505806, | |
| "rewards/format_reward_hoi_verb_label": 0.5, | |
| "rewards/hoi_iou_reward": 0.6475784331560135, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 216.046875, | |
| "epoch": 0.006802359568475315, | |
| "grad_norm": 0.9224941730499268, | |
| "kl": 0.00140380859375, | |
| "learning_rate": 9.931972789115645e-07, | |
| "loss": 0.0, | |
| "reward": 2.751905083656311, | |
| "reward_std": 0.08277821098454297, | |
| "rewards/format_reward_hoi_key": 0.809895858168602, | |
| "rewards/format_reward_hoi_object_label": 0.5078125, | |
| "rewards/format_reward_hoi_verb_label": 0.6927083283662796, | |
| "rewards/hoi_iou_reward": 0.7414884492754936, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 210.71875, | |
| "epoch": 0.007014933304990169, | |
| "grad_norm": 0.39392945170402527, | |
| "kl": 0.002300262451171875, | |
| "learning_rate": 9.92984693877551e-07, | |
| "loss": 0.0001, | |
| "reward": 2.1440170407295227, | |
| "reward_std": 0.02253561234101653, | |
| "rewards/format_reward_hoi_key": 0.9121875166893005, | |
| "rewards/format_reward_hoi_object_label": 0.3125, | |
| "rewards/format_reward_hoi_verb_label": 0.3333333358168602, | |
| "rewards/hoi_iou_reward": 0.5859961807727814, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 194.265625, | |
| "epoch": 0.007227507041505022, | |
| "grad_norm": 0.5018682479858398, | |
| "kl": 0.0017986297607421875, | |
| "learning_rate": 9.927721088435373e-07, | |
| "loss": 0.0001, | |
| "reward": 2.5592292845249176, | |
| "reward_std": 0.00986732606543228, | |
| "rewards/format_reward_hoi_key": 0.7691666930913925, | |
| "rewards/format_reward_hoi_object_label": 0.6583333313465118, | |
| "rewards/format_reward_hoi_verb_label": 0.5562499985098839, | |
| "rewards/hoi_iou_reward": 0.5754793435335159, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 205.03125, | |
| "epoch": 0.007440080778019876, | |
| "grad_norm": 0.6149845719337463, | |
| "kl": 0.0016880035400390625, | |
| "learning_rate": 9.925595238095238e-07, | |
| "loss": 0.0001, | |
| "reward": 2.778216004371643, | |
| "reward_std": 0.11917518911650404, | |
| "rewards/format_reward_hoi_key": 0.8614583313465118, | |
| "rewards/format_reward_hoi_object_label": 0.7333333194255829, | |
| "rewards/format_reward_hoi_verb_label": 0.5011574029922485, | |
| "rewards/hoi_iou_reward": 0.6822669506072998, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 222.671875, | |
| "epoch": 0.007652654514534729, | |
| "grad_norm": 0.5013077259063721, | |
| "kl": 0.0017242431640625, | |
| "learning_rate": 9.923469387755101e-07, | |
| "loss": 0.0001, | |
| "reward": 2.724997416138649, | |
| "reward_std": 0.007125564094167203, | |
| "rewards/format_reward_hoi_key": 0.8181547522544861, | |
| "rewards/format_reward_hoi_object_label": 0.6875, | |
| "rewards/format_reward_hoi_verb_label": 0.625, | |
| "rewards/hoi_iou_reward": 0.5943426117300987, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 249.65625, | |
| "epoch": 0.007865228251049582, | |
| "grad_norm": 0.4427626430988312, | |
| "kl": 0.00135040283203125, | |
| "learning_rate": 9.921343537414967e-07, | |
| "loss": 0.0001, | |
| "reward": 2.5683979988098145, | |
| "reward_std": 0.05630575024406426, | |
| "rewards/format_reward_hoi_key": 0.810416653752327, | |
| "rewards/format_reward_hoi_object_label": 0.625, | |
| "rewards/format_reward_hoi_verb_label": 0.3906250111758709, | |
| "rewards/hoi_iou_reward": 0.7423563152551651, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 270.921875, | |
| "epoch": 0.008077801987564437, | |
| "grad_norm": 2.8067455291748047, | |
| "kl": 0.0019435882568359375, | |
| "learning_rate": 9.91921768707483e-07, | |
| "loss": 0.0001, | |
| "reward": 2.22263365983963, | |
| "reward_std": 0.20146464882418513, | |
| "rewards/format_reward_hoi_key": 0.6945772171020508, | |
| "rewards/format_reward_hoi_object_label": 0.3977022171020508, | |
| "rewards/format_reward_hoi_verb_label": 0.5460824370384216, | |
| "rewards/hoi_iou_reward": 0.5842718333005905, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 211.40625, | |
| "epoch": 0.00829037572407929, | |
| "grad_norm": 0.5841067433357239, | |
| "kl": 0.00464630126953125, | |
| "learning_rate": 9.917091836734693e-07, | |
| "loss": 0.0002, | |
| "reward": 2.7285755276679993, | |
| "reward_std": 0.15019595221383497, | |
| "rewards/format_reward_hoi_key": 0.931383952498436, | |
| "rewards/format_reward_hoi_object_label": 0.5188244059681892, | |
| "rewards/format_reward_hoi_verb_label": 0.5774181559681892, | |
| "rewards/hoi_iou_reward": 0.7009490430355072, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 235.15625, | |
| "epoch": 0.008502949460594144, | |
| "grad_norm": 0.3903954327106476, | |
| "kl": 0.0014748573303222656, | |
| "learning_rate": 9.914965986394558e-07, | |
| "loss": 0.0001, | |
| "reward": 2.41436231136322, | |
| "reward_std": 0.031614198378520086, | |
| "rewards/format_reward_hoi_key": 0.7393315136432648, | |
| "rewards/format_reward_hoi_object_label": 0.6360462605953217, | |
| "rewards/format_reward_hoi_verb_label": 0.5051649361848831, | |
| "rewards/hoi_iou_reward": 0.5338196456432343, | |
| "step": 40 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 4704, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 20, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |