| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.999559277214632, | |
| "eval_steps": 500, | |
| "global_step": 567, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 477.0, | |
| "completions/max_terminated_length": 477.0, | |
| "completions/mean_length": 175.50390625, | |
| "completions/mean_terminated_length": 175.50390625, | |
| "completions/min_length": 21.0, | |
| "completions/min_terminated_length": 21.0, | |
| "epoch": 0.0017628911414720142, | |
| "grad_norm": 1.0880173896572545, | |
| "kl": 0.0, | |
| "learning_rate": 0.0, | |
| "loss": -0.327, | |
| "num_tokens": 129409.0, | |
| "reward": 0.814777672290802, | |
| "reward_std": 0.14736539125442505, | |
| "rewards/format_reward/mean": 0.68359375, | |
| "rewards/format_reward/std": 0.4659844934940338, | |
| "rewards/qatch_metrics/mean": 0.8332747220993042, | |
| "rewards/qatch_metrics/std": 0.3284282088279724, | |
| "rewards/tag_count_reward/mean": 0.7626953125, | |
| "rewards/tag_count_reward/std": 0.34948837757110596, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 421.0, | |
| "completions/max_terminated_length": 421.0, | |
| "completions/mean_length": 177.318359375, | |
| "completions/mean_terminated_length": 177.318359375, | |
| "completions/min_length": 21.5, | |
| "completions/min_terminated_length": 21.5, | |
| "epoch": 0.00881445570736007, | |
| "grad_norm": 0.9499188530188546, | |
| "kl": 0.00019824504852294922, | |
| "learning_rate": 7.017543859649122e-08, | |
| "loss": -0.2902, | |
| "num_tokens": 685703.0, | |
| "reward": 0.762174516916275, | |
| "reward_std": 0.15002675727009773, | |
| "rewards/format_reward/mean": 0.7265625, | |
| "rewards/format_reward/std": 0.4450720399618149, | |
| "rewards/qatch_metrics/mean": 0.7644235193729401, | |
| "rewards/qatch_metrics/std": 0.3610532283782959, | |
| "rewards/tag_count_reward/mean": 0.795166015625, | |
| "rewards/tag_count_reward/std": 0.33385463058948517, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 438.8, | |
| "completions/max_terminated_length": 438.8, | |
| "completions/mean_length": 173.41171875, | |
| "completions/mean_terminated_length": 173.41171875, | |
| "completions/min_length": 21.8, | |
| "completions/min_terminated_length": 21.8, | |
| "epoch": 0.01762891141472014, | |
| "grad_norm": 0.9346895582900878, | |
| "kl": 0.00028295516967773436, | |
| "learning_rate": 1.5789473684210525e-07, | |
| "loss": -0.2591, | |
| "num_tokens": 1398566.0, | |
| "reward": 0.7710299372673035, | |
| "reward_std": 0.1539353460073471, | |
| "rewards/format_reward/mean": 0.71796875, | |
| "rewards/format_reward/std": 0.4487275779247284, | |
| "rewards/qatch_metrics/mean": 0.7762346506118775, | |
| "rewards/qatch_metrics/std": 0.3281721532344818, | |
| "rewards/tag_count_reward/mean": 0.788671875, | |
| "rewards/tag_count_reward/std": 0.33627479076385497, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 438.2, | |
| "completions/max_terminated_length": 438.2, | |
| "completions/mean_length": 183.1796875, | |
| "completions/mean_terminated_length": 183.1796875, | |
| "completions/min_length": 20.0, | |
| "completions/min_terminated_length": 20.0, | |
| "epoch": 0.026443367122080213, | |
| "grad_norm": 0.7943318239924386, | |
| "kl": 0.00037631988525390627, | |
| "learning_rate": 2.456140350877193e-07, | |
| "loss": -0.2603, | |
| "num_tokens": 2071996.0, | |
| "reward": 0.7256837129592896, | |
| "reward_std": 0.12991088777780532, | |
| "rewards/format_reward/mean": 0.765625, | |
| "rewards/format_reward/std": 0.4240167737007141, | |
| "rewards/qatch_metrics/mean": 0.7151770830154419, | |
| "rewards/qatch_metrics/std": 0.37596395611763, | |
| "rewards/tag_count_reward/mean": 0.8244140625, | |
| "rewards/tag_count_reward/std": 0.31790287494659425, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 479.6, | |
| "completions/max_terminated_length": 479.6, | |
| "completions/mean_length": 201.30234375, | |
| "completions/mean_terminated_length": 201.30234375, | |
| "completions/min_length": 21.2, | |
| "completions/min_terminated_length": 21.2, | |
| "epoch": 0.03525782282944028, | |
| "grad_norm": 0.4721344642723057, | |
| "kl": 0.00091400146484375, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": -0.1315, | |
| "num_tokens": 2791247.0, | |
| "reward": 0.8173989057540894, | |
| "reward_std": 0.12794919013977052, | |
| "rewards/format_reward/mean": 0.89765625, | |
| "rewards/format_reward/std": 0.29814977645874025, | |
| "rewards/qatch_metrics/mean": 0.8017192721366883, | |
| "rewards/qatch_metrics/std": 0.331482595205307, | |
| "rewards/tag_count_reward/mean": 0.9234375, | |
| "rewards/tag_count_reward/std": 0.22307254374027252, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 432.2, | |
| "completions/max_terminated_length": 432.2, | |
| "completions/mean_length": 221.4625, | |
| "completions/mean_terminated_length": 221.4625, | |
| "completions/min_length": 51.4, | |
| "completions/min_terminated_length": 51.4, | |
| "epoch": 0.044072278536800354, | |
| "grad_norm": 0.29592079815815686, | |
| "kl": 0.0016038894653320312, | |
| "learning_rate": 4.2105263157894733e-07, | |
| "loss": -0.0424, | |
| "num_tokens": 3536975.0, | |
| "reward": 0.7564297676086426, | |
| "reward_std": 0.08200130835175515, | |
| "rewards/format_reward/mean": 0.96953125, | |
| "rewards/format_reward/std": 0.13422587364912034, | |
| "rewards/qatch_metrics/mean": 0.7183640837669373, | |
| "rewards/qatch_metrics/std": 0.3674669623374939, | |
| "rewards/tag_count_reward/mean": 0.97734375, | |
| "rewards/tag_count_reward/std": 0.09909781143069267, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 445.6, | |
| "completions/max_terminated_length": 445.6, | |
| "completions/mean_length": 216.53984375, | |
| "completions/mean_terminated_length": 216.53984375, | |
| "completions/min_length": 77.0, | |
| "completions/min_terminated_length": 77.0, | |
| "epoch": 0.052886734244160426, | |
| "grad_norm": 0.275794455786416, | |
| "kl": 0.0034694671630859375, | |
| "learning_rate": 5.087719298245614e-07, | |
| "loss": 0.002, | |
| "num_tokens": 4281330.0, | |
| "reward": 0.7764788866043091, | |
| "reward_std": 0.09769791960716248, | |
| "rewards/format_reward/mean": 0.9953125, | |
| "rewards/format_reward/std": 0.06028594672679901, | |
| "rewards/qatch_metrics/mean": 0.7377692699432373, | |
| "rewards/qatch_metrics/std": 0.3548368811607361, | |
| "rewards/tag_count_reward/mean": 0.996875, | |
| "rewards/tag_count_reward/std": 0.04124387204647064, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 445.8, | |
| "completions/max_terminated_length": 445.8, | |
| "completions/mean_length": 220.11796875, | |
| "completions/mean_terminated_length": 220.11796875, | |
| "completions/min_length": 59.8, | |
| "completions/min_terminated_length": 59.8, | |
| "epoch": 0.06170118995152049, | |
| "grad_norm": 0.2691159080285212, | |
| "kl": 0.005501174926757812, | |
| "learning_rate": 5.964912280701754e-07, | |
| "loss": -0.0083, | |
| "num_tokens": 5008025.0, | |
| "reward": 0.8268720507621765, | |
| "reward_std": 0.08243840038776398, | |
| "rewards/format_reward/mean": 0.99609375, | |
| "rewards/format_reward/std": 0.0625, | |
| "rewards/qatch_metrics/mean": 0.7969059944152832, | |
| "rewards/qatch_metrics/std": 0.30500164330005647, | |
| "rewards/tag_count_reward/mean": 0.9978515625, | |
| "rewards/tag_count_reward/std": 0.03437500074505806, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 487.2, | |
| "completions/max_terminated_length": 487.2, | |
| "completions/mean_length": 227.76015625, | |
| "completions/mean_terminated_length": 227.76015625, | |
| "completions/min_length": 83.4, | |
| "completions/min_terminated_length": 83.4, | |
| "epoch": 0.07051564565888056, | |
| "grad_norm": 0.33908836616855625, | |
| "kl": 0.002800750732421875, | |
| "learning_rate": 6.842105263157895e-07, | |
| "loss": 0.0002, | |
| "num_tokens": 5774806.0, | |
| "reward": 0.7647829532623291, | |
| "reward_std": 0.09533883556723595, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.025, | |
| "rewards/qatch_metrics/mean": 0.7235268354415894, | |
| "rewards/qatch_metrics/std": 0.35323665738105775, | |
| "rewards/tag_count_reward/mean": 0.998828125, | |
| "rewards/tag_count_reward/std": 0.01875, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 476.2, | |
| "completions/max_terminated_length": 476.2, | |
| "completions/mean_length": 221.7984375, | |
| "completions/mean_terminated_length": 221.7984375, | |
| "completions/min_length": 83.4, | |
| "completions/min_terminated_length": 83.4, | |
| "epoch": 0.07933010136624064, | |
| "grad_norm": 0.3262303740341099, | |
| "kl": 0.00310516357421875, | |
| "learning_rate": 7.719298245614034e-07, | |
| "loss": 0.0104, | |
| "num_tokens": 6557268.0, | |
| "reward": 0.7565465092658996, | |
| "reward_std": 0.09911727011203766, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7135841250419617, | |
| "rewards/qatch_metrics/std": 0.37862626910209657, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 512.8, | |
| "completions/max_terminated_length": 512.8, | |
| "completions/mean_length": 228.45546875, | |
| "completions/mean_terminated_length": 228.45546875, | |
| "completions/min_length": 76.0, | |
| "completions/min_terminated_length": 76.0, | |
| "epoch": 0.08814455707360071, | |
| "grad_norm": 0.23276410584015308, | |
| "kl": 0.00273895263671875, | |
| "learning_rate": 8.596491228070175e-07, | |
| "loss": -0.0018, | |
| "num_tokens": 7327499.0, | |
| "reward": 0.7988326072692871, | |
| "reward_std": 0.06667622029781342, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.025, | |
| "rewards/qatch_metrics/mean": 0.7635622501373291, | |
| "rewards/qatch_metrics/std": 0.369570130109787, | |
| "rewards/tag_count_reward/mean": 0.99921875, | |
| "rewards/tag_count_reward/std": 0.0125, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 483.8, | |
| "completions/max_terminated_length": 483.8, | |
| "completions/mean_length": 220.52734375, | |
| "completions/mean_terminated_length": 220.52734375, | |
| "completions/min_length": 81.2, | |
| "completions/min_terminated_length": 81.2, | |
| "epoch": 0.09695901278096078, | |
| "grad_norm": 0.28218074028465906, | |
| "kl": 0.00196533203125, | |
| "learning_rate": 9.473684210526315e-07, | |
| "loss": -0.0021, | |
| "num_tokens": 8077390.0, | |
| "reward": 0.8159880757331848, | |
| "reward_std": 0.10231453701853752, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7835153818130494, | |
| "rewards/qatch_metrics/std": 0.33782891631126405, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 481.2, | |
| "completions/max_terminated_length": 481.2, | |
| "completions/mean_length": 223.60703125, | |
| "completions/mean_terminated_length": 223.60703125, | |
| "completions/min_length": 75.6, | |
| "completions/min_terminated_length": 75.6, | |
| "epoch": 0.10577346848832085, | |
| "grad_norm": 0.23258401790732933, | |
| "kl": 0.00223388671875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0045, | |
| "num_tokens": 8800407.0, | |
| "reward": 0.74871985912323, | |
| "reward_std": 0.09312780797481537, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7043763160705566, | |
| "rewards/qatch_metrics/std": 0.39227073788642886, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 487.8, | |
| "completions/max_terminated_length": 487.8, | |
| "completions/mean_length": 222.81015625, | |
| "completions/mean_terminated_length": 222.81015625, | |
| "completions/min_length": 77.4, | |
| "completions/min_terminated_length": 77.4, | |
| "epoch": 0.11458792419568092, | |
| "grad_norm": 0.22445170455470606, | |
| "kl": 0.002956390380859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0057, | |
| "num_tokens": 9557380.0, | |
| "reward": 0.8077908515930176, | |
| "reward_std": 0.09828853458166123, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.025, | |
| "rewards/qatch_metrics/mean": 0.774078369140625, | |
| "rewards/qatch_metrics/std": 0.33206661343574523, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 492.6, | |
| "completions/max_terminated_length": 492.6, | |
| "completions/mean_length": 231.83984375, | |
| "completions/mean_terminated_length": 231.83984375, | |
| "completions/min_length": 94.6, | |
| "completions/min_terminated_length": 94.6, | |
| "epoch": 0.12340237990304098, | |
| "grad_norm": 0.22832903725685313, | |
| "kl": 0.00381317138671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0025, | |
| "num_tokens": 10339127.0, | |
| "reward": 0.7895300030708313, | |
| "reward_std": 0.10415169298648834, | |
| "rewards/format_reward/mean": 0.9984375, | |
| "rewards/format_reward/std": 0.025, | |
| "rewards/qatch_metrics/mean": 0.7526065230369567, | |
| "rewards/qatch_metrics/std": 0.3542828977108002, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.009375, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 521.8, | |
| "completions/max_terminated_length": 521.8, | |
| "completions/mean_length": 236.3125, | |
| "completions/mean_terminated_length": 236.3125, | |
| "completions/min_length": 80.4, | |
| "completions/min_terminated_length": 80.4, | |
| "epoch": 0.13221683561040107, | |
| "grad_norm": 0.2597151805235052, | |
| "kl": 0.00432281494140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0083, | |
| "num_tokens": 11147287.0, | |
| "reward": 0.7333161950111389, | |
| "reward_std": 0.08832715749740601, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6862887978553772, | |
| "rewards/qatch_metrics/std": 0.36336439847946167, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.0069767430424690245, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 445.6, | |
| "completions/max_terminated_length": 445.6, | |
| "completions/mean_length": 216.43984375, | |
| "completions/mean_terminated_length": 216.43984375, | |
| "completions/min_length": 87.8, | |
| "completions/min_terminated_length": 87.8, | |
| "epoch": 0.14103129131776113, | |
| "grad_norm": 0.2463929158667687, | |
| "kl": 0.00528717041015625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0044, | |
| "num_tokens": 11891066.0, | |
| "reward": 0.8300724029541016, | |
| "reward_std": 0.09615504890680313, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8000851631164551, | |
| "rewards/qatch_metrics/std": 0.3208737909793854, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 491.6, | |
| "completions/max_terminated_length": 491.6, | |
| "completions/mean_length": 225.32890625, | |
| "completions/mean_terminated_length": 225.32890625, | |
| "completions/min_length": 86.2, | |
| "completions/min_terminated_length": 86.2, | |
| "epoch": 0.1498457470251212, | |
| "grad_norm": 0.22719354366888944, | |
| "kl": 0.005328369140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0129, | |
| "num_tokens": 12668159.0, | |
| "reward": 0.816937243938446, | |
| "reward_std": 0.08283708170056343, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7846320390701294, | |
| "rewards/qatch_metrics/std": 0.32469419240951536, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 460.4, | |
| "completions/max_terminated_length": 460.4, | |
| "completions/mean_length": 217.92890625, | |
| "completions/mean_terminated_length": 217.92890625, | |
| "completions/min_length": 76.2, | |
| "completions/min_terminated_length": 76.2, | |
| "epoch": 0.15866020273248127, | |
| "grad_norm": 0.2721517170479785, | |
| "kl": 0.00579071044921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0117, | |
| "num_tokens": 13413588.0, | |
| "reward": 0.7426301956176757, | |
| "reward_std": 0.0905102699995041, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.6972119808197021, | |
| "rewards/qatch_metrics/std": 0.37120566368103025, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 428.6, | |
| "completions/max_terminated_length": 428.6, | |
| "completions/mean_length": 204.6640625, | |
| "completions/mean_terminated_length": 204.6640625, | |
| "completions/min_length": 75.6, | |
| "completions/min_terminated_length": 75.6, | |
| "epoch": 0.16747465843984133, | |
| "grad_norm": 0.2525985499058037, | |
| "kl": 0.0056243896484375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0012, | |
| "num_tokens": 14111606.0, | |
| "reward": 0.7979554295539856, | |
| "reward_std": 0.06609301418066024, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7623119950294495, | |
| "rewards/qatch_metrics/std": 0.34469759464263916, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 431.0, | |
| "completions/max_terminated_length": 431.0, | |
| "completions/mean_length": 212.34765625, | |
| "completions/mean_terminated_length": 212.34765625, | |
| "completions/min_length": 69.2, | |
| "completions/min_terminated_length": 69.2, | |
| "epoch": 0.17628911414720141, | |
| "grad_norm": 0.30357672091416305, | |
| "kl": 0.0057861328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0083, | |
| "num_tokens": 14876659.0, | |
| "reward": 0.7724857568740845, | |
| "reward_std": 0.09265935122966766, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7323476672172546, | |
| "rewards/qatch_metrics/std": 0.33567925691604616, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.4, | |
| "completions/max_terminated_length": 463.4, | |
| "completions/mean_length": 216.46875, | |
| "completions/mean_terminated_length": 216.46875, | |
| "completions/min_length": 80.4, | |
| "completions/min_terminated_length": 80.4, | |
| "epoch": 0.18510356985456147, | |
| "grad_norm": 0.23780324977532238, | |
| "kl": 0.0056549072265625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0087, | |
| "num_tokens": 15600331.0, | |
| "reward": 0.7508906722068787, | |
| "reward_std": 0.0951332688331604, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7069302201271057, | |
| "rewards/qatch_metrics/std": 0.38108278512954713, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 442.4, | |
| "completions/max_terminated_length": 442.4, | |
| "completions/mean_length": 216.578125, | |
| "completions/mean_terminated_length": 216.578125, | |
| "completions/min_length": 80.2, | |
| "completions/min_terminated_length": 80.2, | |
| "epoch": 0.19391802556192156, | |
| "grad_norm": 0.21716869090526136, | |
| "kl": 0.0054229736328125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0045, | |
| "num_tokens": 16326015.0, | |
| "reward": 0.8402611017227173, | |
| "reward_std": 0.05716411247849464, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8120718836784363, | |
| "rewards/qatch_metrics/std": 0.2929441839456558, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 428.2, | |
| "completions/max_terminated_length": 428.2, | |
| "completions/mean_length": 222.0265625, | |
| "completions/mean_terminated_length": 222.0265625, | |
| "completions/min_length": 78.0, | |
| "completions/min_terminated_length": 78.0, | |
| "epoch": 0.20273248126928162, | |
| "grad_norm": 0.22835452896575356, | |
| "kl": 0.0060882568359375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0017, | |
| "num_tokens": 17091921.0, | |
| "reward": 0.8265595078468323, | |
| "reward_std": 0.07398260906338691, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7959523558616638, | |
| "rewards/qatch_metrics/std": 0.3277123510837555, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 459.8, | |
| "completions/max_terminated_length": 459.8, | |
| "completions/mean_length": 220.7453125, | |
| "completions/mean_terminated_length": 220.7453125, | |
| "completions/min_length": 87.0, | |
| "completions/min_terminated_length": 87.0, | |
| "epoch": 0.2115469369766417, | |
| "grad_norm": 0.22726862373109216, | |
| "kl": 0.006689453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0043, | |
| "num_tokens": 17877371.0, | |
| "reward": 0.8397867679595947, | |
| "reward_std": 0.09087342023849487, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8115137934684753, | |
| "rewards/qatch_metrics/std": 0.3017837733030319, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 491.4, | |
| "completions/max_terminated_length": 491.4, | |
| "completions/mean_length": 225.2140625, | |
| "completions/mean_terminated_length": 225.2140625, | |
| "completions/min_length": 75.4, | |
| "completions/min_terminated_length": 75.4, | |
| "epoch": 0.22036139268400176, | |
| "grad_norm": 0.2004953082769917, | |
| "kl": 0.00776519775390625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0056, | |
| "num_tokens": 18623005.0, | |
| "reward": 0.8202541828155517, | |
| "reward_std": 0.07537120208144188, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7886492252349854, | |
| "rewards/qatch_metrics/std": 0.32776339948177335, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 456.8, | |
| "completions/max_terminated_length": 456.8, | |
| "completions/mean_length": 223.48203125, | |
| "completions/mean_terminated_length": 223.48203125, | |
| "completions/min_length": 78.2, | |
| "completions/min_terminated_length": 78.2, | |
| "epoch": 0.22917584839136185, | |
| "grad_norm": 0.2341532579835068, | |
| "kl": 0.00804290771484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0096, | |
| "num_tokens": 19349606.0, | |
| "reward": 0.8026262044906616, | |
| "reward_std": 0.06839245334267616, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7679218888282776, | |
| "rewards/qatch_metrics/std": 0.3324147403240204, | |
| "rewards/tag_count_reward/mean": 0.9994140625, | |
| "rewards/tag_count_reward/std": 0.009375, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 458.4, | |
| "completions/max_terminated_length": 458.4, | |
| "completions/mean_length": 216.72578125, | |
| "completions/mean_terminated_length": 216.72578125, | |
| "completions/min_length": 86.2, | |
| "completions/min_terminated_length": 86.2, | |
| "epoch": 0.2379903040987219, | |
| "grad_norm": 0.23655650548465582, | |
| "kl": 0.0078033447265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 20092311.0, | |
| "reward": 0.8197526335716248, | |
| "reward_std": 0.0839143767952919, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7879442930221557, | |
| "rewards/qatch_metrics/std": 0.3431123554706573, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 454.6, | |
| "completions/max_terminated_length": 454.6, | |
| "completions/mean_length": 204.48984375, | |
| "completions/mean_terminated_length": 204.48984375, | |
| "completions/min_length": 79.0, | |
| "completions/min_terminated_length": 79.0, | |
| "epoch": 0.24680475980608196, | |
| "grad_norm": 0.2641797202959811, | |
| "kl": 0.00862884521484375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0051, | |
| "num_tokens": 20821962.0, | |
| "reward": 0.8242111682891846, | |
| "reward_std": 0.07407020255923272, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7931895971298217, | |
| "rewards/qatch_metrics/std": 0.3176054835319519, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 443.2, | |
| "completions/max_terminated_length": 443.2, | |
| "completions/mean_length": 203.590625, | |
| "completions/mean_terminated_length": 203.590625, | |
| "completions/min_length": 86.6, | |
| "completions/min_terminated_length": 86.6, | |
| "epoch": 0.255619215513442, | |
| "grad_norm": 0.263066002535131, | |
| "kl": 0.009637451171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0065, | |
| "num_tokens": 21526046.0, | |
| "reward": 0.7875781059265137, | |
| "reward_std": 0.09901705384254456, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7501148462295533, | |
| "rewards/qatch_metrics/std": 0.3672972857952118, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 448.4, | |
| "completions/max_terminated_length": 448.4, | |
| "completions/mean_length": 208.90546875, | |
| "completions/mean_terminated_length": 208.90546875, | |
| "completions/min_length": 73.2, | |
| "completions/min_terminated_length": 73.2, | |
| "epoch": 0.26443367122080214, | |
| "grad_norm": 0.2798500312218402, | |
| "kl": 0.01026153564453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0003, | |
| "num_tokens": 22271333.0, | |
| "reward": 0.818337082862854, | |
| "reward_std": 0.07784928977489472, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7862788915634156, | |
| "rewards/qatch_metrics/std": 0.3341992735862732, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 494.2, | |
| "completions/max_terminated_length": 494.2, | |
| "completions/mean_length": 209.651953125, | |
| "completions/mean_terminated_length": 209.651953125, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 0.5464962538563244, | |
| "grad_norm": 0.2122029879190087, | |
| "kl": 0.010993194580078126, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0126, | |
| "num_tokens": 23726666.0, | |
| "reward": 0.811666476726532, | |
| "reward_std": 0.0841904804110527, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7784311413764954, | |
| "rewards/qatch_metrics/std": 0.32770459055900575, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 452.6, | |
| "completions/max_terminated_length": 452.6, | |
| "completions/mean_length": 217.9859375, | |
| "completions/mean_terminated_length": 217.9859375, | |
| "completions/min_length": 75.8, | |
| "completions/min_terminated_length": 75.8, | |
| "epoch": 0.5641251652710445, | |
| "grad_norm": 0.15403477284537095, | |
| "kl": 0.00980377197265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0008, | |
| "num_tokens": 25239750.0, | |
| "reward": 0.7868865132331848, | |
| "reward_std": 0.07244862839579583, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7492782354354859, | |
| "rewards/qatch_metrics/std": 0.3493395745754242, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 511.0, | |
| "completions/max_terminated_length": 511.0, | |
| "completions/mean_length": 208.38984375, | |
| "completions/mean_terminated_length": 208.38984375, | |
| "completions/min_length": 58.4, | |
| "completions/min_terminated_length": 58.4, | |
| "epoch": 0.5817540766857646, | |
| "grad_norm": 0.18706575889421317, | |
| "kl": 0.00914154052734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0072, | |
| "num_tokens": 26687596.0, | |
| "reward": 0.828769075870514, | |
| "reward_std": 0.07729479111731052, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7985518336296081, | |
| "rewards/qatch_metrics/std": 0.29670341312885284, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 505.0, | |
| "completions/max_terminated_length": 505.0, | |
| "completions/mean_length": 206.281640625, | |
| "completions/mean_terminated_length": 206.281640625, | |
| "completions/min_length": 79.0, | |
| "completions/min_terminated_length": 79.0, | |
| "epoch": 0.5993829881004848, | |
| "grad_norm": 0.19776450858978561, | |
| "kl": 0.01090240478515625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0105, | |
| "num_tokens": 28175773.0, | |
| "reward": 0.8511051416397095, | |
| "reward_std": 0.07431531846523284, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8248295664787293, | |
| "rewards/qatch_metrics/std": 0.3192874014377594, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 462.2, | |
| "completions/max_terminated_length": 462.2, | |
| "completions/mean_length": 219.3890625, | |
| "completions/mean_terminated_length": 219.3890625, | |
| "completions/min_length": 70.8, | |
| "completions/min_terminated_length": 70.8, | |
| "epoch": 0.617011899515205, | |
| "grad_norm": 0.15290022120008429, | |
| "kl": 0.01065216064453125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0047, | |
| "num_tokens": 29739969.0, | |
| "reward": 0.8426113128662109, | |
| "reward_std": 0.09004694148898125, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.814836847782135, | |
| "rewards/qatch_metrics/std": 0.309688937664032, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 458.8, | |
| "completions/max_terminated_length": 458.8, | |
| "completions/mean_length": 211.655078125, | |
| "completions/mean_terminated_length": 211.655078125, | |
| "completions/min_length": 73.4, | |
| "completions/min_terminated_length": 73.4, | |
| "epoch": 0.6346408109299251, | |
| "grad_norm": 0.17923424569681315, | |
| "kl": 0.0114501953125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.01, | |
| "num_tokens": 31191502.0, | |
| "reward": 0.8262084484100342, | |
| "reward_std": 0.08637549504637718, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7955393195152283, | |
| "rewards/qatch_metrics/std": 0.3134327620267868, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 499.2, | |
| "completions/max_terminated_length": 499.2, | |
| "completions/mean_length": 215.95546875, | |
| "completions/mean_terminated_length": 215.95546875, | |
| "completions/min_length": 78.0, | |
| "completions/min_terminated_length": 78.0, | |
| "epoch": 0.6522697223446452, | |
| "grad_norm": 0.1321015357675111, | |
| "kl": 0.012237548828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0021, | |
| "num_tokens": 32694108.0, | |
| "reward": 0.7994898676872253, | |
| "reward_std": 0.08254800513386726, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.764105749130249, | |
| "rewards/qatch_metrics/std": 0.3532308578491211, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 442.2, | |
| "completions/max_terminated_length": 442.2, | |
| "completions/mean_length": 209.90078125, | |
| "completions/mean_terminated_length": 209.90078125, | |
| "completions/min_length": 76.6, | |
| "completions/min_terminated_length": 76.6, | |
| "epoch": 0.6698986337593653, | |
| "grad_norm": 0.22256806005967145, | |
| "kl": 0.01057586669921875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0013, | |
| "num_tokens": 34144670.0, | |
| "reward": 0.7911163926124573, | |
| "reward_std": 0.06518566869199276, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7542545795440674, | |
| "rewards/qatch_metrics/std": 0.35398219227790834, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 476.2, | |
| "completions/max_terminated_length": 476.2, | |
| "completions/mean_length": 208.534765625, | |
| "completions/mean_terminated_length": 208.534765625, | |
| "completions/min_length": 77.8, | |
| "completions/min_terminated_length": 77.8, | |
| "epoch": 0.6875275451740855, | |
| "grad_norm": 0.17237028945675698, | |
| "kl": 0.0087860107421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0069, | |
| "num_tokens": 35620023.0, | |
| "reward": 0.8418472170829773, | |
| "reward_std": 0.08243692219257355, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8139379024505615, | |
| "rewards/qatch_metrics/std": 0.336453515291214, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 514.6, | |
| "completions/max_terminated_length": 514.6, | |
| "completions/mean_length": 217.3328125, | |
| "completions/mean_terminated_length": 217.3328125, | |
| "completions/min_length": 90.4, | |
| "completions/min_terminated_length": 90.4, | |
| "epoch": 0.7051564565888057, | |
| "grad_norm": 0.19274445010407998, | |
| "kl": 0.009130859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0053, | |
| "num_tokens": 37166635.0, | |
| "reward": 0.8295193314552307, | |
| "reward_std": 0.06927115023136139, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7994345307350159, | |
| "rewards/qatch_metrics/std": 0.3011426508426666, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 499.8, | |
| "completions/max_terminated_length": 499.8, | |
| "completions/mean_length": 212.651171875, | |
| "completions/mean_terminated_length": 212.651171875, | |
| "completions/min_length": 68.6, | |
| "completions/min_terminated_length": 68.6, | |
| "epoch": 0.7227853680035258, | |
| "grad_norm": 0.13990900967805797, | |
| "kl": 0.0087432861328125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0027, | |
| "num_tokens": 38617966.0, | |
| "reward": 0.8151894211769104, | |
| "reward_std": 0.07495353966951371, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7825757980346679, | |
| "rewards/qatch_metrics/std": 0.33874245882034304, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 560.4, | |
| "completions/max_terminated_length": 560.4, | |
| "completions/mean_length": 223.7015625, | |
| "completions/mean_terminated_length": 223.7015625, | |
| "completions/min_length": 74.6, | |
| "completions/min_terminated_length": 74.6, | |
| "epoch": 0.7404142794182459, | |
| "grad_norm": 0.20163985914598806, | |
| "kl": 0.00806884765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0054, | |
| "num_tokens": 40092050.0, | |
| "reward": 0.8460610270500183, | |
| "reward_std": 0.05867695920169354, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8188953161239624, | |
| "rewards/qatch_metrics/std": 0.3239317536354065, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 486.6, | |
| "completions/max_terminated_length": 486.6, | |
| "completions/mean_length": 215.6828125, | |
| "completions/mean_terminated_length": 215.6828125, | |
| "completions/min_length": 82.2, | |
| "completions/min_terminated_length": 82.2, | |
| "epoch": 0.7580431908329661, | |
| "grad_norm": 0.17564998217230318, | |
| "kl": 0.009525299072265625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0034, | |
| "num_tokens": 41565542.0, | |
| "reward": 0.799136507511139, | |
| "reward_std": 0.06419738680124283, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7636899828910828, | |
| "rewards/qatch_metrics/std": 0.3342160403728485, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 503.4, | |
| "completions/max_terminated_length": 503.4, | |
| "completions/mean_length": 233.409765625, | |
| "completions/mean_terminated_length": 233.409765625, | |
| "completions/min_length": 91.0, | |
| "completions/min_terminated_length": 91.0, | |
| "epoch": 0.7756721022476862, | |
| "grad_norm": 0.19283324501226842, | |
| "kl": 0.009130096435546875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0109, | |
| "num_tokens": 43081919.0, | |
| "reward": 0.7851791024208069, | |
| "reward_std": 0.07570808604359627, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7472695469856262, | |
| "rewards/qatch_metrics/std": 0.36822828054428103, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 448.4, | |
| "completions/max_terminated_length": 448.4, | |
| "completions/mean_length": 224.728125, | |
| "completions/mean_terminated_length": 224.728125, | |
| "completions/min_length": 81.0, | |
| "completions/min_terminated_length": 81.0, | |
| "epoch": 0.7933010136624064, | |
| "grad_norm": 0.17754847688569442, | |
| "kl": 0.009470367431640625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.002, | |
| "num_tokens": 44606439.0, | |
| "reward": 0.8152384400367737, | |
| "reward_std": 0.09764492362737656, | |
| "rewards/format_reward/mean": 0.999609375, | |
| "rewards/format_reward/std": 0.00883883461356163, | |
| "rewards/qatch_metrics/mean": 0.7826851725578308, | |
| "rewards/qatch_metrics/std": 0.3263732075691223, | |
| "rewards/tag_count_reward/mean": 0.99990234375, | |
| "rewards/tag_count_reward/std": 0.0022097086533904076, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 503.2, | |
| "completions/max_terminated_length": 503.2, | |
| "completions/mean_length": 218.58203125, | |
| "completions/mean_terminated_length": 218.58203125, | |
| "completions/min_length": 69.6, | |
| "completions/min_terminated_length": 69.6, | |
| "epoch": 0.8109299250771265, | |
| "grad_norm": 0.19017267970498908, | |
| "kl": 0.009508514404296875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0009, | |
| "num_tokens": 46095257.0, | |
| "reward": 0.8068280577659607, | |
| "reward_std": 0.0781441181898117, | |
| "rewards/format_reward/mean": 0.999609375, | |
| "rewards/format_reward/std": 0.00883883461356163, | |
| "rewards/qatch_metrics/mean": 0.7728020906448364, | |
| "rewards/qatch_metrics/std": 0.3386655867099762, | |
| "rewards/tag_count_reward/mean": 0.99970703125, | |
| "rewards/tag_count_reward/std": 0.006629125773906707, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 456.6, | |
| "completions/max_terminated_length": 456.6, | |
| "completions/mean_length": 204.84375, | |
| "completions/mean_terminated_length": 204.84375, | |
| "completions/min_length": 72.6, | |
| "completions/min_terminated_length": 72.6, | |
| "epoch": 0.8285588364918466, | |
| "grad_norm": 0.1678878918468119, | |
| "kl": 0.009729766845703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0041, | |
| "num_tokens": 47519433.0, | |
| "reward": 0.8672606706619262, | |
| "reward_std": 0.0644603468477726, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8438360691070557, | |
| "rewards/qatch_metrics/std": 0.2717843741178513, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 526.2, | |
| "completions/max_terminated_length": 526.2, | |
| "completions/mean_length": 214.90625, | |
| "completions/mean_terminated_length": 214.90625, | |
| "completions/min_length": 66.8, | |
| "completions/min_terminated_length": 66.8, | |
| "epoch": 0.8461877479065668, | |
| "grad_norm": 0.18169011669761398, | |
| "kl": 0.01288604736328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0035, | |
| "num_tokens": 48943993.0, | |
| "reward": 0.8558493018150329, | |
| "reward_std": 0.07027828097343444, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8304109454154969, | |
| "rewards/qatch_metrics/std": 0.301141357421875, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.6, | |
| "completions/max_terminated_length": 472.6, | |
| "completions/mean_length": 212.559765625, | |
| "completions/mean_terminated_length": 212.559765625, | |
| "completions/min_length": 76.8, | |
| "completions/min_terminated_length": 76.8, | |
| "epoch": 0.8638166593212869, | |
| "grad_norm": 0.2046340854229955, | |
| "kl": 0.01494140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.006, | |
| "num_tokens": 50416114.0, | |
| "reward": 0.831060528755188, | |
| "reward_std": 0.07754805404692888, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8012476563453674, | |
| "rewards/qatch_metrics/std": 0.3293557226657867, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 503.2, | |
| "completions/max_terminated_length": 503.2, | |
| "completions/mean_length": 222.1375, | |
| "completions/mean_terminated_length": 222.1375, | |
| "completions/min_length": 83.2, | |
| "completions/min_terminated_length": 83.2, | |
| "epoch": 0.881445570736007, | |
| "grad_norm": 0.15161264539796646, | |
| "kl": 0.0138031005859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 51932274.0, | |
| "reward": 0.8422249555587769, | |
| "reward_std": 0.06234893724322319, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8143823027610779, | |
| "rewards/qatch_metrics/std": 0.2993943512439728, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.6, | |
| "completions/max_terminated_length": 463.6, | |
| "completions/mean_length": 231.19609375, | |
| "completions/mean_terminated_length": 231.19609375, | |
| "completions/min_length": 77.4, | |
| "completions/min_terminated_length": 77.4, | |
| "epoch": 0.8990744821507272, | |
| "grad_norm": 0.20035266636054513, | |
| "kl": 0.011871337890625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0003, | |
| "num_tokens": 53450248.0, | |
| "reward": 0.8096501588821411, | |
| "reward_std": 0.06698438860476016, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7760589838027954, | |
| "rewards/qatch_metrics/std": 0.3199191153049469, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 471.2, | |
| "completions/max_terminated_length": 471.2, | |
| "completions/mean_length": 237.80234375, | |
| "completions/mean_terminated_length": 237.80234375, | |
| "completions/min_length": 82.2, | |
| "completions/min_terminated_length": 82.2, | |
| "epoch": 0.9167033935654474, | |
| "grad_norm": 0.0856229450795828, | |
| "kl": 0.011614227294921875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0017, | |
| "num_tokens": 54970542.0, | |
| "reward": 0.8725608706474304, | |
| "reward_std": 0.051827043667435645, | |
| "rewards/format_reward/mean": 0.999609375, | |
| "rewards/format_reward/std": 0.00883883461356163, | |
| "rewards/qatch_metrics/mean": 0.8501232981681823, | |
| "rewards/qatch_metrics/std": 0.26386110931634904, | |
| "rewards/tag_count_reward/mean": 0.99990234375, | |
| "rewards/tag_count_reward/std": 0.0022097086533904076, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 476.6, | |
| "completions/max_terminated_length": 476.6, | |
| "completions/mean_length": 231.53671875, | |
| "completions/mean_terminated_length": 231.53671875, | |
| "completions/min_length": 79.0, | |
| "completions/min_terminated_length": 79.0, | |
| "epoch": 0.9343323049801675, | |
| "grad_norm": 0.17178453068271043, | |
| "kl": 0.010117340087890624, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0063, | |
| "num_tokens": 56485356.0, | |
| "reward": 0.8532873392105103, | |
| "reward_std": 0.07009301483631133, | |
| "rewards/format_reward/mean": 0.999609375, | |
| "rewards/format_reward/std": 0.00883883461356163, | |
| "rewards/qatch_metrics/mean": 0.8274485826492309, | |
| "rewards/qatch_metrics/std": 0.31240676045417787, | |
| "rewards/tag_count_reward/mean": 0.99990234375, | |
| "rewards/tag_count_reward/std": 0.0022097086533904076, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 459.6, | |
| "completions/max_terminated_length": 459.6, | |
| "completions/mean_length": 220.95234375, | |
| "completions/mean_terminated_length": 220.95234375, | |
| "completions/min_length": 68.6, | |
| "completions/min_terminated_length": 68.6, | |
| "epoch": 0.9519612163948876, | |
| "grad_norm": 0.15364550208264494, | |
| "kl": 0.00984039306640625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0031, | |
| "num_tokens": 57953010.0, | |
| "reward": 0.868242597579956, | |
| "reward_std": 0.06916632130742073, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8449912905693054, | |
| "rewards/qatch_metrics/std": 0.2899660974740982, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 423.2, | |
| "completions/max_terminated_length": 423.2, | |
| "completions/mean_length": 225.621875, | |
| "completions/mean_terminated_length": 225.621875, | |
| "completions/min_length": 88.8, | |
| "completions/min_terminated_length": 88.8, | |
| "epoch": 0.48479506390480387, | |
| "grad_norm": 0.17697767584196022, | |
| "kl": 0.00970916748046875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0059, | |
| "num_tokens": 58736110.0, | |
| "reward": 0.8460039258003235, | |
| "reward_std": 0.055821475386619565, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8188281297683716, | |
| "rewards/qatch_metrics/std": 0.30660555958747865, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 504.0, | |
| "completions/max_terminated_length": 504.0, | |
| "completions/mean_length": 223.92734375, | |
| "completions/mean_terminated_length": 223.92734375, | |
| "completions/min_length": 77.8, | |
| "completions/min_terminated_length": 77.8, | |
| "epoch": 0.4936095196121639, | |
| "grad_norm": 0.2692630701899735, | |
| "kl": 0.0131378173828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0021, | |
| "num_tokens": 59498897.0, | |
| "reward": 0.7988754034042358, | |
| "reward_std": 0.08376505076885224, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7633828163146973, | |
| "rewards/qatch_metrics/std": 0.3335907101631165, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 434.8, | |
| "completions/max_terminated_length": 434.8, | |
| "completions/mean_length": 223.48125, | |
| "completions/mean_terminated_length": 223.48125, | |
| "completions/min_length": 83.2, | |
| "completions/min_terminated_length": 83.2, | |
| "epoch": 0.502423975319524, | |
| "grad_norm": 0.2666009697829767, | |
| "kl": 0.0107269287109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 60277897.0, | |
| "reward": 0.7720089554786682, | |
| "reward_std": 0.0594131164252758, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7317867279052734, | |
| "rewards/qatch_metrics/std": 0.33845625519752504, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 481.0, | |
| "completions/max_terminated_length": 481.0, | |
| "completions/mean_length": 219.62421875, | |
| "completions/mean_terminated_length": 219.62421875, | |
| "completions/min_length": 91.0, | |
| "completions/min_terminated_length": 91.0, | |
| "epoch": 0.511238431026884, | |
| "grad_norm": 0.16876063412105669, | |
| "kl": 0.01141357421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 61033560.0, | |
| "reward": 0.7902166962623596, | |
| "reward_std": 0.0687429528683424, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7531961083412171, | |
| "rewards/qatch_metrics/std": 0.37054654359817507, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 471.2, | |
| "completions/max_terminated_length": 471.2, | |
| "completions/mean_length": 226.275, | |
| "completions/mean_terminated_length": 226.275, | |
| "completions/min_length": 80.6, | |
| "completions/min_terminated_length": 80.6, | |
| "epoch": 0.5200528867342442, | |
| "grad_norm": 0.26818466602074054, | |
| "kl": 0.0130706787109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 61786008.0, | |
| "reward": 0.7699209451675415, | |
| "reward_std": 0.07550354823470115, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.7294221520423889, | |
| "rewards/qatch_metrics/std": 0.3492735385894775, | |
| "rewards/tag_count_reward/mean": 0.9998046875, | |
| "rewards/tag_count_reward/std": 0.003125, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 484.6, | |
| "completions/max_terminated_length": 484.6, | |
| "completions/mean_length": 247.2328125, | |
| "completions/mean_terminated_length": 247.2328125, | |
| "completions/min_length": 95.8, | |
| "completions/min_terminated_length": 95.8, | |
| "epoch": 0.5288673424416043, | |
| "grad_norm": 0.16485515882678206, | |
| "kl": 0.0113006591796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0007, | |
| "num_tokens": 62590434.0, | |
| "reward": 0.8454334974288941, | |
| "reward_std": 0.0570029616355896, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8181570172309875, | |
| "rewards/qatch_metrics/std": 0.2992805689573288, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 480.6, | |
| "completions/max_terminated_length": 480.6, | |
| "completions/mean_length": 235.16015625, | |
| "completions/mean_terminated_length": 235.16015625, | |
| "completions/min_length": 88.0, | |
| "completions/min_terminated_length": 88.0, | |
| "epoch": 0.5376817981489643, | |
| "grad_norm": 0.27561378534620606, | |
| "kl": 0.01141510009765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0097, | |
| "num_tokens": 63366287.0, | |
| "reward": 0.8380108118057251, | |
| "reward_std": 0.07530387155711651, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8094244718551635, | |
| "rewards/qatch_metrics/std": 0.30977231860160825, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 458.6, | |
| "completions/max_terminated_length": 458.6, | |
| "completions/mean_length": 215.646875, | |
| "completions/mean_terminated_length": 215.646875, | |
| "completions/min_length": 79.4, | |
| "completions/min_terminated_length": 79.4, | |
| "epoch": 0.5464962538563244, | |
| "grad_norm": 0.2018916915779266, | |
| "kl": 0.013714599609375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0045, | |
| "num_tokens": 64097387.0, | |
| "reward": 0.8135073184967041, | |
| "reward_std": 0.05950811579823494, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7806198120117187, | |
| "rewards/qatch_metrics/std": 0.33523867428302767, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.4, | |
| "completions/max_terminated_length": 447.4, | |
| "completions/mean_length": 223.68515625, | |
| "completions/mean_terminated_length": 223.68515625, | |
| "completions/min_length": 92.4, | |
| "completions/min_terminated_length": 92.4, | |
| "epoch": 0.5553107095636844, | |
| "grad_norm": 0.1836962735356692, | |
| "kl": 0.0138214111328125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0024, | |
| "num_tokens": 64869416.0, | |
| "reward": 0.8333834052085877, | |
| "reward_std": 0.07006162852048874, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8039804816246032, | |
| "rewards/qatch_metrics/std": 0.3219245493412018, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 504.8, | |
| "completions/max_terminated_length": 504.8, | |
| "completions/mean_length": 221.45390625, | |
| "completions/mean_terminated_length": 221.45390625, | |
| "completions/min_length": 80.0, | |
| "completions/min_terminated_length": 80.0, | |
| "epoch": 0.5641251652710445, | |
| "grad_norm": 0.23250178423343035, | |
| "kl": 0.01497802734375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0014, | |
| "num_tokens": 65613165.0, | |
| "reward": 0.8320096850395202, | |
| "reward_std": 0.053499556705355646, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8023643255233764, | |
| "rewards/qatch_metrics/std": 0.3343039393424988, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 466.6, | |
| "completions/max_terminated_length": 466.6, | |
| "completions/mean_length": 220.3984375, | |
| "completions/mean_terminated_length": 220.3984375, | |
| "completions/min_length": 72.6, | |
| "completions/min_terminated_length": 72.6, | |
| "epoch": 0.5729396209784046, | |
| "grad_norm": 0.09740281424559781, | |
| "kl": 0.0155609130859375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0012, | |
| "num_tokens": 66336475.0, | |
| "reward": 0.8796087980270386, | |
| "reward_std": 0.05236431676894426, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8583632946014405, | |
| "rewards/qatch_metrics/std": 0.2817832052707672, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 459.4, | |
| "completions/max_terminated_length": 459.4, | |
| "completions/mean_length": 225.27890625, | |
| "completions/mean_terminated_length": 225.27890625, | |
| "completions/min_length": 78.6, | |
| "completions/min_terminated_length": 78.6, | |
| "epoch": 0.5817540766857646, | |
| "grad_norm": 0.08354955287926201, | |
| "kl": 0.01513671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0095, | |
| "num_tokens": 67098736.0, | |
| "reward": 0.8658102512359619, | |
| "reward_std": 0.07466748803853988, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8421296954154969, | |
| "rewards/qatch_metrics/std": 0.2614422976970673, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.2, | |
| "completions/max_terminated_length": 447.2, | |
| "completions/mean_length": 220.01875, | |
| "completions/mean_terminated_length": 220.01875, | |
| "completions/min_length": 81.8, | |
| "completions/min_terminated_length": 81.8, | |
| "epoch": 0.5905685323931247, | |
| "grad_norm": 0.20574209747901576, | |
| "kl": 0.015081787109375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.011, | |
| "num_tokens": 67847928.0, | |
| "reward": 0.865822184085846, | |
| "reward_std": 0.046268445625901225, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8421437621116639, | |
| "rewards/qatch_metrics/std": 0.29589260220527647, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 432.0, | |
| "completions/max_terminated_length": 432.0, | |
| "completions/mean_length": 213.5953125, | |
| "completions/mean_terminated_length": 213.5953125, | |
| "completions/min_length": 76.0, | |
| "completions/min_terminated_length": 76.0, | |
| "epoch": 0.5993829881004848, | |
| "grad_norm": 0.2039975034177896, | |
| "kl": 0.0161651611328125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0066, | |
| "num_tokens": 68585234.0, | |
| "reward": 0.8343551635742188, | |
| "reward_std": 0.0688902921974659, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8051237106323242, | |
| "rewards/qatch_metrics/std": 0.30847290754318235, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 435.0, | |
| "completions/max_terminated_length": 435.0, | |
| "completions/mean_length": 203.69453125, | |
| "completions/mean_terminated_length": 203.69453125, | |
| "completions/min_length": 76.0, | |
| "completions/min_terminated_length": 76.0, | |
| "epoch": 0.6081974438078449, | |
| "grad_norm": 0.26848084439203446, | |
| "kl": 0.014788818359375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 69338379.0, | |
| "reward": 0.8848124146461487, | |
| "reward_std": 0.06373886093497276, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8644851684570313, | |
| "rewards/qatch_metrics/std": 0.26705425381660464, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 446.8, | |
| "completions/max_terminated_length": 446.8, | |
| "completions/mean_length": 221.1046875, | |
| "completions/mean_terminated_length": 221.1046875, | |
| "completions/min_length": 79.4, | |
| "completions/min_terminated_length": 79.4, | |
| "epoch": 0.617011899515205, | |
| "grad_norm": 0.2363792510293019, | |
| "kl": 0.019024658203125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.002, | |
| "num_tokens": 70095473.0, | |
| "reward": 0.8130708336830139, | |
| "reward_std": 0.08477363213896752, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7800833344459533, | |
| "rewards/qatch_metrics/std": 0.3211198329925537, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 458.4, | |
| "completions/max_terminated_length": 458.4, | |
| "completions/mean_length": 234.00078125, | |
| "completions/mean_terminated_length": 234.00078125, | |
| "completions/min_length": 91.4, | |
| "completions/min_terminated_length": 91.4, | |
| "epoch": 0.625826355222565, | |
| "grad_norm": 0.1856420640121193, | |
| "kl": 0.019122314453125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0036, | |
| "num_tokens": 70860290.0, | |
| "reward": 0.8471660256385803, | |
| "reward_std": 0.0506692998111248, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8201953172683716, | |
| "rewards/qatch_metrics/std": 0.30663308799266814, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 452.8, | |
| "completions/max_terminated_length": 452.8, | |
| "completions/mean_length": 241.72421875, | |
| "completions/mean_terminated_length": 241.72421875, | |
| "completions/min_length": 88.0, | |
| "completions/min_terminated_length": 88.0, | |
| "epoch": 0.6346408109299251, | |
| "grad_norm": 0.22939974521057024, | |
| "kl": 0.01826171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0127, | |
| "num_tokens": 71648401.0, | |
| "reward": 0.8702264785766601, | |
| "reward_std": 0.0592925101518631, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8473252534866333, | |
| "rewards/qatch_metrics/std": 0.28537269234657286, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 434.6, | |
| "completions/max_terminated_length": 434.6, | |
| "completions/mean_length": 215.853125, | |
| "completions/mean_terminated_length": 215.853125, | |
| "completions/min_length": 75.0, | |
| "completions/min_terminated_length": 75.0, | |
| "epoch": 0.6434552666372851, | |
| "grad_norm": 0.19883621919511643, | |
| "kl": 0.0163330078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0073, | |
| "num_tokens": 72382693.0, | |
| "reward": 0.8091506719589233, | |
| "reward_std": 0.0635421834886074, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7754713773727417, | |
| "rewards/qatch_metrics/std": 0.3179103255271912, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 496.8, | |
| "completions/max_terminated_length": 496.8, | |
| "completions/mean_length": 213.1640625, | |
| "completions/mean_terminated_length": 213.1640625, | |
| "completions/min_length": 76.2, | |
| "completions/min_terminated_length": 76.2, | |
| "epoch": 0.6522697223446452, | |
| "grad_norm": 0.1916457590662772, | |
| "kl": 0.0175506591796875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0029, | |
| "num_tokens": 73161111.0, | |
| "reward": 0.8094798445701599, | |
| "reward_std": 0.04875086285173893, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7758586168289184, | |
| "rewards/qatch_metrics/std": 0.32606661319732666, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 480.4, | |
| "completions/max_terminated_length": 480.4, | |
| "completions/mean_length": 222.75, | |
| "completions/mean_terminated_length": 222.75, | |
| "completions/min_length": 72.6, | |
| "completions/min_terminated_length": 72.6, | |
| "epoch": 0.6610841780520053, | |
| "grad_norm": 0.15787517122504152, | |
| "kl": 0.0181884765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0018, | |
| "num_tokens": 73905591.0, | |
| "reward": 0.89048171043396, | |
| "reward_std": 0.04932568361982703, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8711549639701843, | |
| "rewards/qatch_metrics/std": 0.2736783862113953, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 561.4, | |
| "completions/max_terminated_length": 561.4, | |
| "completions/mean_length": 234.4390625, | |
| "completions/mean_terminated_length": 234.4390625, | |
| "completions/min_length": 75.2, | |
| "completions/min_terminated_length": 75.2, | |
| "epoch": 0.6698986337593653, | |
| "grad_norm": 0.2653930596733297, | |
| "kl": 0.0174713134765625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.003, | |
| "num_tokens": 74679801.0, | |
| "reward": 0.8243065714836121, | |
| "reward_std": 0.06958894729614258, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7933018207550049, | |
| "rewards/qatch_metrics/std": 0.3086866676807404, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 511.8, | |
| "completions/max_terminated_length": 511.8, | |
| "completions/mean_length": 243.73359375, | |
| "completions/mean_terminated_length": 243.73359375, | |
| "completions/min_length": 81.6, | |
| "completions/min_terminated_length": 81.6, | |
| "epoch": 0.6787130894667255, | |
| "grad_norm": 0.20233916054675122, | |
| "kl": 0.014093017578125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0048, | |
| "num_tokens": 75445892.0, | |
| "reward": 0.8653998494148254, | |
| "reward_std": 0.07132081612944603, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8416468739509583, | |
| "rewards/qatch_metrics/std": 0.3147186517715454, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.4, | |
| "completions/max_terminated_length": 447.4, | |
| "completions/mean_length": 228.43671875, | |
| "completions/mean_terminated_length": 228.43671875, | |
| "completions/min_length": 79.8, | |
| "completions/min_terminated_length": 79.8, | |
| "epoch": 0.6875275451740855, | |
| "grad_norm": 0.29996778931865303, | |
| "kl": 0.0146087646484375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0016, | |
| "num_tokens": 76229251.0, | |
| "reward": 0.8502862334251404, | |
| "reward_std": 0.07314281612634659, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8238661646842956, | |
| "rewards/qatch_metrics/std": 0.3113024443387985, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 487.0, | |
| "completions/max_terminated_length": 487.0, | |
| "completions/mean_length": 249.8734375, | |
| "completions/mean_terminated_length": 249.8734375, | |
| "completions/min_length": 84.4, | |
| "completions/min_terminated_length": 84.4, | |
| "epoch": 0.6963420008814456, | |
| "grad_norm": 0.2150032953896288, | |
| "kl": 0.017156982421875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0029, | |
| "num_tokens": 77021793.0, | |
| "reward": 0.8494030237197876, | |
| "reward_std": 0.05776047557592392, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8228270888328553, | |
| "rewards/qatch_metrics/std": 0.3020846724510193, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 471.4, | |
| "completions/max_terminated_length": 471.4, | |
| "completions/mean_length": 247.9828125, | |
| "completions/mean_terminated_length": 247.9828125, | |
| "completions/min_length": 84.4, | |
| "completions/min_terminated_length": 84.4, | |
| "epoch": 0.7051564565888057, | |
| "grad_norm": 0.2754041387856829, | |
| "kl": 0.0148590087890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0065, | |
| "num_tokens": 77833451.0, | |
| "reward": 0.8363431453704834, | |
| "reward_std": 0.06054745838046074, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.807462501525879, | |
| "rewards/qatch_metrics/std": 0.29668720066547394, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 442.6, | |
| "completions/max_terminated_length": 442.6, | |
| "completions/mean_length": 225.4296875, | |
| "completions/mean_terminated_length": 225.4296875, | |
| "completions/min_length": 83.0, | |
| "completions/min_terminated_length": 83.0, | |
| "epoch": 0.7139709122961657, | |
| "grad_norm": 0.22420011771594078, | |
| "kl": 0.017706298828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0019, | |
| "num_tokens": 78585793.0, | |
| "reward": 0.8382049560546875, | |
| "reward_std": 0.05150428526103497, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8096528768539428, | |
| "rewards/qatch_metrics/std": 0.2925006330013275, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 618.4, | |
| "completions/max_terminated_length": 618.4, | |
| "completions/mean_length": 219.3421875, | |
| "completions/mean_terminated_length": 219.3421875, | |
| "completions/min_length": 84.6, | |
| "completions/min_terminated_length": 84.6, | |
| "epoch": 0.7227853680035258, | |
| "grad_norm": 0.0986589707089894, | |
| "kl": 0.0170196533203125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0022, | |
| "num_tokens": 79352135.0, | |
| "reward": 0.8465274453163147, | |
| "reward_std": 0.05231629386544227, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8194440126419067, | |
| "rewards/qatch_metrics/std": 0.3004340440034866, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 470.6, | |
| "completions/max_terminated_length": 470.6, | |
| "completions/mean_length": 213.9921875, | |
| "completions/mean_terminated_length": 213.9921875, | |
| "completions/min_length": 83.6, | |
| "completions/min_terminated_length": 83.6, | |
| "epoch": 0.7315998237108858, | |
| "grad_norm": 0.17969166348358623, | |
| "kl": 0.01600341796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0196, | |
| "num_tokens": 80093021.0, | |
| "reward": 0.7899853944778442, | |
| "reward_std": 0.06183199286460876, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7529239773750305, | |
| "rewards/qatch_metrics/std": 0.32831716537475586, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 458.4, | |
| "completions/max_terminated_length": 458.4, | |
| "completions/mean_length": 208.25390625, | |
| "completions/mean_terminated_length": 208.25390625, | |
| "completions/min_length": 72.6, | |
| "completions/min_terminated_length": 72.6, | |
| "epoch": 0.7404142794182459, | |
| "grad_norm": 0.12360613268228073, | |
| "kl": 0.0170166015625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0011, | |
| "num_tokens": 80810658.0, | |
| "reward": 0.8781363725662231, | |
| "reward_std": 0.04314489997923374, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8566309928894043, | |
| "rewards/qatch_metrics/std": 0.2832080274820328, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.2, | |
| "completions/max_terminated_length": 447.2, | |
| "completions/mean_length": 203.98046875, | |
| "completions/mean_terminated_length": 203.98046875, | |
| "completions/min_length": 79.2, | |
| "completions/min_terminated_length": 79.2, | |
| "epoch": 0.749228735125606, | |
| "grad_norm": 0.210810313322166, | |
| "kl": 0.0164581298828125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.003, | |
| "num_tokens": 81548361.0, | |
| "reward": 0.8270991563796997, | |
| "reward_std": 0.06941422820091248, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7965872406959533, | |
| "rewards/qatch_metrics/std": 0.33117216229438784, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 434.6, | |
| "completions/max_terminated_length": 434.6, | |
| "completions/mean_length": 220.75703125, | |
| "completions/mean_terminated_length": 220.75703125, | |
| "completions/min_length": 80.4, | |
| "completions/min_terminated_length": 80.4, | |
| "epoch": 0.7580431908329661, | |
| "grad_norm": 0.21910688267881026, | |
| "kl": 0.016754150390625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0012, | |
| "num_tokens": 82290706.0, | |
| "reward": 0.8464880228042603, | |
| "reward_std": 0.04884184449911118, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8193976640701294, | |
| "rewards/qatch_metrics/std": 0.28375020921230315, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 451.8, | |
| "completions/max_terminated_length": 451.8, | |
| "completions/mean_length": 223.1234375, | |
| "completions/mean_terminated_length": 223.1234375, | |
| "completions/min_length": 85.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.7668576465403262, | |
| "grad_norm": 0.26253720274856984, | |
| "kl": 0.0178009033203125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0023, | |
| "num_tokens": 83056976.0, | |
| "reward": 0.8096219301223755, | |
| "reward_std": 0.07494284212589264, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7760257959365845, | |
| "rewards/qatch_metrics/std": 0.3492628037929535, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 501.2, | |
| "completions/max_terminated_length": 501.2, | |
| "completions/mean_length": 216.840625, | |
| "completions/mean_terminated_length": 216.840625, | |
| "completions/min_length": 88.8, | |
| "completions/min_terminated_length": 88.8, | |
| "epoch": 0.7756721022476862, | |
| "grad_norm": 0.27647079947407377, | |
| "kl": 0.0181732177734375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0012, | |
| "num_tokens": 83805044.0, | |
| "reward": 0.7776495218276978, | |
| "reward_std": 0.056884029135108, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7384112119674683, | |
| "rewards/qatch_metrics/std": 0.3683965981006622, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 455.0, | |
| "completions/max_terminated_length": 455.0, | |
| "completions/mean_length": 216.46015625, | |
| "completions/mean_terminated_length": 216.46015625, | |
| "completions/min_length": 78.2, | |
| "completions/min_terminated_length": 78.2, | |
| "epoch": 0.7844865579550463, | |
| "grad_norm": 0.20996305667402082, | |
| "kl": 0.0163116455078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0063, | |
| "num_tokens": 84571313.0, | |
| "reward": 0.8477118849754334, | |
| "reward_std": 0.06959039457142353, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8208375215530396, | |
| "rewards/qatch_metrics/std": 0.30095059871673585, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 443.4, | |
| "completions/max_terminated_length": 443.4, | |
| "completions/mean_length": 211.00234375, | |
| "completions/mean_terminated_length": 211.00234375, | |
| "completions/min_length": 86.2, | |
| "completions/min_terminated_length": 86.2, | |
| "epoch": 0.7933010136624064, | |
| "grad_norm": 0.15662206787116065, | |
| "kl": 0.0160797119140625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0004, | |
| "num_tokens": 85319188.0, | |
| "reward": 0.8328658938407898, | |
| "reward_std": 0.05801869332790375, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8033716320991516, | |
| "rewards/qatch_metrics/std": 0.3037038058042526, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 475.2, | |
| "completions/max_terminated_length": 475.2, | |
| "completions/mean_length": 222.7078125, | |
| "completions/mean_terminated_length": 222.7078125, | |
| "completions/min_length": 80.8, | |
| "completions/min_terminated_length": 80.8, | |
| "epoch": 0.8021154693697664, | |
| "grad_norm": 0.19919629119501958, | |
| "kl": 0.01639404296875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 86091774.0, | |
| "reward": 0.8358211517333984, | |
| "reward_std": 0.0607087716460228, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8068713665008544, | |
| "rewards/qatch_metrics/std": 0.30334635376930236, | |
| "rewards/tag_count_reward/mean": 0.999609375, | |
| "rewards/tag_count_reward/std": 0.00625, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 418.4, | |
| "completions/max_terminated_length": 418.4, | |
| "completions/mean_length": 221.0015625, | |
| "completions/mean_terminated_length": 221.0015625, | |
| "completions/min_length": 80.4, | |
| "completions/min_terminated_length": 80.4, | |
| "epoch": 0.8109299250771265, | |
| "grad_norm": 0.1419366062228353, | |
| "kl": 0.01617431640625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0031, | |
| "num_tokens": 86848528.0, | |
| "reward": 0.8028954148292542, | |
| "reward_std": 0.06934207193553447, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7681122660636902, | |
| "rewards/qatch_metrics/std": 0.3390295565128326, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 426.6, | |
| "completions/max_terminated_length": 426.6, | |
| "completions/mean_length": 210.6140625, | |
| "completions/mean_terminated_length": 210.6140625, | |
| "completions/min_length": 85.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.8197443807844865, | |
| "grad_norm": 0.16116384181364513, | |
| "kl": 0.0162078857421875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.013, | |
| "num_tokens": 87564482.0, | |
| "reward": 0.8424649000167846, | |
| "reward_std": 0.040234316140413284, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8146645903587342, | |
| "rewards/qatch_metrics/std": 0.2840981811285019, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 447.6, | |
| "completions/max_terminated_length": 447.6, | |
| "completions/mean_length": 195.89140625, | |
| "completions/mean_terminated_length": 195.89140625, | |
| "completions/min_length": 80.8, | |
| "completions/min_terminated_length": 80.8, | |
| "epoch": 0.8285588364918466, | |
| "grad_norm": 0.21075371504226795, | |
| "kl": 0.0193115234375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.001, | |
| "num_tokens": 88255159.0, | |
| "reward": 0.8565711379051208, | |
| "reward_std": 0.06344871073961258, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8312601566314697, | |
| "rewards/qatch_metrics/std": 0.3075568675994873, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 445.4, | |
| "completions/max_terminated_length": 445.4, | |
| "completions/mean_length": 201.01484375, | |
| "completions/mean_terminated_length": 201.01484375, | |
| "completions/min_length": 85.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.8373732921992068, | |
| "grad_norm": 0.27204162033836665, | |
| "kl": 0.019976806640625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0009, | |
| "num_tokens": 88945690.0, | |
| "reward": 0.8785177230834961, | |
| "reward_std": 0.06470721438527108, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8570796966552734, | |
| "rewards/qatch_metrics/std": 0.2825317859649658, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 497.4, | |
| "completions/max_terminated_length": 497.4, | |
| "completions/mean_length": 221.90859375, | |
| "completions/mean_terminated_length": 221.90859375, | |
| "completions/min_length": 81.2, | |
| "completions/min_terminated_length": 81.2, | |
| "epoch": 0.8461877479065668, | |
| "grad_norm": 0.19323853705899263, | |
| "kl": 0.0183746337890625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0041, | |
| "num_tokens": 89712373.0, | |
| "reward": 0.8555493712425232, | |
| "reward_std": 0.06230065375566483, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8300580739974975, | |
| "rewards/qatch_metrics/std": 0.28706649839878084, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 498.8, | |
| "completions/max_terminated_length": 498.8, | |
| "completions/mean_length": 228.7890625, | |
| "completions/mean_terminated_length": 228.7890625, | |
| "completions/min_length": 88.4, | |
| "completions/min_terminated_length": 88.4, | |
| "epoch": 0.8550022036139269, | |
| "grad_norm": 0.24770714763886528, | |
| "kl": 0.0176513671875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.004, | |
| "num_tokens": 90520071.0, | |
| "reward": 0.8527018785476684, | |
| "reward_std": 0.062195781618356705, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8267080783843994, | |
| "rewards/qatch_metrics/std": 0.2996180385351181, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 480.6, | |
| "completions/max_terminated_length": 480.6, | |
| "completions/mean_length": 227.66875, | |
| "completions/mean_terminated_length": 227.66875, | |
| "completions/min_length": 83.2, | |
| "completions/min_terminated_length": 83.2, | |
| "epoch": 0.8638166593212869, | |
| "grad_norm": 0.16162980170931898, | |
| "kl": 0.0188812255859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0006, | |
| "num_tokens": 91278479.0, | |
| "reward": 0.8309607028961181, | |
| "reward_std": 0.0656251635402441, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8011302351951599, | |
| "rewards/qatch_metrics/std": 0.31802850365638735, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 502.4, | |
| "completions/max_terminated_length": 502.4, | |
| "completions/mean_length": 225.3171875, | |
| "completions/mean_terminated_length": 225.3171875, | |
| "completions/min_length": 80.4, | |
| "completions/min_terminated_length": 80.4, | |
| "epoch": 0.872631115028647, | |
| "grad_norm": 0.1886973597841831, | |
| "kl": 0.01859130859375, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0052, | |
| "num_tokens": 92033173.0, | |
| "reward": 0.8441248655319213, | |
| "reward_std": 0.043570340052247046, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8166174769401551, | |
| "rewards/qatch_metrics/std": 0.30278873145580293, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 443.6, | |
| "completions/max_terminated_length": 443.6, | |
| "completions/mean_length": 234.475, | |
| "completions/mean_terminated_length": 234.475, | |
| "completions/min_length": 99.8, | |
| "completions/min_terminated_length": 99.8, | |
| "epoch": 0.881445570736007, | |
| "grad_norm": 0.24444756963754977, | |
| "kl": 0.01798095703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.007, | |
| "num_tokens": 92808293.0, | |
| "reward": 0.8517020106315613, | |
| "reward_std": 0.06295906975865365, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.825531804561615, | |
| "rewards/qatch_metrics/std": 0.3100520223379135, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 500 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 440.0, | |
| "completions/max_terminated_length": 440.0, | |
| "completions/mean_length": 215.0328125, | |
| "completions/mean_terminated_length": 215.0328125, | |
| "completions/min_length": 84.2, | |
| "completions/min_terminated_length": 84.2, | |
| "epoch": 0.8902600264433671, | |
| "grad_norm": 0.21103775626066984, | |
| "kl": 0.0171600341796875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0051, | |
| "num_tokens": 93563327.0, | |
| "reward": 0.8682243466377259, | |
| "reward_std": 0.04365142099559307, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8449697852134704, | |
| "rewards/qatch_metrics/std": 0.2696381151676178, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 505 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 463.4, | |
| "completions/max_terminated_length": 463.4, | |
| "completions/mean_length": 218.59453125, | |
| "completions/mean_terminated_length": 218.59453125, | |
| "completions/min_length": 77.0, | |
| "completions/min_terminated_length": 77.0, | |
| "epoch": 0.8990744821507272, | |
| "grad_norm": 0.20107359914643413, | |
| "kl": 0.016455078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0086, | |
| "num_tokens": 94333288.0, | |
| "reward": 0.8064153909683227, | |
| "reward_std": 0.06192653328180313, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.772253406047821, | |
| "rewards/qatch_metrics/std": 0.3227865040302277, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 510 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 443.0, | |
| "completions/max_terminated_length": 443.0, | |
| "completions/mean_length": 206.5703125, | |
| "completions/mean_terminated_length": 206.5703125, | |
| "completions/min_length": 73.0, | |
| "completions/min_terminated_length": 73.0, | |
| "epoch": 0.9078889378580872, | |
| "grad_norm": 0.10741725097461949, | |
| "kl": 0.0163330078125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0056, | |
| "num_tokens": 95051890.0, | |
| "reward": 0.8839513182640075, | |
| "reward_std": 0.04564618114382028, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8634721517562867, | |
| "rewards/qatch_metrics/std": 0.24794530421495437, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 515 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 420.2, | |
| "completions/max_terminated_length": 420.2, | |
| "completions/mean_length": 193.584375, | |
| "completions/mean_terminated_length": 193.584375, | |
| "completions/min_length": 74.8, | |
| "completions/min_terminated_length": 74.8, | |
| "epoch": 0.9167033935654474, | |
| "grad_norm": 0.3417922303720187, | |
| "kl": 0.0196563720703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0033, | |
| "num_tokens": 95755150.0, | |
| "reward": 0.8428452134132385, | |
| "reward_std": 0.05727057494223118, | |
| "rewards/format_reward/mean": 0.99921875, | |
| "rewards/format_reward/std": 0.0125, | |
| "rewards/qatch_metrics/mean": 0.8152039051055908, | |
| "rewards/qatch_metrics/std": 0.31376497745513915, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 520 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 404.8, | |
| "completions/max_terminated_length": 404.8, | |
| "completions/mean_length": 208.72890625, | |
| "completions/mean_terminated_length": 208.72890625, | |
| "completions/min_length": 72.2, | |
| "completions/min_terminated_length": 72.2, | |
| "epoch": 0.9255178492728074, | |
| "grad_norm": 0.17161657062686406, | |
| "kl": 0.0185943603515625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0023, | |
| "num_tokens": 96514835.0, | |
| "reward": 0.8597602009773254, | |
| "reward_std": 0.044371549785137174, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8350119948387146, | |
| "rewards/qatch_metrics/std": 0.295586758852005, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 525 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 426.8, | |
| "completions/max_terminated_length": 426.8, | |
| "completions/mean_length": 212.95859375, | |
| "completions/mean_terminated_length": 212.95859375, | |
| "completions/min_length": 77.0, | |
| "completions/min_terminated_length": 77.0, | |
| "epoch": 0.9343323049801675, | |
| "grad_norm": 0.22162383692372334, | |
| "kl": 0.0186981201171875, | |
| "learning_rate": 1e-06, | |
| "loss": -0.002, | |
| "num_tokens": 97270782.0, | |
| "reward": 0.8363440155982971, | |
| "reward_std": 0.06691965609788894, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8074635624885559, | |
| "rewards/qatch_metrics/std": 0.3064163327217102, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 530 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 454.6, | |
| "completions/max_terminated_length": 454.6, | |
| "completions/mean_length": 233.40234375, | |
| "completions/mean_terminated_length": 233.40234375, | |
| "completions/min_length": 76.4, | |
| "completions/min_terminated_length": 76.4, | |
| "epoch": 0.9431467606875276, | |
| "grad_norm": 0.1434511776519399, | |
| "kl": 0.019879150390625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0023, | |
| "num_tokens": 98016705.0, | |
| "reward": 0.8363542199134827, | |
| "reward_std": 0.05200971700251102, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8074755430221557, | |
| "rewards/qatch_metrics/std": 0.2885085940361023, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 535 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 444.6, | |
| "completions/max_terminated_length": 444.6, | |
| "completions/mean_length": 235.70625, | |
| "completions/mean_terminated_length": 235.70625, | |
| "completions/min_length": 80.6, | |
| "completions/min_terminated_length": 80.6, | |
| "epoch": 0.9519612163948876, | |
| "grad_norm": 0.09221258199209693, | |
| "kl": 0.018701171875, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0038, | |
| "num_tokens": 98787193.0, | |
| "reward": 0.8677037119865417, | |
| "reward_std": 0.057669999450445174, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8443572998046875, | |
| "rewards/qatch_metrics/std": 0.288933590054512, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 540 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 455.2, | |
| "completions/max_terminated_length": 455.2, | |
| "completions/mean_length": 222.11875, | |
| "completions/mean_terminated_length": 222.11875, | |
| "completions/min_length": 74.6, | |
| "completions/min_terminated_length": 74.6, | |
| "epoch": 0.9607756721022477, | |
| "grad_norm": 0.1352237905149159, | |
| "kl": 0.018145751953125, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0031, | |
| "num_tokens": 99532081.0, | |
| "reward": 0.8805891752243042, | |
| "reward_std": 0.05483146589249373, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8595166802406311, | |
| "rewards/qatch_metrics/std": 0.25585181415081026, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 545 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 472.6, | |
| "completions/max_terminated_length": 472.6, | |
| "completions/mean_length": 218.659375, | |
| "completions/mean_terminated_length": 218.659375, | |
| "completions/min_length": 86.2, | |
| "completions/min_terminated_length": 86.2, | |
| "epoch": 0.9695901278096077, | |
| "grad_norm": 0.16904630982662794, | |
| "kl": 0.01783447265625, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0016, | |
| "num_tokens": 100246573.0, | |
| "reward": 0.8569401383399964, | |
| "reward_std": 0.07272802218794823, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8316942691802979, | |
| "rewards/qatch_metrics/std": 0.3041912466287613, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 550 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 459.8, | |
| "completions/max_terminated_length": 459.8, | |
| "completions/mean_length": 221.78984375, | |
| "completions/mean_terminated_length": 221.78984375, | |
| "completions/min_length": 77.8, | |
| "completions/min_terminated_length": 77.8, | |
| "epoch": 0.9784045835169678, | |
| "grad_norm": 0.31854687165087076, | |
| "kl": 0.0183258056640625, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0058, | |
| "num_tokens": 100996640.0, | |
| "reward": 0.8102917551994324, | |
| "reward_std": 0.07570969834923744, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.7768138289451599, | |
| "rewards/qatch_metrics/std": 0.34436498284339906, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 555 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 490.6, | |
| "completions/max_terminated_length": 490.6, | |
| "completions/mean_length": 230.28828125, | |
| "completions/mean_terminated_length": 230.28828125, | |
| "completions/min_length": 85.0, | |
| "completions/min_terminated_length": 85.0, | |
| "epoch": 0.9872190392243279, | |
| "grad_norm": 0.16545798735816303, | |
| "kl": 0.01719970703125, | |
| "learning_rate": 1e-06, | |
| "loss": 0.0054, | |
| "num_tokens": 101777473.0, | |
| "reward": 0.854366683959961, | |
| "reward_std": 0.050544672086834906, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.8286666750907898, | |
| "rewards/qatch_metrics/std": 0.3027670204639435, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 560 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 460.0, | |
| "completions/max_terminated_length": 460.0, | |
| "completions/mean_length": 232.278125, | |
| "completions/mean_terminated_length": 232.278125, | |
| "completions/min_length": 79.2, | |
| "completions/min_terminated_length": 79.2, | |
| "epoch": 0.996033494931688, | |
| "grad_norm": 0.2064718967348405, | |
| "kl": 0.020306396484375, | |
| "learning_rate": 1e-06, | |
| "loss": -0.0052, | |
| "num_tokens": 102547669.0, | |
| "reward": 0.7918175339698792, | |
| "reward_std": 0.05684706475585699, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.755079448223114, | |
| "rewards/qatch_metrics/std": 0.3250477254390717, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 565 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 468.5, | |
| "completions/max_terminated_length": 468.5, | |
| "completions/mean_length": 214.265625, | |
| "completions/mean_terminated_length": 214.265625, | |
| "completions/min_length": 66.0, | |
| "completions/min_terminated_length": 66.0, | |
| "epoch": 0.999559277214632, | |
| "kl": 0.01806640625, | |
| "num_tokens": 102823629.0, | |
| "reward": 0.8797399699687958, | |
| "reward_std": 0.056224397383630276, | |
| "rewards/format_reward/mean": 1.0, | |
| "rewards/format_reward/std": 0.0, | |
| "rewards/qatch_metrics/mean": 0.858517587184906, | |
| "rewards/qatch_metrics/std": 0.26497258245944977, | |
| "rewards/tag_count_reward/mean": 1.0, | |
| "rewards/tag_count_reward/std": 0.0, | |
| "step": 567, | |
| "total_flos": 0.0, | |
| "train_loss": -1.6490349831877564e-05, | |
| "train_runtime": 5804.9117, | |
| "train_samples_per_second": 1.564, | |
| "train_steps_per_second": 0.098 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 567, | |
| "num_input_tokens_seen": 102823629, | |
| "num_train_epochs": 1, | |
| "save_steps": 5, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |