| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.6129032258064515, | |
| "eval_steps": 500, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2796.0, | |
| "completions/mean_length": 328.47918701171875, | |
| "completions/min_length": 46.0, | |
| "epoch": 0.016129032258064516, | |
| "grad_norm": 0.2647878213954199, | |
| "kl": 0.0, | |
| "learning_rate": 8.333333333333333e-08, | |
| "loss": 8.76995454035523e-09, | |
| "memory(GiB)": 66.25, | |
| "reward": 0.9901389479637146, | |
| "reward_std": 0.2693294882774353, | |
| "rewards/CODEORM/mean": 0.010625001043081284, | |
| "rewards/CODEORM/std": 0.055468425154685974, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615807056427, | |
| "rewards/VQAORM/mean": 0.4798611104488373, | |
| "rewards/VQAORM/std": 0.49976783990859985, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.002513 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 4599.0, | |
| "completions/mean_length": 323.51251220703125, | |
| "completions/min_length": 46.0, | |
| "epoch": 0.03225806451612903, | |
| "grad_norm": 0.25445235632470914, | |
| "kl": 0.0, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "loss": 2.5870072750677764e-09, | |
| "memory(GiB)": 67.27, | |
| "reward": 0.9681249856948853, | |
| "reward_std": 0.2566339373588562, | |
| "rewards/CODEORM/mean": 0.0066666672937572, | |
| "rewards/CODEORM/std": 0.04423702508211136, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615807056427, | |
| "rewards/VQAORM/mean": 0.4618055522441864, | |
| "rewards/VQAORM/std": 0.4987122416496277, | |
| "step": 2, | |
| "train_speed(iter/s)": 0.002804 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2598.0, | |
| "completions/mean_length": 308.897216796875, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.04838709677419355, | |
| "grad_norm": 0.26595615090662006, | |
| "kl": 0.0001987457275390625, | |
| "learning_rate": 2.5e-07, | |
| "loss": 2.0041927655256586e-06, | |
| "memory(GiB)": 67.27, | |
| "reward": 0.993263840675354, | |
| "reward_std": 0.24853122234344482, | |
| "rewards/CODEORM/mean": 0.008541665971279144, | |
| "rewards/CODEORM/std": 0.049912624061107635, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.48472222685813904, | |
| "rewards/VQAORM/std": 0.4999401569366455, | |
| "step": 3, | |
| "train_speed(iter/s)": 0.003038 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3566.0, | |
| "completions/mean_length": 319.540283203125, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.06451612903225806, | |
| "grad_norm": 0.28511858475822077, | |
| "kl": 0.00017852783203125, | |
| "learning_rate": 3.333333333333333e-07, | |
| "loss": 1.797711320250528e-06, | |
| "memory(GiB)": 67.27, | |
| "reward": 0.998055636882782, | |
| "reward_std": 0.268062561750412, | |
| "rewards/CODEORM/mean": 0.01020833384245634, | |
| "rewards/CODEORM/std": 0.05440906062722206, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4888888895511627, | |
| "rewards/VQAORM/std": 0.5000501871109009, | |
| "step": 4, | |
| "train_speed(iter/s)": 0.003145 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2966.0, | |
| "completions/mean_length": 364.01043701171875, | |
| "completions/min_length": 43.0, | |
| "epoch": 0.08064516129032258, | |
| "grad_norm": 0.2684251266862969, | |
| "kl": 0.000177764892578125, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "loss": 1.7898388477988192e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9475694894790649, | |
| "reward_std": 0.27028676867485046, | |
| "rewards/CODEORM/mean": 0.00937500037252903, | |
| "rewards/CODEORM/std": 0.05221592262387276, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.43888890743255615, | |
| "rewards/VQAORM/std": 0.49642378091812134, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.003206 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2712.0, | |
| "completions/mean_length": 319.3951416015625, | |
| "completions/min_length": 41.0, | |
| "epoch": 0.0967741935483871, | |
| "grad_norm": 0.2404392113235966, | |
| "kl": 0.0001796722412109375, | |
| "learning_rate": 5e-07, | |
| "loss": 1.804373255254177e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9318056106567383, | |
| "reward_std": 0.2407531440258026, | |
| "rewards/CODEORM/mean": 0.006458333693444729, | |
| "rewards/CODEORM/std": 0.04355579614639282, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.42569446563720703, | |
| "rewards/VQAORM/std": 0.4946196377277374, | |
| "step": 6, | |
| "train_speed(iter/s)": 0.003221 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2772.0, | |
| "completions/mean_length": 302.9992980957031, | |
| "completions/min_length": 54.0, | |
| "epoch": 0.11290322580645161, | |
| "grad_norm": 0.24614522756314014, | |
| "kl": 0.0001972198486328125, | |
| "learning_rate": 4.99965731410188e-07, | |
| "loss": 1.978319232875947e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9795833230018616, | |
| "reward_std": 0.2439241260290146, | |
| "rewards/CODEORM/mean": 0.004583333153277636, | |
| "rewards/CODEORM/std": 0.03680942952632904, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.4750000238418579, | |
| "rewards/VQAORM/std": 0.4995481073856354, | |
| "step": 7, | |
| "train_speed(iter/s)": 0.003258 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2551.0, | |
| "completions/mean_length": 327.8243103027344, | |
| "completions/min_length": 55.0, | |
| "epoch": 0.12903225806451613, | |
| "grad_norm": 0.25632964332335334, | |
| "kl": 0.000189208984375, | |
| "learning_rate": 4.998629360792966e-07, | |
| "loss": 1.9058469433730352e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 1.0435417890548706, | |
| "reward_std": 0.26714909076690674, | |
| "rewards/CODEORM/mean": 0.01020833384245634, | |
| "rewards/CODEORM/std": 0.05440906062722206, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.5340278148651123, | |
| "rewards/VQAORM/std": 0.49901407957077026, | |
| "step": 8, | |
| "train_speed(iter/s)": 0.003284 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 11576.0, | |
| "completions/mean_length": 324.6034851074219, | |
| "completions/min_length": 40.0, | |
| "epoch": 0.14516129032258066, | |
| "grad_norm": 0.2555360483438473, | |
| "kl": 0.00021514892578125, | |
| "learning_rate": 4.996916453197791e-07, | |
| "loss": 2.1652638224622933e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 1.0275694131851196, | |
| "reward_std": 0.2437307983636856, | |
| "rewards/CODEORM/mean": 0.009166667237877846, | |
| "rewards/CODEORM/std": 0.0516509935259819, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4982638955116272, | |
| "rewards/FMTORM/std": 0.02942180633544922, | |
| "rewards/VQAORM/mean": 0.5201389193534851, | |
| "rewards/VQAORM/std": 0.49976781010627747, | |
| "step": 9, | |
| "train_speed(iter/s)": 0.003131 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2962.0, | |
| "completions/mean_length": 347.196533203125, | |
| "completions/min_length": 58.0, | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 0.22806471451561894, | |
| "kl": 0.0002552032470703125, | |
| "learning_rate": 4.994519113084604e-07, | |
| "loss": 2.564860324127949e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9813888669013977, | |
| "reward_std": 0.22997106611728668, | |
| "rewards/CODEORM/mean": 0.009166667237877846, | |
| "rewards/CODEORM/std": 0.0516509935259819, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49791666865348816, | |
| "rewards/FMTORM/std": 0.03221873939037323, | |
| "rewards/VQAORM/mean": 0.47430557012557983, | |
| "rewards/VQAORM/std": 0.49951285123825073, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.003171 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 5360.0, | |
| "completions/mean_length": 360.31390380859375, | |
| "completions/min_length": 48.0, | |
| "epoch": 0.1774193548387097, | |
| "grad_norm": 0.2832579586762888, | |
| "kl": 0.0003253936767578125, | |
| "learning_rate": 4.991438070706428e-07, | |
| "loss": 3.250584086345043e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 1.0013889074325562, | |
| "reward_std": 0.25826162099838257, | |
| "rewards/CODEORM/mean": 0.01979166828095913, | |
| "rewards/CODEORM/std": 0.07449593394994736, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4826388955116272, | |
| "rewards/VQAORM/std": 0.499872088432312, | |
| "step": 11, | |
| "train_speed(iter/s)": 0.003168 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2102.0, | |
| "completions/mean_length": 335.2215270996094, | |
| "completions/min_length": 52.0, | |
| "epoch": 0.1935483870967742, | |
| "grad_norm": 0.27801393384997114, | |
| "kl": 0.0003223419189453125, | |
| "learning_rate": 4.987674264578614e-07, | |
| "loss": 3.231521304769558e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9737499952316284, | |
| "reward_std": 0.27600589394569397, | |
| "rewards/CODEORM/mean": 0.009166666306555271, | |
| "rewards/CODEORM/std": 0.0516509935259819, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4652777910232544, | |
| "rewards/VQAORM/std": 0.49896618723869324, | |
| "step": 12, | |
| "train_speed(iter/s)": 0.003207 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2673.0, | |
| "completions/mean_length": 342.41876220703125, | |
| "completions/min_length": 54.0, | |
| "epoch": 0.20967741935483872, | |
| "grad_norm": 0.2586540657680122, | |
| "kl": 0.000345611572265625, | |
| "learning_rate": 4.983228841192975e-07, | |
| "loss": 3.455609885349986e-06, | |
| "memory(GiB)": 71.39, | |
| "reward": 0.9851389527320862, | |
| "reward_std": 0.26006534695625305, | |
| "rewards/CODEORM/mean": 0.008750000037252903, | |
| "rewards/CODEORM/std": 0.05049958825111389, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.47708335518836975, | |
| "rewards/VQAORM/std": 0.4996480643749237, | |
| "step": 13, | |
| "train_speed(iter/s)": 0.003229 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2441.0, | |
| "completions/mean_length": 346.803466796875, | |
| "completions/min_length": 54.0, | |
| "epoch": 0.22580645161290322, | |
| "grad_norm": 0.26610442827789593, | |
| "kl": 0.000457763671875, | |
| "learning_rate": 4.978103154668533e-07, | |
| "loss": 4.586853265209356e-06, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9384721517562866, | |
| "reward_std": 0.26384806632995605, | |
| "rewards/CODEORM/mean": 0.008958334103226662, | |
| "rewards/CODEORM/std": 0.05107896029949188, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4305555522441864, | |
| "rewards/VQAORM/std": 0.4953259825706482, | |
| "step": 14, | |
| "train_speed(iter/s)": 0.003242 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2603.0, | |
| "completions/mean_length": 344.1347351074219, | |
| "completions/min_length": 52.0, | |
| "epoch": 0.24193548387096775, | |
| "grad_norm": 0.24930681367809324, | |
| "kl": 0.000681304931640625, | |
| "learning_rate": 4.972298766339059e-07, | |
| "loss": 6.819126156187849e-06, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9356943964958191, | |
| "reward_std": 0.2636728882789612, | |
| "rewards/CODEORM/mean": 0.009999999776482582, | |
| "rewards/CODEORM/std": 0.05387035384774208, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4263888895511627, | |
| "rewards/VQAORM/std": 0.49472352862358093, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.003258 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2302.0, | |
| "completions/mean_length": 315.2257080078125, | |
| "completions/min_length": 47.0, | |
| "epoch": 0.25806451612903225, | |
| "grad_norm": 0.2280556698693846, | |
| "kl": 0.000701904296875, | |
| "learning_rate": 4.965817444277468e-07, | |
| "loss": 7.027399533399148e-06, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9556249976158142, | |
| "reward_std": 0.24996338784694672, | |
| "rewards/CODEORM/mean": 0.008750000037252903, | |
| "rewards/CODEORM/std": 0.05049958825111389, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4472222328186035, | |
| "rewards/VQAORM/std": 0.49737945199012756, | |
| "step": 16, | |
| "train_speed(iter/s)": 0.003286 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2913.0, | |
| "completions/mean_length": 346.1423645019531, | |
| "completions/min_length": 1.0, | |
| "epoch": 0.27419354838709675, | |
| "grad_norm": 0.9969603368489056, | |
| "kl": 0.001026153564453125, | |
| "learning_rate": 4.958661162757244e-07, | |
| "loss": 1.0287308214174118e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9165971875190735, | |
| "reward_std": 0.2503140866756439, | |
| "rewards/CODEORM/mean": 0.008958333171904087, | |
| "rewards/CODEORM/std": 0.05107896029949188, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.40833333134651184, | |
| "rewards/VQAORM/std": 0.49169617891311646, | |
| "step": 17, | |
| "train_speed(iter/s)": 0.002895 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2168.0, | |
| "completions/mean_length": 336.17291259765625, | |
| "completions/min_length": 50.0, | |
| "epoch": 0.2903225806451613, | |
| "grad_norm": 0.2580204320478004, | |
| "kl": 0.000994873046875, | |
| "learning_rate": 4.950832101651062e-07, | |
| "loss": 9.966525794879999e-06, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9159722328186035, | |
| "reward_std": 0.22609728574752808, | |
| "rewards/CODEORM/mean": 0.0062500000931322575, | |
| "rewards/CODEORM/std": 0.042862728238105774, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.409722238779068, | |
| "rewards/VQAORM/std": 0.4919532239437103, | |
| "step": 18, | |
| "train_speed(iter/s)": 0.002929 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2828.0, | |
| "completions/mean_length": 344.4798583984375, | |
| "completions/min_length": 56.0, | |
| "epoch": 0.3064516129032258, | |
| "grad_norm": 0.2572831613912269, | |
| "kl": 0.001146697998046875, | |
| "learning_rate": 4.94233264576678e-07, | |
| "loss": 1.1491146324260626e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9686111807823181, | |
| "reward_std": 0.2488832324743271, | |
| "rewards/CODEORM/mean": 0.008541667833924294, | |
| "rewards/CODEORM/std": 0.049912624061107635, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4611111283302307, | |
| "rewards/VQAORM/std": 0.49865853786468506, | |
| "step": 19, | |
| "train_speed(iter/s)": 0.002957 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2005.0, | |
| "completions/mean_length": 339.0666809082031, | |
| "completions/min_length": 50.0, | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 0.25242303032801683, | |
| "kl": 0.0013702392578125, | |
| "learning_rate": 4.933165384120992e-07, | |
| "loss": 1.3699734154215548e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9613890051841736, | |
| "reward_std": 0.2589081823825836, | |
| "rewards/CODEORM/mean": 0.006874999962747097, | |
| "rewards/CODEORM/std": 0.04490695893764496, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4548611342906952, | |
| "rewards/VQAORM/std": 0.49813133478164673, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.002987 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2956.0, | |
| "completions/mean_length": 347.7388916015625, | |
| "completions/min_length": 56.0, | |
| "epoch": 0.3387096774193548, | |
| "grad_norm": 0.2878476776805163, | |
| "kl": 0.001605224609375, | |
| "learning_rate": 4.923333109150403e-07, | |
| "loss": 1.6045431038946845e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9531945586204529, | |
| "reward_std": 0.2703089118003845, | |
| "rewards/CODEORM/mean": 0.013958333991467953, | |
| "rewards/CODEORM/std": 0.06320948898792267, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4402777850627899, | |
| "rewards/VQAORM/std": 0.4965929090976715, | |
| "step": 21, | |
| "train_speed(iter/s)": 0.003002 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2177.0, | |
| "completions/mean_length": 300.4312438964844, | |
| "completions/min_length": 47.0, | |
| "epoch": 0.3548387096774194, | |
| "grad_norm": 0.23824468095972504, | |
| "kl": 0.0014373779296875, | |
| "learning_rate": 4.912838815861217e-07, | |
| "loss": 1.4388399904419202e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9931944608688354, | |
| "reward_std": 0.24322348833084106, | |
| "rewards/CODEORM/mean": 0.009166667237877846, | |
| "rewards/CODEORM/std": 0.051650989800691605, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.48472222685813904, | |
| "rewards/VQAORM/std": 0.4999401569366455, | |
| "step": 22, | |
| "train_speed(iter/s)": 0.003021 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1788.0, | |
| "completions/mean_length": 325.1958312988281, | |
| "completions/min_length": 53.0, | |
| "epoch": 0.3709677419354839, | |
| "grad_norm": 0.3498689416243422, | |
| "kl": 0.00506134033203125, | |
| "learning_rate": 4.90168570091683e-07, | |
| "loss": 5.055519432062283e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 1.0063194036483765, | |
| "reward_std": 0.26852238178253174, | |
| "rewards/CODEORM/mean": 0.013958334922790527, | |
| "rewards/CODEORM/std": 0.06320948898792267, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4930555820465088, | |
| "rewards/VQAORM/std": 0.5001254677772522, | |
| "step": 23, | |
| "train_speed(iter/s)": 0.003047 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2844.0, | |
| "completions/mean_length": 355.35833740234375, | |
| "completions/min_length": 59.0, | |
| "epoch": 0.3870967741935484, | |
| "grad_norm": 0.2465080561687103, | |
| "kl": 0.0025970458984375, | |
| "learning_rate": 4.889877161664096e-07, | |
| "loss": 2.5979932615882717e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9511805176734924, | |
| "reward_std": 0.23766973614692688, | |
| "rewards/CODEORM/mean": 0.0133333345875144, | |
| "rewards/CODEORM/std": 0.061845600605010986, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4381944537162781, | |
| "rewards/VQAORM/std": 0.49633774161338806, | |
| "step": 24, | |
| "train_speed(iter/s)": 0.00306 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2828.0, | |
| "completions/mean_length": 364.1534729003906, | |
| "completions/min_length": 55.0, | |
| "epoch": 0.4032258064516129, | |
| "grad_norm": 0.2306227358811404, | |
| "kl": 0.0028472900390625, | |
| "learning_rate": 4.877416795098463e-07, | |
| "loss": 2.847511314030271e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9688888788223267, | |
| "reward_std": 0.24576471745967865, | |
| "rewards/CODEORM/mean": 0.01020833384245634, | |
| "rewards/CODEORM/std": 0.05440906062722206, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615807056427, | |
| "rewards/VQAORM/mean": 0.45902779698371887, | |
| "rewards/VQAORM/std": 0.4984915852546692, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.003078 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3089.0, | |
| "completions/mean_length": 317.5958557128906, | |
| "completions/min_length": 46.0, | |
| "epoch": 0.41935483870967744, | |
| "grad_norm": 0.29253210222640175, | |
| "kl": 0.0026397705078125, | |
| "learning_rate": 4.864308396768294e-07, | |
| "loss": 2.6405881726532243e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9640277624130249, | |
| "reward_std": 0.24826957285404205, | |
| "rewards/CODEORM/mean": 0.011250000447034836, | |
| "rewards/CODEORM/std": 0.057014863938093185, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.45277780294418335, | |
| "rewards/VQAORM/std": 0.49793797731399536, | |
| "step": 26, | |
| "train_speed(iter/s)": 0.003097 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3438.0, | |
| "completions/mean_length": 338.2472229003906, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.43548387096774194, | |
| "grad_norm": 0.2644761394181819, | |
| "kl": 0.0032989501953125, | |
| "learning_rate": 4.850555959618704e-07, | |
| "loss": 3.298750743851997e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 1.0049306154251099, | |
| "reward_std": 0.2615948021411896, | |
| "rewards/CODEORM/mean": 0.012916668318212032, | |
| "rewards/CODEORM/std": 0.06091581657528877, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4923611283302307, | |
| "rewards/VQAORM/std": 0.5001153349876404, | |
| "step": 27, | |
| "train_speed(iter/s)": 0.003109 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2612.0, | |
| "completions/mean_length": 356.2597351074219, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.45161290322580644, | |
| "grad_norm": 0.2665651878200343, | |
| "kl": 0.0039642333984375, | |
| "learning_rate": 4.836163672775272e-07, | |
| "loss": 3.9662394556216896e-05, | |
| "memory(GiB)": 72.28, | |
| "reward": 0.9762500524520874, | |
| "reward_std": 0.25711533427238464, | |
| "rewards/CODEORM/mean": 0.013749999925494194, | |
| "rewards/CODEORM/std": 0.06275884807109833, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.46319445967674255, | |
| "rewards/VQAORM/std": 0.49881675839424133, | |
| "step": 28, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1871.0, | |
| "completions/mean_length": 343.495849609375, | |
| "completions/min_length": 39.0, | |
| "epoch": 0.46774193548387094, | |
| "grad_norm": 0.23395808716833594, | |
| "kl": 0.004046630859375, | |
| "learning_rate": 4.82113592026799e-07, | |
| "loss": 4.044502566102892e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9686111807823181, | |
| "reward_std": 0.2451702207326889, | |
| "rewards/CODEORM/mean": 0.011666666716337204, | |
| "rewards/CODEORM/std": 0.058019187301397324, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4576388895511627, | |
| "rewards/VQAORM/std": 0.49837538599967957, | |
| "step": 29, | |
| "train_speed(iter/s)": 0.00314 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2888.0, | |
| "completions/mean_length": 346.4354248046875, | |
| "completions/min_length": 38.0, | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 0.25535632583207873, | |
| "kl": 0.0040679931640625, | |
| "learning_rate": 4.805477279695852e-07, | |
| "loss": 4.070028444402851e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9380555152893066, | |
| "reward_std": 0.27259209752082825, | |
| "rewards/CODEORM/mean": 0.012708333320915699, | |
| "rewards/CODEORM/std": 0.060444481670856476, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4263888895511627, | |
| "rewards/VQAORM/std": 0.49472349882125854, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.003152 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1982.0, | |
| "completions/mean_length": 342.07708740234375, | |
| "completions/min_length": 45.0, | |
| "epoch": 0.5, | |
| "grad_norm": 0.2757470110574285, | |
| "kl": 0.005291748046875, | |
| "learning_rate": 4.789192520832462e-07, | |
| "loss": 5.291224078973755e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9945833086967468, | |
| "reward_std": 0.2432967722415924, | |
| "rewards/CODEORM/mean": 0.017500000074505806, | |
| "rewards/CODEORM/std": 0.0703362300992012, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.47777777910232544, | |
| "rewards/VQAORM/std": 0.49967944622039795, | |
| "step": 31, | |
| "train_speed(iter/s)": 0.00317 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1852.0, | |
| "completions/mean_length": 358.5646057128906, | |
| "completions/min_length": 55.0, | |
| "epoch": 0.5161290322580645, | |
| "grad_norm": 0.24973674461489612, | |
| "kl": 0.00535888671875, | |
| "learning_rate": 4.772286604173125e-07, | |
| "loss": 5.361745570553467e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9641666412353516, | |
| "reward_std": 0.24900199472904205, | |
| "rewards/CODEORM/mean": 0.02041666768491268, | |
| "rewards/CODEORM/std": 0.07557861506938934, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.4444444477558136, | |
| "rewards/VQAORM/std": 0.4970766007900238, | |
| "step": 32, | |
| "train_speed(iter/s)": 0.003173 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2454.0, | |
| "completions/mean_length": 316.1222229003906, | |
| "completions/min_length": 36.0, | |
| "epoch": 0.532258064516129, | |
| "grad_norm": 0.27765379348097485, | |
| "kl": 0.005023193359375, | |
| "learning_rate": 4.754764679423827e-07, | |
| "loss": 5.026329745305702e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9695139527320862, | |
| "reward_std": 0.23932775855064392, | |
| "rewards/CODEORM/mean": 0.013958333991467953, | |
| "rewards/CODEORM/std": 0.06320948898792267, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.4555555582046509, | |
| "rewards/VQAORM/std": 0.49819380044937134, | |
| "step": 33, | |
| "train_speed(iter/s)": 0.003188 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2153.0, | |
| "completions/mean_length": 378.8312683105469, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.5483870967741935, | |
| "grad_norm": 0.21472738186965287, | |
| "kl": 0.006378173828125, | |
| "learning_rate": 4.7366320839325856e-07, | |
| "loss": 6.384468724718317e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0134028196334839, | |
| "reward_std": 0.2518027424812317, | |
| "rewards/CODEORM/mean": 0.015833334997296333, | |
| "rewards/CODEORM/std": 0.0671001672744751, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.49861112236976624, | |
| "rewards/VQAORM/std": 0.5001717805862427, | |
| "step": 34, | |
| "train_speed(iter/s)": 0.003199 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2375.0, | |
| "completions/mean_length": 337.39654541015625, | |
| "completions/min_length": 56.0, | |
| "epoch": 0.5645161290322581, | |
| "grad_norm": 0.24373439505720154, | |
| "kl": 0.00570068359375, | |
| "learning_rate": 4.71789434106364e-07, | |
| "loss": 5.708396929549053e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0311111211776733, | |
| "reward_std": 0.2390429675579071, | |
| "rewards/CODEORM/mean": 0.013750000856816769, | |
| "rewards/CODEORM/std": 0.06275884807109833, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.5173611044883728, | |
| "rewards/VQAORM/std": 0.499872088432312, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.003203 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2472.0, | |
| "completions/mean_length": 378.3333435058594, | |
| "completions/min_length": 46.0, | |
| "epoch": 0.5806451612903226, | |
| "grad_norm": 0.2500603222917725, | |
| "kl": 0.008148193359375, | |
| "learning_rate": 4.698557158514987e-07, | |
| "loss": 8.145920583046973e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9934723377227783, | |
| "reward_std": 0.26507648825645447, | |
| "rewards/CODEORM/mean": 0.020208334550261497, | |
| "rewards/CODEORM/std": 0.07522002607584, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615620791912, | |
| "rewards/VQAORM/mean": 0.47361111640930176, | |
| "rewards/VQAORM/std": 0.4994766116142273, | |
| "step": 36, | |
| "train_speed(iter/s)": 0.003211 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2245.0, | |
| "completions/mean_length": 382.0798645019531, | |
| "completions/min_length": 63.0, | |
| "epoch": 0.5967741935483871, | |
| "grad_norm": 0.21874458117115267, | |
| "kl": 0.009130859375, | |
| "learning_rate": 4.6786264265797526e-07, | |
| "loss": 9.136443986790255e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9631944894790649, | |
| "reward_std": 0.22931374609470367, | |
| "rewards/CODEORM/mean": 0.02083333395421505, | |
| "rewards/CODEORM/std": 0.07628901302814484, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49791666865348816, | |
| "rewards/FMTORM/std": 0.03221873939037323, | |
| "rewards/VQAORM/mean": 0.4444444477558136, | |
| "rewards/VQAORM/std": 0.4970766305923462, | |
| "step": 37, | |
| "train_speed(iter/s)": 0.003219 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2041.0, | |
| "completions/mean_length": 408.1896057128906, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.6129032258064516, | |
| "grad_norm": 0.23783779914096984, | |
| "kl": 0.0090576171875, | |
| "learning_rate": 4.658108216351958e-07, | |
| "loss": 9.058650903170928e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9589584469795227, | |
| "reward_std": 0.25281450152397156, | |
| "rewards/CODEORM/mean": 0.01833333447575569, | |
| "rewards/CODEORM/std": 0.07188516855239868, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4416666626930237, | |
| "rewards/VQAORM/std": 0.4967580735683441, | |
| "step": 38, | |
| "train_speed(iter/s)": 0.003221 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2560.0, | |
| "completions/mean_length": 425.4576416015625, | |
| "completions/min_length": 36.0, | |
| "epoch": 0.6290322580645161, | |
| "grad_norm": 0.25328193961667955, | |
| "kl": 0.009716796875, | |
| "learning_rate": 4.6370087778772037e-07, | |
| "loss": 9.695239714346826e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9884028434753418, | |
| "reward_std": 0.2516257166862488, | |
| "rewards/CODEORM/mean": 0.016875000670552254, | |
| "rewards/CODEORM/std": 0.06914517283439636, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.47291669249534607, | |
| "rewards/VQAORM/std": 0.49943938851356506, | |
| "step": 39, | |
| "train_speed(iter/s)": 0.003227 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2677.0, | |
| "completions/mean_length": 380.4618225097656, | |
| "completions/min_length": 57.0, | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 0.24632981619112712, | |
| "kl": 0.009765625, | |
| "learning_rate": 4.6153345382488437e-07, | |
| "loss": 9.762972331373021e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0220834016799927, | |
| "reward_std": 0.2817615866661072, | |
| "rewards/CODEORM/mean": 0.017916668206453323, | |
| "rewards/CODEORM/std": 0.071116141974926, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.5048611164093018, | |
| "rewards/VQAORM/std": 0.5001500844955444, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.003233 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2209.0, | |
| "completions/mean_length": 347.9048767089844, | |
| "completions/min_length": 43.0, | |
| "epoch": 0.6612903225806451, | |
| "grad_norm": 0.2448212519488402, | |
| "kl": 0.007769775390625, | |
| "learning_rate": 4.5930920996502315e-07, | |
| "loss": 7.772096432745457e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9471527338027954, | |
| "reward_std": 0.2524837851524353, | |
| "rewards/CODEORM/mean": 0.012083334848284721, | |
| "rewards/CODEORM/std": 0.05900347977876663, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280591055750847, | |
| "rewards/VQAORM/mean": 0.43611112236976624, | |
| "rewards/VQAORM/std": 0.4960736930370331, | |
| "step": 41, | |
| "train_speed(iter/s)": 0.003239 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2924.0, | |
| "completions/mean_length": 363.67987060546875, | |
| "completions/min_length": 47.0, | |
| "epoch": 0.6774193548387096, | |
| "grad_norm": 0.22163591343338251, | |
| "kl": 0.0093994140625, | |
| "learning_rate": 4.5702882373436314e-07, | |
| "loss": 9.394727385370061e-05, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9790278077125549, | |
| "reward_std": 0.2052461951971054, | |
| "rewards/CODEORM/mean": 0.016875000670552254, | |
| "rewards/CODEORM/std": 0.06914517283439636, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615807056427, | |
| "rewards/VQAORM/mean": 0.4625000059604645, | |
| "rewards/VQAORM/std": 0.4987649917602539, | |
| "step": 42, | |
| "train_speed(iter/s)": 0.003243 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2287.0, | |
| "completions/mean_length": 374.9638977050781, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.6935483870967742, | |
| "grad_norm": 0.2311530259898522, | |
| "kl": 0.01051025390625, | |
| "learning_rate": 4.546929897606409e-07, | |
| "loss": 0.00010506305989110842, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9784722328186035, | |
| "reward_std": 0.2506452798843384, | |
| "rewards/CODEORM/mean": 0.01666666753590107, | |
| "rewards/CODEORM/std": 0.0687422975897789, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.4625000059604645, | |
| "rewards/VQAORM/std": 0.4987649917602539, | |
| "step": 43, | |
| "train_speed(iter/s)": 0.00325 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3019.0, | |
| "completions/mean_length": 345.5111083984375, | |
| "completions/min_length": 41.0, | |
| "epoch": 0.7096774193548387, | |
| "grad_norm": 0.25687083497515134, | |
| "kl": 0.0111328125, | |
| "learning_rate": 4.5230241956151236e-07, | |
| "loss": 0.00011140214337501675, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.019861102104187, | |
| "reward_std": 0.24551907181739807, | |
| "rewards/CODEORM/mean": 0.017083335667848587, | |
| "rewards/CODEORM/std": 0.06954508274793625, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.5027778148651123, | |
| "rewards/VQAORM/std": 0.5001659989356995, | |
| "step": 44, | |
| "train_speed(iter/s)": 0.00325 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2333.0, | |
| "completions/mean_length": 367.42779541015625, | |
| "completions/min_length": 50.0, | |
| "epoch": 0.7258064516129032, | |
| "grad_norm": 0.237050640265248, | |
| "kl": 0.010888671875, | |
| "learning_rate": 4.4985784132781846e-07, | |
| "loss": 0.00010892213322222233, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9680556058883667, | |
| "reward_std": 0.24429023265838623, | |
| "rewards/CODEORM/mean": 0.01666666753590107, | |
| "rewards/CODEORM/std": 0.0687422975897789, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.4520833492279053, | |
| "rewards/VQAORM/std": 0.49787160754203796, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.003256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2553.0, | |
| "completions/mean_length": 440.5652770996094, | |
| "completions/min_length": 45.0, | |
| "epoch": 0.7419354838709677, | |
| "grad_norm": 0.24164999330022277, | |
| "kl": 0.01668701171875, | |
| "learning_rate": 4.4735999970177e-07, | |
| "loss": 0.00016686106391716748, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0237499475479126, | |
| "reward_std": 0.27759116888046265, | |
| "rewards/CODEORM/mean": 0.0133333345875144, | |
| "rewards/CODEORM/std": 0.061845600605010986, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.5111111402511597, | |
| "rewards/VQAORM/std": 0.5000501871109009, | |
| "step": 46, | |
| "train_speed(iter/s)": 0.003256 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2202.0, | |
| "completions/mean_length": 405.51458740234375, | |
| "completions/min_length": 43.0, | |
| "epoch": 0.7580645161290323, | |
| "grad_norm": 0.25800115560395864, | |
| "kl": 0.01693115234375, | |
| "learning_rate": 4.4480965555012375e-07, | |
| "loss": 0.00016923561634030193, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0121527910232544, | |
| "reward_std": 0.2747058570384979, | |
| "rewards/CODEORM/mean": 0.01770833320915699, | |
| "rewards/CODEORM/std": 0.07072756439447403, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.49513891339302063, | |
| "rewards/VQAORM/std": 0.5001500844955444, | |
| "step": 47, | |
| "train_speed(iter/s)": 0.003258 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 10708.0, | |
| "completions/mean_length": 374.5027770996094, | |
| "completions/min_length": 45.0, | |
| "epoch": 0.7741935483870968, | |
| "grad_norm": 0.2341781405037742, | |
| "kl": 0.01322021484375, | |
| "learning_rate": 4.422075857324137e-07, | |
| "loss": 0.00013221960398368537, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9981943964958191, | |
| "reward_std": 0.23682290315628052, | |
| "rewards/CODEORM/mean": 0.015208334662020206, | |
| "rewards/CODEORM/std": 0.06583476066589355, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.48402780294418335, | |
| "rewards/VQAORM/std": 0.49991846084594727, | |
| "step": 48, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2973.0, | |
| "completions/mean_length": 388.96319580078125, | |
| "completions/min_length": 41.0, | |
| "epoch": 0.7903225806451613, | |
| "grad_norm": 0.23494838565416404, | |
| "kl": 0.01656494140625, | |
| "learning_rate": 4.3955458286431336e-07, | |
| "loss": 0.00016603765834588557, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9825693964958191, | |
| "reward_std": 0.25734853744506836, | |
| "rewards/CODEORM/mean": 0.015208334662020206, | |
| "rewards/CODEORM/std": 0.06583476811647415, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324832811951637, | |
| "rewards/VQAORM/mean": 0.46875, | |
| "rewards/VQAORM/std": 0.49919581413269043, | |
| "step": 49, | |
| "train_speed(iter/s)": 0.003224 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3018.0, | |
| "completions/mean_length": 397.47430419921875, | |
| "completions/min_length": 53.0, | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 0.22328454872730055, | |
| "kl": 0.01668701171875, | |
| "learning_rate": 4.3685145507619647e-07, | |
| "loss": 0.0001667440083110705, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9420834183692932, | |
| "reward_std": 0.23709836602210999, | |
| "rewards/CODEORM/mean": 0.010833334177732468, | |
| "rewards/CODEORM/std": 0.055989425629377365, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.018627425655722618, | |
| "rewards/VQAORM/mean": 0.43194445967674255, | |
| "rewards/VQAORM/std": 0.49551889300346375, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.003226 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2100.0, | |
| "completions/mean_length": 375.0187683105469, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.8225806451612904, | |
| "grad_norm": 0.2476536462434363, | |
| "kl": 0.01484375, | |
| "learning_rate": 4.3409902576697315e-07, | |
| "loss": 0.0001483853702666238, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9879860877990723, | |
| "reward_std": 0.23200082778930664, | |
| "rewards/CODEORM/mean": 0.01229166705161333, | |
| "rewards/CODEORM/std": 0.05948842316865921, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.47708335518836975, | |
| "rewards/VQAORM/std": 0.4996480941772461, | |
| "step": 51, | |
| "train_speed(iter/s)": 0.003218 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2223.0, | |
| "completions/mean_length": 437.4423828125, | |
| "completions/min_length": 49.0, | |
| "epoch": 0.8387096774193549, | |
| "grad_norm": 0.2231399513016873, | |
| "kl": 0.021875, | |
| "learning_rate": 4.3129813335327437e-07, | |
| "loss": 0.000219002366065979, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.004652738571167, | |
| "reward_std": 0.22962474822998047, | |
| "rewards/CODEORM/mean": 0.01229166705161333, | |
| "rewards/CODEORM/std": 0.05948842689394951, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49791666865348816, | |
| "rewards/FMTORM/std": 0.03221873939037323, | |
| "rewards/VQAORM/mean": 0.49444445967674255, | |
| "rewards/VQAORM/std": 0.5001428127288818, | |
| "step": 52, | |
| "train_speed(iter/s)": 0.003217 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2176.0, | |
| "completions/mean_length": 407.48541259765625, | |
| "completions/min_length": 54.0, | |
| "epoch": 0.8548387096774194, | |
| "grad_norm": 0.22842893432608774, | |
| "kl": 0.020361328125, | |
| "learning_rate": 4.2844963101406216e-07, | |
| "loss": 0.00020356160530354828, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9823611378669739, | |
| "reward_std": 0.2580673396587372, | |
| "rewards/CODEORM/mean": 0.009791667573153973, | |
| "rewards/CODEORM/std": 0.05332539603114128, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.47291669249534607, | |
| "rewards/VQAORM/std": 0.49943938851356506, | |
| "step": 53, | |
| "train_speed(iter/s)": 0.003195 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 9521.0, | |
| "completions/mean_length": 426.1222229003906, | |
| "completions/min_length": 61.0, | |
| "epoch": 0.8709677419354839, | |
| "grad_norm": 0.2174558142577718, | |
| "kl": 0.0199951171875, | |
| "learning_rate": 4.255543864307431e-07, | |
| "loss": 0.00020004776888526976, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9699999690055847, | |
| "reward_std": 0.24552029371261597, | |
| "rewards/CODEORM/mean": 0.008541667833924294, | |
| "rewards/CODEORM/std": 0.049912624061107635, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4625000059604645, | |
| "rewards/VQAORM/std": 0.4987649917602539, | |
| "step": 54, | |
| "train_speed(iter/s)": 0.003179 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2597.0, | |
| "completions/mean_length": 480.8597412109375, | |
| "completions/min_length": 63.0, | |
| "epoch": 0.8870967741935484, | |
| "grad_norm": 0.23478429409929014, | |
| "kl": 0.0271484375, | |
| "learning_rate": 4.226132815228641e-07, | |
| "loss": 0.0002712236891966313, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9563194513320923, | |
| "reward_std": 0.26775386929512024, | |
| "rewards/CODEORM/mean": 0.007708333898335695, | |
| "rewards/CODEORM/std": 0.04748312756419182, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.02632482908666134, | |
| "rewards/VQAORM/mean": 0.45000001788139343, | |
| "rewards/VQAORM/std": 0.49766653776168823, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.00318 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2971.0, | |
| "completions/mean_length": 435.2521057128906, | |
| "completions/min_length": 52.0, | |
| "epoch": 0.9032258064516129, | |
| "grad_norm": 0.22207675380665862, | |
| "kl": 0.0236572265625, | |
| "learning_rate": 4.1962721217947136e-07, | |
| "loss": 0.00023679251899011433, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9582638740539551, | |
| "reward_std": 0.2368679940700531, | |
| "rewards/CODEORM/mean": 0.006875000428408384, | |
| "rewards/CODEORM/std": 0.04490695893764496, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49791666865348816, | |
| "rewards/FMTORM/std": 0.03221874311566353, | |
| "rewards/VQAORM/mean": 0.45347222685813904, | |
| "rewards/VQAORM/std": 0.49800342321395874, | |
| "step": 56, | |
| "train_speed(iter/s)": 0.003181 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2578.0, | |
| "completions/mean_length": 446.1222229003906, | |
| "completions/min_length": 47.0, | |
| "epoch": 0.9193548387096774, | |
| "grad_norm": 0.23718890954848998, | |
| "kl": 0.0261474609375, | |
| "learning_rate": 4.165970879862134e-07, | |
| "loss": 0.0002614260884001851, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9674999713897705, | |
| "reward_std": 0.25433528423309326, | |
| "rewards/CODEORM/mean": 0.006041666492819786, | |
| "rewards/CODEORM/std": 0.04215723276138306, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4625000059604645, | |
| "rewards/VQAORM/std": 0.4987649917602539, | |
| "step": 57, | |
| "train_speed(iter/s)": 0.003184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3103.0, | |
| "completions/mean_length": 439.11181640625, | |
| "completions/min_length": 52.0, | |
| "epoch": 0.9354838709677419, | |
| "grad_norm": 0.20743082364096332, | |
| "kl": 0.0252685546875, | |
| "learning_rate": 4.1352383194827306e-07, | |
| "loss": 0.00025291036581620574, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9715972542762756, | |
| "reward_std": 0.2394816130399704, | |
| "rewards/CODEORM/mean": 0.007708333898335695, | |
| "rewards/CODEORM/std": 0.04748312756419182, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49722224473953247, | |
| "rewards/FMTORM/std": 0.03717704117298126, | |
| "rewards/VQAORM/mean": 0.46666666865348816, | |
| "rewards/VQAORM/std": 0.4990609884262085, | |
| "step": 58, | |
| "train_speed(iter/s)": 0.003184 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3248.0, | |
| "completions/mean_length": 433.08404541015625, | |
| "completions/min_length": 55.0, | |
| "epoch": 0.9516129032258065, | |
| "grad_norm": 0.2068345779016102, | |
| "kl": 0.021630859375, | |
| "learning_rate": 4.1040838020921087e-07, | |
| "loss": 0.00021632216521538794, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9704861640930176, | |
| "reward_std": 0.21852527558803558, | |
| "rewards/CODEORM/mean": 0.0062500000931322575, | |
| "rewards/CODEORM/std": 0.042862728238105774, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4652777910232544, | |
| "rewards/VQAORM/std": 0.49896618723869324, | |
| "step": 59, | |
| "train_speed(iter/s)": 0.003185 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2551.0, | |
| "completions/mean_length": 426.58404541015625, | |
| "completions/min_length": 45.0, | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 0.2064093927604063, | |
| "kl": 0.022265625, | |
| "learning_rate": 4.072516817658064e-07, | |
| "loss": 0.00022273208014667034, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9838889241218567, | |
| "reward_std": 0.22381478548049927, | |
| "rewards/CODEORM/mean": 0.006458332762122154, | |
| "rewards/CODEORM/std": 0.04355579614639282, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4975694417953491, | |
| "rewards/FMTORM/std": 0.03478807955980301, | |
| "rewards/VQAORM/mean": 0.4798611104488373, | |
| "rewards/VQAORM/std": 0.49976783990859985, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3516.0, | |
| "completions/mean_length": 447.4507141113281, | |
| "completions/min_length": 59.0, | |
| "epoch": 0.9838709677419355, | |
| "grad_norm": 0.2458563564310446, | |
| "kl": 0.02421875, | |
| "learning_rate": 4.0405469817898533e-07, | |
| "loss": 0.00024220610794145614, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9372223019599915, | |
| "reward_std": 0.25408098101615906, | |
| "rewards/CODEORM/mean": 0.004583333153277636, | |
| "rewards/CODEORM/std": 0.03680942580103874, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4333333373069763, | |
| "rewards/VQAORM/std": 0.49570778012275696, | |
| "step": 61, | |
| "train_speed(iter/s)": 0.003159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2159.0, | |
| "completions/mean_length": 462.3416748046875, | |
| "completions/min_length": 40.0, | |
| "epoch": 1.0, | |
| "grad_norm": 0.22387971484005992, | |
| "kl": 0.0287353515625, | |
| "learning_rate": 4.00818403280918e-07, | |
| "loss": 0.0002869199379347265, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0059027671813965, | |
| "reward_std": 0.26113608479499817, | |
| "rewards/CODEORM/mean": 0.0052083334885537624, | |
| "rewards/CODEORM/std": 0.03919745236635208, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.02632482908666134, | |
| "rewards/VQAORM/mean": 0.5020833611488342, | |
| "rewards/VQAORM/std": 0.5001693964004517, | |
| "step": 62, | |
| "train_speed(iter/s)": 0.003159 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2093.0, | |
| "completions/mean_length": 459.2437744140625, | |
| "completions/min_length": 51.0, | |
| "epoch": 1.0161290322580645, | |
| "grad_norm": 0.2176432920608644, | |
| "kl": 0.027392578125, | |
| "learning_rate": 3.9754378287838106e-07, | |
| "loss": 0.0002737568283919245, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9327083230018616, | |
| "reward_std": 0.22113071382045746, | |
| "rewards/CODEORM/mean": 0.003541666781529784, | |
| "rewards/CODEORM/std": 0.03241429105401039, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4298611283302307, | |
| "rewards/VQAORM/std": 0.4952280521392822, | |
| "step": 63, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3442.0, | |
| "completions/mean_length": 413.9354248046875, | |
| "completions/min_length": 53.0, | |
| "epoch": 1.032258064516129, | |
| "grad_norm": 0.21234850263649102, | |
| "kl": 0.0212158203125, | |
| "learning_rate": 3.942318344524711e-07, | |
| "loss": 0.00021199470211286098, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9656250476837158, | |
| "reward_std": 0.21792437136173248, | |
| "rewards/CODEORM/mean": 0.00520833395421505, | |
| "rewards/CODEORM/std": 0.03919744864106178, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.4618055522441864, | |
| "rewards/VQAORM/std": 0.4987122416496277, | |
| "step": 64, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3102.0, | |
| "completions/mean_length": 438.6458435058594, | |
| "completions/min_length": 54.0, | |
| "epoch": 1.0483870967741935, | |
| "grad_norm": 0.2232724953512252, | |
| "kl": 0.0247314453125, | |
| "learning_rate": 3.908835668547622e-07, | |
| "loss": 0.0002475287183187902, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9796527624130249, | |
| "reward_std": 0.23396123945713043, | |
| "rewards/CODEORM/mean": 0.007083333563059568, | |
| "rewards/CODEORM/std": 0.04556608572602272, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.47361111640930176, | |
| "rewards/VQAORM/std": 0.4994766116142273, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3270.0, | |
| "completions/mean_length": 499.7298583984375, | |
| "completions/min_length": 56.0, | |
| "epoch": 1.064516129032258, | |
| "grad_norm": 0.23317861529104608, | |
| "kl": 0.032275390625, | |
| "learning_rate": 3.875e-07, | |
| "loss": 0.0003234185860492289, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9625694751739502, | |
| "reward_std": 0.27475544810295105, | |
| "rewards/CODEORM/mean": 0.003541666781529784, | |
| "rewards/CODEORM/std": 0.03241428732872009, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.46041667461395264, | |
| "rewards/VQAORM/std": 0.49860385060310364, | |
| "step": 66, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3353.0, | |
| "completions/mean_length": 418.64166259765625, | |
| "completions/min_length": 49.0, | |
| "epoch": 1.0806451612903225, | |
| "grad_norm": 0.23413187500533847, | |
| "kl": 0.0256103515625, | |
| "learning_rate": 3.8408216455542586e-07, | |
| "loss": 0.00025615975027903914, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.022291660308838, | |
| "reward_std": 0.27045774459838867, | |
| "rewards/CODEORM/mean": 0.004583333618938923, | |
| "rewards/CODEORM/std": 0.03680942952632904, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.5180555582046509, | |
| "rewards/VQAORM/std": 0.4998474717140198, | |
| "step": 67, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2800.0, | |
| "completions/mean_length": 415.0632019042969, | |
| "completions/min_length": 54.0, | |
| "epoch": 1.096774193548387, | |
| "grad_norm": 0.2280696197690621, | |
| "kl": 0.0226318359375, | |
| "learning_rate": 3.8063110162682536e-07, | |
| "loss": 0.00022613290639128536, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9749305248260498, | |
| "reward_std": 0.23834767937660217, | |
| "rewards/CODEORM/mean": 0.0037500003818422556, | |
| "rewards/CODEORM/std": 0.033342309296131134, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615620791912, | |
| "rewards/VQAORM/mean": 0.4715277850627899, | |
| "rewards/VQAORM/std": 0.4993620812892914, | |
| "step": 68, | |
| "train_speed(iter/s)": 0.003163 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3211.0, | |
| "completions/mean_length": 476.21875, | |
| "completions/min_length": 40.0, | |
| "epoch": 1.1129032258064515, | |
| "grad_norm": 0.25556590033515914, | |
| "kl": 0.0304443359375, | |
| "learning_rate": 3.77147862441398e-07, | |
| "loss": 0.0003040857263840735, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0324305295944214, | |
| "reward_std": 0.2670601010322571, | |
| "rewards/CODEORM/mean": 0.003958333283662796, | |
| "rewards/CODEORM/std": 0.03424391895532608, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.5291666984558105, | |
| "rewards/VQAORM/std": 0.49932199716567993, | |
| "step": 69, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2698.0, | |
| "completions/mean_length": 457.7632141113281, | |
| "completions/min_length": 46.0, | |
| "epoch": 1.129032258064516, | |
| "grad_norm": 0.22078914084547285, | |
| "kl": 0.026123046875, | |
| "learning_rate": 3.736335080275424e-07, | |
| "loss": 0.0002610915689729154, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.988194465637207, | |
| "reward_std": 0.22356846928596497, | |
| "rewards/CODEORM/mean": 0.0052083334885537624, | |
| "rewards/CODEORM/std": 0.03919745236635208, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.48402780294418335, | |
| "rewards/VQAORM/std": 0.4999184310436249, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3641.0, | |
| "completions/mean_length": 471.3034973144531, | |
| "completions/min_length": 54.0, | |
| "epoch": 1.1451612903225807, | |
| "grad_norm": 0.22352257730187777, | |
| "kl": 0.0291015625, | |
| "learning_rate": 3.7008910889165734e-07, | |
| "loss": 0.00029054959304630756, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9690278172492981, | |
| "reward_std": 0.23872590065002441, | |
| "rewards/CODEORM/mean": 0.002708333544433117, | |
| "rewards/CODEORM/std": 0.028385289013385773, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.46666666865348816, | |
| "rewards/VQAORM/std": 0.4990609884262085, | |
| "step": 71, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2802.0, | |
| "completions/mean_length": 415.3722229003906, | |
| "completions/min_length": 60.0, | |
| "epoch": 1.1612903225806452, | |
| "grad_norm": 0.24022424561360972, | |
| "kl": 0.0269287109375, | |
| "learning_rate": 3.6651574469205503e-07, | |
| "loss": 0.0002691347326617688, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0179860591888428, | |
| "reward_std": 0.2555491328239441, | |
| "rewards/CODEORM/mean": 0.004791666753590107, | |
| "rewards/CODEORM/std": 0.037623435258865356, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.5138888955116272, | |
| "rewards/VQAORM/std": 0.4999806880950928, | |
| "step": 72, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2134.0, | |
| "completions/mean_length": 447.7007141113281, | |
| "completions/min_length": 52.0, | |
| "epoch": 1.1774193548387097, | |
| "grad_norm": 0.20648518009940414, | |
| "kl": 0.03193359375, | |
| "learning_rate": 3.6291450391008654e-07, | |
| "loss": 0.0003187285328749567, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.003819465637207, | |
| "reward_std": 0.19375617802143097, | |
| "rewards/CODEORM/mean": 0.0020833334419876337, | |
| "rewards/CODEORM/std": 0.02492169663310051, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.5020833611488342, | |
| "rewards/VQAORM/std": 0.5001693367958069, | |
| "step": 73, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2162.0, | |
| "completions/mean_length": 438.4805603027344, | |
| "completions/min_length": 49.0, | |
| "epoch": 1.1935483870967742, | |
| "grad_norm": 0.2063264046724451, | |
| "kl": 0.0267333984375, | |
| "learning_rate": 3.592864835185802e-07, | |
| "loss": 0.00026784028159454465, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9955556392669678, | |
| "reward_std": 0.22626902163028717, | |
| "rewards/CODEORM/mean": 0.002500000176951289, | |
| "rewards/CODEORM/std": 0.027281256392598152, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.5, | |
| "rewards/FMTORM/std": 0.0, | |
| "rewards/VQAORM/mean": 0.4930555820465088, | |
| "rewards/VQAORM/std": 0.5001254677772522, | |
| "step": 74, | |
| "train_speed(iter/s)": 0.003162 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2497.0, | |
| "completions/mean_length": 390.0340270996094, | |
| "completions/min_length": 44.0, | |
| "epoch": 1.2096774193548387, | |
| "grad_norm": 0.2377329072915874, | |
| "kl": 0.0220458984375, | |
| "learning_rate": 3.556327886476925e-07, | |
| "loss": 0.00022035164874978364, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9577084183692932, | |
| "reward_std": 0.22338639199733734, | |
| "rewards/CODEORM/mean": 0.002500000409781933, | |
| "rewards/CODEORM/std": 0.027281254529953003, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615807056427, | |
| "rewards/VQAORM/mean": 0.4555555582046509, | |
| "rewards/VQAORM/std": 0.49819377064704895, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.003161 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2510.0, | |
| "completions/mean_length": 432.8798828125, | |
| "completions/min_length": 59.0, | |
| "epoch": 1.2258064516129032, | |
| "grad_norm": 0.2016163479831072, | |
| "kl": 0.0228759765625, | |
| "learning_rate": 3.5195453224827547e-07, | |
| "loss": 0.0002286717208335176, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9972222447395325, | |
| "reward_std": 0.2204340249300003, | |
| "rewards/CODEORM/mean": 0.0031250000465661287, | |
| "rewards/CODEORM/std": 0.030469313263893127, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.49513891339302063, | |
| "rewards/VQAORM/std": 0.5001500248908997, | |
| "step": 76, | |
| "train_speed(iter/s)": 0.00316 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2529.0, | |
| "completions/mean_length": 454.6923828125, | |
| "completions/min_length": 48.0, | |
| "epoch": 1.2419354838709677, | |
| "grad_norm": 0.22659833554073525, | |
| "kl": 0.0289794921875, | |
| "learning_rate": 3.482528347528602e-07, | |
| "loss": 0.00029045954579487443, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9425694942474365, | |
| "reward_std": 0.24315796792507172, | |
| "rewards/CODEORM/mean": 0.0012500000884756446, | |
| "rewards/CODEORM/std": 0.019331244751811028, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.44236111640930176, | |
| "rewards/VQAORM/std": 0.4968391954898834, | |
| "step": 77, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2246.0, | |
| "completions/mean_length": 443.509033203125, | |
| "completions/min_length": 39.0, | |
| "epoch": 1.2580645161290323, | |
| "grad_norm": 0.23270608066363938, | |
| "kl": 0.026318359375, | |
| "learning_rate": 3.4452882373436316e-07, | |
| "loss": 0.0002637436846271157, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9564583897590637, | |
| "reward_std": 0.2442195564508438, | |
| "rewards/CODEORM/mean": 0.0012500000884756446, | |
| "rewards/CODEORM/std": 0.019331244751811028, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.01317615620791912, | |
| "rewards/VQAORM/mean": 0.4555555582046509, | |
| "rewards/VQAORM/std": 0.49819380044937134, | |
| "step": 78, | |
| "train_speed(iter/s)": 0.003123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2990.0, | |
| "completions/mean_length": 433.7701416015625, | |
| "completions/min_length": 53.0, | |
| "epoch": 1.2741935483870968, | |
| "grad_norm": 0.23460258143973778, | |
| "kl": 0.0271728515625, | |
| "learning_rate": 3.4078363356261576e-07, | |
| "loss": 0.0002716649032663554, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.957777738571167, | |
| "reward_std": 0.22971472144126892, | |
| "rewards/CODEORM/mean": 0.001875000074505806, | |
| "rewards/CODEORM/std": 0.023651065304875374, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.45694446563720703, | |
| "rewards/VQAORM/std": 0.49831584095954895, | |
| "step": 79, | |
| "train_speed(iter/s)": 0.003124 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 6626.0, | |
| "completions/mean_length": 467.6298828125, | |
| "completions/min_length": 60.0, | |
| "epoch": 1.2903225806451613, | |
| "grad_norm": 0.2203448340379557, | |
| "kl": 0.028564453125, | |
| "learning_rate": 3.3701840505882476e-07, | |
| "loss": 0.0002859021187759936, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9722917675971985, | |
| "reward_std": 0.23510144650936127, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872849971055984, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.472222238779068, | |
| "rewards/VQAORM/std": 0.4994012117385864, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2564.0, | |
| "completions/mean_length": 418.5590515136719, | |
| "completions/min_length": 52.0, | |
| "epoch": 1.3064516129032258, | |
| "grad_norm": 0.2324164062331274, | |
| "kl": 0.026513671875, | |
| "learning_rate": 3.332342851480672e-07, | |
| "loss": 0.00026555670774541795, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0112500190734863, | |
| "reward_std": 0.22868500649929047, | |
| "rewards/CODEORM/mean": 0.001875000074505806, | |
| "rewards/CODEORM/std": 0.023651065304875374, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.5104166865348816, | |
| "rewards/VQAORM/std": 0.5000651478767395, | |
| "step": 81, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3178.0, | |
| "completions/mean_length": 458.19097900390625, | |
| "completions/min_length": 40.0, | |
| "epoch": 1.3225806451612903, | |
| "grad_norm": 0.23312846161895653, | |
| "kl": 0.0256103515625, | |
| "learning_rate": 3.294324265099252e-07, | |
| "loss": 0.0002559431886766106, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9519445300102234, | |
| "reward_std": 0.23361359536647797, | |
| "rewards/CODEORM/mean": 0.0012500000884756446, | |
| "rewards/CODEORM/std": 0.019331244751811028, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4513888955116272, | |
| "rewards/VQAORM/std": 0.4978042244911194, | |
| "step": 82, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2874.0, | |
| "completions/mean_length": 389.6625061035156, | |
| "completions/min_length": 54.0, | |
| "epoch": 1.3387096774193548, | |
| "grad_norm": 0.21650063881691448, | |
| "kl": 0.0203857421875, | |
| "learning_rate": 3.256139872273696e-07, | |
| "loss": 0.00020405216491781175, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9847917556762695, | |
| "reward_std": 0.2361350655555725, | |
| "rewards/CODEORM/mean": 0.002500000176951289, | |
| "rewards/CODEORM/std": 0.027281256392598152, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.4833333492279053, | |
| "rewards/VQAORM/std": 0.49989575147628784, | |
| "step": 83, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3981.0, | |
| "completions/mean_length": 438.977783203125, | |
| "completions/min_length": 64.0, | |
| "epoch": 1.3548387096774195, | |
| "grad_norm": 0.2017820781256394, | |
| "kl": 0.025146484375, | |
| "learning_rate": 3.217801304339958e-07, | |
| "loss": 0.0002516305830795318, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9712501168251038, | |
| "reward_std": 0.23558129370212555, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872849971055984, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.47083333134651184, | |
| "rewards/VQAORM/std": 0.49932199716567993, | |
| "step": 84, | |
| "train_speed(iter/s)": 0.003115 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3085.0, | |
| "completions/mean_length": 471.2632141113281, | |
| "completions/min_length": 40.0, | |
| "epoch": 1.370967741935484, | |
| "grad_norm": 0.23160273619017643, | |
| "kl": 0.027685546875, | |
| "learning_rate": 3.1793202395972255e-07, | |
| "loss": 0.000276345235761255, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9824305176734924, | |
| "reward_std": 0.2479579597711563, | |
| "rewards/CODEORM/mean": 0.00083333341171965, | |
| "rewards/CODEORM/std": 0.015794897451996803, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4982638955116272, | |
| "rewards/FMTORM/std": 0.02942180261015892, | |
| "rewards/VQAORM/mean": 0.4833333492279053, | |
| "rewards/VQAORM/std": 0.49989572167396545, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.003114 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3080.0, | |
| "completions/mean_length": 438.87640380859375, | |
| "completions/min_length": 61.0, | |
| "epoch": 1.3870967741935485, | |
| "grad_norm": 0.194152810560773, | |
| "kl": 0.0279052734375, | |
| "learning_rate": 3.1407083997505936e-07, | |
| "loss": 0.0002790595463011414, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9933332800865173, | |
| "reward_std": 0.1885756254196167, | |
| "rewards/CODEORM/mean": 0.0016666668234393, | |
| "rewards/CODEORM/std": 0.022306226193904877, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.02632482908666134, | |
| "rewards/VQAORM/mean": 0.4930555820465088, | |
| "rewards/VQAORM/std": 0.5001254677772522, | |
| "step": 86, | |
| "train_speed(iter/s)": 0.003113 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 1936.0, | |
| "completions/mean_length": 384.3055725097656, | |
| "completions/min_length": 45.0, | |
| "epoch": 1.403225806451613, | |
| "grad_norm": 0.24459951745327296, | |
| "kl": 0.0214599609375, | |
| "learning_rate": 3.1019775463405193e-07, | |
| "loss": 0.0002145486796507612, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9838194251060486, | |
| "reward_std": 0.25452497601509094, | |
| "rewards/CODEORM/mean": 0.001875000074505806, | |
| "rewards/CODEORM/std": 0.023651065304875374, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4826388955116272, | |
| "rewards/VQAORM/std": 0.4998720586299896, | |
| "step": 87, | |
| "train_speed(iter/s)": 0.003116 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2332.0, | |
| "completions/mean_length": 427.3319396972656, | |
| "completions/min_length": 46.0, | |
| "epoch": 1.4193548387096775, | |
| "grad_norm": 0.22526758870168345, | |
| "kl": 0.026171875, | |
| "learning_rate": 3.063139477160147e-07, | |
| "loss": 0.00026203077868558466, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0167361497879028, | |
| "reward_std": 0.27220967411994934, | |
| "rewards/CODEORM/mean": 0.002500000176951289, | |
| "rewards/CODEORM/std": 0.027281256392598152, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.5145833492279053, | |
| "rewards/VQAORM/std": 0.49996086955070496, | |
| "step": 88, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2392.0, | |
| "completions/mean_length": 386.8798828125, | |
| "completions/min_length": 48.0, | |
| "epoch": 1.435483870967742, | |
| "grad_norm": 0.22614274989113003, | |
| "kl": 0.019384765625, | |
| "learning_rate": 3.024206022661582e-07, | |
| "loss": 0.0001937642809934914, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9733334183692932, | |
| "reward_std": 0.23752889037132263, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872849971055984, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.47291669249534607, | |
| "rewards/VQAORM/std": 0.49943938851356506, | |
| "step": 89, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2021.0, | |
| "completions/mean_length": 363.97222900390625, | |
| "completions/min_length": 41.0, | |
| "epoch": 1.4516129032258065, | |
| "grad_norm": 0.2398906373001323, | |
| "kl": 0.0197509765625, | |
| "learning_rate": 2.98518904235222e-07, | |
| "loss": 0.00019763951422646642, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0004167556762695, | |
| "reward_std": 0.23650936782360077, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872849971055984, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4993055760860443, | |
| "rewards/VQAORM/std": 0.5001731514930725, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2460.0, | |
| "completions/mean_length": 369.35418701171875, | |
| "completions/min_length": 44.0, | |
| "epoch": 1.467741935483871, | |
| "grad_norm": 0.23682693521156525, | |
| "kl": 0.020166015625, | |
| "learning_rate": 2.946100421182231e-07, | |
| "loss": 0.00020152229990344495, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0318750143051147, | |
| "reward_std": 0.24585361778736115, | |
| "rewards/CODEORM/mean": 0.0016666668234393, | |
| "rewards/CODEORM/std": 0.022306226193904877, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.5305555462837219, | |
| "rewards/VQAORM/std": 0.49923887848854065, | |
| "step": 91, | |
| "train_speed(iter/s)": 0.003123 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3620.0, | |
| "completions/mean_length": 414.87432861328125, | |
| "completions/min_length": 58.0, | |
| "epoch": 1.4838709677419355, | |
| "grad_norm": 0.23857713339439449, | |
| "kl": 0.0223388671875, | |
| "learning_rate": 2.906952065924282e-07, | |
| "loss": 0.00022341267322190106, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9906944632530212, | |
| "reward_std": 0.2329772561788559, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872849971055984, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4895833432674408, | |
| "rewards/VQAORM/std": 0.5000651478767395, | |
| "step": 92, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2573.0, | |
| "completions/mean_length": 390.6229248046875, | |
| "completions/min_length": 56.0, | |
| "epoch": 1.5, | |
| "grad_norm": 0.2533005916156193, | |
| "kl": 0.0206298828125, | |
| "learning_rate": 2.867755901546624e-07, | |
| "loss": 0.00020597083494067192, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0013195276260376, | |
| "reward_std": 0.2565329372882843, | |
| "rewards/CODEORM/mean": 0.0006250000442378223, | |
| "rewards/CODEORM/std": 0.013683545403182507, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.02632482908666134, | |
| "rewards/VQAORM/mean": 0.5020833611488342, | |
| "rewards/VQAORM/std": 0.5001693964004517, | |
| "step": 93, | |
| "train_speed(iter/s)": 0.003122 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3188.0, | |
| "completions/mean_length": 376.4840393066406, | |
| "completions/min_length": 55.0, | |
| "epoch": 1.5161290322580645, | |
| "grad_norm": 0.20748704361554995, | |
| "kl": 0.018408203125, | |
| "learning_rate": 2.8285238675806277e-07, | |
| "loss": 0.00018427410395815969, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.932847261428833, | |
| "reward_std": 0.21737442910671234, | |
| "rewards/CODEORM/mean": 0.0012500000884756446, | |
| "rewards/CODEORM/std": 0.01933124288916588, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.43263891339302063, | |
| "rewards/VQAORM/std": 0.49561378359794617, | |
| "step": 94, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3588.0, | |
| "completions/mean_length": 389.97015380859375, | |
| "completions/min_length": 50.0, | |
| "epoch": 1.532258064516129, | |
| "grad_norm": 0.21959352013259997, | |
| "kl": 0.0175537109375, | |
| "learning_rate": 2.7892679144838883e-07, | |
| "loss": 0.0001756453566486016, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9204861521720886, | |
| "reward_std": 0.2225702553987503, | |
| "rewards/CODEORM/mean": 0.0010416667209938169, | |
| "rewards/CODEORM/std": 0.0176530834287405, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49861112236976624, | |
| "rewards/FMTORM/std": 0.026324830949306488, | |
| "rewards/VQAORM/mean": 0.4208333492279053, | |
| "rewards/VQAORM/std": 0.49386435747146606, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2273.0, | |
| "completions/mean_length": 411.7618103027344, | |
| "completions/min_length": 48.0, | |
| "epoch": 1.5483870967741935, | |
| "grad_norm": 0.22487601694634113, | |
| "kl": 0.0216552734375, | |
| "learning_rate": 2.75e-07, | |
| "loss": 0.00021632894640788436, | |
| "memory(GiB)": 72.55, | |
| "reward": 1.0133333206176758, | |
| "reward_std": 0.2392030954360962, | |
| "rewards/CODEORM/mean": 0.001875000074505806, | |
| "rewards/CODEORM/std": 0.023651065304875374, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4989583492279053, | |
| "rewards/FMTORM/std": 0.02280590869486332, | |
| "rewards/VQAORM/mean": 0.512499988079071, | |
| "rewards/VQAORM/std": 0.5000174045562744, | |
| "step": 96, | |
| "train_speed(iter/s)": 0.003118 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2213.0, | |
| "completions/mean_length": 377.1131896972656, | |
| "completions/min_length": 59.0, | |
| "epoch": 1.564516129032258, | |
| "grad_norm": 0.22674694657896516, | |
| "kl": 0.018505859375, | |
| "learning_rate": 2.710732085516112e-07, | |
| "loss": 0.0001852215063991025, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9863194823265076, | |
| "reward_std": 0.23460441827774048, | |
| "rewards/CODEORM/mean": 0.0012500000884756446, | |
| "rewards/CODEORM/std": 0.019331244751811028, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4854166805744171, | |
| "rewards/VQAORM/std": 0.49996092915534973, | |
| "step": 97, | |
| "train_speed(iter/s)": 0.003119 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2510.0, | |
| "completions/mean_length": 392.0382080078125, | |
| "completions/min_length": 40.0, | |
| "epoch": 1.5806451612903225, | |
| "grad_norm": 0.21844911939638736, | |
| "kl": 0.02021484375, | |
| "learning_rate": 2.6714761324193736e-07, | |
| "loss": 0.00020227438653819263, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.962569534778595, | |
| "reward_std": 0.22012458741664886, | |
| "rewards/CODEORM/mean": 0.0014583334559574723, | |
| "rewards/CODEORM/std": 0.020872848108410835, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.4993055760860443, | |
| "rewards/FMTORM/std": 0.01862742379307747, | |
| "rewards/VQAORM/mean": 0.4618055522441864, | |
| "rewards/VQAORM/std": 0.4987122416496277, | |
| "step": 98, | |
| "train_speed(iter/s)": 0.00312 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 2929.0, | |
| "completions/mean_length": 382.51458740234375, | |
| "completions/min_length": 46.0, | |
| "epoch": 1.596774193548387, | |
| "grad_norm": 0.21209971181854506, | |
| "kl": 0.018798828125, | |
| "learning_rate": 2.632244098453376e-07, | |
| "loss": 0.0001878054899862036, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9566666483879089, | |
| "reward_std": 0.22168388962745667, | |
| "rewards/CODEORM/mean": 0.0014583333395421505, | |
| "rewards/CODEORM/std": 0.020872848108410835, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49965280294418335, | |
| "rewards/FMTORM/std": 0.013176157139241695, | |
| "rewards/VQAORM/mean": 0.4555555582046509, | |
| "rewards/VQAORM/std": 0.49819380044937134, | |
| "step": 99, | |
| "train_speed(iter/s)": 0.003121 | |
| }, | |
| { | |
| "clip_ratio/high_max": 0.0, | |
| "clip_ratio/high_mean": 0.0, | |
| "clip_ratio/low_mean": 0.0, | |
| "clip_ratio/low_min": 0.0, | |
| "clip_ratio/region_mean": 0.0, | |
| "completions/clipped_ratio": 0.0, | |
| "completions/max_length": 3312.0, | |
| "completions/mean_length": 380.2840270996094, | |
| "completions/min_length": 49.0, | |
| "epoch": 1.6129032258064515, | |
| "grad_norm": 0.21940988188704383, | |
| "kl": 0.01865234375, | |
| "learning_rate": 2.593047934075718e-07, | |
| "loss": 0.0001866342208813876, | |
| "memory(GiB)": 72.55, | |
| "reward": 0.9885416626930237, | |
| "reward_std": 0.22178694605827332, | |
| "rewards/CODEORM/mean": 0.0010416667209938169, | |
| "rewards/CODEORM/std": 0.0176530834287405, | |
| "rewards/CSTORM/mean": 0.0, | |
| "rewards/CSTORM/std": 0.0, | |
| "rewards/FMTORM/mean": 0.49791666865348816, | |
| "rewards/FMTORM/std": 0.03221873939037323, | |
| "rewards/VQAORM/mean": 0.4895833432674408, | |
| "rewards/VQAORM/std": 0.5000651478767395, | |
| "step": 100, | |
| "train_speed(iter/s)": 0.003122 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 186, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |