{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.402861860209136,
  "eval_steps": 500,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00550357732526142,
      "grad_norm": 5.828019142150879,
      "learning_rate": 6.47007042253521e-06,
      "loss": 8.235,
      "step": 50
    },
    {
      "epoch": 0.01100715465052284,
      "grad_norm": 15.358248710632324,
      "learning_rate": 1.3072183098591547e-05,
      "loss": 6.0857,
      "step": 100
    },
    {
      "epoch": 0.01651073197578426,
      "grad_norm": 7.99273681640625,
      "learning_rate": 1.9674295774647885e-05,
      "loss": 4.4315,
      "step": 150
    },
    {
      "epoch": 0.02201430930104568,
      "grad_norm": 2.8550803661346436,
      "learning_rate": 2.6276408450704222e-05,
      "loss": 3.681,
      "step": 200
    },
    {
      "epoch": 0.0275178866263071,
      "grad_norm": 2.3769114017486572,
      "learning_rate": 3.2878521126760565e-05,
      "loss": 3.4729,
      "step": 250
    },
    {
      "epoch": 0.03302146395156852,
      "grad_norm": 5.98811149597168,
      "learning_rate": 3.94806338028169e-05,
      "loss": 3.3829,
      "step": 300
    },
    {
      "epoch": 0.03852504127682994,
      "grad_norm": 3.616163492202759,
      "learning_rate": 4.608274647887324e-05,
      "loss": 3.4119,
      "step": 350
    },
    {
      "epoch": 0.04402861860209136,
      "grad_norm": 2.1746344566345215,
      "learning_rate": 5.268485915492957e-05,
      "loss": 3.3955,
      "step": 400
    },
    {
      "epoch": 0.04953219592735278,
      "grad_norm": 1.3189276456832886,
      "learning_rate": 5.928697183098591e-05,
      "loss": 3.3177,
      "step": 450
    },
    {
      "epoch": 0.0550357732526142,
      "grad_norm": 1.2459770441055298,
      "learning_rate": 6.588908450704225e-05,
      "loss": 3.3095,
      "step": 500
    },
    {
      "epoch": 0.06053935057787562,
      "grad_norm": 1.2855902910232544,
      "learning_rate": 7.249119718309858e-05,
      "loss": 3.2437,
      "step": 550
    },
    {
      "epoch": 0.06604292790313704,
      "grad_norm": 1.2871235609054565,
      "learning_rate": 7.909330985915493e-05,
      "loss": 3.2069,
      "step": 600
    },
    {
      "epoch": 0.07154650522839846,
      "grad_norm": 1.0846847295761108,
      "learning_rate": 8.569542253521127e-05,
      "loss": 3.2512,
      "step": 650
    },
    {
      "epoch": 0.07705008255365987,
      "grad_norm": 1.5505499839782715,
      "learning_rate": 9.22975352112676e-05,
      "loss": 3.1704,
      "step": 700
    },
    {
      "epoch": 0.0825536598789213,
      "grad_norm": 1.178614616394043,
      "learning_rate": 9.889964788732394e-05,
      "loss": 3.1747,
      "step": 750
    },
    {
      "epoch": 0.08805723720418272,
      "grad_norm": 1.1504727602005005,
      "learning_rate": 0.00010550176056338028,
      "loss": 3.1241,
      "step": 800
    },
    {
      "epoch": 0.09356081452944413,
      "grad_norm": 1.02865469455719,
      "learning_rate": 0.00011210387323943662,
      "loss": 3.0481,
      "step": 850
    },
    {
      "epoch": 0.09906439185470556,
      "grad_norm": 1.1366077661514282,
      "learning_rate": 0.00011870598591549295,
      "loss": 3.0201,
      "step": 900
    },
    {
      "epoch": 0.10456796917996698,
      "grad_norm": 0.9753648042678833,
      "learning_rate": 0.00012530809859154929,
      "loss": 3.0145,
      "step": 950
    },
    {
      "epoch": 0.1100715465052284,
      "grad_norm": 0.6859256625175476,
      "learning_rate": 0.00013191021126760563,
      "loss": 2.9671,
      "step": 1000
    },
    {
      "epoch": 0.11557512383048982,
      "grad_norm": 0.8368203043937683,
      "learning_rate": 0.00013851232394366197,
      "loss": 2.992,
      "step": 1050
    },
    {
      "epoch": 0.12107870115575124,
      "grad_norm": 1.1109174489974976,
      "learning_rate": 0.0001451144366197183,
      "loss": 2.967,
      "step": 1100
    },
    {
      "epoch": 0.12658227848101267,
      "grad_norm": 0.6072912812232971,
      "learning_rate": 0.00015171654929577465,
      "loss": 2.9501,
      "step": 1150
    },
    {
      "epoch": 0.13208585580627408,
      "grad_norm": 0.7659889459609985,
      "learning_rate": 0.00015831866197183099,
      "loss": 2.9026,
      "step": 1200
    },
    {
      "epoch": 0.1375894331315355,
      "grad_norm": 0.5841110348701477,
      "learning_rate": 0.0001649207746478873,
      "loss": 2.8972,
      "step": 1250
    },
    {
      "epoch": 0.14309301045679693,
      "grad_norm": 0.5997458100318909,
      "learning_rate": 0.00017152288732394364,
      "loss": 2.9189,
      "step": 1300
    },
    {
      "epoch": 0.14859658778205834,
      "grad_norm": 0.6082264184951782,
      "learning_rate": 0.00017812499999999998,
      "loss": 2.8877,
      "step": 1350
    },
    {
      "epoch": 0.15410016510731975,
      "grad_norm": 0.696685254573822,
      "learning_rate": 0.00018472711267605632,
      "loss": 2.8752,
      "step": 1400
    },
    {
      "epoch": 0.15960374243258119,
      "grad_norm": 0.6795832514762878,
      "learning_rate": 0.00019132922535211266,
      "loss": 2.851,
      "step": 1450
    },
    {
      "epoch": 0.1651073197578426,
      "grad_norm": 0.5588585734367371,
      "learning_rate": 0.000197931338028169,
      "loss": 2.8546,
      "step": 1500
    },
    {
      "epoch": 0.170610897083104,
      "grad_norm": 0.5267760157585144,
      "learning_rate": 0.00020453345070422534,
      "loss": 2.8049,
      "step": 1550
    },
    {
      "epoch": 0.17611447440836545,
      "grad_norm": 0.591826856136322,
      "learning_rate": 0.00021113556338028168,
      "loss": 2.8071,
      "step": 1600
    },
    {
      "epoch": 0.18161805173362686,
      "grad_norm": 0.5463298559188843,
      "learning_rate": 0.00021773767605633802,
      "loss": 2.7667,
      "step": 1650
    },
    {
      "epoch": 0.18712162905888827,
      "grad_norm": 0.5745858550071716,
      "learning_rate": 0.00022433978873239433,
      "loss": 2.7968,
      "step": 1700
    },
    {
      "epoch": 0.1926252063841497,
      "grad_norm": 0.6239858865737915,
      "learning_rate": 0.00023094190140845067,
      "loss": 2.8206,
      "step": 1750
    },
    {
      "epoch": 0.19812878370941112,
      "grad_norm": 0.46496015787124634,
      "learning_rate": 0.000237544014084507,
      "loss": 2.7944,
      "step": 1800
    },
    {
      "epoch": 0.20363236103467253,
      "grad_norm": 0.5664075016975403,
      "learning_rate": 0.00024414612676056335,
      "loss": 2.7837,
      "step": 1850
    },
    {
      "epoch": 0.20913593835993396,
      "grad_norm": 0.537627100944519,
      "learning_rate": 0.0002507482394366197,
      "loss": 2.7179,
      "step": 1900
    },
    {
      "epoch": 0.21463951568519538,
      "grad_norm": 0.544585645198822,
      "learning_rate": 0.00025735035211267603,
      "loss": 2.7552,
      "step": 1950
    },
    {
      "epoch": 0.2201430930104568,
      "grad_norm": 0.5067969560623169,
      "learning_rate": 0.0002639524647887324,
      "loss": 2.7309,
      "step": 2000
    },
    {
      "epoch": 0.22564667033571822,
      "grad_norm": 0.462003618478775,
      "learning_rate": 0.0002705545774647887,
      "loss": 2.716,
      "step": 2050
    },
    {
      "epoch": 0.23115024766097964,
      "grad_norm": 0.44174736738204956,
      "learning_rate": 0.00027715669014084505,
      "loss": 2.7319,
      "step": 2100
    },
    {
      "epoch": 0.23665382498624105,
      "grad_norm": 0.5010894536972046,
      "learning_rate": 0.0002837588028169014,
      "loss": 2.7199,
      "step": 2150
    },
    {
      "epoch": 0.24215740231150248,
      "grad_norm": 0.44113022089004517,
      "learning_rate": 0.00029036091549295773,
      "loss": 2.7146,
      "step": 2200
    },
    {
      "epoch": 0.2476609796367639,
      "grad_norm": 0.4972345232963562,
      "learning_rate": 0.0002969630281690141,
      "loss": 2.6471,
      "step": 2250
    },
    {
      "epoch": 0.25316455696202533,
      "grad_norm": 0.5614003539085388,
      "learning_rate": 0.0002999997102213327,
      "loss": 2.687,
      "step": 2300
    },
    {
      "epoch": 0.25866813428728674,
      "grad_norm": 0.5223066806793213,
      "learning_rate": 0.0002999976432184194,
      "loss": 2.6279,
      "step": 2350
    },
    {
      "epoch": 0.26417171161254815,
      "grad_norm": 0.4381965100765228,
      "learning_rate": 0.0002999935887402823,
      "loss": 2.6889,
      "step": 2400
    },
    {
      "epoch": 0.26967528893780957,
      "grad_norm": 0.3969985842704773,
      "learning_rate": 0.00029998754684064345,
      "loss": 2.6565,
      "step": 2450
    },
    {
      "epoch": 0.275178866263071,
      "grad_norm": 0.44786953926086426,
      "learning_rate": 0.00029997951759955823,
      "loss": 2.6736,
      "step": 2500
    },
    {
      "epoch": 0.28068244358833244,
      "grad_norm": 0.3951723873615265,
      "learning_rate": 0.0002999695011234145,
      "loss": 2.6173,
      "step": 2550
    },
    {
      "epoch": 0.28618602091359385,
      "grad_norm": 0.4258750081062317,
      "learning_rate": 0.00029995749754493093,
      "loss": 2.5992,
      "step": 2600
    },
    {
      "epoch": 0.29168959823885526,
      "grad_norm": 0.41892707347869873,
      "learning_rate": 0.0002999435070231555,
      "loss": 2.6194,
      "step": 2650
    },
    {
      "epoch": 0.2971931755641167,
      "grad_norm": 0.4064221978187561,
      "learning_rate": 0.0002999275297434632,
      "loss": 2.6265,
      "step": 2700
    },
    {
      "epoch": 0.3026967528893781,
      "grad_norm": 0.3920956254005432,
      "learning_rate": 0.00029990956591755365,
      "loss": 2.6035,
      "step": 2750
    },
    {
      "epoch": 0.3082003302146395,
      "grad_norm": 0.366318941116333,
      "learning_rate": 0.0002998896157834484,
      "loss": 2.6352,
      "step": 2800
    },
    {
      "epoch": 0.31370390753990096,
      "grad_norm": 0.40867865085601807,
      "learning_rate": 0.0002998676796054875,
      "loss": 2.5674,
      "step": 2850
    },
    {
      "epoch": 0.31920748486516237,
      "grad_norm": 0.3685750365257263,
      "learning_rate": 0.00029984375767432627,
      "loss": 2.5748,
      "step": 2900
    },
    {
      "epoch": 0.3247110621904238,
      "grad_norm": 0.38134968280792236,
      "learning_rate": 0.0002998178503069314,
      "loss": 2.5703,
      "step": 2950
    },
    {
      "epoch": 0.3302146395156852,
      "grad_norm": 0.36195963621139526,
      "learning_rate": 0.00029978995784657643,
      "loss": 2.5367,
      "step": 3000
    },
    {
      "epoch": 0.3357182168409466,
      "grad_norm": 0.3696858584880829,
      "learning_rate": 0.0002997600806628379,
      "loss": 2.5693,
      "step": 3050
    },
    {
      "epoch": 0.341221794166208,
      "grad_norm": 0.3404608964920044,
      "learning_rate": 0.00029972821915158964,
      "loss": 2.5454,
      "step": 3100
    },
    {
      "epoch": 0.3467253714914695,
      "grad_norm": 0.33220136165618896,
      "learning_rate": 0.0002996943737349981,
      "loss": 2.5675,
      "step": 3150
    },
    {
      "epoch": 0.3522289488167309,
      "grad_norm": 0.3744208812713623,
      "learning_rate": 0.00029965854486151643,
      "loss": 2.5453,
      "step": 3200
    },
    {
      "epoch": 0.3577325261419923,
      "grad_norm": 0.36920756101608276,
      "learning_rate": 0.0002996207330058788,
      "loss": 2.5314,
      "step": 3250
    },
    {
      "epoch": 0.3632361034672537,
      "grad_norm": 0.36301830410957336,
      "learning_rate": 0.00029958093866909403,
      "loss": 2.5361,
      "step": 3300
    },
    {
      "epoch": 0.3687396807925151,
      "grad_norm": 0.3135142922401428,
      "learning_rate": 0.00029953916237843853,
      "loss": 2.5222,
      "step": 3350
    },
    {
      "epoch": 0.37424325811777653,
      "grad_norm": 0.3634016513824463,
      "learning_rate": 0.00029949540468745015,
      "loss": 2.5179,
      "step": 3400
    },
    {
      "epoch": 0.379746835443038,
      "grad_norm": 0.370914101600647,
      "learning_rate": 0.00029944966617592017,
      "loss": 2.5077,
      "step": 3450
    },
    {
      "epoch": 0.3852504127682994,
      "grad_norm": 0.32538744807243347,
      "learning_rate": 0.0002994019474498858,
      "loss": 2.4873,
      "step": 3500
    },
    {
      "epoch": 0.3907539900935608,
      "grad_norm": 0.3077157139778137,
      "learning_rate": 0.0002993522491416221,
      "loss": 2.5149,
      "step": 3550
    },
    {
      "epoch": 0.39625756741882223,
      "grad_norm": 0.32142704725265503,
      "learning_rate": 0.000299300571909634,
      "loss": 2.4989,
      "step": 3600
    },
    {
      "epoch": 0.40176114474408364,
      "grad_norm": 0.3485497832298279,
      "learning_rate": 0.00029924691643864684,
      "loss": 2.5114,
      "step": 3650
    },
    {
      "epoch": 0.40726472206934505,
      "grad_norm": 0.34483280777931213,
      "learning_rate": 0.0002991912834395981,
      "loss": 2.4863,
      "step": 3700
    },
    {
      "epoch": 0.4127682993946065,
      "grad_norm": 0.30801650881767273,
      "learning_rate": 0.00029913367364962733,
      "loss": 2.4935,
      "step": 3750
    },
    {
      "epoch": 0.41827187671986793,
      "grad_norm": 0.3099140524864197,
      "learning_rate": 0.00029907408783206674,
      "loss": 2.5122,
      "step": 3800
    },
    {
      "epoch": 0.42377545404512934,
      "grad_norm": 0.33073538541793823,
      "learning_rate": 0.0002990125267764309,
      "loss": 2.4613,
      "step": 3850
    },
    {
      "epoch": 0.42927903137039075,
      "grad_norm": 0.3025052845478058,
      "learning_rate": 0.00029894899129840653,
      "loss": 2.4855,
      "step": 3900
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 0.3200172483921051,
      "learning_rate": 0.00029888348223984143,
      "loss": 2.4542,
      "step": 3950
    },
    {
      "epoch": 0.4402861860209136,
      "grad_norm": 0.31750544905662537,
      "learning_rate": 0.0002988160004687335,
      "loss": 2.4714,
      "step": 4000
    },
    {
      "epoch": 0.44578976334617504,
      "grad_norm": 0.28732484579086304,
      "learning_rate": 0.00029874654687921895,
      "loss": 2.4595,
      "step": 4050
    },
    {
      "epoch": 0.45129334067143645,
      "grad_norm": 0.33312809467315674,
      "learning_rate": 0.0002986751223915609,
      "loss": 2.4797,
      "step": 4100
    },
    {
      "epoch": 0.45679691799669786,
      "grad_norm": 0.3109307289123535,
      "learning_rate": 0.00029860172795213695,
      "loss": 2.4506,
      "step": 4150
    },
    {
      "epoch": 0.46230049532195927,
      "grad_norm": 0.31127694249153137,
      "learning_rate": 0.0002985263645334266,
      "loss": 2.4544,
      "step": 4200
    },
    {
      "epoch": 0.4678040726472207,
      "grad_norm": 0.34132882952690125,
      "learning_rate": 0.0002984490331339982,
      "loss": 2.455,
      "step": 4250
    },
    {
      "epoch": 0.4733076499724821,
      "grad_norm": 0.3107958436012268,
      "learning_rate": 0.00029836973477849634,
      "loss": 2.4733,
      "step": 4300
    },
    {
      "epoch": 0.47881122729774356,
      "grad_norm": 0.27321889996528625,
      "learning_rate": 0.00029828847051762753,
      "loss": 2.4381,
      "step": 4350
    },
    {
      "epoch": 0.48431480462300497,
      "grad_norm": 0.3114171326160431,
      "learning_rate": 0.0002982052414281467,
      "loss": 2.4612,
      "step": 4400
    },
    {
      "epoch": 0.4898183819482664,
      "grad_norm": 0.2926501929759979,
      "learning_rate": 0.00029812004861284294,
      "loss": 2.4364,
      "step": 4450
    },
    {
      "epoch": 0.4953219592735278,
      "grad_norm": 0.2824021279811859,
      "learning_rate": 0.00029803289320052466,
      "loss": 2.4453,
      "step": 4500
    },
    {
      "epoch": 0.5008255365987893,
      "grad_norm": 0.2710123062133789,
      "learning_rate": 0.00029794377634600485,
      "loss": 2.4207,
      "step": 4550
    },
    {
      "epoch": 0.5063291139240507,
      "grad_norm": 0.28232479095458984,
      "learning_rate": 0.0002978526992300856,
      "loss": 2.4332,
      "step": 4600
    },
    {
      "epoch": 0.5118326912493121,
      "grad_norm": 0.2661692798137665,
      "learning_rate": 0.0002977596630595427,
      "loss": 2.4469,
      "step": 4650
    },
    {
      "epoch": 0.5173362685745735,
      "grad_norm": 0.317094087600708,
      "learning_rate": 0.0002976646690671094,
      "loss": 2.4486,
      "step": 4700
    },
    {
      "epoch": 0.5228398458998349,
      "grad_norm": 0.268915593624115,
      "learning_rate": 0.0002975677185114602,
      "loss": 2.4447,
      "step": 4750
    },
    {
      "epoch": 0.5283434232250963,
      "grad_norm": 0.2714874744415283,
      "learning_rate": 0.000297468812677194,
      "loss": 2.4065,
      "step": 4800
    },
    {
      "epoch": 0.5338470005503577,
      "grad_norm": 0.2872399389743805,
      "learning_rate": 0.0002973679528748175,
      "loss": 2.3943,
      "step": 4850
    },
    {
      "epoch": 0.5393505778756191,
      "grad_norm": 0.24350598454475403,
      "learning_rate": 0.00029726514044072736,
      "loss": 2.3888,
      "step": 4900
    },
    {
      "epoch": 0.5448541552008805,
      "grad_norm": 0.2634688913822174,
      "learning_rate": 0.00029716037673719275,
      "loss": 2.3953,
      "step": 4950
    },
    {
      "epoch": 0.550357732526142,
      "grad_norm": 0.2842876613140106,
      "learning_rate": 0.0002970536631523373,
      "loss": 2.403,
      "step": 5000
    },
    {
      "epoch": 0.5558613098514034,
      "grad_norm": 0.30130457878112793,
      "learning_rate": 0.00029694500110012055,
      "loss": 2.4393,
      "step": 5050
    },
    {
      "epoch": 0.5613648871766649,
      "grad_norm": 0.2594560980796814,
      "learning_rate": 0.00029683439202031936,
      "loss": 2.3827,
      "step": 5100
    },
    {
      "epoch": 0.5668684645019263,
      "grad_norm": 0.25417500734329224,
      "learning_rate": 0.0002967218373785088,
      "loss": 2.3789,
      "step": 5150
    },
    {
      "epoch": 0.5723720418271877,
      "grad_norm": 0.2533874213695526,
      "learning_rate": 0.0002966073386660428,
      "loss": 2.4211,
      "step": 5200
    },
    {
      "epoch": 0.5778756191524491,
      "grad_norm": 0.2343342900276184,
      "learning_rate": 0.0002964908974000341,
      "loss": 2.4332,
      "step": 5250
    },
    {
      "epoch": 0.5833791964777105,
      "grad_norm": 0.25453534722328186,
      "learning_rate": 0.0002963725151233345,
      "loss": 2.3984,
      "step": 5300
    },
    {
      "epoch": 0.5888827738029719,
      "grad_norm": 0.2670257091522217,
      "learning_rate": 0.00029625219340451435,
      "loss": 2.4126,
      "step": 5350
    },
    {
      "epoch": 0.5943863511282333,
      "grad_norm": 0.2728422284126282,
      "learning_rate": 0.00029612993383784154,
      "loss": 2.4152,
      "step": 5400
    },
    {
      "epoch": 0.5998899284534948,
      "grad_norm": 0.27964428067207336,
      "learning_rate": 0.0002960057380432606,
      "loss": 2.3675,
      "step": 5450
    },
    {
      "epoch": 0.6053935057787562,
      "grad_norm": 0.27354755997657776,
      "learning_rate": 0.00029587960766637103,
      "loss": 2.4109,
      "step": 5500
    },
    {
      "epoch": 0.6108970831040176,
      "grad_norm": 0.26175597310066223,
      "learning_rate": 0.000295751544378406,
      "loss": 2.3953,
      "step": 5550
    },
    {
      "epoch": 0.616400660429279,
      "grad_norm": 0.2590219974517822,
      "learning_rate": 0.0002956215498762093,
      "loss": 2.3699,
      "step": 5600
    },
    {
      "epoch": 0.6219042377545404,
      "grad_norm": 0.2495882660150528,
      "learning_rate": 0.0002954896258822139,
      "loss": 2.3949,
      "step": 5650
    },
    {
      "epoch": 0.6274078150798019,
      "grad_norm": 0.26052239537239075,
      "learning_rate": 0.0002953557741444183,
      "loss": 2.3642,
      "step": 5700
    },
    {
      "epoch": 0.6329113924050633,
      "grad_norm": 0.2627600431442261,
      "learning_rate": 0.0002952199964363638,
      "loss": 2.3895,
      "step": 5750
    },
    {
      "epoch": 0.6384149697303247,
      "grad_norm": 0.2417898327112198,
      "learning_rate": 0.00029508229455711086,
      "loss": 2.4087,
      "step": 5800
    },
    {
      "epoch": 0.6439185470555862,
      "grad_norm": 0.24891215562820435,
      "learning_rate": 0.00029494267033121525,
      "loss": 2.3642,
      "step": 5850
    },
    {
      "epoch": 0.6494221243808476,
      "grad_norm": 0.2453078180551529,
      "learning_rate": 0.0002948011256087041,
      "loss": 2.3761,
      "step": 5900
    },
    {
      "epoch": 0.654925701706109,
      "grad_norm": 0.24607980251312256,
      "learning_rate": 0.0002946576622650509,
      "loss": 2.3596,
      "step": 5950
    },
    {
      "epoch": 0.6604292790313704,
      "grad_norm": 0.2540590763092041,
      "learning_rate": 0.0002945122822011513,
      "loss": 2.3666,
      "step": 6000
    },
    {
      "epoch": 0.6659328563566318,
      "grad_norm": 0.25918814539909363,
      "learning_rate": 0.00029436498734329727,
      "loss": 2.3494,
      "step": 6050
    },
    {
      "epoch": 0.6714364336818932,
      "grad_norm": 0.27554330229759216,
      "learning_rate": 0.0002942157796431521,
      "loss": 2.3793,
      "step": 6100
    },
    {
      "epoch": 0.6769400110071546,
      "grad_norm": 0.22879928350448608,
      "learning_rate": 0.00029406466107772416,
      "loss": 2.3898,
      "step": 6150
    },
    {
      "epoch": 0.682443588332416,
      "grad_norm": 0.2851306200027466,
      "learning_rate": 0.00029391163364934095,
      "loss": 2.3493,
      "step": 6200
    },
    {
      "epoch": 0.6879471656576774,
      "grad_norm": 0.255014568567276,
      "learning_rate": 0.0002937566993856225,
      "loss": 2.3959,
      "step": 6250
    },
    {
      "epoch": 0.693450742982939,
      "grad_norm": 0.23992781341075897,
      "learning_rate": 0.00029359986033945454,
      "loss": 2.3588,
      "step": 6300
    },
    {
      "epoch": 0.6989543203082004,
      "grad_norm": 0.24902793765068054,
      "learning_rate": 0.000293441118588961,
      "loss": 2.3908,
      "step": 6350
    },
    {
      "epoch": 0.7044578976334618,
      "grad_norm": 0.2294873744249344,
      "learning_rate": 0.0002932804762374771,
      "loss": 2.398,
      "step": 6400
    },
    {
      "epoch": 0.7099614749587232,
      "grad_norm": 0.23560309410095215,
      "learning_rate": 0.00029311793541352075,
      "loss": 2.3481,
      "step": 6450
    },
    {
      "epoch": 0.7154650522839846,
      "grad_norm": 0.27493488788604736,
      "learning_rate": 0.00029295349827076497,
      "loss": 2.3248,
      "step": 6500
    },
    {
      "epoch": 0.720968629609246,
      "grad_norm": 0.22731854021549225,
      "learning_rate": 0.0002927871669880089,
      "loss": 2.3444,
      "step": 6550
    },
    {
      "epoch": 0.7264722069345074,
      "grad_norm": 0.22198539972305298,
      "learning_rate": 0.0002926189437691492,
      "loss": 2.3148,
      "step": 6600
    },
    {
      "epoch": 0.7319757842597688,
      "grad_norm": 0.25049686431884766,
      "learning_rate": 0.0002924488308431508,
      "loss": 2.3134,
      "step": 6650
    },
    {
      "epoch": 0.7374793615850302,
      "grad_norm": 0.2354484349489212,
      "learning_rate": 0.0002922768304640172,
      "loss": 2.348,
      "step": 6700
    },
    {
      "epoch": 0.7429829389102917,
      "grad_norm": 0.23955915868282318,
      "learning_rate": 0.00029210294491076094,
      "loss": 2.3172,
      "step": 6750
    },
    {
      "epoch": 0.7484865162355531,
      "grad_norm": 0.2476130574941635,
      "learning_rate": 0.000291927176487373,
      "loss": 2.3842,
      "step": 6800
    },
    {
      "epoch": 0.7539900935608145,
      "grad_norm": 0.2247840166091919,
      "learning_rate": 0.00029174952752279254,
      "loss": 2.3302,
      "step": 6850
    },
    {
      "epoch": 0.759493670886076,
      "grad_norm": 0.22530816495418549,
      "learning_rate": 0.000291570000370876,
      "loss": 2.3707,
      "step": 6900
    },
    {
      "epoch": 0.7649972482113374,
      "grad_norm": 0.2242722362279892,
      "learning_rate": 0.00029138859741036587,
      "loss": 2.312,
      "step": 6950
    },
    {
      "epoch": 0.7705008255365988,
      "grad_norm": 0.23376357555389404,
      "learning_rate": 0.0002912053210448592,
      "loss": 2.3334,
      "step": 7000
    },
    {
      "epoch": 0.7760044028618602,
      "grad_norm": 0.23131632804870605,
      "learning_rate": 0.0002910201737027757,
      "loss": 2.3293,
      "step": 7050
    },
    {
      "epoch": 0.7815079801871216,
      "grad_norm": 0.21440596878528595,
      "learning_rate": 0.0002908331578373256,
      "loss": 2.3593,
      "step": 7100
    },
    {
      "epoch": 0.787011557512383,
      "grad_norm": 0.225584477186203,
      "learning_rate": 0.00029064427592647715,
      "loss": 2.3464,
      "step": 7150
    },
    {
      "epoch": 0.7925151348376445,
      "grad_norm": 0.22819140553474426,
      "learning_rate": 0.0002904535304729238,
      "loss": 2.3145,
      "step": 7200
    },
    {
      "epoch": 0.7980187121629059,
      "grad_norm": 0.2503032982349396,
      "learning_rate": 0.00029026092400405115,
      "loss": 2.3376,
      "step": 7250
    },
    {
      "epoch": 0.8035222894881673,
      "grad_norm": 0.2061757743358612,
      "learning_rate": 0.00029006645907190295,
      "loss": 2.3252,
      "step": 7300
    },
    {
      "epoch": 0.8090258668134287,
      "grad_norm": 0.2099982351064682,
      "learning_rate": 0.00028987013825314804,
      "loss": 2.35,
      "step": 7350
    },
    {
      "epoch": 0.8145294441386901,
      "grad_norm": 0.212050199508667,
      "learning_rate": 0.00028967196414904573,
      "loss": 2.3304,
      "step": 7400
    },
    {
      "epoch": 0.8200330214639515,
      "grad_norm": 0.22283074259757996,
      "learning_rate": 0.0002894719393854114,
      "loss": 2.3389,
      "step": 7450
    },
    {
      "epoch": 0.825536598789213,
      "grad_norm": 0.2060365378856659,
      "learning_rate": 0.0002892700666125817,
      "loss": 2.3215,
      "step": 7500
    },
    {
      "epoch": 0.8310401761144744,
      "grad_norm": 0.22510799765586853,
      "learning_rate": 0.0002890663485053797,
      "loss": 2.3295,
      "step": 7550
    },
    {
      "epoch": 0.8365437534397359,
      "grad_norm": 0.21860332787036896,
      "learning_rate": 0.00028886078776307905,
      "loss": 2.3286,
      "step": 7600
    },
    {
      "epoch": 0.8420473307649973,
      "grad_norm": 0.20771068334579468,
      "learning_rate": 0.00028865338710936826,
      "loss": 2.3143,
      "step": 7650
    },
    {
      "epoch": 0.8475509080902587,
      "grad_norm": 0.23098550736904144,
      "learning_rate": 0.0002884441492923151,
      "loss": 2.3583,
      "step": 7700
    },
    {
      "epoch": 0.8530544854155201,
      "grad_norm": 0.204328715801239,
      "learning_rate": 0.00028823307708432963,
      "loss": 2.3425,
      "step": 7750
    },
    {
      "epoch": 0.8585580627407815,
      "grad_norm": 0.20610789954662323,
      "learning_rate": 0.0002880201732821275,
      "loss": 2.3269,
      "step": 7800
    },
    {
      "epoch": 0.8640616400660429,
      "grad_norm": 0.22584037482738495,
      "learning_rate": 0.0002878054407066935,
      "loss": 2.3061,
      "step": 7850
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 0.22317476570606232,
      "learning_rate": 0.0002875888822032433,
      "loss": 2.3047,
      "step": 7900
    },
    {
      "epoch": 0.8750687947165657,
      "grad_norm": 0.2132745087146759,
      "learning_rate": 0.00028737050064118645,
      "loss": 2.2925,
      "step": 7950
    },
    {
      "epoch": 0.8805723720418271,
      "grad_norm": 0.21960894763469696,
      "learning_rate": 0.00028715029891408805,
      "loss": 2.3365,
      "step": 8000
    },
    {
      "epoch": 0.8860759493670886,
      "grad_norm": 0.2113264799118042,
      "learning_rate": 0.0002869282799396305,
      "loss": 2.2814,
      "step": 8050
    },
    {
      "epoch": 0.8915795266923501,
      "grad_norm": 0.19959582388401031,
      "learning_rate": 0.00028670444665957465,
      "loss": 2.2787,
      "step": 8100
    },
    {
      "epoch": 0.8970831040176115,
      "grad_norm": 0.24770890176296234,
      "learning_rate": 0.00028647880203972115,
      "loss": 2.3312,
      "step": 8150
    },
    {
      "epoch": 0.9025866813428729,
      "grad_norm": 0.2229924350976944,
      "learning_rate": 0.0002862513490698709,
      "loss": 2.2825,
      "step": 8200
    },
    {
      "epoch": 0.9080902586681343,
      "grad_norm": 0.23118580877780914,
      "learning_rate": 0.0002860220907637856,
      "loss": 2.2973,
      "step": 8250
    },
    {
      "epoch": 0.9135938359933957,
      "grad_norm": 0.1978590488433838,
      "learning_rate": 0.0002857910301591475,
      "loss": 2.3106,
      "step": 8300
    },
    {
      "epoch": 0.9190974133186571,
      "grad_norm": 0.20972274243831635,
      "learning_rate": 0.0002855581703175198,
      "loss": 2.3489,
      "step": 8350
    },
    {
      "epoch": 0.9246009906439185,
      "grad_norm": 0.20369485020637512,
      "learning_rate": 0.0002853235143243052,
      "loss": 2.3135,
      "step": 8400
    },
    {
      "epoch": 0.93010456796918,
      "grad_norm": 0.19085292518138885,
      "learning_rate": 0.00028508706528870576,
      "loss": 2.2969,
      "step": 8450
    },
    {
      "epoch": 0.9356081452944414,
      "grad_norm": 0.21080902218818665,
      "learning_rate": 0.0002848488263436814,
      "loss": 2.2979,
      "step": 8500
    },
    {
      "epoch": 0.9411117226197028,
      "grad_norm": 0.1939728856086731,
      "learning_rate": 0.00028460880064590835,
      "loss": 2.31,
      "step": 8550
    },
    {
      "epoch": 0.9466152999449642,
      "grad_norm": 0.23909969627857208,
      "learning_rate": 0.0002843669913757375,
      "loss": 2.2585,
      "step": 8600
    },
    {
      "epoch": 0.9521188772702256,
      "grad_norm": 0.22809971868991852,
      "learning_rate": 0.000284123401737152,
      "loss": 2.3058,
      "step": 8650
    },
    {
      "epoch": 0.9576224545954871,
      "grad_norm": 0.2066497504711151,
      "learning_rate": 0.00028387803495772513,
      "loss": 2.2736,
      "step": 8700
    },
    {
      "epoch": 0.9631260319207485,
      "grad_norm": 0.1921154260635376,
      "learning_rate": 0.00028363089428857716,
      "loss": 2.2813,
      "step": 8750
    },
    {
      "epoch": 0.9686296092460099,
      "grad_norm": 0.19423851370811462,
      "learning_rate": 0.00028338198300433264,
      "loss": 2.314,
      "step": 8800
    },
    {
      "epoch": 0.9741331865712713,
      "grad_norm": 0.224751815199852,
      "learning_rate": 0.0002831313044030768,
      "loss": 2.3335,
      "step": 8850
    },
    {
      "epoch": 0.9796367638965328,
      "grad_norm": 0.21634523570537567,
      "learning_rate": 0.00028287886180631183,
      "loss": 2.3021,
      "step": 8900
    },
    {
      "epoch": 0.9851403412217942,
      "grad_norm": 0.19320085644721985,
      "learning_rate": 0.00028262465855891306,
      "loss": 2.2635,
      "step": 8950
    },
    {
      "epoch": 0.9906439185470556,
      "grad_norm": 0.20169596374034882,
      "learning_rate": 0.00028236869802908453,
      "loss": 2.3013,
      "step": 9000
    },
    {
      "epoch": 0.996147495872317,
      "grad_norm": 0.2045382857322693,
      "learning_rate": 0.00028211098360831407,
      "loss": 2.2576,
      "step": 9050
    },
    {
      "epoch": 1.0016510731975785,
      "grad_norm": 0.2171631157398224,
      "learning_rate": 0.00028185151871132894,
      "loss": 2.3136,
      "step": 9100
    },
    {
      "epoch": 1.00715465052284,
      "grad_norm": 0.19541560113430023,
      "learning_rate": 0.00028159030677605017,
      "loss": 2.2547,
      "step": 9150
    },
    {
      "epoch": 1.0126582278481013,
      "grad_norm": 0.19888882339000702,
      "learning_rate": 0.0002813273512635472,
      "loss": 2.2275,
      "step": 9200
    },
    {
      "epoch": 1.0181618051733627,
      "grad_norm": 0.24578270316123962,
      "learning_rate": 0.0002810626556579918,
      "loss": 2.2775,
      "step": 9250
    },
    {
      "epoch": 1.0236653824986242,
      "grad_norm": 0.19902436435222626,
      "learning_rate": 0.00028079622346661217,
      "loss": 2.2353,
      "step": 9300
    },
    {
      "epoch": 1.0291689598238856,
      "grad_norm": 0.19234856963157654,
      "learning_rate": 0.00028052805821964633,
      "loss": 2.2769,
      "step": 9350
    },
    {
      "epoch": 1.034672537149147,
      "grad_norm": 0.19912928342819214,
      "learning_rate": 0.0002802581634702952,
      "loss": 2.2625,
      "step": 9400
    },
    {
      "epoch": 1.0401761144744084,
      "grad_norm": 0.1918436884880066,
      "learning_rate": 0.00027998654279467604,
      "loss": 2.2335,
      "step": 9450
    },
    {
      "epoch": 1.0456796917996698,
      "grad_norm": 0.20422010123729706,
      "learning_rate": 0.0002797131997917743,
      "loss": 2.3022,
      "step": 9500
    },
    {
      "epoch": 1.0511832691249312,
      "grad_norm": 0.19621697068214417,
      "learning_rate": 0.00027943813808339657,
      "loss": 2.2378,
      "step": 9550
    },
    {
      "epoch": 1.0566868464501926,
      "grad_norm": 0.18403683602809906,
      "learning_rate": 0.0002791613613141223,
      "loss": 2.2502,
      "step": 9600
    },
    {
      "epoch": 1.062190423775454,
      "grad_norm": 0.2140798568725586,
      "learning_rate": 0.0002788828731512556,
      "loss": 2.2722,
      "step": 9650
    },
    {
      "epoch": 1.0676940011007154,
      "grad_norm": 0.19459135830402374,
      "learning_rate": 0.0002786026772847767,
      "loss": 2.2361,
      "step": 9700
    },
    {
      "epoch": 1.0731975784259769,
      "grad_norm": 0.20383061468601227,
      "learning_rate": 0.00027832077742729277,
      "loss": 2.2471,
      "step": 9750
    },
    {
      "epoch": 1.0787011557512383,
      "grad_norm": 0.1901775300502777,
      "learning_rate": 0.0002780371773139891,
      "loss": 2.2968,
      "step": 9800
    },
    {
      "epoch": 1.0842047330764997,
      "grad_norm": 0.20100219547748566,
      "learning_rate": 0.0002777518807025793,
      "loss": 2.2568,
      "step": 9850
    },
    {
      "epoch": 1.089708310401761,
      "grad_norm": 0.2127520740032196,
      "learning_rate": 0.00027746489137325586,
      "loss": 2.28,
      "step": 9900
    },
    {
      "epoch": 1.0952118877270225,
      "grad_norm": 0.19059176743030548,
      "learning_rate": 0.00027717621312863965,
      "loss": 2.284,
      "step": 9950
    },
    {
      "epoch": 1.100715465052284,
      "grad_norm": 0.20845113694667816,
      "learning_rate": 0.0002768858497937299,
      "loss": 2.2409,
      "step": 10000
    },
    {
      "epoch": 1.1062190423775453,
      "grad_norm": 0.18969906866550446,
      "learning_rate": 0.00027659380521585325,
      "loss": 2.2725,
      "step": 10050
    },
    {
      "epoch": 1.1117226197028067,
      "grad_norm": 0.19071218371391296,
      "learning_rate": 0.0002763000832646129,
      "loss": 2.2624,
      "step": 10100
    },
    {
      "epoch": 1.1172261970280681,
      "grad_norm": 0.21723783016204834,
      "learning_rate": 0.0002760046878318373,
      "loss": 2.2453,
      "step": 10150
    },
    {
      "epoch": 1.1227297743533298,
      "grad_norm": 0.19336852431297302,
      "learning_rate": 0.00027570762283152874,
      "loss": 2.2322,
      "step": 10200
    },
    {
      "epoch": 1.1282333516785912,
      "grad_norm": 0.19500704109668732,
      "learning_rate": 0.0002754088921998112,
      "loss": 2.1858,
      "step": 10250
    },
    {
      "epoch": 1.1337369290038526,
      "grad_norm": 0.19920021295547485,
      "learning_rate": 0.0002751084998948784,
      "loss": 2.2286,
      "step": 10300
    },
    {
      "epoch": 1.139240506329114,
      "grad_norm": 0.18863485753536224,
      "learning_rate": 0.00027480644989694126,
      "loss": 2.2623,
      "step": 10350
    },
    {
      "epoch": 1.1447440836543754,
      "grad_norm": 0.18571630120277405,
      "learning_rate": 0.0002745027462081753,
      "loss": 2.2616,
      "step": 10400
    },
    {
      "epoch": 1.1502476609796368,
      "grad_norm": 0.187313050031662,
      "learning_rate": 0.00027419739285266745,
      "loss": 2.2608,
      "step": 10450
    },
    {
      "epoch": 1.1557512383048982,
      "grad_norm": 0.19139795005321503,
      "learning_rate": 0.0002738903938763628,
      "loss": 2.2204,
      "step": 10500
    },
    {
      "epoch": 1.1612548156301596,
      "grad_norm": 0.188013955950737,
      "learning_rate": 0.0002735817533470109,
      "loss": 2.2596,
      "step": 10550
    },
    {
      "epoch": 1.166758392955421,
      "grad_norm": 0.1884177178144455,
      "learning_rate": 0.0002732714753541122,
      "loss": 2.2618,
      "step": 10600
    },
    {
      "epoch": 1.1722619702806825,
      "grad_norm": 0.19187124073505402,
      "learning_rate": 0.00027295956400886335,
      "loss": 2.2075,
      "step": 10650
    },
    {
      "epoch": 1.1777655476059439,
      "grad_norm": 0.19815993309020996,
      "learning_rate": 0.0002726460234441031,
      "loss": 2.2677,
      "step": 10700
    },
    {
      "epoch": 1.1832691249312053,
      "grad_norm": 0.18218094110488892,
      "learning_rate": 0.00027233085781425744,
      "loss": 2.2697,
      "step": 10750
    },
    {
      "epoch": 1.1887727022564667,
      "grad_norm": 0.1923886239528656,
      "learning_rate": 0.0002720140712952845,
      "loss": 2.2127,
      "step": 10800
    },
    {
      "epoch": 1.194276279581728,
      "grad_norm": 0.18907909095287323,
      "learning_rate": 0.0002716956680846193,
      "loss": 2.1989,
      "step": 10850
    },
    {
      "epoch": 1.1997798569069895,
      "grad_norm": 0.1902381181716919,
      "learning_rate": 0.000271375652401118,
      "loss": 2.2211,
      "step": 10900
    },
    {
      "epoch": 1.205283434232251,
      "grad_norm": 0.20184637606143951,
      "learning_rate": 0.0002710540284850023,
      "loss": 2.2451,
      "step": 10950
    },
    {
      "epoch": 1.2107870115575123,
      "grad_norm": 0.19903656840324402,
      "learning_rate": 0.00027073080059780285,
      "loss": 2.2233,
      "step": 11000
    },
    {
      "epoch": 1.2162905888827737,
      "grad_norm": 0.19023925065994263,
      "learning_rate": 0.000270405973022303,
      "loss": 2.2438,
      "step": 11050
    },
    {
      "epoch": 1.2217941662080352,
      "grad_norm": 0.17722088098526,
      "learning_rate": 0.0002700795500624822,
      "loss": 2.259,
      "step": 11100
    },
    {
      "epoch": 1.2272977435332966,
      "grad_norm": 0.19715473055839539,
      "learning_rate": 0.0002697515360434587,
      "loss": 2.2509,
      "step": 11150
    },
    {
      "epoch": 1.232801320858558,
      "grad_norm": 0.18065761029720306,
      "learning_rate": 0.00026942193531143225,
      "loss": 2.2006,
      "step": 11200
    },
    {
      "epoch": 1.2383048981838194,
      "grad_norm": 0.18757817149162292,
      "learning_rate": 0.00026909075223362683,
      "loss": 2.2215,
      "step": 11250
    },
    {
      "epoch": 1.243808475509081,
      "grad_norm": 0.18816334009170532,
      "learning_rate": 0.00026875799119823243,
      "loss": 2.2314,
      "step": 11300
    },
    {
      "epoch": 1.2493120528343424,
      "grad_norm": 0.19772210717201233,
      "learning_rate": 0.0002684236566143471,
      "loss": 2.2256,
      "step": 11350
    },
    {
      "epoch": 1.2548156301596038,
      "grad_norm": 0.19510945677757263,
      "learning_rate": 0.0002680877529119184,
      "loss": 2.2427,
      "step": 11400
    },
    {
      "epoch": 1.2603192074848653,
      "grad_norm": 0.19960781931877136,
      "learning_rate": 0.0002677502845416849,
      "loss": 2.2324,
      "step": 11450
    },
    {
      "epoch": 1.2658227848101267,
      "grad_norm": 0.1882307380437851,
      "learning_rate": 0.0002674112559751169,
      "loss": 2.2432,
      "step": 11500
    },
    {
      "epoch": 1.271326362135388,
      "grad_norm": 0.18959587812423706,
      "learning_rate": 0.00026707067170435767,
      "loss": 2.19,
      "step": 11550
    },
    {
      "epoch": 1.2768299394606495,
      "grad_norm": 0.18768323957920074,
      "learning_rate": 0.0002667285362421634,
      "loss": 2.2294,
      "step": 11600
    },
    {
      "epoch": 1.282333516785911,
      "grad_norm": 0.20077955722808838,
      "learning_rate": 0.00026638485412184355,
      "loss": 2.2518,
      "step": 11650
    },
    {
      "epoch": 1.2878370941111723,
      "grad_norm": 0.1956523358821869,
      "learning_rate": 0.00026603962989720105,
      "loss": 2.2662,
      "step": 11700
    },
    {
      "epoch": 1.2933406714364337,
      "grad_norm": 0.19226029515266418,
      "learning_rate": 0.0002656928681424718,
      "loss": 2.2133,
      "step": 11750
    },
    {
      "epoch": 1.2988442487616951,
      "grad_norm": 0.16987648606300354,
      "learning_rate": 0.0002653445734522639,
      "loss": 2.2226,
      "step": 11800
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 0.17524655163288116,
      "learning_rate": 0.00026499475044149703,
      "loss": 2.2555,
      "step": 11850
    },
    {
      "epoch": 1.309851403412218,
      "grad_norm": 0.19188162684440613,
      "learning_rate": 0.00026464340374534104,
      "loss": 2.2234,
      "step": 11900
    },
    {
      "epoch": 1.3153549807374794,
      "grad_norm": 0.17676417529582977,
      "learning_rate": 0.00026429053801915487,
      "loss": 2.2188,
      "step": 11950
    },
    {
      "epoch": 1.3208585580627408,
      "grad_norm": 0.20071938633918762,
      "learning_rate": 0.0002639361579384245,
      "loss": 2.234,
      "step": 12000
    },
    {
      "epoch": 1.3263621353880022,
      "grad_norm": 0.19267459213733673,
      "learning_rate": 0.0002635802681987012,
      "loss": 2.235,
      "step": 12050
    },
    {
      "epoch": 1.3318657127132636,
      "grad_norm": 0.1724405139684677,
      "learning_rate": 0.00026322287351553944,
      "loss": 2.2388,
      "step": 12100
    },
    {
      "epoch": 1.337369290038525,
      "grad_norm": 0.1907806396484375,
      "learning_rate": 0.000262863978624434,
      "loss": 2.1937,
      "step": 12150
    },
    {
      "epoch": 1.3428728673637864,
      "grad_norm": 0.18452796339988708,
      "learning_rate": 0.0002625035882807578,
      "loss": 2.2429,
      "step": 12200
    },
    {
      "epoch": 1.3483764446890478,
      "grad_norm": 0.17565739154815674,
      "learning_rate": 0.0002621417072596982,
      "loss": 2.2367,
      "step": 12250
    },
    {
      "epoch": 1.3538800220143092,
      "grad_norm": 0.1761862337589264,
      "learning_rate": 0.00026177834035619445,
      "loss": 2.2258,
      "step": 12300
    },
    {
      "epoch": 1.3593835993395706,
      "grad_norm": 0.18616576492786407,
      "learning_rate": 0.0002614134923848736,
      "loss": 2.2207,
      "step": 12350
    },
    {
      "epoch": 1.364887176664832,
      "grad_norm": 0.18103408813476562,
      "learning_rate": 0.00026104716817998696,
      "loss": 2.2346,
      "step": 12400
    },
    {
      "epoch": 1.3703907539900935,
      "grad_norm": 0.18511448800563812,
      "learning_rate": 0.00026067937259534595,
      "loss": 2.2363,
      "step": 12450
    },
    {
      "epoch": 1.3758943313153549,
      "grad_norm": 0.1737717241048813,
      "learning_rate": 0.00026031011050425796,
      "loss": 2.2199,
      "step": 12500
    },
    {
      "epoch": 1.3813979086406163,
      "grad_norm": 0.18761631846427917,
      "learning_rate": 0.00025993938679946154,
      "loss": 2.228,
      "step": 12550
    },
    {
      "epoch": 1.3869014859658777,
      "grad_norm": 0.18815304338932037,
      "learning_rate": 0.0002595672063930617,
      "loss": 2.2034,
      "step": 12600
    },
    {
      "epoch": 1.3924050632911391,
      "grad_norm": 0.1868327558040619,
      "learning_rate": 0.00025919357421646486,
      "loss": 2.197,
      "step": 12650
    },
    {
      "epoch": 1.3979086406164005,
      "grad_norm": 0.17544154822826385,
      "learning_rate": 0.00025881849522031345,
      "loss": 2.1956,
      "step": 12700
    },
    {
      "epoch": 1.4034122179416622,
      "grad_norm": 0.18338458240032196,
      "learning_rate": 0.00025844197437442033,
      "loss": 2.2291,
      "step": 12750
    },
    {
      "epoch": 1.4089157952669236,
      "grad_norm": 0.17127631604671478,
      "learning_rate": 0.00025806401666770277,
      "loss": 2.2205,
      "step": 12800
    },
    {
      "epoch": 1.414419372592185,
      "grad_norm": 0.17331157624721527,
      "learning_rate": 0.00025768462710811673,
      "loss": 2.2356,
      "step": 12850
    },
    {
      "epoch": 1.4199229499174464,
      "grad_norm": 0.16098715364933014,
      "learning_rate": 0.00025730381072259026,
      "loss": 2.2065,
      "step": 12900
    },
    {
      "epoch": 1.4254265272427078,
      "grad_norm": 0.17098484933376312,
      "learning_rate": 0.00025692157255695667,
      "loss": 2.2323,
      "step": 12950
    },
    {
      "epoch": 1.4309301045679692,
      "grad_norm": 0.15763573348522186,
      "learning_rate": 0.00025653791767588823,
      "loss": 2.2064,
      "step": 13000
    },
    {
      "epoch": 1.4364336818932306,
      "grad_norm": 0.17587807774543762,
      "learning_rate": 0.0002561528511628286,
      "loss": 2.2085,
      "step": 13050
    },
    {
      "epoch": 1.441937259218492,
      "grad_norm": 0.17770230770111084,
      "learning_rate": 0.00025576637811992555,
      "loss": 2.2204,
      "step": 13100
    },
    {
      "epoch": 1.4474408365437534,
      "grad_norm": 0.18236953020095825,
      "learning_rate": 0.0002553785036679636,
      "loss": 2.2388,
      "step": 13150
    },
    {
      "epoch": 1.4529444138690149,
      "grad_norm": 0.16505366563796997,
      "learning_rate": 0.00025498923294629594,
      "loss": 2.2345,
      "step": 13200
    },
    {
      "epoch": 1.4584479911942763,
      "grad_norm": 0.1687227189540863,
      "learning_rate": 0.0002545985711127763,
      "loss": 2.1652,
      "step": 13250
    },
    {
      "epoch": 1.4639515685195377,
      "grad_norm": 0.17849218845367432,
      "learning_rate": 0.00025420652334369085,
      "loss": 2.2115,
      "step": 13300
    },
    {
      "epoch": 1.469455145844799,
      "grad_norm": 0.17708005011081696,
      "learning_rate": 0.0002538130948336894,
      "loss": 2.2017,
      "step": 13350
    },
    {
      "epoch": 1.4749587231700605,
      "grad_norm": 0.17772626876831055,
      "learning_rate": 0.0002534182907957165,
      "loss": 2.1926,
      "step": 13400
    },
    {
      "epoch": 1.480462300495322,
      "grad_norm": 0.16764992475509644,
      "learning_rate": 0.00025302211646094277,
      "loss": 2.1986,
      "step": 13450
    },
    {
      "epoch": 1.4859658778205833,
      "grad_norm": 0.19117778539657593,
      "learning_rate": 0.00025262457707869506,
      "loss": 2.2118,
      "step": 13500
    },
    {
      "epoch": 1.4914694551458447,
      "grad_norm": 0.17632248997688293,
      "learning_rate": 0.00025222567791638743,
      "loss": 2.1897,
      "step": 13550
    },
    {
      "epoch": 1.4969730324711064,
      "grad_norm": 0.19731369614601135,
      "learning_rate": 0.0002518254242594508,
      "loss": 2.1945,
      "step": 13600
    },
    {
      "epoch": 1.5024766097963678,
      "grad_norm": 0.16187229752540588,
      "learning_rate": 0.0002514238214112635,
      "loss": 2.1959,
      "step": 13650
    },
    {
      "epoch": 1.5079801871216292,
      "grad_norm": 0.1828337460756302,
      "learning_rate": 0.00025102087469308036,
      "loss": 2.1994,
      "step": 13700
    },
    {
      "epoch": 1.5134837644468906,
      "grad_norm": 0.17188578844070435,
      "learning_rate": 0.0002506165894439628,
      "loss": 2.2087,
      "step": 13750
    },
    {
      "epoch": 1.518987341772152,
      "grad_norm": 0.17914508283138275,
      "learning_rate": 0.00025021097102070786,
      "loss": 2.2266,
      "step": 13800
    },
    {
      "epoch": 1.5244909190974134,
      "grad_norm": 0.17482663691043854,
      "learning_rate": 0.0002498040247977769,
      "loss": 2.2431,
      "step": 13850
    },
    {
      "epoch": 1.5299944964226748,
      "grad_norm": 0.17342698574066162,
      "learning_rate": 0.00024939575616722505,
      "loss": 2.2045,
      "step": 13900
    },
    {
      "epoch": 1.5354980737479362,
      "grad_norm": 0.18258632719516754,
      "learning_rate": 0.00024898617053862904,
      "loss": 2.1843,
      "step": 13950
    },
    {
      "epoch": 1.5410016510731976,
      "grad_norm": 0.16960634291172028,
      "learning_rate": 0.00024857527333901625,
      "loss": 2.2096,
      "step": 14000
    },
    {
      "epoch": 1.546505228398459,
      "grad_norm": 0.17008773982524872,
      "learning_rate": 0.00024816307001279214,
      "loss": 2.1522,
      "step": 14050
    },
    {
      "epoch": 1.5520088057237205,
      "grad_norm": 0.16970837116241455,
      "learning_rate": 0.00024774956602166857,
      "loss": 2.2055,
      "step": 14100
    },
    {
      "epoch": 1.5575123830489819,
      "grad_norm": 0.17486636340618134,
      "learning_rate": 0.0002473347668445912,
      "loss": 2.1906,
      "step": 14150
    },
    {
      "epoch": 1.5630159603742433,
      "grad_norm": 0.19200065732002258,
      "learning_rate": 0.00024691867797766684,
      "loss": 2.1839,
      "step": 14200
    },
    {
      "epoch": 1.5685195376995047,
      "grad_norm": 0.19441814720630646,
      "learning_rate": 0.00024650130493409104,
      "loss": 2.2121,
      "step": 14250
    },
    {
      "epoch": 1.574023115024766,
      "grad_norm": 0.16507557034492493,
      "learning_rate": 0.0002460826532440745,
      "loss": 2.2016,
      "step": 14300
    },
    {
      "epoch": 1.5795266923500275,
      "grad_norm": 0.16732299327850342,
      "learning_rate": 0.00024566272845477014,
      "loss": 2.2362,
      "step": 14350
    },
    {
      "epoch": 1.585030269675289,
      "grad_norm": 0.18136867880821228,
      "learning_rate": 0.00024524153613019947,
      "loss": 2.193,
      "step": 14400
    },
    {
      "epoch": 1.5905338470005503,
      "grad_norm": 0.16409073770046234,
      "learning_rate": 0.0002448190818511789,
      "loss": 2.1589,
      "step": 14450
    },
    {
      "epoch": 1.5960374243258117,
      "grad_norm": 0.17381897568702698,
      "learning_rate": 0.00024439537121524583,
      "loss": 2.1834,
      "step": 14500
    },
    {
      "epoch": 1.6015410016510732,
      "grad_norm": 0.1628628373146057,
      "learning_rate": 0.00024397040983658436,
      "loss": 2.172,
      "step": 14550
    },
    {
      "epoch": 1.6070445789763346,
      "grad_norm": 0.16793246567249298,
      "learning_rate": 0.00024354420334595105,
      "loss": 2.176,
      "step": 14600
    },
    {
      "epoch": 1.612548156301596,
      "grad_norm": 0.17394863069057465,
      "learning_rate": 0.0002431167573906002,
      "loss": 2.199,
      "step": 14650
    },
    {
      "epoch": 1.6180517336268574,
      "grad_norm": 0.1554850935935974,
      "learning_rate": 0.00024268807763420914,
      "loss": 2.2042,
      "step": 14700
    },
    {
      "epoch": 1.6235553109521188,
      "grad_norm": 0.17003560066223145,
      "learning_rate": 0.00024225816975680312,
      "loss": 2.1717,
      "step": 14750
    },
    {
      "epoch": 1.6290588882773802,
      "grad_norm": 0.17207174003124237,
      "learning_rate": 0.0002418270394546799,
      "loss": 2.1894,
      "step": 14800
    },
    {
      "epoch": 1.6345624656026416,
      "grad_norm": 0.1507563292980194,
      "learning_rate": 0.00024139469244033467,
      "loss": 2.2101,
      "step": 14850
    },
    {
      "epoch": 1.640066042927903,
      "grad_norm": 0.17063497006893158,
      "learning_rate": 0.0002409611344423838,
      "loss": 2.1854,
      "step": 14900
    },
    {
      "epoch": 1.6455696202531644,
      "grad_norm": 0.17093567550182343,
      "learning_rate": 0.00024052637120548954,
      "loss": 2.2028,
      "step": 14950
    },
    {
      "epoch": 1.6510731975784259,
      "grad_norm": 0.17256326973438263,
      "learning_rate": 0.00024009040849028341,
      "loss": 2.2102,
      "step": 15000
    },
    {
      "epoch": 1.6565767749036873,
      "grad_norm": 0.17648595571517944,
      "learning_rate": 0.00023965325207329016,
      "loss": 2.2094,
      "step": 15050
    },
    {
      "epoch": 1.6620803522289487,
      "grad_norm": 0.17140266299247742,
      "learning_rate": 0.0002392149077468511,
      "loss": 2.1936,
      "step": 15100
    },
    {
      "epoch": 1.66758392955421,
      "grad_norm": 0.16277456283569336,
      "learning_rate": 0.00023877538131904743,
      "loss": 2.1705,
      "step": 15150
    },
    {
      "epoch": 1.6730875068794715,
      "grad_norm": 0.16912780702114105,
      "learning_rate": 0.00023833467861362323,
      "loss": 2.1834,
      "step": 15200
    },
    {
      "epoch": 1.6785910842047331,
      "grad_norm": 0.1684638261795044,
      "learning_rate": 0.00023789280546990824,
      "loss": 2.2044,
      "step": 15250
    },
    {
      "epoch": 1.6840946615299945,
      "grad_norm": 0.16793768107891083,
      "learning_rate": 0.00023744976774274065,
      "loss": 2.188,
      "step": 15300
    },
    {
      "epoch": 1.689598238855256,
      "grad_norm": 0.15743038058280945,
      "learning_rate": 0.00023700557130238944,
      "loss": 2.183,
      "step": 15350
    },
    {
      "epoch": 1.6951018161805174,
      "grad_norm": 0.1788504421710968,
      "learning_rate": 0.0002365602220344765,
      "loss": 2.1923,
      "step": 15400
    },
    {
      "epoch": 1.7006053935057788,
      "grad_norm": 0.1614784598350525,
      "learning_rate": 0.00023611372583989883,
      "loss": 2.196,
      "step": 15450
    },
    {
      "epoch": 1.7061089708310402,
      "grad_norm": 0.16596226394176483,
      "learning_rate": 0.0002356660886347502,
      "loss": 2.1952,
      "step": 15500
    },
    {
      "epoch": 1.7116125481563016,
      "grad_norm": 0.18096621334552765,
      "learning_rate": 0.00023521731635024274,
      "loss": 2.1858,
      "step": 15550
    },
    {
      "epoch": 1.717116125481563,
      "grad_norm": 0.1885058581829071,
      "learning_rate": 0.0002347674149326286,
      "loss": 2.1894,
      "step": 15600
    },
    {
      "epoch": 1.7226197028068244,
      "grad_norm": 0.15273383259773254,
      "learning_rate": 0.00023431639034312096,
      "loss": 2.1896,
      "step": 15650
    },
    {
      "epoch": 1.7281232801320858,
      "grad_norm": 0.17447490990161896,
      "learning_rate": 0.00023386424855781495,
      "loss": 2.1896,
      "step": 15700
    },
    {
      "epoch": 1.7336268574573472,
      "grad_norm": 0.16734521090984344,
      "learning_rate": 0.00023341099556760866,
      "loss": 2.1886,
      "step": 15750
    },
    {
      "epoch": 1.7391304347826086,
      "grad_norm": 0.17426900565624237,
      "learning_rate": 0.0002329566373781238,
      "loss": 2.1838,
      "step": 15800
    },
    {
      "epoch": 1.7446340121078703,
      "grad_norm": 0.1640431135892868,
      "learning_rate": 0.00023250118000962583,
      "loss": 2.2026,
      "step": 15850
    },
    {
      "epoch": 1.7501375894331317,
      "grad_norm": 0.16575609147548676,
      "learning_rate": 0.00023204462949694447,
      "loss": 2.1721,
      "step": 15900
    },
    {
      "epoch": 1.755641166758393,
      "grad_norm": 0.17576967179775238,
      "learning_rate": 0.00023158699188939371,
      "loss": 2.1517,
      "step": 15950
    },
    {
      "epoch": 1.7611447440836545,
      "grad_norm": 0.1661233901977539,
      "learning_rate": 0.00023112827325069154,
      "loss": 2.1594,
      "step": 16000
    },
    {
      "epoch": 1.766648321408916,
      "grad_norm": 0.17433176934719086,
      "learning_rate": 0.00023066847965887954,
      "loss": 2.2069,
      "step": 16050
    },
    {
      "epoch": 1.7721518987341773,
      "grad_norm": 0.164288729429245,
      "learning_rate": 0.0002302076172062427,
      "loss": 2.1945,
      "step": 16100
    },
    {
      "epoch": 1.7776554760594387,
      "grad_norm": 0.16619046032428741,
      "learning_rate": 0.00022974569199922827,
      "loss": 2.1678,
      "step": 16150
    },
    {
      "epoch": 1.7831590533847002,
      "grad_norm": 0.1572977751493454,
      "learning_rate": 0.00022928271015836514,
      "loss": 2.1834,
      "step": 16200
    },
    {
      "epoch": 1.7886626307099616,
      "grad_norm": 0.15041351318359375,
      "learning_rate": 0.0002288186778181826,
      "loss": 2.1372,
      "step": 16250
    },
    {
      "epoch": 1.794166208035223,
      "grad_norm": 0.17850619554519653,
      "learning_rate": 0.00022835360112712913,
      "loss": 2.2064,
      "step": 16300
    },
    {
      "epoch": 1.7996697853604844,
      "grad_norm": 0.1657785028219223,
      "learning_rate": 0.00022788748624749083,
      "loss": 2.1948,
      "step": 16350
    },
    {
      "epoch": 1.8051733626857458,
      "grad_norm": 0.16514721512794495,
      "learning_rate": 0.00022742033935530995,
      "loss": 2.209,
      "step": 16400
    },
    {
      "epoch": 1.8106769400110072,
      "grad_norm": 0.1726425439119339,
      "learning_rate": 0.00022695216664030297,
      "loss": 2.1992,
      "step": 16450
    },
    {
      "epoch": 1.8161805173362686,
      "grad_norm": 0.1753402054309845,
      "learning_rate": 0.00022648297430577835,
      "loss": 2.1929,
      "step": 16500
    },
    {
      "epoch": 1.82168409466153,
      "grad_norm": 0.18259774148464203,
      "learning_rate": 0.00022601276856855482,
      "loss": 2.2083,
      "step": 16550
    },
    {
      "epoch": 1.8271876719867914,
      "grad_norm": 0.16030113399028778,
      "learning_rate": 0.0002255415556588786,
      "loss": 2.1571,
      "step": 16600
    },
    {
      "epoch": 1.8326912493120529,
      "grad_norm": 0.1431453675031662,
      "learning_rate": 0.000225069341820341,
      "loss": 2.149,
      "step": 16650
    },
    {
      "epoch": 1.8381948266373143,
      "grad_norm": 0.1664692461490631,
      "learning_rate": 0.0002245961333097956,
      "loss": 2.1864,
      "step": 16700
    },
    {
      "epoch": 1.8436984039625757,
      "grad_norm": 0.17130005359649658,
      "learning_rate": 0.00022412193639727564,
      "loss": 2.18,
      "step": 16750
    },
    {
      "epoch": 1.849201981287837,
      "grad_norm": 0.15278129279613495,
      "learning_rate": 0.00022364675736591058,
| "loss": 2.1648, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.8547055586130985, | |
| "grad_norm": 0.176371768116951, | |
| "learning_rate": 0.000223170602511843, | |
| "loss": 2.1582, | |
| "step": 16850 | |
| }, | |
| { | |
| "epoch": 1.86020913593836, | |
| "grad_norm": 0.15902136266231537, | |
| "learning_rate": 0.0002226934781441452, | |
| "loss": 2.183, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 1.8657127132636213, | |
| "grad_norm": 0.14821839332580566, | |
| "learning_rate": 0.00022221539058473553, | |
| "loss": 2.172, | |
| "step": 16950 | |
| }, | |
| { | |
| "epoch": 1.8712162905888827, | |
| "grad_norm": 0.15611177682876587, | |
| "learning_rate": 0.00022173634616829481, | |
| "loss": 2.1715, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.8767198679141441, | |
| "grad_norm": 0.1633993536233902, | |
| "learning_rate": 0.00022125635124218207, | |
| "loss": 2.1961, | |
| "step": 17050 | |
| }, | |
| { | |
| "epoch": 1.8822234452394055, | |
| "grad_norm": 0.16087235510349274, | |
| "learning_rate": 0.00022077541216635076, | |
| "loss": 2.1963, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 1.887727022564667, | |
| "grad_norm": 0.16733358800411224, | |
| "learning_rate": 0.00022029353531326432, | |
| "loss": 2.1605, | |
| "step": 17150 | |
| }, | |
| { | |
| "epoch": 1.8932305998899284, | |
| "grad_norm": 0.16267576813697815, | |
| "learning_rate": 0.00021981072706781185, | |
| "loss": 2.1554, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.8987341772151898, | |
| "grad_norm": 0.16028568148612976, | |
| "learning_rate": 0.0002193269938272232, | |
| "loss": 2.1793, | |
| "step": 17250 | |
| }, | |
| { | |
| "epoch": 1.9042377545404512, | |
| "grad_norm": 0.15961337089538574, | |
| "learning_rate": 0.00021884234200098473, | |
| "loss": 2.1673, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 1.9097413318657126, | |
| "grad_norm": 0.1637711226940155, | |
| "learning_rate": 0.00021835677801075397, | |
| "loss": 2.1676, | |
| "step": 17350 | |
| }, | |
| { | |
| "epoch": 1.915244909190974, | |
| "grad_norm": 0.16490189731121063, | |
| "learning_rate": 0.00021787030829027468, | |
| "loss": 2.2119, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.9207484865162354, | |
| "grad_norm": 0.17354600131511688, | |
| "learning_rate": 0.00021738293928529154, | |
| "loss": 2.2001, | |
| "step": 17450 | |
| }, | |
| { | |
| "epoch": 1.9262520638414968, | |
| "grad_norm": 0.16645126044750214, | |
| "learning_rate": 0.00021689467745346483, | |
| "loss": 2.1915, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.9317556411667582, | |
| "grad_norm": 0.16187366843223572, | |
| "learning_rate": 0.0002164055292642849, | |
| "loss": 2.1829, | |
| "step": 17550 | |
| }, | |
| { | |
| "epoch": 1.9372592184920197, | |
| "grad_norm": 0.17859427630901337, | |
| "learning_rate": 0.00021591550119898616, | |
| "loss": 2.1203, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.9427627958172813, | |
| "grad_norm": 0.15773439407348633, | |
| "learning_rate": 0.00021542459975046164, | |
| "loss": 2.1535, | |
| "step": 17650 | |
| }, | |
| { | |
| "epoch": 1.9482663731425427, | |
| "grad_norm": 0.1696433126926422, | |
| "learning_rate": 0.00021493283142317668, | |
| "loss": 2.1271, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 1.953769950467804, | |
| "grad_norm": 0.16615457832813263, | |
| "learning_rate": 0.00021444020273308267, | |
| "loss": 2.1688, | |
| "step": 17750 | |
| }, | |
| { | |
| "epoch": 1.9592735277930655, | |
| "grad_norm": 0.16561925411224365, | |
| "learning_rate": 0.00021394672020753102, | |
| "loss": 2.1228, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.964777105118327, | |
| "grad_norm": 0.1655699759721756, | |
| "learning_rate": 0.00021345239038518638, | |
| "loss": 2.1614, | |
| "step": 17850 | |
| }, | |
| { | |
| "epoch": 1.9702806824435883, | |
| "grad_norm": 0.16106578707695007, | |
| "learning_rate": 0.0002129572198159401, | |
| "loss": 2.1471, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 1.9757842597688497, | |
| "grad_norm": 0.1635727882385254, | |
| "learning_rate": 0.00021246121506082367, | |
| "loss": 2.184, | |
| "step": 17950 | |
| }, | |
| { | |
| "epoch": 1.9812878370941112, | |
| "grad_norm": 0.15699493885040283, | |
| "learning_rate": 0.0002119643826919213, | |
| "loss": 2.1602, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.9867914144193726, | |
| "grad_norm": 0.15784280002117157, | |
| "learning_rate": 0.00021146672929228327, | |
| "loss": 2.1781, | |
| "step": 18050 | |
| }, | |
| { | |
| "epoch": 1.992294991744634, | |
| "grad_norm": 0.15157456696033478, | |
| "learning_rate": 0.00021096826145583852, | |
| "loss": 2.1532, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 1.9977985690698954, | |
| "grad_norm": 0.15633012354373932, | |
| "learning_rate": 0.0002104689857873074, | |
| "loss": 2.1676, | |
| "step": 18150 | |
| }, | |
| { | |
| "epoch": 2.003302146395157, | |
| "grad_norm": 0.16586118936538696, | |
| "learning_rate": 0.00020996890890211386, | |
| "loss": 2.1335, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 2.0088057237204184, | |
| "grad_norm": 0.1573173552751541, | |
| "learning_rate": 0.00020946803742629826, | |
| "loss": 2.11, | |
| "step": 18250 | |
| }, | |
| { | |
| "epoch": 2.01430930104568, | |
| "grad_norm": 0.18040034174919128, | |
| "learning_rate": 0.00020896637799642915, | |
| "loss": 2.1128, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 2.0198128783709413, | |
| "grad_norm": 0.1664518266916275, | |
| "learning_rate": 0.00020846393725951556, | |
| "loss": 2.1384, | |
| "step": 18350 | |
| }, | |
| { | |
| "epoch": 2.0253164556962027, | |
| "grad_norm": 0.17078042030334473, | |
| "learning_rate": 0.0002079607218729188, | |
| "loss": 2.0909, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 2.030820033021464, | |
| "grad_norm": 0.15759077668190002, | |
| "learning_rate": 0.00020745673850426446, | |
| "loss": 2.1015, | |
| "step": 18450 | |
| }, | |
| { | |
| "epoch": 2.0363236103467255, | |
| "grad_norm": 0.17311988770961761, | |
| "learning_rate": 0.00020695199383135386, | |
| "loss": 2.1436, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.041827187671987, | |
| "grad_norm": 0.1612362265586853, | |
| "learning_rate": 0.00020644649454207553, | |
| "loss": 2.0868, | |
| "step": 18550 | |
| }, | |
| { | |
| "epoch": 2.0473307649972483, | |
| "grad_norm": 0.16166241466999054, | |
| "learning_rate": 0.0002059402473343169, | |
| "loss": 2.0683, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 2.0528343423225097, | |
| "grad_norm": 0.17205455899238586, | |
| "learning_rate": 0.00020543325891587516, | |
| "loss": 2.0618, | |
| "step": 18650 | |
| }, | |
| { | |
| "epoch": 2.058337919647771, | |
| "grad_norm": 0.17534597218036652, | |
| "learning_rate": 0.00020492553600436872, | |
| "loss": 2.1173, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 2.0638414969730325, | |
| "grad_norm": 0.15081949532032013, | |
| "learning_rate": 0.00020441708532714797, | |
| "loss": 2.0991, | |
| "step": 18750 | |
| }, | |
| { | |
| "epoch": 2.069345074298294, | |
| "grad_norm": 0.15837198495864868, | |
| "learning_rate": 0.00020390791362120625, | |
| "loss": 2.1332, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 2.0748486516235554, | |
| "grad_norm": 0.1598348468542099, | |
| "learning_rate": 0.00020339802763309054, | |
| "loss": 2.1164, | |
| "step": 18850 | |
| }, | |
| { | |
| "epoch": 2.0803522289488168, | |
| "grad_norm": 0.1536133736371994, | |
| "learning_rate": 0.0002028874341188122, | |
| "loss": 2.1169, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 2.085855806274078, | |
| "grad_norm": 0.1654282808303833, | |
| "learning_rate": 0.00020237613984375711, | |
| "loss": 2.1253, | |
| "step": 18950 | |
| }, | |
| { | |
| "epoch": 2.0913593835993396, | |
| "grad_norm": 0.16015826165676117, | |
| "learning_rate": 0.00020186415158259652, | |
| "loss": 2.1196, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.096862960924601, | |
| "grad_norm": 0.16205786168575287, | |
| "learning_rate": 0.00020135147611919686, | |
| "loss": 2.0872, | |
| "step": 19050 | |
| }, | |
| { | |
| "epoch": 2.1023665382498624, | |
| "grad_norm": 0.16814254224300385, | |
| "learning_rate": 0.00020083812024653013, | |
| "loss": 2.1569, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 2.107870115575124, | |
| "grad_norm": 0.15433181822299957, | |
| "learning_rate": 0.00020032409076658364, | |
| "loss": 2.0967, | |
| "step": 19150 | |
| }, | |
| { | |
| "epoch": 2.1133736929003852, | |
| "grad_norm": 0.16261817514896393, | |
| "learning_rate": 0.00019980939449027016, | |
| "loss": 2.0938, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 2.1188772702256466, | |
| "grad_norm": 0.17877838015556335, | |
| "learning_rate": 0.00019929403823733748, | |
| "loss": 2.1399, | |
| "step": 19250 | |
| }, | |
| { | |
| "epoch": 2.124380847550908, | |
| "grad_norm": 0.178861603140831, | |
| "learning_rate": 0.00019877802883627808, | |
| "loss": 2.1362, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 2.1298844248761695, | |
| "grad_norm": 0.1621016561985016, | |
| "learning_rate": 0.00019826137312423873, | |
| "loss": 2.1089, | |
| "step": 19350 | |
| }, | |
| { | |
| "epoch": 2.135388002201431, | |
| "grad_norm": 0.17772269248962402, | |
| "learning_rate": 0.00019774407794692986, | |
| "loss": 2.1278, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 2.1408915795266923, | |
| "grad_norm": 0.16339927911758423, | |
| "learning_rate": 0.00019722615015853483, | |
| "loss": 2.1244, | |
| "step": 19450 | |
| }, | |
| { | |
| "epoch": 2.1463951568519537, | |
| "grad_norm": 0.16879414021968842, | |
| "learning_rate": 0.0001967075966216191, | |
| "loss": 2.106, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.151898734177215, | |
| "grad_norm": 0.15002557635307312, | |
| "learning_rate": 0.00019618842420703923, | |
| "loss": 2.1007, | |
| "step": 19550 | |
| }, | |
| { | |
| "epoch": 2.1574023115024765, | |
| "grad_norm": 0.17130738496780396, | |
| "learning_rate": 0.0001956686397938522, | |
| "loss": 2.1104, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 2.162905888827738, | |
| "grad_norm": 0.16796913743019104, | |
| "learning_rate": 0.00019514825026922377, | |
| "loss": 2.1278, | |
| "step": 19650 | |
| }, | |
| { | |
| "epoch": 2.1684094661529993, | |
| "grad_norm": 0.19111864268779755, | |
| "learning_rate": 0.0001946272625283375, | |
| "loss": 2.1402, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 0.17543087899684906, | |
| "learning_rate": 0.00019410568347430344, | |
| "loss": 2.0984, | |
| "step": 19750 | |
| }, | |
| { | |
| "epoch": 2.179416620803522, | |
| "grad_norm": 0.15795323252677917, | |
| "learning_rate": 0.00019358352001806642, | |
| "loss": 2.1125, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 2.1849201981287836, | |
| "grad_norm": 0.15581749379634857, | |
| "learning_rate": 0.0001930607790783147, | |
| "loss": 2.119, | |
| "step": 19850 | |
| }, | |
| { | |
| "epoch": 2.190423775454045, | |
| "grad_norm": 0.17086704075336456, | |
| "learning_rate": 0.00019253746758138822, | |
| "loss": 2.1029, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 2.1959273527793064, | |
| "grad_norm": 0.15870486199855804, | |
| "learning_rate": 0.00019201359246118682, | |
| "loss": 2.1133, | |
| "step": 19950 | |
| }, | |
| { | |
| "epoch": 2.201430930104568, | |
| "grad_norm": 0.16840116679668427, | |
| "learning_rate": 0.0001914891606590784, | |
| "loss": 2.1196, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.206934507429829, | |
| "grad_norm": 0.1643972396850586, | |
| "learning_rate": 0.00019096417912380686, | |
| "loss": 2.1173, | |
| "step": 20050 | |
| }, | |
| { | |
| "epoch": 2.2124380847550906, | |
| "grad_norm": 0.1598280966281891, | |
| "learning_rate": 0.0001904386548114001, | |
| "loss": 2.1338, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 2.217941662080352, | |
| "grad_norm": 0.16909152269363403, | |
| "learning_rate": 0.0001899125946850779, | |
| "loss": 2.0987, | |
| "step": 20150 | |
| }, | |
| { | |
| "epoch": 2.2234452394056134, | |
| "grad_norm": 0.15597747266292572, | |
| "learning_rate": 0.00018938600571515962, | |
| "loss": 2.1295, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 2.228948816730875, | |
| "grad_norm": 0.16759732365608215, | |
| "learning_rate": 0.00018885889487897162, | |
| "loss": 2.1374, | |
| "step": 20250 | |
| }, | |
| { | |
| "epoch": 2.2344523940561363, | |
| "grad_norm": 0.15526509284973145, | |
| "learning_rate": 0.00018833126916075526, | |
| "loss": 2.1287, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 2.2399559713813977, | |
| "grad_norm": 0.16562257707118988, | |
| "learning_rate": 0.00018780313555157393, | |
| "loss": 2.126, | |
| "step": 20350 | |
| }, | |
| { | |
| "epoch": 2.2454595487066595, | |
| "grad_norm": 0.17471948266029358, | |
| "learning_rate": 0.00018727450104922075, | |
| "loss": 2.1175, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 2.2509631260319205, | |
| "grad_norm": 0.16391079127788544, | |
| "learning_rate": 0.00018674537265812548, | |
| "loss": 2.0965, | |
| "step": 20450 | |
| }, | |
| { | |
| "epoch": 2.2564667033571824, | |
| "grad_norm": 0.1693831831216812, | |
| "learning_rate": 0.00018621575738926217, | |
| "loss": 2.1124, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.2619702806824438, | |
| "grad_norm": 0.16163313388824463, | |
| "learning_rate": 0.00018568566226005587, | |
| "loss": 2.1034, | |
| "step": 20550 | |
| }, | |
| { | |
| "epoch": 2.267473858007705, | |
| "grad_norm": 0.1676999181509018, | |
| "learning_rate": 0.00018515509429428988, | |
| "loss": 2.0991, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 2.2729774353329666, | |
| "grad_norm": 0.16531667113304138, | |
| "learning_rate": 0.00018462406052201252, | |
| "loss": 2.1183, | |
| "step": 20650 | |
| }, | |
| { | |
| "epoch": 2.278481012658228, | |
| "grad_norm": 0.16208726167678833, | |
| "learning_rate": 0.00018409256797944412, | |
| "loss": 2.0981, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 2.2839845899834894, | |
| "grad_norm": 0.1707569658756256, | |
| "learning_rate": 0.0001835606237088837, | |
| "loss": 2.0926, | |
| "step": 20750 | |
| }, | |
| { | |
| "epoch": 2.289488167308751, | |
| "grad_norm": 0.16191871464252472, | |
| "learning_rate": 0.00018302823475861582, | |
| "loss": 2.0895, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 2.2949917446340122, | |
| "grad_norm": 0.154753640294075, | |
| "learning_rate": 0.0001824954081828168, | |
| "loss": 2.0972, | |
| "step": 20850 | |
| }, | |
| { | |
| "epoch": 2.3004953219592736, | |
| "grad_norm": 0.16868236660957336, | |
| "learning_rate": 0.00018196215104146187, | |
| "loss": 2.1009, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 2.305998899284535, | |
| "grad_norm": 0.1602269858121872, | |
| "learning_rate": 0.00018142847040023097, | |
| "loss": 2.1063, | |
| "step": 20950 | |
| }, | |
| { | |
| "epoch": 2.3115024766097965, | |
| "grad_norm": 0.17220260202884674, | |
| "learning_rate": 0.00018089437333041562, | |
| "loss": 2.1049, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.317006053935058, | |
| "grad_norm": 0.14858907461166382, | |
| "learning_rate": 0.0001803598669088249, | |
| "loss": 2.1129, | |
| "step": 21050 | |
| }, | |
| { | |
| "epoch": 2.3225096312603193, | |
| "grad_norm": 0.17586296796798706, | |
| "learning_rate": 0.0001798249582176919, | |
| "loss": 2.0961, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 2.3280132085855807, | |
| "grad_norm": 0.14931316673755646, | |
| "learning_rate": 0.00017928965434457983, | |
| "loss": 2.1108, | |
| "step": 21150 | |
| }, | |
| { | |
| "epoch": 2.333516785910842, | |
| "grad_norm": 0.17148137092590332, | |
| "learning_rate": 0.00017875396238228798, | |
| "loss": 2.1029, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 2.3390203632361035, | |
| "grad_norm": 0.15888847410678864, | |
| "learning_rate": 0.00017821788942875793, | |
| "loss": 2.1258, | |
| "step": 21250 | |
| }, | |
| { | |
| "epoch": 2.344523940561365, | |
| "grad_norm": 0.1749711036682129, | |
| "learning_rate": 0.0001776814425869793, | |
| "loss": 2.1176, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 2.3500275178866263, | |
| "grad_norm": 0.16511616110801697, | |
| "learning_rate": 0.0001771446289648959, | |
| "loss": 2.0992, | |
| "step": 21350 | |
| }, | |
| { | |
| "epoch": 2.3555310952118877, | |
| "grad_norm": 0.16869331896305084, | |
| "learning_rate": 0.00017660745567531124, | |
| "loss": 2.1274, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 2.361034672537149, | |
| "grad_norm": 0.16024701297283173, | |
| "learning_rate": 0.00017606992983579457, | |
| "loss": 2.1379, | |
| "step": 21450 | |
| }, | |
| { | |
| "epoch": 2.3665382498624106, | |
| "grad_norm": 0.1646272987127304, | |
| "learning_rate": 0.00017553205856858637, | |
| "loss": 2.1365, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.372041827187672, | |
| "grad_norm": 0.1765459179878235, | |
| "learning_rate": 0.00017499384900050404, | |
| "loss": 2.1125, | |
| "step": 21550 | |
| }, | |
| { | |
| "epoch": 2.3775454045129334, | |
| "grad_norm": 0.15617190301418304, | |
| "learning_rate": 0.0001744553082628475, | |
| "loss": 2.0985, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 2.383048981838195, | |
| "grad_norm": 0.14687784016132355, | |
| "learning_rate": 0.00017391644349130467, | |
| "loss": 2.0771, | |
| "step": 21650 | |
| }, | |
| { | |
| "epoch": 2.388552559163456, | |
| "grad_norm": 0.16840553283691406, | |
| "learning_rate": 0.00017337726182585698, | |
| "loss": 2.0909, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 2.3940561364887176, | |
| "grad_norm": 0.16857478022575378, | |
| "learning_rate": 0.00017283777041068472, | |
| "loss": 2.0968, | |
| "step": 21750 | |
| }, | |
| { | |
| "epoch": 2.399559713813979, | |
| "grad_norm": 0.1799498200416565, | |
| "learning_rate": 0.00017229797639407233, | |
| "loss": 2.1023, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 2.4050632911392404, | |
| "grad_norm": 0.15700192749500275, | |
| "learning_rate": 0.00017175788692831367, | |
| "loss": 2.0986, | |
| "step": 21850 | |
| }, | |
| { | |
| "epoch": 2.410566868464502, | |
| "grad_norm": 0.16778048872947693, | |
| "learning_rate": 0.00017121750916961742, | |
| "loss": 2.0914, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 2.4160704457897633, | |
| "grad_norm": 0.15119898319244385, | |
| "learning_rate": 0.0001706768502780122, | |
| "loss": 2.0857, | |
| "step": 21950 | |
| }, | |
| { | |
| "epoch": 2.4215740231150247, | |
| "grad_norm": 0.17003406584262848, | |
| "learning_rate": 0.0001701359174172515, | |
| "loss": 2.1366, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.427077600440286, | |
| "grad_norm": 0.17182987928390503, | |
| "learning_rate": 0.000169594717754719, | |
| "loss": 2.1126, | |
| "step": 22050 | |
| }, | |
| { | |
| "epoch": 2.4325811777655475, | |
| "grad_norm": 0.16026781499385834, | |
| "learning_rate": 0.00016905325846133354, | |
| "loss": 2.076, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 2.438084755090809, | |
| "grad_norm": 0.16470612585544586, | |
| "learning_rate": 0.00016851154671145403, | |
| "loss": 2.0907, | |
| "step": 22150 | |
| }, | |
| { | |
| "epoch": 2.4435883324160703, | |
| "grad_norm": 0.14621587097644806, | |
| "learning_rate": 0.0001679695896827844, | |
| "loss": 2.1073, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 2.4490919097413317, | |
| "grad_norm": 0.1563323438167572, | |
| "learning_rate": 0.00016742739455627862, | |
| "loss": 2.0757, | |
| "step": 22250 | |
| }, | |
| { | |
| "epoch": 2.454595487066593, | |
| "grad_norm": 0.17763760685920715, | |
| "learning_rate": 0.0001668849685160455, | |
| "loss": 2.0705, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 2.4600990643918546, | |
| "grad_norm": 0.16651757061481476, | |
| "learning_rate": 0.00016634231874925335, | |
| "loss": 2.0881, | |
| "step": 22350 | |
| }, | |
| { | |
| "epoch": 2.465602641717116, | |
| "grad_norm": 0.16752833127975464, | |
| "learning_rate": 0.0001657994524460349, | |
| "loss": 2.0639, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 2.4711062190423774, | |
| "grad_norm": 0.14574387669563293, | |
| "learning_rate": 0.000165256376799392, | |
| "loss": 2.087, | |
| "step": 22450 | |
| }, | |
| { | |
| "epoch": 2.476609796367639, | |
| "grad_norm": 0.15595164895057678, | |
| "learning_rate": 0.00016471309900510043, | |
| "loss": 2.0965, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.4821133736929, | |
| "grad_norm": 0.17218752205371857, | |
| "learning_rate": 0.0001641696262616142, | |
| "loss": 2.0943, | |
| "step": 22550 | |
| }, | |
| { | |
| "epoch": 2.487616951018162, | |
| "grad_norm": 0.1563566029071808, | |
| "learning_rate": 0.00016362596576997057, | |
| "loss": 2.0946, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.4931205283434235, | |
| "grad_norm": 0.16190436482429504, | |
| "learning_rate": 0.00016308212473369453, | |
| "loss": 2.0924, | |
| "step": 22650 | |
| }, | |
| { | |
| "epoch": 2.498624105668685, | |
| "grad_norm": 0.16890835762023926, | |
| "learning_rate": 0.00016253811035870313, | |
| "loss": 2.0954, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 2.5041276829939463, | |
| "grad_norm": 0.16371208429336548, | |
| "learning_rate": 0.00016199392985321017, | |
| "loss": 2.0954, | |
| "step": 22750 | |
| }, | |
| { | |
| "epoch": 2.5096312603192077, | |
| "grad_norm": 0.15898428857326508, | |
| "learning_rate": 0.00016144959042763084, | |
| "loss": 2.0766, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.515134837644469, | |
| "grad_norm": 0.15516646206378937, | |
| "learning_rate": 0.00016090509929448594, | |
| "loss": 2.0891, | |
| "step": 22850 | |
| }, | |
| { | |
| "epoch": 2.5206384149697305, | |
| "grad_norm": 0.15007217228412628, | |
| "learning_rate": 0.00016036046366830635, | |
| "loss": 2.078, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 2.526141992294992, | |
| "grad_norm": 0.17989104986190796, | |
| "learning_rate": 0.00015981569076553765, | |
| "loss": 2.0888, | |
| "step": 22950 | |
| }, | |
| { | |
| "epoch": 2.5316455696202533, | |
| "grad_norm": 0.16754071414470673, | |
| "learning_rate": 0.00015927078780444403, | |
| "loss": 2.0882, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.5371491469455147, | |
| "grad_norm": 0.15897449851036072, | |
| "learning_rate": 0.0001587257620050134, | |
| "loss": 2.0933, | |
| "step": 23050 | |
| }, | |
| { | |
| "epoch": 2.542652724270776, | |
| "grad_norm": 0.16594423353672028, | |
| "learning_rate": 0.00015818062058886082, | |
| "loss": 2.0968, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 2.5481563015960376, | |
| "grad_norm": 0.1700371652841568, | |
| "learning_rate": 0.00015763537077913365, | |
| "loss": 2.096, | |
| "step": 23150 | |
| }, | |
| { | |
| "epoch": 2.553659878921299, | |
| "grad_norm": 0.16013766825199127, | |
| "learning_rate": 0.00015709001980041527, | |
| "loss": 2.1134, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.5591634562465604, | |
| "grad_norm": 0.1601455807685852, | |
| "learning_rate": 0.00015654457487862961, | |
| "loss": 2.0983, | |
| "step": 23250 | |
| }, | |
| { | |
| "epoch": 2.564667033571822, | |
| "grad_norm": 0.16373537480831146, | |
| "learning_rate": 0.00015599904324094534, | |
| "loss": 2.0866, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 2.570170610897083, | |
| "grad_norm": 0.15919940173625946, | |
| "learning_rate": 0.0001554534321156801, | |
| "loss": 2.1147, | |
| "step": 23350 | |
| }, | |
| { | |
| "epoch": 2.5756741882223446, | |
| "grad_norm": 0.1523171216249466, | |
| "learning_rate": 0.0001549077487322048, | |
| "loss": 2.0887, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.581177765547606, | |
| "grad_norm": 0.16425256431102753, | |
| "learning_rate": 0.00015436200032084774, | |
| "loss": 2.0918, | |
| "step": 23450 | |
| }, | |
| { | |
| "epoch": 2.5866813428728674, | |
| "grad_norm": 0.16046775877475739, | |
| "learning_rate": 0.0001538161941127988, | |
| "loss": 2.0816, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.592184920198129, | |
| "grad_norm": 0.1673419028520584, | |
| "learning_rate": 0.00015327033734001375, | |
| "loss": 2.1186, | |
| "step": 23550 | |
| }, | |
| { | |
| "epoch": 2.5976884975233903, | |
| "grad_norm": 0.15573158860206604, | |
| "learning_rate": 0.00015272443723511825, | |
| "loss": 2.0721, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.6031920748486517, | |
| "grad_norm": 0.16536468267440796, | |
| "learning_rate": 0.00015217850103131227, | |
| "loss": 2.0991, | |
| "step": 23650 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 0.15281961858272552, | |
| "learning_rate": 0.00015163253596227393, | |
| "loss": 2.0713, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 2.6141992294991745, | |
| "grad_norm": 0.1582518070936203, | |
| "learning_rate": 0.0001510865492620638, | |
| "loss": 2.0923, | |
| "step": 23750 | |
| }, | |
| { | |
| "epoch": 2.619702806824436, | |
| "grad_norm": 0.1581014096736908, | |
| "learning_rate": 0.0001505405481650293, | |
| "loss": 2.0895, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.6252063841496973, | |
| "grad_norm": 0.16346633434295654, | |
| "learning_rate": 0.00014999453990570839, | |
| "loss": 2.1011, | |
| "step": 23850 | |
| }, | |
| { | |
| "epoch": 2.6307099614749587, | |
| "grad_norm": 0.16536889970302582, | |
| "learning_rate": 0.00014944853171873387, | |
| "loss": 2.113, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 2.63621353880022, | |
| "grad_norm": 0.170758455991745, | |
| "learning_rate": 0.00014890253083873784, | |
| "loss": 2.1124, | |
| "step": 23950 | |
| }, | |
| { | |
| "epoch": 2.6417171161254815, | |
| "grad_norm": 0.15669451653957367, | |
| "learning_rate": 0.00014835654450025542, | |
| "loss": 2.0894, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.647220693450743, | |
| "grad_norm": 0.16937313973903656, | |
| "learning_rate": 0.00014781057993762902, | |
| "loss": 2.127, | |
| "step": 24050 | |
| }, | |
| { | |
| "epoch": 2.6527242707760044, | |
| "grad_norm": 0.15754981338977814, | |
| "learning_rate": 0.0001472646443849126, | |
| "loss": 2.0959, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 2.6582278481012658, | |
| "grad_norm": 0.16420376300811768, | |
| "learning_rate": 0.00014671874507577574, | |
| "loss": 2.0742, | |
| "step": 24150 | |
| }, | |
| { | |
| "epoch": 2.663731425426527, | |
| "grad_norm": 0.16400571167469025, | |
| "learning_rate": 0.00014617288924340775, | |
| "loss": 2.0992, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.6692350027517886, | |
| "grad_norm": 0.1658066064119339, | |
| "learning_rate": 0.00014562708412042193, | |
| "loss": 2.0925, | |
| "step": 24250 | |
| }, | |
| { | |
| "epoch": 2.67473858007705, | |
| "grad_norm": 0.16872134804725647, | |
| "learning_rate": 0.00014508133693875953, | |
| "loss": 2.0899, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 2.6802421574023114, | |
| "grad_norm": 0.17002440989017487, | |
| "learning_rate": 0.00014453565492959422, | |
| "loss": 2.1252, | |
| "step": 24350 | |
| }, | |
| { | |
| "epoch": 2.685745734727573, | |
| "grad_norm": 0.16417178511619568, | |
| "learning_rate": 0.00014399004532323614, | |
| "loss": 2.112, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.6912493120528342, | |
| "grad_norm": 0.172217458486557, | |
| "learning_rate": 0.000143444515349036, | |
| "loss": 2.0784, | |
| "step": 24450 | |
| }, | |
| { | |
| "epoch": 2.6967528893780957, | |
| "grad_norm": 0.16687516868114471, | |
| "learning_rate": 0.0001428990722352894, | |
| "loss": 2.0775, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.702256466703357, | |
| "grad_norm": 0.162861630320549, | |
| "learning_rate": 0.00014235372320914108, | |
| "loss": 2.0928, | |
| "step": 24550 | |
| }, | |
| { | |
| "epoch": 2.7077600440286185, | |
| "grad_norm": 0.1640874743461609, | |
| "learning_rate": 0.00014180847549648913, | |
| "loss": 2.1071, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.71326362135388, | |
| "grad_norm": 0.1654275506734848, | |
| "learning_rate": 0.0001412633363218891, | |
| "loss": 2.0615, | |
| "step": 24650 | |
| }, | |
| { | |
| "epoch": 2.7187671986791413, | |
| "grad_norm": 0.14277833700180054, | |
| "learning_rate": 0.00014071831290845857, | |
| "loss": 2.1078, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 2.7242707760044027, | |
| "grad_norm": 0.1705639362335205, | |
| "learning_rate": 0.00014017341247778123, | |
| "loss": 2.0858, | |
| "step": 24750 | |
| }, | |
| { | |
| "epoch": 2.729774353329664, | |
| "grad_norm": 0.16390259563922882, | |
| "learning_rate": 0.0001396286422498112, | |
| "loss": 2.0508, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.7352779306549255, | |
| "grad_norm": 0.1526353508234024, | |
| "learning_rate": 0.00013908400944277738, | |
| "loss": 2.0784, | |
| "step": 24850 | |
| }, | |
| { | |
| "epoch": 2.740781507980187, | |
| "grad_norm": 0.1581793874502182, | |
| "learning_rate": 0.0001385395212730879, | |
| "loss": 2.0596, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 2.7462850853054483, | |
| "grad_norm": 0.1692277193069458, | |
| "learning_rate": 0.00013799518495523445, | |
| "loss": 2.0838, | |
| "step": 24950 | |
| }, | |
| { | |
| "epoch": 2.7517886626307098, | |
| "grad_norm": 0.1614445298910141, | |
| "learning_rate": 0.0001374510077016967, | |
| "loss": 2.0928, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.757292239955971, | |
| "grad_norm": 0.15747620165348053, | |
| "learning_rate": 0.00013690699672284657, | |
| "loss": 2.0686, | |
| "step": 25050 | |
| }, | |
| { | |
| "epoch": 2.7627958172812326, | |
| "grad_norm": 0.17565257847309113, | |
| "learning_rate": 0.00013636315922685294, | |
| "loss": 2.0971, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 2.768299394606494, | |
| "grad_norm": 0.16367433965206146, | |
| "learning_rate": 0.0001358195024195861, | |
| "loss": 2.1052, | |
| "step": 25150 | |
| }, | |
| { | |
| "epoch": 2.7738029719317554, | |
| "grad_norm": 0.1564227044582367, | |
| "learning_rate": 0.00013527603350452206, | |
| "loss": 2.0902, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.779306549257017, | |
| "grad_norm": 0.16555672883987427, | |
| "learning_rate": 0.0001347327596826473, | |
| "loss": 2.0683, | |
| "step": 25250 | |
| }, | |
| { | |
| "epoch": 2.7848101265822782, | |
| "grad_norm": 0.16134041547775269, | |
| "learning_rate": 0.00013418968815236346, | |
| "loss": 2.0627, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 2.7903137039075396, | |
| "grad_norm": 0.17097671329975128, | |
| "learning_rate": 0.0001336468261093915, | |
| "loss": 2.0724, | |
| "step": 25350 | |
| }, | |
| { | |
| "epoch": 2.795817281232801, | |
| "grad_norm": 0.1656900942325592, | |
| "learning_rate": 0.00013310418074667698, | |
| "loss": 2.1266, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.801320858558063, | |
| "grad_norm": 0.1745171695947647, | |
| "learning_rate": 0.00013256175925429415, | |
| "loss": 2.0983, | |
| "step": 25450 | |
| }, | |
| { | |
| "epoch": 2.8068244358833243, | |
| "grad_norm": 0.15649184584617615, | |
| "learning_rate": 0.0001320195688193512, | |
| "loss": 2.0672, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.8123280132085857, | |
| "grad_norm": 0.16284936666488647, | |
| "learning_rate": 0.00013147761662589473, | |
| "loss": 2.0865, | |
| "step": 25550 | |
| }, | |
| { | |
| "epoch": 2.817831590533847, | |
| "grad_norm": 0.15236727893352509, | |
| "learning_rate": 0.00013093590985481456, | |
| "loss": 2.0595, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.8233351678591085, | |
| "grad_norm": 0.166220560669899, | |
| "learning_rate": 0.00013039445568374872, | |
| "loss": 2.0478, | |
| "step": 25650 | |
| }, | |
| { | |
| "epoch": 2.82883874518437, | |
| "grad_norm": 0.16826435923576355, | |
| "learning_rate": 0.0001298532612869883, | |
| "loss": 2.0756, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 2.8343423225096314, | |
| "grad_norm": 0.16447168588638306, | |
| "learning_rate": 0.0001293123338353823, | |
| "loss": 2.0742, | |
| "step": 25750 | |
| }, | |
| { | |
| "epoch": 2.8398458998348928, | |
| "grad_norm": 0.157373309135437, | |
| "learning_rate": 0.00012877168049624268, | |
| "loss": 2.0728, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.845349477160154, | |
| "grad_norm": 0.1642802357673645, | |
| "learning_rate": 0.00012823130843324948, | |
| "loss": 2.0643, | |
| "step": 25850 | |
| }, | |
| { | |
| "epoch": 2.8508530544854156, | |
| "grad_norm": 0.15527865290641785, | |
| "learning_rate": 0.00012769122480635583, | |
| "loss": 2.1088, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 2.856356631810677, | |
| "grad_norm": 0.175029918551445, | |
| "learning_rate": 0.00012715143677169297, | |
| "loss": 2.0764, | |
| "step": 25950 | |
| }, | |
| { | |
| "epoch": 2.8618602091359384, | |
| "grad_norm": 0.17664563655853271, | |
| "learning_rate": 0.0001266119514814755, | |
| "loss": 2.0527, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.8673637864612, | |
| "grad_norm": 0.16839756071567535, | |
| "learning_rate": 0.00012607277608390677, | |
| "loss": 2.0708, | |
| "step": 26050 | |
| }, | |
| { | |
| "epoch": 2.8728673637864612, | |
| "grad_norm": 0.162934809923172, | |
| "learning_rate": 0.00012553391772308407, | |
| "loss": 2.0984, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 2.8783709411117226, | |
| "grad_norm": 0.1836494356393814, | |
| "learning_rate": 0.0001249953835389037, | |
| "loss": 2.1118, | |
| "step": 26150 | |
| }, | |
| { | |
| "epoch": 2.883874518436984, | |
| "grad_norm": 0.15794949233531952, | |
| "learning_rate": 0.00012445718066696687, | |
| "loss": 2.0541, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.8893780957622455, | |
| "grad_norm": 0.16560156643390656, | |
| "learning_rate": 0.00012391931623848476, | |
| "loss": 2.1111, | |
| "step": 26250 | |
| }, | |
| { | |
| "epoch": 2.894881673087507, | |
| "grad_norm": 0.16835126280784607, | |
| "learning_rate": 0.0001233817973801842, | |
| "loss": 2.0635, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 2.9003852504127683, | |
| "grad_norm": 0.16151325404644012, | |
| "learning_rate": 0.0001228446312142131, | |
| "loss": 2.0949, | |
| "step": 26350 | |
| }, | |
| { | |
| "epoch": 2.9058888277380297, | |
| "grad_norm": 0.16751372814178467, | |
| "learning_rate": 0.00012230782485804633, | |
| "loss": 2.0896, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.911392405063291, | |
| "grad_norm": 0.18384750187397003, | |
| "learning_rate": 0.00012177138542439122, | |
| "loss": 2.047, | |
| "step": 26450 | |
| }, | |
| { | |
| "epoch": 2.9168959823885525, | |
| "grad_norm": 0.15618419647216797, | |
| "learning_rate": 0.00012123532002109328, | |
| "loss": 2.0787, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.922399559713814, | |
| "grad_norm": 0.15803013741970062, | |
| "learning_rate": 0.00012069963575104223, | |
| "loss": 2.0735, | |
| "step": 26550 | |
| }, | |
| { | |
| "epoch": 2.9279031370390753, | |
| "grad_norm": 0.1603049784898758, | |
| "learning_rate": 0.00012016433971207757, | |
| "loss": 2.0802, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.9334067143643368, | |
| "grad_norm": 0.1752861887216568, | |
| "learning_rate": 0.0001196294389968949, | |
| "loss": 2.0969, | |
| "step": 26650 | |
| }, | |
| { | |
| "epoch": 2.938910291689598, | |
| "grad_norm": 0.16382624208927155, | |
| "learning_rate": 0.00011909494069295175, | |
| "loss": 2.0933, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 2.9444138690148596, | |
| "grad_norm": 0.16802163422107697, | |
| "learning_rate": 0.00011856085188237357, | |
| "loss": 2.1042, | |
| "step": 26750 | |
| }, | |
| { | |
| "epoch": 2.949917446340121, | |
| "grad_norm": 0.15907612442970276, | |
| "learning_rate": 0.00011802717964186006, | |
| "loss": 2.0575, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.9554210236653824, | |
| "grad_norm": 0.16981548070907593, | |
| "learning_rate": 0.00011749393104259143, | |
| "loss": 2.0951, | |
| "step": 26850 | |
| }, | |
| { | |
| "epoch": 2.960924600990644, | |
| "grad_norm": 0.15169958770275116, | |
| "learning_rate": 0.0001169611131501345, | |
| "loss": 2.0778, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 2.966428178315905, | |
| "grad_norm": 0.1601976752281189, | |
| "learning_rate": 0.00011642873302434926, | |
| "loss": 2.0621, | |
| "step": 26950 | |
| }, | |
| { | |
| "epoch": 2.9719317556411666, | |
| "grad_norm": 0.16251477599143982, | |
| "learning_rate": 0.00011589679771929529, | |
| "loss": 2.08, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.977435332966428, | |
| "grad_norm": 0.15681184828281403, | |
| "learning_rate": 0.00011536531428313828, | |
| "loss": 2.0518, | |
| "step": 27050 | |
| }, | |
| { | |
| "epoch": 2.9829389102916894, | |
| "grad_norm": 0.17260442674160004, | |
| "learning_rate": 0.00011483428975805664, | |
| "loss": 2.0899, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 2.988442487616951, | |
| "grad_norm": 0.16616973280906677, | |
| "learning_rate": 0.00011430373118014808, | |
| "loss": 2.0787, | |
| "step": 27150 | |
| }, | |
| { | |
| "epoch": 2.9939460649422127, | |
| "grad_norm": 0.1666904091835022, | |
| "learning_rate": 0.00011377364557933658, | |
| "loss": 2.0555, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.999449642267474, | |
| "grad_norm": 0.15540987253189087, | |
| "learning_rate": 0.00011324403997927922, | |
| "loss": 2.0676, | |
| "step": 27250 | |
| }, | |
| { | |
| "epoch": 3.004953219592735, | |
| "grad_norm": 0.16125087440013885, | |
| "learning_rate": 0.00011271492139727294, | |
| "loss": 2.0144, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 3.0104567969179965, | |
| "grad_norm": 0.17550891637802124, | |
| "learning_rate": 0.00011218629684416168, | |
| "loss": 2.0102, | |
| "step": 27350 | |
| }, | |
| { | |
| "epoch": 3.015960374243258, | |
| "grad_norm": 0.16927887499332428, | |
| "learning_rate": 0.00011165817332424356, | |
| "loss": 1.9816, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 3.0214639515685193, | |
| "grad_norm": 0.17522330582141876, | |
| "learning_rate": 0.00011113055783517793, | |
| "loss": 2.0066, | |
| "step": 27450 | |
| }, | |
| { | |
| "epoch": 3.026967528893781, | |
| "grad_norm": 0.160496324300766, | |
| "learning_rate": 0.00011060345736789265, | |
| "loss": 2.0003, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.0324711062190426, | |
| "grad_norm": 0.17563533782958984, | |
| "learning_rate": 0.00011007687890649163, | |
| "loss": 2.0272, | |
| "step": 27550 | |
| }, | |
| { | |
| "epoch": 3.037974683544304, | |
| "grad_norm": 0.17000171542167664, | |
| "learning_rate": 0.00010955082942816215, | |
| "loss": 2.0447, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 3.0434782608695654, | |
| "grad_norm": 0.17253416776657104, | |
| "learning_rate": 0.00010902531590308236, | |
| "loss": 1.9968, | |
| "step": 27650 | |
| }, | |
| { | |
| "epoch": 3.048981838194827, | |
| "grad_norm": 0.16862879693508148, | |
| "learning_rate": 0.00010850034529432919, | |
| "loss": 1.9952, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 3.0544854155200882, | |
| "grad_norm": 0.166275292634964, | |
| "learning_rate": 0.00010797592455778562, | |
| "loss": 2.0258, | |
| "step": 27750 | |
| }, | |
| { | |
| "epoch": 3.0599889928453496, | |
| "grad_norm": 0.1734013706445694, | |
| "learning_rate": 0.00010745206064204903, | |
| "loss": 2.0047, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 3.065492570170611, | |
| "grad_norm": 0.1760258674621582, | |
| "learning_rate": 0.00010692876048833889, | |
| "loss": 1.997, | |
| "step": 27850 | |
| }, | |
| { | |
| "epoch": 3.0709961474958725, | |
| "grad_norm": 0.1616799384355545, | |
| "learning_rate": 0.00010640603103040464, | |
| "loss": 1.9817, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 3.076499724821134, | |
| "grad_norm": 0.1775207221508026, | |
| "learning_rate": 0.00010588387919443416, | |
| "loss": 2.0391, | |
| "step": 27950 | |
| }, | |
| { | |
| "epoch": 3.0820033021463953, | |
| "grad_norm": 0.17136207222938538, | |
| "learning_rate": 0.00010536231189896175, | |
| "loss": 1.999, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.0875068794716567, | |
| "grad_norm": 0.1694386601448059, | |
| "learning_rate": 0.00010484133605477644, | |
| "loss": 2.0153, | |
| "step": 28050 | |
| }, | |
| { | |
| "epoch": 3.093010456796918, | |
| "grad_norm": 0.17021538317203522, | |
| "learning_rate": 0.00010432095856483057, | |
| "loss": 2.0411, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 3.0985140341221795, | |
| "grad_norm": 0.17548739910125732, | |
| "learning_rate": 0.00010380118632414833, | |
| "loss": 2.0416, | |
| "step": 28150 | |
| }, | |
| { | |
| "epoch": 3.104017611447441, | |
| "grad_norm": 0.1785077303647995, | |
| "learning_rate": 0.0001032820262197342, | |
| "loss": 2.032, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 3.1095211887727023, | |
| "grad_norm": 0.17226973176002502, | |
| "learning_rate": 0.00010276348513048193, | |
| "loss": 1.986, | |
| "step": 28250 | |
| }, | |
| { | |
| "epoch": 3.1150247660979637, | |
| "grad_norm": 0.18131954967975616, | |
| "learning_rate": 0.00010224556992708328, | |
| "loss": 2.0149, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 3.120528343423225, | |
| "grad_norm": 0.16980765759944916, | |
| "learning_rate": 0.00010172828747193689, | |
| "loss": 2.0309, | |
| "step": 28350 | |
| }, | |
| { | |
| "epoch": 3.1260319207484866, | |
| "grad_norm": 0.16887415945529938, | |
| "learning_rate": 0.00010121164461905759, | |
| "loss": 1.9814, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 3.131535498073748, | |
| "grad_norm": 0.18381917476654053, | |
| "learning_rate": 0.00010069564821398536, | |
| "loss": 2.0228, | |
| "step": 28450 | |
| }, | |
| { | |
| "epoch": 3.1370390753990094, | |
| "grad_norm": 0.18095500767230988, | |
| "learning_rate": 0.0001001803050936948, | |
| "loss": 2.0197, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.142542652724271, | |
| "grad_norm": 0.15869130194187164, | |
| "learning_rate": 9.966562208650441e-05, | |
| "loss": 2.0114, | |
| "step": 28550 | |
| }, | |
| { | |
| "epoch": 3.148046230049532, | |
| "grad_norm": 0.17333059012889862, | |
| "learning_rate": 9.915160601198616e-05, | |
| "loss": 2.0025, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 3.1535498073747936, | |
| "grad_norm": 0.161941796541214, | |
| "learning_rate": 9.863826368087516e-05, | |
| "loss": 2.0273, | |
| "step": 28650 | |
| }, | |
| { | |
| "epoch": 3.159053384700055, | |
| "grad_norm": 0.16833285987377167, | |
| "learning_rate": 9.812560189497935e-05, | |
| "loss": 2.0108, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 3.1645569620253164, | |
| "grad_norm": 0.15908978879451752, | |
| "learning_rate": 9.76136274470895e-05, | |
| "loss": 1.9852, | |
| "step": 28750 | |
| }, | |
| { | |
| "epoch": 3.170060539350578, | |
| "grad_norm": 0.1724586933851242, | |
| "learning_rate": 9.710234712088896e-05, | |
| "loss": 2.0297, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 3.1755641166758393, | |
| "grad_norm": 0.17113400995731354, | |
| "learning_rate": 9.659176769086419e-05, | |
| "loss": 1.9969, | |
| "step": 28850 | |
| }, | |
| { | |
| "epoch": 3.1810676940011007, | |
| "grad_norm": 0.16423186659812927, | |
| "learning_rate": 9.608189592221446e-05, | |
| "loss": 2.0106, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 3.186571271326362, | |
| "grad_norm": 0.16140978038311005, | |
| "learning_rate": 9.557273857076271e-05, | |
| "loss": 2.0339, | |
| "step": 28950 | |
| }, | |
| { | |
| "epoch": 3.1920748486516235, | |
| "grad_norm": 0.1718360334634781, | |
| "learning_rate": 9.506430238286566e-05, | |
| "loss": 2.04, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.197578425976885, | |
| "grad_norm": 0.164507195353508, | |
| "learning_rate": 9.455659409532474e-05, | |
| "loss": 2.0168, | |
| "step": 29050 | |
| }, | |
| { | |
| "epoch": 3.2030820033021463, | |
| "grad_norm": 0.17989708483219147, | |
| "learning_rate": 9.40496204352966e-05, | |
| "loss": 2.0273, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 3.2085855806274077, | |
| "grad_norm": 0.17319265007972717, | |
| "learning_rate": 9.354338812020402e-05, | |
| "loss": 1.9911, | |
| "step": 29150 | |
| }, | |
| { | |
| "epoch": 3.214089157952669, | |
| "grad_norm": 0.17870402336120605, | |
| "learning_rate": 9.303790385764694e-05, | |
| "loss": 2.0103, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 3.2195927352779306, | |
| "grad_norm": 0.1797982007265091, | |
| "learning_rate": 9.253317434531357e-05, | |
| "loss": 2.0169, | |
| "step": 29250 | |
| }, | |
| { | |
| "epoch": 3.225096312603192, | |
| "grad_norm": 0.15193064510822296, | |
| "learning_rate": 9.202920627089171e-05, | |
| "loss": 2.0116, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 3.2305998899284534, | |
| "grad_norm": 0.16679364442825317, | |
| "learning_rate": 9.152600631197996e-05, | |
| "loss": 2.0362, | |
| "step": 29350 | |
| }, | |
| { | |
| "epoch": 3.236103467253715, | |
| "grad_norm": 0.17053711414337158, | |
| "learning_rate": 9.102358113599942e-05, | |
| "loss": 1.9829, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 3.241607044578976, | |
| "grad_norm": 0.16923174262046814, | |
| "learning_rate": 9.052193740010536e-05, | |
| "loss": 2.0011, | |
| "step": 29450 | |
| }, | |
| { | |
| "epoch": 3.2471106219042376, | |
| "grad_norm": 0.18323257565498352, | |
| "learning_rate": 9.002108175109877e-05, | |
| "loss": 1.9784, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.252614199229499, | |
| "grad_norm": 0.16737329959869385, | |
| "learning_rate": 8.952102082533855e-05, | |
| "loss": 2.0276, | |
| "step": 29550 | |
| }, | |
| { | |
| "epoch": 3.2581177765547604, | |
| "grad_norm": 0.17038094997406006, | |
| "learning_rate": 8.902176124865348e-05, | |
| "loss": 2.0315, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 3.263621353880022, | |
| "grad_norm": 0.16778729856014252, | |
| "learning_rate": 8.852330963625444e-05, | |
| "loss": 2.0139, | |
| "step": 29650 | |
| }, | |
| { | |
| "epoch": 3.2691249312052832, | |
| "grad_norm": 0.18061307072639465, | |
| "learning_rate": 8.802567259264674e-05, | |
| "loss": 2.002, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 3.2746285085305447, | |
| "grad_norm": 0.17191821336746216, | |
| "learning_rate": 8.752885671154253e-05, | |
| "loss": 2.0218, | |
| "step": 29750 | |
| }, | |
| { | |
| "epoch": 3.280132085855806, | |
| "grad_norm": 0.17957885563373566, | |
| "learning_rate": 8.703286857577378e-05, | |
| "loss": 2.0217, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 3.2856356631810675, | |
| "grad_norm": 0.16803069412708282, | |
| "learning_rate": 8.653771475720442e-05, | |
| "loss": 2.0185, | |
| "step": 29850 | |
| }, | |
| { | |
| "epoch": 3.291139240506329, | |
| "grad_norm": 0.1788654327392578, | |
| "learning_rate": 8.604340181664395e-05, | |
| "loss": 1.9973, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 3.2966428178315903, | |
| "grad_norm": 0.17996376752853394, | |
| "learning_rate": 8.554993630376006e-05, | |
| "loss": 2.0161, | |
| "step": 29950 | |
| }, | |
| { | |
| "epoch": 3.3021463951568517, | |
| "grad_norm": 0.18701910972595215, | |
| "learning_rate": 8.50573247569921e-05, | |
| "loss": 2.0156, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.3076499724821136, | |
| "grad_norm": 0.1713022142648697, | |
| "learning_rate": 8.45655737034641e-05, | |
| "loss": 2.0144, | |
| "step": 30050 | |
| }, | |
| { | |
| "epoch": 3.313153549807375, | |
| "grad_norm": 0.17666327953338623, | |
| "learning_rate": 8.407468965889878e-05, | |
| "loss": 2.0116, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 3.3186571271326364, | |
| "grad_norm": 0.18481209874153137, | |
| "learning_rate": 8.358467912753079e-05, | |
| "loss": 2.0149, | |
| "step": 30150 | |
| }, | |
| { | |
| "epoch": 3.324160704457898, | |
| "grad_norm": 0.17895914614200592, | |
| "learning_rate": 8.309554860202082e-05, | |
| "loss": 1.9913, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 3.329664281783159, | |
| "grad_norm": 0.18427938222885132, | |
| "learning_rate": 8.260730456336925e-05, | |
| "loss": 1.9913, | |
| "step": 30250 | |
| }, | |
| { | |
| "epoch": 3.3351678591084206, | |
| "grad_norm": 0.1806970238685608, | |
| "learning_rate": 8.211995348083067e-05, | |
| "loss": 2.0177, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 3.340671436433682, | |
| "grad_norm": 0.1735043078660965, | |
| "learning_rate": 8.163350181182787e-05, | |
| "loss": 2.0275, | |
| "step": 30350 | |
| }, | |
| { | |
| "epoch": 3.3461750137589434, | |
| "grad_norm": 0.17593735456466675, | |
| "learning_rate": 8.11479560018664e-05, | |
| "loss": 2.0525, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 3.351678591084205, | |
| "grad_norm": 0.1755988448858261, | |
| "learning_rate": 8.066332248444908e-05, | |
| "loss": 2.0182, | |
| "step": 30450 | |
| }, | |
| { | |
| "epoch": 3.3571821684094663, | |
| "grad_norm": 0.15805041790008545, | |
| "learning_rate": 8.017960768099091e-05, | |
| "loss": 1.9748, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.3626857457347277, | |
| "grad_norm": 0.18487168848514557, | |
| "learning_rate": 7.969681800073386e-05, | |
| "loss": 2.021, | |
| "step": 30550 | |
| }, | |
| { | |
| "epoch": 3.368189323059989, | |
| "grad_norm": 0.17120800912380219, | |
| "learning_rate": 7.921495984066194e-05, | |
| "loss": 2.0113, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 3.3736929003852505, | |
| "grad_norm": 0.1721143126487732, | |
| "learning_rate": 7.87340395854164e-05, | |
| "loss": 1.9936, | |
| "step": 30650 | |
| }, | |
| { | |
| "epoch": 3.379196477710512, | |
| "grad_norm": 0.17595061659812927, | |
| "learning_rate": 7.825406360721139e-05, | |
| "loss": 2.0044, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 3.3847000550357733, | |
| "grad_norm": 0.16751642525196075, | |
| "learning_rate": 7.777503826574925e-05, | |
| "loss": 1.9988, | |
| "step": 30750 | |
| }, | |
| { | |
| "epoch": 3.3902036323610347, | |
| "grad_norm": 0.17864260077476501, | |
| "learning_rate": 7.729696990813634e-05, | |
| "loss": 2.0189, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 3.395707209686296, | |
| "grad_norm": 0.1791946142911911, | |
| "learning_rate": 7.681986486879898e-05, | |
| "loss": 2.0038, | |
| "step": 30850 | |
| }, | |
| { | |
| "epoch": 3.4012107870115575, | |
| "grad_norm": 0.1704343855381012, | |
| "learning_rate": 7.634372946939945e-05, | |
| "loss": 1.9625, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 3.406714364336819, | |
| "grad_norm": 0.17794691026210785, | |
| "learning_rate": 7.586857001875235e-05, | |
| "loss": 2.0017, | |
| "step": 30950 | |
| }, | |
| { | |
| "epoch": 3.4122179416620804, | |
| "grad_norm": 0.17922665178775787, | |
| "learning_rate": 7.539439281274071e-05, | |
| "loss": 2.054, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.4177215189873418, | |
| "grad_norm": 0.174763485789299, | |
| "learning_rate": 7.492120413423296e-05, | |
| "loss": 2.0185, | |
| "step": 31050 | |
| }, | |
| { | |
| "epoch": 3.423225096312603, | |
| "grad_norm": 0.1687181144952774, | |
| "learning_rate": 7.444901025299941e-05, | |
| "loss": 2.0463, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 3.4287286736378646, | |
| "grad_norm": 0.17793361842632294, | |
| "learning_rate": 7.397781742562941e-05, | |
| "loss": 2.0296, | |
| "step": 31150 | |
| }, | |
| { | |
| "epoch": 3.434232250963126, | |
| "grad_norm": 0.16858656704425812, | |
| "learning_rate": 7.350763189544803e-05, | |
| "loss": 1.9916, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 3.4397358282883874, | |
| "grad_norm": 0.16184011101722717, | |
| "learning_rate": 7.303845989243384e-05, | |
| "loss": 2.0314, | |
| "step": 31250 | |
| }, | |
| { | |
| "epoch": 3.445239405613649, | |
| "grad_norm": 0.1733245998620987, | |
| "learning_rate": 7.257030763313607e-05, | |
| "loss": 2.0125, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 3.4507429829389102, | |
| "grad_norm": 0.18471761047840118, | |
| "learning_rate": 7.210318132059231e-05, | |
| "loss": 1.9691, | |
| "step": 31350 | |
| }, | |
| { | |
| "epoch": 3.4562465602641717, | |
| "grad_norm": 0.17260605096817017, | |
| "learning_rate": 7.16370871442462e-05, | |
| "loss": 2.0113, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 3.461750137589433, | |
| "grad_norm": 0.1758509874343872, | |
| "learning_rate": 7.117203127986569e-05, | |
| "loss": 2.0239, | |
| "step": 31450 | |
| }, | |
| { | |
| "epoch": 3.4672537149146945, | |
| "grad_norm": 0.18456335365772247, | |
| "learning_rate": 7.070801988946098e-05, | |
| "loss": 1.999, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.472757292239956, | |
| "grad_norm": 0.17995281517505646, | |
| "learning_rate": 7.024505912120293e-05, | |
| "loss": 2.0141, | |
| "step": 31550 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 0.16747882962226868, | |
| "learning_rate": 6.978315510934164e-05, | |
| "loss": 2.0073, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 3.4837644468904787, | |
| "grad_norm": 0.1658480316400528, | |
| "learning_rate": 6.932231397412522e-05, | |
| "loss": 2.0079, | |
| "step": 31650 | |
| }, | |
| { | |
| "epoch": 3.48926802421574, | |
| "grad_norm": 0.17987202107906342, | |
| "learning_rate": 6.886254182171836e-05, | |
| "loss": 2.0439, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 3.4947716015410015, | |
| "grad_norm": 0.1672779768705368, | |
| "learning_rate": 6.840384474412201e-05, | |
| "loss": 1.9878, | |
| "step": 31750 | |
| }, | |
| { | |
| "epoch": 3.500275178866263, | |
| "grad_norm": 0.1716667115688324, | |
| "learning_rate": 6.794622881909207e-05, | |
| "loss": 2.0248, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 3.5057787561915243, | |
| "grad_norm": 0.17986662685871124, | |
| "learning_rate": 6.748970011005924e-05, | |
| "loss": 1.99, | |
| "step": 31850 | |
| }, | |
| { | |
| "epoch": 3.5112823335167858, | |
| "grad_norm": 0.1808827668428421, | |
| "learning_rate": 6.703426466604858e-05, | |
| "loss": 1.9692, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 3.516785910842047, | |
| "grad_norm": 0.17262905836105347, | |
| "learning_rate": 6.65799285215993e-05, | |
| "loss": 1.9636, | |
| "step": 31950 | |
| }, | |
| { | |
| "epoch": 3.522289488167309, | |
| "grad_norm": 0.16857630014419556, | |
| "learning_rate": 6.612669769668489e-05, | |
| "loss": 1.9923, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.5277930654925704, | |
| "grad_norm": 0.17498353123664856, | |
| "learning_rate": 6.567457819663327e-05, | |
| "loss": 2.0248, | |
| "step": 32050 | |
| }, | |
| { | |
| "epoch": 3.533296642817832, | |
| "grad_norm": 0.16948480904102325, | |
| "learning_rate": 6.522357601204719e-05, | |
| "loss": 1.9966, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 3.5388002201430933, | |
| "grad_norm": 0.1695474237203598, | |
| "learning_rate": 6.477369711872502e-05, | |
| "loss": 1.994, | |
| "step": 32150 | |
| }, | |
| { | |
| "epoch": 3.5443037974683547, | |
| "grad_norm": 0.1761193871498108, | |
| "learning_rate": 6.432494747758146e-05, | |
| "loss": 1.9827, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 3.549807374793616, | |
| "grad_norm": 0.17206500470638275, | |
| "learning_rate": 6.387733303456858e-05, | |
| "loss": 1.9944, | |
| "step": 32250 | |
| }, | |
| { | |
| "epoch": 3.5553109521188775, | |
| "grad_norm": 0.19551701843738556, | |
| "learning_rate": 6.3430859720597e-05, | |
| "loss": 2.0118, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 3.560814529444139, | |
| "grad_norm": 0.17827004194259644, | |
| "learning_rate": 6.298553345145728e-05, | |
| "loss": 1.9953, | |
| "step": 32350 | |
| }, | |
| { | |
| "epoch": 3.5663181067694003, | |
| "grad_norm": 0.17359542846679688, | |
| "learning_rate": 6.254136012774166e-05, | |
| "loss": 2.0098, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 3.5718216840946617, | |
| "grad_norm": 0.16225001215934753, | |
| "learning_rate": 6.209834563476578e-05, | |
| "loss": 2.008, | |
| "step": 32450 | |
| }, | |
| { | |
| "epoch": 3.577325261419923, | |
| "grad_norm": 0.17888249456882477, | |
| "learning_rate": 6.165649584249079e-05, | |
| "loss": 1.9962, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.5828288387451845, | |
| "grad_norm": 0.16299229860305786, | |
| "learning_rate": 6.121581660544532e-05, | |
| "loss": 2.0387, | |
| "step": 32550 | |
| }, | |
| { | |
| "epoch": 3.588332416070446, | |
| "grad_norm": 0.16753186285495758, | |
| "learning_rate": 6.077631376264832e-05, | |
| "loss": 1.9662, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 3.5938359933957074, | |
| "grad_norm": 0.18166500329971313, | |
| "learning_rate": 6.033799313753129e-05, | |
| "loss": 1.997, | |
| "step": 32650 | |
| }, | |
| { | |
| "epoch": 3.5993395707209688, | |
| "grad_norm": 0.16691668331623077, | |
| "learning_rate": 5.990086053786139e-05, | |
| "loss": 2.0072, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 3.60484314804623, | |
| "grad_norm": 0.18671706318855286, | |
| "learning_rate": 5.946492175566438e-05, | |
| "loss": 2.0217, | |
| "step": 32750 | |
| }, | |
| { | |
| "epoch": 3.6103467253714916, | |
| "grad_norm": 0.16831424832344055, | |
| "learning_rate": 5.9030182567147905e-05, | |
| "loss": 2.0024, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 3.615850302696753, | |
| "grad_norm": 0.17562079429626465, | |
| "learning_rate": 5.859664873262483e-05, | |
| "loss": 2.0351, | |
| "step": 32850 | |
| }, | |
| { | |
| "epoch": 3.6213538800220144, | |
| "grad_norm": 0.18190255761146545, | |
| "learning_rate": 5.816432599643719e-05, | |
| "loss": 2.0117, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 3.626857457347276, | |
| "grad_norm": 0.1830035150051117, | |
| "learning_rate": 5.773322008687972e-05, | |
| "loss": 2.012, | |
| "step": 32950 | |
| }, | |
| { | |
| "epoch": 3.6323610346725372, | |
| "grad_norm": 0.17998354136943817, | |
| "learning_rate": 5.730333671612435e-05, | |
| "loss": 1.9977, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 3.6378646119977986, | |
| "grad_norm": 0.1839103251695633, | |
| "learning_rate": 5.687468158014421e-05, | |
| "loss": 2.003, | |
| "step": 33050 | |
| }, | |
| { | |
| "epoch": 3.64336818932306, | |
| "grad_norm": 0.18631495535373688, | |
| "learning_rate": 5.6447260358638285e-05, | |
| "loss": 2.0072, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 3.6488717666483215, | |
| "grad_norm": 0.19609171152114868, | |
| "learning_rate": 5.602107871495615e-05, | |
| "loss": 1.9912, | |
| "step": 33150 | |
| }, | |
| { | |
| "epoch": 3.654375343973583, | |
| "grad_norm": 0.16935598850250244, | |
| "learning_rate": 5.5596142296022954e-05, | |
| "loss": 2.0097, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 3.6598789212988443, | |
| "grad_norm": 0.16973347961902618, | |
| "learning_rate": 5.5172456732264435e-05, | |
| "loss": 2.0151, | |
| "step": 33250 | |
| }, | |
| { | |
| "epoch": 3.6653824986241057, | |
| "grad_norm": 0.17289844155311584, | |
| "learning_rate": 5.475002763753257e-05, | |
| "loss": 2.0393, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 3.670886075949367, | |
| "grad_norm": 0.1864277720451355, | |
| "learning_rate": 5.432886060903105e-05, | |
| "loss": 2.0069, | |
| "step": 33350 | |
| }, | |
| { | |
| "epoch": 3.6763896532746285, | |
| "grad_norm": 0.17381645739078522, | |
| "learning_rate": 5.390896122724111e-05, | |
| "loss": 2.0323, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 3.68189323059989, | |
| "grad_norm": 0.18182097375392914, | |
| "learning_rate": 5.349033505584767e-05, | |
| "loss": 2.0344, | |
| "step": 33450 | |
| }, | |
| { | |
| "epoch": 3.6873968079251513, | |
| "grad_norm": 0.17326848208904266, | |
| "learning_rate": 5.3072987641665373e-05, | |
| "loss": 1.9888, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 3.6929003852504128, | |
| "grad_norm": 0.17132443189620972, | |
| "learning_rate": 5.2656924514565515e-05, | |
| "loss": 1.9741, | |
| "step": 33550 | |
| }, | |
| { | |
| "epoch": 3.698403962575674, | |
| "grad_norm": 0.17236609756946564, | |
| "learning_rate": 5.224215118740243e-05, | |
| "loss": 1.9967, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 3.7039075399009356, | |
| "grad_norm": 0.16628102958202362, | |
| "learning_rate": 5.182867315594046e-05, | |
| "loss": 2.0036, | |
| "step": 33650 | |
| }, | |
| { | |
| "epoch": 3.709411117226197, | |
| "grad_norm": 0.17410129308700562, | |
| "learning_rate": 5.141649589878134e-05, | |
| "loss": 2.0173, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 3.7149146945514584, | |
| "grad_norm": 0.1739521622657776, | |
| "learning_rate": 5.100562487729148e-05, | |
| "loss": 1.9977, | |
| "step": 33750 | |
| }, | |
| { | |
| "epoch": 3.72041827187672, | |
| "grad_norm": 0.17988687753677368, | |
| "learning_rate": 5.059606553552956e-05, | |
| "loss": 1.9964, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 3.725921849201981, | |
| "grad_norm": 0.17351609468460083, | |
| "learning_rate": 5.018782330017448e-05, | |
| "loss": 2.0078, | |
| "step": 33850 | |
| }, | |
| { | |
| "epoch": 3.7314254265272426, | |
| "grad_norm": 0.176718607544899, | |
| "learning_rate": 4.9780903580453435e-05, | |
| "loss": 2.0036, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 3.736929003852504, | |
| "grad_norm": 0.17504329979419708, | |
| "learning_rate": 4.937531176807023e-05, | |
| "loss": 1.9837, | |
| "step": 33950 | |
| }, | |
| { | |
| "epoch": 3.7424325811777654, | |
| "grad_norm": 0.179831862449646, | |
| "learning_rate": 4.897105323713374e-05, | |
| "loss": 1.999, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 3.747936158503027, | |
| "grad_norm": 0.17752991616725922, | |
| "learning_rate": 4.8568133344086986e-05, | |
| "loss": 2.0194, | |
| "step": 34050 | |
| }, | |
| { | |
| "epoch": 3.7534397358282883, | |
| "grad_norm": 0.17888140678405762, | |
| "learning_rate": 4.8166557427635774e-05, | |
| "loss": 2.0108, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 3.7589433131535497, | |
| "grad_norm": 0.17585836350917816, | |
| "learning_rate": 4.776633080867834e-05, | |
| "loss": 2.0421, | |
| "step": 34150 | |
| }, | |
| { | |
| "epoch": 3.764446890478811, | |
| "grad_norm": 0.16860149800777435, | |
| "learning_rate": 4.736745879023457e-05, | |
| "loss": 2.0126, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 3.7699504678040725, | |
| "grad_norm": 0.17601364850997925, | |
| "learning_rate": 4.6969946657375865e-05, | |
| "loss": 2.0127, | |
| "step": 34250 | |
| }, | |
| { | |
| "epoch": 3.775454045129334, | |
| "grad_norm": 0.19177961349487305, | |
| "learning_rate": 4.657379967715511e-05, | |
| "loss": 2.0234, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 3.7809576224545953, | |
| "grad_norm": 0.19012008607387543, | |
| "learning_rate": 4.617902309853685e-05, | |
| "loss": 1.976, | |
| "step": 34350 | |
| }, | |
| { | |
| "epoch": 3.7864611997798567, | |
| "grad_norm": 0.18448469042778015, | |
| "learning_rate": 4.578562215232766e-05, | |
| "loss": 1.9897, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 3.791964777105118, | |
| "grad_norm": 0.18167300522327423, | |
| "learning_rate": 4.539360205110701e-05, | |
| "loss": 2.0088, | |
| "step": 34450 | |
| }, | |
| { | |
| "epoch": 3.7974683544303796, | |
| "grad_norm": 0.1806318610906601, | |
| "learning_rate": 4.500296798915813e-05, | |
| "loss": 1.9949, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.802971931755641, | |
| "grad_norm": 0.17676065862178802, | |
| "learning_rate": 4.461372514239911e-05, | |
| "loss": 1.9966, | |
| "step": 34550 | |
| }, | |
| { | |
| "epoch": 3.8084755090809024, | |
| "grad_norm": 0.16621781885623932, | |
| "learning_rate": 4.422587866831446e-05, | |
| "loss": 1.9952, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 3.813979086406164, | |
| "grad_norm": 0.16662217676639557, | |
| "learning_rate": 4.3839433705886646e-05, | |
| "loss": 2.0032, | |
| "step": 34650 | |
| }, | |
| { | |
| "epoch": 3.819482663731425, | |
| "grad_norm": 0.15986546874046326, | |
| "learning_rate": 4.3454395375527966e-05, | |
| "loss": 1.9974, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 3.8249862410566866, | |
| "grad_norm": 0.18636077642440796, | |
| "learning_rate": 4.307076877901293e-05, | |
| "loss": 1.9959, | |
| "step": 34750 | |
| }, | |
| { | |
| "epoch": 3.830489818381948, | |
| "grad_norm": 0.17939697206020355, | |
| "learning_rate": 4.268855899941039e-05, | |
| "loss": 1.9923, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 3.8359933957072094, | |
| "grad_norm": 0.1743779331445694, | |
| "learning_rate": 4.2307771101016366e-05, | |
| "loss": 2.0055, | |
| "step": 34850 | |
| }, | |
| { | |
| "epoch": 3.841496973032471, | |
| "grad_norm": 0.1767256259918213, | |
| "learning_rate": 4.192841012928691e-05, | |
| "loss": 1.9834, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 3.8470005503577323, | |
| "grad_norm": 0.1775825172662735, | |
| "learning_rate": 4.1550481110771224e-05, | |
| "loss": 2.01, | |
| "step": 34950 | |
| }, | |
| { | |
| "epoch": 3.852504127682994, | |
| "grad_norm": 0.1638861745595932, | |
| "learning_rate": 4.1173989053045024e-05, | |
| "loss": 2.0001, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.8580077050082555, | |
| "grad_norm": 0.1761082410812378, | |
| "learning_rate": 4.079893894464429e-05, | |
| "loss": 2.0041, | |
| "step": 35050 | |
| }, | |
| { | |
| "epoch": 3.863511282333517, | |
| "grad_norm": 0.17102594673633575, | |
| "learning_rate": 4.042533575499914e-05, | |
| "loss": 2.003, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 3.8690148596587783, | |
| "grad_norm": 0.19267229735851288, | |
| "learning_rate": 4.005318443436779e-05, | |
| "loss": 1.973, | |
| "step": 35150 | |
| }, | |
| { | |
| "epoch": 3.8745184369840397, | |
| "grad_norm": 0.16570790112018585, | |
| "learning_rate": 3.968248991377137e-05, | |
| "loss": 1.9834, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 3.880022014309301, | |
| "grad_norm": 0.16816405951976776, | |
| "learning_rate": 3.9313257104928094e-05, | |
| "loss": 2.0011, | |
| "step": 35250 | |
| }, | |
| { | |
| "epoch": 3.8855255916345626, | |
| "grad_norm": 0.16395322978496552, | |
| "learning_rate": 3.8945490900188616e-05, | |
| "loss": 1.9771, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 3.891029168959824, | |
| "grad_norm": 0.17911794781684875, | |
| "learning_rate": 3.857919617247097e-05, | |
| "loss": 2.0, | |
| "step": 35350 | |
| }, | |
| { | |
| "epoch": 3.8965327462850854, | |
| "grad_norm": 0.18344633281230927, | |
| "learning_rate": 3.821437777519607e-05, | |
| "loss": 1.9952, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 3.902036323610347, | |
| "grad_norm": 0.1782706081867218, | |
| "learning_rate": 3.785104054222334e-05, | |
| "loss": 1.9952, | |
| "step": 35450 | |
| }, | |
| { | |
| "epoch": 3.907539900935608, | |
| "grad_norm": 0.17840951681137085, | |
| "learning_rate": 3.74891892877868e-05, | |
| "loss": 1.9994, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.9130434782608696, | |
| "grad_norm": 0.19014611840248108, | |
| "learning_rate": 3.7128828806431046e-05, | |
| "loss": 2.0023, | |
| "step": 35550 | |
| }, | |
| { | |
| "epoch": 3.918547055586131, | |
| "grad_norm": 0.17002084851264954, | |
| "learning_rate": 3.6769963872947996e-05, | |
| "loss": 2.0068, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 3.9240506329113924, | |
| "grad_norm": 0.16855107247829437, | |
| "learning_rate": 3.641259924231345e-05, | |
| "loss": 1.9847, | |
| "step": 35650 | |
| }, | |
| { | |
| "epoch": 3.929554210236654, | |
| "grad_norm": 0.190132737159729, | |
| "learning_rate": 3.605673964962414e-05, | |
| "loss": 1.9955, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 3.9350577875619153, | |
| "grad_norm": 0.1558249294757843, | |
| "learning_rate": 3.570238981003498e-05, | |
| "loss": 1.9861, | |
| "step": 35750 | |
| }, | |
| { | |
| "epoch": 3.9405613648871767, | |
| "grad_norm": 0.18199962377548218, | |
| "learning_rate": 3.53495544186966e-05, | |
| "loss": 2.0136, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 3.946064942212438, | |
| "grad_norm": 0.17813271284103394, | |
| "learning_rate": 3.499823815069306e-05, | |
| "loss": 2.0012, | |
| "step": 35850 | |
| }, | |
| { | |
| "epoch": 3.9515685195376995, | |
| "grad_norm": 0.18256065249443054, | |
| "learning_rate": 3.464844566098008e-05, | |
| "loss": 1.9892, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 3.957072096862961, | |
| "grad_norm": 0.1784873753786087, | |
| "learning_rate": 3.4300181584323126e-05, | |
| "loss": 1.9983, | |
| "step": 35950 | |
| }, | |
| { | |
| "epoch": 3.9625756741882223, | |
| "grad_norm": 0.18530665338039398, | |
| "learning_rate": 3.3953450535236226e-05, | |
| "loss": 2.0357, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.9680792515134837, | |
| "grad_norm": 0.17063435912132263, | |
| "learning_rate": 3.360825710792068e-05, | |
| "loss": 2.0029, | |
| "step": 36050 | |
| }, | |
| { | |
| "epoch": 3.973582828838745, | |
| "grad_norm": 0.1718268245458603, | |
| "learning_rate": 3.326460587620427e-05, | |
| "loss": 1.9762, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 3.9790864061640066, | |
| "grad_norm": 0.15901614725589752, | |
| "learning_rate": 3.292250139348057e-05, | |
| "loss": 1.9952, | |
| "step": 36150 | |
| }, | |
| { | |
| "epoch": 3.984589983489268, | |
| "grad_norm": 0.18968409299850464, | |
| "learning_rate": 3.258194819264871e-05, | |
| "loss": 2.0127, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 3.9900935608145294, | |
| "grad_norm": 0.18861430883407593, | |
| "learning_rate": 3.2242950786053296e-05, | |
| "loss": 1.9981, | |
| "step": 36250 | |
| }, | |
| { | |
| "epoch": 3.995597138139791, | |
| "grad_norm": 0.1702040284872055, | |
| "learning_rate": 3.1905513665424445e-05, | |
| "loss": 2.0023, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 4.001100715465053, | |
| "grad_norm": 0.18254053592681885, | |
| "learning_rate": 3.156964130181859e-05, | |
| "loss": 2.0038, | |
| "step": 36350 | |
| }, | |
| { | |
| "epoch": 4.006604292790314, | |
| "grad_norm": 0.18867474794387817, | |
| "learning_rate": 3.1235338145558925e-05, | |
| "loss": 1.9139, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 4.0121078701155755, | |
| "grad_norm": 0.1872694045305252, | |
| "learning_rate": 3.090260862617671e-05, | |
| "loss": 1.9484, | |
| "step": 36450 | |
| }, | |
| { | |
| "epoch": 4.017611447440837, | |
| "grad_norm": 0.17595593631267548, | |
| "learning_rate": 3.057145715235236e-05, | |
| "loss": 1.9543, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 4.023115024766098, | |
| "grad_norm": 0.17811767756938934, | |
| "learning_rate": 3.0241888111857145e-05, | |
| "loss": 1.9268, | |
| "step": 36550 | |
| }, | |
| { | |
| "epoch": 4.02861860209136, | |
| "grad_norm": 0.17598308622837067, | |
| "learning_rate": 2.9913905871495037e-05, | |
| "loss": 1.9439, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 4.034122179416621, | |
| "grad_norm": 0.19237573444843292, | |
| "learning_rate": 2.9587514777044874e-05, | |
| "loss": 1.9044, | |
| "step": 36650 | |
| }, | |
| { | |
| "epoch": 4.0396257567418825, | |
| "grad_norm": 0.1873626410961151, | |
| "learning_rate": 2.926271915320259e-05, | |
| "loss": 1.943, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 4.045129334067144, | |
| "grad_norm": 0.2001618891954422, | |
| "learning_rate": 2.893952330352423e-05, | |
| "loss": 1.9311, | |
| "step": 36750 | |
| }, | |
| { | |
| "epoch": 4.050632911392405, | |
| "grad_norm": 0.1838996410369873, | |
| "learning_rate": 2.86179315103687e-05, | |
| "loss": 1.9373, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 4.056136488717667, | |
| "grad_norm": 0.16863055527210236, | |
| "learning_rate": 2.8297948034841074e-05, | |
| "loss": 1.9289, | |
| "step": 36850 | |
| }, | |
| { | |
| "epoch": 4.061640066042928, | |
| "grad_norm": 0.19162894785404205, | |
| "learning_rate": 2.797957711673619e-05, | |
| "loss": 1.9293, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 4.06714364336819, | |
| "grad_norm": 0.1854933500289917, | |
| "learning_rate": 2.7662822974482423e-05, | |
| "loss": 1.9336, | |
| "step": 36950 | |
| }, | |
| { | |
| "epoch": 4.072647220693451, | |
| "grad_norm": 0.18568897247314453, | |
| "learning_rate": 2.7347689805085733e-05, | |
| "loss": 1.9431, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.078150798018712, | |
| "grad_norm": 0.1947336494922638, | |
| "learning_rate": 2.7034181784074205e-05, | |
| "loss": 1.9345, | |
| "step": 37050 | |
| }, | |
| { | |
| "epoch": 4.083654375343974, | |
| "grad_norm": 0.18774710595607758, | |
| "learning_rate": 2.672230306544254e-05, | |
| "loss": 1.9364, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 4.089157952669235, | |
| "grad_norm": 0.1984746754169464, | |
| "learning_rate": 2.6412057781597223e-05, | |
| "loss": 1.954, | |
| "step": 37150 | |
| }, | |
| { | |
| "epoch": 4.094661529994497, | |
| "grad_norm": 0.1908876746892929, | |
| "learning_rate": 2.61034500433016e-05, | |
| "loss": 1.9422, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 4.100165107319758, | |
| "grad_norm": 0.18860432505607605, | |
| "learning_rate": 2.579648393962147e-05, | |
| "loss": 1.9649, | |
| "step": 37250 | |
| }, | |
| { | |
| "epoch": 4.105668684645019, | |
| "grad_norm": 0.18678942322731018, | |
| "learning_rate": 2.5491163537870922e-05, | |
| "loss": 1.9544, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 4.111172261970281, | |
| "grad_norm": 0.19560185074806213, | |
| "learning_rate": 2.5187492883558435e-05, | |
| "loss": 1.9396, | |
| "step": 37350 | |
| }, | |
| { | |
| "epoch": 4.116675839295542, | |
| "grad_norm": 0.1825692355632782, | |
| "learning_rate": 2.488547600033317e-05, | |
| "loss": 1.949, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 4.122179416620804, | |
| "grad_norm": 0.1951771229505539, | |
| "learning_rate": 2.4585116889931873e-05, | |
| "loss": 1.9482, | |
| "step": 37450 | |
| }, | |
| { | |
| "epoch": 4.127682993946065, | |
| "grad_norm": 0.1859281212091446, | |
| "learning_rate": 2.4286419532125668e-05, | |
| "loss": 1.9512, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 4.1331865712713265, | |
| "grad_norm": 0.19174982607364655, | |
| "learning_rate": 2.3989387884667365e-05, | |
| "loss": 1.9358, | |
| "step": 37550 | |
| }, | |
| { | |
| "epoch": 4.138690148596588, | |
| "grad_norm": 0.187012180685997, | |
| "learning_rate": 2.369402588323908e-05, | |
| "loss": 1.9114, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 4.144193725921849, | |
| "grad_norm": 0.20616699755191803, | |
| "learning_rate": 2.340033744140004e-05, | |
| "loss": 1.8965, | |
| "step": 37650 | |
| }, | |
| { | |
| "epoch": 4.149697303247111, | |
| "grad_norm": 0.17636051774024963, | |
| "learning_rate": 2.3108326450534725e-05, | |
| "loss": 1.9549, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 4.155200880572372, | |
| "grad_norm": 0.19219453632831573, | |
| "learning_rate": 2.281799677980136e-05, | |
| "loss": 1.8911, | |
| "step": 37750 | |
| }, | |
| { | |
| "epoch": 4.1607044578976335, | |
| "grad_norm": 0.19742678105831146, | |
| "learning_rate": 2.2529352276080513e-05, | |
| "loss": 1.9562, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 4.166208035222895, | |
| "grad_norm": 0.18049471080303192, | |
| "learning_rate": 2.2242396763924292e-05, | |
| "loss": 1.9169, | |
| "step": 37850 | |
| }, | |
| { | |
| "epoch": 4.171711612548156, | |
| "grad_norm": 0.2108810693025589, | |
| "learning_rate": 2.1957134045505588e-05, | |
| "loss": 1.9363, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 4.177215189873418, | |
| "grad_norm": 0.20711076259613037, | |
| "learning_rate": 2.1673567900567667e-05, | |
| "loss": 1.9685, | |
| "step": 37950 | |
| }, | |
| { | |
| "epoch": 4.182718767198679, | |
| "grad_norm": 0.18670059740543365, | |
| "learning_rate": 2.139170208637415e-05, | |
| "loss": 1.9537, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.188222344523941, | |
| "grad_norm": 0.17974555492401123, | |
| "learning_rate": 2.1111540337659227e-05, | |
| "loss": 1.9476, | |
| "step": 38050 | |
| }, | |
| { | |
| "epoch": 4.193725921849202, | |
| "grad_norm": 0.18271256983280182, | |
| "learning_rate": 2.083308636657811e-05, | |
| "loss": 1.9535, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 4.199229499174463, | |
| "grad_norm": 0.19260470569133759, | |
| "learning_rate": 2.0556343862657855e-05, | |
| "loss": 1.9477, | |
| "step": 38150 | |
| }, | |
| { | |
| "epoch": 4.204733076499725, | |
| "grad_norm": 0.19317923486232758, | |
| "learning_rate": 2.0281316492748595e-05, | |
| "loss": 1.9518, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 4.210236653824986, | |
| "grad_norm": 0.1854427605867386, | |
| "learning_rate": 2.0008007900974738e-05, | |
| "loss": 1.9245, | |
| "step": 38250 | |
| }, | |
| { | |
| "epoch": 4.215740231150248, | |
| "grad_norm": 0.1781974732875824, | |
| "learning_rate": 1.9736421708686934e-05, | |
| "loss": 1.9387, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 4.221243808475509, | |
| "grad_norm": 0.18503354489803314, | |
| "learning_rate": 1.946656151441389e-05, | |
| "loss": 1.9683, | |
| "step": 38350 | |
| }, | |
| { | |
| "epoch": 4.2267473858007705, | |
| "grad_norm": 0.19728437066078186, | |
| "learning_rate": 1.9198430893814798e-05, | |
| "loss": 1.9546, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 4.232250963126032, | |
| "grad_norm": 0.1957421451807022, | |
| "learning_rate": 1.89320333996319e-05, | |
| "loss": 1.9526, | |
| "step": 38450 | |
| }, | |
| { | |
| "epoch": 4.237754540451293, | |
| "grad_norm": 0.18944865465164185, | |
| "learning_rate": 1.866737256164349e-05, | |
| "loss": 1.9467, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 4.243258117776555, | |
| "grad_norm": 0.1760573536157608, | |
| "learning_rate": 1.8404451886617005e-05, | |
| "loss": 1.9142, | |
| "step": 38550 | |
| }, | |
| { | |
| "epoch": 4.248761695101816, | |
| "grad_norm": 0.19618487358093262, | |
| "learning_rate": 1.8143274858262702e-05, | |
| "loss": 1.9491, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 4.2542652724270775, | |
| "grad_norm": 0.19029423594474792, | |
| "learning_rate": 1.7883844937187495e-05, | |
| "loss": 1.9553, | |
| "step": 38650 | |
| }, | |
| { | |
| "epoch": 4.259768849752339, | |
| "grad_norm": 0.19688698649406433, | |
| "learning_rate": 1.762616556084891e-05, | |
| "loss": 1.9698, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 4.2652724270776, | |
| "grad_norm": 0.16535942256450653, | |
| "learning_rate": 1.737024014350983e-05, | |
| "loss": 1.9395, | |
| "step": 38750 | |
| }, | |
| { | |
| "epoch": 4.270776004402862, | |
| "grad_norm": 0.19671253859996796, | |
| "learning_rate": 1.7116072076193e-05, | |
| "loss": 1.9343, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 4.276279581728123, | |
| "grad_norm": 0.18672049045562744, | |
| "learning_rate": 1.6863664726636278e-05, | |
| "loss": 1.939, | |
| "step": 38850 | |
| }, | |
| { | |
| "epoch": 4.281783159053385, | |
| "grad_norm": 0.19714199006557465, | |
| "learning_rate": 1.6613021439247914e-05, | |
| "loss": 1.9455, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 4.287286736378646, | |
| "grad_norm": 0.2018548846244812, | |
| "learning_rate": 1.6364145535062172e-05, | |
| "loss": 1.9169, | |
| "step": 38950 | |
| }, | |
| { | |
| "epoch": 4.292790313703907, | |
| "grad_norm": 0.19057820737361908, | |
| "learning_rate": 1.611704031169555e-05, | |
| "loss": 1.956, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 4.298293891029169, | |
| "grad_norm": 0.18719470500946045, | |
| "learning_rate": 1.5871709043302876e-05, | |
| "loss": 1.9483, | |
| "step": 39050 | |
| }, | |
| { | |
| "epoch": 4.30379746835443, | |
| "grad_norm": 0.19479648768901825, | |
| "learning_rate": 1.5628154980533984e-05, | |
| "loss": 1.9507, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 4.309301045679692, | |
| "grad_norm": 0.2034020870923996, | |
| "learning_rate": 1.538638135049071e-05, | |
| "loss": 1.9343, | |
| "step": 39150 | |
| }, | |
| { | |
| "epoch": 4.314804623004953, | |
| "grad_norm": 0.18177463114261627, | |
| "learning_rate": 1.5146391356684029e-05, | |
| "loss": 1.9481, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 4.3203082003302145, | |
| "grad_norm": 0.1850568652153015, | |
| "learning_rate": 1.4908188178991714e-05, | |
| "loss": 1.9324, | |
| "step": 39250 | |
| }, | |
| { | |
| "epoch": 4.325811777655476, | |
| "grad_norm": 0.1880549043416977, | |
| "learning_rate": 1.4671774973616102e-05, | |
| "loss": 1.9406, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 4.331315354980737, | |
| "grad_norm": 0.18556420505046844, | |
| "learning_rate": 1.4437154873042279e-05, | |
| "loss": 1.9516, | |
| "step": 39350 | |
| }, | |
| { | |
| "epoch": 4.336818932305999, | |
| "grad_norm": 0.19034932553768158, | |
| "learning_rate": 1.420433098599672e-05, | |
| "loss": 1.9477, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 4.34232250963126, | |
| "grad_norm": 0.18247225880622864, | |
| "learning_rate": 1.3973306397405909e-05, | |
| "loss": 1.9552, | |
| "step": 39450 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 0.19472351670265198, | |
| "learning_rate": 1.3744084168355612e-05, | |
| "loss": 1.9543, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 4.353329664281783, | |
| "grad_norm": 0.18814486265182495, | |
| "learning_rate": 1.3516667336050219e-05, | |
| "loss": 1.956, | |
| "step": 39550 | |
| }, | |
| { | |
| "epoch": 4.358833241607044, | |
| "grad_norm": 0.18639877438545227, | |
| "learning_rate": 1.3291058913772517e-05, | |
| "loss": 1.9262, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 4.364336818932306, | |
| "grad_norm": 0.19503189623355865, | |
| "learning_rate": 1.306726189084385e-05, | |
| "loss": 1.9768, | |
| "step": 39650 | |
| }, | |
| { | |
| "epoch": 4.369840396257567, | |
| "grad_norm": 0.1823161244392395, | |
| "learning_rate": 1.2845279232584354e-05, | |
| "loss": 1.9588, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 4.375343973582829, | |
| "grad_norm": 0.19661200046539307, | |
| "learning_rate": 1.2625113880273786e-05, | |
| "loss": 1.9576, | |
| "step": 39750 | |
| }, | |
| { | |
| "epoch": 4.38084755090809, | |
| "grad_norm": 0.17353294789791107, | |
| "learning_rate": 1.2406768751112572e-05, | |
| "loss": 1.9249, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 4.386351128233351, | |
| "grad_norm": 0.1832091063261032, | |
| "learning_rate": 1.2190246738183074e-05, | |
| "loss": 1.9169, | |
| "step": 39850 | |
| }, | |
| { | |
| "epoch": 4.391854705558613, | |
| "grad_norm": 0.18823185563087463, | |
| "learning_rate": 1.197555071041122e-05, | |
| "loss": 1.9368, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 4.397358282883874, | |
| "grad_norm": 0.1941109150648117, | |
| "learning_rate": 1.1762683512528642e-05, | |
| "loss": 1.96, | |
| "step": 39950 | |
| }, | |
| { | |
| "epoch": 4.402861860209136, | |
| "grad_norm": 0.1973351240158081, | |
| "learning_rate": 1.155164796503486e-05, | |
| "loss": 1.9304, | |
| "step": 40000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 45425, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 10000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.91746903834624e+17, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
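
The trace above is the checkpoint state that `transformers.Trainer` writes to `trainer_state.json`: 40,000 of 45,425 steps are complete (epoch ≈ 4.40 of 5), the training loss has settled from roughly 2.0 during epoch 3 to roughly 1.9–1.96 after epoch 4 begins, and the learning rate has decayed to about 1.16e-05. As a minimal sketch of how one might inspect such a file — assuming the raw JSON is available at a hypothetical path `trainer_state.json`, and that `matplotlib` is installed for the optional plot — the loss curve and final state can be pulled straight from `log_history`:

```python
# Minimal sketch: inspect a trainer_state.json like the one above.
# Assumes the raw JSON file (not the table-rendered view) sits at the
# hypothetical path "trainer_state.json"; only the standard library is
# required, plus matplotlib for the optional plot.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# One dict per logging event, emitted every `logging_steps` (here 50) steps.
history = state["log_history"]

steps = [h["step"] for h in history if "loss" in h]
losses = [h["loss"] for h in history if "loss" in h]
lrs = [h["learning_rate"] for h in history if "learning_rate" in h]

print(f"global_step={state['global_step']} / max_steps={state['max_steps']}")
print(f"epoch={state['epoch']:.3f} of {state['num_train_epochs']}")
print(f"final logged loss={losses[-1]}, final lr={lrs[-1]:.3e}")

# Optional: plot training loss vs. step (skipped if matplotlib is absent).
try:
    import matplotlib.pyplot as plt

    plt.plot(steps, losses)
    plt.xlabel("step")
    plt.ylabel("training loss")
    plt.title("Loss vs. step from trainer_state.json")
    plt.savefig("loss_curve.png")
except ImportError:
    pass
```

The `"loss" in h` guard matters because `log_history` is heterogeneous: every record in this particular run is a training log, but evaluation events, if a run produces them, appear in the same list with `eval_*` keys instead of `loss`, and filtering on the key keeps the two series from being mixed.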