{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.015184011236168315, "eval_steps": 500, "global_step": 12800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0001999999999957118, "loss": 3.6491, "step": 5 }, { "epoch": 0.0, "learning_rate": 0.0001999999999827435, "loss": 3.5313, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0001999999999610949, "loss": 3.4388, "step": 15 }, { "epoch": 0.0, "learning_rate": 0.00019999999993076608, "loss": 3.1787, "step": 20 }, { "epoch": 0.0, "learning_rate": 0.00019999999989175698, "loss": 3.0979, "step": 25 }, { "epoch": 0.0, "learning_rate": 0.00019999999984406761, "loss": 3.183, "step": 30 }, { "epoch": 0.0, "learning_rate": 0.000199999999787698, "loss": 3.24, "step": 35 }, { "epoch": 0.0, "learning_rate": 0.00019999999972264808, "loss": 3.2554, "step": 40 }, { "epoch": 0.0, "learning_rate": 0.00019999999964891794, "loss": 3.1969, "step": 45 }, { "epoch": 0.0, "learning_rate": 0.00019999999956650752, "loss": 3.013, "step": 50 }, { "epoch": 0.0, "learning_rate": 0.00019999999947541687, "loss": 2.9786, "step": 55 }, { "epoch": 0.0, "learning_rate": 0.00019999999937564593, "loss": 3.3584, "step": 60 }, { "epoch": 0.0, "learning_rate": 0.00019999999926719473, "loss": 3.3326, "step": 65 }, { "epoch": 0.0, "learning_rate": 0.00019999999915006324, "loss": 3.1324, "step": 70 }, { "epoch": 0.0, "learning_rate": 0.0001999999990242515, "loss": 3.3203, "step": 75 }, { "epoch": 0.0, "learning_rate": 0.00019999999888975953, "loss": 3.05, "step": 80 }, { "epoch": 0.0, "learning_rate": 0.00019999999874658727, "loss": 2.9144, "step": 85 }, { "epoch": 0.0, "learning_rate": 0.00019999999859473478, "loss": 3.1871, "step": 90 }, { "epoch": 0.0, "learning_rate": 0.00019999999843420198, "loss": 3.1406, "step": 95 }, { "epoch": 0.0, "learning_rate": 0.00019999999826498895, "loss": 2.7888, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00019999999808709563, "loss": 2.8994, "step": 105 }, { "epoch": 0.0, "learning_rate": 0.00019999999790052206, "loss": 3.0518, "step": 110 }, { "epoch": 0.0, "learning_rate": 0.00019999999770526822, "loss": 3.0918, "step": 115 }, { "epoch": 0.0, "learning_rate": 0.00019999999750133416, "loss": 3.0768, "step": 120 }, { "epoch": 0.0, "learning_rate": 0.00019999999728871978, "loss": 2.9068, "step": 125 }, { "epoch": 0.0, "learning_rate": 0.00019999999706742517, "loss": 3.1022, "step": 130 }, { "epoch": 0.0, "learning_rate": 0.0001999999968374503, "loss": 3.0395, "step": 135 }, { "epoch": 0.0, "learning_rate": 0.00019999999659879515, "loss": 2.9104, "step": 140 }, { "epoch": 0.0, "learning_rate": 0.00019999999635145975, "loss": 3.0261, "step": 145 }, { "epoch": 0.0, "learning_rate": 0.00019999999609544408, "loss": 2.9319, "step": 150 }, { "epoch": 0.0, "learning_rate": 0.00019999999583074815, "loss": 3.0491, "step": 155 }, { "epoch": 0.0, "learning_rate": 0.00019999999555737197, "loss": 3.0118, "step": 160 }, { "epoch": 0.0, "learning_rate": 0.0001999999952753155, "loss": 2.9509, "step": 165 }, { "epoch": 0.0, "learning_rate": 0.0001999999949845788, "loss": 3.0506, "step": 170 }, { "epoch": 0.0, "learning_rate": 0.0001999999946851618, "loss": 2.9915, "step": 175 }, { "epoch": 0.0, "learning_rate": 0.00019999999437706457, "loss": 3.0174, "step": 180 }, { "epoch": 0.0, "learning_rate": 0.00019999999406028707, "loss": 3.0289, "step": 185 }, { "epoch": 0.0, "learning_rate": 0.0001999999937348293, "loss": 2.9562, "step": 190 }, { "epoch": 0.0, "learning_rate": 0.00019999999340069126, "loss": 3.1903, "step": 195 }, { "epoch": 0.0, "learning_rate": 0.00019999999305787301, "loss": 2.9751, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.00019999999270637443, "loss": 2.8893, "step": 205 }, { "epoch": 0.0, "learning_rate": 0.0001999999923461956, "loss": 2.8216, "step": 210 }, { "epoch": 0.0, "learning_rate": 0.00019999999197733653, "loss": 3.0853, "step": 215 }, { "epoch": 0.0, "learning_rate": 0.00019999999159979722, "loss": 3.1333, "step": 220 }, { "epoch": 0.0, "learning_rate": 0.00019999999121357763, "loss": 2.9536, "step": 225 }, { "epoch": 0.0, "learning_rate": 0.00019999999081867775, "loss": 2.937, "step": 230 }, { "epoch": 0.0, "learning_rate": 0.00019999999041509764, "loss": 2.8704, "step": 235 }, { "epoch": 0.0, "learning_rate": 0.00019999999000283722, "loss": 3.06, "step": 240 }, { "epoch": 0.0, "learning_rate": 0.0001999999895818966, "loss": 2.8869, "step": 245 }, { "epoch": 0.0, "learning_rate": 0.00019999998915227568, "loss": 2.8376, "step": 250 }, { "epoch": 0.0, "learning_rate": 0.0001999999887139745, "loss": 2.8362, "step": 255 }, { "epoch": 0.0, "learning_rate": 0.00019999998826699308, "loss": 2.9185, "step": 260 }, { "epoch": 0.0, "learning_rate": 0.00019999998781133137, "loss": 2.8776, "step": 265 }, { "epoch": 0.0, "learning_rate": 0.00019999998734698943, "loss": 2.9575, "step": 270 }, { "epoch": 0.0, "learning_rate": 0.00019999998687396722, "loss": 3.0368, "step": 275 }, { "epoch": 0.0, "learning_rate": 0.0001999999863922647, "loss": 2.8667, "step": 280 }, { "epoch": 0.0, "learning_rate": 0.000199999985901882, "loss": 2.8613, "step": 285 }, { "epoch": 0.0, "learning_rate": 0.000199999985402819, "loss": 2.9864, "step": 290 }, { "epoch": 0.0, "learning_rate": 0.00019999998489507573, "loss": 2.6927, "step": 295 }, { "epoch": 0.0, "learning_rate": 0.0001999999843786522, "loss": 2.7765, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.0001999999838535484, "loss": 2.8588, "step": 305 }, { "epoch": 0.0, "learning_rate": 0.00019999998331976434, "loss": 2.7963, "step": 310 }, { "epoch": 0.0, "learning_rate": 0.00019999998277730005, "loss": 2.7799, "step": 315 }, { "epoch": 0.0, "learning_rate": 0.0001999999822261555, "loss": 2.9825, "step": 320 }, { "epoch": 0.0, "learning_rate": 0.00019999998166633063, "loss": 2.9661, "step": 325 }, { "epoch": 0.0, "learning_rate": 0.00019999998109782553, "loss": 2.821, "step": 330 }, { "epoch": 0.0, "learning_rate": 0.0001999999805206402, "loss": 2.8383, "step": 335 }, { "epoch": 0.0, "learning_rate": 0.00019999997993477457, "loss": 2.9595, "step": 340 }, { "epoch": 0.0, "learning_rate": 0.0001999999793402287, "loss": 2.8529, "step": 345 }, { "epoch": 0.0, "learning_rate": 0.00019999997873700257, "loss": 2.7759, "step": 350 }, { "epoch": 0.0, "learning_rate": 0.00019999997812509616, "loss": 2.8958, "step": 355 }, { "epoch": 0.0, "learning_rate": 0.0001999999775045095, "loss": 2.9969, "step": 360 }, { "epoch": 0.0, "learning_rate": 0.00019999997687524256, "loss": 2.9635, "step": 365 }, { "epoch": 0.0, "learning_rate": 0.0001999999762372954, "loss": 3.0617, "step": 370 }, { "epoch": 0.0, "learning_rate": 0.00019999997559066795, "loss": 2.7889, "step": 375 }, { "epoch": 0.0, "learning_rate": 0.00019999997493536027, "loss": 2.9124, "step": 380 }, { "epoch": 0.0, "learning_rate": 0.00019999997427137229, "loss": 2.866, "step": 385 }, { "epoch": 0.0, "learning_rate": 0.00019999997359870407, "loss": 2.8843, "step": 390 }, { "epoch": 0.0, "learning_rate": 0.00019999997291735556, "loss": 2.8506, "step": 395 }, { "epoch": 0.0, "learning_rate": 0.00019999997222732682, "loss": 3.0383, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00019999997152861783, "loss": 2.8744, "step": 405 }, { "epoch": 0.0, "learning_rate": 0.00019999997082122858, "loss": 2.8794, "step": 410 }, { "epoch": 0.0, "learning_rate": 0.00019999997010515904, "loss": 2.8376, "step": 415 }, { "epoch": 0.0, "learning_rate": 0.00019999996938040927, "loss": 2.8557, "step": 420 }, { "epoch": 0.0, "learning_rate": 0.00019999996864697922, "loss": 2.6925, "step": 425 }, { "epoch": 0.0, "learning_rate": 0.0001999999679048689, "loss": 2.5931, "step": 430 }, { "epoch": 0.0, "learning_rate": 0.00019999996715407833, "loss": 3.012, "step": 435 }, { "epoch": 0.0, "learning_rate": 0.0001999999663946075, "loss": 2.6536, "step": 440 }, { "epoch": 0.0, "learning_rate": 0.00019999996562645645, "loss": 2.8957, "step": 445 }, { "epoch": 0.0, "learning_rate": 0.00019999996484962508, "loss": 2.7622, "step": 450 }, { "epoch": 0.0, "learning_rate": 0.00019999996406411347, "loss": 2.8935, "step": 455 }, { "epoch": 0.0, "learning_rate": 0.0001999999632699216, "loss": 2.9804, "step": 460 }, { "epoch": 0.0, "learning_rate": 0.0001999999624670495, "loss": 2.8613, "step": 465 }, { "epoch": 0.0, "learning_rate": 0.00019999996165549712, "loss": 3.0941, "step": 470 }, { "epoch": 0.0, "learning_rate": 0.00019999996083526445, "loss": 2.6941, "step": 475 }, { "epoch": 0.0, "learning_rate": 0.00019999996000635156, "loss": 2.9191, "step": 480 }, { "epoch": 0.0, "learning_rate": 0.0001999999591687584, "loss": 3.0899, "step": 485 }, { "epoch": 0.0, "learning_rate": 0.00019999995832248497, "loss": 2.9366, "step": 490 }, { "epoch": 0.0, "learning_rate": 0.0001999999574675313, "loss": 2.9312, "step": 495 }, { "epoch": 0.0, "learning_rate": 0.00019999995660389735, "loss": 2.9255, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.00019999995573158317, "loss": 2.8753, "step": 505 }, { "epoch": 0.0, "learning_rate": 0.0001999999548505887, "loss": 2.8521, "step": 510 }, { "epoch": 0.0, "learning_rate": 0.000199999953960914, "loss": 2.8991, "step": 515 }, { "epoch": 0.0, "learning_rate": 0.00019999995306255902, "loss": 2.8577, "step": 520 }, { "epoch": 0.0, "learning_rate": 0.00019999995215552377, "loss": 2.61, "step": 525 }, { "epoch": 0.0, "learning_rate": 0.00019999995123980827, "loss": 2.8954, "step": 530 }, { "epoch": 0.0, "learning_rate": 0.00019999995031541254, "loss": 2.9163, "step": 535 }, { "epoch": 0.0, "learning_rate": 0.00019999994938233652, "loss": 2.8967, "step": 540 }, { "epoch": 0.0, "learning_rate": 0.00019999994844058024, "loss": 2.7085, "step": 545 }, { "epoch": 0.0, "learning_rate": 0.00019999994749014374, "loss": 3.0516, "step": 550 }, { "epoch": 0.0, "learning_rate": 0.00019999994653102695, "loss": 2.7746, "step": 555 }, { "epoch": 0.0, "learning_rate": 0.0001999999455632299, "loss": 2.742, "step": 560 }, { "epoch": 0.0, "learning_rate": 0.0001999999445867526, "loss": 2.8224, "step": 565 }, { "epoch": 0.0, "learning_rate": 0.00019999994360159505, "loss": 2.9539, "step": 570 }, { "epoch": 0.0, "learning_rate": 0.00019999994260775722, "loss": 2.7028, "step": 575 }, { "epoch": 0.0, "learning_rate": 0.00019999994160523917, "loss": 2.763, "step": 580 }, { "epoch": 0.0, "learning_rate": 0.00019999994059404083, "loss": 2.6874, "step": 585 }, { "epoch": 0.0, "learning_rate": 0.00019999993957416225, "loss": 2.9416, "step": 590 }, { "epoch": 0.0, "learning_rate": 0.0001999999385456034, "loss": 2.7574, "step": 595 }, { "epoch": 0.0, "learning_rate": 0.00019999993750836428, "loss": 2.9096, "step": 600 }, { "epoch": 0.0, "learning_rate": 0.00019999993646244494, "loss": 2.6635, "step": 605 }, { "epoch": 0.0, "learning_rate": 0.0001999999354078453, "loss": 2.8138, "step": 610 }, { "epoch": 0.0, "learning_rate": 0.00019999993434456544, "loss": 2.8623, "step": 615 }, { "epoch": 0.0, "learning_rate": 0.0001999999332726053, "loss": 2.716, "step": 620 }, { "epoch": 0.0, "learning_rate": 0.00019999993219196492, "loss": 2.8355, "step": 625 }, { "epoch": 0.0, "learning_rate": 0.00019999993110264428, "loss": 2.5897, "step": 630 }, { "epoch": 0.0, "learning_rate": 0.00019999993000464336, "loss": 2.574, "step": 635 }, { "epoch": 0.0, "learning_rate": 0.0001999999288979622, "loss": 2.6675, "step": 640 }, { "epoch": 0.0, "learning_rate": 0.0001999999277826008, "loss": 2.7379, "step": 645 }, { "epoch": 0.0, "learning_rate": 0.00019999992665855912, "loss": 2.7875, "step": 650 }, { "epoch": 0.0, "learning_rate": 0.00019999992552583717, "loss": 2.9414, "step": 655 }, { "epoch": 0.0, "learning_rate": 0.000199999924384435, "loss": 2.6379, "step": 660 }, { "epoch": 0.0, "learning_rate": 0.00019999992323435257, "loss": 2.9688, "step": 665 }, { "epoch": 0.0, "learning_rate": 0.00019999992207558987, "loss": 2.8554, "step": 670 }, { "epoch": 0.0, "learning_rate": 0.0001999999209081469, "loss": 2.8075, "step": 675 }, { "epoch": 0.0, "learning_rate": 0.0001999999197320237, "loss": 2.8249, "step": 680 }, { "epoch": 0.0, "learning_rate": 0.00019999991854722024, "loss": 2.4325, "step": 685 }, { "epoch": 0.0, "learning_rate": 0.0001999999173537365, "loss": 2.822, "step": 690 }, { "epoch": 0.0, "learning_rate": 0.00019999991615157251, "loss": 2.7508, "step": 695 }, { "epoch": 0.0, "learning_rate": 0.0001999999149407283, "loss": 2.8431, "step": 700 }, { "epoch": 0.0, "learning_rate": 0.0001999999137212038, "loss": 2.7273, "step": 705 }, { "epoch": 0.0, "learning_rate": 0.00019999991249299907, "loss": 2.8935, "step": 710 }, { "epoch": 0.0, "learning_rate": 0.00019999991125611407, "loss": 2.7908, "step": 715 }, { "epoch": 0.0, "learning_rate": 0.00019999991001054884, "loss": 2.7172, "step": 720 }, { "epoch": 0.0, "learning_rate": 0.00019999990875630333, "loss": 2.5546, "step": 725 }, { "epoch": 0.0, "learning_rate": 0.00019999990749337756, "loss": 2.9523, "step": 730 }, { "epoch": 0.0, "learning_rate": 0.00019999990622177153, "loss": 2.7861, "step": 735 }, { "epoch": 0.0, "learning_rate": 0.00019999990494148527, "loss": 2.7639, "step": 740 }, { "epoch": 0.0, "learning_rate": 0.00019999990365251872, "loss": 2.6946, "step": 745 }, { "epoch": 0.0, "learning_rate": 0.00019999990235487194, "loss": 2.8027, "step": 750 }, { "epoch": 0.0, "learning_rate": 0.0001999999010485449, "loss": 2.8132, "step": 755 }, { "epoch": 0.0, "learning_rate": 0.00019999989973353764, "loss": 2.6649, "step": 760 }, { "epoch": 0.0, "learning_rate": 0.00019999989840985006, "loss": 2.805, "step": 765 }, { "epoch": 0.0, "learning_rate": 0.00019999989707748228, "loss": 2.5914, "step": 770 }, { "epoch": 0.0, "learning_rate": 0.00019999989573643424, "loss": 2.6823, "step": 775 }, { "epoch": 0.0, "learning_rate": 0.0001999998943867059, "loss": 2.7104, "step": 780 }, { "epoch": 0.0, "learning_rate": 0.00019999989302829735, "loss": 2.7045, "step": 785 }, { "epoch": 0.0, "learning_rate": 0.00019999989166120857, "loss": 2.7507, "step": 790 }, { "epoch": 0.0, "learning_rate": 0.0001999998902854395, "loss": 2.6759, "step": 795 }, { "epoch": 0.0, "learning_rate": 0.00019999988890099016, "loss": 2.8907, "step": 800 }, { "epoch": 0.0, "learning_rate": 0.00019999988750786057, "loss": 3.0281, "step": 805 }, { "epoch": 0.0, "learning_rate": 0.00019999988610605075, "loss": 2.6338, "step": 810 }, { "epoch": 0.0, "learning_rate": 0.00019999988469556067, "loss": 2.7324, "step": 815 }, { "epoch": 0.0, "learning_rate": 0.00019999988327639036, "loss": 2.7257, "step": 820 }, { "epoch": 0.0, "learning_rate": 0.0001999998818485398, "loss": 2.7441, "step": 825 }, { "epoch": 0.0, "learning_rate": 0.00019999988041200894, "loss": 2.7089, "step": 830 }, { "epoch": 0.0, "learning_rate": 0.00019999987896679786, "loss": 2.6935, "step": 835 }, { "epoch": 0.0, "learning_rate": 0.00019999987751290652, "loss": 2.6191, "step": 840 }, { "epoch": 0.0, "learning_rate": 0.00019999987605033492, "loss": 2.7976, "step": 845 }, { "epoch": 0.0, "learning_rate": 0.00019999987457908306, "loss": 2.7551, "step": 850 }, { "epoch": 0.0, "learning_rate": 0.00019999987309915097, "loss": 2.7361, "step": 855 }, { "epoch": 0.0, "learning_rate": 0.00019999987161053865, "loss": 2.8261, "step": 860 }, { "epoch": 0.0, "learning_rate": 0.00019999987011324604, "loss": 2.8417, "step": 865 }, { "epoch": 0.0, "learning_rate": 0.00019999986860727318, "loss": 2.7262, "step": 870 }, { "epoch": 0.0, "learning_rate": 0.00019999986709262008, "loss": 2.6199, "step": 875 }, { "epoch": 0.0, "learning_rate": 0.00019999986556928673, "loss": 2.8727, "step": 880 }, { "epoch": 0.0, "learning_rate": 0.00019999986403727312, "loss": 2.7327, "step": 885 }, { "epoch": 0.0, "learning_rate": 0.00019999986249657928, "loss": 2.6243, "step": 890 }, { "epoch": 0.0, "learning_rate": 0.00019999986094720518, "loss": 2.7786, "step": 895 }, { "epoch": 0.0, "learning_rate": 0.0001999998593891508, "loss": 2.5695, "step": 900 }, { "epoch": 0.0, "learning_rate": 0.0001999998578224162, "loss": 2.7535, "step": 905 }, { "epoch": 0.0, "learning_rate": 0.00019999985624700136, "loss": 2.9243, "step": 910 }, { "epoch": 0.0, "learning_rate": 0.00019999985466290622, "loss": 2.8599, "step": 915 }, { "epoch": 0.0, "learning_rate": 0.00019999985307013086, "loss": 2.6587, "step": 920 }, { "epoch": 0.0, "learning_rate": 0.00019999985146867526, "loss": 2.7066, "step": 925 }, { "epoch": 0.0, "learning_rate": 0.0001999998498585394, "loss": 2.7567, "step": 930 }, { "epoch": 0.0, "learning_rate": 0.0001999998482397233, "loss": 2.8307, "step": 935 }, { "epoch": 0.0, "learning_rate": 0.00019999984661222696, "loss": 2.6447, "step": 940 }, { "epoch": 0.0, "learning_rate": 0.00019999984497605033, "loss": 2.916, "step": 945 }, { "epoch": 0.0, "learning_rate": 0.00019999984333119345, "loss": 2.8343, "step": 950 }, { "epoch": 0.0, "learning_rate": 0.00019999984167765638, "loss": 2.5178, "step": 955 }, { "epoch": 0.0, "learning_rate": 0.000199999840015439, "loss": 2.8607, "step": 960 }, { "epoch": 0.0, "learning_rate": 0.00019999983834454138, "loss": 2.7741, "step": 965 }, { "epoch": 0.0, "learning_rate": 0.00019999983666496357, "loss": 2.8707, "step": 970 }, { "epoch": 0.0, "learning_rate": 0.00019999983497670547, "loss": 2.9699, "step": 975 }, { "epoch": 0.0, "learning_rate": 0.0001999998332797671, "loss": 2.5461, "step": 980 }, { "epoch": 0.0, "learning_rate": 0.0001999998315741485, "loss": 2.7899, "step": 985 }, { "epoch": 0.0, "learning_rate": 0.00019999982985984967, "loss": 2.6976, "step": 990 }, { "epoch": 0.0, "learning_rate": 0.00019999982813687057, "loss": 2.6443, "step": 995 }, { "epoch": 0.0, "learning_rate": 0.0001999998264052112, "loss": 2.7354, "step": 1000 }, { "epoch": 0.0, "learning_rate": 0.00019999982466487165, "loss": 2.7049, "step": 1005 }, { "epoch": 0.0, "learning_rate": 0.0001999998229158518, "loss": 2.4594, "step": 1010 }, { "epoch": 0.0, "learning_rate": 0.0001999998211581517, "loss": 2.5669, "step": 1015 }, { "epoch": 0.0, "learning_rate": 0.00019999981939177136, "loss": 2.92, "step": 1020 }, { "epoch": 0.0, "learning_rate": 0.0001999998176167108, "loss": 2.7088, "step": 1025 }, { "epoch": 0.0, "learning_rate": 0.00019999981583297, "loss": 2.6139, "step": 1030 }, { "epoch": 0.0, "learning_rate": 0.0001999998140405489, "loss": 2.5142, "step": 1035 }, { "epoch": 0.0, "learning_rate": 0.0001999998122394476, "loss": 2.7047, "step": 1040 }, { "epoch": 0.0, "learning_rate": 0.00019999981042966602, "loss": 2.6954, "step": 1045 }, { "epoch": 0.0, "learning_rate": 0.00019999980861120422, "loss": 2.8237, "step": 1050 }, { "epoch": 0.0, "learning_rate": 0.00019999980678406213, "loss": 2.7916, "step": 1055 }, { "epoch": 0.0, "learning_rate": 0.00019999980494823984, "loss": 2.7145, "step": 1060 }, { "epoch": 0.0, "learning_rate": 0.0001999998031037373, "loss": 2.8736, "step": 1065 }, { "epoch": 0.0, "learning_rate": 0.0001999998012505545, "loss": 2.656, "step": 1070 }, { "epoch": 0.0, "learning_rate": 0.00019999979938869147, "loss": 2.5206, "step": 1075 }, { "epoch": 0.0, "learning_rate": 0.0001999997975181482, "loss": 2.8481, "step": 1080 }, { "epoch": 0.0, "learning_rate": 0.00019999979563892467, "loss": 2.8695, "step": 1085 }, { "epoch": 0.0, "learning_rate": 0.00019999979375102088, "loss": 2.5611, "step": 1090 }, { "epoch": 0.0, "learning_rate": 0.00019999979185443686, "loss": 2.7148, "step": 1095 }, { "epoch": 0.0, "learning_rate": 0.00019999978994917256, "loss": 2.8026, "step": 1100 }, { "epoch": 0.0, "learning_rate": 0.00019999978803522808, "loss": 2.5893, "step": 1105 }, { "epoch": 0.0, "learning_rate": 0.00019999978611260334, "loss": 2.6701, "step": 1110 }, { "epoch": 0.0, "learning_rate": 0.00019999978418129831, "loss": 2.7724, "step": 1115 }, { "epoch": 0.0, "learning_rate": 0.00019999978224131306, "loss": 2.4461, "step": 1120 }, { "epoch": 0.0, "learning_rate": 0.0001999997802926476, "loss": 2.7933, "step": 1125 }, { "epoch": 0.0, "learning_rate": 0.00019999977833530186, "loss": 2.6923, "step": 1130 }, { "epoch": 0.0, "learning_rate": 0.00019999977636927588, "loss": 2.6523, "step": 1135 }, { "epoch": 0.0, "learning_rate": 0.00019999977439456968, "loss": 2.6436, "step": 1140 }, { "epoch": 0.0, "learning_rate": 0.00019999977241118319, "loss": 2.8382, "step": 1145 }, { "epoch": 0.0, "learning_rate": 0.00019999977041911652, "loss": 2.7091, "step": 1150 }, { "epoch": 0.0, "learning_rate": 0.00019999976841836956, "loss": 2.8385, "step": 1155 }, { "epoch": 0.0, "learning_rate": 0.00019999976640894235, "loss": 2.3165, "step": 1160 }, { "epoch": 0.0, "learning_rate": 0.00019999976439083494, "loss": 2.8163, "step": 1165 }, { "epoch": 0.0, "learning_rate": 0.00019999976236404726, "loss": 2.7067, "step": 1170 }, { "epoch": 0.0, "learning_rate": 0.00019999976032857933, "loss": 2.5457, "step": 1175 }, { "epoch": 0.0, "learning_rate": 0.00019999975828443117, "loss": 2.7439, "step": 1180 }, { "epoch": 0.0, "learning_rate": 0.00019999975623160278, "loss": 2.6707, "step": 1185 }, { "epoch": 0.0, "learning_rate": 0.00019999975417009415, "loss": 2.5346, "step": 1190 }, { "epoch": 0.0, "learning_rate": 0.00019999975209990524, "loss": 2.7652, "step": 1195 }, { "epoch": 0.0, "learning_rate": 0.00019999975002103613, "loss": 2.6012, "step": 1200 }, { "epoch": 0.0, "learning_rate": 0.00019999974793348676, "loss": 2.7155, "step": 1205 }, { "epoch": 0.0, "learning_rate": 0.00019999974583725716, "loss": 2.8679, "step": 1210 }, { "epoch": 0.0, "learning_rate": 0.00019999974373234732, "loss": 2.6568, "step": 1215 }, { "epoch": 0.0, "learning_rate": 0.00019999974161875723, "loss": 2.763, "step": 1220 }, { "epoch": 0.0, "learning_rate": 0.00019999973949648688, "loss": 2.5574, "step": 1225 }, { "epoch": 0.0, "learning_rate": 0.00019999973736553633, "loss": 2.629, "step": 1230 }, { "epoch": 0.0, "learning_rate": 0.00019999973522590554, "loss": 2.5492, "step": 1235 }, { "epoch": 0.0, "learning_rate": 0.0001999997330775945, "loss": 2.4964, "step": 1240 }, { "epoch": 0.0, "learning_rate": 0.0001999997309206032, "loss": 2.8257, "step": 1245 }, { "epoch": 0.0, "learning_rate": 0.00019999972875493167, "loss": 2.9205, "step": 1250 }, { "epoch": 0.0, "learning_rate": 0.0001999997265805799, "loss": 2.5601, "step": 1255 }, { "epoch": 0.0, "learning_rate": 0.0001999997243975479, "loss": 2.6179, "step": 1260 }, { "epoch": 0.0, "learning_rate": 0.00019999972220583568, "loss": 2.5755, "step": 1265 }, { "epoch": 0.0, "learning_rate": 0.00019999972000544317, "loss": 2.4769, "step": 1270 }, { "epoch": 0.0, "learning_rate": 0.00019999971779637046, "loss": 2.6268, "step": 1275 }, { "epoch": 0.0, "learning_rate": 0.00019999971557861749, "loss": 2.4707, "step": 1280 }, { "epoch": 0.0, "learning_rate": 0.0001999997133521843, "loss": 2.6769, "step": 1285 }, { "epoch": 0.0, "learning_rate": 0.00019999971111707088, "loss": 2.514, "step": 1290 }, { "epoch": 0.0, "learning_rate": 0.00019999970887327718, "loss": 2.5784, "step": 1295 }, { "epoch": 0.0, "learning_rate": 0.0001999997066208033, "loss": 2.8341, "step": 1300 }, { "epoch": 0.0, "learning_rate": 0.00019999970435964913, "loss": 2.6296, "step": 1305 }, { "epoch": 0.0, "learning_rate": 0.00019999970208981475, "loss": 2.6278, "step": 1310 }, { "epoch": 0.0, "learning_rate": 0.0001999996998113001, "loss": 2.5416, "step": 1315 }, { "epoch": 0.0, "learning_rate": 0.00019999969752410528, "loss": 2.673, "step": 1320 }, { "epoch": 0.0, "learning_rate": 0.00019999969522823018, "loss": 2.6878, "step": 1325 }, { "epoch": 0.0, "learning_rate": 0.00019999969292367484, "loss": 2.5721, "step": 1330 }, { "epoch": 0.0, "learning_rate": 0.00019999969061043927, "loss": 2.6098, "step": 1335 }, { "epoch": 0.0, "learning_rate": 0.00019999968828852347, "loss": 2.7936, "step": 1340 }, { "epoch": 0.0, "learning_rate": 0.00019999968595792744, "loss": 2.5121, "step": 1345 }, { "epoch": 0.0, "learning_rate": 0.00019999968361865116, "loss": 2.8514, "step": 1350 }, { "epoch": 0.0, "learning_rate": 0.00019999968127069466, "loss": 2.4604, "step": 1355 }, { "epoch": 0.0, "learning_rate": 0.00019999967891405791, "loss": 2.6636, "step": 1360 }, { "epoch": 0.0, "learning_rate": 0.00019999967654874093, "loss": 2.5993, "step": 1365 }, { "epoch": 0.0, "learning_rate": 0.00019999967417474372, "loss": 2.6292, "step": 1370 }, { "epoch": 0.0, "learning_rate": 0.00019999967179206628, "loss": 2.6366, "step": 1375 }, { "epoch": 0.0, "learning_rate": 0.0001999996694007086, "loss": 2.7079, "step": 1380 }, { "epoch": 0.0, "learning_rate": 0.00019999966700067065, "loss": 2.6132, "step": 1385 }, { "epoch": 0.0, "learning_rate": 0.0001999996645919525, "loss": 2.6106, "step": 1390 }, { "epoch": 0.0, "learning_rate": 0.00019999966217455412, "loss": 2.352, "step": 1395 }, { "epoch": 0.0, "learning_rate": 0.00019999965974847552, "loss": 2.439, "step": 1400 }, { "epoch": 0.0, "learning_rate": 0.00019999965731371666, "loss": 2.7842, "step": 1405 }, { "epoch": 0.0, "learning_rate": 0.00019999965487027757, "loss": 2.5492, "step": 1410 }, { "epoch": 0.0, "learning_rate": 0.00019999965241815826, "loss": 2.8821, "step": 1415 }, { "epoch": 0.0, "learning_rate": 0.0001999996499573587, "loss": 2.6805, "step": 1420 }, { "epoch": 0.0, "learning_rate": 0.00019999964748787893, "loss": 2.4762, "step": 1425 }, { "epoch": 0.0, "learning_rate": 0.00019999964500971892, "loss": 2.6549, "step": 1430 }, { "epoch": 0.0, "learning_rate": 0.00019999964252287867, "loss": 2.6132, "step": 1435 }, { "epoch": 0.0, "learning_rate": 0.00019999964002735817, "loss": 2.4747, "step": 1440 }, { "epoch": 0.0, "learning_rate": 0.0001999996375231575, "loss": 2.8531, "step": 1445 }, { "epoch": 0.0, "learning_rate": 0.00019999963501027653, "loss": 2.7625, "step": 1450 }, { "epoch": 0.0, "learning_rate": 0.00019999963248871537, "loss": 2.5995, "step": 1455 }, { "epoch": 0.0, "learning_rate": 0.00019999962995847397, "loss": 2.6859, "step": 1460 }, { "epoch": 0.0, "learning_rate": 0.00019999962741955234, "loss": 2.8208, "step": 1465 }, { "epoch": 0.0, "learning_rate": 0.00019999962487195048, "loss": 2.6235, "step": 1470 }, { "epoch": 0.0, "learning_rate": 0.00019999962231566836, "loss": 2.6238, "step": 1475 }, { "epoch": 0.0, "learning_rate": 0.00019999961975070604, "loss": 2.6362, "step": 1480 }, { "epoch": 0.0, "learning_rate": 0.0001999996171770635, "loss": 2.6904, "step": 1485 }, { "epoch": 0.0, "learning_rate": 0.00019999961459474073, "loss": 2.5746, "step": 1490 }, { "epoch": 0.0, "learning_rate": 0.00019999961200373772, "loss": 2.7593, "step": 1495 }, { "epoch": 0.0, "learning_rate": 0.00019999960940405448, "loss": 2.3081, "step": 1500 }, { "epoch": 0.0, "learning_rate": 0.000199999606795691, "loss": 2.8279, "step": 1505 }, { "epoch": 0.0, "learning_rate": 0.0001999996041786473, "loss": 2.6013, "step": 1510 }, { "epoch": 0.0, "learning_rate": 0.00019999960155292338, "loss": 2.7186, "step": 1515 }, { "epoch": 0.0, "learning_rate": 0.00019999959891851924, "loss": 2.3538, "step": 1520 }, { "epoch": 0.0, "learning_rate": 0.00019999959627543486, "loss": 2.6952, "step": 1525 }, { "epoch": 0.0, "learning_rate": 0.00019999959362367023, "loss": 2.4529, "step": 1530 }, { "epoch": 0.0, "learning_rate": 0.00019999959096322537, "loss": 2.8643, "step": 1535 }, { "epoch": 0.0, "learning_rate": 0.00019999958829410033, "loss": 2.6397, "step": 1540 }, { "epoch": 0.0, "learning_rate": 0.000199999585616295, "loss": 2.7624, "step": 1545 }, { "epoch": 0.0, "learning_rate": 0.0001999995829298095, "loss": 2.4717, "step": 1550 }, { "epoch": 0.0, "learning_rate": 0.00019999958023464375, "loss": 2.9581, "step": 1555 }, { "epoch": 0.0, "learning_rate": 0.00019999957753079778, "loss": 2.5924, "step": 1560 }, { "epoch": 0.0, "learning_rate": 0.00019999957481827162, "loss": 2.8503, "step": 1565 }, { "epoch": 0.0, "learning_rate": 0.00019999957209706516, "loss": 2.7108, "step": 1570 }, { "epoch": 0.0, "learning_rate": 0.0001999995693671785, "loss": 2.7926, "step": 1575 }, { "epoch": 0.0, "learning_rate": 0.00019999956662861165, "loss": 2.6825, "step": 1580 }, { "epoch": 0.0, "learning_rate": 0.00019999956388136453, "loss": 2.4061, "step": 1585 }, { "epoch": 0.0, "learning_rate": 0.00019999956112543723, "loss": 2.8156, "step": 1590 }, { "epoch": 0.0, "learning_rate": 0.00019999955836082965, "loss": 2.6561, "step": 1595 }, { "epoch": 0.0, "learning_rate": 0.0001999995555875419, "loss": 2.6375, "step": 1600 }, { "epoch": 0.0, "learning_rate": 0.0001999995528055739, "loss": 2.5209, "step": 1605 }, { "epoch": 0.0, "learning_rate": 0.00019999955001492566, "loss": 2.5621, "step": 1610 }, { "epoch": 0.0, "learning_rate": 0.00019999954721559724, "loss": 2.7921, "step": 1615 }, { "epoch": 0.0, "learning_rate": 0.00019999954440758856, "loss": 2.7439, "step": 1620 }, { "epoch": 0.0, "learning_rate": 0.00019999954159089967, "loss": 2.6912, "step": 1625 }, { "epoch": 0.0, "learning_rate": 0.00019999953876553056, "loss": 2.4257, "step": 1630 }, { "epoch": 0.0, "learning_rate": 0.0001999995359314812, "loss": 2.566, "step": 1635 }, { "epoch": 0.0, "learning_rate": 0.00019999953308875166, "loss": 2.687, "step": 1640 }, { "epoch": 0.0, "learning_rate": 0.00019999953023734185, "loss": 2.8141, "step": 1645 }, { "epoch": 0.0, "learning_rate": 0.00019999952737725186, "loss": 2.6272, "step": 1650 }, { "epoch": 0.0, "learning_rate": 0.00019999952450848162, "loss": 2.539, "step": 1655 }, { "epoch": 0.0, "learning_rate": 0.00019999952163103117, "loss": 2.3768, "step": 1660 }, { "epoch": 0.0, "learning_rate": 0.00019999951874490052, "loss": 2.5902, "step": 1665 }, { "epoch": 0.0, "learning_rate": 0.00019999951585008963, "loss": 2.5818, "step": 1670 }, { "epoch": 0.0, "learning_rate": 0.00019999951294659852, "loss": 2.6016, "step": 1675 }, { "epoch": 0.0, "learning_rate": 0.00019999951003442718, "loss": 2.843, "step": 1680 }, { "epoch": 0.0, "learning_rate": 0.00019999950711357563, "loss": 2.7841, "step": 1685 }, { "epoch": 0.0, "learning_rate": 0.00019999950418404385, "loss": 2.4404, "step": 1690 }, { "epoch": 0.0, "learning_rate": 0.00019999950124583186, "loss": 2.7421, "step": 1695 }, { "epoch": 0.0, "learning_rate": 0.00019999949829893968, "loss": 2.9662, "step": 1700 }, { "epoch": 0.0, "learning_rate": 0.00019999949534336723, "loss": 2.5873, "step": 1705 }, { "epoch": 0.0, "learning_rate": 0.0001999994923791146, "loss": 2.5671, "step": 1710 }, { "epoch": 0.0, "learning_rate": 0.00019999948940618173, "loss": 2.5707, "step": 1715 }, { "epoch": 0.0, "learning_rate": 0.00019999948642456861, "loss": 2.7794, "step": 1720 }, { "epoch": 0.0, "learning_rate": 0.00019999948343427533, "loss": 2.7436, "step": 1725 }, { "epoch": 0.0, "learning_rate": 0.0001999994804353018, "loss": 2.8675, "step": 1730 }, { "epoch": 0.0, "learning_rate": 0.00019999947742764803, "loss": 2.6696, "step": 1735 }, { "epoch": 0.0, "learning_rate": 0.0001999994744113141, "loss": 2.7555, "step": 1740 }, { "epoch": 0.0, "learning_rate": 0.00019999947138629992, "loss": 2.6759, "step": 1745 }, { "epoch": 0.0, "learning_rate": 0.00019999946835260555, "loss": 2.5621, "step": 1750 }, { "epoch": 0.0, "learning_rate": 0.00019999946531023093, "loss": 2.7678, "step": 1755 }, { "epoch": 0.0, "learning_rate": 0.0001999994622591761, "loss": 2.6351, "step": 1760 }, { "epoch": 0.0, "learning_rate": 0.00019999945919944105, "loss": 2.6966, "step": 1765 }, { "epoch": 0.0, "learning_rate": 0.0001999994561310258, "loss": 2.6475, "step": 1770 }, { "epoch": 0.0, "learning_rate": 0.00019999945305393033, "loss": 2.7657, "step": 1775 }, { "epoch": 0.0, "learning_rate": 0.00019999944996815466, "loss": 2.3656, "step": 1780 }, { "epoch": 0.0, "learning_rate": 0.00019999944687369874, "loss": 2.7185, "step": 1785 }, { "epoch": 0.0, "learning_rate": 0.0001999994437705626, "loss": 2.5218, "step": 1790 }, { "epoch": 0.0, "learning_rate": 0.0001999994406587463, "loss": 2.6173, "step": 1795 }, { "epoch": 0.0, "learning_rate": 0.00019999943753824974, "loss": 2.6548, "step": 1800 }, { "epoch": 0.0, "learning_rate": 0.00019999943440907297, "loss": 2.8232, "step": 1805 }, { "epoch": 0.0, "learning_rate": 0.000199999431271216, "loss": 2.6127, "step": 1810 }, { "epoch": 0.0, "learning_rate": 0.00019999942812467882, "loss": 2.5608, "step": 1815 }, { "epoch": 0.0, "learning_rate": 0.00019999942496946142, "loss": 2.869, "step": 1820 }, { "epoch": 0.0, "learning_rate": 0.0001999994218055638, "loss": 2.442, "step": 1825 }, { "epoch": 0.0, "learning_rate": 0.000199999418632986, "loss": 2.4997, "step": 1830 }, { "epoch": 0.0, "learning_rate": 0.00019999941545172795, "loss": 2.4263, "step": 1835 }, { "epoch": 0.0, "learning_rate": 0.0001999994122617897, "loss": 2.6923, "step": 1840 }, { "epoch": 0.0, "learning_rate": 0.00019999940906317124, "loss": 2.479, "step": 1845 }, { "epoch": 0.0, "learning_rate": 0.00019999940585587256, "loss": 2.6618, "step": 1850 }, { "epoch": 0.0, "learning_rate": 0.0001999994026398937, "loss": 2.8523, "step": 1855 }, { "epoch": 0.0, "learning_rate": 0.00019999939941523458, "loss": 2.6514, "step": 1860 }, { "epoch": 0.0, "learning_rate": 0.00019999939618189528, "loss": 2.4561, "step": 1865 }, { "epoch": 0.0, "learning_rate": 0.00019999939293987575, "loss": 2.8679, "step": 1870 }, { "epoch": 0.0, "learning_rate": 0.00019999938968917605, "loss": 2.81, "step": 1875 }, { "epoch": 0.0, "learning_rate": 0.00019999938642979611, "loss": 2.7132, "step": 1880 }, { "epoch": 0.0, "learning_rate": 0.00019999938316173598, "loss": 2.6953, "step": 1885 }, { "epoch": 0.0, "learning_rate": 0.0001999993798849956, "loss": 2.5594, "step": 1890 }, { "epoch": 0.0, "learning_rate": 0.00019999937659957503, "loss": 2.8072, "step": 1895 }, { "epoch": 0.0, "learning_rate": 0.00019999937330547428, "loss": 2.7259, "step": 1900 }, { "epoch": 0.0, "learning_rate": 0.0001999993700026933, "loss": 2.602, "step": 1905 }, { "epoch": 0.0, "learning_rate": 0.0001999993666912321, "loss": 2.6015, "step": 1910 }, { "epoch": 0.0, "learning_rate": 0.0001999993633710907, "loss": 2.7177, "step": 1915 }, { "epoch": 0.0, "learning_rate": 0.0001999993600422691, "loss": 2.7859, "step": 1920 }, { "epoch": 0.0, "learning_rate": 0.00019999935670476728, "loss": 2.4358, "step": 1925 }, { "epoch": 0.0, "learning_rate": 0.00019999935335858528, "loss": 2.5998, "step": 1930 }, { "epoch": 0.0, "learning_rate": 0.00019999935000372305, "loss": 2.7423, "step": 1935 }, { "epoch": 0.0, "learning_rate": 0.0001999993466401806, "loss": 2.5114, "step": 1940 }, { "epoch": 0.0, "learning_rate": 0.00019999934326795797, "loss": 2.4934, "step": 1945 }, { "epoch": 0.0, "learning_rate": 0.00019999933988705513, "loss": 2.6945, "step": 1950 }, { "epoch": 0.0, "learning_rate": 0.00019999933649747208, "loss": 2.6669, "step": 1955 }, { "epoch": 0.0, "learning_rate": 0.00019999933309920885, "loss": 2.787, "step": 1960 }, { "epoch": 0.0, "learning_rate": 0.00019999932969226537, "loss": 2.6692, "step": 1965 }, { "epoch": 0.0, "learning_rate": 0.0001999993262766417, "loss": 2.6079, "step": 1970 }, { "epoch": 0.0, "learning_rate": 0.00019999932285233785, "loss": 2.4989, "step": 1975 }, { "epoch": 0.0, "learning_rate": 0.00019999931941935378, "loss": 2.774, "step": 1980 }, { "epoch": 0.0, "learning_rate": 0.0001999993159776895, "loss": 2.6556, "step": 1985 }, { "epoch": 0.0, "learning_rate": 0.00019999931252734503, "loss": 2.5328, "step": 1990 }, { "epoch": 0.0, "learning_rate": 0.00019999930906832033, "loss": 2.6393, "step": 1995 }, { "epoch": 0.0, "learning_rate": 0.00019999930560061547, "loss": 2.6058, "step": 2000 }, { "epoch": 0.0, "learning_rate": 0.00019999930212423036, "loss": 2.8369, "step": 2005 }, { "epoch": 0.0, "learning_rate": 0.0001999992986391651, "loss": 2.64, "step": 2010 }, { "epoch": 0.0, "learning_rate": 0.0001999992951454196, "loss": 2.6686, "step": 2015 }, { "epoch": 0.0, "learning_rate": 0.0001999992916429939, "loss": 2.9274, "step": 2020 }, { "epoch": 0.0, "learning_rate": 0.000199999288131888, "loss": 2.4095, "step": 2025 }, { "epoch": 0.0, "learning_rate": 0.00019999928461210192, "loss": 2.5919, "step": 2030 }, { "epoch": 0.0, "learning_rate": 0.0001999992810836356, "loss": 2.6322, "step": 2035 }, { "epoch": 0.0, "learning_rate": 0.00019999927754648912, "loss": 2.587, "step": 2040 }, { "epoch": 0.0, "learning_rate": 0.00019999927400066243, "loss": 2.6958, "step": 2045 }, { "epoch": 0.0, "learning_rate": 0.00019999927044615554, "loss": 2.5001, "step": 2050 }, { "epoch": 0.0, "learning_rate": 0.00019999926688296844, "loss": 2.7739, "step": 2055 }, { "epoch": 0.0, "learning_rate": 0.00019999926331110116, "loss": 2.6311, "step": 2060 }, { "epoch": 0.0, "learning_rate": 0.00019999925973055365, "loss": 2.2464, "step": 2065 }, { "epoch": 0.0, "learning_rate": 0.00019999925614132597, "loss": 2.6054, "step": 2070 }, { "epoch": 0.0, "learning_rate": 0.00019999925254341806, "loss": 2.7537, "step": 2075 }, { "epoch": 0.0, "learning_rate": 0.00019999924893683, "loss": 2.674, "step": 2080 }, { "epoch": 0.0, "learning_rate": 0.0001999992453215617, "loss": 2.7459, "step": 2085 }, { "epoch": 0.0, "learning_rate": 0.00019999924169761322, "loss": 2.6581, "step": 2090 }, { "epoch": 0.0, "learning_rate": 0.00019999923806498452, "loss": 2.5208, "step": 2095 }, { "epoch": 0.0, "learning_rate": 0.00019999923442367567, "loss": 2.9183, "step": 2100 }, { "epoch": 0.0, "learning_rate": 0.0001999992307736866, "loss": 2.4701, "step": 2105 }, { "epoch": 0.0, "learning_rate": 0.00019999922711501732, "loss": 2.6732, "step": 2110 }, { "epoch": 0.0, "learning_rate": 0.00019999922344766786, "loss": 2.6728, "step": 2115 }, { "epoch": 0.0, "learning_rate": 0.0001999992197716382, "loss": 2.561, "step": 2120 }, { "epoch": 0.0, "learning_rate": 0.00019999921608692835, "loss": 2.4256, "step": 2125 }, { "epoch": 0.0, "learning_rate": 0.00019999921239353832, "loss": 2.5284, "step": 2130 }, { "epoch": 0.0, "learning_rate": 0.00019999920869146807, "loss": 2.7438, "step": 2135 }, { "epoch": 0.0, "learning_rate": 0.00019999920498071761, "loss": 2.5915, "step": 2140 }, { "epoch": 0.0, "learning_rate": 0.000199999201261287, "loss": 2.6054, "step": 2145 }, { "epoch": 0.0, "learning_rate": 0.00019999919753317617, "loss": 2.8368, "step": 2150 }, { "epoch": 0.0, "learning_rate": 0.00019999919379638516, "loss": 2.5809, "step": 2155 }, { "epoch": 0.0, "learning_rate": 0.00019999919005091394, "loss": 2.6437, "step": 2160 }, { "epoch": 0.0, "learning_rate": 0.00019999918629676254, "loss": 2.611, "step": 2165 }, { "epoch": 0.0, "learning_rate": 0.00019999918253393094, "loss": 2.516, "step": 2170 }, { "epoch": 0.0, "learning_rate": 0.00019999917876241917, "loss": 2.3178, "step": 2175 }, { "epoch": 0.0, "learning_rate": 0.0001999991749822272, "loss": 2.5397, "step": 2180 }, { "epoch": 0.0, "learning_rate": 0.000199999171193355, "loss": 2.6697, "step": 2185 }, { "epoch": 0.0, "learning_rate": 0.00019999916739580264, "loss": 2.5902, "step": 2190 }, { "epoch": 0.0, "learning_rate": 0.0001999991635895701, "loss": 2.6564, "step": 2195 }, { "epoch": 0.0, "learning_rate": 0.00019999915977465737, "loss": 2.514, "step": 2200 }, { "epoch": 0.0, "learning_rate": 0.00019999915595106442, "loss": 2.6288, "step": 2205 }, { "epoch": 0.0, "learning_rate": 0.0001999991521187913, "loss": 2.7036, "step": 2210 }, { "epoch": 0.0, "learning_rate": 0.000199999148277838, "loss": 2.6969, "step": 2215 }, { "epoch": 0.0, "learning_rate": 0.0001999991444282045, "loss": 2.4181, "step": 2220 }, { "epoch": 0.0, "learning_rate": 0.00019999914056989082, "loss": 2.5399, "step": 2225 }, { "epoch": 0.0, "learning_rate": 0.00019999913670289696, "loss": 2.3588, "step": 2230 }, { "epoch": 0.0, "learning_rate": 0.00019999913282722287, "loss": 2.7133, "step": 2235 }, { "epoch": 0.0, "learning_rate": 0.00019999912894286864, "loss": 2.5371, "step": 2240 }, { "epoch": 0.0, "learning_rate": 0.0001999991250498342, "loss": 2.8503, "step": 2245 }, { "epoch": 0.0, "learning_rate": 0.00019999912114811958, "loss": 2.4967, "step": 2250 }, { "epoch": 0.0, "learning_rate": 0.00019999911723772478, "loss": 2.6704, "step": 2255 }, { "epoch": 0.0, "learning_rate": 0.00019999911331864976, "loss": 2.5961, "step": 2260 }, { "epoch": 0.0, "learning_rate": 0.00019999910939089458, "loss": 2.5097, "step": 2265 }, { "epoch": 0.0, "learning_rate": 0.0001999991054544592, "loss": 2.598, "step": 2270 }, { "epoch": 0.0, "learning_rate": 0.00019999910150934368, "loss": 2.7947, "step": 2275 }, { "epoch": 0.0, "learning_rate": 0.00019999909755554794, "loss": 2.4228, "step": 2280 }, { "epoch": 0.0, "learning_rate": 0.000199999093593072, "loss": 2.5667, "step": 2285 }, { "epoch": 0.0, "learning_rate": 0.0001999990896219159, "loss": 2.5555, "step": 2290 }, { "epoch": 0.0, "learning_rate": 0.0001999990856420796, "loss": 2.5606, "step": 2295 }, { "epoch": 0.0, "learning_rate": 0.00019999908165356314, "loss": 2.6759, "step": 2300 }, { "epoch": 0.0, "learning_rate": 0.0001999990776563665, "loss": 2.8376, "step": 2305 }, { "epoch": 0.0, "learning_rate": 0.00019999907365048965, "loss": 2.5424, "step": 2310 }, { "epoch": 0.0, "learning_rate": 0.00019999906963593262, "loss": 2.4368, "step": 2315 }, { "epoch": 0.0, "learning_rate": 0.00019999906561269542, "loss": 2.6155, "step": 2320 }, { "epoch": 0.0, "learning_rate": 0.00019999906158077804, "loss": 2.5441, "step": 2325 }, { "epoch": 0.0, "learning_rate": 0.00019999905754018046, "loss": 2.5587, "step": 2330 }, { "epoch": 0.0, "learning_rate": 0.00019999905349090273, "loss": 2.5802, "step": 2335 }, { "epoch": 0.0, "learning_rate": 0.00019999904943294482, "loss": 2.6721, "step": 2340 }, { "epoch": 0.0, "learning_rate": 0.00019999904536630668, "loss": 2.5217, "step": 2345 }, { "epoch": 0.0, "learning_rate": 0.00019999904129098842, "loss": 2.6426, "step": 2350 }, { "epoch": 0.0, "learning_rate": 0.00019999903720698992, "loss": 2.4764, "step": 2355 }, { "epoch": 0.0, "learning_rate": 0.0001999990331143113, "loss": 2.6398, "step": 2360 }, { "epoch": 0.0, "learning_rate": 0.00019999902901295243, "loss": 2.8396, "step": 2365 }, { "epoch": 0.0, "learning_rate": 0.00019999902490291347, "loss": 2.6032, "step": 2370 }, { "epoch": 0.0, "learning_rate": 0.0001999990207841943, "loss": 2.7985, "step": 2375 }, { "epoch": 0.0, "learning_rate": 0.00019999901665679492, "loss": 2.7499, "step": 2380 }, { "epoch": 0.0, "learning_rate": 0.0001999990125207154, "loss": 2.7408, "step": 2385 }, { "epoch": 0.0, "learning_rate": 0.00019999900837595566, "loss": 2.5313, "step": 2390 }, { "epoch": 0.0, "learning_rate": 0.00019999900422251575, "loss": 2.3107, "step": 2395 }, { "epoch": 0.0, "learning_rate": 0.0001999990000603957, "loss": 2.7134, "step": 2400 }, { "epoch": 0.0, "learning_rate": 0.00019999899588959546, "loss": 2.4859, "step": 2405 }, { "epoch": 0.0, "learning_rate": 0.00019999899171011505, "loss": 2.6712, "step": 2410 }, { "epoch": 0.0, "learning_rate": 0.00019999898752195446, "loss": 2.8095, "step": 2415 }, { "epoch": 0.0, "learning_rate": 0.0001999989833251137, "loss": 2.5598, "step": 2420 }, { "epoch": 0.0, "learning_rate": 0.00019999897911959273, "loss": 2.4414, "step": 2425 }, { "epoch": 0.0, "learning_rate": 0.0001999989749053916, "loss": 2.5381, "step": 2430 }, { "epoch": 0.0, "learning_rate": 0.00019999897068251031, "loss": 2.5387, "step": 2435 }, { "epoch": 0.0, "learning_rate": 0.00019999896645094887, "loss": 2.607, "step": 2440 }, { "epoch": 0.0, "learning_rate": 0.00019999896221070725, "loss": 2.8016, "step": 2445 }, { "epoch": 0.0, "learning_rate": 0.0001999989579617854, "loss": 2.7545, "step": 2450 }, { "epoch": 0.0, "learning_rate": 0.00019999895370418345, "loss": 2.7226, "step": 2455 }, { "epoch": 0.0, "learning_rate": 0.00019999894943790127, "loss": 2.7263, "step": 2460 }, { "epoch": 0.0, "learning_rate": 0.00019999894516293897, "loss": 2.714, "step": 2465 }, { "epoch": 0.0, "learning_rate": 0.00019999894087929646, "loss": 2.8175, "step": 2470 }, { "epoch": 0.0, "learning_rate": 0.0001999989365869738, "loss": 2.5644, "step": 2475 }, { "epoch": 0.0, "learning_rate": 0.00019999893228597095, "loss": 2.4785, "step": 2480 }, { "epoch": 0.0, "learning_rate": 0.00019999892797628794, "loss": 2.6025, "step": 2485 }, { "epoch": 0.0, "learning_rate": 0.00019999892365792475, "loss": 2.7969, "step": 2490 }, { "epoch": 0.0, "learning_rate": 0.00019999891933088142, "loss": 2.7892, "step": 2495 }, { "epoch": 0.0, "learning_rate": 0.0001999989149951579, "loss": 2.556, "step": 2500 }, { "epoch": 0.0, "learning_rate": 0.00019999891065075422, "loss": 2.5106, "step": 2505 }, { "epoch": 0.0, "learning_rate": 0.00019999890629767038, "loss": 2.5166, "step": 2510 }, { "epoch": 0.0, "learning_rate": 0.00019999890193590634, "loss": 2.5847, "step": 2515 }, { "epoch": 0.0, "learning_rate": 0.00019999889756546215, "loss": 2.6806, "step": 2520 }, { "epoch": 0.0, "learning_rate": 0.0001999988931863378, "loss": 2.6937, "step": 2525 }, { "epoch": 0.0, "learning_rate": 0.00019999888879853326, "loss": 2.6377, "step": 2530 }, { "epoch": 0.0, "learning_rate": 0.00019999888440204856, "loss": 2.3986, "step": 2535 }, { "epoch": 0.0, "learning_rate": 0.00019999887999688372, "loss": 2.8614, "step": 2540 }, { "epoch": 0.0, "learning_rate": 0.00019999887558303872, "loss": 2.8034, "step": 2545 }, { "epoch": 0.0, "learning_rate": 0.00019999887116051352, "loss": 2.6652, "step": 2550 }, { "epoch": 0.0, "learning_rate": 0.00019999886672930818, "loss": 2.703, "step": 2555 }, { "epoch": 0.0, "learning_rate": 0.00019999886228942265, "loss": 2.5679, "step": 2560 }, { "epoch": 0.0, "learning_rate": 0.00019999885784085698, "loss": 2.3774, "step": 2565 }, { "epoch": 0.0, "learning_rate": 0.00019999885338361115, "loss": 2.7305, "step": 2570 }, { "epoch": 0.0, "learning_rate": 0.00019999884891768512, "loss": 2.1613, "step": 2575 }, { "epoch": 0.0, "learning_rate": 0.00019999884444307897, "loss": 2.6024, "step": 2580 }, { "epoch": 0.0, "learning_rate": 0.00019999883995979264, "loss": 2.5547, "step": 2585 }, { "epoch": 0.0, "learning_rate": 0.00019999883546782614, "loss": 2.6603, "step": 2590 }, { "epoch": 0.0, "learning_rate": 0.00019999883096717948, "loss": 2.3867, "step": 2595 }, { "epoch": 0.0, "learning_rate": 0.00019999882645785268, "loss": 2.5352, "step": 2600 }, { "epoch": 0.0, "learning_rate": 0.0001999988219398457, "loss": 2.5642, "step": 2605 }, { "epoch": 0.0, "learning_rate": 0.00019999881741315857, "loss": 2.86, "step": 2610 }, { "epoch": 0.0, "learning_rate": 0.00019999881287779126, "loss": 2.7749, "step": 2615 }, { "epoch": 0.0, "learning_rate": 0.0001999988083337438, "loss": 2.3401, "step": 2620 }, { "epoch": 0.0, "learning_rate": 0.0001999988037810162, "loss": 2.4999, "step": 2625 }, { "epoch": 0.0, "learning_rate": 0.00019999879921960841, "loss": 2.6999, "step": 2630 }, { "epoch": 0.0, "learning_rate": 0.00019999879464952048, "loss": 2.6279, "step": 2635 }, { "epoch": 0.0, "learning_rate": 0.00019999879007075237, "loss": 2.5854, "step": 2640 }, { "epoch": 0.0, "learning_rate": 0.00019999878548330414, "loss": 2.6713, "step": 2645 }, { "epoch": 0.0, "learning_rate": 0.00019999878088717573, "loss": 2.8, "step": 2650 }, { "epoch": 0.0, "learning_rate": 0.0001999987762823672, "loss": 2.4333, "step": 2655 }, { "epoch": 0.0, "learning_rate": 0.00019999877166887843, "loss": 2.8168, "step": 2660 }, { "epoch": 0.0, "learning_rate": 0.00019999876704670957, "loss": 2.467, "step": 2665 }, { "epoch": 0.0, "learning_rate": 0.00019999876241586054, "loss": 2.5969, "step": 2670 }, { "epoch": 0.0, "learning_rate": 0.00019999875777633135, "loss": 2.5396, "step": 2675 }, { "epoch": 0.0, "learning_rate": 0.000199998753128122, "loss": 2.5875, "step": 2680 }, { "epoch": 0.0, "learning_rate": 0.0001999987484712325, "loss": 2.5898, "step": 2685 }, { "epoch": 0.0, "learning_rate": 0.00019999874380566285, "loss": 2.7047, "step": 2690 }, { "epoch": 0.0, "learning_rate": 0.00019999873913141306, "loss": 2.5165, "step": 2695 }, { "epoch": 0.0, "learning_rate": 0.0001999987344484831, "loss": 2.5659, "step": 2700 }, { "epoch": 0.0, "learning_rate": 0.000199998729756873, "loss": 2.696, "step": 2705 }, { "epoch": 0.0, "learning_rate": 0.00019999872505658273, "loss": 2.474, "step": 2710 }, { "epoch": 0.0, "learning_rate": 0.00019999872034761232, "loss": 2.5292, "step": 2715 }, { "epoch": 0.0, "learning_rate": 0.00019999871562996173, "loss": 2.8167, "step": 2720 }, { "epoch": 0.0, "learning_rate": 0.00019999871090363103, "loss": 2.6075, "step": 2725 }, { "epoch": 0.0, "learning_rate": 0.00019999870616862017, "loss": 2.3508, "step": 2730 }, { "epoch": 0.0, "learning_rate": 0.00019999870142492913, "loss": 2.6551, "step": 2735 }, { "epoch": 0.0, "learning_rate": 0.00019999869667255795, "loss": 2.3473, "step": 2740 }, { "epoch": 0.0, "learning_rate": 0.00019999869191150664, "loss": 2.5731, "step": 2745 }, { "epoch": 0.0, "learning_rate": 0.00019999868714177515, "loss": 2.5612, "step": 2750 }, { "epoch": 0.0, "learning_rate": 0.00019999868236336355, "loss": 2.4575, "step": 2755 }, { "epoch": 0.0, "learning_rate": 0.0001999986775762718, "loss": 2.6937, "step": 2760 }, { "epoch": 0.0, "learning_rate": 0.00019999867278049988, "loss": 2.7548, "step": 2765 }, { "epoch": 0.0, "learning_rate": 0.0001999986679760478, "loss": 2.7528, "step": 2770 }, { "epoch": 0.0, "learning_rate": 0.0001999986631629156, "loss": 2.552, "step": 2775 }, { "epoch": 0.0, "learning_rate": 0.00019999865834110326, "loss": 2.6023, "step": 2780 }, { "epoch": 0.0, "learning_rate": 0.00019999865351061076, "loss": 2.7166, "step": 2785 }, { "epoch": 0.0, "learning_rate": 0.00019999864867143813, "loss": 2.5235, "step": 2790 }, { "epoch": 0.0, "learning_rate": 0.0001999986438235853, "loss": 2.5894, "step": 2795 }, { "epoch": 0.0, "learning_rate": 0.00019999863896705237, "loss": 2.625, "step": 2800 }, { "epoch": 0.0, "learning_rate": 0.0001999986341018393, "loss": 2.506, "step": 2805 }, { "epoch": 0.0, "learning_rate": 0.00019999862922794607, "loss": 2.8826, "step": 2810 }, { "epoch": 0.0, "learning_rate": 0.0001999986243453727, "loss": 2.3339, "step": 2815 }, { "epoch": 0.0, "learning_rate": 0.0001999986194541192, "loss": 2.7514, "step": 2820 }, { "epoch": 0.0, "learning_rate": 0.00019999861455418554, "loss": 2.6697, "step": 2825 }, { "epoch": 0.0, "learning_rate": 0.00019999860964557171, "loss": 2.6295, "step": 2830 }, { "epoch": 0.0, "learning_rate": 0.0001999986047282778, "loss": 2.7592, "step": 2835 }, { "epoch": 0.0, "learning_rate": 0.0001999985998023037, "loss": 2.6853, "step": 2840 }, { "epoch": 0.0, "learning_rate": 0.00019999859486764948, "loss": 2.4563, "step": 2845 }, { "epoch": 0.0, "learning_rate": 0.0001999985899243151, "loss": 2.5586, "step": 2850 }, { "epoch": 0.0, "learning_rate": 0.00019999858497230058, "loss": 2.4144, "step": 2855 }, { "epoch": 0.0, "learning_rate": 0.00019999858001160597, "loss": 2.5669, "step": 2860 }, { "epoch": 0.0, "learning_rate": 0.00019999857504223115, "loss": 2.5779, "step": 2865 }, { "epoch": 0.0, "learning_rate": 0.00019999857006417626, "loss": 2.5671, "step": 2870 }, { "epoch": 0.0, "learning_rate": 0.0001999985650774412, "loss": 2.6281, "step": 2875 }, { "epoch": 0.0, "learning_rate": 0.000199998560082026, "loss": 2.7483, "step": 2880 }, { "epoch": 0.0, "learning_rate": 0.00019999855507793065, "loss": 2.496, "step": 2885 }, { "epoch": 0.0, "learning_rate": 0.0001999985500651552, "loss": 2.7721, "step": 2890 }, { "epoch": 0.0, "learning_rate": 0.00019999854504369955, "loss": 2.463, "step": 2895 }, { "epoch": 0.0, "learning_rate": 0.00019999854001356382, "loss": 2.4655, "step": 2900 }, { "epoch": 0.0, "learning_rate": 0.0001999985349747479, "loss": 2.6399, "step": 2905 }, { "epoch": 0.0, "learning_rate": 0.0001999985299272519, "loss": 2.8935, "step": 2910 }, { "epoch": 0.0, "learning_rate": 0.00019999852487107575, "loss": 2.6516, "step": 2915 }, { "epoch": 0.0, "learning_rate": 0.00019999851980621945, "loss": 2.7925, "step": 2920 }, { "epoch": 0.0, "learning_rate": 0.00019999851473268302, "loss": 2.5695, "step": 2925 }, { "epoch": 0.0, "learning_rate": 0.00019999850965046647, "loss": 2.2234, "step": 2930 }, { "epoch": 0.0, "learning_rate": 0.00019999850455956978, "loss": 2.7042, "step": 2935 }, { "epoch": 0.0, "learning_rate": 0.00019999849945999295, "loss": 2.5192, "step": 2940 }, { "epoch": 0.0, "learning_rate": 0.00019999849435173598, "loss": 2.6676, "step": 2945 }, { "epoch": 0.0, "learning_rate": 0.00019999848923479892, "loss": 2.5299, "step": 2950 }, { "epoch": 0.0, "learning_rate": 0.00019999848410918168, "loss": 2.5762, "step": 2955 }, { "epoch": 0.0, "learning_rate": 0.00019999847897488434, "loss": 2.6976, "step": 2960 }, { "epoch": 0.0, "learning_rate": 0.00019999847383190688, "loss": 2.5738, "step": 2965 }, { "epoch": 0.0, "learning_rate": 0.00019999846868024924, "loss": 2.5577, "step": 2970 }, { "epoch": 0.0, "learning_rate": 0.0001999984635199115, "loss": 2.7278, "step": 2975 }, { "epoch": 0.0, "learning_rate": 0.00019999845835089365, "loss": 2.6825, "step": 2980 }, { "epoch": 0.0, "learning_rate": 0.00019999845317319564, "loss": 2.694, "step": 2985 }, { "epoch": 0.0, "learning_rate": 0.00019999844798681752, "loss": 2.4306, "step": 2990 }, { "epoch": 0.0, "learning_rate": 0.00019999844279175924, "loss": 2.6339, "step": 2995 }, { "epoch": 0.0, "learning_rate": 0.00019999843758802084, "loss": 2.3934, "step": 3000 }, { "epoch": 0.0, "learning_rate": 0.00019999843237560234, "loss": 2.5188, "step": 3005 }, { "epoch": 0.0, "learning_rate": 0.0001999984271545037, "loss": 2.7121, "step": 3010 }, { "epoch": 0.0, "learning_rate": 0.00019999842192472493, "loss": 2.6028, "step": 3015 }, { "epoch": 0.0, "learning_rate": 0.000199998416686266, "loss": 2.5672, "step": 3020 }, { "epoch": 0.0, "learning_rate": 0.00019999841143912702, "loss": 2.6899, "step": 3025 }, { "epoch": 0.0, "learning_rate": 0.0001999984061833079, "loss": 2.509, "step": 3030 }, { "epoch": 0.0, "learning_rate": 0.00019999840091880863, "loss": 2.5404, "step": 3035 }, { "epoch": 0.0, "learning_rate": 0.00019999839564562922, "loss": 2.6446, "step": 3040 }, { "epoch": 0.0, "learning_rate": 0.00019999839036376972, "loss": 2.6019, "step": 3045 }, { "epoch": 0.0, "learning_rate": 0.00019999838507323006, "loss": 2.6164, "step": 3050 }, { "epoch": 0.0, "learning_rate": 0.0001999983797740103, "loss": 2.4493, "step": 3055 }, { "epoch": 0.0, "learning_rate": 0.00019999837446611041, "loss": 2.7621, "step": 3060 }, { "epoch": 0.0, "learning_rate": 0.00019999836914953042, "loss": 2.6179, "step": 3065 }, { "epoch": 0.0, "learning_rate": 0.0001999983638242703, "loss": 2.8092, "step": 3070 }, { "epoch": 0.0, "learning_rate": 0.00019999835849033006, "loss": 2.4296, "step": 3075 }, { "epoch": 0.0, "learning_rate": 0.0001999983531477097, "loss": 2.5016, "step": 3080 }, { "epoch": 0.0, "learning_rate": 0.0001999983477964092, "loss": 2.5551, "step": 3085 }, { "epoch": 0.0, "learning_rate": 0.00019999834243642858, "loss": 2.7123, "step": 3090 }, { "epoch": 0.0, "learning_rate": 0.00019999833706776788, "loss": 2.7107, "step": 3095 }, { "epoch": 0.0, "learning_rate": 0.00019999833169042703, "loss": 2.5635, "step": 3100 }, { "epoch": 0.0, "learning_rate": 0.00019999832630440608, "loss": 2.4079, "step": 3105 }, { "epoch": 0.0, "learning_rate": 0.000199998320909705, "loss": 2.6807, "step": 3110 }, { "epoch": 0.0, "learning_rate": 0.00019999831550632382, "loss": 2.6134, "step": 3115 }, { "epoch": 0.0, "learning_rate": 0.00019999831009426247, "loss": 2.4098, "step": 3120 }, { "epoch": 0.0, "learning_rate": 0.00019999830467352106, "loss": 2.3827, "step": 3125 }, { "epoch": 0.0, "learning_rate": 0.00019999829924409953, "loss": 2.6387, "step": 3130 }, { "epoch": 0.0, "learning_rate": 0.00019999829380599787, "loss": 2.261, "step": 3135 }, { "epoch": 0.0, "learning_rate": 0.0001999982883592161, "loss": 2.6373, "step": 3140 }, { "epoch": 0.0, "learning_rate": 0.0001999982829037542, "loss": 2.4054, "step": 3145 }, { "epoch": 0.0, "learning_rate": 0.00019999827743961222, "loss": 2.5333, "step": 3150 }, { "epoch": 0.0, "learning_rate": 0.0001999982719667901, "loss": 2.4284, "step": 3155 }, { "epoch": 0.0, "learning_rate": 0.0001999982664852879, "loss": 2.2998, "step": 3160 }, { "epoch": 0.0, "learning_rate": 0.00019999826099510552, "loss": 2.4798, "step": 3165 }, { "epoch": 0.0, "learning_rate": 0.0001999982554962431, "loss": 2.7227, "step": 3170 }, { "epoch": 0.0, "learning_rate": 0.00019999824998870054, "loss": 2.5127, "step": 3175 }, { "epoch": 0.0, "learning_rate": 0.00019999824447247786, "loss": 2.5939, "step": 3180 }, { "epoch": 0.0, "learning_rate": 0.00019999823894757506, "loss": 2.5919, "step": 3185 }, { "epoch": 0.0, "learning_rate": 0.0001999982334139922, "loss": 2.5951, "step": 3190 }, { "epoch": 0.0, "learning_rate": 0.00019999822787172917, "loss": 2.7247, "step": 3195 }, { "epoch": 0.0, "learning_rate": 0.00019999822232078605, "loss": 2.6713, "step": 3200 }, { "epoch": 0.0, "learning_rate": 0.00019999821676116284, "loss": 2.4816, "step": 3205 }, { "epoch": 0.0, "learning_rate": 0.0001999982111928595, "loss": 2.5262, "step": 3210 }, { "epoch": 0.0, "learning_rate": 0.00019999820561587608, "loss": 2.7087, "step": 3215 }, { "epoch": 0.0, "learning_rate": 0.00019999820003021253, "loss": 2.8628, "step": 3220 }, { "epoch": 0.0, "learning_rate": 0.00019999819443586889, "loss": 2.6129, "step": 3225 }, { "epoch": 0.0, "learning_rate": 0.00019999818883284514, "loss": 2.3413, "step": 3230 }, { "epoch": 0.0, "learning_rate": 0.00019999818322114128, "loss": 2.6626, "step": 3235 }, { "epoch": 0.0, "learning_rate": 0.0001999981776007573, "loss": 2.6738, "step": 3240 }, { "epoch": 0.0, "learning_rate": 0.00019999817197169324, "loss": 2.5838, "step": 3245 }, { "epoch": 0.0, "learning_rate": 0.00019999816633394906, "loss": 2.5262, "step": 3250 }, { "epoch": 0.0, "learning_rate": 0.0001999981606875248, "loss": 2.852, "step": 3255 }, { "epoch": 0.0, "learning_rate": 0.00019999815503242042, "loss": 2.4902, "step": 3260 }, { "epoch": 0.0, "learning_rate": 0.00019999814936863593, "loss": 2.6218, "step": 3265 }, { "epoch": 0.0, "learning_rate": 0.00019999814369617137, "loss": 2.6053, "step": 3270 }, { "epoch": 0.0, "learning_rate": 0.00019999813801502666, "loss": 2.6452, "step": 3275 }, { "epoch": 0.0, "learning_rate": 0.00019999813232520185, "loss": 2.7253, "step": 3280 }, { "epoch": 0.0, "learning_rate": 0.00019999812662669698, "loss": 2.4042, "step": 3285 }, { "epoch": 0.0, "learning_rate": 0.000199998120919512, "loss": 2.531, "step": 3290 }, { "epoch": 0.0, "learning_rate": 0.00019999811520364692, "loss": 2.5915, "step": 3295 }, { "epoch": 0.0, "learning_rate": 0.0001999981094791017, "loss": 2.7373, "step": 3300 }, { "epoch": 0.0, "learning_rate": 0.00019999810374587642, "loss": 2.6961, "step": 3305 }, { "epoch": 0.0, "learning_rate": 0.00019999809800397105, "loss": 2.6234, "step": 3310 }, { "epoch": 0.0, "learning_rate": 0.00019999809225338555, "loss": 2.7268, "step": 3315 }, { "epoch": 0.0, "learning_rate": 0.00019999808649412, "loss": 2.5442, "step": 3320 }, { "epoch": 0.0, "learning_rate": 0.0001999980807261743, "loss": 2.489, "step": 3325 }, { "epoch": 0.0, "learning_rate": 0.00019999807494954855, "loss": 2.5636, "step": 3330 }, { "epoch": 0.0, "learning_rate": 0.0001999980691642427, "loss": 2.3505, "step": 3335 }, { "epoch": 0.0, "learning_rate": 0.0001999980633702567, "loss": 2.2822, "step": 3340 }, { "epoch": 0.0, "learning_rate": 0.00019999805756759065, "loss": 2.5858, "step": 3345 }, { "epoch": 0.0, "learning_rate": 0.00019999805175624448, "loss": 2.5202, "step": 3350 }, { "epoch": 0.0, "learning_rate": 0.00019999804593621824, "loss": 2.5763, "step": 3355 }, { "epoch": 0.0, "learning_rate": 0.00019999804010751193, "loss": 2.3721, "step": 3360 }, { "epoch": 0.0, "learning_rate": 0.00019999803427012548, "loss": 2.8356, "step": 3365 }, { "epoch": 0.0, "learning_rate": 0.00019999802842405898, "loss": 2.676, "step": 3370 }, { "epoch": 0.0, "learning_rate": 0.00019999802256931234, "loss": 2.6306, "step": 3375 }, { "epoch": 0.0, "learning_rate": 0.00019999801670588565, "loss": 2.5209, "step": 3380 }, { "epoch": 0.0, "learning_rate": 0.00019999801083377884, "loss": 2.4824, "step": 3385 }, { "epoch": 0.0, "learning_rate": 0.00019999800495299196, "loss": 2.8387, "step": 3390 }, { "epoch": 0.0, "learning_rate": 0.00019999799906352496, "loss": 2.8534, "step": 3395 }, { "epoch": 0.0, "learning_rate": 0.00019999799316537792, "loss": 2.3639, "step": 3400 }, { "epoch": 0.0, "learning_rate": 0.00019999798725855078, "loss": 2.6524, "step": 3405 }, { "epoch": 0.0, "learning_rate": 0.00019999798134304352, "loss": 2.8767, "step": 3410 }, { "epoch": 0.0, "learning_rate": 0.0001999979754188562, "loss": 2.4607, "step": 3415 }, { "epoch": 0.0, "learning_rate": 0.00019999796948598877, "loss": 2.6159, "step": 3420 }, { "epoch": 0.0, "learning_rate": 0.00019999796354444127, "loss": 2.5035, "step": 3425 }, { "epoch": 0.0, "learning_rate": 0.00019999795759421368, "loss": 2.5866, "step": 3430 }, { "epoch": 0.0, "learning_rate": 0.000199997951635306, "loss": 2.5947, "step": 3435 }, { "epoch": 0.0, "learning_rate": 0.00019999794566771825, "loss": 2.6712, "step": 3440 }, { "epoch": 0.0, "learning_rate": 0.00019999793969145043, "loss": 2.7967, "step": 3445 }, { "epoch": 0.0, "learning_rate": 0.00019999793370650249, "loss": 2.4733, "step": 3450 }, { "epoch": 0.0, "learning_rate": 0.00019999792771287447, "loss": 2.5811, "step": 3455 }, { "epoch": 0.0, "learning_rate": 0.00019999792171056637, "loss": 2.7599, "step": 3460 }, { "epoch": 0.0, "learning_rate": 0.0001999979156995782, "loss": 2.4645, "step": 3465 }, { "epoch": 0.0, "learning_rate": 0.00019999790967990995, "loss": 2.7224, "step": 3470 }, { "epoch": 0.0, "learning_rate": 0.0001999979036515616, "loss": 2.5087, "step": 3475 }, { "epoch": 0.0, "learning_rate": 0.00019999789761453317, "loss": 2.8189, "step": 3480 }, { "epoch": 0.0, "learning_rate": 0.00019999789156882467, "loss": 2.5146, "step": 3485 }, { "epoch": 0.0, "learning_rate": 0.0001999978855144361, "loss": 2.7451, "step": 3490 }, { "epoch": 0.0, "learning_rate": 0.00019999787945136746, "loss": 2.6152, "step": 3495 }, { "epoch": 0.0, "learning_rate": 0.0001999978733796187, "loss": 2.399, "step": 3500 }, { "epoch": 0.0, "learning_rate": 0.00019999786729918987, "loss": 2.5335, "step": 3505 }, { "epoch": 0.0, "learning_rate": 0.000199997861210081, "loss": 2.4403, "step": 3510 }, { "epoch": 0.0, "learning_rate": 0.00019999785511229203, "loss": 2.6851, "step": 3515 }, { "epoch": 0.0, "learning_rate": 0.00019999784900582296, "loss": 2.7007, "step": 3520 }, { "epoch": 0.0, "learning_rate": 0.00019999784289067386, "loss": 2.4623, "step": 3525 }, { "epoch": 0.0, "learning_rate": 0.00019999783676684466, "loss": 2.5301, "step": 3530 }, { "epoch": 0.0, "learning_rate": 0.0001999978306343354, "loss": 2.2027, "step": 3535 }, { "epoch": 0.0, "learning_rate": 0.000199997824493146, "loss": 2.6527, "step": 3540 }, { "epoch": 0.0, "learning_rate": 0.0001999978183432766, "loss": 2.631, "step": 3545 }, { "epoch": 0.0, "learning_rate": 0.0001999978121847271, "loss": 2.5573, "step": 3550 }, { "epoch": 0.0, "learning_rate": 0.00019999780601749753, "loss": 2.4258, "step": 3555 }, { "epoch": 0.0, "learning_rate": 0.0001999977998415879, "loss": 2.6852, "step": 3560 }, { "epoch": 0.0, "learning_rate": 0.0001999977936569982, "loss": 2.5886, "step": 3565 }, { "epoch": 0.0, "learning_rate": 0.00019999778746372838, "loss": 2.6689, "step": 3570 }, { "epoch": 0.0, "learning_rate": 0.00019999778126177854, "loss": 2.6879, "step": 3575 }, { "epoch": 0.0, "learning_rate": 0.00019999777505114863, "loss": 2.6052, "step": 3580 }, { "epoch": 0.0, "learning_rate": 0.00019999776883183862, "loss": 2.5364, "step": 3585 }, { "epoch": 0.0, "learning_rate": 0.00019999776260384858, "loss": 2.7237, "step": 3590 }, { "epoch": 0.0, "learning_rate": 0.00019999775636717843, "loss": 2.6191, "step": 3595 }, { "epoch": 0.0, "learning_rate": 0.00019999775012182825, "loss": 2.6068, "step": 3600 }, { "epoch": 0.0, "learning_rate": 0.00019999774386779797, "loss": 2.4453, "step": 3605 }, { "epoch": 0.0, "learning_rate": 0.00019999773760508762, "loss": 2.6097, "step": 3610 }, { "epoch": 0.0, "learning_rate": 0.00019999773133369723, "loss": 2.4836, "step": 3615 }, { "epoch": 0.0, "learning_rate": 0.00019999772505362677, "loss": 2.7328, "step": 3620 }, { "epoch": 0.0, "learning_rate": 0.00019999771876487622, "loss": 2.6265, "step": 3625 }, { "epoch": 0.0, "learning_rate": 0.00019999771246744565, "loss": 2.5548, "step": 3630 }, { "epoch": 0.0, "learning_rate": 0.00019999770616133496, "loss": 2.4861, "step": 3635 }, { "epoch": 0.0, "learning_rate": 0.00019999769984654426, "loss": 2.6367, "step": 3640 }, { "epoch": 0.0, "learning_rate": 0.00019999769352307346, "loss": 2.5729, "step": 3645 }, { "epoch": 0.0, "learning_rate": 0.0001999976871909226, "loss": 2.6693, "step": 3650 }, { "epoch": 0.0, "learning_rate": 0.0001999976808500917, "loss": 2.6048, "step": 3655 }, { "epoch": 0.0, "learning_rate": 0.0001999976745005807, "loss": 2.5641, "step": 3660 }, { "epoch": 0.0, "learning_rate": 0.00019999766814238968, "loss": 2.5885, "step": 3665 }, { "epoch": 0.0, "learning_rate": 0.0001999976617755186, "loss": 2.7146, "step": 3670 }, { "epoch": 0.0, "learning_rate": 0.00019999765539996744, "loss": 2.632, "step": 3675 }, { "epoch": 0.0, "learning_rate": 0.00019999764901573624, "loss": 2.4664, "step": 3680 }, { "epoch": 0.0, "learning_rate": 0.00019999764262282495, "loss": 2.2652, "step": 3685 }, { "epoch": 0.0, "learning_rate": 0.00019999763622123362, "loss": 2.5475, "step": 3690 }, { "epoch": 0.0, "learning_rate": 0.00019999762981096224, "loss": 2.5403, "step": 3695 }, { "epoch": 0.0, "learning_rate": 0.0001999976233920108, "loss": 2.8656, "step": 3700 }, { "epoch": 0.0, "learning_rate": 0.0001999976169643793, "loss": 2.6758, "step": 3705 }, { "epoch": 0.0, "learning_rate": 0.00019999761052806773, "loss": 2.4518, "step": 3710 }, { "epoch": 0.0, "learning_rate": 0.00019999760408307614, "loss": 2.5698, "step": 3715 }, { "epoch": 0.0, "learning_rate": 0.00019999759762940445, "loss": 2.5766, "step": 3720 }, { "epoch": 0.0, "learning_rate": 0.00019999759116705272, "loss": 2.6878, "step": 3725 }, { "epoch": 0.0, "learning_rate": 0.00019999758469602097, "loss": 2.4704, "step": 3730 }, { "epoch": 0.0, "learning_rate": 0.00019999757821630913, "loss": 2.56, "step": 3735 }, { "epoch": 0.0, "learning_rate": 0.00019999757172791725, "loss": 2.4563, "step": 3740 }, { "epoch": 0.0, "learning_rate": 0.00019999756523084533, "loss": 2.2081, "step": 3745 }, { "epoch": 0.0, "learning_rate": 0.00019999755872509333, "loss": 2.6101, "step": 3750 }, { "epoch": 0.0, "learning_rate": 0.00019999755221066127, "loss": 2.5998, "step": 3755 }, { "epoch": 0.0, "learning_rate": 0.0001999975456875492, "loss": 2.4085, "step": 3760 }, { "epoch": 0.0, "learning_rate": 0.00019999753915575706, "loss": 2.3679, "step": 3765 }, { "epoch": 0.0, "learning_rate": 0.0001999975326152849, "loss": 2.7311, "step": 3770 }, { "epoch": 0.0, "learning_rate": 0.00019999752606613265, "loss": 2.6587, "step": 3775 }, { "epoch": 0.0, "learning_rate": 0.00019999751950830038, "loss": 2.4725, "step": 3780 }, { "epoch": 0.0, "learning_rate": 0.00019999751294178805, "loss": 2.5657, "step": 3785 }, { "epoch": 0.0, "learning_rate": 0.00019999750636659567, "loss": 2.4418, "step": 3790 }, { "epoch": 0.0, "learning_rate": 0.00019999749978272325, "loss": 2.4117, "step": 3795 }, { "epoch": 0.0, "learning_rate": 0.0001999974931901708, "loss": 2.6565, "step": 3800 }, { "epoch": 0.0, "learning_rate": 0.00019999748658893827, "loss": 2.6, "step": 3805 }, { "epoch": 0.0, "learning_rate": 0.00019999747997902573, "loss": 2.4808, "step": 3810 }, { "epoch": 0.0, "learning_rate": 0.00019999747336043312, "loss": 2.4266, "step": 3815 }, { "epoch": 0.0, "learning_rate": 0.00019999746673316046, "loss": 2.5674, "step": 3820 }, { "epoch": 0.0, "learning_rate": 0.00019999746009720777, "loss": 2.6225, "step": 3825 }, { "epoch": 0.0, "learning_rate": 0.00019999745345257504, "loss": 2.7522, "step": 3830 }, { "epoch": 0.0, "learning_rate": 0.00019999744679926226, "loss": 2.4364, "step": 3835 }, { "epoch": 0.0, "learning_rate": 0.00019999744013726947, "loss": 2.5721, "step": 3840 }, { "epoch": 0.0, "learning_rate": 0.00019999743346659662, "loss": 2.4399, "step": 3845 }, { "epoch": 0.0, "learning_rate": 0.00019999742678724372, "loss": 2.671, "step": 3850 }, { "epoch": 0.0, "learning_rate": 0.00019999742009921078, "loss": 2.4909, "step": 3855 }, { "epoch": 0.0, "learning_rate": 0.0001999974134024978, "loss": 2.5082, "step": 3860 }, { "epoch": 0.0, "learning_rate": 0.00019999740669710478, "loss": 2.375, "step": 3865 }, { "epoch": 0.0, "learning_rate": 0.00019999739998303177, "loss": 2.6448, "step": 3870 }, { "epoch": 0.0, "learning_rate": 0.00019999739326027866, "loss": 2.5758, "step": 3875 }, { "epoch": 0.0, "learning_rate": 0.00019999738652884554, "loss": 2.512, "step": 3880 }, { "epoch": 0.0, "learning_rate": 0.00019999737978873238, "loss": 2.4797, "step": 3885 }, { "epoch": 0.0, "learning_rate": 0.0001999973730399392, "loss": 2.4649, "step": 3890 }, { "epoch": 0.0, "learning_rate": 0.00019999736628246594, "loss": 2.6255, "step": 3895 }, { "epoch": 0.0, "learning_rate": 0.0001999973595163127, "loss": 2.2548, "step": 3900 }, { "epoch": 0.0, "learning_rate": 0.00019999735274147938, "loss": 2.485, "step": 3905 }, { "epoch": 0.0, "learning_rate": 0.0001999973459579661, "loss": 2.5685, "step": 3910 }, { "epoch": 0.0, "learning_rate": 0.0001999973391657727, "loss": 2.6105, "step": 3915 }, { "epoch": 0.0, "learning_rate": 0.00019999733236489932, "loss": 2.7149, "step": 3920 }, { "epoch": 0.0, "learning_rate": 0.0001999973255553459, "loss": 2.6762, "step": 3925 }, { "epoch": 0.0, "learning_rate": 0.00019999731873711246, "loss": 2.3135, "step": 3930 }, { "epoch": 0.0, "learning_rate": 0.00019999731191019897, "loss": 2.4766, "step": 3935 }, { "epoch": 0.0, "learning_rate": 0.00019999730507460546, "loss": 2.6979, "step": 3940 }, { "epoch": 0.0, "learning_rate": 0.0001999972982303319, "loss": 2.3291, "step": 3945 }, { "epoch": 0.0, "learning_rate": 0.00019999729137737835, "loss": 2.4732, "step": 3950 }, { "epoch": 0.0, "learning_rate": 0.00019999728451574472, "loss": 2.8404, "step": 3955 }, { "epoch": 0.0, "learning_rate": 0.00019999727764543113, "loss": 2.4579, "step": 3960 }, { "epoch": 0.0, "learning_rate": 0.00019999727076643747, "loss": 2.5311, "step": 3965 }, { "epoch": 0.0, "learning_rate": 0.00019999726387876377, "loss": 2.677, "step": 3970 }, { "epoch": 0.0, "learning_rate": 0.00019999725698241009, "loss": 2.7106, "step": 3975 }, { "epoch": 0.0, "learning_rate": 0.00019999725007737636, "loss": 2.4307, "step": 3980 }, { "epoch": 0.0, "learning_rate": 0.00019999724316366262, "loss": 2.5786, "step": 3985 }, { "epoch": 0.0, "learning_rate": 0.00019999723624126886, "loss": 2.4742, "step": 3990 }, { "epoch": 0.0, "learning_rate": 0.00019999722931019504, "loss": 2.7372, "step": 3995 }, { "epoch": 0.0, "learning_rate": 0.0001999972223704412, "loss": 2.4513, "step": 4000 }, { "epoch": 0.0, "learning_rate": 0.00019999721542200737, "loss": 2.5488, "step": 4005 }, { "epoch": 0.0, "learning_rate": 0.0001999972084648935, "loss": 2.5753, "step": 4010 }, { "epoch": 0.0, "learning_rate": 0.00019999720149909962, "loss": 2.6319, "step": 4015 }, { "epoch": 0.0, "learning_rate": 0.00019999719452462573, "loss": 2.5538, "step": 4020 }, { "epoch": 0.0, "learning_rate": 0.00019999718754147182, "loss": 2.4668, "step": 4025 }, { "epoch": 0.0, "learning_rate": 0.00019999718054963787, "loss": 2.6052, "step": 4030 }, { "epoch": 0.0, "learning_rate": 0.0001999971735491239, "loss": 2.3874, "step": 4035 }, { "epoch": 0.0, "learning_rate": 0.00019999716653992992, "loss": 2.62, "step": 4040 }, { "epoch": 0.0, "learning_rate": 0.00019999715952205593, "loss": 2.6644, "step": 4045 }, { "epoch": 0.0, "learning_rate": 0.00019999715249550192, "loss": 2.4428, "step": 4050 }, { "epoch": 0.0, "learning_rate": 0.0001999971454602679, "loss": 2.6147, "step": 4055 }, { "epoch": 0.0, "learning_rate": 0.00019999713841635386, "loss": 2.639, "step": 4060 }, { "epoch": 0.0, "learning_rate": 0.0001999971313637598, "loss": 2.6309, "step": 4065 }, { "epoch": 0.0, "learning_rate": 0.00019999712430248575, "loss": 2.373, "step": 4070 }, { "epoch": 0.0, "learning_rate": 0.00019999711723253167, "loss": 2.6889, "step": 4075 }, { "epoch": 0.0, "learning_rate": 0.00019999711015389755, "loss": 2.3853, "step": 4080 }, { "epoch": 0.0, "learning_rate": 0.00019999710306658347, "loss": 2.4082, "step": 4085 }, { "epoch": 0.0, "learning_rate": 0.00019999709597058932, "loss": 2.5107, "step": 4090 }, { "epoch": 0.0, "learning_rate": 0.0001999970888659152, "loss": 2.7697, "step": 4095 }, { "epoch": 0.0, "learning_rate": 0.00019999708175256106, "loss": 2.4964, "step": 4100 }, { "epoch": 0.0, "learning_rate": 0.00019999707463052695, "loss": 2.4221, "step": 4105 }, { "epoch": 0.0, "learning_rate": 0.00019999706749981278, "loss": 2.6319, "step": 4110 }, { "epoch": 0.0, "learning_rate": 0.0001999970603604186, "loss": 2.6704, "step": 4115 }, { "epoch": 0.0, "learning_rate": 0.00019999705321234443, "loss": 2.5134, "step": 4120 }, { "epoch": 0.0, "learning_rate": 0.00019999704605559027, "loss": 2.6706, "step": 4125 }, { "epoch": 0.0, "learning_rate": 0.00019999703889015606, "loss": 2.5624, "step": 4130 }, { "epoch": 0.0, "learning_rate": 0.00019999703171604186, "loss": 2.2552, "step": 4135 }, { "epoch": 0.0, "learning_rate": 0.00019999702453324768, "loss": 2.528, "step": 4140 }, { "epoch": 0.0, "learning_rate": 0.0001999970173417735, "loss": 2.7461, "step": 4145 }, { "epoch": 0.0, "learning_rate": 0.00019999701014161928, "loss": 2.5758, "step": 4150 }, { "epoch": 0.0, "learning_rate": 0.00019999700293278508, "loss": 2.63, "step": 4155 }, { "epoch": 0.0, "learning_rate": 0.0001999969957152709, "loss": 2.4362, "step": 4160 }, { "epoch": 0.0, "learning_rate": 0.00019999698848907667, "loss": 2.4211, "step": 4165 }, { "epoch": 0.0, "learning_rate": 0.00019999698125420246, "loss": 2.4353, "step": 4170 }, { "epoch": 0.0, "learning_rate": 0.00019999697401064823, "loss": 2.7979, "step": 4175 }, { "epoch": 0.0, "learning_rate": 0.00019999696675841402, "loss": 2.6134, "step": 4180 }, { "epoch": 0.0, "learning_rate": 0.0001999969594974998, "loss": 2.4935, "step": 4185 }, { "epoch": 0.0, "learning_rate": 0.00019999695222790558, "loss": 2.5087, "step": 4190 }, { "epoch": 0.0, "learning_rate": 0.00019999694494963137, "loss": 2.7233, "step": 4195 }, { "epoch": 0.0, "learning_rate": 0.00019999693766267719, "loss": 2.6594, "step": 4200 }, { "epoch": 0.0, "learning_rate": 0.00019999693036704296, "loss": 2.5277, "step": 4205 }, { "epoch": 0.0, "learning_rate": 0.00019999692306272877, "loss": 2.413, "step": 4210 }, { "epoch": 0.01, "learning_rate": 0.00019999691574973456, "loss": 2.4858, "step": 4215 }, { "epoch": 0.01, "learning_rate": 0.00019999690842806037, "loss": 2.4431, "step": 4220 }, { "epoch": 0.01, "learning_rate": 0.0001999969010977062, "loss": 2.2495, "step": 4225 }, { "epoch": 0.01, "learning_rate": 0.000199996893758672, "loss": 2.5721, "step": 4230 }, { "epoch": 0.01, "learning_rate": 0.00019999688641095785, "loss": 2.7159, "step": 4235 }, { "epoch": 0.01, "learning_rate": 0.00019999687905456365, "loss": 2.5314, "step": 4240 }, { "epoch": 0.01, "learning_rate": 0.0001999968716894895, "loss": 2.6268, "step": 4245 }, { "epoch": 0.01, "learning_rate": 0.00019999686431573536, "loss": 2.4966, "step": 4250 }, { "epoch": 0.01, "learning_rate": 0.00019999685693330123, "loss": 2.3179, "step": 4255 }, { "epoch": 0.01, "learning_rate": 0.0001999968495421871, "loss": 2.7163, "step": 4260 }, { "epoch": 0.01, "learning_rate": 0.00019999684214239298, "loss": 2.3001, "step": 4265 }, { "epoch": 0.01, "learning_rate": 0.00019999683473391887, "loss": 2.6254, "step": 4270 }, { "epoch": 0.01, "learning_rate": 0.00019999682731676477, "loss": 2.468, "step": 4275 }, { "epoch": 0.01, "learning_rate": 0.00019999681989093067, "loss": 2.5722, "step": 4280 }, { "epoch": 0.01, "learning_rate": 0.00019999681245641662, "loss": 2.6148, "step": 4285 }, { "epoch": 0.01, "learning_rate": 0.00019999680501322256, "loss": 2.4819, "step": 4290 }, { "epoch": 0.01, "learning_rate": 0.00019999679756134854, "loss": 2.6615, "step": 4295 }, { "epoch": 0.01, "learning_rate": 0.0001999967901007945, "loss": 2.5589, "step": 4300 }, { "epoch": 0.01, "learning_rate": 0.0001999967826315605, "loss": 2.6975, "step": 4305 }, { "epoch": 0.01, "learning_rate": 0.00019999677515364649, "loss": 2.5358, "step": 4310 }, { "epoch": 0.01, "learning_rate": 0.00019999676766705254, "loss": 2.6952, "step": 4315 }, { "epoch": 0.01, "learning_rate": 0.00019999676017177858, "loss": 2.4479, "step": 4320 }, { "epoch": 0.01, "learning_rate": 0.00019999675266782463, "loss": 2.5406, "step": 4325 }, { "epoch": 0.01, "learning_rate": 0.0001999967451551907, "loss": 2.7184, "step": 4330 }, { "epoch": 0.01, "learning_rate": 0.0001999967376338768, "loss": 2.6204, "step": 4335 }, { "epoch": 0.01, "learning_rate": 0.00019999673010388292, "loss": 2.4248, "step": 4340 }, { "epoch": 0.01, "learning_rate": 0.00019999672256520908, "loss": 2.5927, "step": 4345 }, { "epoch": 0.01, "learning_rate": 0.00019999671501785523, "loss": 2.5201, "step": 4350 }, { "epoch": 0.01, "learning_rate": 0.0001999967074618214, "loss": 2.6657, "step": 4355 }, { "epoch": 0.01, "learning_rate": 0.00019999669989710764, "loss": 2.3259, "step": 4360 }, { "epoch": 0.01, "learning_rate": 0.00019999669232371385, "loss": 2.7273, "step": 4365 }, { "epoch": 0.01, "learning_rate": 0.0001999966847416401, "loss": 2.7191, "step": 4370 }, { "epoch": 0.01, "learning_rate": 0.0001999966771508864, "loss": 2.5581, "step": 4375 }, { "epoch": 0.01, "learning_rate": 0.00019999666955145272, "loss": 2.721, "step": 4380 }, { "epoch": 0.01, "learning_rate": 0.000199996661943339, "loss": 2.5538, "step": 4385 }, { "epoch": 0.01, "learning_rate": 0.00019999665432654537, "loss": 2.5718, "step": 4390 }, { "epoch": 0.01, "learning_rate": 0.0001999966467010718, "loss": 2.435, "step": 4395 }, { "epoch": 0.01, "learning_rate": 0.0001999966390669182, "loss": 2.438, "step": 4400 }, { "epoch": 0.01, "learning_rate": 0.00019999663142408465, "loss": 2.6362, "step": 4405 }, { "epoch": 0.01, "learning_rate": 0.0001999966237725711, "loss": 2.6154, "step": 4410 }, { "epoch": 0.01, "learning_rate": 0.00019999661611237762, "loss": 2.343, "step": 4415 }, { "epoch": 0.01, "learning_rate": 0.00019999660844350413, "loss": 2.4809, "step": 4420 }, { "epoch": 0.01, "learning_rate": 0.00019999660076595071, "loss": 2.4247, "step": 4425 }, { "epoch": 0.01, "learning_rate": 0.0001999965930797173, "loss": 2.5994, "step": 4430 }, { "epoch": 0.01, "learning_rate": 0.00019999658538480395, "loss": 2.573, "step": 4435 }, { "epoch": 0.01, "learning_rate": 0.00019999657768121062, "loss": 2.585, "step": 4440 }, { "epoch": 0.01, "learning_rate": 0.0001999965699689373, "loss": 2.7138, "step": 4445 }, { "epoch": 0.01, "learning_rate": 0.00019999656224798407, "loss": 2.4242, "step": 4450 }, { "epoch": 0.01, "learning_rate": 0.0001999965545183508, "loss": 2.6523, "step": 4455 }, { "epoch": 0.01, "learning_rate": 0.00019999654678003764, "loss": 2.5392, "step": 4460 }, { "epoch": 0.01, "learning_rate": 0.00019999653903304446, "loss": 2.4328, "step": 4465 }, { "epoch": 0.01, "learning_rate": 0.00019999653127737138, "loss": 2.4089, "step": 4470 }, { "epoch": 0.01, "learning_rate": 0.00019999652351301828, "loss": 2.6982, "step": 4475 }, { "epoch": 0.01, "learning_rate": 0.00019999651573998522, "loss": 2.6046, "step": 4480 }, { "epoch": 0.01, "learning_rate": 0.00019999650795827223, "loss": 2.5182, "step": 4485 }, { "epoch": 0.01, "learning_rate": 0.00019999650016787928, "loss": 2.8916, "step": 4490 }, { "epoch": 0.01, "learning_rate": 0.00019999649236880634, "loss": 2.6526, "step": 4495 }, { "epoch": 0.01, "learning_rate": 0.0001999964845610535, "loss": 2.5543, "step": 4500 }, { "epoch": 0.01, "learning_rate": 0.00019999647674462064, "loss": 2.5293, "step": 4505 }, { "epoch": 0.01, "learning_rate": 0.00019999646891950785, "loss": 2.4421, "step": 4510 }, { "epoch": 0.01, "learning_rate": 0.0001999964610857151, "loss": 2.8573, "step": 4515 }, { "epoch": 0.01, "learning_rate": 0.0001999964532432424, "loss": 2.5611, "step": 4520 }, { "epoch": 0.01, "learning_rate": 0.00019999644539208972, "loss": 2.4044, "step": 4525 }, { "epoch": 0.01, "learning_rate": 0.00019999643753225712, "loss": 2.6072, "step": 4530 }, { "epoch": 0.01, "learning_rate": 0.00019999642966374458, "loss": 2.5995, "step": 4535 }, { "epoch": 0.01, "learning_rate": 0.00019999642178655203, "loss": 2.5983, "step": 4540 }, { "epoch": 0.01, "learning_rate": 0.00019999641390067957, "loss": 2.431, "step": 4545 }, { "epoch": 0.01, "learning_rate": 0.00019999640600612716, "loss": 2.4112, "step": 4550 }, { "epoch": 0.01, "learning_rate": 0.00019999639810289475, "loss": 2.7427, "step": 4555 }, { "epoch": 0.01, "learning_rate": 0.00019999639019098244, "loss": 2.4633, "step": 4560 }, { "epoch": 0.01, "learning_rate": 0.00019999638227039017, "loss": 2.7051, "step": 4565 }, { "epoch": 0.01, "learning_rate": 0.00019999637434111795, "loss": 2.6208, "step": 4570 }, { "epoch": 0.01, "learning_rate": 0.00019999636640316578, "loss": 2.3909, "step": 4575 }, { "epoch": 0.01, "learning_rate": 0.00019999635845653366, "loss": 2.6401, "step": 4580 }, { "epoch": 0.01, "learning_rate": 0.00019999635050122158, "loss": 2.544, "step": 4585 }, { "epoch": 0.01, "learning_rate": 0.0001999963425372296, "loss": 2.4398, "step": 4590 }, { "epoch": 0.01, "learning_rate": 0.00019999633456455765, "loss": 2.5029, "step": 4595 }, { "epoch": 0.01, "learning_rate": 0.00019999632658320574, "loss": 2.4968, "step": 4600 }, { "epoch": 0.01, "learning_rate": 0.0001999963185931739, "loss": 2.595, "step": 4605 }, { "epoch": 0.01, "learning_rate": 0.0001999963105944621, "loss": 2.5892, "step": 4610 }, { "epoch": 0.01, "learning_rate": 0.00019999630258707036, "loss": 2.418, "step": 4615 }, { "epoch": 0.01, "learning_rate": 0.0001999962945709987, "loss": 2.3198, "step": 4620 }, { "epoch": 0.01, "learning_rate": 0.0001999962865462471, "loss": 2.5866, "step": 4625 }, { "epoch": 0.01, "learning_rate": 0.00019999627851281554, "loss": 2.6689, "step": 4630 }, { "epoch": 0.01, "learning_rate": 0.00019999627047070405, "loss": 2.6745, "step": 4635 }, { "epoch": 0.01, "learning_rate": 0.0001999962624199126, "loss": 2.5815, "step": 4640 }, { "epoch": 0.01, "learning_rate": 0.00019999625436044124, "loss": 2.4527, "step": 4645 }, { "epoch": 0.01, "learning_rate": 0.00019999624629228992, "loss": 2.3316, "step": 4650 }, { "epoch": 0.01, "learning_rate": 0.00019999623821545867, "loss": 2.6772, "step": 4655 }, { "epoch": 0.01, "learning_rate": 0.00019999623012994752, "loss": 2.5055, "step": 4660 }, { "epoch": 0.01, "learning_rate": 0.0001999962220357564, "loss": 2.6098, "step": 4665 }, { "epoch": 0.01, "learning_rate": 0.00019999621393288535, "loss": 2.5823, "step": 4670 }, { "epoch": 0.01, "learning_rate": 0.00019999620582133437, "loss": 2.5924, "step": 4675 }, { "epoch": 0.01, "learning_rate": 0.00019999619770110343, "loss": 2.5865, "step": 4680 }, { "epoch": 0.01, "learning_rate": 0.0001999961895721926, "loss": 2.4947, "step": 4685 }, { "epoch": 0.01, "learning_rate": 0.00019999618143460183, "loss": 2.5983, "step": 4690 }, { "epoch": 0.01, "learning_rate": 0.0001999961732883311, "loss": 2.3177, "step": 4695 }, { "epoch": 0.01, "learning_rate": 0.00019999616513338044, "loss": 2.4433, "step": 4700 }, { "epoch": 0.01, "learning_rate": 0.00019999615696974986, "loss": 2.5015, "step": 4705 }, { "epoch": 0.01, "learning_rate": 0.00019999614879743935, "loss": 2.618, "step": 4710 }, { "epoch": 0.01, "learning_rate": 0.0001999961406164489, "loss": 2.2983, "step": 4715 }, { "epoch": 0.01, "learning_rate": 0.00019999613242677856, "loss": 2.4485, "step": 4720 }, { "epoch": 0.01, "learning_rate": 0.00019999612422842827, "loss": 2.506, "step": 4725 }, { "epoch": 0.01, "learning_rate": 0.00019999611602139808, "loss": 2.7101, "step": 4730 }, { "epoch": 0.01, "learning_rate": 0.0001999961078056879, "loss": 2.6012, "step": 4735 }, { "epoch": 0.01, "learning_rate": 0.00019999609958129785, "loss": 2.6079, "step": 4740 }, { "epoch": 0.01, "learning_rate": 0.00019999609134822787, "loss": 2.6829, "step": 4745 }, { "epoch": 0.01, "learning_rate": 0.00019999608310647794, "loss": 2.6195, "step": 4750 }, { "epoch": 0.01, "learning_rate": 0.00019999607485604812, "loss": 2.3357, "step": 4755 }, { "epoch": 0.01, "learning_rate": 0.00019999606659693836, "loss": 2.6011, "step": 4760 }, { "epoch": 0.01, "learning_rate": 0.0001999960583291487, "loss": 2.7226, "step": 4765 }, { "epoch": 0.01, "learning_rate": 0.0001999960500526791, "loss": 2.5665, "step": 4770 }, { "epoch": 0.01, "learning_rate": 0.00019999604176752957, "loss": 2.5458, "step": 4775 }, { "epoch": 0.01, "learning_rate": 0.00019999603347370013, "loss": 2.6584, "step": 4780 }, { "epoch": 0.01, "learning_rate": 0.00019999602517119076, "loss": 2.8255, "step": 4785 }, { "epoch": 0.01, "learning_rate": 0.00019999601686000151, "loss": 2.5149, "step": 4790 }, { "epoch": 0.01, "learning_rate": 0.0001999960085401323, "loss": 2.5149, "step": 4795 }, { "epoch": 0.01, "learning_rate": 0.0001999960002115832, "loss": 2.529, "step": 4800 }, { "epoch": 0.01, "learning_rate": 0.00019999599187435417, "loss": 2.4177, "step": 4805 }, { "epoch": 0.01, "learning_rate": 0.00019999598352844525, "loss": 2.8094, "step": 4810 }, { "epoch": 0.01, "learning_rate": 0.0001999959751738564, "loss": 2.726, "step": 4815 }, { "epoch": 0.01, "learning_rate": 0.0001999959668105876, "loss": 2.493, "step": 4820 }, { "epoch": 0.01, "learning_rate": 0.00019999595843863893, "loss": 2.4468, "step": 4825 }, { "epoch": 0.01, "learning_rate": 0.00019999595005801036, "loss": 2.451, "step": 4830 }, { "epoch": 0.01, "learning_rate": 0.00019999594166870185, "loss": 2.4086, "step": 4835 }, { "epoch": 0.01, "learning_rate": 0.00019999593327071344, "loss": 2.6423, "step": 4840 }, { "epoch": 0.01, "learning_rate": 0.00019999592486404512, "loss": 2.6097, "step": 4845 }, { "epoch": 0.01, "learning_rate": 0.0001999959164486969, "loss": 2.7155, "step": 4850 }, { "epoch": 0.01, "learning_rate": 0.00019999590802466876, "loss": 2.3753, "step": 4855 }, { "epoch": 0.01, "learning_rate": 0.0001999958995919607, "loss": 2.7185, "step": 4860 }, { "epoch": 0.01, "learning_rate": 0.00019999589115057275, "loss": 2.565, "step": 4865 }, { "epoch": 0.01, "learning_rate": 0.00019999588270050488, "loss": 2.4212, "step": 4870 }, { "epoch": 0.01, "learning_rate": 0.00019999587424175715, "loss": 2.5576, "step": 4875 }, { "epoch": 0.01, "learning_rate": 0.00019999586577432944, "loss": 2.5254, "step": 4880 }, { "epoch": 0.01, "learning_rate": 0.0001999958572982219, "loss": 2.3984, "step": 4885 }, { "epoch": 0.01, "learning_rate": 0.0001999958488134344, "loss": 2.5082, "step": 4890 }, { "epoch": 0.01, "learning_rate": 0.00019999584031996702, "loss": 2.6574, "step": 4895 }, { "epoch": 0.01, "learning_rate": 0.00019999583181781977, "loss": 2.3945, "step": 4900 }, { "epoch": 0.01, "learning_rate": 0.00019999582330699255, "loss": 2.5044, "step": 4905 }, { "epoch": 0.01, "learning_rate": 0.0001999958147874855, "loss": 2.6875, "step": 4910 }, { "epoch": 0.01, "learning_rate": 0.0001999958062592985, "loss": 2.5123, "step": 4915 }, { "epoch": 0.01, "learning_rate": 0.00019999579772243163, "loss": 2.4858, "step": 4920 }, { "epoch": 0.01, "learning_rate": 0.00019999578917688485, "loss": 2.3913, "step": 4925 }, { "epoch": 0.01, "learning_rate": 0.0001999957806226582, "loss": 2.6742, "step": 4930 }, { "epoch": 0.01, "learning_rate": 0.0001999957720597516, "loss": 2.7184, "step": 4935 }, { "epoch": 0.01, "learning_rate": 0.00019999576348816514, "loss": 2.5892, "step": 4940 }, { "epoch": 0.01, "learning_rate": 0.00019999575490789876, "loss": 2.6998, "step": 4945 }, { "epoch": 0.01, "learning_rate": 0.00019999574631895253, "loss": 2.4694, "step": 4950 }, { "epoch": 0.01, "learning_rate": 0.00019999573772132637, "loss": 2.569, "step": 4955 }, { "epoch": 0.01, "learning_rate": 0.00019999572911502032, "loss": 2.3539, "step": 4960 }, { "epoch": 0.01, "learning_rate": 0.0001999957205000344, "loss": 2.2503, "step": 4965 }, { "epoch": 0.01, "learning_rate": 0.00019999571187636857, "loss": 2.4465, "step": 4970 }, { "epoch": 0.01, "learning_rate": 0.00019999570324402287, "loss": 2.6047, "step": 4975 }, { "epoch": 0.01, "learning_rate": 0.00019999569460299723, "loss": 2.6365, "step": 4980 }, { "epoch": 0.01, "learning_rate": 0.00019999568595329174, "loss": 2.5386, "step": 4985 }, { "epoch": 0.01, "learning_rate": 0.00019999567729490637, "loss": 2.5822, "step": 4990 }, { "epoch": 0.01, "learning_rate": 0.00019999566862784107, "loss": 2.411, "step": 4995 }, { "epoch": 0.01, "learning_rate": 0.0001999956599520959, "loss": 2.9174, "step": 5000 }, { "epoch": 0.01, "learning_rate": 0.00019999565126767088, "loss": 2.4363, "step": 5005 }, { "epoch": 0.01, "learning_rate": 0.00019999564257456594, "loss": 2.4856, "step": 5010 }, { "epoch": 0.01, "learning_rate": 0.00019999563387278113, "loss": 2.4739, "step": 5015 }, { "epoch": 0.01, "learning_rate": 0.00019999562516231643, "loss": 2.6323, "step": 5020 }, { "epoch": 0.01, "learning_rate": 0.00019999561644317186, "loss": 2.8467, "step": 5025 }, { "epoch": 0.01, "learning_rate": 0.0001999956077153474, "loss": 2.4187, "step": 5030 }, { "epoch": 0.01, "learning_rate": 0.00019999559897884305, "loss": 2.5242, "step": 5035 }, { "epoch": 0.01, "learning_rate": 0.0001999955902336588, "loss": 2.494, "step": 5040 }, { "epoch": 0.01, "learning_rate": 0.00019999558147979475, "loss": 2.4001, "step": 5045 }, { "epoch": 0.01, "learning_rate": 0.00019999557271725073, "loss": 2.6845, "step": 5050 }, { "epoch": 0.01, "learning_rate": 0.00019999556394602688, "loss": 2.5759, "step": 5055 }, { "epoch": 0.01, "learning_rate": 0.00019999555516612313, "loss": 2.5307, "step": 5060 }, { "epoch": 0.01, "learning_rate": 0.0001999955463775395, "loss": 2.4896, "step": 5065 }, { "epoch": 0.01, "learning_rate": 0.00019999553758027602, "loss": 2.4706, "step": 5070 }, { "epoch": 0.01, "learning_rate": 0.00019999552877433265, "loss": 2.488, "step": 5075 }, { "epoch": 0.01, "learning_rate": 0.0001999955199597094, "loss": 2.5807, "step": 5080 }, { "epoch": 0.01, "learning_rate": 0.0001999955111364063, "loss": 2.6193, "step": 5085 }, { "epoch": 0.01, "learning_rate": 0.0001999955023044233, "loss": 2.4777, "step": 5090 }, { "epoch": 0.01, "learning_rate": 0.00019999549346376045, "loss": 2.662, "step": 5095 }, { "epoch": 0.01, "learning_rate": 0.0001999954846144177, "loss": 2.2384, "step": 5100 }, { "epoch": 0.01, "learning_rate": 0.0001999954757563951, "loss": 2.4218, "step": 5105 }, { "epoch": 0.01, "learning_rate": 0.0001999954668896926, "loss": 2.507, "step": 5110 }, { "epoch": 0.01, "learning_rate": 0.00019999545801431027, "loss": 2.4263, "step": 5115 }, { "epoch": 0.01, "learning_rate": 0.00019999544913024806, "loss": 2.6976, "step": 5120 }, { "epoch": 0.01, "learning_rate": 0.000199995440237506, "loss": 2.6457, "step": 5125 }, { "epoch": 0.01, "learning_rate": 0.00019999543133608402, "loss": 2.4234, "step": 5130 }, { "epoch": 0.01, "learning_rate": 0.00019999542242598223, "loss": 2.6219, "step": 5135 }, { "epoch": 0.01, "learning_rate": 0.00019999541350720055, "loss": 2.6571, "step": 5140 }, { "epoch": 0.01, "learning_rate": 0.000199995404579739, "loss": 2.5993, "step": 5145 }, { "epoch": 0.01, "learning_rate": 0.0001999953956435976, "loss": 2.8249, "step": 5150 }, { "epoch": 0.01, "learning_rate": 0.0001999953866987763, "loss": 2.6825, "step": 5155 }, { "epoch": 0.01, "learning_rate": 0.00019999537774527518, "loss": 2.3894, "step": 5160 }, { "epoch": 0.01, "learning_rate": 0.00019999536878309416, "loss": 2.356, "step": 5165 }, { "epoch": 0.01, "learning_rate": 0.00019999535981223332, "loss": 2.5617, "step": 5170 }, { "epoch": 0.01, "learning_rate": 0.0001999953508326926, "loss": 2.4124, "step": 5175 }, { "epoch": 0.01, "learning_rate": 0.00019999534184447204, "loss": 2.5813, "step": 5180 }, { "epoch": 0.01, "learning_rate": 0.00019999533284757162, "loss": 2.4507, "step": 5185 }, { "epoch": 0.01, "learning_rate": 0.00019999532384199132, "loss": 2.6176, "step": 5190 }, { "epoch": 0.01, "learning_rate": 0.00019999531482773114, "loss": 2.3542, "step": 5195 }, { "epoch": 0.01, "learning_rate": 0.00019999530580479114, "loss": 2.4879, "step": 5200 }, { "epoch": 0.01, "learning_rate": 0.00019999529677317132, "loss": 2.5045, "step": 5205 }, { "epoch": 0.01, "learning_rate": 0.00019999528773287159, "loss": 2.3635, "step": 5210 }, { "epoch": 0.01, "learning_rate": 0.00019999527868389203, "loss": 2.6403, "step": 5215 }, { "epoch": 0.01, "learning_rate": 0.0001999952696262326, "loss": 2.5783, "step": 5220 }, { "epoch": 0.01, "learning_rate": 0.00019999526055989334, "loss": 2.5197, "step": 5225 }, { "epoch": 0.01, "learning_rate": 0.0001999952514848742, "loss": 2.3619, "step": 5230 }, { "epoch": 0.01, "learning_rate": 0.00019999524240117523, "loss": 2.4988, "step": 5235 }, { "epoch": 0.01, "learning_rate": 0.0001999952333087964, "loss": 2.5926, "step": 5240 }, { "epoch": 0.01, "learning_rate": 0.00019999522420773773, "loss": 2.2195, "step": 5245 }, { "epoch": 0.01, "learning_rate": 0.0001999952150979992, "loss": 2.4463, "step": 5250 }, { "epoch": 0.01, "learning_rate": 0.00019999520597958084, "loss": 2.4735, "step": 5255 }, { "epoch": 0.01, "learning_rate": 0.00019999519685248262, "loss": 2.4903, "step": 5260 }, { "epoch": 0.01, "learning_rate": 0.00019999518771670455, "loss": 2.4087, "step": 5265 }, { "epoch": 0.01, "learning_rate": 0.00019999517857224666, "loss": 2.7214, "step": 5270 }, { "epoch": 0.01, "learning_rate": 0.0001999951694191089, "loss": 2.8339, "step": 5275 }, { "epoch": 0.01, "learning_rate": 0.00019999516025729134, "loss": 2.3704, "step": 5280 }, { "epoch": 0.01, "learning_rate": 0.00019999515108679389, "loss": 2.7017, "step": 5285 }, { "epoch": 0.01, "learning_rate": 0.0001999951419076166, "loss": 2.4457, "step": 5290 }, { "epoch": 0.01, "learning_rate": 0.00019999513271975948, "loss": 2.3882, "step": 5295 }, { "epoch": 0.01, "learning_rate": 0.0001999951235232225, "loss": 2.6735, "step": 5300 }, { "epoch": 0.01, "learning_rate": 0.0001999951143180057, "loss": 2.4836, "step": 5305 }, { "epoch": 0.01, "learning_rate": 0.00019999510510410908, "loss": 2.8034, "step": 5310 }, { "epoch": 0.01, "learning_rate": 0.0001999950958815326, "loss": 2.5311, "step": 5315 }, { "epoch": 0.01, "learning_rate": 0.0001999950866502763, "loss": 2.387, "step": 5320 }, { "epoch": 0.01, "learning_rate": 0.00019999507741034014, "loss": 2.3596, "step": 5325 }, { "epoch": 0.01, "learning_rate": 0.00019999506816172416, "loss": 2.6885, "step": 5330 }, { "epoch": 0.01, "learning_rate": 0.00019999505890442833, "loss": 2.6245, "step": 5335 }, { "epoch": 0.01, "learning_rate": 0.00019999504963845268, "loss": 2.5971, "step": 5340 }, { "epoch": 0.01, "learning_rate": 0.0001999950403637972, "loss": 2.6143, "step": 5345 }, { "epoch": 0.01, "learning_rate": 0.00019999503108046187, "loss": 2.2141, "step": 5350 }, { "epoch": 0.01, "learning_rate": 0.0001999950217884467, "loss": 2.3336, "step": 5355 }, { "epoch": 0.01, "learning_rate": 0.00019999501248775173, "loss": 2.3443, "step": 5360 }, { "epoch": 0.01, "learning_rate": 0.00019999500317837693, "loss": 2.362, "step": 5365 }, { "epoch": 0.01, "learning_rate": 0.0001999949938603223, "loss": 2.478, "step": 5370 }, { "epoch": 0.01, "learning_rate": 0.00019999498453358782, "loss": 2.6018, "step": 5375 }, { "epoch": 0.01, "learning_rate": 0.00019999497519817354, "loss": 2.7575, "step": 5380 }, { "epoch": 0.01, "learning_rate": 0.00019999496585407938, "loss": 2.6625, "step": 5385 }, { "epoch": 0.01, "learning_rate": 0.00019999495650130546, "loss": 2.6967, "step": 5390 }, { "epoch": 0.01, "learning_rate": 0.0001999949471398517, "loss": 2.3785, "step": 5395 }, { "epoch": 0.01, "learning_rate": 0.0001999949377697181, "loss": 2.5965, "step": 5400 }, { "epoch": 0.01, "learning_rate": 0.00019999492839090468, "loss": 2.5036, "step": 5405 }, { "epoch": 0.01, "learning_rate": 0.00019999491900341143, "loss": 2.3819, "step": 5410 }, { "epoch": 0.01, "learning_rate": 0.00019999490960723838, "loss": 2.4559, "step": 5415 }, { "epoch": 0.01, "learning_rate": 0.0001999949002023855, "loss": 2.7719, "step": 5420 }, { "epoch": 0.01, "learning_rate": 0.00019999489078885278, "loss": 2.5455, "step": 5425 }, { "epoch": 0.01, "learning_rate": 0.00019999488136664026, "loss": 2.4121, "step": 5430 }, { "epoch": 0.01, "learning_rate": 0.0001999948719357479, "loss": 2.5166, "step": 5435 }, { "epoch": 0.01, "learning_rate": 0.00019999486249617574, "loss": 2.642, "step": 5440 }, { "epoch": 0.01, "learning_rate": 0.00019999485304792377, "loss": 2.3874, "step": 5445 }, { "epoch": 0.01, "learning_rate": 0.00019999484359099195, "loss": 2.492, "step": 5450 }, { "epoch": 0.01, "learning_rate": 0.0001999948341253804, "loss": 2.5342, "step": 5455 }, { "epoch": 0.01, "learning_rate": 0.00019999482465108895, "loss": 2.1746, "step": 5460 }, { "epoch": 0.01, "learning_rate": 0.0001999948151681177, "loss": 2.4772, "step": 5465 }, { "epoch": 0.01, "learning_rate": 0.00019999480567646667, "loss": 2.2773, "step": 5470 }, { "epoch": 0.01, "learning_rate": 0.00019999479617613579, "loss": 2.3527, "step": 5475 }, { "epoch": 0.01, "learning_rate": 0.00019999478666712515, "loss": 2.6607, "step": 5480 }, { "epoch": 0.01, "learning_rate": 0.00019999477714943465, "loss": 2.6414, "step": 5485 }, { "epoch": 0.01, "learning_rate": 0.00019999476762306434, "loss": 2.6242, "step": 5490 }, { "epoch": 0.01, "learning_rate": 0.00019999475808801423, "loss": 2.5702, "step": 5495 }, { "epoch": 0.01, "learning_rate": 0.00019999474854428436, "loss": 2.5194, "step": 5500 }, { "epoch": 0.01, "learning_rate": 0.00019999473899187463, "loss": 2.3377, "step": 5505 }, { "epoch": 0.01, "learning_rate": 0.0001999947294307851, "loss": 2.2831, "step": 5510 }, { "epoch": 0.01, "learning_rate": 0.0001999947198610158, "loss": 2.5063, "step": 5515 }, { "epoch": 0.01, "learning_rate": 0.00019999471028256665, "loss": 2.6298, "step": 5520 }, { "epoch": 0.01, "learning_rate": 0.00019999470069543773, "loss": 2.3577, "step": 5525 }, { "epoch": 0.01, "learning_rate": 0.000199994691099629, "loss": 2.4752, "step": 5530 }, { "epoch": 0.01, "learning_rate": 0.00019999468149514045, "loss": 2.7031, "step": 5535 }, { "epoch": 0.01, "learning_rate": 0.00019999467188197212, "loss": 2.514, "step": 5540 }, { "epoch": 0.01, "learning_rate": 0.00019999466226012396, "loss": 2.6673, "step": 5545 }, { "epoch": 0.01, "learning_rate": 0.00019999465262959603, "loss": 2.6299, "step": 5550 }, { "epoch": 0.01, "learning_rate": 0.00019999464299038828, "loss": 2.667, "step": 5555 }, { "epoch": 0.01, "learning_rate": 0.00019999463334250076, "loss": 2.5126, "step": 5560 }, { "epoch": 0.01, "learning_rate": 0.0001999946236859334, "loss": 2.5674, "step": 5565 }, { "epoch": 0.01, "learning_rate": 0.0001999946140206863, "loss": 2.278, "step": 5570 }, { "epoch": 0.01, "learning_rate": 0.00019999460434675937, "loss": 2.0909, "step": 5575 }, { "epoch": 0.01, "learning_rate": 0.00019999459466415263, "loss": 2.2677, "step": 5580 }, { "epoch": 0.01, "learning_rate": 0.00019999458497286612, "loss": 2.2392, "step": 5585 }, { "epoch": 0.01, "learning_rate": 0.00019999457527289982, "loss": 2.7081, "step": 5590 }, { "epoch": 0.01, "learning_rate": 0.00019999456556425374, "loss": 2.4179, "step": 5595 }, { "epoch": 0.01, "learning_rate": 0.00019999455584692784, "loss": 2.5304, "step": 5600 }, { "epoch": 0.01, "learning_rate": 0.00019999454612092216, "loss": 2.4348, "step": 5605 }, { "epoch": 0.01, "learning_rate": 0.0001999945363862367, "loss": 2.672, "step": 5610 }, { "epoch": 0.01, "learning_rate": 0.00019999452664287143, "loss": 2.226, "step": 5615 }, { "epoch": 0.01, "learning_rate": 0.0001999945168908264, "loss": 2.543, "step": 5620 }, { "epoch": 0.01, "learning_rate": 0.00019999450713010158, "loss": 2.5913, "step": 5625 }, { "epoch": 0.01, "learning_rate": 0.00019999449736069695, "loss": 2.7104, "step": 5630 }, { "epoch": 0.01, "learning_rate": 0.00019999448758261255, "loss": 2.6155, "step": 5635 }, { "epoch": 0.01, "learning_rate": 0.00019999447779584837, "loss": 2.6001, "step": 5640 }, { "epoch": 0.01, "learning_rate": 0.0001999944680004044, "loss": 2.2563, "step": 5645 }, { "epoch": 0.01, "learning_rate": 0.00019999445819628064, "loss": 2.4679, "step": 5650 }, { "epoch": 0.01, "learning_rate": 0.00019999444838347713, "loss": 2.2942, "step": 5655 }, { "epoch": 0.01, "learning_rate": 0.0001999944385619938, "loss": 2.4746, "step": 5660 }, { "epoch": 0.01, "learning_rate": 0.0001999944287318307, "loss": 2.6527, "step": 5665 }, { "epoch": 0.01, "learning_rate": 0.0001999944188929878, "loss": 2.7976, "step": 5670 }, { "epoch": 0.01, "learning_rate": 0.00019999440904546517, "loss": 2.6258, "step": 5675 }, { "epoch": 0.01, "learning_rate": 0.00019999439918926272, "loss": 2.3405, "step": 5680 }, { "epoch": 0.01, "learning_rate": 0.00019999438932438054, "loss": 2.5708, "step": 5685 }, { "epoch": 0.01, "learning_rate": 0.00019999437945081855, "loss": 2.7506, "step": 5690 }, { "epoch": 0.01, "learning_rate": 0.0001999943695685768, "loss": 2.4775, "step": 5695 }, { "epoch": 0.01, "learning_rate": 0.00019999435967765528, "loss": 2.6628, "step": 5700 }, { "epoch": 0.01, "learning_rate": 0.00019999434977805396, "loss": 2.5022, "step": 5705 }, { "epoch": 0.01, "learning_rate": 0.00019999433986977287, "loss": 2.4642, "step": 5710 }, { "epoch": 0.01, "learning_rate": 0.00019999432995281203, "loss": 2.5368, "step": 5715 }, { "epoch": 0.01, "learning_rate": 0.00019999432002717143, "loss": 2.6161, "step": 5720 }, { "epoch": 0.01, "learning_rate": 0.00019999431009285102, "loss": 2.6161, "step": 5725 }, { "epoch": 0.01, "learning_rate": 0.00019999430014985088, "loss": 2.6724, "step": 5730 }, { "epoch": 0.01, "learning_rate": 0.00019999429019817096, "loss": 2.4299, "step": 5735 }, { "epoch": 0.01, "learning_rate": 0.00019999428023781125, "loss": 2.6254, "step": 5740 }, { "epoch": 0.01, "learning_rate": 0.0001999942702687718, "loss": 2.4306, "step": 5745 }, { "epoch": 0.01, "learning_rate": 0.0001999942602910526, "loss": 2.2805, "step": 5750 }, { "epoch": 0.01, "learning_rate": 0.0001999942503046536, "loss": 2.6213, "step": 5755 }, { "epoch": 0.01, "learning_rate": 0.00019999424030957483, "loss": 2.4528, "step": 5760 }, { "epoch": 0.01, "learning_rate": 0.00019999423030581633, "loss": 2.5347, "step": 5765 }, { "epoch": 0.01, "learning_rate": 0.00019999422029337805, "loss": 2.4189, "step": 5770 }, { "epoch": 0.01, "learning_rate": 0.00019999421027226002, "loss": 2.7847, "step": 5775 }, { "epoch": 0.01, "learning_rate": 0.00019999420024246223, "loss": 2.386, "step": 5780 }, { "epoch": 0.01, "learning_rate": 0.00019999419020398467, "loss": 2.3243, "step": 5785 }, { "epoch": 0.01, "learning_rate": 0.00019999418015682733, "loss": 2.501, "step": 5790 }, { "epoch": 0.01, "learning_rate": 0.00019999417010099026, "loss": 2.5756, "step": 5795 }, { "epoch": 0.01, "learning_rate": 0.0001999941600364734, "loss": 2.5002, "step": 5800 }, { "epoch": 0.01, "learning_rate": 0.00019999414996327685, "loss": 2.5806, "step": 5805 }, { "epoch": 0.01, "learning_rate": 0.0001999941398814005, "loss": 2.6412, "step": 5810 }, { "epoch": 0.01, "learning_rate": 0.0001999941297908444, "loss": 2.2917, "step": 5815 }, { "epoch": 0.01, "learning_rate": 0.00019999411969160856, "loss": 2.5026, "step": 5820 }, { "epoch": 0.01, "learning_rate": 0.00019999410958369297, "loss": 2.4119, "step": 5825 }, { "epoch": 0.01, "learning_rate": 0.00019999409946709761, "loss": 2.5459, "step": 5830 }, { "epoch": 0.01, "learning_rate": 0.0001999940893418225, "loss": 2.3567, "step": 5835 }, { "epoch": 0.01, "learning_rate": 0.00019999407920786766, "loss": 2.5446, "step": 5840 }, { "epoch": 0.01, "learning_rate": 0.00019999406906523305, "loss": 2.4614, "step": 5845 }, { "epoch": 0.01, "learning_rate": 0.00019999405891391872, "loss": 2.4516, "step": 5850 }, { "epoch": 0.01, "learning_rate": 0.0001999940487539246, "loss": 2.7649, "step": 5855 }, { "epoch": 0.01, "learning_rate": 0.0001999940385852508, "loss": 2.4139, "step": 5860 }, { "epoch": 0.01, "learning_rate": 0.0001999940284078972, "loss": 2.5276, "step": 5865 }, { "epoch": 0.01, "learning_rate": 0.00019999401822186385, "loss": 2.6131, "step": 5870 }, { "epoch": 0.01, "learning_rate": 0.00019999400802715077, "loss": 2.2981, "step": 5875 }, { "epoch": 0.01, "learning_rate": 0.00019999399782375795, "loss": 2.435, "step": 5880 }, { "epoch": 0.01, "learning_rate": 0.00019999398761168542, "loss": 2.4162, "step": 5885 }, { "epoch": 0.01, "learning_rate": 0.00019999397739093312, "loss": 2.4246, "step": 5890 }, { "epoch": 0.01, "learning_rate": 0.00019999396716150107, "loss": 2.6836, "step": 5895 }, { "epoch": 0.01, "learning_rate": 0.0001999939569233893, "loss": 2.537, "step": 5900 }, { "epoch": 0.01, "learning_rate": 0.00019999394667659777, "loss": 2.4223, "step": 5905 }, { "epoch": 0.01, "learning_rate": 0.00019999393642112652, "loss": 2.376, "step": 5910 }, { "epoch": 0.01, "learning_rate": 0.00019999392615697553, "loss": 2.5755, "step": 5915 }, { "epoch": 0.01, "learning_rate": 0.0001999939158841448, "loss": 2.5933, "step": 5920 }, { "epoch": 0.01, "learning_rate": 0.00019999390560263434, "loss": 2.0977, "step": 5925 }, { "epoch": 0.01, "learning_rate": 0.00019999389531244414, "loss": 2.705, "step": 5930 }, { "epoch": 0.01, "learning_rate": 0.0001999938850135742, "loss": 2.5654, "step": 5935 }, { "epoch": 0.01, "learning_rate": 0.00019999387470602455, "loss": 2.3034, "step": 5940 }, { "epoch": 0.01, "learning_rate": 0.00019999386438979518, "loss": 2.5059, "step": 5945 }, { "epoch": 0.01, "learning_rate": 0.00019999385406488601, "loss": 2.5865, "step": 5950 }, { "epoch": 0.01, "learning_rate": 0.00019999384373129718, "loss": 2.7423, "step": 5955 }, { "epoch": 0.01, "learning_rate": 0.00019999383338902858, "loss": 2.5133, "step": 5960 }, { "epoch": 0.01, "learning_rate": 0.0001999938230380803, "loss": 2.347, "step": 5965 }, { "epoch": 0.01, "learning_rate": 0.00019999381267845226, "loss": 2.657, "step": 5970 }, { "epoch": 0.01, "learning_rate": 0.00019999380231014452, "loss": 2.4238, "step": 5975 }, { "epoch": 0.01, "learning_rate": 0.00019999379193315703, "loss": 2.4641, "step": 5980 }, { "epoch": 0.01, "learning_rate": 0.00019999378154748982, "loss": 2.5307, "step": 5985 }, { "epoch": 0.01, "learning_rate": 0.0001999937711531429, "loss": 2.5566, "step": 5990 }, { "epoch": 0.01, "learning_rate": 0.00019999376075011623, "loss": 2.5494, "step": 5995 }, { "epoch": 0.01, "learning_rate": 0.00019999375033840985, "loss": 2.4661, "step": 6000 }, { "epoch": 0.01, "learning_rate": 0.00019999373991802378, "loss": 2.6792, "step": 6005 }, { "epoch": 0.01, "learning_rate": 0.00019999372948895796, "loss": 2.4679, "step": 6010 }, { "epoch": 0.01, "learning_rate": 0.00019999371905121243, "loss": 2.4224, "step": 6015 }, { "epoch": 0.01, "learning_rate": 0.00019999370860478716, "loss": 2.58, "step": 6020 }, { "epoch": 0.01, "learning_rate": 0.0001999936981496822, "loss": 2.4141, "step": 6025 }, { "epoch": 0.01, "learning_rate": 0.00019999368768589752, "loss": 2.4102, "step": 6030 }, { "epoch": 0.01, "learning_rate": 0.00019999367721343313, "loss": 2.4277, "step": 6035 }, { "epoch": 0.01, "learning_rate": 0.00019999366673228904, "loss": 2.3389, "step": 6040 }, { "epoch": 0.01, "learning_rate": 0.0001999936562424652, "loss": 2.6248, "step": 6045 }, { "epoch": 0.01, "learning_rate": 0.00019999364574396165, "loss": 2.5639, "step": 6050 }, { "epoch": 0.01, "learning_rate": 0.00019999363523677842, "loss": 2.3538, "step": 6055 }, { "epoch": 0.01, "learning_rate": 0.00019999362472091544, "loss": 2.6417, "step": 6060 }, { "epoch": 0.01, "learning_rate": 0.00019999361419637278, "loss": 2.4795, "step": 6065 }, { "epoch": 0.01, "learning_rate": 0.0001999936036631504, "loss": 2.6217, "step": 6070 }, { "epoch": 0.01, "learning_rate": 0.00019999359312124836, "loss": 2.5051, "step": 6075 }, { "epoch": 0.01, "learning_rate": 0.00019999358257066655, "loss": 2.5224, "step": 6080 }, { "epoch": 0.01, "learning_rate": 0.00019999357201140505, "loss": 2.4752, "step": 6085 }, { "epoch": 0.01, "learning_rate": 0.00019999356144346386, "loss": 2.5671, "step": 6090 }, { "epoch": 0.01, "learning_rate": 0.00019999355086684295, "loss": 2.553, "step": 6095 }, { "epoch": 0.01, "learning_rate": 0.00019999354028154233, "loss": 2.7043, "step": 6100 }, { "epoch": 0.01, "learning_rate": 0.00019999352968756205, "loss": 2.415, "step": 6105 }, { "epoch": 0.01, "learning_rate": 0.00019999351908490202, "loss": 2.5664, "step": 6110 }, { "epoch": 0.01, "learning_rate": 0.0001999935084735623, "loss": 2.5261, "step": 6115 }, { "epoch": 0.01, "learning_rate": 0.00019999349785354288, "loss": 2.4543, "step": 6120 }, { "epoch": 0.01, "learning_rate": 0.00019999348722484378, "loss": 2.5206, "step": 6125 }, { "epoch": 0.01, "learning_rate": 0.000199993476587465, "loss": 2.5553, "step": 6130 }, { "epoch": 0.01, "learning_rate": 0.00019999346594140648, "loss": 2.4298, "step": 6135 }, { "epoch": 0.01, "learning_rate": 0.0001999934552866683, "loss": 2.5042, "step": 6140 }, { "epoch": 0.01, "learning_rate": 0.00019999344462325042, "loss": 2.4607, "step": 6145 }, { "epoch": 0.01, "learning_rate": 0.00019999343395115282, "loss": 2.6575, "step": 6150 }, { "epoch": 0.01, "learning_rate": 0.00019999342327037554, "loss": 2.6144, "step": 6155 }, { "epoch": 0.01, "learning_rate": 0.00019999341258091857, "loss": 2.4543, "step": 6160 }, { "epoch": 0.01, "learning_rate": 0.00019999340188278192, "loss": 2.6144, "step": 6165 }, { "epoch": 0.01, "learning_rate": 0.00019999339117596557, "loss": 2.8006, "step": 6170 }, { "epoch": 0.01, "learning_rate": 0.0001999933804604695, "loss": 2.7669, "step": 6175 }, { "epoch": 0.01, "learning_rate": 0.00019999336973629378, "loss": 2.722, "step": 6180 }, { "epoch": 0.01, "learning_rate": 0.00019999335900343837, "loss": 2.6892, "step": 6185 }, { "epoch": 0.01, "learning_rate": 0.0001999933482619033, "loss": 2.5959, "step": 6190 }, { "epoch": 0.01, "learning_rate": 0.00019999333751168847, "loss": 2.6445, "step": 6195 }, { "epoch": 0.01, "learning_rate": 0.00019999332675279402, "loss": 2.6802, "step": 6200 }, { "epoch": 0.01, "learning_rate": 0.00019999331598521988, "loss": 2.6725, "step": 6205 }, { "epoch": 0.01, "learning_rate": 0.00019999330520896602, "loss": 2.3971, "step": 6210 }, { "epoch": 0.01, "learning_rate": 0.0001999932944240325, "loss": 2.7448, "step": 6215 }, { "epoch": 0.01, "learning_rate": 0.0001999932836304193, "loss": 2.7078, "step": 6220 }, { "epoch": 0.01, "learning_rate": 0.00019999327282812642, "loss": 2.1525, "step": 6225 }, { "epoch": 0.01, "learning_rate": 0.00019999326201715386, "loss": 2.6948, "step": 6230 }, { "epoch": 0.01, "learning_rate": 0.00019999325119750164, "loss": 2.6289, "step": 6235 }, { "epoch": 0.01, "learning_rate": 0.0001999932403691697, "loss": 2.519, "step": 6240 }, { "epoch": 0.01, "learning_rate": 0.00019999322953215813, "loss": 2.6539, "step": 6245 }, { "epoch": 0.01, "learning_rate": 0.00019999321868646687, "loss": 2.5327, "step": 6250 }, { "epoch": 0.01, "learning_rate": 0.00019999320783209591, "loss": 2.5113, "step": 6255 }, { "epoch": 0.01, "learning_rate": 0.0001999931969690453, "loss": 2.5714, "step": 6260 }, { "epoch": 0.01, "learning_rate": 0.00019999318609731503, "loss": 2.4982, "step": 6265 }, { "epoch": 0.01, "learning_rate": 0.0001999931752169051, "loss": 2.4783, "step": 6270 }, { "epoch": 0.01, "learning_rate": 0.00019999316432781544, "loss": 2.4137, "step": 6275 }, { "epoch": 0.01, "learning_rate": 0.00019999315343004616, "loss": 2.3633, "step": 6280 }, { "epoch": 0.01, "learning_rate": 0.0001999931425235972, "loss": 2.4293, "step": 6285 }, { "epoch": 0.01, "learning_rate": 0.00019999313160846854, "loss": 2.4783, "step": 6290 }, { "epoch": 0.01, "learning_rate": 0.00019999312068466024, "loss": 2.4389, "step": 6295 }, { "epoch": 0.01, "learning_rate": 0.00019999310975217228, "loss": 2.4359, "step": 6300 }, { "epoch": 0.01, "learning_rate": 0.0001999930988110047, "loss": 2.7623, "step": 6305 }, { "epoch": 0.01, "learning_rate": 0.00019999308786115738, "loss": 2.5565, "step": 6310 }, { "epoch": 0.01, "learning_rate": 0.0001999930769026304, "loss": 2.5909, "step": 6315 }, { "epoch": 0.01, "learning_rate": 0.0001999930659354238, "loss": 2.4685, "step": 6320 }, { "epoch": 0.01, "learning_rate": 0.00019999305495953753, "loss": 2.3457, "step": 6325 }, { "epoch": 0.01, "learning_rate": 0.0001999930439749716, "loss": 2.4567, "step": 6330 }, { "epoch": 0.01, "learning_rate": 0.00019999303298172597, "loss": 2.6181, "step": 6335 }, { "epoch": 0.01, "learning_rate": 0.00019999302197980075, "loss": 2.6353, "step": 6340 }, { "epoch": 0.01, "learning_rate": 0.00019999301096919583, "loss": 2.7282, "step": 6345 }, { "epoch": 0.01, "learning_rate": 0.00019999299994991128, "loss": 2.2838, "step": 6350 }, { "epoch": 0.01, "learning_rate": 0.00019999298892194704, "loss": 2.492, "step": 6355 }, { "epoch": 0.01, "learning_rate": 0.0001999929778853032, "loss": 2.7185, "step": 6360 }, { "epoch": 0.01, "learning_rate": 0.00019999296683997962, "loss": 2.4134, "step": 6365 }, { "epoch": 0.01, "learning_rate": 0.00019999295578597645, "loss": 2.3234, "step": 6370 }, { "epoch": 0.01, "learning_rate": 0.00019999294472329362, "loss": 2.2621, "step": 6375 }, { "epoch": 0.01, "learning_rate": 0.00019999293365193115, "loss": 2.5966, "step": 6380 }, { "epoch": 0.01, "learning_rate": 0.00019999292257188902, "loss": 2.3967, "step": 6385 }, { "epoch": 0.01, "learning_rate": 0.0001999929114831672, "loss": 2.5198, "step": 6390 }, { "epoch": 0.01, "learning_rate": 0.0001999929003857658, "loss": 2.7163, "step": 6395 }, { "epoch": 0.01, "learning_rate": 0.0001999928892796847, "loss": 2.6591, "step": 6400 }, { "epoch": 0.01, "learning_rate": 0.000199992878164924, "loss": 2.4431, "step": 6405 }, { "epoch": 0.01, "learning_rate": 0.00019999286704148363, "loss": 2.5429, "step": 6410 }, { "epoch": 0.01, "learning_rate": 0.0001999928559093636, "loss": 2.476, "step": 6415 }, { "epoch": 0.01, "learning_rate": 0.00019999284476856396, "loss": 2.7277, "step": 6420 }, { "epoch": 0.01, "learning_rate": 0.00019999283361908467, "loss": 2.5014, "step": 6425 }, { "epoch": 0.01, "learning_rate": 0.00019999282246092575, "loss": 2.3429, "step": 6430 }, { "epoch": 0.01, "learning_rate": 0.00019999281129408716, "loss": 2.4629, "step": 6435 }, { "epoch": 0.01, "learning_rate": 0.00019999280011856897, "loss": 2.5457, "step": 6440 }, { "epoch": 0.01, "learning_rate": 0.0001999927889343711, "loss": 2.72, "step": 6445 }, { "epoch": 0.01, "learning_rate": 0.00019999277774149362, "loss": 2.6957, "step": 6450 }, { "epoch": 0.01, "learning_rate": 0.0001999927665399365, "loss": 2.7006, "step": 6455 }, { "epoch": 0.01, "learning_rate": 0.00019999275532969974, "loss": 2.1886, "step": 6460 }, { "epoch": 0.01, "learning_rate": 0.00019999274411078335, "loss": 2.6278, "step": 6465 }, { "epoch": 0.01, "learning_rate": 0.00019999273288318734, "loss": 2.423, "step": 6470 }, { "epoch": 0.01, "learning_rate": 0.00019999272164691168, "loss": 2.4197, "step": 6475 }, { "epoch": 0.01, "learning_rate": 0.00019999271040195638, "loss": 2.4111, "step": 6480 }, { "epoch": 0.01, "learning_rate": 0.00019999269914832145, "loss": 2.4731, "step": 6485 }, { "epoch": 0.01, "learning_rate": 0.0001999926878860069, "loss": 2.7607, "step": 6490 }, { "epoch": 0.01, "learning_rate": 0.00019999267661501274, "loss": 2.5821, "step": 6495 }, { "epoch": 0.01, "learning_rate": 0.00019999266533533893, "loss": 2.6406, "step": 6500 }, { "epoch": 0.01, "learning_rate": 0.0001999926540469855, "loss": 2.4918, "step": 6505 }, { "epoch": 0.01, "learning_rate": 0.00019999264274995246, "loss": 2.5968, "step": 6510 }, { "epoch": 0.01, "learning_rate": 0.0001999926314442398, "loss": 2.6865, "step": 6515 }, { "epoch": 0.01, "learning_rate": 0.00019999262012984748, "loss": 2.4477, "step": 6520 }, { "epoch": 0.01, "learning_rate": 0.00019999260880677557, "loss": 2.7181, "step": 6525 }, { "epoch": 0.01, "learning_rate": 0.000199992597475024, "loss": 2.6592, "step": 6530 }, { "epoch": 0.01, "learning_rate": 0.00019999258613459287, "loss": 2.4172, "step": 6535 }, { "epoch": 0.01, "learning_rate": 0.00019999257478548206, "loss": 2.5165, "step": 6540 }, { "epoch": 0.01, "learning_rate": 0.00019999256342769165, "loss": 2.2554, "step": 6545 }, { "epoch": 0.01, "learning_rate": 0.00019999255206122165, "loss": 2.2255, "step": 6550 }, { "epoch": 0.01, "learning_rate": 0.000199992540686072, "loss": 2.6928, "step": 6555 }, { "epoch": 0.01, "learning_rate": 0.00019999252930224276, "loss": 2.5198, "step": 6560 }, { "epoch": 0.01, "learning_rate": 0.0001999925179097339, "loss": 2.4785, "step": 6565 }, { "epoch": 0.01, "learning_rate": 0.0001999925065085454, "loss": 2.444, "step": 6570 }, { "epoch": 0.01, "learning_rate": 0.00019999249509867735, "loss": 2.2624, "step": 6575 }, { "epoch": 0.01, "learning_rate": 0.00019999248368012963, "loss": 2.5451, "step": 6580 }, { "epoch": 0.01, "learning_rate": 0.00019999247225290233, "loss": 2.5046, "step": 6585 }, { "epoch": 0.01, "learning_rate": 0.00019999246081699542, "loss": 2.651, "step": 6590 }, { "epoch": 0.01, "learning_rate": 0.00019999244937240888, "loss": 2.5855, "step": 6595 }, { "epoch": 0.01, "learning_rate": 0.00019999243791914273, "loss": 2.6744, "step": 6600 }, { "epoch": 0.01, "learning_rate": 0.00019999242645719697, "loss": 2.4147, "step": 6605 }, { "epoch": 0.01, "learning_rate": 0.00019999241498657163, "loss": 2.4975, "step": 6610 }, { "epoch": 0.01, "learning_rate": 0.00019999240350726668, "loss": 2.5738, "step": 6615 }, { "epoch": 0.01, "learning_rate": 0.00019999239201928215, "loss": 2.6384, "step": 6620 }, { "epoch": 0.01, "learning_rate": 0.000199992380522618, "loss": 2.4977, "step": 6625 }, { "epoch": 0.01, "learning_rate": 0.0001999923690172742, "loss": 2.4549, "step": 6630 }, { "epoch": 0.01, "learning_rate": 0.00019999235750325087, "loss": 2.5053, "step": 6635 }, { "epoch": 0.01, "learning_rate": 0.00019999234598054791, "loss": 2.6512, "step": 6640 }, { "epoch": 0.01, "learning_rate": 0.00019999233444916535, "loss": 2.6486, "step": 6645 }, { "epoch": 0.01, "learning_rate": 0.0001999923229091032, "loss": 2.6118, "step": 6650 }, { "epoch": 0.01, "learning_rate": 0.00019999231136036145, "loss": 2.6748, "step": 6655 }, { "epoch": 0.01, "learning_rate": 0.0001999922998029401, "loss": 2.4618, "step": 6660 }, { "epoch": 0.01, "learning_rate": 0.00019999228823683915, "loss": 2.667, "step": 6665 }, { "epoch": 0.01, "learning_rate": 0.00019999227666205862, "loss": 2.5785, "step": 6670 }, { "epoch": 0.01, "learning_rate": 0.0001999922650785985, "loss": 2.3421, "step": 6675 }, { "epoch": 0.01, "learning_rate": 0.00019999225348645876, "loss": 2.5338, "step": 6680 }, { "epoch": 0.01, "learning_rate": 0.00019999224188563947, "loss": 2.378, "step": 6685 }, { "epoch": 0.01, "learning_rate": 0.00019999223027614057, "loss": 2.5207, "step": 6690 }, { "epoch": 0.01, "learning_rate": 0.0001999922186579621, "loss": 2.4966, "step": 6695 }, { "epoch": 0.01, "learning_rate": 0.000199992207031104, "loss": 2.6312, "step": 6700 }, { "epoch": 0.01, "learning_rate": 0.00019999219539556636, "loss": 2.7636, "step": 6705 }, { "epoch": 0.01, "learning_rate": 0.00019999218375134912, "loss": 2.5626, "step": 6710 }, { "epoch": 0.01, "learning_rate": 0.00019999217209845226, "loss": 2.6757, "step": 6715 }, { "epoch": 0.01, "learning_rate": 0.00019999216043687585, "loss": 2.3545, "step": 6720 }, { "epoch": 0.01, "learning_rate": 0.00019999214876661986, "loss": 2.4726, "step": 6725 }, { "epoch": 0.01, "learning_rate": 0.00019999213708768427, "loss": 2.7951, "step": 6730 }, { "epoch": 0.01, "learning_rate": 0.00019999212540006912, "loss": 2.4741, "step": 6735 }, { "epoch": 0.01, "learning_rate": 0.0001999921137037744, "loss": 2.2713, "step": 6740 }, { "epoch": 0.01, "learning_rate": 0.00019999210199880008, "loss": 2.3387, "step": 6745 }, { "epoch": 0.01, "learning_rate": 0.0001999920902851462, "loss": 2.4719, "step": 6750 }, { "epoch": 0.01, "learning_rate": 0.00019999207856281271, "loss": 2.5354, "step": 6755 }, { "epoch": 0.01, "learning_rate": 0.0001999920668317997, "loss": 2.3241, "step": 6760 }, { "epoch": 0.01, "learning_rate": 0.00019999205509210706, "loss": 2.6475, "step": 6765 }, { "epoch": 0.01, "learning_rate": 0.00019999204334373487, "loss": 2.554, "step": 6770 }, { "epoch": 0.01, "learning_rate": 0.0001999920315866831, "loss": 2.4501, "step": 6775 }, { "epoch": 0.01, "learning_rate": 0.00019999201982095178, "loss": 2.3314, "step": 6780 }, { "epoch": 0.01, "learning_rate": 0.00019999200804654088, "loss": 2.4591, "step": 6785 }, { "epoch": 0.01, "learning_rate": 0.0001999919962634504, "loss": 2.603, "step": 6790 }, { "epoch": 0.01, "learning_rate": 0.00019999198447168034, "loss": 2.4301, "step": 6795 }, { "epoch": 0.01, "learning_rate": 0.00019999197267123075, "loss": 2.4142, "step": 6800 }, { "epoch": 0.01, "learning_rate": 0.00019999196086210158, "loss": 2.4162, "step": 6805 }, { "epoch": 0.01, "learning_rate": 0.00019999194904429284, "loss": 2.4825, "step": 6810 }, { "epoch": 0.01, "learning_rate": 0.00019999193721780456, "loss": 2.3559, "step": 6815 }, { "epoch": 0.01, "learning_rate": 0.00019999192538263668, "loss": 2.3979, "step": 6820 }, { "epoch": 0.01, "learning_rate": 0.00019999191353878924, "loss": 2.355, "step": 6825 }, { "epoch": 0.01, "learning_rate": 0.00019999190168626228, "loss": 2.3177, "step": 6830 }, { "epoch": 0.01, "learning_rate": 0.0001999918898250557, "loss": 2.7116, "step": 6835 }, { "epoch": 0.01, "learning_rate": 0.00019999187795516961, "loss": 2.7235, "step": 6840 }, { "epoch": 0.01, "learning_rate": 0.00019999186607660394, "loss": 2.5057, "step": 6845 }, { "epoch": 0.01, "learning_rate": 0.00019999185418935873, "loss": 2.5062, "step": 6850 }, { "epoch": 0.01, "learning_rate": 0.00019999184229343395, "loss": 2.5559, "step": 6855 }, { "epoch": 0.01, "learning_rate": 0.00019999183038882961, "loss": 2.4072, "step": 6860 }, { "epoch": 0.01, "learning_rate": 0.00019999181847554572, "loss": 2.3689, "step": 6865 }, { "epoch": 0.01, "learning_rate": 0.0001999918065535823, "loss": 2.3674, "step": 6870 }, { "epoch": 0.01, "learning_rate": 0.0001999917946229393, "loss": 2.4241, "step": 6875 }, { "epoch": 0.01, "learning_rate": 0.0001999917826836168, "loss": 2.6018, "step": 6880 }, { "epoch": 0.01, "learning_rate": 0.0001999917707356147, "loss": 2.5347, "step": 6885 }, { "epoch": 0.01, "learning_rate": 0.00019999175877893305, "loss": 2.3115, "step": 6890 }, { "epoch": 0.01, "learning_rate": 0.00019999174681357187, "loss": 2.5077, "step": 6895 }, { "epoch": 0.01, "learning_rate": 0.00019999173483953113, "loss": 2.6317, "step": 6900 }, { "epoch": 0.01, "learning_rate": 0.00019999172285681084, "loss": 2.5362, "step": 6905 }, { "epoch": 0.01, "learning_rate": 0.00019999171086541105, "loss": 2.5657, "step": 6910 }, { "epoch": 0.01, "learning_rate": 0.00019999169886533168, "loss": 2.5338, "step": 6915 }, { "epoch": 0.01, "learning_rate": 0.00019999168685657276, "loss": 2.6462, "step": 6920 }, { "epoch": 0.01, "learning_rate": 0.0001999916748391343, "loss": 2.5496, "step": 6925 }, { "epoch": 0.01, "learning_rate": 0.00019999166281301634, "loss": 2.629, "step": 6930 }, { "epoch": 0.01, "learning_rate": 0.00019999165077821883, "loss": 2.7223, "step": 6935 }, { "epoch": 0.01, "learning_rate": 0.00019999163873474175, "loss": 2.3582, "step": 6940 }, { "epoch": 0.01, "learning_rate": 0.00019999162668258514, "loss": 2.4163, "step": 6945 }, { "epoch": 0.01, "learning_rate": 0.000199991614621749, "loss": 2.6175, "step": 6950 }, { "epoch": 0.01, "learning_rate": 0.00019999160255223335, "loss": 2.5896, "step": 6955 }, { "epoch": 0.01, "learning_rate": 0.00019999159047403816, "loss": 2.5343, "step": 6960 }, { "epoch": 0.01, "learning_rate": 0.00019999157838716342, "loss": 2.6274, "step": 6965 }, { "epoch": 0.01, "learning_rate": 0.00019999156629160912, "loss": 2.578, "step": 6970 }, { "epoch": 0.01, "learning_rate": 0.00019999155418737533, "loss": 2.5981, "step": 6975 }, { "epoch": 0.01, "learning_rate": 0.00019999154207446198, "loss": 2.4209, "step": 6980 }, { "epoch": 0.01, "learning_rate": 0.00019999152995286914, "loss": 2.5243, "step": 6985 }, { "epoch": 0.01, "learning_rate": 0.00019999151782259676, "loss": 2.7483, "step": 6990 }, { "epoch": 0.01, "learning_rate": 0.0001999915056836448, "loss": 2.4876, "step": 6995 }, { "epoch": 0.01, "learning_rate": 0.00019999149353601338, "loss": 2.6989, "step": 7000 }, { "epoch": 0.01, "learning_rate": 0.00019999148137970243, "loss": 2.634, "step": 7005 }, { "epoch": 0.01, "learning_rate": 0.00019999146921471196, "loss": 2.4256, "step": 7010 }, { "epoch": 0.01, "learning_rate": 0.00019999145704104195, "loss": 2.4754, "step": 7015 }, { "epoch": 0.01, "learning_rate": 0.00019999144485869242, "loss": 2.4822, "step": 7020 }, { "epoch": 0.01, "learning_rate": 0.00019999143266766336, "loss": 2.5492, "step": 7025 }, { "epoch": 0.01, "learning_rate": 0.0001999914204679548, "loss": 2.5732, "step": 7030 }, { "epoch": 0.01, "learning_rate": 0.0001999914082595667, "loss": 2.2351, "step": 7035 }, { "epoch": 0.01, "learning_rate": 0.0001999913960424991, "loss": 2.6789, "step": 7040 }, { "epoch": 0.01, "learning_rate": 0.00019999138381675198, "loss": 2.5905, "step": 7045 }, { "epoch": 0.01, "learning_rate": 0.00019999137158232535, "loss": 2.6706, "step": 7050 }, { "epoch": 0.01, "learning_rate": 0.0001999913593392192, "loss": 2.4164, "step": 7055 }, { "epoch": 0.01, "learning_rate": 0.00019999134708743353, "loss": 2.6302, "step": 7060 }, { "epoch": 0.01, "learning_rate": 0.00019999133482696837, "loss": 2.5643, "step": 7065 }, { "epoch": 0.01, "learning_rate": 0.0001999913225578237, "loss": 2.6243, "step": 7070 }, { "epoch": 0.01, "learning_rate": 0.00019999131027999948, "loss": 2.4086, "step": 7075 }, { "epoch": 0.01, "learning_rate": 0.0001999912979934958, "loss": 2.6565, "step": 7080 }, { "epoch": 0.01, "learning_rate": 0.00019999128569831259, "loss": 2.6168, "step": 7085 }, { "epoch": 0.01, "learning_rate": 0.00019999127339444988, "loss": 2.4882, "step": 7090 }, { "epoch": 0.01, "learning_rate": 0.00019999126108190764, "loss": 2.2434, "step": 7095 }, { "epoch": 0.01, "learning_rate": 0.0001999912487606859, "loss": 2.5088, "step": 7100 }, { "epoch": 0.01, "learning_rate": 0.00019999123643078468, "loss": 2.662, "step": 7105 }, { "epoch": 0.01, "learning_rate": 0.00019999122409220394, "loss": 2.3635, "step": 7110 }, { "epoch": 0.01, "learning_rate": 0.00019999121174494374, "loss": 2.5612, "step": 7115 }, { "epoch": 0.01, "learning_rate": 0.00019999119938900398, "loss": 2.4165, "step": 7120 }, { "epoch": 0.01, "learning_rate": 0.00019999118702438478, "loss": 2.5879, "step": 7125 }, { "epoch": 0.01, "learning_rate": 0.00019999117465108602, "loss": 2.5018, "step": 7130 }, { "epoch": 0.01, "learning_rate": 0.00019999116226910782, "loss": 2.6596, "step": 7135 }, { "epoch": 0.01, "learning_rate": 0.0001999911498784501, "loss": 2.6165, "step": 7140 }, { "epoch": 0.01, "learning_rate": 0.0001999911374791129, "loss": 2.2687, "step": 7145 }, { "epoch": 0.01, "learning_rate": 0.0001999911250710962, "loss": 2.7033, "step": 7150 }, { "epoch": 0.01, "learning_rate": 0.00019999111265439997, "loss": 2.4913, "step": 7155 }, { "epoch": 0.01, "learning_rate": 0.00019999110022902428, "loss": 2.7355, "step": 7160 }, { "epoch": 0.01, "learning_rate": 0.0001999910877949691, "loss": 2.4507, "step": 7165 }, { "epoch": 0.01, "learning_rate": 0.00019999107535223444, "loss": 2.5272, "step": 7170 }, { "epoch": 0.01, "learning_rate": 0.00019999106290082027, "loss": 2.749, "step": 7175 }, { "epoch": 0.01, "learning_rate": 0.0001999910504407266, "loss": 2.4736, "step": 7180 }, { "epoch": 0.01, "learning_rate": 0.0001999910379719535, "loss": 2.7673, "step": 7185 }, { "epoch": 0.01, "learning_rate": 0.00019999102549450086, "loss": 2.6617, "step": 7190 }, { "epoch": 0.01, "learning_rate": 0.00019999101300836877, "loss": 2.303, "step": 7195 }, { "epoch": 0.01, "learning_rate": 0.0001999910005135572, "loss": 3.012, "step": 7200 }, { "epoch": 0.01, "learning_rate": 0.00019999098801006614, "loss": 2.5122, "step": 7205 }, { "epoch": 0.01, "learning_rate": 0.00019999097549789558, "loss": 2.4943, "step": 7210 }, { "epoch": 0.01, "learning_rate": 0.00019999096297704556, "loss": 2.3182, "step": 7215 }, { "epoch": 0.01, "learning_rate": 0.00019999095044751606, "loss": 2.5661, "step": 7220 }, { "epoch": 0.01, "learning_rate": 0.00019999093790930706, "loss": 2.3979, "step": 7225 }, { "epoch": 0.01, "learning_rate": 0.00019999092536241862, "loss": 2.4857, "step": 7230 }, { "epoch": 0.01, "learning_rate": 0.00019999091280685068, "loss": 2.2486, "step": 7235 }, { "epoch": 0.01, "learning_rate": 0.0001999909002426033, "loss": 2.5288, "step": 7240 }, { "epoch": 0.01, "learning_rate": 0.0001999908876696764, "loss": 2.5035, "step": 7245 }, { "epoch": 0.01, "learning_rate": 0.00019999087508807003, "loss": 2.4987, "step": 7250 }, { "epoch": 0.01, "learning_rate": 0.00019999086249778425, "loss": 2.4817, "step": 7255 }, { "epoch": 0.01, "learning_rate": 0.00019999084989881895, "loss": 2.4187, "step": 7260 }, { "epoch": 0.01, "learning_rate": 0.00019999083729117418, "loss": 2.4947, "step": 7265 }, { "epoch": 0.01, "learning_rate": 0.00019999082467484996, "loss": 2.5637, "step": 7270 }, { "epoch": 0.01, "learning_rate": 0.00019999081204984627, "loss": 2.6972, "step": 7275 }, { "epoch": 0.01, "learning_rate": 0.0001999907994161631, "loss": 2.4978, "step": 7280 }, { "epoch": 0.01, "learning_rate": 0.0001999907867738005, "loss": 2.61, "step": 7285 }, { "epoch": 0.01, "learning_rate": 0.00019999077412275842, "loss": 2.674, "step": 7290 }, { "epoch": 0.01, "learning_rate": 0.00019999076146303685, "loss": 2.8544, "step": 7295 }, { "epoch": 0.01, "learning_rate": 0.00019999074879463585, "loss": 2.3708, "step": 7300 }, { "epoch": 0.01, "learning_rate": 0.00019999073611755538, "loss": 2.5549, "step": 7305 }, { "epoch": 0.01, "learning_rate": 0.00019999072343179547, "loss": 2.4773, "step": 7310 }, { "epoch": 0.01, "learning_rate": 0.00019999071073735606, "loss": 2.4491, "step": 7315 }, { "epoch": 0.01, "learning_rate": 0.00019999069803423725, "loss": 2.3739, "step": 7320 }, { "epoch": 0.01, "learning_rate": 0.00019999068532243895, "loss": 2.5656, "step": 7325 }, { "epoch": 0.01, "learning_rate": 0.00019999067260196118, "loss": 2.2732, "step": 7330 }, { "epoch": 0.01, "learning_rate": 0.00019999065987280398, "loss": 2.3354, "step": 7335 }, { "epoch": 0.01, "learning_rate": 0.00019999064713496735, "loss": 2.5572, "step": 7340 }, { "epoch": 0.01, "learning_rate": 0.00019999063438845124, "loss": 2.4688, "step": 7345 }, { "epoch": 0.01, "learning_rate": 0.00019999062163325565, "loss": 2.4249, "step": 7350 }, { "epoch": 0.01, "learning_rate": 0.00019999060886938068, "loss": 2.4774, "step": 7355 }, { "epoch": 0.01, "learning_rate": 0.0001999905960968262, "loss": 2.523, "step": 7360 }, { "epoch": 0.01, "learning_rate": 0.0001999905833155923, "loss": 2.5578, "step": 7365 }, { "epoch": 0.01, "learning_rate": 0.000199990570525679, "loss": 2.3641, "step": 7370 }, { "epoch": 0.01, "learning_rate": 0.00019999055772708618, "loss": 2.4622, "step": 7375 }, { "epoch": 0.01, "learning_rate": 0.00019999054491981398, "loss": 2.6698, "step": 7380 }, { "epoch": 0.01, "learning_rate": 0.0001999905321038623, "loss": 2.5802, "step": 7385 }, { "epoch": 0.01, "learning_rate": 0.00019999051927923115, "loss": 2.5884, "step": 7390 }, { "epoch": 0.01, "learning_rate": 0.00019999050644592062, "loss": 2.4902, "step": 7395 }, { "epoch": 0.01, "learning_rate": 0.00019999049360393064, "loss": 2.7938, "step": 7400 }, { "epoch": 0.01, "learning_rate": 0.0001999904807532612, "loss": 2.5248, "step": 7405 }, { "epoch": 0.01, "learning_rate": 0.00019999046789391232, "loss": 2.6136, "step": 7410 }, { "epoch": 0.01, "learning_rate": 0.00019999045502588404, "loss": 2.627, "step": 7415 }, { "epoch": 0.01, "learning_rate": 0.00019999044214917628, "loss": 2.4668, "step": 7420 }, { "epoch": 0.01, "learning_rate": 0.00019999042926378913, "loss": 2.3442, "step": 7425 }, { "epoch": 0.01, "learning_rate": 0.00019999041636972251, "loss": 2.4571, "step": 7430 }, { "epoch": 0.01, "learning_rate": 0.00019999040346697648, "loss": 2.5193, "step": 7435 }, { "epoch": 0.01, "learning_rate": 0.00019999039055555105, "loss": 2.8178, "step": 7440 }, { "epoch": 0.01, "learning_rate": 0.00019999037763544615, "loss": 2.5323, "step": 7445 }, { "epoch": 0.01, "learning_rate": 0.00019999036470666183, "loss": 2.3081, "step": 7450 }, { "epoch": 0.01, "learning_rate": 0.00019999035176919807, "loss": 2.4393, "step": 7455 }, { "epoch": 0.01, "learning_rate": 0.00019999033882305491, "loss": 2.5866, "step": 7460 }, { "epoch": 0.01, "learning_rate": 0.00019999032586823232, "loss": 2.6877, "step": 7465 }, { "epoch": 0.01, "learning_rate": 0.0001999903129047303, "loss": 2.3796, "step": 7470 }, { "epoch": 0.01, "learning_rate": 0.00019999029993254886, "loss": 2.5524, "step": 7475 }, { "epoch": 0.01, "learning_rate": 0.000199990286951688, "loss": 2.5185, "step": 7480 }, { "epoch": 0.01, "learning_rate": 0.00019999027396214771, "loss": 2.742, "step": 7485 }, { "epoch": 0.01, "learning_rate": 0.000199990260963928, "loss": 2.492, "step": 7490 }, { "epoch": 0.01, "learning_rate": 0.0001999902479570289, "loss": 2.6437, "step": 7495 }, { "epoch": 0.01, "learning_rate": 0.00019999023494145037, "loss": 2.5175, "step": 7500 }, { "epoch": 0.01, "learning_rate": 0.00019999022191719243, "loss": 2.0803, "step": 7505 }, { "epoch": 0.01, "learning_rate": 0.00019999020888425507, "loss": 2.5532, "step": 7510 }, { "epoch": 0.01, "learning_rate": 0.0001999901958426383, "loss": 2.4239, "step": 7515 }, { "epoch": 0.01, "learning_rate": 0.00019999018279234212, "loss": 2.2999, "step": 7520 }, { "epoch": 0.01, "learning_rate": 0.0001999901697333665, "loss": 2.3891, "step": 7525 }, { "epoch": 0.01, "learning_rate": 0.0001999901566657115, "loss": 2.6701, "step": 7530 }, { "epoch": 0.01, "learning_rate": 0.00019999014358937708, "loss": 2.6404, "step": 7535 }, { "epoch": 0.01, "learning_rate": 0.00019999013050436327, "loss": 2.4318, "step": 7540 }, { "epoch": 0.01, "learning_rate": 0.00019999011741067007, "loss": 2.5055, "step": 7545 }, { "epoch": 0.01, "learning_rate": 0.00019999010430829742, "loss": 2.5732, "step": 7550 }, { "epoch": 0.01, "learning_rate": 0.00019999009119724538, "loss": 2.5331, "step": 7555 }, { "epoch": 0.01, "learning_rate": 0.00019999007807751395, "loss": 2.3426, "step": 7560 }, { "epoch": 0.01, "learning_rate": 0.0001999900649491031, "loss": 2.5285, "step": 7565 }, { "epoch": 0.01, "learning_rate": 0.00019999005181201287, "loss": 2.2141, "step": 7570 }, { "epoch": 0.01, "learning_rate": 0.00019999003866624324, "loss": 2.3738, "step": 7575 }, { "epoch": 0.01, "learning_rate": 0.0001999900255117942, "loss": 2.138, "step": 7580 }, { "epoch": 0.01, "learning_rate": 0.00019999001234866578, "loss": 2.6575, "step": 7585 }, { "epoch": 0.01, "learning_rate": 0.00019998999917685795, "loss": 2.4234, "step": 7590 }, { "epoch": 0.01, "learning_rate": 0.0001999899859963707, "loss": 2.4141, "step": 7595 }, { "epoch": 0.01, "learning_rate": 0.0001999899728072041, "loss": 2.683, "step": 7600 }, { "epoch": 0.01, "learning_rate": 0.0001999899596093581, "loss": 2.6398, "step": 7605 }, { "epoch": 0.01, "learning_rate": 0.00019998994640283267, "loss": 2.4464, "step": 7610 }, { "epoch": 0.01, "learning_rate": 0.0001999899331876279, "loss": 2.5567, "step": 7615 }, { "epoch": 0.01, "learning_rate": 0.0001999899199637437, "loss": 2.4854, "step": 7620 }, { "epoch": 0.01, "learning_rate": 0.00019998990673118016, "loss": 2.6837, "step": 7625 }, { "epoch": 0.01, "learning_rate": 0.00019998989348993717, "loss": 2.4272, "step": 7630 }, { "epoch": 0.01, "learning_rate": 0.00019998988024001486, "loss": 2.5749, "step": 7635 }, { "epoch": 0.01, "learning_rate": 0.00019998986698141312, "loss": 2.38, "step": 7640 }, { "epoch": 0.01, "learning_rate": 0.00019998985371413202, "loss": 2.5107, "step": 7645 }, { "epoch": 0.01, "learning_rate": 0.00019998984043817153, "loss": 2.5564, "step": 7650 }, { "epoch": 0.01, "learning_rate": 0.00019998982715353165, "loss": 2.4367, "step": 7655 }, { "epoch": 0.01, "learning_rate": 0.0001999898138602124, "loss": 2.4698, "step": 7660 }, { "epoch": 0.01, "learning_rate": 0.00019998980055821377, "loss": 2.5996, "step": 7665 }, { "epoch": 0.01, "learning_rate": 0.00019998978724753577, "loss": 2.4685, "step": 7670 }, { "epoch": 0.01, "learning_rate": 0.00019998977392817838, "loss": 2.4938, "step": 7675 }, { "epoch": 0.01, "learning_rate": 0.00019998976060014162, "loss": 2.3933, "step": 7680 }, { "epoch": 0.01, "learning_rate": 0.0001999897472634255, "loss": 2.3589, "step": 7685 }, { "epoch": 0.01, "learning_rate": 0.00019998973391802997, "loss": 2.526, "step": 7690 }, { "epoch": 0.01, "learning_rate": 0.0001999897205639551, "loss": 2.3884, "step": 7695 }, { "epoch": 0.01, "learning_rate": 0.00019998970720120087, "loss": 2.6155, "step": 7700 }, { "epoch": 0.01, "learning_rate": 0.00019998969382976727, "loss": 2.347, "step": 7705 }, { "epoch": 0.01, "learning_rate": 0.00019998968044965428, "loss": 2.3685, "step": 7710 }, { "epoch": 0.01, "learning_rate": 0.0001999896670608619, "loss": 2.6701, "step": 7715 }, { "epoch": 0.01, "learning_rate": 0.0001999896536633902, "loss": 2.5651, "step": 7720 }, { "epoch": 0.01, "learning_rate": 0.00019998964025723913, "loss": 2.323, "step": 7725 }, { "epoch": 0.01, "learning_rate": 0.00019998962684240868, "loss": 2.6822, "step": 7730 }, { "epoch": 0.01, "learning_rate": 0.00019998961341889885, "loss": 2.4658, "step": 7735 }, { "epoch": 0.01, "learning_rate": 0.0001999895999867097, "loss": 2.5973, "step": 7740 }, { "epoch": 0.01, "learning_rate": 0.00019998958654584117, "loss": 2.3331, "step": 7745 }, { "epoch": 0.01, "learning_rate": 0.0001999895730962933, "loss": 2.4025, "step": 7750 }, { "epoch": 0.01, "learning_rate": 0.00019998955963806606, "loss": 2.4536, "step": 7755 }, { "epoch": 0.01, "learning_rate": 0.00019998954617115943, "loss": 2.6764, "step": 7760 }, { "epoch": 0.01, "learning_rate": 0.00019998953269557347, "loss": 2.4254, "step": 7765 }, { "epoch": 0.01, "learning_rate": 0.00019998951921130814, "loss": 2.4398, "step": 7770 }, { "epoch": 0.01, "learning_rate": 0.0001999895057183635, "loss": 2.5879, "step": 7775 }, { "epoch": 0.01, "learning_rate": 0.00019998949221673948, "loss": 2.1881, "step": 7780 }, { "epoch": 0.01, "learning_rate": 0.00019998947870643612, "loss": 2.5155, "step": 7785 }, { "epoch": 0.01, "learning_rate": 0.00019998946518745342, "loss": 2.3662, "step": 7790 }, { "epoch": 0.01, "learning_rate": 0.00019998945165979133, "loss": 2.3525, "step": 7795 }, { "epoch": 0.01, "learning_rate": 0.00019998943812344993, "loss": 2.5677, "step": 7800 }, { "epoch": 0.01, "learning_rate": 0.00019998942457842916, "loss": 2.4675, "step": 7805 }, { "epoch": 0.01, "learning_rate": 0.0001999894110247291, "loss": 2.5602, "step": 7810 }, { "epoch": 0.01, "learning_rate": 0.00019998939746234963, "loss": 2.5901, "step": 7815 }, { "epoch": 0.01, "learning_rate": 0.00019998938389129083, "loss": 2.701, "step": 7820 }, { "epoch": 0.01, "learning_rate": 0.0001999893703115527, "loss": 2.6018, "step": 7825 }, { "epoch": 0.01, "learning_rate": 0.00019998935672313524, "loss": 2.4727, "step": 7830 }, { "epoch": 0.01, "learning_rate": 0.0001999893431260384, "loss": 2.5026, "step": 7835 }, { "epoch": 0.01, "learning_rate": 0.00019998932952026229, "loss": 2.5641, "step": 7840 }, { "epoch": 0.01, "learning_rate": 0.00019998931590580678, "loss": 2.6295, "step": 7845 }, { "epoch": 0.01, "learning_rate": 0.00019998930228267198, "loss": 2.4993, "step": 7850 }, { "epoch": 0.01, "learning_rate": 0.00019998928865085783, "loss": 2.7407, "step": 7855 }, { "epoch": 0.01, "learning_rate": 0.00019998927501036433, "loss": 2.6448, "step": 7860 }, { "epoch": 0.01, "learning_rate": 0.0001999892613611915, "loss": 2.5057, "step": 7865 }, { "epoch": 0.01, "learning_rate": 0.00019998924770333937, "loss": 2.4465, "step": 7870 }, { "epoch": 0.01, "learning_rate": 0.0001999892340368079, "loss": 2.6736, "step": 7875 }, { "epoch": 0.01, "learning_rate": 0.00019998922036159708, "loss": 2.4964, "step": 7880 }, { "epoch": 0.01, "learning_rate": 0.00019998920667770696, "loss": 2.4419, "step": 7885 }, { "epoch": 0.01, "learning_rate": 0.0001999891929851375, "loss": 2.6466, "step": 7890 }, { "epoch": 0.01, "learning_rate": 0.0001999891792838887, "loss": 2.3967, "step": 7895 }, { "epoch": 0.01, "learning_rate": 0.0001999891655739606, "loss": 2.5252, "step": 7900 }, { "epoch": 0.01, "learning_rate": 0.0001999891518553532, "loss": 2.5781, "step": 7905 }, { "epoch": 0.01, "learning_rate": 0.00019998913812806644, "loss": 2.2417, "step": 7910 }, { "epoch": 0.01, "learning_rate": 0.00019998912439210035, "loss": 2.3379, "step": 7915 }, { "epoch": 0.01, "learning_rate": 0.00019998911064745496, "loss": 2.6051, "step": 7920 }, { "epoch": 0.01, "learning_rate": 0.00019998909689413025, "loss": 2.498, "step": 7925 }, { "epoch": 0.01, "learning_rate": 0.00019998908313212624, "loss": 2.4427, "step": 7930 }, { "epoch": 0.01, "learning_rate": 0.0001999890693614429, "loss": 2.3317, "step": 7935 }, { "epoch": 0.01, "learning_rate": 0.00019998905558208025, "loss": 2.4034, "step": 7940 }, { "epoch": 0.01, "learning_rate": 0.0001999890417940383, "loss": 2.5472, "step": 7945 }, { "epoch": 0.01, "learning_rate": 0.00019998902799731703, "loss": 2.4709, "step": 7950 }, { "epoch": 0.01, "learning_rate": 0.0001999890141919164, "loss": 2.5028, "step": 7955 }, { "epoch": 0.01, "learning_rate": 0.00019998900037783654, "loss": 2.2752, "step": 7960 }, { "epoch": 0.01, "learning_rate": 0.00019998898655507733, "loss": 2.2431, "step": 7965 }, { "epoch": 0.01, "learning_rate": 0.00019998897272363882, "loss": 2.5253, "step": 7970 }, { "epoch": 0.01, "learning_rate": 0.00019998895888352102, "loss": 2.6414, "step": 7975 }, { "epoch": 0.01, "learning_rate": 0.00019998894503472388, "loss": 2.7794, "step": 7980 }, { "epoch": 0.01, "learning_rate": 0.00019998893117724746, "loss": 2.5362, "step": 7985 }, { "epoch": 0.01, "learning_rate": 0.00019998891731109173, "loss": 2.4186, "step": 7990 }, { "epoch": 0.01, "learning_rate": 0.00019998890343625672, "loss": 2.6099, "step": 7995 }, { "epoch": 0.01, "learning_rate": 0.00019998888955274238, "loss": 2.505, "step": 8000 }, { "epoch": 0.01, "learning_rate": 0.00019998887566054875, "loss": 2.499, "step": 8005 }, { "epoch": 0.01, "learning_rate": 0.00019998886175967584, "loss": 2.7635, "step": 8010 }, { "epoch": 0.01, "learning_rate": 0.00019998884785012362, "loss": 2.4564, "step": 8015 }, { "epoch": 0.01, "learning_rate": 0.00019998883393189208, "loss": 2.5485, "step": 8020 }, { "epoch": 0.01, "learning_rate": 0.00019998882000498133, "loss": 2.5103, "step": 8025 }, { "epoch": 0.01, "learning_rate": 0.0001999888060693912, "loss": 2.5329, "step": 8030 }, { "epoch": 0.01, "learning_rate": 0.00019998879212512183, "loss": 2.6389, "step": 8035 }, { "epoch": 0.01, "learning_rate": 0.00019998877817217314, "loss": 2.4754, "step": 8040 }, { "epoch": 0.01, "learning_rate": 0.00019998876421054517, "loss": 2.5635, "step": 8045 }, { "epoch": 0.01, "learning_rate": 0.00019998875024023792, "loss": 2.3957, "step": 8050 }, { "epoch": 0.01, "learning_rate": 0.00019998873626125136, "loss": 2.5177, "step": 8055 }, { "epoch": 0.01, "learning_rate": 0.00019998872227358554, "loss": 2.5145, "step": 8060 }, { "epoch": 0.01, "learning_rate": 0.00019998870827724044, "loss": 2.6447, "step": 8065 }, { "epoch": 0.01, "learning_rate": 0.00019998869427221604, "loss": 2.4512, "step": 8070 }, { "epoch": 0.01, "learning_rate": 0.00019998868025851237, "loss": 2.4837, "step": 8075 }, { "epoch": 0.01, "learning_rate": 0.00019998866623612943, "loss": 2.4513, "step": 8080 }, { "epoch": 0.01, "learning_rate": 0.0001999886522050672, "loss": 2.3954, "step": 8085 }, { "epoch": 0.01, "learning_rate": 0.00019998863816532566, "loss": 2.4231, "step": 8090 }, { "epoch": 0.01, "learning_rate": 0.0001999886241169049, "loss": 2.4992, "step": 8095 }, { "epoch": 0.01, "learning_rate": 0.00019998861005980482, "loss": 2.4991, "step": 8100 }, { "epoch": 0.01, "learning_rate": 0.0001999885959940255, "loss": 2.3801, "step": 8105 }, { "epoch": 0.01, "learning_rate": 0.00019998858191956688, "loss": 2.6345, "step": 8110 }, { "epoch": 0.01, "learning_rate": 0.000199988567836429, "loss": 2.7197, "step": 8115 }, { "epoch": 0.01, "learning_rate": 0.00019998855374461186, "loss": 2.3708, "step": 8120 }, { "epoch": 0.01, "learning_rate": 0.00019998853964411543, "loss": 2.0928, "step": 8125 }, { "epoch": 0.01, "learning_rate": 0.00019998852553493974, "loss": 2.3053, "step": 8130 }, { "epoch": 0.01, "learning_rate": 0.0001999885114170848, "loss": 2.6692, "step": 8135 }, { "epoch": 0.01, "learning_rate": 0.00019998849729055057, "loss": 2.6266, "step": 8140 }, { "epoch": 0.01, "learning_rate": 0.0001999884831553371, "loss": 2.5056, "step": 8145 }, { "epoch": 0.01, "learning_rate": 0.00019998846901144433, "loss": 2.2743, "step": 8150 }, { "epoch": 0.01, "learning_rate": 0.0001999884548588723, "loss": 2.4226, "step": 8155 }, { "epoch": 0.01, "learning_rate": 0.00019998844069762104, "loss": 2.6256, "step": 8160 }, { "epoch": 0.01, "learning_rate": 0.0001999884265276905, "loss": 2.4524, "step": 8165 }, { "epoch": 0.01, "learning_rate": 0.00019998841234908073, "loss": 2.5078, "step": 8170 }, { "epoch": 0.01, "learning_rate": 0.0001999883981617917, "loss": 2.5066, "step": 8175 }, { "epoch": 0.01, "learning_rate": 0.00019998838396582337, "loss": 2.4922, "step": 8180 }, { "epoch": 0.01, "learning_rate": 0.00019998836976117584, "loss": 2.3698, "step": 8185 }, { "epoch": 0.01, "learning_rate": 0.00019998835554784904, "loss": 2.5426, "step": 8190 }, { "epoch": 0.01, "learning_rate": 0.00019998834132584295, "loss": 2.4569, "step": 8195 }, { "epoch": 0.01, "learning_rate": 0.00019998832709515764, "loss": 2.5148, "step": 8200 }, { "epoch": 0.01, "learning_rate": 0.0001999883128557931, "loss": 2.4442, "step": 8205 }, { "epoch": 0.01, "learning_rate": 0.00019998829860774927, "loss": 2.4735, "step": 8210 }, { "epoch": 0.01, "learning_rate": 0.00019998828435102624, "loss": 2.6126, "step": 8215 }, { "epoch": 0.01, "learning_rate": 0.00019998827008562393, "loss": 2.5147, "step": 8220 }, { "epoch": 0.01, "learning_rate": 0.00019998825581154237, "loss": 2.354, "step": 8225 }, { "epoch": 0.01, "learning_rate": 0.0001999882415287816, "loss": 2.4763, "step": 8230 }, { "epoch": 0.01, "learning_rate": 0.00019998822723734156, "loss": 2.4824, "step": 8235 }, { "epoch": 0.01, "learning_rate": 0.00019998821293722228, "loss": 2.6852, "step": 8240 }, { "epoch": 0.01, "learning_rate": 0.00019998819862842378, "loss": 2.4895, "step": 8245 }, { "epoch": 0.01, "learning_rate": 0.00019998818431094605, "loss": 2.4536, "step": 8250 }, { "epoch": 0.01, "learning_rate": 0.00019998816998478903, "loss": 2.7339, "step": 8255 }, { "epoch": 0.01, "learning_rate": 0.00019998815564995282, "loss": 2.6485, "step": 8260 }, { "epoch": 0.01, "learning_rate": 0.00019998814130643735, "loss": 2.599, "step": 8265 }, { "epoch": 0.01, "learning_rate": 0.00019998812695424265, "loss": 2.4026, "step": 8270 }, { "epoch": 0.01, "learning_rate": 0.00019998811259336875, "loss": 2.5381, "step": 8275 }, { "epoch": 0.01, "learning_rate": 0.0001999880982238156, "loss": 2.6003, "step": 8280 }, { "epoch": 0.01, "learning_rate": 0.0001999880838455832, "loss": 2.2663, "step": 8285 }, { "epoch": 0.01, "learning_rate": 0.0001999880694586716, "loss": 2.6823, "step": 8290 }, { "epoch": 0.01, "learning_rate": 0.00019998805506308073, "loss": 2.3228, "step": 8295 }, { "epoch": 0.01, "learning_rate": 0.0001999880406588107, "loss": 2.3697, "step": 8300 }, { "epoch": 0.01, "learning_rate": 0.0001999880262458614, "loss": 2.347, "step": 8305 }, { "epoch": 0.01, "learning_rate": 0.00019998801182423287, "loss": 2.5555, "step": 8310 }, { "epoch": 0.01, "learning_rate": 0.00019998799739392515, "loss": 2.4845, "step": 8315 }, { "epoch": 0.01, "learning_rate": 0.00019998798295493817, "loss": 2.6127, "step": 8320 }, { "epoch": 0.01, "learning_rate": 0.000199987968507272, "loss": 2.5411, "step": 8325 }, { "epoch": 0.01, "learning_rate": 0.0001999879540509266, "loss": 2.4625, "step": 8330 }, { "epoch": 0.01, "learning_rate": 0.000199987939585902, "loss": 2.2127, "step": 8335 }, { "epoch": 0.01, "learning_rate": 0.00019998792511219816, "loss": 2.3251, "step": 8340 }, { "epoch": 0.01, "learning_rate": 0.00019998791062981512, "loss": 2.4014, "step": 8345 }, { "epoch": 0.01, "learning_rate": 0.00019998789613875285, "loss": 2.4815, "step": 8350 }, { "epoch": 0.01, "learning_rate": 0.0001999878816390114, "loss": 2.2377, "step": 8355 }, { "epoch": 0.01, "learning_rate": 0.00019998786713059071, "loss": 2.4975, "step": 8360 }, { "epoch": 0.01, "learning_rate": 0.00019998785261349084, "loss": 2.3934, "step": 8365 }, { "epoch": 0.01, "learning_rate": 0.00019998783808771172, "loss": 2.2827, "step": 8370 }, { "epoch": 0.01, "learning_rate": 0.00019998782355325342, "loss": 2.282, "step": 8375 }, { "epoch": 0.01, "learning_rate": 0.00019998780901011595, "loss": 2.7165, "step": 8380 }, { "epoch": 0.01, "learning_rate": 0.00019998779445829922, "loss": 2.3389, "step": 8385 }, { "epoch": 0.01, "learning_rate": 0.0001999877798978033, "loss": 2.4213, "step": 8390 }, { "epoch": 0.01, "learning_rate": 0.00019998776532862818, "loss": 2.7093, "step": 8395 }, { "epoch": 0.01, "learning_rate": 0.00019998775075077385, "loss": 2.3988, "step": 8400 }, { "epoch": 0.01, "learning_rate": 0.00019998773616424035, "loss": 2.4379, "step": 8405 }, { "epoch": 0.01, "learning_rate": 0.0001999877215690276, "loss": 2.382, "step": 8410 }, { "epoch": 0.01, "learning_rate": 0.00019998770696513568, "loss": 2.3973, "step": 8415 }, { "epoch": 0.01, "learning_rate": 0.00019998769235256457, "loss": 2.5994, "step": 8420 }, { "epoch": 0.01, "learning_rate": 0.0001999876777313143, "loss": 2.3849, "step": 8425 }, { "epoch": 0.01, "learning_rate": 0.0001999876631013848, "loss": 2.4507, "step": 8430 }, { "epoch": 0.01, "learning_rate": 0.00019998764846277612, "loss": 2.3732, "step": 8435 }, { "epoch": 0.01, "learning_rate": 0.0001999876338154882, "loss": 2.2734, "step": 8440 }, { "epoch": 0.01, "learning_rate": 0.00019998761915952112, "loss": 2.6901, "step": 8445 }, { "epoch": 0.01, "learning_rate": 0.0001999876044948749, "loss": 2.5284, "step": 8450 }, { "epoch": 0.01, "learning_rate": 0.00019998758982154942, "loss": 2.4051, "step": 8455 }, { "epoch": 0.01, "learning_rate": 0.0001999875751395448, "loss": 2.6417, "step": 8460 }, { "epoch": 0.01, "learning_rate": 0.000199987560448861, "loss": 2.4962, "step": 8465 }, { "epoch": 0.01, "learning_rate": 0.000199987545749498, "loss": 2.5253, "step": 8470 }, { "epoch": 0.01, "learning_rate": 0.0001999875310414558, "loss": 2.3919, "step": 8475 }, { "epoch": 0.01, "learning_rate": 0.00019998751632473445, "loss": 2.3322, "step": 8480 }, { "epoch": 0.01, "learning_rate": 0.00019998750159933392, "loss": 2.4986, "step": 8485 }, { "epoch": 0.01, "learning_rate": 0.00019998748686525422, "loss": 2.4157, "step": 8490 }, { "epoch": 0.01, "learning_rate": 0.0001999874721224953, "loss": 2.2829, "step": 8495 }, { "epoch": 0.01, "learning_rate": 0.00019998745737105723, "loss": 2.4797, "step": 8500 }, { "epoch": 0.01, "learning_rate": 0.00019998744261094, "loss": 2.3625, "step": 8505 }, { "epoch": 0.01, "learning_rate": 0.0001999874278421436, "loss": 2.4289, "step": 8510 }, { "epoch": 0.01, "learning_rate": 0.000199987413064668, "loss": 2.5305, "step": 8515 }, { "epoch": 0.01, "learning_rate": 0.00019998739827851325, "loss": 2.3624, "step": 8520 }, { "epoch": 0.01, "learning_rate": 0.00019998738348367932, "loss": 2.4507, "step": 8525 }, { "epoch": 0.01, "learning_rate": 0.00019998736868016622, "loss": 2.438, "step": 8530 }, { "epoch": 0.01, "learning_rate": 0.00019998735386797395, "loss": 2.5493, "step": 8535 }, { "epoch": 0.01, "learning_rate": 0.00019998733904710254, "loss": 2.6027, "step": 8540 }, { "epoch": 0.01, "learning_rate": 0.00019998732421755194, "loss": 2.6591, "step": 8545 }, { "epoch": 0.01, "learning_rate": 0.0001999873093793222, "loss": 2.5961, "step": 8550 }, { "epoch": 0.01, "learning_rate": 0.00019998729453241326, "loss": 2.6149, "step": 8555 }, { "epoch": 0.01, "learning_rate": 0.00019998727967682523, "loss": 2.4328, "step": 8560 }, { "epoch": 0.01, "learning_rate": 0.00019998726481255797, "loss": 2.7167, "step": 8565 }, { "epoch": 0.01, "learning_rate": 0.0001999872499396116, "loss": 2.621, "step": 8570 }, { "epoch": 0.01, "learning_rate": 0.00019998723505798604, "loss": 2.4871, "step": 8575 }, { "epoch": 0.01, "learning_rate": 0.00019998722016768134, "loss": 2.4709, "step": 8580 }, { "epoch": 0.01, "learning_rate": 0.0001999872052686975, "loss": 2.639, "step": 8585 }, { "epoch": 0.01, "learning_rate": 0.0001999871903610345, "loss": 2.4479, "step": 8590 }, { "epoch": 0.01, "learning_rate": 0.00019998717544469235, "loss": 2.4216, "step": 8595 }, { "epoch": 0.01, "learning_rate": 0.00019998716051967104, "loss": 2.7285, "step": 8600 }, { "epoch": 0.01, "learning_rate": 0.00019998714558597057, "loss": 2.6347, "step": 8605 }, { "epoch": 0.01, "learning_rate": 0.000199987130643591, "loss": 2.7399, "step": 8610 }, { "epoch": 0.01, "learning_rate": 0.00019998711569253224, "loss": 2.4631, "step": 8615 }, { "epoch": 0.01, "learning_rate": 0.00019998710073279433, "loss": 2.6147, "step": 8620 }, { "epoch": 0.01, "learning_rate": 0.0001999870857643773, "loss": 2.7385, "step": 8625 }, { "epoch": 0.01, "learning_rate": 0.00019998707078728111, "loss": 2.5324, "step": 8630 }, { "epoch": 0.01, "learning_rate": 0.00019998705580150583, "loss": 2.6185, "step": 8635 }, { "epoch": 0.01, "learning_rate": 0.00019998704080705136, "loss": 2.4729, "step": 8640 }, { "epoch": 0.01, "learning_rate": 0.00019998702580391778, "loss": 2.3925, "step": 8645 }, { "epoch": 0.01, "learning_rate": 0.00019998701079210505, "loss": 2.7546, "step": 8650 }, { "epoch": 0.01, "learning_rate": 0.00019998699577161318, "loss": 2.5315, "step": 8655 }, { "epoch": 0.01, "learning_rate": 0.00019998698074244218, "loss": 2.5595, "step": 8660 }, { "epoch": 0.01, "learning_rate": 0.00019998696570459204, "loss": 2.4245, "step": 8665 }, { "epoch": 0.01, "learning_rate": 0.00019998695065806277, "loss": 2.5762, "step": 8670 }, { "epoch": 0.01, "learning_rate": 0.0001999869356028544, "loss": 2.2482, "step": 8675 }, { "epoch": 0.01, "learning_rate": 0.0001999869205389669, "loss": 2.6133, "step": 8680 }, { "epoch": 0.01, "learning_rate": 0.00019998690546640022, "loss": 2.3625, "step": 8685 }, { "epoch": 0.01, "learning_rate": 0.00019998689038515445, "loss": 2.3822, "step": 8690 }, { "epoch": 0.01, "learning_rate": 0.00019998687529522956, "loss": 2.3996, "step": 8695 }, { "epoch": 0.01, "learning_rate": 0.00019998686019662552, "loss": 2.3756, "step": 8700 }, { "epoch": 0.01, "learning_rate": 0.00019998684508934237, "loss": 2.4049, "step": 8705 }, { "epoch": 0.01, "learning_rate": 0.00019998682997338012, "loss": 2.6368, "step": 8710 }, { "epoch": 0.01, "learning_rate": 0.0001999868148487387, "loss": 2.585, "step": 8715 }, { "epoch": 0.01, "learning_rate": 0.00019998679971541821, "loss": 2.4734, "step": 8720 }, { "epoch": 0.01, "learning_rate": 0.0001999867845734186, "loss": 2.5682, "step": 8725 }, { "epoch": 0.01, "learning_rate": 0.00019998676942273986, "loss": 2.2908, "step": 8730 }, { "epoch": 0.01, "learning_rate": 0.00019998675426338204, "loss": 2.3191, "step": 8735 }, { "epoch": 0.01, "learning_rate": 0.00019998673909534507, "loss": 2.4821, "step": 8740 }, { "epoch": 0.01, "learning_rate": 0.00019998672391862896, "loss": 2.5785, "step": 8745 }, { "epoch": 0.01, "learning_rate": 0.0001999867087332338, "loss": 2.5052, "step": 8750 }, { "epoch": 0.01, "learning_rate": 0.00019998669353915948, "loss": 2.5428, "step": 8755 }, { "epoch": 0.01, "learning_rate": 0.0001999866783364061, "loss": 2.6008, "step": 8760 }, { "epoch": 0.01, "learning_rate": 0.00019998666312497359, "loss": 2.2407, "step": 8765 }, { "epoch": 0.01, "learning_rate": 0.00019998664790486196, "loss": 2.3564, "step": 8770 }, { "epoch": 0.01, "learning_rate": 0.00019998663267607126, "loss": 2.4997, "step": 8775 }, { "epoch": 0.01, "learning_rate": 0.00019998661743860142, "loss": 2.613, "step": 8780 }, { "epoch": 0.01, "learning_rate": 0.0001999866021924525, "loss": 2.588, "step": 8785 }, { "epoch": 0.01, "learning_rate": 0.00019998658693762449, "loss": 2.1804, "step": 8790 }, { "epoch": 0.01, "learning_rate": 0.0001999865716741174, "loss": 2.3337, "step": 8795 }, { "epoch": 0.01, "learning_rate": 0.00019998655640193115, "loss": 2.9856, "step": 8800 }, { "epoch": 0.01, "learning_rate": 0.00019998654112106582, "loss": 2.5314, "step": 8805 }, { "epoch": 0.01, "learning_rate": 0.00019998652583152145, "loss": 2.454, "step": 8810 }, { "epoch": 0.01, "learning_rate": 0.00019998651053329793, "loss": 2.3367, "step": 8815 }, { "epoch": 0.01, "learning_rate": 0.00019998649522639535, "loss": 2.497, "step": 8820 }, { "epoch": 0.01, "learning_rate": 0.00019998647991081368, "loss": 2.328, "step": 8825 }, { "epoch": 0.01, "learning_rate": 0.00019998646458655288, "loss": 2.4763, "step": 8830 }, { "epoch": 0.01, "learning_rate": 0.00019998644925361302, "loss": 2.6595, "step": 8835 }, { "epoch": 0.01, "learning_rate": 0.00019998643391199407, "loss": 2.4011, "step": 8840 }, { "epoch": 0.01, "learning_rate": 0.00019998641856169605, "loss": 2.2727, "step": 8845 }, { "epoch": 0.01, "learning_rate": 0.00019998640320271894, "loss": 2.5044, "step": 8850 }, { "epoch": 0.01, "learning_rate": 0.00019998638783506274, "loss": 2.5862, "step": 8855 }, { "epoch": 0.01, "learning_rate": 0.00019998637245872747, "loss": 2.5068, "step": 8860 }, { "epoch": 0.01, "learning_rate": 0.0001999863570737131, "loss": 2.6231, "step": 8865 }, { "epoch": 0.01, "learning_rate": 0.00019998634168001965, "loss": 2.2372, "step": 8870 }, { "epoch": 0.01, "learning_rate": 0.00019998632627764716, "loss": 2.4787, "step": 8875 }, { "epoch": 0.01, "learning_rate": 0.00019998631086659557, "loss": 2.3037, "step": 8880 }, { "epoch": 0.01, "learning_rate": 0.0001999862954468649, "loss": 2.4987, "step": 8885 }, { "epoch": 0.01, "learning_rate": 0.00019998628001845515, "loss": 2.4102, "step": 8890 }, { "epoch": 0.01, "learning_rate": 0.00019998626458136634, "loss": 2.4029, "step": 8895 }, { "epoch": 0.01, "learning_rate": 0.00019998624913559847, "loss": 2.4455, "step": 8900 }, { "epoch": 0.01, "learning_rate": 0.0001999862336811515, "loss": 2.7084, "step": 8905 }, { "epoch": 0.01, "learning_rate": 0.0001999862182180255, "loss": 2.3189, "step": 8910 }, { "epoch": 0.01, "learning_rate": 0.0001999862027462204, "loss": 2.3658, "step": 8915 }, { "epoch": 0.01, "learning_rate": 0.00019998618726573626, "loss": 2.3893, "step": 8920 }, { "epoch": 0.01, "learning_rate": 0.00019998617177657306, "loss": 2.6753, "step": 8925 }, { "epoch": 0.01, "learning_rate": 0.00019998615627873077, "loss": 2.3598, "step": 8930 }, { "epoch": 0.01, "learning_rate": 0.00019998614077220946, "loss": 2.4231, "step": 8935 }, { "epoch": 0.01, "learning_rate": 0.00019998612525700904, "loss": 2.5501, "step": 8940 }, { "epoch": 0.01, "learning_rate": 0.0001999861097331296, "loss": 2.1479, "step": 8945 }, { "epoch": 0.01, "learning_rate": 0.0001999860942005711, "loss": 2.3937, "step": 8950 }, { "epoch": 0.01, "learning_rate": 0.0001999860786593335, "loss": 2.4062, "step": 8955 }, { "epoch": 0.01, "learning_rate": 0.0001999860631094169, "loss": 2.3751, "step": 8960 }, { "epoch": 0.01, "learning_rate": 0.00019998604755082122, "loss": 2.4611, "step": 8965 }, { "epoch": 0.01, "learning_rate": 0.0001999860319835465, "loss": 2.5114, "step": 8970 }, { "epoch": 0.01, "learning_rate": 0.0001999860164075927, "loss": 2.4888, "step": 8975 }, { "epoch": 0.01, "learning_rate": 0.0001999860008229599, "loss": 2.4488, "step": 8980 }, { "epoch": 0.01, "learning_rate": 0.00019998598522964802, "loss": 2.4818, "step": 8985 }, { "epoch": 0.01, "learning_rate": 0.0001999859696276571, "loss": 2.2975, "step": 8990 }, { "epoch": 0.01, "learning_rate": 0.00019998595401698714, "loss": 2.3841, "step": 8995 }, { "epoch": 0.01, "learning_rate": 0.00019998593839763812, "loss": 2.3571, "step": 9000 }, { "epoch": 0.01, "learning_rate": 0.0001999859227696101, "loss": 2.4845, "step": 9005 }, { "epoch": 0.01, "learning_rate": 0.00019998590713290296, "loss": 2.5427, "step": 9010 }, { "epoch": 0.01, "learning_rate": 0.00019998589148751685, "loss": 2.6918, "step": 9015 }, { "epoch": 0.01, "learning_rate": 0.00019998587583345168, "loss": 2.2234, "step": 9020 }, { "epoch": 0.01, "learning_rate": 0.0001999858601707075, "loss": 2.3232, "step": 9025 }, { "epoch": 0.01, "learning_rate": 0.00019998584449928424, "loss": 2.4575, "step": 9030 }, { "epoch": 0.01, "learning_rate": 0.00019998582881918195, "loss": 2.3962, "step": 9035 }, { "epoch": 0.01, "learning_rate": 0.00019998581313040065, "loss": 2.7066, "step": 9040 }, { "epoch": 0.01, "learning_rate": 0.00019998579743294034, "loss": 2.4354, "step": 9045 }, { "epoch": 0.01, "learning_rate": 0.00019998578172680096, "loss": 2.3431, "step": 9050 }, { "epoch": 0.01, "learning_rate": 0.00019998576601198257, "loss": 2.5614, "step": 9055 }, { "epoch": 0.01, "learning_rate": 0.0001999857502884851, "loss": 2.5538, "step": 9060 }, { "epoch": 0.01, "learning_rate": 0.00019998573455630867, "loss": 2.3665, "step": 9065 }, { "epoch": 0.01, "learning_rate": 0.0001999857188154532, "loss": 2.4864, "step": 9070 }, { "epoch": 0.01, "learning_rate": 0.00019998570306591873, "loss": 2.6007, "step": 9075 }, { "epoch": 0.01, "learning_rate": 0.0001999856873077052, "loss": 2.5006, "step": 9080 }, { "epoch": 0.01, "learning_rate": 0.00019998567154081264, "loss": 2.5271, "step": 9085 }, { "epoch": 0.01, "learning_rate": 0.0001999856557652411, "loss": 2.47, "step": 9090 }, { "epoch": 0.01, "learning_rate": 0.0001999856399809905, "loss": 2.3941, "step": 9095 }, { "epoch": 0.01, "learning_rate": 0.00019998562418806093, "loss": 2.493, "step": 9100 }, { "epoch": 0.01, "learning_rate": 0.00019998560838645232, "loss": 2.5728, "step": 9105 }, { "epoch": 0.01, "learning_rate": 0.00019998559257616472, "loss": 2.594, "step": 9110 }, { "epoch": 0.01, "learning_rate": 0.00019998557675719806, "loss": 2.6892, "step": 9115 }, { "epoch": 0.01, "learning_rate": 0.00019998556092955242, "loss": 2.3456, "step": 9120 }, { "epoch": 0.01, "learning_rate": 0.00019998554509322776, "loss": 2.5928, "step": 9125 }, { "epoch": 0.01, "learning_rate": 0.00019998552924822412, "loss": 2.6073, "step": 9130 }, { "epoch": 0.01, "learning_rate": 0.00019998551339454147, "loss": 2.4351, "step": 9135 }, { "epoch": 0.01, "learning_rate": 0.00019998549753217978, "loss": 2.4864, "step": 9140 }, { "epoch": 0.01, "learning_rate": 0.0001999854816611391, "loss": 2.4912, "step": 9145 }, { "epoch": 0.01, "learning_rate": 0.00019998546578141942, "loss": 2.4551, "step": 9150 }, { "epoch": 0.01, "learning_rate": 0.00019998544989302072, "loss": 2.4463, "step": 9155 }, { "epoch": 0.01, "learning_rate": 0.00019998543399594304, "loss": 2.3603, "step": 9160 }, { "epoch": 0.01, "learning_rate": 0.00019998541809018638, "loss": 2.6566, "step": 9165 }, { "epoch": 0.01, "learning_rate": 0.0001999854021757507, "loss": 2.3337, "step": 9170 }, { "epoch": 0.01, "learning_rate": 0.000199985386252636, "loss": 2.2276, "step": 9175 }, { "epoch": 0.01, "learning_rate": 0.00019998537032084234, "loss": 2.2761, "step": 9180 }, { "epoch": 0.01, "learning_rate": 0.00019998535438036968, "loss": 2.4544, "step": 9185 }, { "epoch": 0.01, "learning_rate": 0.00019998533843121804, "loss": 2.6465, "step": 9190 }, { "epoch": 0.01, "learning_rate": 0.00019998532247338742, "loss": 2.7588, "step": 9195 }, { "epoch": 0.01, "learning_rate": 0.00019998530650687778, "loss": 2.5723, "step": 9200 }, { "epoch": 0.01, "learning_rate": 0.00019998529053168916, "loss": 2.7165, "step": 9205 }, { "epoch": 0.01, "learning_rate": 0.00019998527454782156, "loss": 2.5715, "step": 9210 }, { "epoch": 0.01, "learning_rate": 0.00019998525855527497, "loss": 2.5548, "step": 9215 }, { "epoch": 0.01, "learning_rate": 0.0001999852425540494, "loss": 2.3141, "step": 9220 }, { "epoch": 0.01, "learning_rate": 0.00019998522654414487, "loss": 2.3339, "step": 9225 }, { "epoch": 0.01, "learning_rate": 0.00019998521052556133, "loss": 2.3205, "step": 9230 }, { "epoch": 0.01, "learning_rate": 0.0001999851944982988, "loss": 2.5238, "step": 9235 }, { "epoch": 0.01, "learning_rate": 0.00019998517846235735, "loss": 2.4086, "step": 9240 }, { "epoch": 0.01, "learning_rate": 0.00019998516241773686, "loss": 2.2727, "step": 9245 }, { "epoch": 0.01, "learning_rate": 0.0001999851463644374, "loss": 2.0573, "step": 9250 }, { "epoch": 0.01, "learning_rate": 0.000199985130302459, "loss": 2.4938, "step": 9255 }, { "epoch": 0.01, "learning_rate": 0.0001999851142318016, "loss": 2.4737, "step": 9260 }, { "epoch": 0.01, "learning_rate": 0.00019998509815246526, "loss": 2.6544, "step": 9265 }, { "epoch": 0.01, "learning_rate": 0.00019998508206444995, "loss": 2.629, "step": 9270 }, { "epoch": 0.01, "learning_rate": 0.00019998506596775566, "loss": 2.1496, "step": 9275 }, { "epoch": 0.01, "learning_rate": 0.0001999850498623824, "loss": 2.6678, "step": 9280 }, { "epoch": 0.01, "learning_rate": 0.00019998503374833018, "loss": 2.4694, "step": 9285 }, { "epoch": 0.01, "learning_rate": 0.00019998501762559897, "loss": 2.5012, "step": 9290 }, { "epoch": 0.01, "learning_rate": 0.00019998500149418882, "loss": 2.6216, "step": 9295 }, { "epoch": 0.01, "learning_rate": 0.00019998498535409972, "loss": 2.6186, "step": 9300 }, { "epoch": 0.01, "learning_rate": 0.00019998496920533163, "loss": 2.3252, "step": 9305 }, { "epoch": 0.01, "learning_rate": 0.00019998495304788462, "loss": 2.4164, "step": 9310 }, { "epoch": 0.01, "learning_rate": 0.00019998493688175864, "loss": 2.4692, "step": 9315 }, { "epoch": 0.01, "learning_rate": 0.00019998492070695369, "loss": 2.5259, "step": 9320 }, { "epoch": 0.01, "learning_rate": 0.0001999849045234698, "loss": 2.554, "step": 9325 }, { "epoch": 0.01, "learning_rate": 0.00019998488833130695, "loss": 2.3446, "step": 9330 }, { "epoch": 0.01, "learning_rate": 0.00019998487213046515, "loss": 2.3739, "step": 9335 }, { "epoch": 0.01, "learning_rate": 0.0001999848559209444, "loss": 2.3847, "step": 9340 }, { "epoch": 0.01, "learning_rate": 0.0001999848397027447, "loss": 2.4115, "step": 9345 }, { "epoch": 0.01, "learning_rate": 0.00019998482347586606, "loss": 2.4375, "step": 9350 }, { "epoch": 0.01, "learning_rate": 0.00019998480724030848, "loss": 2.5581, "step": 9355 }, { "epoch": 0.01, "learning_rate": 0.00019998479099607195, "loss": 2.485, "step": 9360 }, { "epoch": 0.01, "learning_rate": 0.00019998477474315646, "loss": 2.2013, "step": 9365 }, { "epoch": 0.01, "learning_rate": 0.00019998475848156204, "loss": 2.4933, "step": 9370 }, { "epoch": 0.01, "learning_rate": 0.0001999847422112887, "loss": 2.5338, "step": 9375 }, { "epoch": 0.01, "learning_rate": 0.0001999847259323364, "loss": 2.4976, "step": 9380 }, { "epoch": 0.01, "learning_rate": 0.00019998470964470517, "loss": 2.2246, "step": 9385 }, { "epoch": 0.01, "learning_rate": 0.000199984693348395, "loss": 2.7514, "step": 9390 }, { "epoch": 0.01, "learning_rate": 0.0001999846770434059, "loss": 2.3547, "step": 9395 }, { "epoch": 0.01, "learning_rate": 0.00019998466072973788, "loss": 2.674, "step": 9400 }, { "epoch": 0.01, "learning_rate": 0.00019998464440739092, "loss": 2.2843, "step": 9405 }, { "epoch": 0.01, "learning_rate": 0.00019998462807636503, "loss": 2.5339, "step": 9410 }, { "epoch": 0.01, "learning_rate": 0.00019998461173666022, "loss": 2.3399, "step": 9415 }, { "epoch": 0.01, "learning_rate": 0.00019998459538827644, "loss": 2.6921, "step": 9420 }, { "epoch": 0.01, "learning_rate": 0.00019998457903121377, "loss": 2.4712, "step": 9425 }, { "epoch": 0.01, "learning_rate": 0.00019998456266547216, "loss": 2.5702, "step": 9430 }, { "epoch": 0.01, "learning_rate": 0.00019998454629105165, "loss": 2.5769, "step": 9435 }, { "epoch": 0.01, "learning_rate": 0.0001999845299079522, "loss": 2.3923, "step": 9440 }, { "epoch": 0.01, "learning_rate": 0.00019998451351617384, "loss": 2.5397, "step": 9445 }, { "epoch": 0.01, "learning_rate": 0.00019998449711571657, "loss": 2.5886, "step": 9450 }, { "epoch": 0.01, "learning_rate": 0.00019998448070658036, "loss": 2.388, "step": 9455 }, { "epoch": 0.01, "learning_rate": 0.00019998446428876526, "loss": 2.5055, "step": 9460 }, { "epoch": 0.01, "learning_rate": 0.00019998444786227122, "loss": 2.6634, "step": 9465 }, { "epoch": 0.01, "learning_rate": 0.00019998443142709829, "loss": 2.3508, "step": 9470 }, { "epoch": 0.01, "learning_rate": 0.00019998441498324642, "loss": 2.5842, "step": 9475 }, { "epoch": 0.01, "learning_rate": 0.00019998439853071567, "loss": 2.4448, "step": 9480 }, { "epoch": 0.01, "learning_rate": 0.00019998438206950597, "loss": 2.5319, "step": 9485 }, { "epoch": 0.01, "learning_rate": 0.00019998436559961742, "loss": 2.4871, "step": 9490 }, { "epoch": 0.01, "learning_rate": 0.00019998434912104994, "loss": 2.3699, "step": 9495 }, { "epoch": 0.01, "learning_rate": 0.00019998433263380353, "loss": 2.4543, "step": 9500 }, { "epoch": 0.01, "learning_rate": 0.00019998431613787825, "loss": 2.3799, "step": 9505 }, { "epoch": 0.01, "learning_rate": 0.00019998429963327406, "loss": 2.5814, "step": 9510 }, { "epoch": 0.01, "learning_rate": 0.00019998428311999097, "loss": 2.5413, "step": 9515 }, { "epoch": 0.01, "learning_rate": 0.00019998426659802897, "loss": 2.4872, "step": 9520 }, { "epoch": 0.01, "learning_rate": 0.00019998425006738808, "loss": 2.4769, "step": 9525 }, { "epoch": 0.01, "learning_rate": 0.0001999842335280683, "loss": 2.4815, "step": 9530 }, { "epoch": 0.01, "learning_rate": 0.00019998421698006963, "loss": 2.6861, "step": 9535 }, { "epoch": 0.01, "learning_rate": 0.00019998420042339205, "loss": 2.6536, "step": 9540 }, { "epoch": 0.01, "learning_rate": 0.0001999841838580356, "loss": 2.4387, "step": 9545 }, { "epoch": 0.01, "learning_rate": 0.00019998416728400024, "loss": 2.4469, "step": 9550 }, { "epoch": 0.01, "learning_rate": 0.000199984150701286, "loss": 2.4486, "step": 9555 }, { "epoch": 0.01, "learning_rate": 0.00019998413410989287, "loss": 2.4776, "step": 9560 }, { "epoch": 0.01, "learning_rate": 0.00019998411750982086, "loss": 2.3943, "step": 9565 }, { "epoch": 0.01, "learning_rate": 0.00019998410090106997, "loss": 2.4478, "step": 9570 }, { "epoch": 0.01, "learning_rate": 0.00019998408428364018, "loss": 2.0753, "step": 9575 }, { "epoch": 0.01, "learning_rate": 0.00019998406765753152, "loss": 2.4195, "step": 9580 }, { "epoch": 0.01, "learning_rate": 0.00019998405102274397, "loss": 2.2805, "step": 9585 }, { "epoch": 0.01, "learning_rate": 0.00019998403437927758, "loss": 2.365, "step": 9590 }, { "epoch": 0.01, "learning_rate": 0.0001999840177271323, "loss": 2.5554, "step": 9595 }, { "epoch": 0.01, "learning_rate": 0.0001999840010663081, "loss": 2.5545, "step": 9600 }, { "epoch": 0.01, "learning_rate": 0.00019998398439680505, "loss": 2.5549, "step": 9605 }, { "epoch": 0.01, "learning_rate": 0.00019998396771862313, "loss": 2.3808, "step": 9610 }, { "epoch": 0.01, "learning_rate": 0.00019998395103176236, "loss": 2.6222, "step": 9615 }, { "epoch": 0.01, "learning_rate": 0.0001999839343362227, "loss": 2.2378, "step": 9620 }, { "epoch": 0.01, "learning_rate": 0.00019998391763200417, "loss": 2.4597, "step": 9625 }, { "epoch": 0.01, "learning_rate": 0.00019998390091910677, "loss": 2.3753, "step": 9630 }, { "epoch": 0.01, "learning_rate": 0.0001999838841975305, "loss": 2.4199, "step": 9635 }, { "epoch": 0.01, "learning_rate": 0.00019998386746727538, "loss": 2.436, "step": 9640 }, { "epoch": 0.01, "learning_rate": 0.0001999838507283414, "loss": 2.5025, "step": 9645 }, { "epoch": 0.01, "learning_rate": 0.00019998383398072856, "loss": 2.5538, "step": 9650 }, { "epoch": 0.01, "learning_rate": 0.00019998381722443687, "loss": 2.537, "step": 9655 }, { "epoch": 0.01, "learning_rate": 0.0001999838004594663, "loss": 2.4635, "step": 9660 }, { "epoch": 0.01, "learning_rate": 0.00019998378368581688, "loss": 2.3456, "step": 9665 }, { "epoch": 0.01, "learning_rate": 0.00019998376690348862, "loss": 2.1818, "step": 9670 }, { "epoch": 0.01, "learning_rate": 0.00019998375011248147, "loss": 2.6232, "step": 9675 }, { "epoch": 0.01, "learning_rate": 0.0001999837333127955, "loss": 2.4076, "step": 9680 }, { "epoch": 0.01, "learning_rate": 0.00019998371650443067, "loss": 2.4803, "step": 9685 }, { "epoch": 0.01, "learning_rate": 0.00019998369968738698, "loss": 2.3332, "step": 9690 }, { "epoch": 0.01, "learning_rate": 0.00019998368286166444, "loss": 2.4951, "step": 9695 }, { "epoch": 0.01, "learning_rate": 0.00019998366602726306, "loss": 2.562, "step": 9700 }, { "epoch": 0.01, "learning_rate": 0.00019998364918418285, "loss": 2.4566, "step": 9705 }, { "epoch": 0.01, "learning_rate": 0.0001999836323324238, "loss": 2.3356, "step": 9710 }, { "epoch": 0.01, "learning_rate": 0.00019998361547198586, "loss": 2.4319, "step": 9715 }, { "epoch": 0.01, "learning_rate": 0.00019998359860286914, "loss": 2.5457, "step": 9720 }, { "epoch": 0.01, "learning_rate": 0.00019998358172507353, "loss": 2.6287, "step": 9725 }, { "epoch": 0.01, "learning_rate": 0.00019998356483859914, "loss": 2.3725, "step": 9730 }, { "epoch": 0.01, "learning_rate": 0.00019998354794344584, "loss": 2.4876, "step": 9735 }, { "epoch": 0.01, "learning_rate": 0.00019998353103961377, "loss": 2.5241, "step": 9740 }, { "epoch": 0.01, "learning_rate": 0.00019998351412710283, "loss": 2.5487, "step": 9745 }, { "epoch": 0.01, "learning_rate": 0.0001999834972059131, "loss": 2.4608, "step": 9750 }, { "epoch": 0.01, "learning_rate": 0.0001999834802760445, "loss": 2.5501, "step": 9755 }, { "epoch": 0.01, "learning_rate": 0.00019998346333749707, "loss": 2.4901, "step": 9760 }, { "epoch": 0.01, "learning_rate": 0.00019998344639027082, "loss": 2.5478, "step": 9765 }, { "epoch": 0.01, "learning_rate": 0.00019998342943436577, "loss": 2.5238, "step": 9770 }, { "epoch": 0.01, "learning_rate": 0.00019998341246978187, "loss": 2.4842, "step": 9775 }, { "epoch": 0.01, "learning_rate": 0.00019998339549651915, "loss": 2.5017, "step": 9780 }, { "epoch": 0.01, "learning_rate": 0.00019998337851457763, "loss": 2.3788, "step": 9785 }, { "epoch": 0.01, "learning_rate": 0.00019998336152395724, "loss": 2.3579, "step": 9790 }, { "epoch": 0.01, "learning_rate": 0.00019998334452465808, "loss": 2.4703, "step": 9795 }, { "epoch": 0.01, "learning_rate": 0.00019998332751668008, "loss": 2.4456, "step": 9800 }, { "epoch": 0.01, "learning_rate": 0.00019998331050002328, "loss": 2.4326, "step": 9805 }, { "epoch": 0.01, "learning_rate": 0.00019998329347468765, "loss": 2.5756, "step": 9810 }, { "epoch": 0.01, "learning_rate": 0.0001999832764406732, "loss": 2.668, "step": 9815 }, { "epoch": 0.01, "learning_rate": 0.00019998325939797994, "loss": 2.3295, "step": 9820 }, { "epoch": 0.01, "learning_rate": 0.00019998324234660788, "loss": 2.4382, "step": 9825 }, { "epoch": 0.01, "learning_rate": 0.00019998322528655703, "loss": 2.2991, "step": 9830 }, { "epoch": 0.01, "learning_rate": 0.00019998320821782738, "loss": 2.7737, "step": 9835 }, { "epoch": 0.01, "learning_rate": 0.00019998319114041888, "loss": 2.3518, "step": 9840 }, { "epoch": 0.01, "learning_rate": 0.0001999831740543316, "loss": 2.2385, "step": 9845 }, { "epoch": 0.01, "learning_rate": 0.0001999831569595655, "loss": 2.503, "step": 9850 }, { "epoch": 0.01, "learning_rate": 0.00019998313985612062, "loss": 2.2877, "step": 9855 }, { "epoch": 0.01, "learning_rate": 0.00019998312274399695, "loss": 2.4239, "step": 9860 }, { "epoch": 0.01, "learning_rate": 0.00019998310562319445, "loss": 2.4293, "step": 9865 }, { "epoch": 0.01, "learning_rate": 0.00019998308849371318, "loss": 2.3999, "step": 9870 }, { "epoch": 0.01, "learning_rate": 0.0001999830713555531, "loss": 2.3348, "step": 9875 }, { "epoch": 0.01, "learning_rate": 0.00019998305420871424, "loss": 2.3459, "step": 9880 }, { "epoch": 0.01, "learning_rate": 0.0001999830370531966, "loss": 2.4825, "step": 9885 }, { "epoch": 0.01, "learning_rate": 0.00019998301988900013, "loss": 2.4744, "step": 9890 }, { "epoch": 0.01, "learning_rate": 0.00019998300271612492, "loss": 2.4583, "step": 9895 }, { "epoch": 0.01, "learning_rate": 0.00019998298553457086, "loss": 2.3683, "step": 9900 }, { "epoch": 0.01, "learning_rate": 0.00019998296834433806, "loss": 2.6798, "step": 9905 }, { "epoch": 0.01, "learning_rate": 0.00019998295114542647, "loss": 2.5938, "step": 9910 }, { "epoch": 0.01, "learning_rate": 0.00019998293393783608, "loss": 2.1591, "step": 9915 }, { "epoch": 0.01, "learning_rate": 0.00019998291672156693, "loss": 2.5002, "step": 9920 }, { "epoch": 0.01, "learning_rate": 0.000199982899496619, "loss": 2.4791, "step": 9925 }, { "epoch": 0.01, "learning_rate": 0.0001999828822629923, "loss": 2.3907, "step": 9930 }, { "epoch": 0.01, "learning_rate": 0.0001999828650206868, "loss": 2.3974, "step": 9935 }, { "epoch": 0.01, "learning_rate": 0.0001999828477697025, "loss": 2.4692, "step": 9940 }, { "epoch": 0.01, "learning_rate": 0.00019998283051003948, "loss": 2.7643, "step": 9945 }, { "epoch": 0.01, "learning_rate": 0.00019998281324169764, "loss": 2.0845, "step": 9950 }, { "epoch": 0.01, "learning_rate": 0.00019998279596467706, "loss": 2.4465, "step": 9955 }, { "epoch": 0.01, "learning_rate": 0.0001999827786789777, "loss": 2.3468, "step": 9960 }, { "epoch": 0.01, "learning_rate": 0.00019998276138459957, "loss": 2.5807, "step": 9965 }, { "epoch": 0.01, "learning_rate": 0.0001999827440815427, "loss": 2.3106, "step": 9970 }, { "epoch": 0.01, "learning_rate": 0.00019998272676980704, "loss": 2.3665, "step": 9975 }, { "epoch": 0.01, "learning_rate": 0.00019998270944939262, "loss": 2.7522, "step": 9980 }, { "epoch": 0.01, "learning_rate": 0.00019998269212029946, "loss": 2.5925, "step": 9985 }, { "epoch": 0.01, "learning_rate": 0.00019998267478252749, "loss": 2.7227, "step": 9990 }, { "epoch": 0.01, "learning_rate": 0.0001999826574360768, "loss": 2.5847, "step": 9995 }, { "epoch": 0.01, "learning_rate": 0.00019998264008094734, "loss": 2.6716, "step": 10000 }, { "epoch": 0.01, "learning_rate": 0.00019998262271713914, "loss": 2.3696, "step": 10005 }, { "epoch": 0.01, "learning_rate": 0.00019998260534465217, "loss": 2.2694, "step": 10010 }, { "epoch": 0.01, "learning_rate": 0.00019998258796348643, "loss": 2.501, "step": 10015 }, { "epoch": 0.01, "learning_rate": 0.00019998257057364196, "loss": 2.4445, "step": 10020 }, { "epoch": 0.01, "learning_rate": 0.00019998255317511875, "loss": 2.2597, "step": 10025 }, { "epoch": 0.01, "learning_rate": 0.00019998253576791676, "loss": 2.5403, "step": 10030 }, { "epoch": 0.01, "learning_rate": 0.00019998251835203604, "loss": 2.4338, "step": 10035 }, { "epoch": 0.01, "learning_rate": 0.00019998250092747658, "loss": 2.4408, "step": 10040 }, { "epoch": 0.01, "learning_rate": 0.0001999824834942384, "loss": 2.3402, "step": 10045 }, { "epoch": 0.01, "learning_rate": 0.00019998246605232143, "loss": 2.2358, "step": 10050 }, { "epoch": 0.01, "learning_rate": 0.00019998244860172575, "loss": 2.4517, "step": 10055 }, { "epoch": 0.01, "learning_rate": 0.00019998243114245133, "loss": 2.2565, "step": 10060 }, { "epoch": 0.01, "learning_rate": 0.00019998241367449814, "loss": 2.5145, "step": 10065 }, { "epoch": 0.01, "learning_rate": 0.00019998239619786626, "loss": 2.2973, "step": 10070 }, { "epoch": 0.01, "learning_rate": 0.00019998237871255562, "loss": 2.4887, "step": 10075 }, { "epoch": 0.01, "learning_rate": 0.00019998236121856623, "loss": 2.2674, "step": 10080 }, { "epoch": 0.01, "learning_rate": 0.00019998234371589813, "loss": 2.472, "step": 10085 }, { "epoch": 0.01, "learning_rate": 0.0001999823262045513, "loss": 2.3701, "step": 10090 }, { "epoch": 0.01, "learning_rate": 0.00019998230868452575, "loss": 2.3732, "step": 10095 }, { "epoch": 0.01, "learning_rate": 0.00019998229115582148, "loss": 2.5064, "step": 10100 }, { "epoch": 0.01, "learning_rate": 0.00019998227361843848, "loss": 2.4974, "step": 10105 }, { "epoch": 0.01, "learning_rate": 0.0001999822560723767, "loss": 2.3062, "step": 10110 }, { "epoch": 0.01, "learning_rate": 0.00019998223851763625, "loss": 2.6467, "step": 10115 }, { "epoch": 0.01, "learning_rate": 0.00019998222095421708, "loss": 2.5184, "step": 10120 }, { "epoch": 0.01, "learning_rate": 0.00019998220338211917, "loss": 2.5022, "step": 10125 }, { "epoch": 0.01, "learning_rate": 0.00019998218580134257, "loss": 2.5341, "step": 10130 }, { "epoch": 0.01, "learning_rate": 0.00019998216821188723, "loss": 2.5238, "step": 10135 }, { "epoch": 0.01, "learning_rate": 0.00019998215061375318, "loss": 2.554, "step": 10140 }, { "epoch": 0.01, "learning_rate": 0.00019998213300694042, "loss": 2.1777, "step": 10145 }, { "epoch": 0.01, "learning_rate": 0.00019998211539144894, "loss": 2.4832, "step": 10150 }, { "epoch": 0.01, "learning_rate": 0.00019998209776727875, "loss": 2.2942, "step": 10155 }, { "epoch": 0.01, "learning_rate": 0.00019998208013442985, "loss": 2.1188, "step": 10160 }, { "epoch": 0.01, "learning_rate": 0.00019998206249290226, "loss": 2.1989, "step": 10165 }, { "epoch": 0.01, "learning_rate": 0.00019998204484269593, "loss": 2.6481, "step": 10170 }, { "epoch": 0.01, "learning_rate": 0.00019998202718381094, "loss": 2.5382, "step": 10175 }, { "epoch": 0.01, "learning_rate": 0.00019998200951624722, "loss": 2.5676, "step": 10180 }, { "epoch": 0.01, "learning_rate": 0.0001999819918400048, "loss": 2.5093, "step": 10185 }, { "epoch": 0.01, "learning_rate": 0.00019998197415508368, "loss": 2.4684, "step": 10190 }, { "epoch": 0.01, "learning_rate": 0.0001999819564614839, "loss": 2.6515, "step": 10195 }, { "epoch": 0.01, "learning_rate": 0.00019998193875920536, "loss": 2.6088, "step": 10200 }, { "epoch": 0.01, "learning_rate": 0.00019998192104824818, "loss": 2.2971, "step": 10205 }, { "epoch": 0.01, "learning_rate": 0.00019998190332861225, "loss": 2.5469, "step": 10210 }, { "epoch": 0.01, "learning_rate": 0.0001999818856002977, "loss": 2.294, "step": 10215 }, { "epoch": 0.01, "learning_rate": 0.0001999818678633044, "loss": 2.6456, "step": 10220 }, { "epoch": 0.01, "learning_rate": 0.00019998185011763244, "loss": 2.3013, "step": 10225 }, { "epoch": 0.01, "learning_rate": 0.0001999818323632818, "loss": 2.3921, "step": 10230 }, { "epoch": 0.01, "learning_rate": 0.00019998181460025241, "loss": 2.5392, "step": 10235 }, { "epoch": 0.01, "learning_rate": 0.0001999817968285444, "loss": 2.3093, "step": 10240 }, { "epoch": 0.01, "learning_rate": 0.0001999817790481577, "loss": 2.5359, "step": 10245 }, { "epoch": 0.01, "learning_rate": 0.0001999817612590923, "loss": 2.4225, "step": 10250 }, { "epoch": 0.01, "learning_rate": 0.00019998174346134824, "loss": 2.4767, "step": 10255 }, { "epoch": 0.01, "learning_rate": 0.0001999817256549255, "loss": 2.5893, "step": 10260 }, { "epoch": 0.01, "learning_rate": 0.00019998170783982406, "loss": 2.2597, "step": 10265 }, { "epoch": 0.01, "learning_rate": 0.00019998169001604396, "loss": 2.3343, "step": 10270 }, { "epoch": 0.01, "learning_rate": 0.0001999816721835852, "loss": 2.351, "step": 10275 }, { "epoch": 0.01, "learning_rate": 0.00019998165434244775, "loss": 2.4915, "step": 10280 }, { "epoch": 0.01, "learning_rate": 0.00019998163649263164, "loss": 2.7405, "step": 10285 }, { "epoch": 0.01, "learning_rate": 0.00019998161863413685, "loss": 2.4724, "step": 10290 }, { "epoch": 0.01, "learning_rate": 0.0001999816007669634, "loss": 2.3576, "step": 10295 }, { "epoch": 0.01, "learning_rate": 0.00019998158289111128, "loss": 2.3789, "step": 10300 }, { "epoch": 0.01, "learning_rate": 0.0001999815650065805, "loss": 2.2525, "step": 10305 }, { "epoch": 0.01, "learning_rate": 0.00019998154711337108, "loss": 2.3558, "step": 10310 }, { "epoch": 0.01, "learning_rate": 0.00019998152921148297, "loss": 2.5284, "step": 10315 }, { "epoch": 0.01, "learning_rate": 0.0001999815113009162, "loss": 2.4762, "step": 10320 }, { "epoch": 0.01, "learning_rate": 0.0001999814933816708, "loss": 2.3437, "step": 10325 }, { "epoch": 0.01, "learning_rate": 0.0001999814754537467, "loss": 2.5542, "step": 10330 }, { "epoch": 0.01, "learning_rate": 0.00019998145751714397, "loss": 2.571, "step": 10335 }, { "epoch": 0.01, "learning_rate": 0.00019998143957186256, "loss": 2.3391, "step": 10340 }, { "epoch": 0.01, "learning_rate": 0.00019998142161790255, "loss": 2.418, "step": 10345 }, { "epoch": 0.01, "learning_rate": 0.00019998140365526385, "loss": 2.5464, "step": 10350 }, { "epoch": 0.01, "learning_rate": 0.00019998138568394654, "loss": 2.2553, "step": 10355 }, { "epoch": 0.01, "learning_rate": 0.00019998136770395052, "loss": 2.5427, "step": 10360 }, { "epoch": 0.01, "learning_rate": 0.00019998134971527593, "loss": 2.4642, "step": 10365 }, { "epoch": 0.01, "learning_rate": 0.00019998133171792264, "loss": 2.2765, "step": 10370 }, { "epoch": 0.01, "learning_rate": 0.0001999813137118907, "loss": 2.2825, "step": 10375 }, { "epoch": 0.01, "learning_rate": 0.00019998129569718016, "loss": 2.8506, "step": 10380 }, { "epoch": 0.01, "learning_rate": 0.00019998127767379095, "loss": 2.453, "step": 10385 }, { "epoch": 0.01, "learning_rate": 0.00019998125964172314, "loss": 2.2256, "step": 10390 }, { "epoch": 0.01, "learning_rate": 0.00019998124160097667, "loss": 2.5469, "step": 10395 }, { "epoch": 0.01, "learning_rate": 0.00019998122355155155, "loss": 2.7639, "step": 10400 }, { "epoch": 0.01, "learning_rate": 0.00019998120549344784, "loss": 2.3243, "step": 10405 }, { "epoch": 0.01, "learning_rate": 0.00019998118742666545, "loss": 2.281, "step": 10410 }, { "epoch": 0.01, "learning_rate": 0.00019998116935120446, "loss": 2.5782, "step": 10415 }, { "epoch": 0.01, "learning_rate": 0.00019998115126706485, "loss": 2.4022, "step": 10420 }, { "epoch": 0.01, "learning_rate": 0.0001999811331742466, "loss": 2.5827, "step": 10425 }, { "epoch": 0.01, "learning_rate": 0.0001999811150727497, "loss": 2.3773, "step": 10430 }, { "epoch": 0.01, "learning_rate": 0.0001999810969625742, "loss": 2.5919, "step": 10435 }, { "epoch": 0.01, "learning_rate": 0.0001999810788437201, "loss": 2.5754, "step": 10440 }, { "epoch": 0.01, "learning_rate": 0.00019998106071618735, "loss": 2.5282, "step": 10445 }, { "epoch": 0.01, "learning_rate": 0.00019998104257997598, "loss": 2.4171, "step": 10450 }, { "epoch": 0.01, "learning_rate": 0.00019998102443508602, "loss": 2.3195, "step": 10455 }, { "epoch": 0.01, "learning_rate": 0.00019998100628151742, "loss": 2.4589, "step": 10460 }, { "epoch": 0.01, "learning_rate": 0.00019998098811927018, "loss": 2.4459, "step": 10465 }, { "epoch": 0.01, "learning_rate": 0.0001999809699483444, "loss": 2.4079, "step": 10470 }, { "epoch": 0.01, "learning_rate": 0.00019998095176873996, "loss": 2.5142, "step": 10475 }, { "epoch": 0.01, "learning_rate": 0.0001999809335804569, "loss": 2.4839, "step": 10480 }, { "epoch": 0.01, "learning_rate": 0.0001999809153834953, "loss": 2.3362, "step": 10485 }, { "epoch": 0.01, "learning_rate": 0.00019998089717785503, "loss": 2.2893, "step": 10490 }, { "epoch": 0.01, "learning_rate": 0.00019998087896353614, "loss": 2.2023, "step": 10495 }, { "epoch": 0.01, "learning_rate": 0.00019998086074053868, "loss": 2.4619, "step": 10500 }, { "epoch": 0.01, "learning_rate": 0.00019998084250886264, "loss": 2.6596, "step": 10505 }, { "epoch": 0.01, "learning_rate": 0.00019998082426850796, "loss": 2.2436, "step": 10510 }, { "epoch": 0.01, "learning_rate": 0.0001999808060194747, "loss": 2.3952, "step": 10515 }, { "epoch": 0.01, "learning_rate": 0.00019998078776176285, "loss": 2.2106, "step": 10520 }, { "epoch": 0.01, "learning_rate": 0.0001999807694953724, "loss": 2.4022, "step": 10525 }, { "epoch": 0.01, "learning_rate": 0.00019998075122030335, "loss": 2.5848, "step": 10530 }, { "epoch": 0.01, "learning_rate": 0.0001999807329365557, "loss": 2.5176, "step": 10535 }, { "epoch": 0.01, "learning_rate": 0.00019998071464412948, "loss": 2.4297, "step": 10540 }, { "epoch": 0.01, "learning_rate": 0.00019998069634302468, "loss": 2.4786, "step": 10545 }, { "epoch": 0.01, "learning_rate": 0.00019998067803324125, "loss": 2.2828, "step": 10550 }, { "epoch": 0.01, "learning_rate": 0.00019998065971477927, "loss": 2.2195, "step": 10555 }, { "epoch": 0.01, "learning_rate": 0.0001999806413876387, "loss": 2.6033, "step": 10560 }, { "epoch": 0.01, "learning_rate": 0.00019998062305181952, "loss": 2.1797, "step": 10565 }, { "epoch": 0.01, "learning_rate": 0.00019998060470732178, "loss": 2.3711, "step": 10570 }, { "epoch": 0.01, "learning_rate": 0.00019998058635414546, "loss": 2.1863, "step": 10575 }, { "epoch": 0.01, "learning_rate": 0.0001999805679922906, "loss": 2.5752, "step": 10580 }, { "epoch": 0.01, "learning_rate": 0.00019998054962175712, "loss": 2.5801, "step": 10585 }, { "epoch": 0.01, "learning_rate": 0.00019998053124254506, "loss": 2.4377, "step": 10590 }, { "epoch": 0.01, "learning_rate": 0.00019998051285465443, "loss": 2.7624, "step": 10595 }, { "epoch": 0.01, "learning_rate": 0.00019998049445808525, "loss": 2.5697, "step": 10600 }, { "epoch": 0.01, "learning_rate": 0.00019998047605283752, "loss": 2.5927, "step": 10605 }, { "epoch": 0.01, "learning_rate": 0.0001999804576389112, "loss": 2.5098, "step": 10610 }, { "epoch": 0.01, "learning_rate": 0.00019998043921630628, "loss": 2.4638, "step": 10615 }, { "epoch": 0.01, "learning_rate": 0.00019998042078502284, "loss": 2.3502, "step": 10620 }, { "epoch": 0.01, "learning_rate": 0.0001999804023450608, "loss": 2.2765, "step": 10625 }, { "epoch": 0.01, "learning_rate": 0.0001999803838964202, "loss": 2.5749, "step": 10630 }, { "epoch": 0.01, "learning_rate": 0.0001999803654391011, "loss": 2.3107, "step": 10635 }, { "epoch": 0.01, "learning_rate": 0.00019998034697310337, "loss": 2.5372, "step": 10640 }, { "epoch": 0.01, "learning_rate": 0.00019998032849842713, "loss": 2.2403, "step": 10645 }, { "epoch": 0.01, "learning_rate": 0.00019998031001507228, "loss": 2.4946, "step": 10650 }, { "epoch": 0.01, "learning_rate": 0.00019998029152303894, "loss": 2.3198, "step": 10655 }, { "epoch": 0.01, "learning_rate": 0.00019998027302232702, "loss": 2.5626, "step": 10660 }, { "epoch": 0.01, "learning_rate": 0.00019998025451293657, "loss": 2.1714, "step": 10665 }, { "epoch": 0.01, "learning_rate": 0.00019998023599486752, "loss": 2.3108, "step": 10670 }, { "epoch": 0.01, "learning_rate": 0.00019998021746811997, "loss": 2.4947, "step": 10675 }, { "epoch": 0.01, "learning_rate": 0.00019998019893269385, "loss": 2.319, "step": 10680 }, { "epoch": 0.01, "learning_rate": 0.00019998018038858918, "loss": 2.1148, "step": 10685 }, { "epoch": 0.01, "learning_rate": 0.000199980161835806, "loss": 2.325, "step": 10690 }, { "epoch": 0.01, "learning_rate": 0.00019998014327434426, "loss": 2.4553, "step": 10695 }, { "epoch": 0.01, "learning_rate": 0.000199980124704204, "loss": 2.5565, "step": 10700 }, { "epoch": 0.01, "learning_rate": 0.0001999801061253852, "loss": 2.5449, "step": 10705 }, { "epoch": 0.01, "learning_rate": 0.00019998008753788785, "loss": 2.6652, "step": 10710 }, { "epoch": 0.01, "learning_rate": 0.00019998006894171195, "loss": 2.3767, "step": 10715 }, { "epoch": 0.01, "learning_rate": 0.00019998005033685753, "loss": 2.4926, "step": 10720 }, { "epoch": 0.01, "learning_rate": 0.0001999800317233246, "loss": 2.1747, "step": 10725 }, { "epoch": 0.01, "learning_rate": 0.0001999800131011131, "loss": 2.5619, "step": 10730 }, { "epoch": 0.01, "learning_rate": 0.0001999799944702231, "loss": 2.6276, "step": 10735 }, { "epoch": 0.01, "learning_rate": 0.00019997997583065457, "loss": 2.4658, "step": 10740 }, { "epoch": 0.01, "learning_rate": 0.00019997995718240753, "loss": 2.5733, "step": 10745 }, { "epoch": 0.01, "learning_rate": 0.00019997993852548192, "loss": 2.5212, "step": 10750 }, { "epoch": 0.01, "learning_rate": 0.00019997991985987783, "loss": 2.4937, "step": 10755 }, { "epoch": 0.01, "learning_rate": 0.0001999799011855952, "loss": 2.415, "step": 10760 }, { "epoch": 0.01, "learning_rate": 0.00019997988250263407, "loss": 2.3701, "step": 10765 }, { "epoch": 0.01, "learning_rate": 0.00019997986381099442, "loss": 2.5911, "step": 10770 }, { "epoch": 0.01, "learning_rate": 0.00019997984511067624, "loss": 2.7455, "step": 10775 }, { "epoch": 0.01, "learning_rate": 0.00019997982640167954, "loss": 2.4073, "step": 10780 }, { "epoch": 0.01, "learning_rate": 0.00019997980768400434, "loss": 2.5364, "step": 10785 }, { "epoch": 0.01, "learning_rate": 0.00019997978895765063, "loss": 2.5758, "step": 10790 }, { "epoch": 0.01, "learning_rate": 0.00019997977022261844, "loss": 2.4763, "step": 10795 }, { "epoch": 0.01, "learning_rate": 0.00019997975147890772, "loss": 2.3824, "step": 10800 }, { "epoch": 0.01, "learning_rate": 0.0001999797327265185, "loss": 2.384, "step": 10805 }, { "epoch": 0.01, "learning_rate": 0.00019997971396545076, "loss": 2.4721, "step": 10810 }, { "epoch": 0.01, "learning_rate": 0.00019997969519570453, "loss": 2.8403, "step": 10815 }, { "epoch": 0.01, "learning_rate": 0.0001999796764172798, "loss": 2.3952, "step": 10820 }, { "epoch": 0.01, "learning_rate": 0.00019997965763017658, "loss": 2.5735, "step": 10825 }, { "epoch": 0.01, "learning_rate": 0.00019997963883439487, "loss": 2.4688, "step": 10830 }, { "epoch": 0.01, "learning_rate": 0.00019997962002993463, "loss": 2.6084, "step": 10835 }, { "epoch": 0.01, "learning_rate": 0.00019997960121679592, "loss": 2.4667, "step": 10840 }, { "epoch": 0.01, "learning_rate": 0.00019997958239497868, "loss": 2.7458, "step": 10845 }, { "epoch": 0.01, "learning_rate": 0.000199979563564483, "loss": 2.4085, "step": 10850 }, { "epoch": 0.01, "learning_rate": 0.0001999795447253088, "loss": 2.5596, "step": 10855 }, { "epoch": 0.01, "learning_rate": 0.00019997952587745614, "loss": 2.592, "step": 10860 }, { "epoch": 0.01, "learning_rate": 0.00019997950702092498, "loss": 2.46, "step": 10865 }, { "epoch": 0.01, "learning_rate": 0.00019997948815571532, "loss": 2.5783, "step": 10870 }, { "epoch": 0.01, "learning_rate": 0.00019997946928182721, "loss": 2.441, "step": 10875 }, { "epoch": 0.01, "learning_rate": 0.00019997945039926062, "loss": 2.4168, "step": 10880 }, { "epoch": 0.01, "learning_rate": 0.0001999794315080155, "loss": 2.2499, "step": 10885 }, { "epoch": 0.01, "learning_rate": 0.00019997941260809196, "loss": 2.0727, "step": 10890 }, { "epoch": 0.01, "learning_rate": 0.0001999793936994899, "loss": 2.3397, "step": 10895 }, { "epoch": 0.01, "learning_rate": 0.0001999793747822094, "loss": 2.3988, "step": 10900 }, { "epoch": 0.01, "learning_rate": 0.00019997935585625042, "loss": 2.5731, "step": 10905 }, { "epoch": 0.01, "learning_rate": 0.00019997933692161298, "loss": 2.4224, "step": 10910 }, { "epoch": 0.01, "learning_rate": 0.00019997931797829704, "loss": 2.4046, "step": 10915 }, { "epoch": 0.01, "learning_rate": 0.00019997929902630267, "loss": 2.6267, "step": 10920 }, { "epoch": 0.01, "learning_rate": 0.0001999792800656298, "loss": 2.4695, "step": 10925 }, { "epoch": 0.01, "learning_rate": 0.00019997926109627847, "loss": 2.3921, "step": 10930 }, { "epoch": 0.01, "learning_rate": 0.00019997924211824869, "loss": 2.2779, "step": 10935 }, { "epoch": 0.01, "learning_rate": 0.00019997922313154043, "loss": 2.6126, "step": 10940 }, { "epoch": 0.01, "learning_rate": 0.00019997920413615373, "loss": 2.5356, "step": 10945 }, { "epoch": 0.01, "learning_rate": 0.00019997918513208856, "loss": 2.5047, "step": 10950 }, { "epoch": 0.01, "learning_rate": 0.00019997916611934496, "loss": 2.5633, "step": 10955 }, { "epoch": 0.01, "learning_rate": 0.00019997914709792288, "loss": 2.667, "step": 10960 }, { "epoch": 0.01, "learning_rate": 0.00019997912806782236, "loss": 2.4727, "step": 10965 }, { "epoch": 0.01, "learning_rate": 0.00019997910902904337, "loss": 2.4113, "step": 10970 }, { "epoch": 0.01, "learning_rate": 0.00019997908998158593, "loss": 2.6202, "step": 10975 }, { "epoch": 0.01, "learning_rate": 0.00019997907092545006, "loss": 2.5698, "step": 10980 }, { "epoch": 0.01, "learning_rate": 0.00019997905186063574, "loss": 2.6121, "step": 10985 }, { "epoch": 0.01, "learning_rate": 0.00019997903278714299, "loss": 2.6337, "step": 10990 }, { "epoch": 0.01, "learning_rate": 0.00019997901370497179, "loss": 2.4949, "step": 10995 }, { "epoch": 0.01, "learning_rate": 0.00019997899461412212, "loss": 2.2693, "step": 11000 }, { "epoch": 0.01, "learning_rate": 0.00019997897551459403, "loss": 2.5515, "step": 11005 }, { "epoch": 0.01, "learning_rate": 0.0001999789564063875, "loss": 2.2905, "step": 11010 }, { "epoch": 0.01, "learning_rate": 0.00019997893728950254, "loss": 2.5977, "step": 11015 }, { "epoch": 0.01, "learning_rate": 0.00019997891816393915, "loss": 2.5307, "step": 11020 }, { "epoch": 0.01, "learning_rate": 0.0001999788990296973, "loss": 2.2319, "step": 11025 }, { "epoch": 0.01, "learning_rate": 0.00019997887988677706, "loss": 2.1402, "step": 11030 }, { "epoch": 0.01, "learning_rate": 0.00019997886073517838, "loss": 2.4426, "step": 11035 }, { "epoch": 0.01, "learning_rate": 0.00019997884157490125, "loss": 2.3057, "step": 11040 }, { "epoch": 0.01, "learning_rate": 0.0001999788224059457, "loss": 2.4781, "step": 11045 }, { "epoch": 0.01, "learning_rate": 0.00019997880322831173, "loss": 2.2988, "step": 11050 }, { "epoch": 0.01, "learning_rate": 0.00019997878404199934, "loss": 2.4854, "step": 11055 }, { "epoch": 0.01, "learning_rate": 0.00019997876484700852, "loss": 2.6659, "step": 11060 }, { "epoch": 0.01, "learning_rate": 0.0001999787456433393, "loss": 2.5361, "step": 11065 }, { "epoch": 0.01, "learning_rate": 0.00019997872643099168, "loss": 2.2406, "step": 11070 }, { "epoch": 0.01, "learning_rate": 0.0001999787072099656, "loss": 2.5424, "step": 11075 }, { "epoch": 0.01, "learning_rate": 0.00019997868798026113, "loss": 2.4807, "step": 11080 }, { "epoch": 0.01, "learning_rate": 0.00019997866874187824, "loss": 2.4698, "step": 11085 }, { "epoch": 0.01, "learning_rate": 0.0001999786494948169, "loss": 2.7042, "step": 11090 }, { "epoch": 0.01, "learning_rate": 0.0001999786302390772, "loss": 2.5465, "step": 11095 }, { "epoch": 0.01, "learning_rate": 0.0001999786109746591, "loss": 2.396, "step": 11100 }, { "epoch": 0.01, "learning_rate": 0.00019997859170156255, "loss": 2.3966, "step": 11105 }, { "epoch": 0.01, "learning_rate": 0.0001999785724197876, "loss": 2.2927, "step": 11110 }, { "epoch": 0.01, "learning_rate": 0.0001999785531293343, "loss": 2.336, "step": 11115 }, { "epoch": 0.01, "learning_rate": 0.00019997853383020252, "loss": 2.5682, "step": 11120 }, { "epoch": 0.01, "learning_rate": 0.00019997851452239243, "loss": 2.5712, "step": 11125 }, { "epoch": 0.01, "learning_rate": 0.0001999784952059039, "loss": 2.6416, "step": 11130 }, { "epoch": 0.01, "learning_rate": 0.00019997847588073696, "loss": 2.3506, "step": 11135 }, { "epoch": 0.01, "learning_rate": 0.00019997845654689162, "loss": 2.5925, "step": 11140 }, { "epoch": 0.01, "learning_rate": 0.0001999784372043679, "loss": 2.473, "step": 11145 }, { "epoch": 0.01, "learning_rate": 0.0001999784178531658, "loss": 2.498, "step": 11150 }, { "epoch": 0.01, "learning_rate": 0.00019997839849328531, "loss": 2.4977, "step": 11155 }, { "epoch": 0.01, "learning_rate": 0.00019997837912472642, "loss": 2.4077, "step": 11160 }, { "epoch": 0.01, "learning_rate": 0.00019997835974748917, "loss": 2.3962, "step": 11165 }, { "epoch": 0.01, "learning_rate": 0.0001999783403615735, "loss": 2.4046, "step": 11170 }, { "epoch": 0.01, "learning_rate": 0.00019997832096697946, "loss": 2.5895, "step": 11175 }, { "epoch": 0.01, "learning_rate": 0.00019997830156370702, "loss": 2.4535, "step": 11180 }, { "epoch": 0.01, "learning_rate": 0.0001999782821517562, "loss": 2.7525, "step": 11185 }, { "epoch": 0.01, "learning_rate": 0.00019997826273112707, "loss": 2.3057, "step": 11190 }, { "epoch": 0.01, "learning_rate": 0.00019997824330181952, "loss": 2.6935, "step": 11195 }, { "epoch": 0.01, "learning_rate": 0.0001999782238638336, "loss": 2.5052, "step": 11200 }, { "epoch": 0.01, "learning_rate": 0.00019997820441716927, "loss": 2.3999, "step": 11205 }, { "epoch": 0.01, "learning_rate": 0.0001999781849618266, "loss": 2.2644, "step": 11210 }, { "epoch": 0.01, "learning_rate": 0.00019997816549780558, "loss": 2.082, "step": 11215 }, { "epoch": 0.01, "learning_rate": 0.0001999781460251062, "loss": 2.5398, "step": 11220 }, { "epoch": 0.01, "learning_rate": 0.0001999781265437284, "loss": 2.6221, "step": 11225 }, { "epoch": 0.01, "learning_rate": 0.00019997810705367228, "loss": 2.4482, "step": 11230 }, { "epoch": 0.01, "learning_rate": 0.0001999780875549378, "loss": 2.7528, "step": 11235 }, { "epoch": 0.01, "learning_rate": 0.0001999780680475249, "loss": 2.4769, "step": 11240 }, { "epoch": 0.01, "learning_rate": 0.0001999780485314337, "loss": 2.4324, "step": 11245 }, { "epoch": 0.01, "learning_rate": 0.00019997802900666414, "loss": 2.2279, "step": 11250 }, { "epoch": 0.01, "learning_rate": 0.0001999780094732162, "loss": 2.5746, "step": 11255 }, { "epoch": 0.01, "learning_rate": 0.0001999779899310899, "loss": 2.5469, "step": 11260 }, { "epoch": 0.01, "learning_rate": 0.00019997797038028527, "loss": 2.6787, "step": 11265 }, { "epoch": 0.01, "learning_rate": 0.00019997795082080227, "loss": 2.7183, "step": 11270 }, { "epoch": 0.01, "learning_rate": 0.00019997793125264095, "loss": 2.157, "step": 11275 }, { "epoch": 0.01, "learning_rate": 0.00019997791167580126, "loss": 2.355, "step": 11280 }, { "epoch": 0.01, "learning_rate": 0.00019997789209028324, "loss": 2.3888, "step": 11285 }, { "epoch": 0.01, "learning_rate": 0.00019997787249608688, "loss": 2.3385, "step": 11290 }, { "epoch": 0.01, "learning_rate": 0.00019997785289321214, "loss": 2.3096, "step": 11295 }, { "epoch": 0.01, "learning_rate": 0.0001999778332816591, "loss": 2.4811, "step": 11300 }, { "epoch": 0.01, "learning_rate": 0.0001999778136614277, "loss": 2.482, "step": 11305 }, { "epoch": 0.01, "learning_rate": 0.00019997779403251799, "loss": 2.3757, "step": 11310 }, { "epoch": 0.01, "learning_rate": 0.0001999777743949299, "loss": 2.5208, "step": 11315 }, { "epoch": 0.01, "learning_rate": 0.00019997775474866352, "loss": 2.3709, "step": 11320 }, { "epoch": 0.01, "learning_rate": 0.00019997773509371878, "loss": 2.5319, "step": 11325 }, { "epoch": 0.01, "learning_rate": 0.00019997771543009573, "loss": 2.4362, "step": 11330 }, { "epoch": 0.01, "learning_rate": 0.00019997769575779431, "loss": 2.4256, "step": 11335 }, { "epoch": 0.01, "learning_rate": 0.00019997767607681462, "loss": 2.4115, "step": 11340 }, { "epoch": 0.01, "learning_rate": 0.00019997765638715657, "loss": 2.3964, "step": 11345 }, { "epoch": 0.01, "learning_rate": 0.0001999776366888202, "loss": 2.581, "step": 11350 }, { "epoch": 0.01, "learning_rate": 0.00019997761698180552, "loss": 2.3648, "step": 11355 }, { "epoch": 0.01, "learning_rate": 0.00019997759726611252, "loss": 2.4494, "step": 11360 }, { "epoch": 0.01, "learning_rate": 0.00019997757754174118, "loss": 2.2324, "step": 11365 }, { "epoch": 0.01, "learning_rate": 0.00019997755780869154, "loss": 2.4793, "step": 11370 }, { "epoch": 0.01, "learning_rate": 0.0001999775380669636, "loss": 2.3191, "step": 11375 }, { "epoch": 0.01, "learning_rate": 0.00019997751831655734, "loss": 2.7056, "step": 11380 }, { "epoch": 0.01, "learning_rate": 0.00019997749855747276, "loss": 2.3016, "step": 11385 }, { "epoch": 0.01, "learning_rate": 0.00019997747878970986, "loss": 2.3911, "step": 11390 }, { "epoch": 0.01, "learning_rate": 0.00019997745901326866, "loss": 1.878, "step": 11395 }, { "epoch": 0.01, "learning_rate": 0.00019997743922814919, "loss": 2.6576, "step": 11400 }, { "epoch": 0.01, "learning_rate": 0.00019997741943435135, "loss": 2.3337, "step": 11405 }, { "epoch": 0.01, "learning_rate": 0.00019997739963187525, "loss": 2.6586, "step": 11410 }, { "epoch": 0.01, "learning_rate": 0.00019997737982072083, "loss": 2.3893, "step": 11415 }, { "epoch": 0.01, "learning_rate": 0.00019997736000088812, "loss": 2.5075, "step": 11420 }, { "epoch": 0.01, "learning_rate": 0.0001999773401723771, "loss": 2.4445, "step": 11425 }, { "epoch": 0.01, "learning_rate": 0.00019997732033518782, "loss": 2.5811, "step": 11430 }, { "epoch": 0.01, "learning_rate": 0.00019997730048932022, "loss": 2.2631, "step": 11435 }, { "epoch": 0.01, "learning_rate": 0.00019997728063477432, "loss": 2.652, "step": 11440 }, { "epoch": 0.01, "learning_rate": 0.00019997726077155013, "loss": 2.4095, "step": 11445 }, { "epoch": 0.01, "learning_rate": 0.0001999772408996477, "loss": 2.626, "step": 11450 }, { "epoch": 0.01, "learning_rate": 0.0001999772210190669, "loss": 2.5359, "step": 11455 }, { "epoch": 0.01, "learning_rate": 0.00019997720112980788, "loss": 2.3061, "step": 11460 }, { "epoch": 0.01, "learning_rate": 0.00019997718123187055, "loss": 2.6007, "step": 11465 }, { "epoch": 0.01, "learning_rate": 0.00019997716132525494, "loss": 2.4207, "step": 11470 }, { "epoch": 0.01, "learning_rate": 0.00019997714140996108, "loss": 2.4945, "step": 11475 }, { "epoch": 0.01, "learning_rate": 0.0001999771214859889, "loss": 2.6721, "step": 11480 }, { "epoch": 0.01, "learning_rate": 0.00019997710155333844, "loss": 2.4411, "step": 11485 }, { "epoch": 0.01, "learning_rate": 0.0001999770816120097, "loss": 2.5658, "step": 11490 }, { "epoch": 0.01, "learning_rate": 0.00019997706166200273, "loss": 2.4054, "step": 11495 }, { "epoch": 0.01, "learning_rate": 0.00019997704170331745, "loss": 2.2281, "step": 11500 }, { "epoch": 0.01, "learning_rate": 0.00019997702173595394, "loss": 2.5157, "step": 11505 }, { "epoch": 0.01, "learning_rate": 0.00019997700175991212, "loss": 2.4029, "step": 11510 }, { "epoch": 0.01, "learning_rate": 0.00019997698177519205, "loss": 2.429, "step": 11515 }, { "epoch": 0.01, "learning_rate": 0.00019997696178179376, "loss": 2.3668, "step": 11520 }, { "epoch": 0.01, "learning_rate": 0.00019997694177971713, "loss": 2.3173, "step": 11525 }, { "epoch": 0.01, "learning_rate": 0.00019997692176896228, "loss": 2.492, "step": 11530 }, { "epoch": 0.01, "learning_rate": 0.00019997690174952917, "loss": 2.5129, "step": 11535 }, { "epoch": 0.01, "learning_rate": 0.00019997688172141779, "loss": 2.5201, "step": 11540 }, { "epoch": 0.01, "learning_rate": 0.00019997686168462818, "loss": 2.5068, "step": 11545 }, { "epoch": 0.01, "learning_rate": 0.00019997684163916026, "loss": 2.2567, "step": 11550 }, { "epoch": 0.01, "learning_rate": 0.00019997682158501414, "loss": 2.4654, "step": 11555 }, { "epoch": 0.01, "learning_rate": 0.00019997680152218975, "loss": 2.536, "step": 11560 }, { "epoch": 0.01, "learning_rate": 0.00019997678145068713, "loss": 2.661, "step": 11565 }, { "epoch": 0.01, "learning_rate": 0.00019997676137050623, "loss": 2.5768, "step": 11570 }, { "epoch": 0.01, "learning_rate": 0.0001999767412816471, "loss": 2.3125, "step": 11575 }, { "epoch": 0.01, "learning_rate": 0.0001999767211841097, "loss": 2.4354, "step": 11580 }, { "epoch": 0.01, "learning_rate": 0.0001999767010778941, "loss": 2.784, "step": 11585 }, { "epoch": 0.01, "learning_rate": 0.00019997668096300025, "loss": 2.3771, "step": 11590 }, { "epoch": 0.01, "learning_rate": 0.00019997666083942814, "loss": 2.5635, "step": 11595 }, { "epoch": 0.01, "learning_rate": 0.00019997664070717783, "loss": 2.58, "step": 11600 }, { "epoch": 0.01, "learning_rate": 0.00019997662056624925, "loss": 2.3966, "step": 11605 }, { "epoch": 0.01, "learning_rate": 0.00019997660041664247, "loss": 2.3584, "step": 11610 }, { "epoch": 0.01, "learning_rate": 0.0001999765802583574, "loss": 2.7565, "step": 11615 }, { "epoch": 0.01, "learning_rate": 0.00019997656009139417, "loss": 2.7276, "step": 11620 }, { "epoch": 0.01, "learning_rate": 0.00019997653991575266, "loss": 2.3058, "step": 11625 }, { "epoch": 0.01, "learning_rate": 0.00019997651973143295, "loss": 2.3889, "step": 11630 }, { "epoch": 0.01, "learning_rate": 0.00019997649953843502, "loss": 2.3774, "step": 11635 }, { "epoch": 0.01, "learning_rate": 0.00019997647933675885, "loss": 2.6754, "step": 11640 }, { "epoch": 0.01, "learning_rate": 0.00019997645912640447, "loss": 2.4136, "step": 11645 }, { "epoch": 0.01, "learning_rate": 0.00019997643890737186, "loss": 2.4073, "step": 11650 }, { "epoch": 0.01, "learning_rate": 0.00019997641867966105, "loss": 2.4641, "step": 11655 }, { "epoch": 0.01, "learning_rate": 0.000199976398443272, "loss": 2.5109, "step": 11660 }, { "epoch": 0.01, "learning_rate": 0.00019997637819820475, "loss": 2.3103, "step": 11665 }, { "epoch": 0.01, "learning_rate": 0.00019997635794445927, "loss": 2.1382, "step": 11670 }, { "epoch": 0.01, "learning_rate": 0.0001999763376820356, "loss": 2.4605, "step": 11675 }, { "epoch": 0.01, "learning_rate": 0.00019997631741093373, "loss": 2.4008, "step": 11680 }, { "epoch": 0.01, "learning_rate": 0.00019997629713115362, "loss": 2.3952, "step": 11685 }, { "epoch": 0.01, "learning_rate": 0.00019997627684269532, "loss": 2.5662, "step": 11690 }, { "epoch": 0.01, "learning_rate": 0.00019997625654555881, "loss": 2.575, "step": 11695 }, { "epoch": 0.01, "learning_rate": 0.00019997623623974414, "loss": 2.4816, "step": 11700 }, { "epoch": 0.01, "learning_rate": 0.00019997621592525121, "loss": 2.7463, "step": 11705 }, { "epoch": 0.01, "learning_rate": 0.00019997619560208014, "loss": 2.6753, "step": 11710 }, { "epoch": 0.01, "learning_rate": 0.00019997617527023085, "loss": 2.5616, "step": 11715 }, { "epoch": 0.01, "learning_rate": 0.00019997615492970336, "loss": 2.4672, "step": 11720 }, { "epoch": 0.01, "learning_rate": 0.00019997613458049766, "loss": 2.3435, "step": 11725 }, { "epoch": 0.01, "learning_rate": 0.0001999761142226138, "loss": 2.561, "step": 11730 }, { "epoch": 0.01, "learning_rate": 0.0001999760938560517, "loss": 2.3806, "step": 11735 }, { "epoch": 0.01, "learning_rate": 0.00019997607348081145, "loss": 2.3155, "step": 11740 }, { "epoch": 0.01, "learning_rate": 0.00019997605309689305, "loss": 2.6049, "step": 11745 }, { "epoch": 0.01, "learning_rate": 0.00019997603270429642, "loss": 2.5219, "step": 11750 }, { "epoch": 0.01, "learning_rate": 0.00019997601230302162, "loss": 2.5796, "step": 11755 }, { "epoch": 0.01, "learning_rate": 0.00019997599189306863, "loss": 2.4046, "step": 11760 }, { "epoch": 0.01, "learning_rate": 0.00019997597147443746, "loss": 2.499, "step": 11765 }, { "epoch": 0.01, "learning_rate": 0.00019997595104712812, "loss": 2.4154, "step": 11770 }, { "epoch": 0.01, "learning_rate": 0.0001999759306111406, "loss": 2.3173, "step": 11775 }, { "epoch": 0.01, "learning_rate": 0.00019997591016647493, "loss": 2.4123, "step": 11780 }, { "epoch": 0.01, "learning_rate": 0.00019997588971313107, "loss": 2.5128, "step": 11785 }, { "epoch": 0.01, "learning_rate": 0.00019997586925110902, "loss": 2.4484, "step": 11790 }, { "epoch": 0.01, "learning_rate": 0.00019997584878040883, "loss": 2.5272, "step": 11795 }, { "epoch": 0.01, "learning_rate": 0.00019997582830103046, "loss": 2.5432, "step": 11800 }, { "epoch": 0.01, "learning_rate": 0.00019997580781297392, "loss": 2.4824, "step": 11805 }, { "epoch": 0.01, "learning_rate": 0.00019997578731623924, "loss": 2.2588, "step": 11810 }, { "epoch": 0.01, "learning_rate": 0.00019997576681082637, "loss": 2.3555, "step": 11815 }, { "epoch": 0.01, "learning_rate": 0.00019997574629673537, "loss": 2.2945, "step": 11820 }, { "epoch": 0.01, "learning_rate": 0.0001999757257739662, "loss": 2.2826, "step": 11825 }, { "epoch": 0.01, "learning_rate": 0.00019997570524251887, "loss": 2.5673, "step": 11830 }, { "epoch": 0.01, "learning_rate": 0.00019997568470239338, "loss": 2.445, "step": 11835 }, { "epoch": 0.01, "learning_rate": 0.00019997566415358976, "loss": 2.4448, "step": 11840 }, { "epoch": 0.01, "learning_rate": 0.000199975643596108, "loss": 2.4024, "step": 11845 }, { "epoch": 0.01, "learning_rate": 0.00019997562302994805, "loss": 2.3226, "step": 11850 }, { "epoch": 0.01, "learning_rate": 0.00019997560245510996, "loss": 2.4601, "step": 11855 }, { "epoch": 0.01, "learning_rate": 0.00019997558187159376, "loss": 2.444, "step": 11860 }, { "epoch": 0.01, "learning_rate": 0.0001999755612793994, "loss": 2.5258, "step": 11865 }, { "epoch": 0.01, "learning_rate": 0.00019997554067852688, "loss": 2.7061, "step": 11870 }, { "epoch": 0.01, "learning_rate": 0.00019997552006897622, "loss": 2.5543, "step": 11875 }, { "epoch": 0.01, "learning_rate": 0.00019997549945074744, "loss": 2.4321, "step": 11880 }, { "epoch": 0.01, "learning_rate": 0.00019997547882384052, "loss": 2.2429, "step": 11885 }, { "epoch": 0.01, "learning_rate": 0.00019997545818825548, "loss": 2.4161, "step": 11890 }, { "epoch": 0.01, "learning_rate": 0.00019997543754399227, "loss": 2.4542, "step": 11895 }, { "epoch": 0.01, "learning_rate": 0.00019997541689105095, "loss": 2.4715, "step": 11900 }, { "epoch": 0.01, "learning_rate": 0.0001999753962294315, "loss": 2.4635, "step": 11905 }, { "epoch": 0.01, "learning_rate": 0.00019997537555913394, "loss": 2.6533, "step": 11910 }, { "epoch": 0.01, "learning_rate": 0.00019997535488015824, "loss": 2.3862, "step": 11915 }, { "epoch": 0.01, "learning_rate": 0.0001999753341925044, "loss": 2.3898, "step": 11920 }, { "epoch": 0.01, "learning_rate": 0.00019997531349617246, "loss": 2.6894, "step": 11925 }, { "epoch": 0.01, "learning_rate": 0.0001999752927911624, "loss": 2.5738, "step": 11930 }, { "epoch": 0.01, "learning_rate": 0.0001999752720774742, "loss": 2.3446, "step": 11935 }, { "epoch": 0.01, "learning_rate": 0.00019997525135510792, "loss": 2.6286, "step": 11940 }, { "epoch": 0.01, "learning_rate": 0.00019997523062406352, "loss": 2.535, "step": 11945 }, { "epoch": 0.01, "learning_rate": 0.000199975209884341, "loss": 2.5846, "step": 11950 }, { "epoch": 0.01, "learning_rate": 0.00019997518913594036, "loss": 2.6708, "step": 11955 }, { "epoch": 0.01, "learning_rate": 0.00019997516837886158, "loss": 2.3358, "step": 11960 }, { "epoch": 0.01, "learning_rate": 0.00019997514761310473, "loss": 2.5653, "step": 11965 }, { "epoch": 0.01, "learning_rate": 0.0001999751268386698, "loss": 2.4606, "step": 11970 }, { "epoch": 0.01, "learning_rate": 0.00019997510605555672, "loss": 2.6728, "step": 11975 }, { "epoch": 0.01, "learning_rate": 0.00019997508526376555, "loss": 2.5948, "step": 11980 }, { "epoch": 0.01, "learning_rate": 0.00019997506446329627, "loss": 2.4636, "step": 11985 }, { "epoch": 0.01, "learning_rate": 0.00019997504365414895, "loss": 2.6507, "step": 11990 }, { "epoch": 0.01, "learning_rate": 0.00019997502283632349, "loss": 2.4276, "step": 11995 }, { "epoch": 0.01, "learning_rate": 0.0001999750020098199, "loss": 2.4268, "step": 12000 }, { "epoch": 0.01, "learning_rate": 0.00019997498117463826, "loss": 2.3155, "step": 12005 }, { "epoch": 0.01, "learning_rate": 0.00019997496033077853, "loss": 2.5103, "step": 12010 }, { "epoch": 0.01, "learning_rate": 0.0001999749394782407, "loss": 2.5187, "step": 12015 }, { "epoch": 0.01, "learning_rate": 0.00019997491861702477, "loss": 2.5975, "step": 12020 }, { "epoch": 0.01, "learning_rate": 0.0001999748977471308, "loss": 2.4078, "step": 12025 }, { "epoch": 0.01, "learning_rate": 0.0001999748768685587, "loss": 2.3794, "step": 12030 }, { "epoch": 0.01, "learning_rate": 0.00019997485598130855, "loss": 2.3124, "step": 12035 }, { "epoch": 0.01, "learning_rate": 0.00019997483508538027, "loss": 2.5713, "step": 12040 }, { "epoch": 0.01, "learning_rate": 0.00019997481418077397, "loss": 2.1336, "step": 12045 }, { "epoch": 0.01, "learning_rate": 0.00019997479326748957, "loss": 2.7057, "step": 12050 }, { "epoch": 0.01, "learning_rate": 0.00019997477234552709, "loss": 2.4896, "step": 12055 }, { "epoch": 0.01, "learning_rate": 0.00019997475141488656, "loss": 2.3709, "step": 12060 }, { "epoch": 0.01, "learning_rate": 0.00019997473047556793, "loss": 2.1572, "step": 12065 }, { "epoch": 0.01, "learning_rate": 0.00019997470952757122, "loss": 2.5542, "step": 12070 }, { "epoch": 0.01, "learning_rate": 0.00019997468857089646, "loss": 2.4698, "step": 12075 }, { "epoch": 0.01, "learning_rate": 0.00019997466760554366, "loss": 2.4545, "step": 12080 }, { "epoch": 0.01, "learning_rate": 0.00019997464663151275, "loss": 2.4641, "step": 12085 }, { "epoch": 0.01, "learning_rate": 0.0001999746256488038, "loss": 2.153, "step": 12090 }, { "epoch": 0.01, "learning_rate": 0.0001999746046574168, "loss": 2.6339, "step": 12095 }, { "epoch": 0.01, "learning_rate": 0.00019997458365735173, "loss": 2.3097, "step": 12100 }, { "epoch": 0.01, "learning_rate": 0.00019997456264860862, "loss": 2.196, "step": 12105 }, { "epoch": 0.01, "learning_rate": 0.00019997454163118742, "loss": 2.1926, "step": 12110 }, { "epoch": 0.01, "learning_rate": 0.0001999745206050882, "loss": 2.524, "step": 12115 }, { "epoch": 0.01, "learning_rate": 0.0001999744995703109, "loss": 2.6172, "step": 12120 }, { "epoch": 0.01, "learning_rate": 0.00019997447852685557, "loss": 2.401, "step": 12125 }, { "epoch": 0.01, "learning_rate": 0.0001999744574747222, "loss": 2.5944, "step": 12130 }, { "epoch": 0.01, "learning_rate": 0.0001999744364139108, "loss": 2.1631, "step": 12135 }, { "epoch": 0.01, "learning_rate": 0.00019997441534442132, "loss": 2.3836, "step": 12140 }, { "epoch": 0.01, "learning_rate": 0.0001999743942662538, "loss": 2.5634, "step": 12145 }, { "epoch": 0.01, "learning_rate": 0.00019997437317940825, "loss": 2.4223, "step": 12150 }, { "epoch": 0.01, "learning_rate": 0.00019997435208388465, "loss": 2.21, "step": 12155 }, { "epoch": 0.01, "learning_rate": 0.00019997433097968307, "loss": 2.3197, "step": 12160 }, { "epoch": 0.01, "learning_rate": 0.0001999743098668034, "loss": 2.3689, "step": 12165 }, { "epoch": 0.01, "learning_rate": 0.00019997428874524568, "loss": 2.3294, "step": 12170 }, { "epoch": 0.01, "learning_rate": 0.00019997426761500997, "loss": 2.2847, "step": 12175 }, { "epoch": 0.01, "learning_rate": 0.0001999742464760962, "loss": 2.5656, "step": 12180 }, { "epoch": 0.01, "learning_rate": 0.00019997422532850442, "loss": 2.4576, "step": 12185 }, { "epoch": 0.01, "learning_rate": 0.00019997420417223463, "loss": 2.5989, "step": 12190 }, { "epoch": 0.01, "learning_rate": 0.00019997418300728682, "loss": 2.323, "step": 12195 }, { "epoch": 0.01, "learning_rate": 0.00019997416183366093, "loss": 2.3168, "step": 12200 }, { "epoch": 0.01, "learning_rate": 0.00019997414065135708, "loss": 2.224, "step": 12205 }, { "epoch": 0.01, "learning_rate": 0.0001999741194603752, "loss": 2.5972, "step": 12210 }, { "epoch": 0.01, "learning_rate": 0.00019997409826071527, "loss": 2.5446, "step": 12215 }, { "epoch": 0.01, "learning_rate": 0.00019997407705237737, "loss": 2.4893, "step": 12220 }, { "epoch": 0.01, "learning_rate": 0.0001999740558353614, "loss": 2.5806, "step": 12225 }, { "epoch": 0.01, "learning_rate": 0.0001999740346096675, "loss": 2.4046, "step": 12230 }, { "epoch": 0.01, "learning_rate": 0.00019997401337529553, "loss": 2.3519, "step": 12235 }, { "epoch": 0.01, "learning_rate": 0.00019997399213224557, "loss": 2.1366, "step": 12240 }, { "epoch": 0.01, "learning_rate": 0.0001999739708805176, "loss": 2.3766, "step": 12245 }, { "epoch": 0.01, "learning_rate": 0.00019997394962011165, "loss": 2.6111, "step": 12250 }, { "epoch": 0.01, "learning_rate": 0.00019997392835102766, "loss": 2.2581, "step": 12255 }, { "epoch": 0.01, "learning_rate": 0.00019997390707326572, "loss": 2.5957, "step": 12260 }, { "epoch": 0.01, "learning_rate": 0.00019997388578682574, "loss": 2.5943, "step": 12265 }, { "epoch": 0.01, "learning_rate": 0.00019997386449170778, "loss": 2.7057, "step": 12270 }, { "epoch": 0.01, "learning_rate": 0.00019997384318791182, "loss": 2.3825, "step": 12275 }, { "epoch": 0.01, "learning_rate": 0.00019997382187543787, "loss": 2.5424, "step": 12280 }, { "epoch": 0.01, "learning_rate": 0.00019997380055428594, "loss": 2.5994, "step": 12285 }, { "epoch": 0.01, "learning_rate": 0.000199973779224456, "loss": 2.4017, "step": 12290 }, { "epoch": 0.01, "learning_rate": 0.0001999737578859481, "loss": 2.4752, "step": 12295 }, { "epoch": 0.01, "learning_rate": 0.00019997373653876218, "loss": 2.5205, "step": 12300 }, { "epoch": 0.01, "learning_rate": 0.0001999737151828983, "loss": 2.3572, "step": 12305 }, { "epoch": 0.01, "learning_rate": 0.00019997369381835644, "loss": 2.5049, "step": 12310 }, { "epoch": 0.01, "learning_rate": 0.0001999736724451366, "loss": 2.3371, "step": 12315 }, { "epoch": 0.01, "learning_rate": 0.00019997365106323877, "loss": 2.3831, "step": 12320 }, { "epoch": 0.01, "learning_rate": 0.000199973629672663, "loss": 2.3459, "step": 12325 }, { "epoch": 0.01, "learning_rate": 0.0001999736082734092, "loss": 2.6217, "step": 12330 }, { "epoch": 0.01, "learning_rate": 0.00019997358686547746, "loss": 2.597, "step": 12335 }, { "epoch": 0.01, "learning_rate": 0.00019997356544886776, "loss": 2.4469, "step": 12340 }, { "epoch": 0.01, "learning_rate": 0.00019997354402358008, "loss": 2.4035, "step": 12345 }, { "epoch": 0.01, "learning_rate": 0.00019997352258961445, "loss": 2.4356, "step": 12350 }, { "epoch": 0.01, "learning_rate": 0.0001999735011469708, "loss": 2.6113, "step": 12355 }, { "epoch": 0.01, "learning_rate": 0.00019997347969564927, "loss": 2.5397, "step": 12360 }, { "epoch": 0.01, "learning_rate": 0.0001999734582356497, "loss": 2.5205, "step": 12365 }, { "epoch": 0.01, "learning_rate": 0.00019997343676697222, "loss": 2.525, "step": 12370 }, { "epoch": 0.01, "learning_rate": 0.00019997341528961676, "loss": 2.4736, "step": 12375 }, { "epoch": 0.01, "learning_rate": 0.00019997339380358335, "loss": 2.4842, "step": 12380 }, { "epoch": 0.01, "learning_rate": 0.000199973372308872, "loss": 2.5738, "step": 12385 }, { "epoch": 0.01, "learning_rate": 0.0001999733508054827, "loss": 2.4422, "step": 12390 }, { "epoch": 0.01, "learning_rate": 0.00019997332929341544, "loss": 2.286, "step": 12395 }, { "epoch": 0.01, "learning_rate": 0.00019997330777267024, "loss": 2.5369, "step": 12400 }, { "epoch": 0.01, "learning_rate": 0.00019997328624324706, "loss": 2.325, "step": 12405 }, { "epoch": 0.01, "learning_rate": 0.00019997326470514598, "loss": 2.5605, "step": 12410 }, { "epoch": 0.01, "learning_rate": 0.00019997324315836697, "loss": 2.4599, "step": 12415 }, { "epoch": 0.01, "learning_rate": 0.00019997322160290998, "loss": 2.514, "step": 12420 }, { "epoch": 0.01, "learning_rate": 0.00019997320003877506, "loss": 2.2743, "step": 12425 }, { "epoch": 0.01, "learning_rate": 0.0001999731784659622, "loss": 2.4482, "step": 12430 }, { "epoch": 0.01, "learning_rate": 0.00019997315688447142, "loss": 2.308, "step": 12435 }, { "epoch": 0.01, "learning_rate": 0.0001999731352943027, "loss": 2.2427, "step": 12440 }, { "epoch": 0.01, "learning_rate": 0.00019997311369545604, "loss": 2.1984, "step": 12445 }, { "epoch": 0.01, "learning_rate": 0.00019997309208793146, "loss": 2.368, "step": 12450 }, { "epoch": 0.01, "learning_rate": 0.00019997307047172895, "loss": 2.2944, "step": 12455 }, { "epoch": 0.01, "learning_rate": 0.0001999730488468485, "loss": 2.4126, "step": 12460 }, { "epoch": 0.01, "learning_rate": 0.00019997302721329015, "loss": 2.3809, "step": 12465 }, { "epoch": 0.01, "learning_rate": 0.0001999730055710539, "loss": 2.4451, "step": 12470 }, { "epoch": 0.01, "learning_rate": 0.00019997298392013967, "loss": 2.4656, "step": 12475 }, { "epoch": 0.01, "learning_rate": 0.00019997296226054756, "loss": 2.53, "step": 12480 }, { "epoch": 0.01, "learning_rate": 0.00019997294059227754, "loss": 2.561, "step": 12485 }, { "epoch": 0.01, "learning_rate": 0.0001999729189153296, "loss": 2.2128, "step": 12490 }, { "epoch": 0.01, "learning_rate": 0.00019997289722970373, "loss": 2.2274, "step": 12495 }, { "epoch": 0.01, "learning_rate": 0.00019997287553539996, "loss": 2.5228, "step": 12500 }, { "epoch": 0.01, "learning_rate": 0.0001999728538324183, "loss": 2.3405, "step": 12505 }, { "epoch": 0.01, "learning_rate": 0.0001999728321207587, "loss": 2.6303, "step": 12510 }, { "epoch": 0.01, "learning_rate": 0.00019997281040042122, "loss": 2.482, "step": 12515 }, { "epoch": 0.01, "learning_rate": 0.00019997278867140583, "loss": 2.6349, "step": 12520 }, { "epoch": 0.01, "learning_rate": 0.00019997276693371253, "loss": 2.4412, "step": 12525 }, { "epoch": 0.01, "learning_rate": 0.00019997274518734136, "loss": 2.4151, "step": 12530 }, { "epoch": 0.01, "learning_rate": 0.00019997272343229226, "loss": 2.4361, "step": 12535 }, { "epoch": 0.01, "learning_rate": 0.0001999727016685653, "loss": 2.2396, "step": 12540 }, { "epoch": 0.01, "learning_rate": 0.00019997267989616042, "loss": 2.5266, "step": 12545 }, { "epoch": 0.01, "learning_rate": 0.00019997265811507763, "loss": 2.4731, "step": 12550 }, { "epoch": 0.01, "learning_rate": 0.00019997263632531698, "loss": 2.4637, "step": 12555 }, { "epoch": 0.01, "learning_rate": 0.00019997261452687844, "loss": 2.4655, "step": 12560 }, { "epoch": 0.01, "learning_rate": 0.00019997259271976203, "loss": 2.2793, "step": 12565 }, { "epoch": 0.01, "learning_rate": 0.0001999725709039677, "loss": 2.4703, "step": 12570 }, { "epoch": 0.01, "learning_rate": 0.00019997254907949552, "loss": 2.3971, "step": 12575 }, { "epoch": 0.01, "learning_rate": 0.00019997252724634546, "loss": 2.4912, "step": 12580 }, { "epoch": 0.01, "learning_rate": 0.00019997250540451747, "loss": 2.6492, "step": 12585 }, { "epoch": 0.01, "learning_rate": 0.00019997248355401167, "loss": 2.6137, "step": 12590 }, { "epoch": 0.01, "learning_rate": 0.00019997246169482796, "loss": 2.2331, "step": 12595 }, { "epoch": 0.01, "learning_rate": 0.0001999724398269664, "loss": 2.4599, "step": 12600 }, { "epoch": 0.01, "learning_rate": 0.00019997241795042696, "loss": 2.6955, "step": 12605 }, { "epoch": 0.01, "learning_rate": 0.00019997239606520966, "loss": 2.4918, "step": 12610 }, { "epoch": 0.01, "learning_rate": 0.00019997237417131446, "loss": 2.4573, "step": 12615 }, { "epoch": 0.01, "learning_rate": 0.00019997235226874141, "loss": 2.5273, "step": 12620 }, { "epoch": 0.01, "learning_rate": 0.00019997233035749052, "loss": 2.5456, "step": 12625 }, { "epoch": 0.01, "learning_rate": 0.00019997230843756173, "loss": 2.4297, "step": 12630 }, { "epoch": 0.01, "learning_rate": 0.00019997228650895513, "loss": 2.5807, "step": 12635 }, { "epoch": 0.01, "learning_rate": 0.00019997226457167062, "loss": 2.6845, "step": 12640 }, { "epoch": 0.02, "learning_rate": 0.0001999722426257083, "loss": 2.4075, "step": 12645 }, { "epoch": 0.02, "learning_rate": 0.0001999722206710681, "loss": 2.4821, "step": 12650 }, { "epoch": 0.02, "learning_rate": 0.00019997219870775005, "loss": 2.3636, "step": 12655 }, { "epoch": 0.02, "learning_rate": 0.00019997217673575418, "loss": 2.3911, "step": 12660 }, { "epoch": 0.02, "learning_rate": 0.00019997215475508042, "loss": 2.5498, "step": 12665 }, { "epoch": 0.02, "learning_rate": 0.00019997213276572883, "loss": 2.2082, "step": 12670 }, { "epoch": 0.02, "learning_rate": 0.0001999721107676994, "loss": 2.5635, "step": 12675 }, { "epoch": 0.02, "learning_rate": 0.0001999720887609921, "loss": 2.2998, "step": 12680 }, { "epoch": 0.02, "learning_rate": 0.000199972066745607, "loss": 2.2889, "step": 12685 }, { "epoch": 0.02, "learning_rate": 0.00019997204472154407, "loss": 2.5273, "step": 12690 }, { "epoch": 0.02, "learning_rate": 0.00019997202268880329, "loss": 2.4649, "step": 12695 }, { "epoch": 0.02, "learning_rate": 0.00019997200064738463, "loss": 2.5811, "step": 12700 }, { "epoch": 0.02, "learning_rate": 0.0001999719785972882, "loss": 2.4679, "step": 12705 }, { "epoch": 0.02, "learning_rate": 0.0001999719565385139, "loss": 2.5025, "step": 12710 }, { "epoch": 0.02, "learning_rate": 0.00019997193447106182, "loss": 2.3796, "step": 12715 }, { "epoch": 0.02, "learning_rate": 0.00019997191239493186, "loss": 2.1805, "step": 12720 }, { "epoch": 0.02, "learning_rate": 0.0001999718903101241, "loss": 2.5732, "step": 12725 }, { "epoch": 0.02, "learning_rate": 0.00019997186821663852, "loss": 2.3069, "step": 12730 }, { "epoch": 0.02, "learning_rate": 0.0001999718461144751, "loss": 2.4859, "step": 12735 }, { "epoch": 0.02, "learning_rate": 0.00019997182400363387, "loss": 2.6011, "step": 12740 }, { "epoch": 0.02, "learning_rate": 0.00019997180188411484, "loss": 2.5648, "step": 12745 }, { "epoch": 0.02, "learning_rate": 0.00019997177975591797, "loss": 2.4831, "step": 12750 }, { "epoch": 0.02, "learning_rate": 0.00019997175761904333, "loss": 2.3341, "step": 12755 }, { "epoch": 0.02, "learning_rate": 0.00019997173547349083, "loss": 2.6718, "step": 12760 }, { "epoch": 0.02, "learning_rate": 0.00019997171331926055, "loss": 2.4975, "step": 12765 }, { "epoch": 0.02, "learning_rate": 0.0001999716911563524, "loss": 2.6542, "step": 12770 }, { "epoch": 0.02, "learning_rate": 0.00019997166898476652, "loss": 2.3267, "step": 12775 }, { "epoch": 0.02, "learning_rate": 0.00019997164680450282, "loss": 2.2833, "step": 12780 }, { "epoch": 0.02, "learning_rate": 0.00019997162461556133, "loss": 2.4378, "step": 12785 }, { "epoch": 0.02, "learning_rate": 0.000199971602417942, "loss": 2.1509, "step": 12790 }, { "epoch": 0.02, "learning_rate": 0.00019997158021164488, "loss": 2.743, "step": 12795 }, { "epoch": 0.02, "learning_rate": 0.00019997155799667, "loss": 2.3859, "step": 12800 } ], "logging_steps": 5, "max_steps": 1685984, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "total_flos": 3.8338610650381025e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }