{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 119547, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00024811801548585953, "loss": 8.8316, "step": 100 }, { "epoch": 0.0, "learning_rate": 0.00029403430324938403, "loss": 5.464, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0002998014193905167, "loss": 4.4569, "step": 300 }, { "epoch": 0.0, "learning_rate": 0.0002995500515304113, "loss": 4.0926, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.0002992986836703059, "loss": 3.9091, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.00029904731581020046, "loss": 3.7998, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.0002987959479500951, "loss": 3.7103, "step": 700 }, { "epoch": 0.01, "learning_rate": 0.0002985445800899897, "loss": 3.663, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.00029829321222988426, "loss": 3.6162, "step": 900 }, { "epoch": 0.01, "learning_rate": 0.0002980418443697789, "loss": 3.5725, "step": 1000 }, { "epoch": 0.01, "eval_accuracy": 0.37403400092106887, "eval_loss": 3.5909957885742188, "eval_runtime": 37.0746, "eval_samples_per_second": 302.526, "eval_steps_per_second": 2.535, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.00029779047650967344, "loss": 3.5506, "step": 1100 }, { "epoch": 0.01, "learning_rate": 0.00029753910864956806, "loss": 3.5285, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.0002972877407894626, "loss": 3.5064, "step": 1300 }, { "epoch": 0.01, "learning_rate": 0.00029703637292935724, "loss": 3.4907, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.0002967875187478529, "loss": 3.4708, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.00029653615088774747, "loss": 3.456, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.0002962847830276421, "loss": 3.4413, "step": 1700 }, { "epoch": 0.02, "learning_rate": 0.00029603341516753665, "loss": 3.4224, "step": 1800 }, { "epoch": 0.02, "learning_rate": 0.00029578204730743127, "loss": 3.4184, "step": 1900 }, { "epoch": 0.02, "learning_rate": 0.00029553067944732583, "loss": 3.4011, "step": 2000 }, { "epoch": 0.02, "eval_accuracy": 0.39113344827973534, "eval_loss": 3.4203457832336426, "eval_runtime": 36.9935, "eval_samples_per_second": 303.189, "eval_steps_per_second": 2.541, "step": 2000 }, { "epoch": 0.02, "learning_rate": 0.00029527931158722045, "loss": 3.3915, "step": 2100 }, { "epoch": 0.02, "learning_rate": 0.000295027943727115, "loss": 3.3831, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.00029477657586700963, "loss": 3.3814, "step": 2300 }, { "epoch": 0.02, "learning_rate": 0.0002945252080069042, "loss": 3.3734, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.00029427384014679876, "loss": 3.3624, "step": 2500 }, { "epoch": 0.02, "learning_rate": 0.0002940224722866934, "loss": 3.3559, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.000293771104426588, "loss": 3.3459, "step": 2700 }, { "epoch": 0.02, "learning_rate": 0.00029351973656648256, "loss": 3.3462, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.0002932683687063772, "loss": 3.3306, "step": 2900 }, { "epoch": 0.03, "learning_rate": 0.0002930170008462718, "loss": 3.335, "step": 3000 }, { "epoch": 0.03, "eval_accuracy": 0.39839248205600547, "eval_loss": 3.3489201068878174, "eval_runtime": 36.3401, "eval_samples_per_second": 308.64, "eval_steps_per_second": 2.587, "step": 3000 }, { "epoch": 0.03, "learning_rate": 0.00029276563298616636, "loss": 3.3239, "step": 3100 }, { "epoch": 0.03, "learning_rate": 0.000292514265126061, "loss": 3.3132, "step": 3200 }, { "epoch": 0.03, "learning_rate": 0.00029226289726595555, "loss": 3.3157, "step": 3300 }, { "epoch": 0.03, "learning_rate": 0.00029201152940585017, "loss": 3.3077, "step": 3400 }, { "epoch": 0.03, "learning_rate": 0.00029176016154574473, "loss": 3.308, "step": 3500 }, { "epoch": 0.03, "learning_rate": 0.00029150879368563935, "loss": 3.2971, "step": 3600 }, { "epoch": 0.03, "learning_rate": 0.0002912574258255339, "loss": 3.2953, "step": 3700 }, { "epoch": 0.03, "learning_rate": 0.00029100605796542853, "loss": 3.2915, "step": 3800 }, { "epoch": 0.03, "learning_rate": 0.0002907572037839242, "loss": 3.289, "step": 3900 }, { "epoch": 0.03, "learning_rate": 0.00029050583592381876, "loss": 3.2835, "step": 4000 }, { "epoch": 0.03, "eval_accuracy": 0.4028412728722747, "eval_loss": 3.306710958480835, "eval_runtime": 37.7903, "eval_samples_per_second": 296.796, "eval_steps_per_second": 2.487, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.0002902544680637134, "loss": 3.2759, "step": 4100 }, { "epoch": 0.04, "learning_rate": 0.00029000310020360794, "loss": 3.2803, "step": 4200 }, { "epoch": 0.04, "learning_rate": 0.0002897517323435025, "loss": 3.2752, "step": 4300 }, { "epoch": 0.04, "learning_rate": 0.0002895003644833971, "loss": 3.28, "step": 4400 }, { "epoch": 0.04, "learning_rate": 0.0002892489966232917, "loss": 3.2788, "step": 4500 }, { "epoch": 0.04, "learning_rate": 0.0002889976287631863, "loss": 3.2663, "step": 4600 }, { "epoch": 0.04, "learning_rate": 0.0002887462609030809, "loss": 3.2647, "step": 4700 }, { "epoch": 0.04, "learning_rate": 0.0002884948930429755, "loss": 3.2643, "step": 4800 }, { "epoch": 0.04, "learning_rate": 0.0002882435251828701, "loss": 3.2686, "step": 4900 }, { "epoch": 0.04, "learning_rate": 0.00028799215732276467, "loss": 3.2477, "step": 5000 }, { "epoch": 0.04, "eval_accuracy": 0.4059681332629427, "eval_loss": 3.2766220569610596, "eval_runtime": 36.7229, "eval_samples_per_second": 305.423, "eval_steps_per_second": 2.56, "step": 5000 }, { "epoch": 0.04, "learning_rate": 0.0002877407894626593, "loss": 3.247, "step": 5100 }, { "epoch": 0.04, "learning_rate": 0.0002874894216025539, "loss": 3.2555, "step": 5200 }, { "epoch": 0.04, "learning_rate": 0.00028723805374244847, "loss": 3.2528, "step": 5300 }, { "epoch": 0.05, "learning_rate": 0.0002869866858823431, "loss": 3.2441, "step": 5400 }, { "epoch": 0.05, "learning_rate": 0.00028673531802223765, "loss": 3.2458, "step": 5500 }, { "epoch": 0.05, "learning_rate": 0.00028648395016213227, "loss": 3.2394, "step": 5600 }, { "epoch": 0.05, "learning_rate": 0.00028623258230202683, "loss": 3.2464, "step": 5700 }, { "epoch": 0.05, "learning_rate": 0.00028598121444192145, "loss": 3.2484, "step": 5800 }, { "epoch": 0.05, "learning_rate": 0.000285729846581816, "loss": 3.2372, "step": 5900 }, { "epoch": 0.05, "learning_rate": 0.0002854784787217106, "loss": 3.2373, "step": 6000 }, { "epoch": 0.05, "eval_accuracy": 0.40810372134296335, "eval_loss": 3.256094455718994, "eval_runtime": 36.3187, "eval_samples_per_second": 308.822, "eval_steps_per_second": 2.588, "step": 6000 }, { "epoch": 0.05, "learning_rate": 0.0002852271108616052, "loss": 3.2314, "step": 6100 }, { "epoch": 0.05, "learning_rate": 0.00028497825668010086, "loss": 3.2393, "step": 6200 }, { "epoch": 0.05, "learning_rate": 0.0002847268888199954, "loss": 3.23, "step": 6300 }, { "epoch": 0.05, "learning_rate": 0.0002844780346384911, "loss": 3.2317, "step": 6400 }, { "epoch": 0.05, "learning_rate": 0.0002842266667783857, "loss": 3.2188, "step": 6500 }, { "epoch": 0.06, "learning_rate": 0.00028397529891828027, "loss": 3.2251, "step": 6600 }, { "epoch": 0.06, "learning_rate": 0.0002837239310581749, "loss": 3.2235, "step": 6700 }, { "epoch": 0.06, "learning_rate": 0.00028347256319806945, "loss": 3.2174, "step": 6800 }, { "epoch": 0.06, "learning_rate": 0.00028322119533796407, "loss": 3.2212, "step": 6900 }, { "epoch": 0.06, "learning_rate": 0.00028296982747785863, "loss": 3.2208, "step": 7000 }, { "epoch": 0.06, "eval_accuracy": 0.4099135655475305, "eval_loss": 3.2382774353027344, "eval_runtime": 36.4193, "eval_samples_per_second": 307.969, "eval_steps_per_second": 2.581, "step": 7000 }, { "epoch": 0.06, "learning_rate": 0.00028271845961775325, "loss": 3.215, "step": 7100 }, { "epoch": 0.06, "learning_rate": 0.0002824670917576478, "loss": 3.2124, "step": 7200 }, { "epoch": 0.06, "learning_rate": 0.00028221572389754244, "loss": 3.2214, "step": 7300 }, { "epoch": 0.06, "learning_rate": 0.000281964356037437, "loss": 3.2157, "step": 7400 }, { "epoch": 0.06, "learning_rate": 0.0002817129881773316, "loss": 3.212, "step": 7500 }, { "epoch": 0.06, "learning_rate": 0.00028146162031722624, "loss": 3.2063, "step": 7600 }, { "epoch": 0.06, "learning_rate": 0.0002812102524571208, "loss": 3.2089, "step": 7700 }, { "epoch": 0.07, "learning_rate": 0.0002809588845970154, "loss": 3.2056, "step": 7800 }, { "epoch": 0.07, "learning_rate": 0.00028070751673691, "loss": 3.206, "step": 7900 }, { "epoch": 0.07, "learning_rate": 0.0002804561488768046, "loss": 3.2021, "step": 8000 }, { "epoch": 0.07, "eval_accuracy": 0.4112453244521325, "eval_loss": 3.2249624729156494, "eval_runtime": 37.3966, "eval_samples_per_second": 299.92, "eval_steps_per_second": 2.514, "step": 8000 }, { "epoch": 0.07, "learning_rate": 0.00028020478101669917, "loss": 3.2098, "step": 8100 }, { "epoch": 0.07, "learning_rate": 0.0002799534131565938, "loss": 3.2099, "step": 8200 }, { "epoch": 0.07, "learning_rate": 0.00027970204529648835, "loss": 3.2075, "step": 8300 }, { "epoch": 0.07, "learning_rate": 0.00027945067743638297, "loss": 3.205, "step": 8400 }, { "epoch": 0.07, "learning_rate": 0.00027920182325487863, "loss": 3.1931, "step": 8500 }, { "epoch": 0.07, "learning_rate": 0.0002789504553947732, "loss": 3.1969, "step": 8600 }, { "epoch": 0.07, "learning_rate": 0.0002786990875346678, "loss": 3.1974, "step": 8700 }, { "epoch": 0.07, "learning_rate": 0.0002784502333531634, "loss": 3.1958, "step": 8800 }, { "epoch": 0.07, "learning_rate": 0.00027819886549305804, "loss": 3.1925, "step": 8900 }, { "epoch": 0.08, "learning_rate": 0.00027794749763295265, "loss": 3.194, "step": 9000 }, { "epoch": 0.08, "eval_accuracy": 0.41224642524178057, "eval_loss": 3.2142982482910156, "eval_runtime": 37.0575, "eval_samples_per_second": 302.665, "eval_steps_per_second": 2.537, "step": 9000 }, { "epoch": 0.08, "learning_rate": 0.0002776961297728472, "loss": 3.1941, "step": 9100 }, { "epoch": 0.08, "learning_rate": 0.00027744476191274184, "loss": 3.1943, "step": 9200 }, { "epoch": 0.08, "learning_rate": 0.0002771933940526364, "loss": 3.197, "step": 9300 }, { "epoch": 0.08, "learning_rate": 0.000276942026192531, "loss": 3.1912, "step": 9400 }, { "epoch": 0.08, "learning_rate": 0.0002766906583324256, "loss": 3.1941, "step": 9500 }, { "epoch": 0.08, "learning_rate": 0.0002764392904723202, "loss": 3.1904, "step": 9600 }, { "epoch": 0.08, "learning_rate": 0.00027618792261221477, "loss": 3.1807, "step": 9700 }, { "epoch": 0.08, "learning_rate": 0.00027593906843071043, "loss": 3.1854, "step": 9800 }, { "epoch": 0.08, "learning_rate": 0.00027568770057060505, "loss": 3.1859, "step": 9900 }, { "epoch": 0.08, "learning_rate": 0.0002754363327104996, "loss": 3.1971, "step": 10000 }, { "epoch": 0.08, "eval_accuracy": 0.413248228065643, "eval_loss": 3.2038817405700684, "eval_runtime": 36.9865, "eval_samples_per_second": 303.246, "eval_steps_per_second": 2.541, "step": 10000 }, { "epoch": 0.08, "learning_rate": 0.00027518496485039423, "loss": 3.1776, "step": 10100 }, { "epoch": 0.09, "learning_rate": 0.0002749335969902888, "loss": 3.1872, "step": 10200 }, { "epoch": 0.09, "learning_rate": 0.0002746822291301834, "loss": 3.1792, "step": 10300 }, { "epoch": 0.09, "learning_rate": 0.000274430861270078, "loss": 3.1858, "step": 10400 }, { "epoch": 0.09, "learning_rate": 0.00027417949340997254, "loss": 3.1825, "step": 10500 }, { "epoch": 0.09, "learning_rate": 0.00027392812554986716, "loss": 3.1798, "step": 10600 }, { "epoch": 0.09, "learning_rate": 0.0002736767576897618, "loss": 3.1819, "step": 10700 }, { "epoch": 0.09, "learning_rate": 0.00027342538982965634, "loss": 3.1778, "step": 10800 }, { "epoch": 0.09, "learning_rate": 0.00027317402196955096, "loss": 3.185, "step": 10900 }, { "epoch": 0.09, "learning_rate": 0.0002729226541094455, "loss": 3.1794, "step": 11000 }, { "epoch": 0.09, "eval_accuracy": 0.41429074887393713, "eval_loss": 3.1947903633117676, "eval_runtime": 37.2827, "eval_samples_per_second": 300.837, "eval_steps_per_second": 2.521, "step": 11000 }, { "epoch": 0.09, "learning_rate": 0.00027267128624934014, "loss": 3.1782, "step": 11100 }, { "epoch": 0.09, "learning_rate": 0.00027242243206783575, "loss": 3.1752, "step": 11200 }, { "epoch": 0.09, "learning_rate": 0.00027217106420773037, "loss": 3.172, "step": 11300 }, { "epoch": 0.1, "learning_rate": 0.000271919696347625, "loss": 3.1794, "step": 11400 }, { "epoch": 0.1, "learning_rate": 0.00027166832848751955, "loss": 3.1773, "step": 11500 }, { "epoch": 0.1, "learning_rate": 0.00027141696062741417, "loss": 3.1776, "step": 11600 }, { "epoch": 0.1, "learning_rate": 0.0002711681064459098, "loss": 3.1866, "step": 11700 }, { "epoch": 0.1, "learning_rate": 0.0002709167385858044, "loss": 3.1707, "step": 11800 }, { "epoch": 0.1, "learning_rate": 0.00027066537072569896, "loss": 3.1705, "step": 11900 }, { "epoch": 0.1, "learning_rate": 0.0002704140028655936, "loss": 3.1731, "step": 12000 }, { "epoch": 0.1, "eval_accuracy": 0.41493381221427206, "eval_loss": 3.1884472370147705, "eval_runtime": 36.6321, "eval_samples_per_second": 306.18, "eval_steps_per_second": 2.566, "step": 12000 }, { "epoch": 0.1, "learning_rate": 0.00027016514868408924, "loss": 3.1688, "step": 12100 }, { "epoch": 0.1, "learning_rate": 0.0002699137808239838, "loss": 3.1698, "step": 12200 }, { "epoch": 0.1, "learning_rate": 0.0002696624129638784, "loss": 3.1661, "step": 12300 }, { "epoch": 0.1, "learning_rate": 0.000269411045103773, "loss": 3.163, "step": 12400 }, { "epoch": 0.1, "learning_rate": 0.0002691596772436676, "loss": 3.166, "step": 12500 }, { "epoch": 0.11, "learning_rate": 0.00026890830938356217, "loss": 3.1684, "step": 12600 }, { "epoch": 0.11, "learning_rate": 0.0002686569415234568, "loss": 3.1665, "step": 12700 }, { "epoch": 0.11, "learning_rate": 0.0002684055736633514, "loss": 3.1623, "step": 12800 }, { "epoch": 0.11, "learning_rate": 0.00026815420580324597, "loss": 3.1674, "step": 12900 }, { "epoch": 0.11, "learning_rate": 0.0002679028379431406, "loss": 3.1596, "step": 13000 }, { "epoch": 0.11, "eval_accuracy": 0.41567656441304324, "eval_loss": 3.181196928024292, "eval_runtime": 38.8685, "eval_samples_per_second": 288.563, "eval_steps_per_second": 2.418, "step": 13000 }, { "epoch": 0.11, "learning_rate": 0.00026765147008303515, "loss": 3.1659, "step": 13100 }, { "epoch": 0.11, "learning_rate": 0.00026740010222292977, "loss": 3.1528, "step": 13200 }, { "epoch": 0.11, "learning_rate": 0.00026714873436282433, "loss": 3.1656, "step": 13300 }, { "epoch": 0.11, "learning_rate": 0.00026689736650271895, "loss": 3.1594, "step": 13400 }, { "epoch": 0.11, "learning_rate": 0.0002666459986426135, "loss": 3.1593, "step": 13500 }, { "epoch": 0.11, "learning_rate": 0.00026639463078250813, "loss": 3.1579, "step": 13600 }, { "epoch": 0.11, "learning_rate": 0.0002661432629224027, "loss": 3.1599, "step": 13700 }, { "epoch": 0.12, "learning_rate": 0.0002658918950622973, "loss": 3.1529, "step": 13800 }, { "epoch": 0.12, "learning_rate": 0.0002656405272021919, "loss": 3.1615, "step": 13900 }, { "epoch": 0.12, "learning_rate": 0.0002653891593420865, "loss": 3.1628, "step": 14000 }, { "epoch": 0.12, "eval_accuracy": 0.41594754961977826, "eval_loss": 3.1771674156188965, "eval_runtime": 37.2285, "eval_samples_per_second": 301.275, "eval_steps_per_second": 2.525, "step": 14000 }, { "epoch": 0.12, "learning_rate": 0.00026513779148198106, "loss": 3.1594, "step": 14100 }, { "epoch": 0.12, "learning_rate": 0.0002648864236218757, "loss": 3.158, "step": 14200 }, { "epoch": 0.12, "learning_rate": 0.0002646400831189724, "loss": 3.1588, "step": 14300 }, { "epoch": 0.12, "learning_rate": 0.000264388715258867, "loss": 3.1606, "step": 14400 }, { "epoch": 0.12, "learning_rate": 0.00026413734739876157, "loss": 3.1555, "step": 14500 }, { "epoch": 0.12, "learning_rate": 0.0002638859795386562, "loss": 3.1574, "step": 14600 }, { "epoch": 0.12, "learning_rate": 0.00026363461167855075, "loss": 3.1526, "step": 14700 }, { "epoch": 0.12, "learning_rate": 0.00026338324381844537, "loss": 3.1457, "step": 14800 }, { "epoch": 0.12, "learning_rate": 0.00026313187595833993, "loss": 3.1655, "step": 14900 }, { "epoch": 0.13, "learning_rate": 0.0002628805080982345, "loss": 3.1658, "step": 15000 }, { "epoch": 0.13, "eval_accuracy": 0.416945842272569, "eval_loss": 3.170196294784546, "eval_runtime": 38.2091, "eval_samples_per_second": 293.542, "eval_steps_per_second": 2.46, "step": 15000 }, { "epoch": 0.13, "learning_rate": 0.0002626291402381291, "loss": 3.1537, "step": 15100 }, { "epoch": 0.13, "learning_rate": 0.00026237777237802373, "loss": 3.1596, "step": 15200 }, { "epoch": 0.13, "learning_rate": 0.00026212891819651934, "loss": 3.1558, "step": 15300 }, { "epoch": 0.13, "learning_rate": 0.00026187755033641396, "loss": 3.1568, "step": 15400 }, { "epoch": 0.13, "learning_rate": 0.0002616261824763085, "loss": 3.1488, "step": 15500 }, { "epoch": 0.13, "learning_rate": 0.00026137481461620314, "loss": 3.1452, "step": 15600 }, { "epoch": 0.13, "learning_rate": 0.0002611234467560977, "loss": 3.1503, "step": 15700 }, { "epoch": 0.13, "learning_rate": 0.0002608720788959923, "loss": 3.1456, "step": 15800 }, { "epoch": 0.13, "learning_rate": 0.00026062071103588694, "loss": 3.1469, "step": 15900 }, { "epoch": 0.13, "learning_rate": 0.0002603693431757815, "loss": 3.1479, "step": 16000 }, { "epoch": 0.13, "eval_accuracy": 0.41732213261145495, "eval_loss": 3.1664865016937256, "eval_runtime": 36.9736, "eval_samples_per_second": 303.351, "eval_steps_per_second": 2.542, "step": 16000 }, { "epoch": 0.13, "learning_rate": 0.0002601179753156761, "loss": 3.152, "step": 16100 }, { "epoch": 0.14, "learning_rate": 0.00025986660745557074, "loss": 3.1515, "step": 16200 }, { "epoch": 0.14, "learning_rate": 0.0002596152395954653, "loss": 3.1403, "step": 16300 }, { "epoch": 0.14, "learning_rate": 0.0002593638717353599, "loss": 3.1482, "step": 16400 }, { "epoch": 0.14, "learning_rate": 0.0002591125038752545, "loss": 3.1384, "step": 16500 }, { "epoch": 0.14, "learning_rate": 0.0002588611360151491, "loss": 3.1423, "step": 16600 }, { "epoch": 0.14, "learning_rate": 0.0002586097681550437, "loss": 3.1388, "step": 16700 }, { "epoch": 0.14, "learning_rate": 0.00025835840029493824, "loss": 3.1502, "step": 16800 }, { "epoch": 0.14, "learning_rate": 0.00025810703243483286, "loss": 3.1423, "step": 16900 }, { "epoch": 0.14, "learning_rate": 0.0002578556645747274, "loss": 3.1401, "step": 17000 }, { "epoch": 0.14, "eval_accuracy": 0.4181765082503061, "eval_loss": 3.161729097366333, "eval_runtime": 36.3895, "eval_samples_per_second": 308.221, "eval_steps_per_second": 2.583, "step": 17000 }, { "epoch": 0.14, "learning_rate": 0.00025760429671462204, "loss": 3.1444, "step": 17100 }, { "epoch": 0.14, "learning_rate": 0.0002573529288545166, "loss": 3.1344, "step": 17200 }, { "epoch": 0.14, "learning_rate": 0.0002571015609944112, "loss": 3.1362, "step": 17300 }, { "epoch": 0.15, "learning_rate": 0.00025685019313430584, "loss": 3.1449, "step": 17400 }, { "epoch": 0.15, "learning_rate": 0.0002565988252742004, "loss": 3.1403, "step": 17500 }, { "epoch": 0.15, "learning_rate": 0.000256347457414095, "loss": 3.1485, "step": 17600 }, { "epoch": 0.15, "learning_rate": 0.00025609608955398964, "loss": 3.1465, "step": 17700 }, { "epoch": 0.15, "learning_rate": 0.0002558447216938842, "loss": 3.1388, "step": 17800 }, { "epoch": 0.15, "learning_rate": 0.0002555933538337788, "loss": 3.1412, "step": 17900 }, { "epoch": 0.15, "learning_rate": 0.0002553419859736734, "loss": 3.1386, "step": 18000 }, { "epoch": 0.15, "eval_accuracy": 0.4183225313668887, "eval_loss": 3.1586148738861084, "eval_runtime": 36.9298, "eval_samples_per_second": 303.711, "eval_steps_per_second": 2.545, "step": 18000 }, { "epoch": 0.15, "learning_rate": 0.000255090618113568, "loss": 3.1421, "step": 18100 }, { "epoch": 0.15, "learning_rate": 0.00025483925025346257, "loss": 3.1355, "step": 18200 }, { "epoch": 0.15, "learning_rate": 0.0002545878823933572, "loss": 3.1399, "step": 18300 }, { "epoch": 0.15, "learning_rate": 0.00025433651453325175, "loss": 3.1349, "step": 18400 }, { "epoch": 0.15, "learning_rate": 0.0002540851466731463, "loss": 3.1413, "step": 18500 }, { "epoch": 0.16, "learning_rate": 0.00025383377881304093, "loss": 3.1278, "step": 18600 }, { "epoch": 0.16, "learning_rate": 0.0002535824109529355, "loss": 3.1405, "step": 18700 }, { "epoch": 0.16, "learning_rate": 0.0002533310430928301, "loss": 3.1277, "step": 18800 }, { "epoch": 0.16, "learning_rate": 0.00025307967523272474, "loss": 3.1341, "step": 18900 }, { "epoch": 0.16, "learning_rate": 0.0002528283073726193, "loss": 3.1396, "step": 19000 }, { "epoch": 0.16, "eval_accuracy": 0.41871075628741844, "eval_loss": 3.1532347202301025, "eval_runtime": 36.5045, "eval_samples_per_second": 307.25, "eval_steps_per_second": 2.575, "step": 19000 }, { "epoch": 0.16, "learning_rate": 0.00025257945319111496, "loss": 3.1329, "step": 19100 }, { "epoch": 0.16, "learning_rate": 0.0002523280853310095, "loss": 3.1358, "step": 19200 }, { "epoch": 0.16, "learning_rate": 0.00025207671747090414, "loss": 3.1377, "step": 19300 }, { "epoch": 0.16, "learning_rate": 0.0002518253496107987, "loss": 3.1289, "step": 19400 }, { "epoch": 0.16, "learning_rate": 0.0002515739817506933, "loss": 3.1348, "step": 19500 }, { "epoch": 0.16, "learning_rate": 0.00025132261389058794, "loss": 3.1324, "step": 19600 }, { "epoch": 0.16, "learning_rate": 0.0002510712460304825, "loss": 3.136, "step": 19700 }, { "epoch": 0.17, "learning_rate": 0.0002508198781703771, "loss": 3.1337, "step": 19800 }, { "epoch": 0.17, "learning_rate": 0.00025056851031027175, "loss": 3.132, "step": 19900 }, { "epoch": 0.17, "learning_rate": 0.0002503171424501663, "loss": 3.1345, "step": 20000 }, { "epoch": 0.17, "eval_accuracy": 0.41896770080986667, "eval_loss": 3.150233268737793, "eval_runtime": 36.0939, "eval_samples_per_second": 310.745, "eval_steps_per_second": 2.604, "step": 20000 }, { "epoch": 0.17, "learning_rate": 0.0002500682882686619, "loss": 3.1301, "step": 20100 }, { "epoch": 0.17, "learning_rate": 0.00024981692040855653, "loss": 3.1261, "step": 20200 }, { "epoch": 0.17, "learning_rate": 0.00024956555254845115, "loss": 3.1279, "step": 20300 }, { "epoch": 0.17, "learning_rate": 0.0002493141846883457, "loss": 3.1235, "step": 20400 }, { "epoch": 0.17, "learning_rate": 0.00024906281682824034, "loss": 3.1302, "step": 20500 }, { "epoch": 0.17, "learning_rate": 0.0002488114489681349, "loss": 3.1287, "step": 20600 }, { "epoch": 0.17, "learning_rate": 0.0002485600811080295, "loss": 3.1314, "step": 20700 }, { "epoch": 0.17, "learning_rate": 0.0002483087132479241, "loss": 3.1226, "step": 20800 }, { "epoch": 0.17, "learning_rate": 0.0002480573453878187, "loss": 3.1289, "step": 20900 }, { "epoch": 0.18, "learning_rate": 0.00024780597752771327, "loss": 3.1319, "step": 21000 }, { "epoch": 0.18, "eval_accuracy": 0.41907721814730364, "eval_loss": 3.1475839614868164, "eval_runtime": 36.3645, "eval_samples_per_second": 308.432, "eval_steps_per_second": 2.585, "step": 21000 }, { "epoch": 0.18, "learning_rate": 0.0002475546096676079, "loss": 3.1304, "step": 21100 }, { "epoch": 0.18, "learning_rate": 0.00024730324180750245, "loss": 3.1309, "step": 21200 }, { "epoch": 0.18, "learning_rate": 0.00024705187394739707, "loss": 3.1254, "step": 21300 }, { "epoch": 0.18, "learning_rate": 0.00024680050608729163, "loss": 3.1293, "step": 21400 }, { "epoch": 0.18, "learning_rate": 0.00024654913822718625, "loss": 3.1278, "step": 21500 }, { "epoch": 0.18, "learning_rate": 0.0002462977703670808, "loss": 3.1216, "step": 21600 }, { "epoch": 0.18, "learning_rate": 0.00024604640250697543, "loss": 3.1281, "step": 21700 }, { "epoch": 0.18, "learning_rate": 0.0002457975483254711, "loss": 3.1182, "step": 21800 }, { "epoch": 0.18, "learning_rate": 0.00024554618046536566, "loss": 3.1231, "step": 21900 }, { "epoch": 0.18, "learning_rate": 0.0002452948126052603, "loss": 3.1238, "step": 22000 }, { "epoch": 0.18, "eval_accuracy": 0.42022504408774863, "eval_loss": 3.1434154510498047, "eval_runtime": 36.9095, "eval_samples_per_second": 303.878, "eval_steps_per_second": 2.547, "step": 22000 }, { "epoch": 0.18, "learning_rate": 0.00024504344474515484, "loss": 3.1249, "step": 22100 }, { "epoch": 0.19, "learning_rate": 0.00024479207688504946, "loss": 3.1316, "step": 22200 }, { "epoch": 0.19, "learning_rate": 0.000244540709024944, "loss": 3.1152, "step": 22300 }, { "epoch": 0.19, "learning_rate": 0.00024428934116483864, "loss": 3.1204, "step": 22400 }, { "epoch": 0.19, "learning_rate": 0.00024403797330473323, "loss": 3.1237, "step": 22500 }, { "epoch": 0.19, "learning_rate": 0.00024378660544462782, "loss": 3.1256, "step": 22600 }, { "epoch": 0.19, "learning_rate": 0.00024353523758452241, "loss": 3.1272, "step": 22700 }, { "epoch": 0.19, "learning_rate": 0.000243283869724417, "loss": 3.12, "step": 22800 }, { "epoch": 0.19, "learning_rate": 0.00024303250186431162, "loss": 3.1182, "step": 22900 }, { "epoch": 0.19, "learning_rate": 0.0002427811340042062, "loss": 3.1224, "step": 23000 }, { "epoch": 0.19, "eval_accuracy": 0.42017309355588756, "eval_loss": 3.1407454013824463, "eval_runtime": 36.6142, "eval_samples_per_second": 306.329, "eval_steps_per_second": 2.567, "step": 23000 }, { "epoch": 0.19, "learning_rate": 0.0002425297661441008, "loss": 3.1174, "step": 23100 }, { "epoch": 0.19, "learning_rate": 0.00024227839828399537, "loss": 3.1199, "step": 23200 }, { "epoch": 0.19, "learning_rate": 0.00024202703042389, "loss": 3.1147, "step": 23300 }, { "epoch": 0.2, "learning_rate": 0.00024177566256378458, "loss": 3.1201, "step": 23400 }, { "epoch": 0.2, "learning_rate": 0.00024152429470367917, "loss": 3.1231, "step": 23500 }, { "epoch": 0.2, "learning_rate": 0.00024127544052217483, "loss": 3.1172, "step": 23600 }, { "epoch": 0.2, "learning_rate": 0.0002410240726620694, "loss": 3.1176, "step": 23700 }, { "epoch": 0.2, "learning_rate": 0.00024077521848056506, "loss": 3.1119, "step": 23800 }, { "epoch": 0.2, "learning_rate": 0.00024052385062045965, "loss": 3.1212, "step": 23900 }, { "epoch": 0.2, "learning_rate": 0.00024027248276035424, "loss": 3.1183, "step": 24000 }, { "epoch": 0.2, "eval_accuracy": 0.4208589809832972, "eval_loss": 3.137460947036743, "eval_runtime": 37.115, "eval_samples_per_second": 302.196, "eval_steps_per_second": 2.533, "step": 24000 }, { "epoch": 0.2, "learning_rate": 0.00024002111490024883, "loss": 3.1287, "step": 24100 }, { "epoch": 0.2, "learning_rate": 0.00023976974704014342, "loss": 3.1157, "step": 24200 }, { "epoch": 0.2, "learning_rate": 0.00023951837918003804, "loss": 3.1199, "step": 24300 }, { "epoch": 0.2, "learning_rate": 0.0002392670113199326, "loss": 3.1162, "step": 24400 }, { "epoch": 0.2, "learning_rate": 0.00023901564345982722, "loss": 3.1179, "step": 24500 }, { "epoch": 0.21, "learning_rate": 0.0002387642755997218, "loss": 3.1214, "step": 24600 }, { "epoch": 0.21, "learning_rate": 0.00023851290773961638, "loss": 3.1138, "step": 24700 }, { "epoch": 0.21, "learning_rate": 0.000238261539879511, "loss": 3.1117, "step": 24800 }, { "epoch": 0.21, "learning_rate": 0.00023801017201940556, "loss": 3.1117, "step": 24900 }, { "epoch": 0.21, "learning_rate": 0.00023775880415930018, "loss": 3.1131, "step": 25000 }, { "epoch": 0.21, "eval_accuracy": 0.4210050040998798, "eval_loss": 3.1347129344940186, "eval_runtime": 36.8178, "eval_samples_per_second": 304.635, "eval_steps_per_second": 2.553, "step": 25000 }, { "epoch": 0.21, "learning_rate": 0.00023750743629919474, "loss": 3.118, "step": 25100 }, { "epoch": 0.21, "learning_rate": 0.00023725606843908936, "loss": 3.1158, "step": 25200 }, { "epoch": 0.21, "learning_rate": 0.00023700470057898395, "loss": 3.1178, "step": 25300 }, { "epoch": 0.21, "learning_rate": 0.00023675333271887855, "loss": 3.1019, "step": 25400 }, { "epoch": 0.21, "learning_rate": 0.00023650196485877314, "loss": 3.1105, "step": 25500 }, { "epoch": 0.21, "learning_rate": 0.00023625059699866773, "loss": 3.1158, "step": 25600 }, { "epoch": 0.21, "learning_rate": 0.00023599922913856232, "loss": 3.1166, "step": 25700 }, { "epoch": 0.22, "learning_rate": 0.00023574786127845694, "loss": 3.1172, "step": 25800 }, { "epoch": 0.22, "learning_rate": 0.0002354964934183515, "loss": 3.1233, "step": 25900 }, { "epoch": 0.22, "learning_rate": 0.00023524763923684716, "loss": 3.1106, "step": 26000 }, { "epoch": 0.22, "eval_accuracy": 0.42156382333449405, "eval_loss": 3.131035566329956, "eval_runtime": 36.1307, "eval_samples_per_second": 310.428, "eval_steps_per_second": 2.602, "step": 26000 }, { "epoch": 0.22, "learning_rate": 0.00023499627137674175, "loss": 3.1186, "step": 26100 }, { "epoch": 0.22, "learning_rate": 0.00023474490351663635, "loss": 3.1069, "step": 26200 }, { "epoch": 0.22, "learning_rate": 0.00023449353565653094, "loss": 3.114, "step": 26300 }, { "epoch": 0.22, "learning_rate": 0.00023424216779642553, "loss": 3.114, "step": 26400 }, { "epoch": 0.22, "learning_rate": 0.0002339907999363201, "loss": 3.1072, "step": 26500 }, { "epoch": 0.22, "learning_rate": 0.0002337394320762147, "loss": 3.1141, "step": 26600 }, { "epoch": 0.22, "learning_rate": 0.0002334880642161093, "loss": 3.1125, "step": 26700 }, { "epoch": 0.22, "learning_rate": 0.0002332366963560039, "loss": 3.1202, "step": 26800 }, { "epoch": 0.23, "learning_rate": 0.00023298532849589849, "loss": 3.1177, "step": 26900 }, { "epoch": 0.23, "learning_rate": 0.0002327339606357931, "loss": 3.114, "step": 27000 }, { "epoch": 0.23, "eval_accuracy": 0.42156241926606536, "eval_loss": 3.129709482192993, "eval_runtime": 36.5135, "eval_samples_per_second": 307.174, "eval_steps_per_second": 2.574, "step": 27000 }, { "epoch": 0.23, "learning_rate": 0.00023248259277568767, "loss": 3.1107, "step": 27100 }, { "epoch": 0.23, "learning_rate": 0.00023223122491558229, "loss": 3.1111, "step": 27200 }, { "epoch": 0.23, "learning_rate": 0.00023197985705547685, "loss": 3.106, "step": 27300 }, { "epoch": 0.23, "learning_rate": 0.00023172848919537147, "loss": 3.1081, "step": 27400 }, { "epoch": 0.23, "learning_rate": 0.00023147712133526606, "loss": 3.1077, "step": 27500 }, { "epoch": 0.23, "learning_rate": 0.00023122575347516065, "loss": 3.116, "step": 27600 }, { "epoch": 0.23, "learning_rate": 0.00023097438561505524, "loss": 3.1168, "step": 27700 }, { "epoch": 0.23, "learning_rate": 0.00023072301775494983, "loss": 3.1137, "step": 27800 }, { "epoch": 0.23, "learning_rate": 0.00023047164989484442, "loss": 3.1065, "step": 27900 }, { "epoch": 0.23, "learning_rate": 0.00023022028203473904, "loss": 3.1083, "step": 28000 }, { "epoch": 0.23, "eval_accuracy": 0.42211211205589316, "eval_loss": 3.1262805461883545, "eval_runtime": 36.106, "eval_samples_per_second": 310.641, "eval_steps_per_second": 2.603, "step": 28000 }, { "epoch": 0.24, "learning_rate": 0.0002299689141746336, "loss": 3.1193, "step": 28100 }, { "epoch": 0.24, "learning_rate": 0.00022972257367173034, "loss": 3.0997, "step": 28200 }, { "epoch": 0.24, "learning_rate": 0.0002294712058116249, "loss": 3.1013, "step": 28300 }, { "epoch": 0.24, "learning_rate": 0.00022921983795151952, "loss": 3.1049, "step": 28400 }, { "epoch": 0.24, "learning_rate": 0.00022896847009141409, "loss": 3.1152, "step": 28500 }, { "epoch": 0.24, "learning_rate": 0.00022871710223130868, "loss": 3.1077, "step": 28600 }, { "epoch": 0.24, "learning_rate": 0.00022846573437120327, "loss": 3.1146, "step": 28700 }, { "epoch": 0.24, "learning_rate": 0.00022821436651109786, "loss": 3.1054, "step": 28800 }, { "epoch": 0.24, "learning_rate": 0.00022796299865099248, "loss": 3.1087, "step": 28900 }, { "epoch": 0.24, "learning_rate": 0.00022771163079088704, "loss": 3.1045, "step": 29000 }, { "epoch": 0.24, "eval_accuracy": 0.4221415974928954, "eval_loss": 3.124873161315918, "eval_runtime": 37.1734, "eval_samples_per_second": 301.721, "eval_steps_per_second": 2.529, "step": 29000 }, { "epoch": 0.24, "learning_rate": 0.0002274627766093827, "loss": 3.1024, "step": 29100 }, { "epoch": 0.24, "learning_rate": 0.0002272114087492773, "loss": 3.0938, "step": 29200 }, { "epoch": 0.25, "learning_rate": 0.00022696004088917189, "loss": 3.1049, "step": 29300 }, { "epoch": 0.25, "learning_rate": 0.00022670867302906648, "loss": 3.109, "step": 29400 }, { "epoch": 0.25, "learning_rate": 0.00022645730516896107, "loss": 3.1033, "step": 29500 }, { "epoch": 0.25, "learning_rate": 0.0002262059373088557, "loss": 3.1066, "step": 29600 }, { "epoch": 0.25, "learning_rate": 0.00022595456944875025, "loss": 3.1087, "step": 29700 }, { "epoch": 0.25, "learning_rate": 0.00022570320158864487, "loss": 3.101, "step": 29800 }, { "epoch": 0.25, "learning_rate": 0.00022545183372853943, "loss": 3.1137, "step": 29900 }, { "epoch": 0.25, "learning_rate": 0.00022520046586843405, "loss": 3.1084, "step": 30000 }, { "epoch": 0.25, "eval_accuracy": 0.4223374650386961, "eval_loss": 3.1216838359832764, "eval_runtime": 39.4599, "eval_samples_per_second": 284.238, "eval_steps_per_second": 2.382, "step": 30000 }, { "epoch": 0.25, "learning_rate": 0.00022494909800832864, "loss": 3.1006, "step": 30100 }, { "epoch": 0.25, "learning_rate": 0.00022469773014822323, "loss": 3.1045, "step": 30200 }, { "epoch": 0.25, "learning_rate": 0.00022444636228811783, "loss": 3.1001, "step": 30300 }, { "epoch": 0.25, "learning_rate": 0.00022419499442801244, "loss": 3.0988, "step": 30400 }, { "epoch": 0.26, "learning_rate": 0.000223943626567907, "loss": 3.0981, "step": 30500 }, { "epoch": 0.26, "learning_rate": 0.00022369477238640264, "loss": 3.1027, "step": 30600 }, { "epoch": 0.26, "learning_rate": 0.00022344340452629723, "loss": 3.1046, "step": 30700 }, { "epoch": 0.26, "learning_rate": 0.00022319203666619185, "loss": 3.1025, "step": 30800 }, { "epoch": 0.26, "learning_rate": 0.00022294066880608642, "loss": 3.1025, "step": 30900 }, { "epoch": 0.26, "learning_rate": 0.00022268930094598103, "loss": 3.097, "step": 31000 }, { "epoch": 0.26, "eval_accuracy": 0.42269550248800924, "eval_loss": 3.1202731132507324, "eval_runtime": 36.6594, "eval_samples_per_second": 305.952, "eval_steps_per_second": 2.564, "step": 31000 }, { "epoch": 0.26, "learning_rate": 0.0002224379330858756, "loss": 3.104, "step": 31100 }, { "epoch": 0.26, "learning_rate": 0.00022218656522577022, "loss": 3.0977, "step": 31200 }, { "epoch": 0.26, "learning_rate": 0.0002219351973656648, "loss": 3.1121, "step": 31300 }, { "epoch": 0.26, "learning_rate": 0.0002216838295055594, "loss": 3.1011, "step": 31400 }, { "epoch": 0.26, "learning_rate": 0.000221432461645454, "loss": 3.0963, "step": 31500 }, { "epoch": 0.26, "learning_rate": 0.00022118109378534858, "loss": 3.1082, "step": 31600 }, { "epoch": 0.27, "learning_rate": 0.00022092972592524317, "loss": 3.0994, "step": 31700 }, { "epoch": 0.27, "learning_rate": 0.0002206783580651378, "loss": 3.0957, "step": 31800 }, { "epoch": 0.27, "learning_rate": 0.00022042699020503236, "loss": 3.0947, "step": 31900 }, { "epoch": 0.27, "learning_rate": 0.00022017562234492697, "loss": 3.0926, "step": 32000 }, { "epoch": 0.27, "eval_accuracy": 0.42268707807743716, "eval_loss": 3.119593381881714, "eval_runtime": 37.8215, "eval_samples_per_second": 296.551, "eval_steps_per_second": 2.485, "step": 32000 }, { "epoch": 0.27, "learning_rate": 0.00021992425448482154, "loss": 3.0955, "step": 32100 }, { "epoch": 0.27, "learning_rate": 0.00021967288662471616, "loss": 3.0973, "step": 32200 }, { "epoch": 0.27, "learning_rate": 0.00021942151876461075, "loss": 3.1098, "step": 32300 }, { "epoch": 0.27, "learning_rate": 0.0002191701509045053, "loss": 3.1007, "step": 32400 }, { "epoch": 0.27, "learning_rate": 0.00021891878304439993, "loss": 3.0992, "step": 32500 }, { "epoch": 0.27, "learning_rate": 0.0002186674151842945, "loss": 3.1029, "step": 32600 }, { "epoch": 0.27, "learning_rate": 0.00021841604732418911, "loss": 3.0947, "step": 32700 }, { "epoch": 0.27, "learning_rate": 0.00021816719314268475, "loss": 3.0941, "step": 32800 }, { "epoch": 0.28, "learning_rate": 0.00021791582528257934, "loss": 3.1004, "step": 32900 }, { "epoch": 0.28, "learning_rate": 0.00021766445742247396, "loss": 3.1003, "step": 33000 }, { "epoch": 0.28, "eval_accuracy": 0.4228331011940198, "eval_loss": 3.1163218021392822, "eval_runtime": 37.158, "eval_samples_per_second": 301.846, "eval_steps_per_second": 2.53, "step": 33000 }, { "epoch": 0.28, "learning_rate": 0.00021741308956236852, "loss": 3.0986, "step": 33100 }, { "epoch": 0.28, "learning_rate": 0.00021716172170226314, "loss": 3.0999, "step": 33200 }, { "epoch": 0.28, "learning_rate": 0.0002169103538421577, "loss": 3.0994, "step": 33300 }, { "epoch": 0.28, "learning_rate": 0.00021665898598205232, "loss": 3.0976, "step": 33400 }, { "epoch": 0.28, "learning_rate": 0.00021640761812194691, "loss": 3.0949, "step": 33500 }, { "epoch": 0.28, "learning_rate": 0.0002161562502618415, "loss": 3.0923, "step": 33600 }, { "epoch": 0.28, "learning_rate": 0.0002159048824017361, "loss": 3.0909, "step": 33700 }, { "epoch": 0.28, "learning_rate": 0.0002156535145416307, "loss": 3.0944, "step": 33800 }, { "epoch": 0.28, "learning_rate": 0.00021540214668152528, "loss": 3.0997, "step": 33900 }, { "epoch": 0.28, "learning_rate": 0.0002151507788214199, "loss": 3.097, "step": 34000 }, { "epoch": 0.28, "eval_accuracy": 0.4235625147427185, "eval_loss": 3.1130168437957764, "eval_runtime": 36.3501, "eval_samples_per_second": 308.555, "eval_steps_per_second": 2.586, "step": 34000 }, { "epoch": 0.29, "learning_rate": 0.00021489941096131446, "loss": 3.0878, "step": 34100 }, { "epoch": 0.29, "learning_rate": 0.00021464804310120905, "loss": 3.094, "step": 34200 }, { "epoch": 0.29, "learning_rate": 0.00021439667524110364, "loss": 3.0976, "step": 34300 }, { "epoch": 0.29, "learning_rate": 0.00021414530738099824, "loss": 3.0959, "step": 34400 }, { "epoch": 0.29, "learning_rate": 0.00021389393952089285, "loss": 3.098, "step": 34500 }, { "epoch": 0.29, "learning_rate": 0.00021364257166078742, "loss": 3.0891, "step": 34600 }, { "epoch": 0.29, "learning_rate": 0.00021339120380068204, "loss": 3.0881, "step": 34700 }, { "epoch": 0.29, "learning_rate": 0.0002131398359405766, "loss": 3.0934, "step": 34800 }, { "epoch": 0.29, "learning_rate": 0.00021288846808047122, "loss": 3.0997, "step": 34900 }, { "epoch": 0.29, "learning_rate": 0.00021263961389896685, "loss": 3.0934, "step": 35000 }, { "epoch": 0.29, "eval_accuracy": 0.4233083783571276, "eval_loss": 3.112696886062622, "eval_runtime": 36.2826, "eval_samples_per_second": 309.129, "eval_steps_per_second": 2.591, "step": 35000 }, { "epoch": 0.29, "learning_rate": 0.00021238824603886144, "loss": 3.0886, "step": 35100 }, { "epoch": 0.29, "learning_rate": 0.00021213687817875606, "loss": 3.0891, "step": 35200 }, { "epoch": 0.3, "learning_rate": 0.00021188551031865063, "loss": 3.0952, "step": 35300 }, { "epoch": 0.3, "learning_rate": 0.00021163414245854525, "loss": 3.0869, "step": 35400 }, { "epoch": 0.3, "learning_rate": 0.0002113827745984398, "loss": 3.0905, "step": 35500 }, { "epoch": 0.3, "learning_rate": 0.00021113140673833443, "loss": 3.0939, "step": 35600 }, { "epoch": 0.3, "learning_rate": 0.00021088255255683006, "loss": 3.0958, "step": 35700 }, { "epoch": 0.3, "learning_rate": 0.00021063118469672465, "loss": 3.0882, "step": 35800 }, { "epoch": 0.3, "learning_rate": 0.00021037981683661927, "loss": 3.0852, "step": 35900 }, { "epoch": 0.3, "learning_rate": 0.00021012844897651384, "loss": 3.0957, "step": 36000 }, { "epoch": 0.3, "eval_accuracy": 0.4237239826120166, "eval_loss": 3.110541820526123, "eval_runtime": 37.0216, "eval_samples_per_second": 302.958, "eval_steps_per_second": 2.539, "step": 36000 }, { "epoch": 0.3, "learning_rate": 0.00020987708111640845, "loss": 3.0968, "step": 36100 }, { "epoch": 0.3, "learning_rate": 0.00020962571325630302, "loss": 3.0909, "step": 36200 }, { "epoch": 0.3, "learning_rate": 0.00020937434539619764, "loss": 3.0826, "step": 36300 }, { "epoch": 0.3, "learning_rate": 0.00020912297753609223, "loss": 3.086, "step": 36400 }, { "epoch": 0.31, "learning_rate": 0.0002088716096759868, "loss": 3.091, "step": 36500 }, { "epoch": 0.31, "learning_rate": 0.00020862275549448245, "loss": 3.0865, "step": 36600 }, { "epoch": 0.31, "learning_rate": 0.00020837138763437704, "loss": 3.092, "step": 36700 }, { "epoch": 0.31, "learning_rate": 0.00020812001977427164, "loss": 3.0916, "step": 36800 }, { "epoch": 0.31, "learning_rate": 0.00020786865191416623, "loss": 3.0924, "step": 36900 }, { "epoch": 0.31, "learning_rate": 0.00020761728405406082, "loss": 3.0915, "step": 37000 }, { "epoch": 0.31, "eval_accuracy": 0.42398513933975085, "eval_loss": 3.10992169380188, "eval_runtime": 36.5153, "eval_samples_per_second": 307.159, "eval_steps_per_second": 2.574, "step": 37000 }, { "epoch": 0.31, "learning_rate": 0.00020736591619395544, "loss": 3.0841, "step": 37100 }, { "epoch": 0.31, "learning_rate": 0.00020711454833385, "loss": 3.088, "step": 37200 }, { "epoch": 0.31, "learning_rate": 0.00020686318047374462, "loss": 3.0941, "step": 37300 }, { "epoch": 0.31, "learning_rate": 0.00020661181261363918, "loss": 3.0898, "step": 37400 }, { "epoch": 0.31, "learning_rate": 0.0002063604447535338, "loss": 3.0885, "step": 37500 }, { "epoch": 0.31, "learning_rate": 0.0002061090768934284, "loss": 3.0918, "step": 37600 }, { "epoch": 0.32, "learning_rate": 0.00020585770903332298, "loss": 3.0962, "step": 37700 }, { "epoch": 0.32, "learning_rate": 0.00020560634117321758, "loss": 3.096, "step": 37800 }, { "epoch": 0.32, "learning_rate": 0.0002053549733131122, "loss": 3.0846, "step": 37900 }, { "epoch": 0.32, "learning_rate": 0.00020510611913160783, "loss": 3.0908, "step": 38000 }, { "epoch": 0.32, "eval_accuracy": 0.4245425545059364, "eval_loss": 3.1069419384002686, "eval_runtime": 37.2669, "eval_samples_per_second": 300.964, "eval_steps_per_second": 2.522, "step": 38000 }, { "epoch": 0.32, "learning_rate": 0.0002048547512715024, "loss": 3.0851, "step": 38100 }, { "epoch": 0.32, "learning_rate": 0.000204603383411397, "loss": 3.0859, "step": 38200 }, { "epoch": 0.32, "learning_rate": 0.0002043520155512916, "loss": 3.0877, "step": 38300 }, { "epoch": 0.32, "learning_rate": 0.0002041006476911862, "loss": 3.08, "step": 38400 }, { "epoch": 0.32, "learning_rate": 0.00020384927983108079, "loss": 3.0872, "step": 38500 }, { "epoch": 0.32, "learning_rate": 0.00020359791197097535, "loss": 3.0934, "step": 38600 }, { "epoch": 0.32, "learning_rate": 0.00020334654411086997, "loss": 3.0898, "step": 38700 }, { "epoch": 0.32, "learning_rate": 0.00020309517625076456, "loss": 3.091, "step": 38800 }, { "epoch": 0.33, "learning_rate": 0.00020284380839065915, "loss": 3.0903, "step": 38900 }, { "epoch": 0.33, "learning_rate": 0.00020259244053055374, "loss": 3.0764, "step": 39000 }, { "epoch": 0.33, "eval_accuracy": 0.42455589315600883, "eval_loss": 3.104147434234619, "eval_runtime": 36.3216, "eval_samples_per_second": 308.797, "eval_steps_per_second": 2.588, "step": 39000 }, { "epoch": 0.33, "learning_rate": 0.00020234107267044833, "loss": 3.0781, "step": 39100 }, { "epoch": 0.33, "learning_rate": 0.00020208970481034292, "loss": 3.0805, "step": 39200 }, { "epoch": 0.33, "learning_rate": 0.00020183833695023754, "loss": 3.0861, "step": 39300 }, { "epoch": 0.33, "learning_rate": 0.0002015869690901321, "loss": 3.0906, "step": 39400 }, { "epoch": 0.33, "learning_rate": 0.00020133560123002672, "loss": 3.0837, "step": 39500 }, { "epoch": 0.33, "learning_rate": 0.0002010842333699213, "loss": 3.0827, "step": 39600 }, { "epoch": 0.33, "learning_rate": 0.0002008328655098159, "loss": 3.082, "step": 39700 }, { "epoch": 0.33, "learning_rate": 0.0002005814976497105, "loss": 3.0838, "step": 39800 }, { "epoch": 0.33, "learning_rate": 0.0002003301297896051, "loss": 3.0834, "step": 39900 }, { "epoch": 0.33, "learning_rate": 0.00020007876192949968, "loss": 3.0855, "step": 40000 }, { "epoch": 0.33, "eval_accuracy": 0.42506837813247667, "eval_loss": 3.1023147106170654, "eval_runtime": 36.3302, "eval_samples_per_second": 308.724, "eval_steps_per_second": 2.587, "step": 40000 }, { "epoch": 0.34, "learning_rate": 0.00019982990774799532, "loss": 3.0823, "step": 40100 }, { "epoch": 0.34, "learning_rate": 0.00019957853988788993, "loss": 3.0877, "step": 40200 }, { "epoch": 0.34, "learning_rate": 0.0001993271720277845, "loss": 3.0891, "step": 40300 }, { "epoch": 0.34, "learning_rate": 0.0001990758041676791, "loss": 3.0847, "step": 40400 }, { "epoch": 0.34, "learning_rate": 0.0001988244363075737, "loss": 3.0769, "step": 40500 }, { "epoch": 0.34, "learning_rate": 0.00019857306844746827, "loss": 3.0842, "step": 40600 }, { "epoch": 0.34, "learning_rate": 0.0001983217005873629, "loss": 3.0771, "step": 40700 }, { "epoch": 0.34, "learning_rate": 0.00019807033272725745, "loss": 3.0878, "step": 40800 }, { "epoch": 0.34, "learning_rate": 0.00019781896486715207, "loss": 3.0876, "step": 40900 }, { "epoch": 0.34, "learning_rate": 0.00019756759700704666, "loss": 3.0782, "step": 41000 }, { "epoch": 0.34, "eval_accuracy": 0.42481002954159974, "eval_loss": 3.100797414779663, "eval_runtime": 37.0564, "eval_samples_per_second": 302.674, "eval_steps_per_second": 2.537, "step": 41000 }, { "epoch": 0.34, "learning_rate": 0.00019731622914694126, "loss": 3.0788, "step": 41100 }, { "epoch": 0.34, "learning_rate": 0.00019706486128683585, "loss": 3.0811, "step": 41200 }, { "epoch": 0.35, "learning_rate": 0.00019681349342673044, "loss": 3.0799, "step": 41300 }, { "epoch": 0.35, "learning_rate": 0.00019656212556662503, "loss": 3.0737, "step": 41400 }, { "epoch": 0.35, "learning_rate": 0.00019631075770651965, "loss": 3.0815, "step": 41500 }, { "epoch": 0.35, "learning_rate": 0.0001960593898464142, "loss": 3.0885, "step": 41600 }, { "epoch": 0.35, "learning_rate": 0.00019580802198630883, "loss": 3.0785, "step": 41700 }, { "epoch": 0.35, "learning_rate": 0.0001955566541262034, "loss": 3.0738, "step": 41800 }, { "epoch": 0.35, "learning_rate": 0.000195305286266098, "loss": 3.0826, "step": 41900 }, { "epoch": 0.35, "learning_rate": 0.0001950539184059926, "loss": 3.0821, "step": 42000 }, { "epoch": 0.35, "eval_accuracy": 0.4254973210374381, "eval_loss": 3.0979230403900146, "eval_runtime": 36.8694, "eval_samples_per_second": 304.209, "eval_steps_per_second": 2.55, "step": 42000 }, { "epoch": 0.35, "learning_rate": 0.00019480255054588717, "loss": 3.0689, "step": 42100 }, { "epoch": 0.35, "learning_rate": 0.00019455369636438286, "loss": 3.0767, "step": 42200 }, { "epoch": 0.35, "learning_rate": 0.0001943048421828785, "loss": 3.0768, "step": 42300 }, { "epoch": 0.35, "learning_rate": 0.00019405598800137415, "loss": 3.0746, "step": 42400 }, { "epoch": 0.36, "learning_rate": 0.00019380462014126872, "loss": 3.0812, "step": 42500 }, { "epoch": 0.36, "learning_rate": 0.00019355325228116333, "loss": 3.0721, "step": 42600 }, { "epoch": 0.36, "learning_rate": 0.0001933018844210579, "loss": 3.0701, "step": 42700 }, { "epoch": 0.36, "learning_rate": 0.0001930505165609525, "loss": 3.0769, "step": 42800 }, { "epoch": 0.36, "learning_rate": 0.00019279914870084708, "loss": 3.0827, "step": 42900 }, { "epoch": 0.36, "learning_rate": 0.00019254778084074167, "loss": 3.075, "step": 43000 }, { "epoch": 0.36, "eval_accuracy": 0.425449582710863, "eval_loss": 3.0971269607543945, "eval_runtime": 36.1836, "eval_samples_per_second": 309.975, "eval_steps_per_second": 2.598, "step": 43000 }, { "epoch": 0.36, "learning_rate": 0.0001922964129806363, "loss": 3.0742, "step": 43100 }, { "epoch": 0.36, "learning_rate": 0.00019204504512053086, "loss": 3.0804, "step": 43200 }, { "epoch": 0.36, "learning_rate": 0.00019179367726042547, "loss": 3.0788, "step": 43300 }, { "epoch": 0.36, "learning_rate": 0.00019154230940032004, "loss": 3.078, "step": 43400 }, { "epoch": 0.36, "learning_rate": 0.00019129094154021466, "loss": 3.0729, "step": 43500 }, { "epoch": 0.36, "learning_rate": 0.00019103957368010925, "loss": 3.0704, "step": 43600 }, { "epoch": 0.37, "learning_rate": 0.00019078820582000384, "loss": 3.0793, "step": 43700 }, { "epoch": 0.37, "learning_rate": 0.00019053683795989843, "loss": 3.0789, "step": 43800 }, { "epoch": 0.37, "learning_rate": 0.00019028547009979305, "loss": 3.0835, "step": 43900 }, { "epoch": 0.37, "learning_rate": 0.00019003661591828868, "loss": 3.0794, "step": 44000 }, { "epoch": 0.37, "eval_accuracy": 0.4256580868725218, "eval_loss": 3.0950751304626465, "eval_runtime": 36.1829, "eval_samples_per_second": 309.98, "eval_steps_per_second": 2.598, "step": 44000 }, { "epoch": 0.37, "learning_rate": 0.00018978524805818325, "loss": 3.0746, "step": 44100 }, { "epoch": 0.37, "learning_rate": 0.00018953388019807787, "loss": 3.0778, "step": 44200 }, { "epoch": 0.37, "learning_rate": 0.00018928251233797246, "loss": 3.0743, "step": 44300 }, { "epoch": 0.37, "learning_rate": 0.00018903114447786705, "loss": 3.0822, "step": 44400 }, { "epoch": 0.37, "learning_rate": 0.00018877977661776164, "loss": 3.0782, "step": 44500 }, { "epoch": 0.37, "learning_rate": 0.0001885284087576562, "loss": 3.0705, "step": 44600 }, { "epoch": 0.37, "learning_rate": 0.00018827704089755082, "loss": 3.0737, "step": 44700 }, { "epoch": 0.37, "learning_rate": 0.0001880256730374454, "loss": 3.0736, "step": 44800 }, { "epoch": 0.38, "learning_rate": 0.00018777681885594105, "loss": 3.0712, "step": 44900 }, { "epoch": 0.38, "learning_rate": 0.00018752545099583567, "loss": 3.0836, "step": 45000 }, { "epoch": 0.38, "eval_accuracy": 0.42573460860188483, "eval_loss": 3.0936806201934814, "eval_runtime": 36.1343, "eval_samples_per_second": 310.398, "eval_steps_per_second": 2.601, "step": 45000 }, { "epoch": 0.38, "learning_rate": 0.00018727408313573023, "loss": 3.0763, "step": 45100 }, { "epoch": 0.38, "learning_rate": 0.00018702271527562485, "loss": 3.0831, "step": 45200 }, { "epoch": 0.38, "learning_rate": 0.0001867713474155194, "loss": 3.0768, "step": 45300 }, { "epoch": 0.38, "learning_rate": 0.00018651997955541403, "loss": 3.0686, "step": 45400 }, { "epoch": 0.38, "learning_rate": 0.00018626861169530862, "loss": 3.0766, "step": 45500 }, { "epoch": 0.38, "learning_rate": 0.0001860172438352032, "loss": 3.0721, "step": 45600 }, { "epoch": 0.38, "learning_rate": 0.0001857658759750978, "loss": 3.0812, "step": 45700 }, { "epoch": 0.38, "learning_rate": 0.00018551450811499242, "loss": 3.0853, "step": 45800 }, { "epoch": 0.38, "learning_rate": 0.000185263140254887, "loss": 3.0753, "step": 45900 }, { "epoch": 0.38, "learning_rate": 0.0001850117723947816, "loss": 3.0744, "step": 46000 }, { "epoch": 0.38, "eval_accuracy": 0.42582517101553463, "eval_loss": 3.092123508453369, "eval_runtime": 36.2715, "eval_samples_per_second": 309.224, "eval_steps_per_second": 2.592, "step": 46000 }, { "epoch": 0.39, "learning_rate": 0.00018476040453467617, "loss": 3.077, "step": 46100 }, { "epoch": 0.39, "learning_rate": 0.0001845090366745708, "loss": 3.0822, "step": 46200 }, { "epoch": 0.39, "learning_rate": 0.00018425766881446535, "loss": 3.0791, "step": 46300 }, { "epoch": 0.39, "learning_rate": 0.00018400630095435997, "loss": 3.0776, "step": 46400 }, { "epoch": 0.39, "learning_rate": 0.00018375493309425456, "loss": 3.0781, "step": 46500 }, { "epoch": 0.39, "learning_rate": 0.00018350356523414913, "loss": 3.0756, "step": 46600 }, { "epoch": 0.39, "learning_rate": 0.00018325219737404374, "loss": 3.0739, "step": 46700 }, { "epoch": 0.39, "learning_rate": 0.0001830008295139383, "loss": 3.0697, "step": 46800 }, { "epoch": 0.39, "learning_rate": 0.00018274946165383293, "loss": 3.0747, "step": 46900 }, { "epoch": 0.39, "learning_rate": 0.00018249809379372752, "loss": 3.0692, "step": 47000 }, { "epoch": 0.39, "eval_accuracy": 0.42626464443371115, "eval_loss": 3.090735912322998, "eval_runtime": 36.0323, "eval_samples_per_second": 311.276, "eval_steps_per_second": 2.609, "step": 47000 }, { "epoch": 0.39, "learning_rate": 0.0001822467259336221, "loss": 3.0701, "step": 47100 }, { "epoch": 0.39, "learning_rate": 0.0001819953580735167, "loss": 3.0706, "step": 47200 }, { "epoch": 0.4, "learning_rate": 0.00018174399021341132, "loss": 3.0734, "step": 47300 }, { "epoch": 0.4, "learning_rate": 0.00018149262235330588, "loss": 3.0719, "step": 47400 }, { "epoch": 0.4, "learning_rate": 0.0001812412544932005, "loss": 3.07, "step": 47500 }, { "epoch": 0.4, "learning_rate": 0.00018098988663309507, "loss": 3.0743, "step": 47600 }, { "epoch": 0.4, "learning_rate": 0.00018074103245159073, "loss": 3.0768, "step": 47700 }, { "epoch": 0.4, "learning_rate": 0.00018048966459148532, "loss": 3.0598, "step": 47800 }, { "epoch": 0.4, "learning_rate": 0.0001802382967313799, "loss": 3.0653, "step": 47900 }, { "epoch": 0.4, "learning_rate": 0.00017998692887127453, "loss": 3.0717, "step": 48000 }, { "epoch": 0.4, "eval_accuracy": 0.42618812270434814, "eval_loss": 3.0900797843933105, "eval_runtime": 36.3, "eval_samples_per_second": 308.981, "eval_steps_per_second": 2.59, "step": 48000 }, { "epoch": 0.4, "learning_rate": 0.0001797355610111691, "loss": 3.0752, "step": 48100 }, { "epoch": 0.4, "learning_rate": 0.0001794841931510637, "loss": 3.0656, "step": 48200 }, { "epoch": 0.4, "learning_rate": 0.00017923282529095827, "loss": 3.0758, "step": 48300 }, { "epoch": 0.4, "learning_rate": 0.00017898145743085287, "loss": 3.0827, "step": 48400 }, { "epoch": 0.41, "learning_rate": 0.00017873008957074746, "loss": 3.068, "step": 48500 }, { "epoch": 0.41, "learning_rate": 0.00017847872171064205, "loss": 3.0645, "step": 48600 }, { "epoch": 0.41, "learning_rate": 0.00017822735385053667, "loss": 3.0752, "step": 48700 }, { "epoch": 0.41, "learning_rate": 0.00017797598599043123, "loss": 3.0726, "step": 48800 }, { "epoch": 0.41, "learning_rate": 0.00017772461813032585, "loss": 3.0736, "step": 48900 }, { "epoch": 0.41, "learning_rate": 0.00017747325027022041, "loss": 3.0697, "step": 49000 }, { "epoch": 0.41, "eval_accuracy": 0.42656862524852013, "eval_loss": 3.0877325534820557, "eval_runtime": 37.2501, "eval_samples_per_second": 301.1, "eval_steps_per_second": 2.523, "step": 49000 }, { "epoch": 0.41, "learning_rate": 0.00017722188241011503, "loss": 3.0779, "step": 49100 }, { "epoch": 0.41, "learning_rate": 0.00017697051455000962, "loss": 3.0736, "step": 49200 }, { "epoch": 0.41, "learning_rate": 0.00017671914668990421, "loss": 3.0657, "step": 49300 }, { "epoch": 0.41, "learning_rate": 0.0001764677788297988, "loss": 3.065, "step": 49400 }, { "epoch": 0.41, "learning_rate": 0.00017621641096969342, "loss": 3.0683, "step": 49500 }, { "epoch": 0.41, "learning_rate": 0.000175965043109588, "loss": 3.0656, "step": 49600 }, { "epoch": 0.42, "learning_rate": 0.0001757136752494826, "loss": 3.0714, "step": 49700 }, { "epoch": 0.42, "learning_rate": 0.00017546482106797824, "loss": 3.0804, "step": 49800 }, { "epoch": 0.42, "learning_rate": 0.00017521345320787283, "loss": 3.0636, "step": 49900 }, { "epoch": 0.42, "learning_rate": 0.00017496208534776742, "loss": 3.0689, "step": 50000 }, { "epoch": 0.42, "eval_accuracy": 0.426702713783459, "eval_loss": 3.0857808589935303, "eval_runtime": 36.1585, "eval_samples_per_second": 310.189, "eval_steps_per_second": 2.6, "step": 50000 }, { "epoch": 0.42, "learning_rate": 0.00017471071748766202, "loss": 3.0627, "step": 50100 }, { "epoch": 0.42, "learning_rate": 0.00017445934962755663, "loss": 3.0655, "step": 50200 }, { "epoch": 0.42, "learning_rate": 0.0001742079817674512, "loss": 3.0711, "step": 50300 }, { "epoch": 0.42, "learning_rate": 0.0001739566139073458, "loss": 3.0684, "step": 50400 }, { "epoch": 0.42, "learning_rate": 0.00017370775972584142, "loss": 3.066, "step": 50500 }, { "epoch": 0.42, "learning_rate": 0.00017345639186573604, "loss": 3.0587, "step": 50600 }, { "epoch": 0.42, "learning_rate": 0.0001732050240056306, "loss": 3.0705, "step": 50700 }, { "epoch": 0.42, "learning_rate": 0.00017295365614552522, "loss": 3.0652, "step": 50800 }, { "epoch": 0.43, "learning_rate": 0.0001727022882854198, "loss": 3.0718, "step": 50900 }, { "epoch": 0.43, "learning_rate": 0.0001724509204253144, "loss": 3.067, "step": 51000 }, { "epoch": 0.43, "eval_accuracy": 0.42674553787053365, "eval_loss": 3.08451247215271, "eval_runtime": 37.357, "eval_samples_per_second": 300.238, "eval_steps_per_second": 2.516, "step": 51000 }, { "epoch": 0.43, "learning_rate": 0.000172199552565209, "loss": 3.0652, "step": 51100 }, { "epoch": 0.43, "learning_rate": 0.0001719481847051036, "loss": 3.0697, "step": 51200 }, { "epoch": 0.43, "learning_rate": 0.00017169681684499818, "loss": 3.0699, "step": 51300 }, { "epoch": 0.43, "learning_rate": 0.0001714454489848928, "loss": 3.0656, "step": 51400 }, { "epoch": 0.43, "learning_rate": 0.00017119408112478736, "loss": 3.0579, "step": 51500 }, { "epoch": 0.43, "learning_rate": 0.00017094271326468198, "loss": 3.0586, "step": 51600 }, { "epoch": 0.43, "learning_rate": 0.00017069134540457655, "loss": 3.0725, "step": 51700 }, { "epoch": 0.43, "learning_rate": 0.00017043997754447116, "loss": 3.0713, "step": 51800 }, { "epoch": 0.43, "learning_rate": 0.00017018860968436573, "loss": 3.0674, "step": 51900 }, { "epoch": 0.43, "learning_rate": 0.00016993724182426035, "loss": 3.0635, "step": 52000 }, { "epoch": 0.43, "eval_accuracy": 0.4271583339885653, "eval_loss": 3.082775115966797, "eval_runtime": 36.4468, "eval_samples_per_second": 307.736, "eval_steps_per_second": 2.579, "step": 52000 }, { "epoch": 0.44, "learning_rate": 0.00016968587396415494, "loss": 3.0589, "step": 52100 }, { "epoch": 0.44, "learning_rate": 0.0001694345061040495, "loss": 3.0656, "step": 52200 }, { "epoch": 0.44, "learning_rate": 0.00016918313824394412, "loss": 3.0657, "step": 52300 }, { "epoch": 0.44, "learning_rate": 0.00016893177038383868, "loss": 3.0622, "step": 52400 }, { "epoch": 0.44, "learning_rate": 0.0001686804025237333, "loss": 3.0627, "step": 52500 }, { "epoch": 0.44, "learning_rate": 0.0001684290346636279, "loss": 3.063, "step": 52600 }, { "epoch": 0.44, "learning_rate": 0.00016817766680352249, "loss": 3.0637, "step": 52700 }, { "epoch": 0.44, "learning_rate": 0.00016792629894341708, "loss": 3.0639, "step": 52800 }, { "epoch": 0.44, "learning_rate": 0.0001676774447619127, "loss": 3.0639, "step": 52900 }, { "epoch": 0.44, "learning_rate": 0.00016742607690180733, "loss": 3.0678, "step": 53000 }, { "epoch": 0.44, "eval_accuracy": 0.4273408628842935, "eval_loss": 3.0823299884796143, "eval_runtime": 36.1917, "eval_samples_per_second": 309.906, "eval_steps_per_second": 2.597, "step": 53000 }, { "epoch": 0.44, "learning_rate": 0.0001671747090417019, "loss": 3.0582, "step": 53100 }, { "epoch": 0.45, "learning_rate": 0.0001669233411815965, "loss": 3.0708, "step": 53200 }, { "epoch": 0.45, "learning_rate": 0.0001666719733214911, "loss": 3.0692, "step": 53300 }, { "epoch": 0.45, "learning_rate": 0.0001664206054613857, "loss": 3.0671, "step": 53400 }, { "epoch": 0.45, "learning_rate": 0.00016616923760128029, "loss": 3.0662, "step": 53500 }, { "epoch": 0.45, "learning_rate": 0.0001659178697411749, "loss": 3.0653, "step": 53600 }, { "epoch": 0.45, "learning_rate": 0.00016566650188106947, "loss": 3.0669, "step": 53700 }, { "epoch": 0.45, "learning_rate": 0.0001654151340209641, "loss": 3.0552, "step": 53800 }, { "epoch": 0.45, "learning_rate": 0.00016516376616085865, "loss": 3.0569, "step": 53900 }, { "epoch": 0.45, "learning_rate": 0.00016491239830075327, "loss": 3.067, "step": 54000 }, { "epoch": 0.45, "eval_accuracy": 0.4276448436991025, "eval_loss": 3.0794825553894043, "eval_runtime": 36.2802, "eval_samples_per_second": 309.15, "eval_steps_per_second": 2.591, "step": 54000 }, { "epoch": 0.45, "learning_rate": 0.00016466103044064783, "loss": 3.0623, "step": 54100 }, { "epoch": 0.45, "learning_rate": 0.00016440966258054242, "loss": 3.0612, "step": 54200 }, { "epoch": 0.45, "learning_rate": 0.00016415829472043704, "loss": 3.0588, "step": 54300 }, { "epoch": 0.46, "learning_rate": 0.0001639069268603316, "loss": 3.064, "step": 54400 }, { "epoch": 0.46, "learning_rate": 0.00016365555900022623, "loss": 3.0564, "step": 54500 }, { "epoch": 0.46, "learning_rate": 0.0001634041911401208, "loss": 3.0605, "step": 54600 }, { "epoch": 0.46, "learning_rate": 0.00016315533695861645, "loss": 3.0591, "step": 54700 }, { "epoch": 0.46, "learning_rate": 0.00016290396909851104, "loss": 3.0639, "step": 54800 }, { "epoch": 0.46, "learning_rate": 0.00016265260123840563, "loss": 3.0612, "step": 54900 }, { "epoch": 0.46, "learning_rate": 0.00016240123337830025, "loss": 3.0597, "step": 55000 }, { "epoch": 0.46, "eval_accuracy": 0.4277283857706089, "eval_loss": 3.078927516937256, "eval_runtime": 36.9604, "eval_samples_per_second": 303.46, "eval_steps_per_second": 2.543, "step": 55000 }, { "epoch": 0.46, "learning_rate": 0.00016214986551819482, "loss": 3.0607, "step": 55100 }, { "epoch": 0.46, "learning_rate": 0.00016190101133669048, "loss": 3.0505, "step": 55200 }, { "epoch": 0.46, "learning_rate": 0.00016164964347658507, "loss": 3.0628, "step": 55300 }, { "epoch": 0.46, "learning_rate": 0.00016139827561647966, "loss": 3.0592, "step": 55400 }, { "epoch": 0.46, "learning_rate": 0.00016114690775637428, "loss": 3.0488, "step": 55500 }, { "epoch": 0.47, "learning_rate": 0.00016089553989626884, "loss": 3.0533, "step": 55600 }, { "epoch": 0.47, "learning_rate": 0.00016064417203616346, "loss": 3.0666, "step": 55700 }, { "epoch": 0.47, "learning_rate": 0.00016039280417605803, "loss": 3.0596, "step": 55800 }, { "epoch": 0.47, "learning_rate": 0.00016014143631595264, "loss": 3.0604, "step": 55900 }, { "epoch": 0.47, "learning_rate": 0.0001598900684558472, "loss": 3.0648, "step": 56000 }, { "epoch": 0.47, "eval_accuracy": 0.4278596661686904, "eval_loss": 3.0768725872039795, "eval_runtime": 37.0258, "eval_samples_per_second": 302.924, "eval_steps_per_second": 2.539, "step": 56000 }, { "epoch": 0.47, "learning_rate": 0.00015963870059574183, "loss": 3.0614, "step": 56100 }, { "epoch": 0.47, "learning_rate": 0.00015938733273563642, "loss": 3.0541, "step": 56200 }, { "epoch": 0.47, "learning_rate": 0.00015913596487553098, "loss": 3.0595, "step": 56300 }, { "epoch": 0.47, "learning_rate": 0.0001588845970154256, "loss": 3.0624, "step": 56400 }, { "epoch": 0.47, "learning_rate": 0.00015863322915532016, "loss": 3.055, "step": 56500 }, { "epoch": 0.47, "learning_rate": 0.00015838186129521478, "loss": 3.0585, "step": 56600 }, { "epoch": 0.47, "learning_rate": 0.00015813049343510937, "loss": 3.0555, "step": 56700 }, { "epoch": 0.48, "learning_rate": 0.00015787912557500397, "loss": 3.0501, "step": 56800 }, { "epoch": 0.48, "learning_rate": 0.00015762775771489856, "loss": 3.0667, "step": 56900 }, { "epoch": 0.48, "learning_rate": 0.00015737638985479315, "loss": 3.0681, "step": 57000 }, { "epoch": 0.48, "eval_accuracy": 0.42812924730699675, "eval_loss": 3.075896739959717, "eval_runtime": 36.4669, "eval_samples_per_second": 307.567, "eval_steps_per_second": 2.578, "step": 57000 }, { "epoch": 0.48, "learning_rate": 0.00015712502199468774, "loss": 3.0554, "step": 57100 }, { "epoch": 0.48, "learning_rate": 0.00015687365413458236, "loss": 3.063, "step": 57200 }, { "epoch": 0.48, "learning_rate": 0.00015662228627447692, "loss": 3.0611, "step": 57300 }, { "epoch": 0.48, "learning_rate": 0.00015637091841437154, "loss": 3.0647, "step": 57400 }, { "epoch": 0.48, "learning_rate": 0.0001561195505542661, "loss": 3.0552, "step": 57500 }, { "epoch": 0.48, "learning_rate": 0.00015586818269416072, "loss": 3.0629, "step": 57600 }, { "epoch": 0.48, "learning_rate": 0.00015561932851265638, "loss": 3.0619, "step": 57700 }, { "epoch": 0.48, "learning_rate": 0.00015536796065255095, "loss": 3.0531, "step": 57800 }, { "epoch": 0.48, "learning_rate": 0.00015511659279244557, "loss": 3.063, "step": 57900 }, { "epoch": 0.49, "learning_rate": 0.00015486522493234013, "loss": 3.0513, "step": 58000 }, { "epoch": 0.49, "eval_accuracy": 0.42832300875015444, "eval_loss": 3.0737130641937256, "eval_runtime": 36.8692, "eval_samples_per_second": 304.211, "eval_steps_per_second": 2.55, "step": 58000 }, { "epoch": 0.49, "learning_rate": 0.00015461385707223472, "loss": 3.0546, "step": 58100 }, { "epoch": 0.49, "learning_rate": 0.0001543624892121293, "loss": 3.0545, "step": 58200 }, { "epoch": 0.49, "learning_rate": 0.0001541111213520239, "loss": 3.0543, "step": 58300 }, { "epoch": 0.49, "learning_rate": 0.00015385975349191852, "loss": 3.0525, "step": 58400 }, { "epoch": 0.49, "learning_rate": 0.0001536083856318131, "loss": 3.0533, "step": 58500 }, { "epoch": 0.49, "learning_rate": 0.0001533570177717077, "loss": 3.0616, "step": 58600 }, { "epoch": 0.49, "learning_rate": 0.00015310564991160227, "loss": 3.0542, "step": 58700 }, { "epoch": 0.49, "learning_rate": 0.0001528542820514969, "loss": 3.0543, "step": 58800 }, { "epoch": 0.49, "learning_rate": 0.00015260291419139148, "loss": 3.0603, "step": 58900 }, { "epoch": 0.49, "learning_rate": 0.00015235154633128607, "loss": 3.0566, "step": 59000 }, { "epoch": 0.49, "eval_accuracy": 0.42880530625540564, "eval_loss": 3.0726654529571533, "eval_runtime": 36.7531, "eval_samples_per_second": 305.171, "eval_steps_per_second": 2.558, "step": 59000 }, { "epoch": 0.49, "learning_rate": 0.00015210017847118066, "loss": 3.0475, "step": 59100 }, { "epoch": 0.5, "learning_rate": 0.00015184881061107528, "loss": 3.0545, "step": 59200 }, { "epoch": 0.5, "learning_rate": 0.00015159744275096984, "loss": 3.0616, "step": 59300 }, { "epoch": 0.5, "learning_rate": 0.00015134607489086446, "loss": 3.0503, "step": 59400 }, { "epoch": 0.5, "learning_rate": 0.00015109470703075903, "loss": 3.0462, "step": 59500 }, { "epoch": 0.5, "learning_rate": 0.00015084333917065365, "loss": 3.0586, "step": 59600 }, { "epoch": 0.5, "learning_rate": 0.00015059448498914928, "loss": 3.0502, "step": 59700 }, { "epoch": 0.5, "learning_rate": 0.00015034563080764494, "loss": 3.0535, "step": 59800 }, { "epoch": 0.5, "learning_rate": 0.0001500942629475395, "loss": 3.0608, "step": 59900 }, { "epoch": 0.5, "learning_rate": 0.0001498428950874341, "loss": 3.0552, "step": 60000 }, { "epoch": 0.5, "eval_accuracy": 0.42880671032383433, "eval_loss": 3.071218967437744, "eval_runtime": 36.2431, "eval_samples_per_second": 309.466, "eval_steps_per_second": 2.594, "step": 60000 }, { "epoch": 0.5, "learning_rate": 0.0001495915272273287, "loss": 3.0546, "step": 60100 }, { "epoch": 0.5, "learning_rate": 0.0001493401593672233, "loss": 3.053, "step": 60200 }, { "epoch": 0.5, "learning_rate": 0.0001490887915071179, "loss": 3.0506, "step": 60300 }, { "epoch": 0.51, "learning_rate": 0.0001488374236470125, "loss": 3.0562, "step": 60400 }, { "epoch": 0.51, "learning_rate": 0.00014858605578690708, "loss": 3.0572, "step": 60500 }, { "epoch": 0.51, "learning_rate": 0.00014833468792680167, "loss": 3.0568, "step": 60600 }, { "epoch": 0.51, "learning_rate": 0.00014808332006669626, "loss": 3.0571, "step": 60700 }, { "epoch": 0.51, "learning_rate": 0.00014783195220659085, "loss": 3.0483, "step": 60800 }, { "epoch": 0.51, "learning_rate": 0.00014758058434648544, "loss": 3.0486, "step": 60900 }, { "epoch": 0.51, "learning_rate": 0.00014732921648638004, "loss": 3.0457, "step": 61000 }, { "epoch": 0.51, "eval_accuracy": 0.42915000505464634, "eval_loss": 3.0692341327667236, "eval_runtime": 36.4055, "eval_samples_per_second": 308.086, "eval_steps_per_second": 2.582, "step": 61000 }, { "epoch": 0.51, "learning_rate": 0.00014707784862627463, "loss": 3.0448, "step": 61100 }, { "epoch": 0.51, "learning_rate": 0.00014682648076616922, "loss": 3.0498, "step": 61200 }, { "epoch": 0.51, "learning_rate": 0.0001465751129060638, "loss": 3.0505, "step": 61300 }, { "epoch": 0.51, "learning_rate": 0.0001463237450459584, "loss": 3.0531, "step": 61400 }, { "epoch": 0.51, "learning_rate": 0.000146072377185853, "loss": 3.0526, "step": 61500 }, { "epoch": 0.52, "learning_rate": 0.00014582100932574758, "loss": 3.0585, "step": 61600 }, { "epoch": 0.52, "learning_rate": 0.0001455696414656422, "loss": 3.0519, "step": 61700 }, { "epoch": 0.52, "learning_rate": 0.0001453182736055368, "loss": 3.0545, "step": 61800 }, { "epoch": 0.52, "learning_rate": 0.00014506690574543138, "loss": 3.0521, "step": 61900 }, { "epoch": 0.52, "learning_rate": 0.00014481553788532598, "loss": 3.0425, "step": 62000 }, { "epoch": 0.52, "eval_accuracy": 0.4291008626596426, "eval_loss": 3.0679004192352295, "eval_runtime": 36.1636, "eval_samples_per_second": 310.146, "eval_steps_per_second": 2.599, "step": 62000 }, { "epoch": 0.52, "learning_rate": 0.0001445666837038216, "loss": 3.0616, "step": 62100 }, { "epoch": 0.52, "learning_rate": 0.0001443153158437162, "loss": 3.057, "step": 62200 }, { "epoch": 0.52, "learning_rate": 0.0001440639479836108, "loss": 3.052, "step": 62300 }, { "epoch": 0.52, "learning_rate": 0.0001438125801235054, "loss": 3.0501, "step": 62400 }, { "epoch": 0.52, "learning_rate": 0.0001435612122634, "loss": 3.0457, "step": 62500 }, { "epoch": 0.52, "learning_rate": 0.0001433098444032946, "loss": 3.0506, "step": 62600 }, { "epoch": 0.52, "learning_rate": 0.00014305847654318918, "loss": 3.0478, "step": 62700 }, { "epoch": 0.53, "learning_rate": 0.00014280710868308378, "loss": 3.0545, "step": 62800 }, { "epoch": 0.53, "learning_rate": 0.00014255574082297834, "loss": 3.0554, "step": 62900 }, { "epoch": 0.53, "learning_rate": 0.00014230437296287296, "loss": 3.0573, "step": 63000 }, { "epoch": 0.53, "eval_accuracy": 0.42917527828636254, "eval_loss": 3.0663866996765137, "eval_runtime": 36.3952, "eval_samples_per_second": 308.172, "eval_steps_per_second": 2.583, "step": 63000 }, { "epoch": 0.53, "learning_rate": 0.00014205300510276755, "loss": 3.0485, "step": 63100 }, { "epoch": 0.53, "learning_rate": 0.00014180163724266214, "loss": 3.0476, "step": 63200 }, { "epoch": 0.53, "learning_rate": 0.00014155026938255673, "loss": 3.0442, "step": 63300 }, { "epoch": 0.53, "learning_rate": 0.00014129890152245132, "loss": 3.0486, "step": 63400 }, { "epoch": 0.53, "learning_rate": 0.00014104753366234592, "loss": 3.0384, "step": 63500 }, { "epoch": 0.53, "learning_rate": 0.0001407961658022405, "loss": 3.0539, "step": 63600 }, { "epoch": 0.53, "learning_rate": 0.0001405447979421351, "loss": 3.0429, "step": 63700 }, { "epoch": 0.53, "learning_rate": 0.0001402934300820297, "loss": 3.0444, "step": 63800 }, { "epoch": 0.53, "learning_rate": 0.00014004457590052535, "loss": 3.0489, "step": 63900 }, { "epoch": 0.54, "learning_rate": 0.00013979320804041994, "loss": 3.0555, "step": 64000 }, { "epoch": 0.54, "eval_accuracy": 0.42978183584755186, "eval_loss": 3.0650320053100586, "eval_runtime": 37.0145, "eval_samples_per_second": 303.016, "eval_steps_per_second": 2.54, "step": 64000 }, { "epoch": 0.54, "learning_rate": 0.00013954184018031453, "loss": 3.0507, "step": 64100 }, { "epoch": 0.54, "learning_rate": 0.00013929047232020912, "loss": 3.0453, "step": 64200 }, { "epoch": 0.54, "learning_rate": 0.00013903910446010372, "loss": 3.0495, "step": 64300 }, { "epoch": 0.54, "learning_rate": 0.0001387877365999983, "loss": 3.0446, "step": 64400 }, { "epoch": 0.54, "learning_rate": 0.0001385363687398929, "loss": 3.0488, "step": 64500 }, { "epoch": 0.54, "learning_rate": 0.00013828500087978752, "loss": 3.0498, "step": 64600 }, { "epoch": 0.54, "learning_rate": 0.0001380336330196821, "loss": 3.0441, "step": 64700 }, { "epoch": 0.54, "learning_rate": 0.00013778226515957667, "loss": 3.0435, "step": 64800 }, { "epoch": 0.54, "learning_rate": 0.00013753089729947126, "loss": 3.0517, "step": 64900 }, { "epoch": 0.54, "learning_rate": 0.00013727952943936585, "loss": 3.0421, "step": 65000 }, { "epoch": 0.54, "eval_accuracy": 0.4294195861929527, "eval_loss": 3.0636541843414307, "eval_runtime": 36.752, "eval_samples_per_second": 305.181, "eval_steps_per_second": 2.558, "step": 65000 }, { "epoch": 0.54, "learning_rate": 0.00013702816157926045, "loss": 3.0412, "step": 65100 }, { "epoch": 0.55, "learning_rate": 0.00013677679371915506, "loss": 3.0548, "step": 65200 }, { "epoch": 0.55, "learning_rate": 0.00013652542585904966, "loss": 3.0409, "step": 65300 }, { "epoch": 0.55, "learning_rate": 0.00013627405799894425, "loss": 3.0377, "step": 65400 }, { "epoch": 0.55, "learning_rate": 0.00013602269013883884, "loss": 3.0429, "step": 65500 }, { "epoch": 0.55, "learning_rate": 0.00013577383595733447, "loss": 3.0467, "step": 65600 }, { "epoch": 0.55, "learning_rate": 0.00013552498177583013, "loss": 3.0496, "step": 65700 }, { "epoch": 0.55, "learning_rate": 0.00013527361391572472, "loss": 3.0424, "step": 65800 }, { "epoch": 0.55, "learning_rate": 0.00013502224605561932, "loss": 3.043, "step": 65900 }, { "epoch": 0.55, "learning_rate": 0.0001347708781955139, "loss": 3.0496, "step": 66000 }, { "epoch": 0.55, "eval_accuracy": 0.42957333168589307, "eval_loss": 3.062688112258911, "eval_runtime": 36.3303, "eval_samples_per_second": 308.723, "eval_steps_per_second": 2.587, "step": 66000 }, { "epoch": 0.55, "learning_rate": 0.0001345195103354085, "loss": 3.0434, "step": 66100 }, { "epoch": 0.55, "learning_rate": 0.0001342681424753031, "loss": 3.0392, "step": 66200 }, { "epoch": 0.55, "learning_rate": 0.00013401677461519768, "loss": 3.041, "step": 66300 }, { "epoch": 0.56, "learning_rate": 0.00013376540675509227, "loss": 3.0526, "step": 66400 }, { "epoch": 0.56, "learning_rate": 0.0001335140388949869, "loss": 3.046, "step": 66500 }, { "epoch": 0.56, "learning_rate": 0.00013326267103488148, "loss": 3.0398, "step": 66600 }, { "epoch": 0.56, "learning_rate": 0.00013301130317477607, "loss": 3.0473, "step": 66700 }, { "epoch": 0.56, "learning_rate": 0.00013275993531467066, "loss": 3.0368, "step": 66800 }, { "epoch": 0.56, "learning_rate": 0.00013250856745456523, "loss": 3.0427, "step": 66900 }, { "epoch": 0.56, "learning_rate": 0.00013225719959445985, "loss": 3.0415, "step": 67000 }, { "epoch": 0.56, "eval_accuracy": 0.4300071888303548, "eval_loss": 3.060805320739746, "eval_runtime": 37.0174, "eval_samples_per_second": 302.993, "eval_steps_per_second": 2.539, "step": 67000 }, { "epoch": 0.56, "learning_rate": 0.00013200583173435444, "loss": 3.0429, "step": 67100 }, { "epoch": 0.56, "learning_rate": 0.00013175446387424903, "loss": 3.0494, "step": 67200 }, { "epoch": 0.56, "learning_rate": 0.00013150309601414362, "loss": 3.0384, "step": 67300 }, { "epoch": 0.56, "learning_rate": 0.0001312517281540382, "loss": 3.0438, "step": 67400 }, { "epoch": 0.56, "learning_rate": 0.0001310003602939328, "loss": 3.0427, "step": 67500 }, { "epoch": 0.57, "learning_rate": 0.0001307489924338274, "loss": 3.0447, "step": 67600 }, { "epoch": 0.57, "learning_rate": 0.00013050013825232306, "loss": 3.0438, "step": 67700 }, { "epoch": 0.57, "learning_rate": 0.00013024877039221765, "loss": 3.0403, "step": 67800 }, { "epoch": 0.57, "learning_rate": 0.00012999740253211224, "loss": 3.0478, "step": 67900 }, { "epoch": 0.57, "learning_rate": 0.00012974603467200683, "loss": 3.0412, "step": 68000 }, { "epoch": 0.57, "eval_accuracy": 0.4298436148584137, "eval_loss": 3.0598626136779785, "eval_runtime": 36.2351, "eval_samples_per_second": 309.534, "eval_steps_per_second": 2.594, "step": 68000 }, { "epoch": 0.57, "learning_rate": 0.00012949466681190142, "loss": 3.0411, "step": 68100 }, { "epoch": 0.57, "learning_rate": 0.000129243298951796, "loss": 3.035, "step": 68200 }, { "epoch": 0.57, "learning_rate": 0.0001289919310916906, "loss": 3.0464, "step": 68300 }, { "epoch": 0.57, "learning_rate": 0.0001287405632315852, "loss": 3.0369, "step": 68400 }, { "epoch": 0.57, "learning_rate": 0.00012848919537147979, "loss": 3.0428, "step": 68500 }, { "epoch": 0.57, "learning_rate": 0.0001282378275113744, "loss": 3.0436, "step": 68600 }, { "epoch": 0.57, "learning_rate": 0.000127986459651269, "loss": 3.0454, "step": 68700 }, { "epoch": 0.58, "learning_rate": 0.00012773509179116356, "loss": 3.0361, "step": 68800 }, { "epoch": 0.58, "learning_rate": 0.00012748372393105815, "loss": 3.0437, "step": 68900 }, { "epoch": 0.58, "learning_rate": 0.00012723235607095274, "loss": 3.0373, "step": 69000 }, { "epoch": 0.58, "eval_accuracy": 0.4302465824974446, "eval_loss": 3.057598829269409, "eval_runtime": 36.2031, "eval_samples_per_second": 309.808, "eval_steps_per_second": 2.596, "step": 69000 }, { "epoch": 0.58, "learning_rate": 0.0001269835018894484, "loss": 3.0426, "step": 69100 }, { "epoch": 0.58, "learning_rate": 0.000126732134029343, "loss": 3.041, "step": 69200 }, { "epoch": 0.58, "learning_rate": 0.0001264807661692376, "loss": 3.036, "step": 69300 }, { "epoch": 0.58, "learning_rate": 0.00012622939830913218, "loss": 3.0396, "step": 69400 }, { "epoch": 0.58, "learning_rate": 0.00012597803044902677, "loss": 3.0418, "step": 69500 }, { "epoch": 0.58, "learning_rate": 0.00012572666258892136, "loss": 3.0335, "step": 69600 }, { "epoch": 0.58, "learning_rate": 0.00012547529472881595, "loss": 3.0334, "step": 69700 }, { "epoch": 0.58, "learning_rate": 0.00012522392686871054, "loss": 3.0381, "step": 69800 }, { "epoch": 0.58, "learning_rate": 0.00012497255900860516, "loss": 3.0393, "step": 69900 }, { "epoch": 0.59, "learning_rate": 0.00012472119114849975, "loss": 3.0393, "step": 70000 }, { "epoch": 0.59, "eval_accuracy": 0.43052950228582343, "eval_loss": 3.05704665184021, "eval_runtime": 36.208, "eval_samples_per_second": 309.765, "eval_steps_per_second": 2.596, "step": 70000 }, { "epoch": 0.59, "learning_rate": 0.00012446982328839434, "loss": 3.0383, "step": 70100 }, { "epoch": 0.59, "learning_rate": 0.00012421845542828894, "loss": 3.0441, "step": 70200 }, { "epoch": 0.59, "learning_rate": 0.00012396708756818353, "loss": 3.0388, "step": 70300 }, { "epoch": 0.59, "learning_rate": 0.00012371823338667916, "loss": 3.0403, "step": 70400 }, { "epoch": 0.59, "learning_rate": 0.00012346686552657378, "loss": 3.0368, "step": 70500 }, { "epoch": 0.59, "learning_rate": 0.00012321549766646837, "loss": 3.0405, "step": 70600 }, { "epoch": 0.59, "learning_rate": 0.00012296412980636296, "loss": 3.0351, "step": 70700 }, { "epoch": 0.59, "learning_rate": 0.0001227152756248586, "loss": 3.0355, "step": 70800 }, { "epoch": 0.59, "learning_rate": 0.0001224639077647532, "loss": 3.038, "step": 70900 }, { "epoch": 0.59, "learning_rate": 0.00012221253990464778, "loss": 3.0312, "step": 71000 }, { "epoch": 0.59, "eval_accuracy": 0.43072256169476675, "eval_loss": 3.056051254272461, "eval_runtime": 35.9605, "eval_samples_per_second": 311.897, "eval_steps_per_second": 2.614, "step": 71000 }, { "epoch": 0.59, "learning_rate": 0.00012196117204454238, "loss": 3.0336, "step": 71100 }, { "epoch": 0.6, "learning_rate": 0.00012170980418443696, "loss": 3.0371, "step": 71200 }, { "epoch": 0.6, "learning_rate": 0.00012145843632433155, "loss": 3.0415, "step": 71300 }, { "epoch": 0.6, "learning_rate": 0.00012120706846422614, "loss": 3.033, "step": 71400 }, { "epoch": 0.6, "learning_rate": 0.00012095570060412075, "loss": 3.0401, "step": 71500 }, { "epoch": 0.6, "learning_rate": 0.00012070433274401534, "loss": 3.0407, "step": 71600 }, { "epoch": 0.6, "learning_rate": 0.00012045296488390993, "loss": 3.0389, "step": 71700 }, { "epoch": 0.6, "learning_rate": 0.00012020159702380452, "loss": 3.0326, "step": 71800 }, { "epoch": 0.6, "learning_rate": 0.00011995022916369911, "loss": 3.0343, "step": 71900 }, { "epoch": 0.6, "learning_rate": 0.00011969886130359372, "loss": 3.0397, "step": 72000 }, { "epoch": 0.6, "eval_accuracy": 0.43072607186583844, "eval_loss": 3.0532803535461426, "eval_runtime": 36.5519, "eval_samples_per_second": 306.851, "eval_steps_per_second": 2.572, "step": 72000 }, { "epoch": 0.6, "learning_rate": 0.00011945000712208935, "loss": 3.041, "step": 72100 }, { "epoch": 0.6, "learning_rate": 0.00011919863926198396, "loss": 3.0375, "step": 72200 }, { "epoch": 0.6, "learning_rate": 0.00011894727140187855, "loss": 3.03, "step": 72300 }, { "epoch": 0.61, "learning_rate": 0.00011869590354177314, "loss": 3.0314, "step": 72400 }, { "epoch": 0.61, "learning_rate": 0.00011844453568166773, "loss": 3.0399, "step": 72500 }, { "epoch": 0.61, "learning_rate": 0.00011819316782156232, "loss": 3.043, "step": 72600 }, { "epoch": 0.61, "learning_rate": 0.00011794179996145693, "loss": 3.0324, "step": 72700 }, { "epoch": 0.61, "learning_rate": 0.00011769043210135152, "loss": 3.037, "step": 72800 }, { "epoch": 0.61, "learning_rate": 0.00011743906424124611, "loss": 3.0391, "step": 72900 }, { "epoch": 0.61, "learning_rate": 0.00011718769638114069, "loss": 3.0303, "step": 73000 }, { "epoch": 0.61, "eval_accuracy": 0.43111219068372514, "eval_loss": 3.0526981353759766, "eval_runtime": 36.4926, "eval_samples_per_second": 307.35, "eval_steps_per_second": 2.576, "step": 73000 }, { "epoch": 0.61, "learning_rate": 0.00011693632852103528, "loss": 3.0329, "step": 73100 }, { "epoch": 0.61, "learning_rate": 0.00011668496066092987, "loss": 3.0346, "step": 73200 }, { "epoch": 0.61, "learning_rate": 0.00011643359280082448, "loss": 3.0405, "step": 73300 }, { "epoch": 0.61, "learning_rate": 0.00011618222494071907, "loss": 3.0344, "step": 73400 }, { "epoch": 0.61, "learning_rate": 0.00011593085708061366, "loss": 3.0389, "step": 73500 }, { "epoch": 0.62, "learning_rate": 0.00011567948922050825, "loss": 3.0361, "step": 73600 }, { "epoch": 0.62, "learning_rate": 0.00011542812136040285, "loss": 3.0329, "step": 73700 }, { "epoch": 0.62, "learning_rate": 0.00011517675350029745, "loss": 3.0304, "step": 73800 }, { "epoch": 0.62, "learning_rate": 0.00011492538564019204, "loss": 3.0316, "step": 73900 }, { "epoch": 0.62, "learning_rate": 0.00011467401778008663, "loss": 3.0403, "step": 74000 }, { "epoch": 0.62, "eval_accuracy": 0.43146250575668055, "eval_loss": 3.0502421855926514, "eval_runtime": 36.2647, "eval_samples_per_second": 309.281, "eval_steps_per_second": 2.592, "step": 74000 }, { "epoch": 0.62, "learning_rate": 0.00011442516359858228, "loss": 3.0443, "step": 74100 }, { "epoch": 0.62, "learning_rate": 0.00011417379573847687, "loss": 3.0376, "step": 74200 }, { "epoch": 0.62, "learning_rate": 0.00011392242787837146, "loss": 3.0313, "step": 74300 }, { "epoch": 0.62, "learning_rate": 0.00011367106001826606, "loss": 3.0429, "step": 74400 }, { "epoch": 0.62, "learning_rate": 0.00011341969215816065, "loss": 3.0342, "step": 74500 }, { "epoch": 0.62, "learning_rate": 0.00011316832429805525, "loss": 3.0335, "step": 74600 }, { "epoch": 0.62, "learning_rate": 0.00011291695643794984, "loss": 3.0375, "step": 74700 }, { "epoch": 0.63, "learning_rate": 0.00011266558857784443, "loss": 3.0247, "step": 74800 }, { "epoch": 0.63, "learning_rate": 0.000112414220717739, "loss": 3.0309, "step": 74900 }, { "epoch": 0.63, "learning_rate": 0.00011216285285763361, "loss": 3.0326, "step": 75000 }, { "epoch": 0.63, "eval_accuracy": 0.43156359868354544, "eval_loss": 3.049257278442383, "eval_runtime": 36.2389, "eval_samples_per_second": 309.501, "eval_steps_per_second": 2.594, "step": 75000 }, { "epoch": 0.63, "learning_rate": 0.0001119114849975282, "loss": 3.0389, "step": 75100 }, { "epoch": 0.63, "learning_rate": 0.00011166011713742279, "loss": 3.0309, "step": 75200 }, { "epoch": 0.63, "learning_rate": 0.00011141126295591844, "loss": 3.0375, "step": 75300 }, { "epoch": 0.63, "learning_rate": 0.00011115989509581303, "loss": 3.0351, "step": 75400 }, { "epoch": 0.63, "learning_rate": 0.00011090852723570762, "loss": 3.0324, "step": 75500 }, { "epoch": 0.63, "learning_rate": 0.00011065715937560223, "loss": 3.0369, "step": 75600 }, { "epoch": 0.63, "learning_rate": 0.00011040579151549682, "loss": 3.0289, "step": 75700 }, { "epoch": 0.63, "learning_rate": 0.00011015442365539141, "loss": 3.0346, "step": 75800 }, { "epoch": 0.63, "learning_rate": 0.000109903055795286, "loss": 3.0234, "step": 75900 }, { "epoch": 0.64, "learning_rate": 0.0001096516879351806, "loss": 3.0322, "step": 76000 }, { "epoch": 0.64, "eval_accuracy": 0.4314962033989688, "eval_loss": 3.0480940341949463, "eval_runtime": 35.8603, "eval_samples_per_second": 312.77, "eval_steps_per_second": 2.621, "step": 76000 }, { "epoch": 0.64, "learning_rate": 0.0001094003200750752, "loss": 3.027, "step": 76100 }, { "epoch": 0.64, "learning_rate": 0.00010914895221496979, "loss": 3.03, "step": 76200 }, { "epoch": 0.64, "learning_rate": 0.00010890009803346544, "loss": 3.0236, "step": 76300 }, { "epoch": 0.64, "learning_rate": 0.00010864873017336003, "loss": 3.0343, "step": 76400 }, { "epoch": 0.64, "learning_rate": 0.00010839736231325462, "loss": 3.0335, "step": 76500 }, { "epoch": 0.64, "learning_rate": 0.00010814599445314921, "loss": 3.0263, "step": 76600 }, { "epoch": 0.64, "learning_rate": 0.00010789462659304382, "loss": 3.0269, "step": 76700 }, { "epoch": 0.64, "learning_rate": 0.00010764325873293841, "loss": 3.0391, "step": 76800 }, { "epoch": 0.64, "learning_rate": 0.000107391890872833, "loss": 3.0361, "step": 76900 }, { "epoch": 0.64, "learning_rate": 0.00010714052301272759, "loss": 3.0265, "step": 77000 }, { "epoch": 0.64, "eval_accuracy": 0.4318640693272827, "eval_loss": 3.0469460487365723, "eval_runtime": 37.1071, "eval_samples_per_second": 302.26, "eval_steps_per_second": 2.533, "step": 77000 }, { "epoch": 0.64, "learning_rate": 0.00010688915515262217, "loss": 3.0313, "step": 77100 }, { "epoch": 0.65, "learning_rate": 0.00010663778729251676, "loss": 3.0319, "step": 77200 }, { "epoch": 0.65, "learning_rate": 0.00010638641943241136, "loss": 3.0247, "step": 77300 }, { "epoch": 0.65, "learning_rate": 0.00010613505157230595, "loss": 3.0264, "step": 77400 }, { "epoch": 0.65, "learning_rate": 0.0001058861973908016, "loss": 3.0262, "step": 77500 }, { "epoch": 0.65, "learning_rate": 0.0001056348295306962, "loss": 3.0327, "step": 77600 }, { "epoch": 0.65, "learning_rate": 0.00010538346167059079, "loss": 3.0318, "step": 77700 }, { "epoch": 0.65, "learning_rate": 0.00010513209381048538, "loss": 3.0356, "step": 77800 }, { "epoch": 0.65, "learning_rate": 0.00010488072595037997, "loss": 3.0374, "step": 77900 }, { "epoch": 0.65, "learning_rate": 0.00010462935809027457, "loss": 3.0231, "step": 78000 }, { "epoch": 0.65, "eval_accuracy": 0.43201430464915136, "eval_loss": 3.045305013656616, "eval_runtime": 37.1474, "eval_samples_per_second": 301.933, "eval_steps_per_second": 2.53, "step": 78000 }, { "epoch": 0.65, "learning_rate": 0.00010437799023016916, "loss": 3.0296, "step": 78100 }, { "epoch": 0.65, "learning_rate": 0.00010412662237006376, "loss": 3.025, "step": 78200 }, { "epoch": 0.65, "learning_rate": 0.00010387525450995835, "loss": 3.0329, "step": 78300 }, { "epoch": 0.66, "learning_rate": 0.00010362388664985294, "loss": 3.0268, "step": 78400 }, { "epoch": 0.66, "learning_rate": 0.00010337251878974754, "loss": 3.0259, "step": 78500 }, { "epoch": 0.66, "learning_rate": 0.00010312115092964213, "loss": 3.0298, "step": 78600 }, { "epoch": 0.66, "learning_rate": 0.00010286978306953673, "loss": 3.0296, "step": 78700 }, { "epoch": 0.66, "learning_rate": 0.00010261841520943132, "loss": 3.0291, "step": 78800 }, { "epoch": 0.66, "learning_rate": 0.0001023670473493259, "loss": 3.0371, "step": 78900 }, { "epoch": 0.66, "learning_rate": 0.0001021156794892205, "loss": 3.0259, "step": 79000 }, { "epoch": 0.66, "eval_accuracy": 0.43211188740494455, "eval_loss": 3.044191837310791, "eval_runtime": 37.3457, "eval_samples_per_second": 300.329, "eval_steps_per_second": 2.517, "step": 79000 }, { "epoch": 0.66, "learning_rate": 0.00010186431162911509, "loss": 3.0266, "step": 79100 }, { "epoch": 0.66, "learning_rate": 0.00010161294376900968, "loss": 3.0272, "step": 79200 }, { "epoch": 0.66, "learning_rate": 0.00010136157590890427, "loss": 3.0191, "step": 79300 }, { "epoch": 0.66, "learning_rate": 0.00010111020804879886, "loss": 3.0178, "step": 79400 }, { "epoch": 0.67, "learning_rate": 0.00010085884018869347, "loss": 3.0178, "step": 79500 }, { "epoch": 0.67, "learning_rate": 0.0001006099860071891, "loss": 3.0264, "step": 79600 }, { "epoch": 0.67, "learning_rate": 0.00010036113182568475, "loss": 3.0172, "step": 79700 }, { "epoch": 0.67, "learning_rate": 0.00010010976396557934, "loss": 3.0276, "step": 79800 }, { "epoch": 0.67, "learning_rate": 9.985839610547395e-05, "loss": 3.0254, "step": 79900 }, { "epoch": 0.67, "learning_rate": 9.960702824536854e-05, "loss": 3.0219, "step": 80000 }, { "epoch": 0.67, "eval_accuracy": 0.43250292046233163, "eval_loss": 3.0422935485839844, "eval_runtime": 37.0202, "eval_samples_per_second": 302.97, "eval_steps_per_second": 2.539, "step": 80000 }, { "epoch": 0.67, "learning_rate": 9.935566038526313e-05, "loss": 3.0265, "step": 80100 }, { "epoch": 0.67, "learning_rate": 9.910429252515772e-05, "loss": 3.025, "step": 80200 }, { "epoch": 0.67, "learning_rate": 9.885292466505231e-05, "loss": 3.0164, "step": 80300 }, { "epoch": 0.67, "learning_rate": 9.860155680494692e-05, "loss": 3.0307, "step": 80400 }, { "epoch": 0.67, "learning_rate": 9.835018894484151e-05, "loss": 3.0268, "step": 80500 }, { "epoch": 0.67, "learning_rate": 9.80988210847361e-05, "loss": 3.0261, "step": 80600 }, { "epoch": 0.68, "learning_rate": 9.784745322463069e-05, "loss": 3.0213, "step": 80700 }, { "epoch": 0.68, "learning_rate": 9.75960853645253e-05, "loss": 3.0222, "step": 80800 }, { "epoch": 0.68, "learning_rate": 9.734471750441989e-05, "loss": 3.0249, "step": 80900 }, { "epoch": 0.68, "learning_rate": 9.709334964431448e-05, "loss": 3.0233, "step": 81000 }, { "epoch": 0.68, "eval_accuracy": 0.4324165702539679, "eval_loss": 3.0414962768554688, "eval_runtime": 37.0887, "eval_samples_per_second": 302.41, "eval_steps_per_second": 2.534, "step": 81000 }, { "epoch": 0.68, "learning_rate": 9.684198178420906e-05, "loss": 3.0177, "step": 81100 }, { "epoch": 0.68, "learning_rate": 9.659061392410365e-05, "loss": 3.0309, "step": 81200 }, { "epoch": 0.68, "learning_rate": 9.633924606399824e-05, "loss": 3.0245, "step": 81300 }, { "epoch": 0.68, "learning_rate": 9.608787820389284e-05, "loss": 3.0287, "step": 81400 }, { "epoch": 0.68, "learning_rate": 9.583651034378743e-05, "loss": 3.0152, "step": 81500 }, { "epoch": 0.68, "learning_rate": 9.558514248368203e-05, "loss": 3.0204, "step": 81600 }, { "epoch": 0.68, "learning_rate": 9.533377462357662e-05, "loss": 3.0258, "step": 81700 }, { "epoch": 0.68, "learning_rate": 9.508240676347121e-05, "loss": 3.0255, "step": 81800 }, { "epoch": 0.69, "learning_rate": 9.483103890336581e-05, "loss": 3.0245, "step": 81900 }, { "epoch": 0.69, "learning_rate": 9.45796710432604e-05, "loss": 3.0261, "step": 82000 }, { "epoch": 0.69, "eval_accuracy": 0.43273810192413537, "eval_loss": 3.040773868560791, "eval_runtime": 36.3004, "eval_samples_per_second": 308.977, "eval_steps_per_second": 2.59, "step": 82000 }, { "epoch": 0.69, "learning_rate": 9.433081686175605e-05, "loss": 3.0236, "step": 82100 }, { "epoch": 0.69, "learning_rate": 9.407944900165064e-05, "loss": 3.0339, "step": 82200 }, { "epoch": 0.69, "learning_rate": 9.382808114154523e-05, "loss": 3.021, "step": 82300 }, { "epoch": 0.69, "learning_rate": 9.357671328143983e-05, "loss": 3.0208, "step": 82400 }, { "epoch": 0.69, "learning_rate": 9.332534542133443e-05, "loss": 3.0175, "step": 82500 }, { "epoch": 0.69, "learning_rate": 9.307397756122902e-05, "loss": 3.0294, "step": 82600 }, { "epoch": 0.69, "learning_rate": 9.282260970112361e-05, "loss": 3.0258, "step": 82700 }, { "epoch": 0.69, "learning_rate": 9.25712418410182e-05, "loss": 3.0144, "step": 82800 }, { "epoch": 0.69, "learning_rate": 9.231987398091278e-05, "loss": 3.016, "step": 82900 }, { "epoch": 0.69, "learning_rate": 9.206850612080737e-05, "loss": 3.0221, "step": 83000 }, { "epoch": 0.69, "eval_accuracy": 0.43296696507801, "eval_loss": 3.038726806640625, "eval_runtime": 36.1807, "eval_samples_per_second": 309.999, "eval_steps_per_second": 2.598, "step": 83000 }, { "epoch": 0.7, "learning_rate": 9.181965193930304e-05, "loss": 3.0217, "step": 83100 }, { "epoch": 0.7, "learning_rate": 9.156828407919761e-05, "loss": 3.0149, "step": 83200 }, { "epoch": 0.7, "learning_rate": 9.131691621909222e-05, "loss": 3.0247, "step": 83300 }, { "epoch": 0.7, "learning_rate": 9.106554835898681e-05, "loss": 3.021, "step": 83400 }, { "epoch": 0.7, "learning_rate": 9.081669417748246e-05, "loss": 3.0239, "step": 83500 }, { "epoch": 0.7, "learning_rate": 9.056532631737705e-05, "loss": 3.0349, "step": 83600 }, { "epoch": 0.7, "learning_rate": 9.031395845727164e-05, "loss": 3.026, "step": 83700 }, { "epoch": 0.7, "learning_rate": 9.006259059716623e-05, "loss": 3.0178, "step": 83800 }, { "epoch": 0.7, "learning_rate": 8.981122273706082e-05, "loss": 3.0249, "step": 83900 }, { "epoch": 0.7, "learning_rate": 8.955985487695543e-05, "loss": 3.0296, "step": 84000 }, { "epoch": 0.7, "eval_accuracy": 0.43312211463937905, "eval_loss": 3.0376861095428467, "eval_runtime": 38.9475, "eval_samples_per_second": 287.978, "eval_steps_per_second": 2.414, "step": 84000 }, { "epoch": 0.7, "learning_rate": 8.930848701685002e-05, "loss": 3.0205, "step": 84100 }, { "epoch": 0.7, "learning_rate": 8.905711915674461e-05, "loss": 3.0214, "step": 84200 }, { "epoch": 0.71, "learning_rate": 8.88057512966392e-05, "loss": 3.0283, "step": 84300 }, { "epoch": 0.71, "learning_rate": 8.85543834365338e-05, "loss": 3.0163, "step": 84400 }, { "epoch": 0.71, "learning_rate": 8.83030155764284e-05, "loss": 3.02, "step": 84500 }, { "epoch": 0.71, "learning_rate": 8.805164771632299e-05, "loss": 3.0189, "step": 84600 }, { "epoch": 0.71, "learning_rate": 8.780027985621758e-05, "loss": 3.0167, "step": 84700 }, { "epoch": 0.71, "learning_rate": 8.754891199611217e-05, "loss": 3.0177, "step": 84800 }, { "epoch": 0.71, "learning_rate": 8.729754413600678e-05, "loss": 3.0226, "step": 84900 }, { "epoch": 0.71, "learning_rate": 8.704617627590137e-05, "loss": 3.0186, "step": 85000 }, { "epoch": 0.71, "eval_accuracy": 0.4335391229626967, "eval_loss": 3.03602933883667, "eval_runtime": 36.1657, "eval_samples_per_second": 310.128, "eval_steps_per_second": 2.599, "step": 85000 }, { "epoch": 0.71, "learning_rate": 8.679480841579594e-05, "loss": 3.0144, "step": 85100 }, { "epoch": 0.71, "learning_rate": 8.65459542342916e-05, "loss": 3.0128, "step": 85200 }, { "epoch": 0.71, "learning_rate": 8.629458637418618e-05, "loss": 3.0189, "step": 85300 }, { "epoch": 0.71, "learning_rate": 8.604321851408077e-05, "loss": 3.0231, "step": 85400 }, { "epoch": 0.72, "learning_rate": 8.579185065397537e-05, "loss": 3.0161, "step": 85500 }, { "epoch": 0.72, "learning_rate": 8.554048279386996e-05, "loss": 3.0188, "step": 85600 }, { "epoch": 0.72, "learning_rate": 8.528911493376456e-05, "loss": 3.027, "step": 85700 }, { "epoch": 0.72, "learning_rate": 8.503774707365915e-05, "loss": 3.017, "step": 85800 }, { "epoch": 0.72, "learning_rate": 8.478637921355374e-05, "loss": 3.0173, "step": 85900 }, { "epoch": 0.72, "learning_rate": 8.453501135344834e-05, "loss": 3.0151, "step": 86000 }, { "epoch": 0.72, "eval_accuracy": 0.43330745167196466, "eval_loss": 3.034996747970581, "eval_runtime": 36.1826, "eval_samples_per_second": 309.983, "eval_steps_per_second": 2.598, "step": 86000 }, { "epoch": 0.72, "learning_rate": 8.428364349334294e-05, "loss": 3.0227, "step": 86100 }, { "epoch": 0.72, "learning_rate": 8.403227563323753e-05, "loss": 3.0163, "step": 86200 }, { "epoch": 0.72, "learning_rate": 8.378090777313212e-05, "loss": 3.0096, "step": 86300 }, { "epoch": 0.72, "learning_rate": 8.352953991302671e-05, "loss": 3.0147, "step": 86400 }, { "epoch": 0.72, "learning_rate": 8.32781720529213e-05, "loss": 3.0051, "step": 86500 }, { "epoch": 0.72, "learning_rate": 8.302680419281591e-05, "loss": 3.0201, "step": 86600 }, { "epoch": 0.73, "learning_rate": 8.277795001131154e-05, "loss": 3.0169, "step": 86700 }, { "epoch": 0.73, "learning_rate": 8.252658215120615e-05, "loss": 3.008, "step": 86800 }, { "epoch": 0.73, "learning_rate": 8.227521429110074e-05, "loss": 3.0117, "step": 86900 }, { "epoch": 0.73, "learning_rate": 8.202384643099533e-05, "loss": 3.0121, "step": 87000 }, { "epoch": 0.73, "eval_accuracy": 0.43354333516798277, "eval_loss": 3.033334493637085, "eval_runtime": 37.3178, "eval_samples_per_second": 300.553, "eval_steps_per_second": 2.519, "step": 87000 }, { "epoch": 0.73, "learning_rate": 8.177247857088992e-05, "loss": 3.014, "step": 87100 }, { "epoch": 0.73, "learning_rate": 8.15211107107845e-05, "loss": 3.0168, "step": 87200 }, { "epoch": 0.73, "learning_rate": 8.126974285067909e-05, "loss": 3.0092, "step": 87300 }, { "epoch": 0.73, "learning_rate": 8.10183749905737e-05, "loss": 3.0231, "step": 87400 }, { "epoch": 0.73, "learning_rate": 8.076700713046829e-05, "loss": 3.0133, "step": 87500 }, { "epoch": 0.73, "learning_rate": 8.051563927036288e-05, "loss": 3.0135, "step": 87600 }, { "epoch": 0.73, "learning_rate": 8.026427141025747e-05, "loss": 3.0188, "step": 87700 }, { "epoch": 0.73, "learning_rate": 8.001290355015206e-05, "loss": 3.0151, "step": 87800 }, { "epoch": 0.74, "learning_rate": 7.976153569004667e-05, "loss": 3.0211, "step": 87900 }, { "epoch": 0.74, "learning_rate": 7.951016782994126e-05, "loss": 3.0142, "step": 88000 }, { "epoch": 0.74, "eval_accuracy": 0.4337988756220023, "eval_loss": 3.032519817352295, "eval_runtime": 37.5602, "eval_samples_per_second": 298.614, "eval_steps_per_second": 2.503, "step": 88000 }, { "epoch": 0.74, "learning_rate": 7.925879996983585e-05, "loss": 3.0117, "step": 88100 }, { "epoch": 0.74, "learning_rate": 7.900743210973044e-05, "loss": 3.0092, "step": 88200 }, { "epoch": 0.74, "learning_rate": 7.875606424962505e-05, "loss": 3.0124, "step": 88300 }, { "epoch": 0.74, "learning_rate": 7.850469638951964e-05, "loss": 3.0104, "step": 88400 }, { "epoch": 0.74, "learning_rate": 7.825584220801528e-05, "loss": 3.0136, "step": 88500 }, { "epoch": 0.74, "learning_rate": 7.800447434790988e-05, "loss": 3.0186, "step": 88600 }, { "epoch": 0.74, "learning_rate": 7.775310648780447e-05, "loss": 3.0107, "step": 88700 }, { "epoch": 0.74, "learning_rate": 7.750173862769906e-05, "loss": 3.0129, "step": 88800 }, { "epoch": 0.74, "learning_rate": 7.725037076759365e-05, "loss": 3.0117, "step": 88900 }, { "epoch": 0.74, "learning_rate": 7.699900290748825e-05, "loss": 3.0088, "step": 89000 }, { "epoch": 0.74, "eval_accuracy": 0.4338164264773608, "eval_loss": 3.031200647354126, "eval_runtime": 36.6187, "eval_samples_per_second": 306.292, "eval_steps_per_second": 2.567, "step": 89000 }, { "epoch": 0.75, "learning_rate": 7.674763504738283e-05, "loss": 3.0107, "step": 89100 }, { "epoch": 0.75, "learning_rate": 7.649626718727742e-05, "loss": 3.0104, "step": 89200 }, { "epoch": 0.75, "learning_rate": 7.624489932717202e-05, "loss": 3.0141, "step": 89300 }, { "epoch": 0.75, "learning_rate": 7.59935314670666e-05, "loss": 3.0263, "step": 89400 }, { "epoch": 0.75, "learning_rate": 7.57421636069612e-05, "loss": 3.0093, "step": 89500 }, { "epoch": 0.75, "learning_rate": 7.54907957468558e-05, "loss": 3.0057, "step": 89600 }, { "epoch": 0.75, "learning_rate": 7.52394278867504e-05, "loss": 3.0104, "step": 89700 }, { "epoch": 0.75, "learning_rate": 7.498806002664499e-05, "loss": 3.0202, "step": 89800 }, { "epoch": 0.75, "learning_rate": 7.473669216653958e-05, "loss": 3.0118, "step": 89900 }, { "epoch": 0.75, "learning_rate": 7.448532430643417e-05, "loss": 3.0087, "step": 90000 }, { "epoch": 0.75, "eval_accuracy": 0.43394138856751324, "eval_loss": 3.0297725200653076, "eval_runtime": 36.7079, "eval_samples_per_second": 305.547, "eval_steps_per_second": 2.561, "step": 90000 }, { "epoch": 0.75, "learning_rate": 7.423395644632877e-05, "loss": 3.0163, "step": 90100 }, { "epoch": 0.75, "learning_rate": 7.398258858622336e-05, "loss": 3.0168, "step": 90200 }, { "epoch": 0.76, "learning_rate": 7.373122072611796e-05, "loss": 3.0145, "step": 90300 }, { "epoch": 0.76, "learning_rate": 7.347985286601255e-05, "loss": 3.0112, "step": 90400 }, { "epoch": 0.76, "learning_rate": 7.322848500590714e-05, "loss": 3.0094, "step": 90500 }, { "epoch": 0.76, "learning_rate": 7.297711714580173e-05, "loss": 3.0129, "step": 90600 }, { "epoch": 0.76, "learning_rate": 7.272574928569632e-05, "loss": 3.0033, "step": 90700 }, { "epoch": 0.76, "learning_rate": 7.247438142559092e-05, "loss": 3.0115, "step": 90800 }, { "epoch": 0.76, "learning_rate": 7.222301356548552e-05, "loss": 3.0075, "step": 90900 }, { "epoch": 0.76, "learning_rate": 7.197164570538011e-05, "loss": 3.0134, "step": 91000 }, { "epoch": 0.76, "eval_accuracy": 0.43423554090332145, "eval_loss": 3.0285885334014893, "eval_runtime": 36.591, "eval_samples_per_second": 306.523, "eval_steps_per_second": 2.569, "step": 91000 }, { "epoch": 0.76, "learning_rate": 7.172279152387576e-05, "loss": 3.019, "step": 91100 }, { "epoch": 0.76, "learning_rate": 7.147142366377035e-05, "loss": 3.0166, "step": 91200 }, { "epoch": 0.76, "learning_rate": 7.122005580366494e-05, "loss": 3.0114, "step": 91300 }, { "epoch": 0.76, "learning_rate": 7.097120162216059e-05, "loss": 3.015, "step": 91400 }, { "epoch": 0.77, "learning_rate": 7.071983376205518e-05, "loss": 3.0123, "step": 91500 }, { "epoch": 0.77, "learning_rate": 7.046846590194977e-05, "loss": 3.007, "step": 91600 }, { "epoch": 0.77, "learning_rate": 7.021709804184436e-05, "loss": 3.005, "step": 91700 }, { "epoch": 0.77, "learning_rate": 6.996573018173895e-05, "loss": 3.0122, "step": 91800 }, { "epoch": 0.77, "learning_rate": 6.971436232163356e-05, "loss": 3.0069, "step": 91900 }, { "epoch": 0.77, "learning_rate": 6.946299446152815e-05, "loss": 3.0136, "step": 92000 }, { "epoch": 0.77, "eval_accuracy": 0.43437735181461806, "eval_loss": 3.0268590450286865, "eval_runtime": 36.7262, "eval_samples_per_second": 305.395, "eval_steps_per_second": 2.559, "step": 92000 }, { "epoch": 0.77, "learning_rate": 6.921162660142274e-05, "loss": 3.0063, "step": 92100 }, { "epoch": 0.77, "learning_rate": 6.896025874131733e-05, "loss": 3.007, "step": 92200 }, { "epoch": 0.77, "learning_rate": 6.870889088121192e-05, "loss": 3.0132, "step": 92300 }, { "epoch": 0.77, "learning_rate": 6.845752302110651e-05, "loss": 3.0145, "step": 92400 }, { "epoch": 0.77, "learning_rate": 6.82061551610011e-05, "loss": 3.0116, "step": 92500 }, { "epoch": 0.77, "learning_rate": 6.79547873008957e-05, "loss": 3.0138, "step": 92600 }, { "epoch": 0.78, "learning_rate": 6.77034194407903e-05, "loss": 3.0075, "step": 92700 }, { "epoch": 0.78, "learning_rate": 6.745205158068489e-05, "loss": 3.0098, "step": 92800 }, { "epoch": 0.78, "learning_rate": 6.720068372057948e-05, "loss": 3.0058, "step": 92900 }, { "epoch": 0.78, "learning_rate": 6.694931586047407e-05, "loss": 3.0043, "step": 93000 }, { "epoch": 0.78, "eval_accuracy": 0.43468133262942704, "eval_loss": 3.0255324840545654, "eval_runtime": 36.1761, "eval_samples_per_second": 310.039, "eval_steps_per_second": 2.598, "step": 93000 }, { "epoch": 0.78, "learning_rate": 6.669794800036866e-05, "loss": 3.0167, "step": 93100 }, { "epoch": 0.78, "learning_rate": 6.644658014026327e-05, "loss": 3.0077, "step": 93200 }, { "epoch": 0.78, "learning_rate": 6.619521228015785e-05, "loss": 3.0087, "step": 93300 }, { "epoch": 0.78, "learning_rate": 6.594384442005244e-05, "loss": 3.0137, "step": 93400 }, { "epoch": 0.78, "learning_rate": 6.569499023854809e-05, "loss": 3.015, "step": 93500 }, { "epoch": 0.78, "learning_rate": 6.544362237844268e-05, "loss": 3.0046, "step": 93600 }, { "epoch": 0.78, "learning_rate": 6.519225451833728e-05, "loss": 3.0015, "step": 93700 }, { "epoch": 0.78, "learning_rate": 6.494088665823187e-05, "loss": 3.0074, "step": 93800 }, { "epoch": 0.79, "learning_rate": 6.468951879812646e-05, "loss": 3.0082, "step": 93900 }, { "epoch": 0.79, "learning_rate": 6.443815093802106e-05, "loss": 2.9995, "step": 94000 }, { "epoch": 0.79, "eval_accuracy": 0.43484701270401116, "eval_loss": 3.023953914642334, "eval_runtime": 36.328, "eval_samples_per_second": 308.742, "eval_steps_per_second": 2.588, "step": 94000 }, { "epoch": 0.79, "learning_rate": 6.418678307791566e-05, "loss": 3.0039, "step": 94100 }, { "epoch": 0.79, "learning_rate": 6.393541521781025e-05, "loss": 3.0095, "step": 94200 }, { "epoch": 0.79, "learning_rate": 6.368404735770483e-05, "loss": 3.0028, "step": 94300 }, { "epoch": 0.79, "learning_rate": 6.343267949759943e-05, "loss": 3.0082, "step": 94400 }, { "epoch": 0.79, "learning_rate": 6.318131163749403e-05, "loss": 3.0069, "step": 94500 }, { "epoch": 0.79, "learning_rate": 6.292994377738862e-05, "loss": 3.0004, "step": 94600 }, { "epoch": 0.79, "learning_rate": 6.267857591728321e-05, "loss": 3.0087, "step": 94700 }, { "epoch": 0.79, "learning_rate": 6.24272080571778e-05, "loss": 3.0062, "step": 94800 }, { "epoch": 0.79, "learning_rate": 6.21758401970724e-05, "loss": 3.0113, "step": 94900 }, { "epoch": 0.79, "learning_rate": 6.1924472336967e-05, "loss": 3.001, "step": 95000 }, { "epoch": 0.79, "eval_accuracy": 0.434945999528233, "eval_loss": 3.0230536460876465, "eval_runtime": 36.4523, "eval_samples_per_second": 307.69, "eval_steps_per_second": 2.579, "step": 95000 }, { "epoch": 0.8, "learning_rate": 6.167310447686157e-05, "loss": 3.0026, "step": 95100 }, { "epoch": 0.8, "learning_rate": 6.142173661675618e-05, "loss": 3.01, "step": 95200 }, { "epoch": 0.8, "learning_rate": 6.117036875665077e-05, "loss": 3.0073, "step": 95300 }, { "epoch": 0.8, "learning_rate": 6.091900089654536e-05, "loss": 3.0101, "step": 95400 }, { "epoch": 0.8, "learning_rate": 6.067014671504101e-05, "loss": 3.0013, "step": 95500 }, { "epoch": 0.8, "learning_rate": 6.04187788549356e-05, "loss": 3.004, "step": 95600 }, { "epoch": 0.8, "learning_rate": 6.01674109948302e-05, "loss": 3.0042, "step": 95700 }, { "epoch": 0.8, "learning_rate": 5.991604313472479e-05, "loss": 3.0081, "step": 95800 }, { "epoch": 0.8, "learning_rate": 5.966467527461938e-05, "loss": 3.0048, "step": 95900 }, { "epoch": 0.8, "learning_rate": 5.941582109311503e-05, "loss": 3.007, "step": 96000 }, { "epoch": 0.8, "eval_accuracy": 0.4351959237085379, "eval_loss": 3.02174973487854, "eval_runtime": 36.367, "eval_samples_per_second": 308.412, "eval_steps_per_second": 2.585, "step": 96000 }, { "epoch": 0.8, "learning_rate": 5.9164453233009627e-05, "loss": 3.006, "step": 96100 }, { "epoch": 0.8, "learning_rate": 5.891308537290422e-05, "loss": 3.0025, "step": 96200 }, { "epoch": 0.81, "learning_rate": 5.866171751279881e-05, "loss": 3.006, "step": 96300 }, { "epoch": 0.81, "learning_rate": 5.841034965269341e-05, "loss": 2.9956, "step": 96400 }, { "epoch": 0.81, "learning_rate": 5.815898179258799e-05, "loss": 2.9968, "step": 96500 }, { "epoch": 0.81, "learning_rate": 5.790761393248258e-05, "loss": 3.0023, "step": 96600 }, { "epoch": 0.81, "learning_rate": 5.765624607237718e-05, "loss": 3.0014, "step": 96700 }, { "epoch": 0.81, "learning_rate": 5.740487821227177e-05, "loss": 2.9961, "step": 96800 }, { "epoch": 0.81, "learning_rate": 5.715351035216637e-05, "loss": 3.0024, "step": 96900 }, { "epoch": 0.81, "learning_rate": 5.690214249206096e-05, "loss": 3.0035, "step": 97000 }, { "epoch": 0.81, "eval_accuracy": 0.43532720410661935, "eval_loss": 3.02020263671875, "eval_runtime": 37.453, "eval_samples_per_second": 299.469, "eval_steps_per_second": 2.51, "step": 97000 }, { "epoch": 0.81, "learning_rate": 5.665328831055661e-05, "loss": 3.0032, "step": 97100 }, { "epoch": 0.81, "learning_rate": 5.64019204504512e-05, "loss": 2.9961, "step": 97200 }, { "epoch": 0.81, "learning_rate": 5.61505525903458e-05, "loss": 3.0048, "step": 97300 }, { "epoch": 0.81, "learning_rate": 5.589918473024039e-05, "loss": 2.9939, "step": 97400 }, { "epoch": 0.82, "learning_rate": 5.565033054873604e-05, "loss": 2.9995, "step": 97500 }, { "epoch": 0.82, "learning_rate": 5.539896268863063e-05, "loss": 3.0067, "step": 97600 }, { "epoch": 0.82, "learning_rate": 5.514759482852523e-05, "loss": 2.9924, "step": 97700 }, { "epoch": 0.82, "learning_rate": 5.489622696841981e-05, "loss": 2.9997, "step": 97800 }, { "epoch": 0.82, "learning_rate": 5.46448591083144e-05, "loss": 3.0077, "step": 97900 }, { "epoch": 0.82, "learning_rate": 5.4393491248209e-05, "loss": 2.9966, "step": 98000 }, { "epoch": 0.82, "eval_accuracy": 0.43553711233670683, "eval_loss": 3.019421100616455, "eval_runtime": 36.419, "eval_samples_per_second": 307.971, "eval_steps_per_second": 2.581, "step": 98000 }, { "epoch": 0.82, "learning_rate": 5.414212338810359e-05, "loss": 3.0027, "step": 98100 }, { "epoch": 0.82, "learning_rate": 5.3890755527998184e-05, "loss": 3.0008, "step": 98200 }, { "epoch": 0.82, "learning_rate": 5.363938766789278e-05, "loss": 3.0019, "step": 98300 }, { "epoch": 0.82, "learning_rate": 5.338801980778737e-05, "loss": 2.9993, "step": 98400 }, { "epoch": 0.82, "learning_rate": 5.313665194768197e-05, "loss": 3.0025, "step": 98500 }, { "epoch": 0.82, "learning_rate": 5.2885284087576555e-05, "loss": 2.9987, "step": 98600 }, { "epoch": 0.83, "learning_rate": 5.263391622747115e-05, "loss": 3.0054, "step": 98700 }, { "epoch": 0.83, "learning_rate": 5.2382548367365745e-05, "loss": 3.0064, "step": 98800 }, { "epoch": 0.83, "learning_rate": 5.2131180507260336e-05, "loss": 3.0096, "step": 98900 }, { "epoch": 0.83, "learning_rate": 5.1879812647154934e-05, "loss": 2.9881, "step": 99000 }, { "epoch": 0.83, "eval_accuracy": 0.4356613723926449, "eval_loss": 3.0177648067474365, "eval_runtime": 37.6095, "eval_samples_per_second": 298.223, "eval_steps_per_second": 2.499, "step": 99000 }, { "epoch": 0.83, "learning_rate": 5.1628444787049525e-05, "loss": 3.0002, "step": 99100 }, { "epoch": 0.83, "learning_rate": 5.1377076926944117e-05, "loss": 2.9966, "step": 99200 }, { "epoch": 0.83, "learning_rate": 5.1128222745439764e-05, "loss": 3.0012, "step": 99300 }, { "epoch": 0.83, "learning_rate": 5.0876854885334356e-05, "loss": 2.9964, "step": 99400 }, { "epoch": 0.83, "learning_rate": 5.0625487025228954e-05, "loss": 2.9981, "step": 99500 }, { "epoch": 0.83, "learning_rate": 5.037411916512354e-05, "loss": 2.9986, "step": 99600 }, { "epoch": 0.83, "learning_rate": 5.012275130501813e-05, "loss": 2.9981, "step": 99700 }, { "epoch": 0.83, "learning_rate": 4.987138344491273e-05, "loss": 3.0057, "step": 99800 }, { "epoch": 0.84, "learning_rate": 4.962001558480732e-05, "loss": 2.994, "step": 99900 }, { "epoch": 0.84, "learning_rate": 4.936864772470192e-05, "loss": 3.0028, "step": 100000 }, { "epoch": 0.84, "eval_accuracy": 0.43574631853258, "eval_loss": 3.0173962116241455, "eval_runtime": 36.2768, "eval_samples_per_second": 309.179, "eval_steps_per_second": 2.591, "step": 100000 }, { "epoch": 0.84, "learning_rate": 4.911727986459651e-05, "loss": 3.0028, "step": 100100 }, { "epoch": 0.84, "learning_rate": 4.8865912004491106e-05, "loss": 2.9969, "step": 100200 }, { "epoch": 0.84, "learning_rate": 4.86145441443857e-05, "loss": 3.0029, "step": 100300 }, { "epoch": 0.84, "learning_rate": 4.836317628428029e-05, "loss": 3.0033, "step": 100400 }, { "epoch": 0.84, "learning_rate": 4.811180842417488e-05, "loss": 2.9945, "step": 100500 }, { "epoch": 0.84, "learning_rate": 4.786044056406947e-05, "loss": 2.9985, "step": 100600 }, { "epoch": 0.84, "learning_rate": 4.760907270396406e-05, "loss": 2.9952, "step": 100700 }, { "epoch": 0.84, "learning_rate": 4.735770484385866e-05, "loss": 2.9859, "step": 100800 }, { "epoch": 0.84, "learning_rate": 4.710633698375325e-05, "loss": 2.9951, "step": 100900 }, { "epoch": 0.84, "learning_rate": 4.685496912364785e-05, "loss": 2.9933, "step": 101000 }, { "epoch": 0.84, "eval_accuracy": 0.4362117672166871, "eval_loss": 3.01594614982605, "eval_runtime": 36.0518, "eval_samples_per_second": 311.108, "eval_steps_per_second": 2.607, "step": 101000 }, { "epoch": 0.85, "learning_rate": 4.660360126354244e-05, "loss": 2.9979, "step": 101100 }, { "epoch": 0.85, "learning_rate": 4.635223340343704e-05, "loss": 2.9961, "step": 101200 }, { "epoch": 0.85, "learning_rate": 4.6100865543331624e-05, "loss": 3.0076, "step": 101300 }, { "epoch": 0.85, "learning_rate": 4.5849497683226215e-05, "loss": 3.0, "step": 101400 }, { "epoch": 0.85, "learning_rate": 4.559812982312081e-05, "loss": 2.9964, "step": 101500 }, { "epoch": 0.85, "learning_rate": 4.5346761963015404e-05, "loss": 2.9951, "step": 101600 }, { "epoch": 0.85, "learning_rate": 4.5095394102909996e-05, "loss": 2.9964, "step": 101700 }, { "epoch": 0.85, "learning_rate": 4.4844026242804594e-05, "loss": 3.0034, "step": 101800 }, { "epoch": 0.85, "learning_rate": 4.4592658382699185e-05, "loss": 2.994, "step": 101900 }, { "epoch": 0.85, "learning_rate": 4.434129052259378e-05, "loss": 3.0002, "step": 102000 }, { "epoch": 0.85, "eval_accuracy": 0.43605310748424636, "eval_loss": 3.01462721824646, "eval_runtime": 36.4761, "eval_samples_per_second": 307.489, "eval_steps_per_second": 2.577, "step": 102000 }, { "epoch": 0.85, "learning_rate": 4.408992266248837e-05, "loss": 2.9951, "step": 102100 }, { "epoch": 0.85, "learning_rate": 4.383855480238296e-05, "loss": 2.9959, "step": 102200 }, { "epoch": 0.86, "learning_rate": 4.358718694227756e-05, "loss": 2.9922, "step": 102300 }, { "epoch": 0.86, "learning_rate": 4.333581908217215e-05, "loss": 2.9888, "step": 102400 }, { "epoch": 0.86, "learning_rate": 4.3084451222066746e-05, "loss": 2.9951, "step": 102500 }, { "epoch": 0.86, "learning_rate": 4.283559704056239e-05, "loss": 2.994, "step": 102600 }, { "epoch": 0.86, "learning_rate": 4.2586742859058035e-05, "loss": 2.9969, "step": 102700 }, { "epoch": 0.86, "learning_rate": 4.2335374998952626e-05, "loss": 2.9959, "step": 102800 }, { "epoch": 0.86, "learning_rate": 4.2084007138847224e-05, "loss": 3.0063, "step": 102900 }, { "epoch": 0.86, "learning_rate": 4.1832639278741816e-05, "loss": 2.9901, "step": 103000 }, { "epoch": 0.86, "eval_accuracy": 0.4364076347624878, "eval_loss": 3.0131843090057373, "eval_runtime": 36.6938, "eval_samples_per_second": 305.665, "eval_steps_per_second": 2.562, "step": 103000 }, { "epoch": 0.86, "learning_rate": 4.1581271418636414e-05, "loss": 2.996, "step": 103100 }, { "epoch": 0.86, "learning_rate": 4.1329903558531005e-05, "loss": 2.9928, "step": 103200 }, { "epoch": 0.86, "learning_rate": 4.1078535698425596e-05, "loss": 2.9981, "step": 103300 }, { "epoch": 0.86, "learning_rate": 4.082716783832019e-05, "loss": 2.9999, "step": 103400 }, { "epoch": 0.87, "learning_rate": 4.057579997821478e-05, "loss": 2.9879, "step": 103500 }, { "epoch": 0.87, "learning_rate": 4.032443211810937e-05, "loss": 2.9927, "step": 103600 }, { "epoch": 0.87, "learning_rate": 4.007306425800397e-05, "loss": 2.997, "step": 103700 }, { "epoch": 0.87, "learning_rate": 3.982169639789856e-05, "loss": 2.9899, "step": 103800 }, { "epoch": 0.87, "learning_rate": 3.957032853779316e-05, "loss": 3.0014, "step": 103900 }, { "epoch": 0.87, "learning_rate": 3.931896067768775e-05, "loss": 2.9895, "step": 104000 }, { "epoch": 0.87, "eval_accuracy": 0.4363837655992002, "eval_loss": 3.012049674987793, "eval_runtime": 36.7749, "eval_samples_per_second": 304.99, "eval_steps_per_second": 2.556, "step": 104000 }, { "epoch": 0.87, "learning_rate": 3.906759281758235e-05, "loss": 3.0, "step": 104100 }, { "epoch": 0.87, "learning_rate": 3.881622495747693e-05, "loss": 2.9981, "step": 104200 }, { "epoch": 0.87, "learning_rate": 3.856485709737152e-05, "loss": 2.9975, "step": 104300 }, { "epoch": 0.87, "learning_rate": 3.831600291586718e-05, "loss": 3.0007, "step": 104400 }, { "epoch": 0.87, "learning_rate": 3.806463505576176e-05, "loss": 2.9958, "step": 104500 }, { "epoch": 0.87, "learning_rate": 3.781326719565636e-05, "loss": 2.9939, "step": 104600 }, { "epoch": 0.88, "learning_rate": 3.756189933555095e-05, "loss": 2.9936, "step": 104700 }, { "epoch": 0.88, "learning_rate": 3.731053147544555e-05, "loss": 3.0004, "step": 104800 }, { "epoch": 0.88, "learning_rate": 3.705916361534014e-05, "loss": 2.9945, "step": 104900 }, { "epoch": 0.88, "learning_rate": 3.680779575523473e-05, "loss": 2.9882, "step": 105000 }, { "epoch": 0.88, "eval_accuracy": 0.4366730036955081, "eval_loss": 3.0106048583984375, "eval_runtime": 36.7107, "eval_samples_per_second": 305.524, "eval_steps_per_second": 2.561, "step": 105000 }, { "epoch": 0.88, "learning_rate": 3.655642789512932e-05, "loss": 2.9919, "step": 105100 }, { "epoch": 0.88, "learning_rate": 3.630506003502392e-05, "loss": 2.9894, "step": 105200 }, { "epoch": 0.88, "learning_rate": 3.605369217491851e-05, "loss": 3.0005, "step": 105300 }, { "epoch": 0.88, "learning_rate": 3.580483799341416e-05, "loss": 2.9884, "step": 105400 }, { "epoch": 0.88, "learning_rate": 3.555347013330875e-05, "loss": 2.9865, "step": 105500 }, { "epoch": 0.88, "learning_rate": 3.530210227320335e-05, "loss": 2.9909, "step": 105600 }, { "epoch": 0.88, "learning_rate": 3.5050734413097934e-05, "loss": 2.9961, "step": 105700 }, { "epoch": 0.89, "learning_rate": 3.479936655299253e-05, "loss": 2.9905, "step": 105800 }, { "epoch": 0.89, "learning_rate": 3.454799869288712e-05, "loss": 2.9913, "step": 105900 }, { "epoch": 0.89, "learning_rate": 3.429914451138277e-05, "loss": 2.9866, "step": 106000 }, { "epoch": 0.89, "eval_accuracy": 0.4369524133128152, "eval_loss": 3.008857250213623, "eval_runtime": 36.004, "eval_samples_per_second": 311.521, "eval_steps_per_second": 2.611, "step": 106000 }, { "epoch": 0.89, "learning_rate": 3.404777665127736e-05, "loss": 2.9893, "step": 106100 }, { "epoch": 0.89, "learning_rate": 3.379640879117196e-05, "loss": 2.989, "step": 106200 }, { "epoch": 0.89, "learning_rate": 3.354504093106655e-05, "loss": 2.9886, "step": 106300 }, { "epoch": 0.89, "learning_rate": 3.329367307096114e-05, "loss": 2.9835, "step": 106400 }, { "epoch": 0.89, "learning_rate": 3.3042305210855734e-05, "loss": 2.9918, "step": 106500 }, { "epoch": 0.89, "learning_rate": 3.279093735075033e-05, "loss": 2.9855, "step": 106600 }, { "epoch": 0.89, "learning_rate": 3.2539569490644923e-05, "loss": 2.9895, "step": 106700 }, { "epoch": 0.89, "learning_rate": 3.229071530914057e-05, "loss": 2.9791, "step": 106800 }, { "epoch": 0.89, "learning_rate": 3.203934744903516e-05, "loss": 2.9955, "step": 106900 }, { "epoch": 0.9, "learning_rate": 3.178797958892976e-05, "loss": 2.9961, "step": 107000 }, { "epoch": 0.9, "eval_accuracy": 0.43725920226448156, "eval_loss": 3.007978677749634, "eval_runtime": 36.4349, "eval_samples_per_second": 307.837, "eval_steps_per_second": 2.58, "step": 107000 }, { "epoch": 0.9, "learning_rate": 3.153661172882435e-05, "loss": 2.9921, "step": 107100 }, { "epoch": 0.9, "learning_rate": 3.128524386871894e-05, "loss": 2.9937, "step": 107200 }, { "epoch": 0.9, "learning_rate": 3.1033876008613534e-05, "loss": 2.9894, "step": 107300 }, { "epoch": 0.9, "learning_rate": 3.078250814850813e-05, "loss": 2.9919, "step": 107400 }, { "epoch": 0.9, "learning_rate": 3.0531140288402724e-05, "loss": 2.9906, "step": 107500 }, { "epoch": 0.9, "learning_rate": 3.0279772428297315e-05, "loss": 2.9839, "step": 107600 }, { "epoch": 0.9, "learning_rate": 3.002840456819191e-05, "loss": 2.9871, "step": 107700 }, { "epoch": 0.9, "learning_rate": 2.9777036708086504e-05, "loss": 2.9891, "step": 107800 }, { "epoch": 0.9, "learning_rate": 2.9525668847981092e-05, "loss": 2.9898, "step": 107900 }, { "epoch": 0.9, "learning_rate": 2.9274300987875687e-05, "loss": 2.9876, "step": 108000 }, { "epoch": 0.9, "eval_accuracy": 0.4373946948678491, "eval_loss": 3.0067296028137207, "eval_runtime": 36.3734, "eval_samples_per_second": 308.357, "eval_steps_per_second": 2.584, "step": 108000 }, { "epoch": 0.9, "learning_rate": 2.902293312777028e-05, "loss": 2.9917, "step": 108100 }, { "epoch": 0.91, "learning_rate": 2.8771565267664876e-05, "loss": 2.9916, "step": 108200 }, { "epoch": 0.91, "learning_rate": 2.8520197407559464e-05, "loss": 2.9981, "step": 108300 }, { "epoch": 0.91, "learning_rate": 2.826882954745406e-05, "loss": 2.9817, "step": 108400 }, { "epoch": 0.91, "learning_rate": 2.8017461687348653e-05, "loss": 2.9875, "step": 108500 }, { "epoch": 0.91, "learning_rate": 2.7766093827243248e-05, "loss": 2.9904, "step": 108600 }, { "epoch": 0.91, "learning_rate": 2.751472596713784e-05, "loss": 2.9882, "step": 108700 }, { "epoch": 0.91, "learning_rate": 2.7265871785633487e-05, "loss": 2.9885, "step": 108800 }, { "epoch": 0.91, "learning_rate": 2.701450392552808e-05, "loss": 2.9898, "step": 108900 }, { "epoch": 0.91, "learning_rate": 2.6763136065422673e-05, "loss": 2.9873, "step": 109000 }, { "epoch": 0.91, "eval_accuracy": 0.43755826883979015, "eval_loss": 3.0054852962493896, "eval_runtime": 36.8956, "eval_samples_per_second": 303.993, "eval_steps_per_second": 2.548, "step": 109000 }, { "epoch": 0.91, "learning_rate": 2.6511768205317264e-05, "loss": 2.9921, "step": 109100 }, { "epoch": 0.91, "learning_rate": 2.626040034521186e-05, "loss": 2.9863, "step": 109200 }, { "epoch": 0.91, "learning_rate": 2.600903248510645e-05, "loss": 2.9902, "step": 109300 }, { "epoch": 0.92, "learning_rate": 2.5757664625001045e-05, "loss": 2.9823, "step": 109400 }, { "epoch": 0.92, "learning_rate": 2.5508810443496693e-05, "loss": 2.9978, "step": 109500 }, { "epoch": 0.92, "learning_rate": 2.5257442583391284e-05, "loss": 2.9859, "step": 109600 }, { "epoch": 0.92, "learning_rate": 2.500607472328588e-05, "loss": 2.9821, "step": 109700 }, { "epoch": 0.92, "learning_rate": 2.4754706863180473e-05, "loss": 2.9932, "step": 109800 }, { "epoch": 0.92, "learning_rate": 2.4503339003075065e-05, "loss": 2.9906, "step": 109900 }, { "epoch": 0.92, "learning_rate": 2.4251971142969656e-05, "loss": 2.9891, "step": 110000 }, { "epoch": 0.92, "eval_accuracy": 0.4375182528895728, "eval_loss": 3.004079580307007, "eval_runtime": 36.2219, "eval_samples_per_second": 309.647, "eval_steps_per_second": 2.595, "step": 110000 }, { "epoch": 0.92, "learning_rate": 2.400060328286425e-05, "loss": 2.9875, "step": 110100 }, { "epoch": 0.92, "learning_rate": 2.3749235422758845e-05, "loss": 2.9859, "step": 110200 }, { "epoch": 0.92, "learning_rate": 2.349786756265344e-05, "loss": 2.9865, "step": 110300 }, { "epoch": 0.92, "learning_rate": 2.3246499702548028e-05, "loss": 2.994, "step": 110400 }, { "epoch": 0.92, "learning_rate": 2.2995131842442623e-05, "loss": 2.9817, "step": 110500 }, { "epoch": 0.93, "learning_rate": 2.2743763982337217e-05, "loss": 2.9915, "step": 110600 }, { "epoch": 0.93, "learning_rate": 2.2492396122231812e-05, "loss": 2.9927, "step": 110700 }, { "epoch": 0.93, "learning_rate": 2.22410282621264e-05, "loss": 2.9908, "step": 110800 }, { "epoch": 0.93, "learning_rate": 2.1989660402020994e-05, "loss": 2.9897, "step": 110900 }, { "epoch": 0.93, "learning_rate": 2.173829254191559e-05, "loss": 2.9835, "step": 111000 }, { "epoch": 0.93, "eval_accuracy": 0.4377632628303773, "eval_loss": 3.0032153129577637, "eval_runtime": 36.5662, "eval_samples_per_second": 306.731, "eval_steps_per_second": 2.571, "step": 111000 }, { "epoch": 0.93, "learning_rate": 2.1486924681810184e-05, "loss": 2.9787, "step": 111100 }, { "epoch": 0.93, "learning_rate": 2.123555682170477e-05, "loss": 2.9831, "step": 111200 }, { "epoch": 0.93, "learning_rate": 2.0984188961599366e-05, "loss": 2.9913, "step": 111300 }, { "epoch": 0.93, "learning_rate": 2.073282110149396e-05, "loss": 2.9904, "step": 111400 }, { "epoch": 0.93, "learning_rate": 2.0481453241388556e-05, "loss": 2.9842, "step": 111500 }, { "epoch": 0.93, "learning_rate": 2.0230085381283147e-05, "loss": 2.987, "step": 111600 }, { "epoch": 0.93, "learning_rate": 1.9978717521177738e-05, "loss": 2.9868, "step": 111700 }, { "epoch": 0.94, "learning_rate": 1.9727349661072333e-05, "loss": 2.9887, "step": 111800 }, { "epoch": 0.94, "learning_rate": 1.9475981800966928e-05, "loss": 2.9844, "step": 111900 }, { "epoch": 0.94, "learning_rate": 1.922461394086152e-05, "loss": 2.9887, "step": 112000 }, { "epoch": 0.94, "eval_accuracy": 0.4380391622766127, "eval_loss": 3.0022435188293457, "eval_runtime": 36.4456, "eval_samples_per_second": 307.746, "eval_steps_per_second": 2.579, "step": 112000 }, { "epoch": 0.94, "learning_rate": 1.8973246080756113e-05, "loss": 2.9792, "step": 112100 }, { "epoch": 0.94, "learning_rate": 1.8721878220650705e-05, "loss": 2.9813, "step": 112200 }, { "epoch": 0.94, "learning_rate": 1.84705103605453e-05, "loss": 2.9852, "step": 112300 }, { "epoch": 0.94, "learning_rate": 1.821914250043989e-05, "loss": 2.9927, "step": 112400 }, { "epoch": 0.94, "learning_rate": 1.797028831893554e-05, "loss": 2.9869, "step": 112500 }, { "epoch": 0.94, "learning_rate": 1.7718920458830133e-05, "loss": 2.9798, "step": 112600 }, { "epoch": 0.94, "learning_rate": 1.7467552598724724e-05, "loss": 2.982, "step": 112700 }, { "epoch": 0.94, "learning_rate": 1.721618473861932e-05, "loss": 2.9787, "step": 112800 }, { "epoch": 0.94, "learning_rate": 1.6964816878513914e-05, "loss": 2.9891, "step": 112900 }, { "epoch": 0.95, "learning_rate": 1.671596269700956e-05, "loss": 2.9876, "step": 113000 }, { "epoch": 0.95, "eval_accuracy": 0.43829610679906095, "eval_loss": 3.0009684562683105, "eval_runtime": 37.5779, "eval_samples_per_second": 298.473, "eval_steps_per_second": 2.501, "step": 113000 }, { "epoch": 0.95, "learning_rate": 1.6464594836904153e-05, "loss": 2.9809, "step": 113100 }, { "epoch": 0.95, "learning_rate": 1.6213226976798747e-05, "loss": 2.9933, "step": 113200 }, { "epoch": 0.95, "learning_rate": 1.596185911669334e-05, "loss": 2.9868, "step": 113300 }, { "epoch": 0.95, "learning_rate": 1.5710491256587933e-05, "loss": 2.9867, "step": 113400 }, { "epoch": 0.95, "learning_rate": 1.5459123396482525e-05, "loss": 2.9831, "step": 113500 }, { "epoch": 0.95, "learning_rate": 1.5207755536377118e-05, "loss": 2.9857, "step": 113600 }, { "epoch": 0.95, "learning_rate": 1.495638767627171e-05, "loss": 2.9861, "step": 113700 }, { "epoch": 0.95, "learning_rate": 1.4705019816166305e-05, "loss": 2.9797, "step": 113800 }, { "epoch": 0.95, "learning_rate": 1.4453651956060897e-05, "loss": 2.9819, "step": 113900 }, { "epoch": 0.95, "learning_rate": 1.4202284095955491e-05, "loss": 2.9818, "step": 114000 }, { "epoch": 0.95, "eval_accuracy": 0.4384379177103575, "eval_loss": 2.9998745918273926, "eval_runtime": 36.4806, "eval_samples_per_second": 307.451, "eval_steps_per_second": 2.577, "step": 114000 }, { "epoch": 0.95, "learning_rate": 1.3950916235850083e-05, "loss": 2.9861, "step": 114100 }, { "epoch": 0.96, "learning_rate": 1.3699548375744677e-05, "loss": 2.9859, "step": 114200 }, { "epoch": 0.96, "learning_rate": 1.3448180515639268e-05, "loss": 2.9864, "step": 114300 }, { "epoch": 0.96, "learning_rate": 1.3196812655533863e-05, "loss": 2.9818, "step": 114400 }, { "epoch": 0.96, "learning_rate": 1.2945444795428454e-05, "loss": 2.9732, "step": 114500 }, { "epoch": 0.96, "learning_rate": 1.2694076935323049e-05, "loss": 2.9859, "step": 114600 }, { "epoch": 0.96, "learning_rate": 1.244270907521764e-05, "loss": 2.9828, "step": 114700 }, { "epoch": 0.96, "learning_rate": 1.2191341215112235e-05, "loss": 2.9837, "step": 114800 }, { "epoch": 0.96, "learning_rate": 1.1939973355006828e-05, "loss": 2.9748, "step": 114900 }, { "epoch": 0.96, "learning_rate": 1.1688605494901421e-05, "loss": 2.9797, "step": 115000 }, { "epoch": 0.96, "eval_accuracy": 0.43843651364192887, "eval_loss": 2.999021291732788, "eval_runtime": 36.1681, "eval_samples_per_second": 310.108, "eval_steps_per_second": 2.599, "step": 115000 }, { "epoch": 0.96, "learning_rate": 1.1437237634796014e-05, "loss": 2.9813, "step": 115100 }, { "epoch": 0.96, "learning_rate": 1.1185869774690607e-05, "loss": 2.978, "step": 115200 }, { "epoch": 0.96, "learning_rate": 1.09345019145852e-05, "loss": 2.9886, "step": 115300 }, { "epoch": 0.97, "learning_rate": 1.0683134054479795e-05, "loss": 2.9744, "step": 115400 }, { "epoch": 0.97, "learning_rate": 1.0431766194374386e-05, "loss": 2.9804, "step": 115500 }, { "epoch": 0.97, "learning_rate": 1.0182912012870034e-05, "loss": 2.984, "step": 115600 }, { "epoch": 0.97, "learning_rate": 9.931544152764628e-06, "loss": 2.985, "step": 115700 }, { "epoch": 0.97, "learning_rate": 9.68017629265922e-06, "loss": 2.9843, "step": 115800 }, { "epoch": 0.97, "learning_rate": 9.428808432553814e-06, "loss": 2.9809, "step": 115900 }, { "epoch": 0.97, "learning_rate": 9.177440572448405e-06, "loss": 2.9842, "step": 116000 }, { "epoch": 0.97, "eval_accuracy": 0.43876225751738235, "eval_loss": 2.9980885982513428, "eval_runtime": 36.1964, "eval_samples_per_second": 309.865, "eval_steps_per_second": 2.597, "step": 116000 }, { "epoch": 0.97, "learning_rate": 8.926072712342998e-06, "loss": 2.9702, "step": 116100 }, { "epoch": 0.97, "learning_rate": 8.674704852237591e-06, "loss": 2.9799, "step": 116200 }, { "epoch": 0.97, "learning_rate": 8.423336992132184e-06, "loss": 2.9825, "step": 116300 }, { "epoch": 0.97, "learning_rate": 8.171969132026777e-06, "loss": 2.9726, "step": 116400 }, { "epoch": 0.97, "learning_rate": 7.920601271921372e-06, "loss": 2.9788, "step": 116500 }, { "epoch": 0.98, "learning_rate": 7.669233411815965e-06, "loss": 2.988, "step": 116600 }, { "epoch": 0.98, "learning_rate": 7.417865551710557e-06, "loss": 2.9795, "step": 116700 }, { "epoch": 0.98, "learning_rate": 7.16649769160515e-06, "loss": 2.9797, "step": 116800 }, { "epoch": 0.98, "learning_rate": 6.915129831499744e-06, "loss": 2.9735, "step": 116900 }, { "epoch": 0.98, "learning_rate": 6.663761971394337e-06, "loss": 2.9739, "step": 117000 }, { "epoch": 0.98, "eval_accuracy": 0.43866397272737484, "eval_loss": 2.9972493648529053, "eval_runtime": 36.7078, "eval_samples_per_second": 305.548, "eval_steps_per_second": 2.561, "step": 117000 }, { "epoch": 0.98, "learning_rate": 6.41239411128893e-06, "loss": 2.9765, "step": 117100 }, { "epoch": 0.98, "learning_rate": 6.161026251183523e-06, "loss": 2.9859, "step": 117200 }, { "epoch": 0.98, "learning_rate": 5.909658391078116e-06, "loss": 2.9897, "step": 117300 }, { "epoch": 0.98, "learning_rate": 5.658290530972709e-06, "loss": 2.9855, "step": 117400 }, { "epoch": 0.98, "learning_rate": 5.406922670867302e-06, "loss": 2.9747, "step": 117500 }, { "epoch": 0.98, "learning_rate": 5.155554810761895e-06, "loss": 2.9732, "step": 117600 }, { "epoch": 0.98, "learning_rate": 4.9041869506564885e-06, "loss": 2.9796, "step": 117700 }, { "epoch": 0.99, "learning_rate": 4.6528190905510815e-06, "loss": 2.9782, "step": 117800 }, { "epoch": 0.99, "learning_rate": 4.4014512304456745e-06, "loss": 2.9841, "step": 117900 }, { "epoch": 0.99, "learning_rate": 4.150083370340268e-06, "loss": 2.9804, "step": 118000 }, { "epoch": 0.99, "eval_accuracy": 0.43883737517831667, "eval_loss": 2.9965155124664307, "eval_runtime": 36.1347, "eval_samples_per_second": 310.394, "eval_steps_per_second": 2.601, "step": 118000 }, { "epoch": 0.99, "learning_rate": 3.898715510234861e-06, "loss": 2.9836, "step": 118100 }, { "epoch": 0.99, "learning_rate": 3.6473476501294542e-06, "loss": 2.9815, "step": 118200 }, { "epoch": 0.99, "learning_rate": 3.398493468625101e-06, "loss": 2.9744, "step": 118300 }, { "epoch": 0.99, "learning_rate": 3.147125608519694e-06, "loss": 2.9847, "step": 118400 }, { "epoch": 0.99, "learning_rate": 2.8957577484142875e-06, "loss": 2.9733, "step": 118500 }, { "epoch": 0.99, "learning_rate": 2.6469035669099345e-06, "loss": 2.9766, "step": 118600 }, { "epoch": 0.99, "learning_rate": 2.395535706804528e-06, "loss": 2.9802, "step": 118700 }, { "epoch": 0.99, "learning_rate": 2.144167846699121e-06, "loss": 2.9757, "step": 118800 }, { "epoch": 0.99, "learning_rate": 1.8927999865937138e-06, "loss": 2.9775, "step": 118900 }, { "epoch": 1.0, "learning_rate": 1.641432126488307e-06, "loss": 2.9828, "step": 119000 }, { "epoch": 1.0, "eval_accuracy": 0.43901218169768724, "eval_loss": 2.995953321456909, "eval_runtime": 36.3994, "eval_samples_per_second": 308.137, "eval_steps_per_second": 2.582, "step": 119000 }, { "epoch": 1.0, "learning_rate": 1.3900642663829e-06, "loss": 2.9783, "step": 119100 }, { "epoch": 1.0, "learning_rate": 1.1386964062774932e-06, "loss": 2.9723, "step": 119200 }, { "epoch": 1.0, "learning_rate": 8.873285461720863e-07, "loss": 2.9817, "step": 119300 }, { "epoch": 1.0, "learning_rate": 6.359606860666794e-07, "loss": 2.9792, "step": 119400 }, { "epoch": 1.0, "learning_rate": 3.8459282596127255e-07, "loss": 2.982, "step": 119500 }, { "epoch": 1.0, "step": 119547, "total_flos": 1.455921831670228e+20, "train_loss": 3.081914688561298, "train_runtime": 169290.0352, "train_samples_per_second": 169.48, "train_steps_per_second": 0.706 } ], "max_steps": 119547, "num_train_epochs": 1, "total_flos": 1.455921831670228e+20, "trial_name": null, "trial_params": null }