| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "global_step": 119547, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00024811801548585953, | |
| "loss": 8.8316, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.00029403430324938403, | |
| "loss": 5.464, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0002998014193905167, | |
| "loss": 4.4569, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0002995500515304113, | |
| "loss": 4.0926, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 0.0002992986836703059, | |
| "loss": 3.9091, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029904731581020046, | |
| "loss": 3.7998, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002987959479500951, | |
| "loss": 3.7103, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002985445800899897, | |
| "loss": 3.663, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029829321222988426, | |
| "loss": 3.6162, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002980418443697789, | |
| "loss": 3.5725, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "eval_accuracy": 0.37403400092106887, | |
| "eval_loss": 3.5909957885742188, | |
| "eval_runtime": 37.0746, | |
| "eval_samples_per_second": 302.526, | |
| "eval_steps_per_second": 2.535, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029779047650967344, | |
| "loss": 3.5506, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029753910864956806, | |
| "loss": 3.5285, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002972877407894626, | |
| "loss": 3.5064, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029703637292935724, | |
| "loss": 3.4907, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002967875187478529, | |
| "loss": 3.4708, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.00029653615088774747, | |
| "loss": 3.456, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0002962847830276421, | |
| "loss": 3.4413, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029603341516753665, | |
| "loss": 3.4224, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029578204730743127, | |
| "loss": 3.4184, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029553067944732583, | |
| "loss": 3.4011, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.39113344827973534, | |
| "eval_loss": 3.4203457832336426, | |
| "eval_runtime": 36.9935, | |
| "eval_samples_per_second": 303.189, | |
| "eval_steps_per_second": 2.541, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029527931158722045, | |
| "loss": 3.3915, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000295027943727115, | |
| "loss": 3.3831, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029477657586700963, | |
| "loss": 3.3814, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002945252080069042, | |
| "loss": 3.3734, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029427384014679876, | |
| "loss": 3.3624, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002940224722866934, | |
| "loss": 3.3559, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.000293771104426588, | |
| "loss": 3.3459, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.00029351973656648256, | |
| "loss": 3.3462, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 0.0002932683687063772, | |
| "loss": 3.3306, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002930170008462718, | |
| "loss": 3.335, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.39839248205600547, | |
| "eval_loss": 3.3489201068878174, | |
| "eval_runtime": 36.3401, | |
| "eval_samples_per_second": 308.64, | |
| "eval_steps_per_second": 2.587, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029276563298616636, | |
| "loss": 3.3239, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.000292514265126061, | |
| "loss": 3.3132, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029226289726595555, | |
| "loss": 3.3157, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029201152940585017, | |
| "loss": 3.3077, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029176016154574473, | |
| "loss": 3.308, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029150879368563935, | |
| "loss": 3.2971, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002912574258255339, | |
| "loss": 3.2953, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029100605796542853, | |
| "loss": 3.2915, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002907572037839242, | |
| "loss": 3.289, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00029050583592381876, | |
| "loss": 3.2835, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 0.4028412728722747, | |
| "eval_loss": 3.306710958480835, | |
| "eval_runtime": 37.7903, | |
| "eval_samples_per_second": 296.796, | |
| "eval_steps_per_second": 2.487, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.0002902544680637134, | |
| "loss": 3.2759, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00029000310020360794, | |
| "loss": 3.2803, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002897517323435025, | |
| "loss": 3.2752, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002895003644833971, | |
| "loss": 3.28, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002892489966232917, | |
| "loss": 3.2788, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002889976287631863, | |
| "loss": 3.2663, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002887462609030809, | |
| "loss": 3.2647, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002884948930429755, | |
| "loss": 3.2643, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002882435251828701, | |
| "loss": 3.2686, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028799215732276467, | |
| "loss": 3.2477, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.4059681332629427, | |
| "eval_loss": 3.2766220569610596, | |
| "eval_runtime": 36.7229, | |
| "eval_samples_per_second": 305.423, | |
| "eval_steps_per_second": 2.56, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002877407894626593, | |
| "loss": 3.247, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002874894216025539, | |
| "loss": 3.2555, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00028723805374244847, | |
| "loss": 3.2528, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002869866858823431, | |
| "loss": 3.2441, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028673531802223765, | |
| "loss": 3.2458, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028648395016213227, | |
| "loss": 3.2394, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028623258230202683, | |
| "loss": 3.2464, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028598121444192145, | |
| "loss": 3.2484, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.000285729846581816, | |
| "loss": 3.2372, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002854784787217106, | |
| "loss": 3.2373, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.40810372134296335, | |
| "eval_loss": 3.256094455718994, | |
| "eval_runtime": 36.3187, | |
| "eval_samples_per_second": 308.822, | |
| "eval_steps_per_second": 2.588, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002852271108616052, | |
| "loss": 3.2314, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.00028497825668010086, | |
| "loss": 3.2393, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002847268888199954, | |
| "loss": 3.23, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002844780346384911, | |
| "loss": 3.2317, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 0.0002842266667783857, | |
| "loss": 3.2188, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028397529891828027, | |
| "loss": 3.2251, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002837239310581749, | |
| "loss": 3.2235, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028347256319806945, | |
| "loss": 3.2174, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028322119533796407, | |
| "loss": 3.2212, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028296982747785863, | |
| "loss": 3.2208, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_accuracy": 0.4099135655475305, | |
| "eval_loss": 3.2382774353027344, | |
| "eval_runtime": 36.4193, | |
| "eval_samples_per_second": 307.969, | |
| "eval_steps_per_second": 2.581, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028271845961775325, | |
| "loss": 3.215, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002824670917576478, | |
| "loss": 3.2124, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028221572389754244, | |
| "loss": 3.2214, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.000281964356037437, | |
| "loss": 3.2157, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002817129881773316, | |
| "loss": 3.212, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00028146162031722624, | |
| "loss": 3.2063, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0002812102524571208, | |
| "loss": 3.2089, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002809588845970154, | |
| "loss": 3.2056, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028070751673691, | |
| "loss": 3.206, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002804561488768046, | |
| "loss": 3.2021, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.4112453244521325, | |
| "eval_loss": 3.2249624729156494, | |
| "eval_runtime": 37.3966, | |
| "eval_samples_per_second": 299.92, | |
| "eval_steps_per_second": 2.514, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00028020478101669917, | |
| "loss": 3.2098, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002799534131565938, | |
| "loss": 3.2099, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027970204529648835, | |
| "loss": 3.2075, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027945067743638297, | |
| "loss": 3.205, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027920182325487863, | |
| "loss": 3.1931, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002789504553947732, | |
| "loss": 3.1969, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002786990875346678, | |
| "loss": 3.1974, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.0002784502333531634, | |
| "loss": 3.1958, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00027819886549305804, | |
| "loss": 3.1925, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027794749763295265, | |
| "loss": 3.194, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.41224642524178057, | |
| "eval_loss": 3.2142982482910156, | |
| "eval_runtime": 37.0575, | |
| "eval_samples_per_second": 302.665, | |
| "eval_steps_per_second": 2.537, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002776961297728472, | |
| "loss": 3.1941, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027744476191274184, | |
| "loss": 3.1943, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002771933940526364, | |
| "loss": 3.197, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000276942026192531, | |
| "loss": 3.1912, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002766906583324256, | |
| "loss": 3.1941, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002764392904723202, | |
| "loss": 3.1904, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027618792261221477, | |
| "loss": 3.1807, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027593906843071043, | |
| "loss": 3.1854, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027568770057060505, | |
| "loss": 3.1859, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.0002754363327104996, | |
| "loss": 3.1971, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.413248228065643, | |
| "eval_loss": 3.2038817405700684, | |
| "eval_runtime": 36.9865, | |
| "eval_samples_per_second": 303.246, | |
| "eval_steps_per_second": 2.541, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.00027518496485039423, | |
| "loss": 3.1776, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002749335969902888, | |
| "loss": 3.1872, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002746822291301834, | |
| "loss": 3.1792, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.000274430861270078, | |
| "loss": 3.1858, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027417949340997254, | |
| "loss": 3.1825, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027392812554986716, | |
| "loss": 3.1798, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002736767576897618, | |
| "loss": 3.1819, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027342538982965634, | |
| "loss": 3.1778, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027317402196955096, | |
| "loss": 3.185, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.0002729226541094455, | |
| "loss": 3.1794, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.41429074887393713, | |
| "eval_loss": 3.1947903633117676, | |
| "eval_runtime": 37.2827, | |
| "eval_samples_per_second": 300.837, | |
| "eval_steps_per_second": 2.521, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027267128624934014, | |
| "loss": 3.1782, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027242243206783575, | |
| "loss": 3.1752, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00027217106420773037, | |
| "loss": 3.172, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000271919696347625, | |
| "loss": 3.1794, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027166832848751955, | |
| "loss": 3.1773, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027141696062741417, | |
| "loss": 3.1776, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002711681064459098, | |
| "loss": 3.1866, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002709167385858044, | |
| "loss": 3.1707, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027066537072569896, | |
| "loss": 3.1705, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002704140028655936, | |
| "loss": 3.1731, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.41493381221427206, | |
| "eval_loss": 3.1884472370147705, | |
| "eval_runtime": 36.6321, | |
| "eval_samples_per_second": 306.18, | |
| "eval_steps_per_second": 2.566, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.00027016514868408924, | |
| "loss": 3.1688, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002699137808239838, | |
| "loss": 3.1698, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002696624129638784, | |
| "loss": 3.1661, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.000269411045103773, | |
| "loss": 3.163, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0002691596772436676, | |
| "loss": 3.166, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026890830938356217, | |
| "loss": 3.1684, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002686569415234568, | |
| "loss": 3.1665, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002684055736633514, | |
| "loss": 3.1623, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026815420580324597, | |
| "loss": 3.1674, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002679028379431406, | |
| "loss": 3.1596, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 0.41567656441304324, | |
| "eval_loss": 3.181196928024292, | |
| "eval_runtime": 38.8685, | |
| "eval_samples_per_second": 288.563, | |
| "eval_steps_per_second": 2.418, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026765147008303515, | |
| "loss": 3.1659, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026740010222292977, | |
| "loss": 3.1528, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026714873436282433, | |
| "loss": 3.1656, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026689736650271895, | |
| "loss": 3.1594, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002666459986426135, | |
| "loss": 3.1593, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.00026639463078250813, | |
| "loss": 3.1579, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 0.0002661432629224027, | |
| "loss": 3.1599, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002658918950622973, | |
| "loss": 3.1529, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002656405272021919, | |
| "loss": 3.1615, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002653891593420865, | |
| "loss": 3.1628, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.41594754961977826, | |
| "eval_loss": 3.1771674156188965, | |
| "eval_runtime": 37.2285, | |
| "eval_samples_per_second": 301.275, | |
| "eval_steps_per_second": 2.525, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026513779148198106, | |
| "loss": 3.1594, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002648864236218757, | |
| "loss": 3.158, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002646400831189724, | |
| "loss": 3.1588, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.000264388715258867, | |
| "loss": 3.1606, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026413734739876157, | |
| "loss": 3.1555, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0002638859795386562, | |
| "loss": 3.1574, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026363461167855075, | |
| "loss": 3.1526, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026338324381844537, | |
| "loss": 3.1457, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00026313187595833993, | |
| "loss": 3.1655, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002628805080982345, | |
| "loss": 3.1658, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.416945842272569, | |
| "eval_loss": 3.170196294784546, | |
| "eval_runtime": 38.2091, | |
| "eval_samples_per_second": 293.542, | |
| "eval_steps_per_second": 2.46, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002626291402381291, | |
| "loss": 3.1537, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026237777237802373, | |
| "loss": 3.1596, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026212891819651934, | |
| "loss": 3.1558, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026187755033641396, | |
| "loss": 3.1568, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002616261824763085, | |
| "loss": 3.1488, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026137481461620314, | |
| "loss": 3.1452, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002611234467560977, | |
| "loss": 3.1503, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002608720788959923, | |
| "loss": 3.1456, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00026062071103588694, | |
| "loss": 3.1469, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002603693431757815, | |
| "loss": 3.1479, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 0.41732213261145495, | |
| "eval_loss": 3.1664865016937256, | |
| "eval_runtime": 36.9736, | |
| "eval_samples_per_second": 303.351, | |
| "eval_steps_per_second": 2.542, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.0002601179753156761, | |
| "loss": 3.152, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025986660745557074, | |
| "loss": 3.1515, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002596152395954653, | |
| "loss": 3.1403, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002593638717353599, | |
| "loss": 3.1482, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002591125038752545, | |
| "loss": 3.1384, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002588611360151491, | |
| "loss": 3.1423, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002586097681550437, | |
| "loss": 3.1388, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025835840029493824, | |
| "loss": 3.1502, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025810703243483286, | |
| "loss": 3.1423, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002578556645747274, | |
| "loss": 3.1401, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.4181765082503061, | |
| "eval_loss": 3.161729097366333, | |
| "eval_runtime": 36.3895, | |
| "eval_samples_per_second": 308.221, | |
| "eval_steps_per_second": 2.583, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.00025760429671462204, | |
| "loss": 3.1444, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002573529288545166, | |
| "loss": 3.1344, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 0.0002571015609944112, | |
| "loss": 3.1362, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025685019313430584, | |
| "loss": 3.1449, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002565988252742004, | |
| "loss": 3.1403, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.000256347457414095, | |
| "loss": 3.1485, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025609608955398964, | |
| "loss": 3.1465, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002558447216938842, | |
| "loss": 3.1388, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002555933538337788, | |
| "loss": 3.1412, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002553419859736734, | |
| "loss": 3.1386, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.4183225313668887, | |
| "eval_loss": 3.1586148738861084, | |
| "eval_runtime": 36.9298, | |
| "eval_samples_per_second": 303.711, | |
| "eval_steps_per_second": 2.545, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.000255090618113568, | |
| "loss": 3.1421, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025483925025346257, | |
| "loss": 3.1355, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002545878823933572, | |
| "loss": 3.1399, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.00025433651453325175, | |
| "loss": 3.1349, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0002540851466731463, | |
| "loss": 3.1413, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025383377881304093, | |
| "loss": 3.1278, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002535824109529355, | |
| "loss": 3.1405, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002533310430928301, | |
| "loss": 3.1277, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025307967523272474, | |
| "loss": 3.1341, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002528283073726193, | |
| "loss": 3.1396, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.41871075628741844, | |
| "eval_loss": 3.1532347202301025, | |
| "eval_runtime": 36.5045, | |
| "eval_samples_per_second": 307.25, | |
| "eval_steps_per_second": 2.575, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025257945319111496, | |
| "loss": 3.1329, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002523280853310095, | |
| "loss": 3.1358, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025207671747090414, | |
| "loss": 3.1377, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002518253496107987, | |
| "loss": 3.1289, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002515739817506933, | |
| "loss": 3.1348, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00025132261389058794, | |
| "loss": 3.1324, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.0002510712460304825, | |
| "loss": 3.136, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002508198781703771, | |
| "loss": 3.1337, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00025056851031027175, | |
| "loss": 3.132, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002503171424501663, | |
| "loss": 3.1345, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.41896770080986667, | |
| "eval_loss": 3.150233268737793, | |
| "eval_runtime": 36.0939, | |
| "eval_samples_per_second": 310.745, | |
| "eval_steps_per_second": 2.604, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002500682882686619, | |
| "loss": 3.1301, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024981692040855653, | |
| "loss": 3.1261, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024956555254845115, | |
| "loss": 3.1279, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002493141846883457, | |
| "loss": 3.1235, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00024906281682824034, | |
| "loss": 3.1302, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002488114489681349, | |
| "loss": 3.1287, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002485600811080295, | |
| "loss": 3.1314, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002483087132479241, | |
| "loss": 3.1226, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.0002480573453878187, | |
| "loss": 3.1289, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024780597752771327, | |
| "loss": 3.1319, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.41907721814730364, | |
| "eval_loss": 3.1475839614868164, | |
| "eval_runtime": 36.3645, | |
| "eval_samples_per_second": 308.432, | |
| "eval_steps_per_second": 2.585, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002475546096676079, | |
| "loss": 3.1304, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024730324180750245, | |
| "loss": 3.1309, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024705187394739707, | |
| "loss": 3.1254, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024680050608729163, | |
| "loss": 3.1293, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024654913822718625, | |
| "loss": 3.1278, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002462977703670808, | |
| "loss": 3.1216, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024604640250697543, | |
| "loss": 3.1281, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002457975483254711, | |
| "loss": 3.1182, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024554618046536566, | |
| "loss": 3.1231, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0002452948126052603, | |
| "loss": 3.1238, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 0.42022504408774863, | |
| "eval_loss": 3.1434154510498047, | |
| "eval_runtime": 36.9095, | |
| "eval_samples_per_second": 303.878, | |
| "eval_steps_per_second": 2.547, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.00024504344474515484, | |
| "loss": 3.1249, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024479207688504946, | |
| "loss": 3.1316, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.000244540709024944, | |
| "loss": 3.1152, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024428934116483864, | |
| "loss": 3.1204, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024403797330473323, | |
| "loss": 3.1237, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024378660544462782, | |
| "loss": 3.1256, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024353523758452241, | |
| "loss": 3.1272, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.000243283869724417, | |
| "loss": 3.12, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024303250186431162, | |
| "loss": 3.1182, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002427811340042062, | |
| "loss": 3.1224, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.42017309355588756, | |
| "eval_loss": 3.1407454013824463, | |
| "eval_runtime": 36.6142, | |
| "eval_samples_per_second": 306.329, | |
| "eval_steps_per_second": 2.567, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0002425297661441008, | |
| "loss": 3.1174, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024227839828399537, | |
| "loss": 3.1199, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00024202703042389, | |
| "loss": 3.1147, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024177566256378458, | |
| "loss": 3.1201, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024152429470367917, | |
| "loss": 3.1231, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024127544052217483, | |
| "loss": 3.1172, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002410240726620694, | |
| "loss": 3.1176, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024077521848056506, | |
| "loss": 3.1119, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024052385062045965, | |
| "loss": 3.1212, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024027248276035424, | |
| "loss": 3.1183, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.4208589809832972, | |
| "eval_loss": 3.137460947036743, | |
| "eval_runtime": 37.115, | |
| "eval_samples_per_second": 302.196, | |
| "eval_steps_per_second": 2.533, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00024002111490024883, | |
| "loss": 3.1287, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023976974704014342, | |
| "loss": 3.1157, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023951837918003804, | |
| "loss": 3.1199, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.0002392670113199326, | |
| "loss": 3.1162, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00023901564345982722, | |
| "loss": 3.1179, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.0002387642755997218, | |
| "loss": 3.1214, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023851290773961638, | |
| "loss": 3.1138, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.000238261539879511, | |
| "loss": 3.1117, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023801017201940556, | |
| "loss": 3.1117, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023775880415930018, | |
| "loss": 3.1131, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4210050040998798, | |
| "eval_loss": 3.1347129344940186, | |
| "eval_runtime": 36.8178, | |
| "eval_samples_per_second": 304.635, | |
| "eval_steps_per_second": 2.553, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023750743629919474, | |
| "loss": 3.118, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023725606843908936, | |
| "loss": 3.1158, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023700470057898395, | |
| "loss": 3.1178, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023675333271887855, | |
| "loss": 3.1019, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023650196485877314, | |
| "loss": 3.1105, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023625059699866773, | |
| "loss": 3.1158, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 0.00023599922913856232, | |
| "loss": 3.1166, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023574786127845694, | |
| "loss": 3.1172, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002354964934183515, | |
| "loss": 3.1233, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023524763923684716, | |
| "loss": 3.1106, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.42156382333449405, | |
| "eval_loss": 3.131035566329956, | |
| "eval_runtime": 36.1307, | |
| "eval_samples_per_second": 310.428, | |
| "eval_steps_per_second": 2.602, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023499627137674175, | |
| "loss": 3.1186, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023474490351663635, | |
| "loss": 3.1069, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023449353565653094, | |
| "loss": 3.114, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00023424216779642553, | |
| "loss": 3.114, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002339907999363201, | |
| "loss": 3.1072, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002337394320762147, | |
| "loss": 3.1141, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002334880642161093, | |
| "loss": 3.1125, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.0002332366963560039, | |
| "loss": 3.1202, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023298532849589849, | |
| "loss": 3.1177, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.0002327339606357931, | |
| "loss": 3.114, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.42156241926606536, | |
| "eval_loss": 3.129709482192993, | |
| "eval_runtime": 36.5135, | |
| "eval_samples_per_second": 307.174, | |
| "eval_steps_per_second": 2.574, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023248259277568767, | |
| "loss": 3.1107, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023223122491558229, | |
| "loss": 3.1111, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023197985705547685, | |
| "loss": 3.106, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023172848919537147, | |
| "loss": 3.1081, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023147712133526606, | |
| "loss": 3.1077, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023122575347516065, | |
| "loss": 3.116, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023097438561505524, | |
| "loss": 3.1168, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023072301775494983, | |
| "loss": 3.1137, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023047164989484442, | |
| "loss": 3.1065, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00023022028203473904, | |
| "loss": 3.1083, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 0.42211211205589316, | |
| "eval_loss": 3.1262805461883545, | |
| "eval_runtime": 36.106, | |
| "eval_samples_per_second": 310.641, | |
| "eval_steps_per_second": 2.603, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002299689141746336, | |
| "loss": 3.1193, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022972257367173034, | |
| "loss": 3.0997, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002294712058116249, | |
| "loss": 3.1013, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022921983795151952, | |
| "loss": 3.1049, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022896847009141409, | |
| "loss": 3.1152, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022871710223130868, | |
| "loss": 3.1077, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022846573437120327, | |
| "loss": 3.1146, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022821436651109786, | |
| "loss": 3.1054, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022796299865099248, | |
| "loss": 3.1087, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.00022771163079088704, | |
| "loss": 3.1045, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.4221415974928954, | |
| "eval_loss": 3.124873161315918, | |
| "eval_runtime": 37.1734, | |
| "eval_samples_per_second": 301.721, | |
| "eval_steps_per_second": 2.529, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002274627766093827, | |
| "loss": 3.1024, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002272114087492773, | |
| "loss": 3.0938, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022696004088917189, | |
| "loss": 3.1049, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022670867302906648, | |
| "loss": 3.109, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022645730516896107, | |
| "loss": 3.1033, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.0002262059373088557, | |
| "loss": 3.1066, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022595456944875025, | |
| "loss": 3.1087, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022570320158864487, | |
| "loss": 3.101, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022545183372853943, | |
| "loss": 3.1137, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022520046586843405, | |
| "loss": 3.1084, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_accuracy": 0.4223374650386961, | |
| "eval_loss": 3.1216838359832764, | |
| "eval_runtime": 39.4599, | |
| "eval_samples_per_second": 284.238, | |
| "eval_steps_per_second": 2.382, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022494909800832864, | |
| "loss": 3.1006, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022469773014822323, | |
| "loss": 3.1045, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022444636228811783, | |
| "loss": 3.1001, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00022419499442801244, | |
| "loss": 3.0988, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.000223943626567907, | |
| "loss": 3.0981, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022369477238640264, | |
| "loss": 3.1027, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022344340452629723, | |
| "loss": 3.1046, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022319203666619185, | |
| "loss": 3.1025, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022294066880608642, | |
| "loss": 3.1025, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022268930094598103, | |
| "loss": 3.097, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.42269550248800924, | |
| "eval_loss": 3.1202731132507324, | |
| "eval_runtime": 36.6594, | |
| "eval_samples_per_second": 305.952, | |
| "eval_steps_per_second": 2.564, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002224379330858756, | |
| "loss": 3.104, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022218656522577022, | |
| "loss": 3.0977, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002219351973656648, | |
| "loss": 3.1121, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.0002216838295055594, | |
| "loss": 3.1011, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.000221432461645454, | |
| "loss": 3.0963, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00022118109378534858, | |
| "loss": 3.1082, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022092972592524317, | |
| "loss": 3.0994, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002206783580651378, | |
| "loss": 3.0957, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022042699020503236, | |
| "loss": 3.0947, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00022017562234492697, | |
| "loss": 3.0926, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_accuracy": 0.42268707807743716, | |
| "eval_loss": 3.119593381881714, | |
| "eval_runtime": 37.8215, | |
| "eval_samples_per_second": 296.551, | |
| "eval_steps_per_second": 2.485, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021992425448482154, | |
| "loss": 3.0955, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021967288662471616, | |
| "loss": 3.0973, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021942151876461075, | |
| "loss": 3.1098, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002191701509045053, | |
| "loss": 3.1007, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021891878304439993, | |
| "loss": 3.0992, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.0002186674151842945, | |
| "loss": 3.1029, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021841604732418911, | |
| "loss": 3.0947, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 0.00021816719314268475, | |
| "loss": 3.0941, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021791582528257934, | |
| "loss": 3.1004, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021766445742247396, | |
| "loss": 3.1003, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.4228331011940198, | |
| "eval_loss": 3.1163218021392822, | |
| "eval_runtime": 37.158, | |
| "eval_samples_per_second": 301.846, | |
| "eval_steps_per_second": 2.53, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021741308956236852, | |
| "loss": 3.0986, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021716172170226314, | |
| "loss": 3.0999, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002169103538421577, | |
| "loss": 3.0994, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021665898598205232, | |
| "loss": 3.0976, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021640761812194691, | |
| "loss": 3.0949, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002161562502618415, | |
| "loss": 3.0923, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002159048824017361, | |
| "loss": 3.0909, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002156535145416307, | |
| "loss": 3.0944, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00021540214668152528, | |
| "loss": 3.0997, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0002151507788214199, | |
| "loss": 3.097, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.4235625147427185, | |
| "eval_loss": 3.1130168437957764, | |
| "eval_runtime": 36.3501, | |
| "eval_samples_per_second": 308.555, | |
| "eval_steps_per_second": 2.586, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021489941096131446, | |
| "loss": 3.0878, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021464804310120905, | |
| "loss": 3.094, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021439667524110364, | |
| "loss": 3.0976, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021414530738099824, | |
| "loss": 3.0959, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021389393952089285, | |
| "loss": 3.098, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021364257166078742, | |
| "loss": 3.0891, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021339120380068204, | |
| "loss": 3.0881, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002131398359405766, | |
| "loss": 3.0934, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021288846808047122, | |
| "loss": 3.0997, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021263961389896685, | |
| "loss": 3.0934, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.4233083783571276, | |
| "eval_loss": 3.112696886062622, | |
| "eval_runtime": 36.2826, | |
| "eval_samples_per_second": 309.129, | |
| "eval_steps_per_second": 2.591, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021238824603886144, | |
| "loss": 3.0886, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00021213687817875606, | |
| "loss": 3.0891, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021188551031865063, | |
| "loss": 3.0952, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021163414245854525, | |
| "loss": 3.0869, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.0002113827745984398, | |
| "loss": 3.0905, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021113140673833443, | |
| "loss": 3.0939, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021088255255683006, | |
| "loss": 3.0958, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021063118469672465, | |
| "loss": 3.0882, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021037981683661927, | |
| "loss": 3.0852, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00021012844897651384, | |
| "loss": 3.0957, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_accuracy": 0.4237239826120166, | |
| "eval_loss": 3.110541820526123, | |
| "eval_runtime": 37.0216, | |
| "eval_samples_per_second": 302.958, | |
| "eval_steps_per_second": 2.539, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020987708111640845, | |
| "loss": 3.0968, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020962571325630302, | |
| "loss": 3.0909, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020937434539619764, | |
| "loss": 3.0826, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.00020912297753609223, | |
| "loss": 3.086, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002088716096759868, | |
| "loss": 3.091, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020862275549448245, | |
| "loss": 3.0865, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020837138763437704, | |
| "loss": 3.092, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020812001977427164, | |
| "loss": 3.0916, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020786865191416623, | |
| "loss": 3.0924, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020761728405406082, | |
| "loss": 3.0915, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.42398513933975085, | |
| "eval_loss": 3.10992169380188, | |
| "eval_runtime": 36.5153, | |
| "eval_samples_per_second": 307.159, | |
| "eval_steps_per_second": 2.574, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020736591619395544, | |
| "loss": 3.0841, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020711454833385, | |
| "loss": 3.088, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020686318047374462, | |
| "loss": 3.0941, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00020661181261363918, | |
| "loss": 3.0898, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002063604447535338, | |
| "loss": 3.0885, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.0002061090768934284, | |
| "loss": 3.0918, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020585770903332298, | |
| "loss": 3.0962, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020560634117321758, | |
| "loss": 3.096, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002053549733131122, | |
| "loss": 3.0846, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020510611913160783, | |
| "loss": 3.0908, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.4245425545059364, | |
| "eval_loss": 3.1069419384002686, | |
| "eval_runtime": 37.2669, | |
| "eval_samples_per_second": 300.964, | |
| "eval_steps_per_second": 2.522, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002048547512715024, | |
| "loss": 3.0851, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.000204603383411397, | |
| "loss": 3.0859, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002043520155512916, | |
| "loss": 3.0877, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0002041006476911862, | |
| "loss": 3.08, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020384927983108079, | |
| "loss": 3.0872, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020359791197097535, | |
| "loss": 3.0934, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020334654411086997, | |
| "loss": 3.0898, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.00020309517625076456, | |
| "loss": 3.091, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020284380839065915, | |
| "loss": 3.0903, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020259244053055374, | |
| "loss": 3.0764, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.42455589315600883, | |
| "eval_loss": 3.104147434234619, | |
| "eval_runtime": 36.3216, | |
| "eval_samples_per_second": 308.797, | |
| "eval_steps_per_second": 2.588, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020234107267044833, | |
| "loss": 3.0781, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020208970481034292, | |
| "loss": 3.0805, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020183833695023754, | |
| "loss": 3.0861, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002015869690901321, | |
| "loss": 3.0906, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020133560123002672, | |
| "loss": 3.0837, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002010842333699213, | |
| "loss": 3.0827, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002008328655098159, | |
| "loss": 3.082, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002005814976497105, | |
| "loss": 3.0838, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002003301297896051, | |
| "loss": 3.0834, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.00020007876192949968, | |
| "loss": 3.0855, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.42506837813247667, | |
| "eval_loss": 3.1023147106170654, | |
| "eval_runtime": 36.3302, | |
| "eval_samples_per_second": 308.724, | |
| "eval_steps_per_second": 2.587, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019982990774799532, | |
| "loss": 3.0823, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019957853988788993, | |
| "loss": 3.0877, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001993271720277845, | |
| "loss": 3.0891, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001990758041676791, | |
| "loss": 3.0847, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001988244363075737, | |
| "loss": 3.0769, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019857306844746827, | |
| "loss": 3.0842, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.0001983217005873629, | |
| "loss": 3.0771, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019807033272725745, | |
| "loss": 3.0878, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019781896486715207, | |
| "loss": 3.0876, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019756759700704666, | |
| "loss": 3.0782, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 0.42481002954159974, | |
| "eval_loss": 3.100797414779663, | |
| "eval_runtime": 37.0564, | |
| "eval_samples_per_second": 302.674, | |
| "eval_steps_per_second": 2.537, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019731622914694126, | |
| "loss": 3.0788, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00019706486128683585, | |
| "loss": 3.0811, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019681349342673044, | |
| "loss": 3.0799, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019656212556662503, | |
| "loss": 3.0737, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019631075770651965, | |
| "loss": 3.0815, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001960593898464142, | |
| "loss": 3.0885, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019580802198630883, | |
| "loss": 3.0785, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001955566541262034, | |
| "loss": 3.0738, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.000195305286266098, | |
| "loss": 3.0826, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001950539184059926, | |
| "loss": 3.0821, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_accuracy": 0.4254973210374381, | |
| "eval_loss": 3.0979230403900146, | |
| "eval_runtime": 36.8694, | |
| "eval_samples_per_second": 304.209, | |
| "eval_steps_per_second": 2.55, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019480255054588717, | |
| "loss": 3.0689, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019455369636438286, | |
| "loss": 3.0767, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.0001943048421828785, | |
| "loss": 3.0768, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00019405598800137415, | |
| "loss": 3.0746, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019380462014126872, | |
| "loss": 3.0812, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019355325228116333, | |
| "loss": 3.0721, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001933018844210579, | |
| "loss": 3.0701, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001930505165609525, | |
| "loss": 3.0769, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019279914870084708, | |
| "loss": 3.0827, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019254778084074167, | |
| "loss": 3.075, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.425449582710863, | |
| "eval_loss": 3.0971269607543945, | |
| "eval_runtime": 36.1836, | |
| "eval_samples_per_second": 309.975, | |
| "eval_steps_per_second": 2.598, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.0001922964129806363, | |
| "loss": 3.0742, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019204504512053086, | |
| "loss": 3.0804, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019179367726042547, | |
| "loss": 3.0788, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019154230940032004, | |
| "loss": 3.078, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019129094154021466, | |
| "loss": 3.0729, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00019103957368010925, | |
| "loss": 3.0704, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019078820582000384, | |
| "loss": 3.0793, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019053683795989843, | |
| "loss": 3.0789, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019028547009979305, | |
| "loss": 3.0835, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019003661591828868, | |
| "loss": 3.0794, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.4256580868725218, | |
| "eval_loss": 3.0950751304626465, | |
| "eval_runtime": 36.1829, | |
| "eval_samples_per_second": 309.98, | |
| "eval_steps_per_second": 2.598, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018978524805818325, | |
| "loss": 3.0746, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018953388019807787, | |
| "loss": 3.0778, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018928251233797246, | |
| "loss": 3.0743, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018903114447786705, | |
| "loss": 3.0822, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018877977661776164, | |
| "loss": 3.0782, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001885284087576562, | |
| "loss": 3.0705, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00018827704089755082, | |
| "loss": 3.0737, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.0001880256730374454, | |
| "loss": 3.0736, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018777681885594105, | |
| "loss": 3.0712, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018752545099583567, | |
| "loss": 3.0836, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.42573460860188483, | |
| "eval_loss": 3.0936806201934814, | |
| "eval_runtime": 36.1343, | |
| "eval_samples_per_second": 310.398, | |
| "eval_steps_per_second": 2.601, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018727408313573023, | |
| "loss": 3.0763, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018702271527562485, | |
| "loss": 3.0831, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001867713474155194, | |
| "loss": 3.0768, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018651997955541403, | |
| "loss": 3.0686, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018626861169530862, | |
| "loss": 3.0766, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001860172438352032, | |
| "loss": 3.0721, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001857658759750978, | |
| "loss": 3.0812, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00018551450811499242, | |
| "loss": 3.0853, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.000185263140254887, | |
| "loss": 3.0753, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.0001850117723947816, | |
| "loss": 3.0744, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.42582517101553463, | |
| "eval_loss": 3.092123508453369, | |
| "eval_runtime": 36.2715, | |
| "eval_samples_per_second": 309.224, | |
| "eval_steps_per_second": 2.592, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018476040453467617, | |
| "loss": 3.077, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001845090366745708, | |
| "loss": 3.0822, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018425766881446535, | |
| "loss": 3.0791, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018400630095435997, | |
| "loss": 3.0776, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018375493309425456, | |
| "loss": 3.0781, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018350356523414913, | |
| "loss": 3.0756, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018325219737404374, | |
| "loss": 3.0739, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001830008295139383, | |
| "loss": 3.0697, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018274946165383293, | |
| "loss": 3.0747, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00018249809379372752, | |
| "loss": 3.0692, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.42626464443371115, | |
| "eval_loss": 3.090735912322998, | |
| "eval_runtime": 36.0323, | |
| "eval_samples_per_second": 311.276, | |
| "eval_steps_per_second": 2.609, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001822467259336221, | |
| "loss": 3.0701, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.0001819953580735167, | |
| "loss": 3.0706, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018174399021341132, | |
| "loss": 3.0734, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018149262235330588, | |
| "loss": 3.0719, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001812412544932005, | |
| "loss": 3.07, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018098988663309507, | |
| "loss": 3.0743, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018074103245159073, | |
| "loss": 3.0768, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00018048966459148532, | |
| "loss": 3.0598, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001802382967313799, | |
| "loss": 3.0653, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017998692887127453, | |
| "loss": 3.0717, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.42618812270434814, | |
| "eval_loss": 3.0900797843933105, | |
| "eval_runtime": 36.3, | |
| "eval_samples_per_second": 308.981, | |
| "eval_steps_per_second": 2.59, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001797355610111691, | |
| "loss": 3.0752, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.0001794841931510637, | |
| "loss": 3.0656, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017923282529095827, | |
| "loss": 3.0758, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 0.00017898145743085287, | |
| "loss": 3.0827, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017873008957074746, | |
| "loss": 3.068, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017847872171064205, | |
| "loss": 3.0645, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017822735385053667, | |
| "loss": 3.0752, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017797598599043123, | |
| "loss": 3.0726, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017772461813032585, | |
| "loss": 3.0736, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017747325027022041, | |
| "loss": 3.0697, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.42656862524852013, | |
| "eval_loss": 3.0877325534820557, | |
| "eval_runtime": 37.2501, | |
| "eval_samples_per_second": 301.1, | |
| "eval_steps_per_second": 2.523, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017722188241011503, | |
| "loss": 3.0779, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017697051455000962, | |
| "loss": 3.0736, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017671914668990421, | |
| "loss": 3.0657, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001764677788297988, | |
| "loss": 3.065, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.00017621641096969342, | |
| "loss": 3.0683, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.000175965043109588, | |
| "loss": 3.0656, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001757136752494826, | |
| "loss": 3.0714, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017546482106797824, | |
| "loss": 3.0804, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017521345320787283, | |
| "loss": 3.0636, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017496208534776742, | |
| "loss": 3.0689, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 0.426702713783459, | |
| "eval_loss": 3.0857808589935303, | |
| "eval_runtime": 36.1585, | |
| "eval_samples_per_second": 310.189, | |
| "eval_steps_per_second": 2.6, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017471071748766202, | |
| "loss": 3.0627, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017445934962755663, | |
| "loss": 3.0655, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001742079817674512, | |
| "loss": 3.0711, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001739566139073458, | |
| "loss": 3.0684, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017370775972584142, | |
| "loss": 3.066, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017345639186573604, | |
| "loss": 3.0587, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.0001732050240056306, | |
| "loss": 3.0705, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00017295365614552522, | |
| "loss": 3.0652, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001727022882854198, | |
| "loss": 3.0718, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001724509204253144, | |
| "loss": 3.067, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.42674553787053365, | |
| "eval_loss": 3.08451247215271, | |
| "eval_runtime": 37.357, | |
| "eval_samples_per_second": 300.238, | |
| "eval_steps_per_second": 2.516, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.000172199552565209, | |
| "loss": 3.0652, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001719481847051036, | |
| "loss": 3.0697, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017169681684499818, | |
| "loss": 3.0699, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.0001714454489848928, | |
| "loss": 3.0656, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017119408112478736, | |
| "loss": 3.0579, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017094271326468198, | |
| "loss": 3.0586, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017069134540457655, | |
| "loss": 3.0725, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017043997754447116, | |
| "loss": 3.0713, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00017018860968436573, | |
| "loss": 3.0674, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 0.00016993724182426035, | |
| "loss": 3.0635, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.4271583339885653, | |
| "eval_loss": 3.082775115966797, | |
| "eval_runtime": 36.4468, | |
| "eval_samples_per_second": 307.736, | |
| "eval_steps_per_second": 2.579, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016968587396415494, | |
| "loss": 3.0589, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001694345061040495, | |
| "loss": 3.0656, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016918313824394412, | |
| "loss": 3.0657, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016893177038383868, | |
| "loss": 3.0622, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001686804025237333, | |
| "loss": 3.0627, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001684290346636279, | |
| "loss": 3.063, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016817766680352249, | |
| "loss": 3.0637, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016792629894341708, | |
| "loss": 3.0639, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001676774447619127, | |
| "loss": 3.0639, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00016742607690180733, | |
| "loss": 3.0678, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.4273408628842935, | |
| "eval_loss": 3.0823299884796143, | |
| "eval_runtime": 36.1917, | |
| "eval_samples_per_second": 309.906, | |
| "eval_steps_per_second": 2.597, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001671747090417019, | |
| "loss": 3.0582, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001669233411815965, | |
| "loss": 3.0708, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001666719733214911, | |
| "loss": 3.0692, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001664206054613857, | |
| "loss": 3.0671, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016616923760128029, | |
| "loss": 3.0662, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001659178697411749, | |
| "loss": 3.0653, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016566650188106947, | |
| "loss": 3.0669, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.0001654151340209641, | |
| "loss": 3.0552, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016516376616085865, | |
| "loss": 3.0569, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016491239830075327, | |
| "loss": 3.067, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.4276448436991025, | |
| "eval_loss": 3.0794825553894043, | |
| "eval_runtime": 36.2802, | |
| "eval_samples_per_second": 309.15, | |
| "eval_steps_per_second": 2.591, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016466103044064783, | |
| "loss": 3.0623, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016440966258054242, | |
| "loss": 3.0612, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00016415829472043704, | |
| "loss": 3.0588, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001639069268603316, | |
| "loss": 3.064, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016365555900022623, | |
| "loss": 3.0564, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.0001634041911401208, | |
| "loss": 3.0605, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016315533695861645, | |
| "loss": 3.0591, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016290396909851104, | |
| "loss": 3.0639, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016265260123840563, | |
| "loss": 3.0612, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016240123337830025, | |
| "loss": 3.0597, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 0.4277283857706089, | |
| "eval_loss": 3.078927516937256, | |
| "eval_runtime": 36.9604, | |
| "eval_samples_per_second": 303.46, | |
| "eval_steps_per_second": 2.543, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016214986551819482, | |
| "loss": 3.0607, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016190101133669048, | |
| "loss": 3.0505, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016164964347658507, | |
| "loss": 3.0628, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016139827561647966, | |
| "loss": 3.0592, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 0.00016114690775637428, | |
| "loss": 3.0488, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016089553989626884, | |
| "loss": 3.0533, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016064417203616346, | |
| "loss": 3.0666, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016039280417605803, | |
| "loss": 3.0596, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00016014143631595264, | |
| "loss": 3.0604, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001598900684558472, | |
| "loss": 3.0648, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.4278596661686904, | |
| "eval_loss": 3.0768725872039795, | |
| "eval_runtime": 37.0258, | |
| "eval_samples_per_second": 302.924, | |
| "eval_steps_per_second": 2.539, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015963870059574183, | |
| "loss": 3.0614, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015938733273563642, | |
| "loss": 3.0541, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015913596487553098, | |
| "loss": 3.0595, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.0001588845970154256, | |
| "loss": 3.0624, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015863322915532016, | |
| "loss": 3.055, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015838186129521478, | |
| "loss": 3.0585, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015813049343510937, | |
| "loss": 3.0555, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015787912557500397, | |
| "loss": 3.0501, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015762775771489856, | |
| "loss": 3.0667, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015737638985479315, | |
| "loss": 3.0681, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.42812924730699675, | |
| "eval_loss": 3.075896739959717, | |
| "eval_runtime": 36.4669, | |
| "eval_samples_per_second": 307.567, | |
| "eval_steps_per_second": 2.578, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015712502199468774, | |
| "loss": 3.0554, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015687365413458236, | |
| "loss": 3.063, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015662228627447692, | |
| "loss": 3.0611, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015637091841437154, | |
| "loss": 3.0647, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.0001561195505542661, | |
| "loss": 3.0552, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015586818269416072, | |
| "loss": 3.0629, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015561932851265638, | |
| "loss": 3.0619, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015536796065255095, | |
| "loss": 3.0531, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015511659279244557, | |
| "loss": 3.063, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015486522493234013, | |
| "loss": 3.0513, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.42832300875015444, | |
| "eval_loss": 3.0737130641937256, | |
| "eval_runtime": 36.8692, | |
| "eval_samples_per_second": 304.211, | |
| "eval_steps_per_second": 2.55, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015461385707223472, | |
| "loss": 3.0546, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001543624892121293, | |
| "loss": 3.0545, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001541111213520239, | |
| "loss": 3.0543, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015385975349191852, | |
| "loss": 3.0525, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001536083856318131, | |
| "loss": 3.0533, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001533570177717077, | |
| "loss": 3.0616, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015310564991160227, | |
| "loss": 3.0542, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0001528542820514969, | |
| "loss": 3.0543, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015260291419139148, | |
| "loss": 3.0603, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015235154633128607, | |
| "loss": 3.0566, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.42880530625540564, | |
| "eval_loss": 3.0726654529571533, | |
| "eval_runtime": 36.7531, | |
| "eval_samples_per_second": 305.171, | |
| "eval_steps_per_second": 2.558, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.00015210017847118066, | |
| "loss": 3.0475, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015184881061107528, | |
| "loss": 3.0545, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015159744275096984, | |
| "loss": 3.0616, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015134607489086446, | |
| "loss": 3.0503, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015109470703075903, | |
| "loss": 3.0462, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015084333917065365, | |
| "loss": 3.0586, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015059448498914928, | |
| "loss": 3.0502, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.00015034563080764494, | |
| "loss": 3.0535, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001500942629475395, | |
| "loss": 3.0608, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001498428950874341, | |
| "loss": 3.0552, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.42880671032383433, | |
| "eval_loss": 3.071218967437744, | |
| "eval_runtime": 36.2431, | |
| "eval_samples_per_second": 309.466, | |
| "eval_steps_per_second": 2.594, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001495915272273287, | |
| "loss": 3.0546, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001493401593672233, | |
| "loss": 3.053, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001490887915071179, | |
| "loss": 3.0506, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001488374236470125, | |
| "loss": 3.0562, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014858605578690708, | |
| "loss": 3.0572, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014833468792680167, | |
| "loss": 3.0568, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014808332006669626, | |
| "loss": 3.0571, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014783195220659085, | |
| "loss": 3.0483, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014758058434648544, | |
| "loss": 3.0486, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014732921648638004, | |
| "loss": 3.0457, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.42915000505464634, | |
| "eval_loss": 3.0692341327667236, | |
| "eval_runtime": 36.4055, | |
| "eval_samples_per_second": 308.086, | |
| "eval_steps_per_second": 2.582, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014707784862627463, | |
| "loss": 3.0448, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014682648076616922, | |
| "loss": 3.0498, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001465751129060638, | |
| "loss": 3.0505, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.0001463237450459584, | |
| "loss": 3.0531, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.000146072377185853, | |
| "loss": 3.0526, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014582100932574758, | |
| "loss": 3.0585, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001455696414656422, | |
| "loss": 3.0519, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001453182736055368, | |
| "loss": 3.0545, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014506690574543138, | |
| "loss": 3.0521, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014481553788532598, | |
| "loss": 3.0425, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.4291008626596426, | |
| "eval_loss": 3.0679004192352295, | |
| "eval_runtime": 36.1636, | |
| "eval_samples_per_second": 310.146, | |
| "eval_steps_per_second": 2.599, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001445666837038216, | |
| "loss": 3.0616, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001443153158437162, | |
| "loss": 3.057, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001440639479836108, | |
| "loss": 3.052, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001438125801235054, | |
| "loss": 3.0501, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001435612122634, | |
| "loss": 3.0457, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001433098444032946, | |
| "loss": 3.0506, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.00014305847654318918, | |
| "loss": 3.0478, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014280710868308378, | |
| "loss": 3.0545, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014255574082297834, | |
| "loss": 3.0554, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014230437296287296, | |
| "loss": 3.0573, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.42917527828636254, | |
| "eval_loss": 3.0663866996765137, | |
| "eval_runtime": 36.3952, | |
| "eval_samples_per_second": 308.172, | |
| "eval_steps_per_second": 2.583, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014205300510276755, | |
| "loss": 3.0485, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014180163724266214, | |
| "loss": 3.0476, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014155026938255673, | |
| "loss": 3.0442, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014129890152245132, | |
| "loss": 3.0486, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014104753366234592, | |
| "loss": 3.0384, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001407961658022405, | |
| "loss": 3.0539, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001405447979421351, | |
| "loss": 3.0429, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0001402934300820297, | |
| "loss": 3.0444, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00014004457590052535, | |
| "loss": 3.0489, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013979320804041994, | |
| "loss": 3.0555, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.42978183584755186, | |
| "eval_loss": 3.0650320053100586, | |
| "eval_runtime": 37.0145, | |
| "eval_samples_per_second": 303.016, | |
| "eval_steps_per_second": 2.54, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013954184018031453, | |
| "loss": 3.0507, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013929047232020912, | |
| "loss": 3.0453, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013903910446010372, | |
| "loss": 3.0495, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001387877365999983, | |
| "loss": 3.0446, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001385363687398929, | |
| "loss": 3.0488, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013828500087978752, | |
| "loss": 3.0498, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001380336330196821, | |
| "loss": 3.0441, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013778226515957667, | |
| "loss": 3.0435, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013753089729947126, | |
| "loss": 3.0517, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013727952943936585, | |
| "loss": 3.0421, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 0.4294195861929527, | |
| "eval_loss": 3.0636541843414307, | |
| "eval_runtime": 36.752, | |
| "eval_samples_per_second": 305.181, | |
| "eval_steps_per_second": 2.558, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.00013702816157926045, | |
| "loss": 3.0412, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013677679371915506, | |
| "loss": 3.0548, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013652542585904966, | |
| "loss": 3.0409, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013627405799894425, | |
| "loss": 3.0377, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013602269013883884, | |
| "loss": 3.0429, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013577383595733447, | |
| "loss": 3.0467, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013552498177583013, | |
| "loss": 3.0496, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013527361391572472, | |
| "loss": 3.0424, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013502224605561932, | |
| "loss": 3.043, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001347708781955139, | |
| "loss": 3.0496, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.42957333168589307, | |
| "eval_loss": 3.062688112258911, | |
| "eval_runtime": 36.3303, | |
| "eval_samples_per_second": 308.723, | |
| "eval_steps_per_second": 2.587, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001345195103354085, | |
| "loss": 3.0434, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.0001342681424753031, | |
| "loss": 3.0392, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00013401677461519768, | |
| "loss": 3.041, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013376540675509227, | |
| "loss": 3.0526, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001335140388949869, | |
| "loss": 3.046, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013326267103488148, | |
| "loss": 3.0398, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013301130317477607, | |
| "loss": 3.0473, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013275993531467066, | |
| "loss": 3.0368, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013250856745456523, | |
| "loss": 3.0427, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013225719959445985, | |
| "loss": 3.0415, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.4300071888303548, | |
| "eval_loss": 3.060805320739746, | |
| "eval_runtime": 37.0174, | |
| "eval_samples_per_second": 302.993, | |
| "eval_steps_per_second": 2.539, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013200583173435444, | |
| "loss": 3.0429, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013175446387424903, | |
| "loss": 3.0494, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00013150309601414362, | |
| "loss": 3.0384, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001312517281540382, | |
| "loss": 3.0438, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.0001310003602939328, | |
| "loss": 3.0427, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001307489924338274, | |
| "loss": 3.0447, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013050013825232306, | |
| "loss": 3.0438, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00013024877039221765, | |
| "loss": 3.0403, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012999740253211224, | |
| "loss": 3.0478, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012974603467200683, | |
| "loss": 3.0412, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.4298436148584137, | |
| "eval_loss": 3.0598626136779785, | |
| "eval_runtime": 36.2351, | |
| "eval_samples_per_second": 309.534, | |
| "eval_steps_per_second": 2.594, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012949466681190142, | |
| "loss": 3.0411, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000129243298951796, | |
| "loss": 3.035, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001289919310916906, | |
| "loss": 3.0464, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001287405632315852, | |
| "loss": 3.0369, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00012848919537147979, | |
| "loss": 3.0428, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.0001282378275113744, | |
| "loss": 3.0436, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000127986459651269, | |
| "loss": 3.0454, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012773509179116356, | |
| "loss": 3.0361, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012748372393105815, | |
| "loss": 3.0437, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012723235607095274, | |
| "loss": 3.0373, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.4302465824974446, | |
| "eval_loss": 3.057598829269409, | |
| "eval_runtime": 36.2031, | |
| "eval_samples_per_second": 309.808, | |
| "eval_steps_per_second": 2.596, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001269835018894484, | |
| "loss": 3.0426, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.000126732134029343, | |
| "loss": 3.041, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.0001264807661692376, | |
| "loss": 3.036, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012622939830913218, | |
| "loss": 3.0396, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012597803044902677, | |
| "loss": 3.0418, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012572666258892136, | |
| "loss": 3.0335, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012547529472881595, | |
| "loss": 3.0334, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012522392686871054, | |
| "loss": 3.0381, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00012497255900860516, | |
| "loss": 3.0393, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012472119114849975, | |
| "loss": 3.0393, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.43052950228582343, | |
| "eval_loss": 3.05704665184021, | |
| "eval_runtime": 36.208, | |
| "eval_samples_per_second": 309.765, | |
| "eval_steps_per_second": 2.596, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012446982328839434, | |
| "loss": 3.0383, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012421845542828894, | |
| "loss": 3.0441, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012396708756818353, | |
| "loss": 3.0388, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012371823338667916, | |
| "loss": 3.0403, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012346686552657378, | |
| "loss": 3.0368, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012321549766646837, | |
| "loss": 3.0405, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012296412980636296, | |
| "loss": 3.0351, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001227152756248586, | |
| "loss": 3.0355, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001224639077647532, | |
| "loss": 3.038, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012221253990464778, | |
| "loss": 3.0312, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.43072256169476675, | |
| "eval_loss": 3.056051254272461, | |
| "eval_runtime": 35.9605, | |
| "eval_samples_per_second": 311.897, | |
| "eval_steps_per_second": 2.614, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.00012196117204454238, | |
| "loss": 3.0336, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012170980418443696, | |
| "loss": 3.0371, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012145843632433155, | |
| "loss": 3.0415, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012120706846422614, | |
| "loss": 3.033, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012095570060412075, | |
| "loss": 3.0401, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012070433274401534, | |
| "loss": 3.0407, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012045296488390993, | |
| "loss": 3.0389, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00012020159702380452, | |
| "loss": 3.0326, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011995022916369911, | |
| "loss": 3.0343, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011969886130359372, | |
| "loss": 3.0397, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 0.43072607186583844, | |
| "eval_loss": 3.0532803535461426, | |
| "eval_runtime": 36.5519, | |
| "eval_samples_per_second": 306.851, | |
| "eval_steps_per_second": 2.572, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011945000712208935, | |
| "loss": 3.041, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011919863926198396, | |
| "loss": 3.0375, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00011894727140187855, | |
| "loss": 3.03, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011869590354177314, | |
| "loss": 3.0314, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011844453568166773, | |
| "loss": 3.0399, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011819316782156232, | |
| "loss": 3.043, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011794179996145693, | |
| "loss": 3.0324, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011769043210135152, | |
| "loss": 3.037, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011743906424124611, | |
| "loss": 3.0391, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011718769638114069, | |
| "loss": 3.0303, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.43111219068372514, | |
| "eval_loss": 3.0526981353759766, | |
| "eval_runtime": 36.4926, | |
| "eval_samples_per_second": 307.35, | |
| "eval_steps_per_second": 2.576, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011693632852103528, | |
| "loss": 3.0329, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011668496066092987, | |
| "loss": 3.0346, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011643359280082448, | |
| "loss": 3.0405, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011618222494071907, | |
| "loss": 3.0344, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00011593085708061366, | |
| "loss": 3.0389, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011567948922050825, | |
| "loss": 3.0361, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011542812136040285, | |
| "loss": 3.0329, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011517675350029745, | |
| "loss": 3.0304, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011492538564019204, | |
| "loss": 3.0316, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011467401778008663, | |
| "loss": 3.0403, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.43146250575668055, | |
| "eval_loss": 3.0502421855926514, | |
| "eval_runtime": 36.2647, | |
| "eval_samples_per_second": 309.281, | |
| "eval_steps_per_second": 2.592, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011442516359858228, | |
| "loss": 3.0443, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011417379573847687, | |
| "loss": 3.0376, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011392242787837146, | |
| "loss": 3.0313, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011367106001826606, | |
| "loss": 3.0429, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011341969215816065, | |
| "loss": 3.0342, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011316832429805525, | |
| "loss": 3.0335, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 0.00011291695643794984, | |
| "loss": 3.0375, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011266558857784443, | |
| "loss": 3.0247, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.000112414220717739, | |
| "loss": 3.0309, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011216285285763361, | |
| "loss": 3.0326, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_accuracy": 0.43156359868354544, | |
| "eval_loss": 3.049257278442383, | |
| "eval_runtime": 36.2389, | |
| "eval_samples_per_second": 309.501, | |
| "eval_steps_per_second": 2.594, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.0001119114849975282, | |
| "loss": 3.0389, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011166011713742279, | |
| "loss": 3.0309, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011141126295591844, | |
| "loss": 3.0375, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011115989509581303, | |
| "loss": 3.0351, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011090852723570762, | |
| "loss": 3.0324, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011065715937560223, | |
| "loss": 3.0369, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011040579151549682, | |
| "loss": 3.0289, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00011015442365539141, | |
| "loss": 3.0346, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.000109903055795286, | |
| "loss": 3.0234, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0001096516879351806, | |
| "loss": 3.0322, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.4314962033989688, | |
| "eval_loss": 3.0480940341949463, | |
| "eval_runtime": 35.8603, | |
| "eval_samples_per_second": 312.77, | |
| "eval_steps_per_second": 2.621, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.0001094003200750752, | |
| "loss": 3.027, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010914895221496979, | |
| "loss": 3.03, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010890009803346544, | |
| "loss": 3.0236, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010864873017336003, | |
| "loss": 3.0343, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010839736231325462, | |
| "loss": 3.0335, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010814599445314921, | |
| "loss": 3.0263, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010789462659304382, | |
| "loss": 3.0269, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010764325873293841, | |
| "loss": 3.0391, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.000107391890872833, | |
| "loss": 3.0361, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010714052301272759, | |
| "loss": 3.0265, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.4318640693272827, | |
| "eval_loss": 3.0469460487365723, | |
| "eval_runtime": 37.1071, | |
| "eval_samples_per_second": 302.26, | |
| "eval_steps_per_second": 2.533, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00010688915515262217, | |
| "loss": 3.0313, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010663778729251676, | |
| "loss": 3.0319, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010638641943241136, | |
| "loss": 3.0247, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010613505157230595, | |
| "loss": 3.0264, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001058861973908016, | |
| "loss": 3.0262, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0001056348295306962, | |
| "loss": 3.0327, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010538346167059079, | |
| "loss": 3.0318, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010513209381048538, | |
| "loss": 3.0356, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010488072595037997, | |
| "loss": 3.0374, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010462935809027457, | |
| "loss": 3.0231, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.43201430464915136, | |
| "eval_loss": 3.045305013656616, | |
| "eval_runtime": 37.1474, | |
| "eval_samples_per_second": 301.933, | |
| "eval_steps_per_second": 2.53, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010437799023016916, | |
| "loss": 3.0296, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010412662237006376, | |
| "loss": 3.025, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00010387525450995835, | |
| "loss": 3.0329, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010362388664985294, | |
| "loss": 3.0268, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010337251878974754, | |
| "loss": 3.0259, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010312115092964213, | |
| "loss": 3.0298, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010286978306953673, | |
| "loss": 3.0296, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010261841520943132, | |
| "loss": 3.0291, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001023670473493259, | |
| "loss": 3.0371, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.0001021156794892205, | |
| "loss": 3.0259, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.43211188740494455, | |
| "eval_loss": 3.044191837310791, | |
| "eval_runtime": 37.3457, | |
| "eval_samples_per_second": 300.329, | |
| "eval_steps_per_second": 2.517, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010186431162911509, | |
| "loss": 3.0266, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010161294376900968, | |
| "loss": 3.0272, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010136157590890427, | |
| "loss": 3.0191, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00010111020804879886, | |
| "loss": 3.0178, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010085884018869347, | |
| "loss": 3.0178, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.0001006099860071891, | |
| "loss": 3.0264, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010036113182568475, | |
| "loss": 3.0172, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00010010976396557934, | |
| "loss": 3.0276, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.985839610547395e-05, | |
| "loss": 3.0254, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.960702824536854e-05, | |
| "loss": 3.0219, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.43250292046233163, | |
| "eval_loss": 3.0422935485839844, | |
| "eval_runtime": 37.0202, | |
| "eval_samples_per_second": 302.97, | |
| "eval_steps_per_second": 2.539, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.935566038526313e-05, | |
| "loss": 3.0265, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.910429252515772e-05, | |
| "loss": 3.025, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.885292466505231e-05, | |
| "loss": 3.0164, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.860155680494692e-05, | |
| "loss": 3.0307, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.835018894484151e-05, | |
| "loss": 3.0268, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.80988210847361e-05, | |
| "loss": 3.0261, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.784745322463069e-05, | |
| "loss": 3.0213, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.75960853645253e-05, | |
| "loss": 3.0222, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.734471750441989e-05, | |
| "loss": 3.0249, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.709334964431448e-05, | |
| "loss": 3.0233, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.4324165702539679, | |
| "eval_loss": 3.0414962768554688, | |
| "eval_runtime": 37.0887, | |
| "eval_samples_per_second": 302.41, | |
| "eval_steps_per_second": 2.534, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.684198178420906e-05, | |
| "loss": 3.0177, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.659061392410365e-05, | |
| "loss": 3.0309, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.633924606399824e-05, | |
| "loss": 3.0245, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.608787820389284e-05, | |
| "loss": 3.0287, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.583651034378743e-05, | |
| "loss": 3.0152, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.558514248368203e-05, | |
| "loss": 3.0204, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.533377462357662e-05, | |
| "loss": 3.0258, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 9.508240676347121e-05, | |
| "loss": 3.0255, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.483103890336581e-05, | |
| "loss": 3.0245, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.45796710432604e-05, | |
| "loss": 3.0261, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.43273810192413537, | |
| "eval_loss": 3.040773868560791, | |
| "eval_runtime": 36.3004, | |
| "eval_samples_per_second": 308.977, | |
| "eval_steps_per_second": 2.59, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.433081686175605e-05, | |
| "loss": 3.0236, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.407944900165064e-05, | |
| "loss": 3.0339, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.382808114154523e-05, | |
| "loss": 3.021, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.357671328143983e-05, | |
| "loss": 3.0208, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.332534542133443e-05, | |
| "loss": 3.0175, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.307397756122902e-05, | |
| "loss": 3.0294, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.282260970112361e-05, | |
| "loss": 3.0258, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.25712418410182e-05, | |
| "loss": 3.0144, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.231987398091278e-05, | |
| "loss": 3.016, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 9.206850612080737e-05, | |
| "loss": 3.0221, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.43296696507801, | |
| "eval_loss": 3.038726806640625, | |
| "eval_runtime": 36.1807, | |
| "eval_samples_per_second": 309.999, | |
| "eval_steps_per_second": 2.598, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.181965193930304e-05, | |
| "loss": 3.0217, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.156828407919761e-05, | |
| "loss": 3.0149, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.131691621909222e-05, | |
| "loss": 3.0247, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.106554835898681e-05, | |
| "loss": 3.021, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.081669417748246e-05, | |
| "loss": 3.0239, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.056532631737705e-05, | |
| "loss": 3.0349, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.031395845727164e-05, | |
| "loss": 3.026, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.006259059716623e-05, | |
| "loss": 3.0178, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.981122273706082e-05, | |
| "loss": 3.0249, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.955985487695543e-05, | |
| "loss": 3.0296, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 0.43312211463937905, | |
| "eval_loss": 3.0376861095428467, | |
| "eval_runtime": 38.9475, | |
| "eval_samples_per_second": 287.978, | |
| "eval_steps_per_second": 2.414, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.930848701685002e-05, | |
| "loss": 3.0205, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.905711915674461e-05, | |
| "loss": 3.0214, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.88057512966392e-05, | |
| "loss": 3.0283, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.85543834365338e-05, | |
| "loss": 3.0163, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.83030155764284e-05, | |
| "loss": 3.02, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.805164771632299e-05, | |
| "loss": 3.0189, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.780027985621758e-05, | |
| "loss": 3.0167, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.754891199611217e-05, | |
| "loss": 3.0177, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.729754413600678e-05, | |
| "loss": 3.0226, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.704617627590137e-05, | |
| "loss": 3.0186, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.4335391229626967, | |
| "eval_loss": 3.03602933883667, | |
| "eval_runtime": 36.1657, | |
| "eval_samples_per_second": 310.128, | |
| "eval_steps_per_second": 2.599, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.679480841579594e-05, | |
| "loss": 3.0144, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.65459542342916e-05, | |
| "loss": 3.0128, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.629458637418618e-05, | |
| "loss": 3.0189, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 8.604321851408077e-05, | |
| "loss": 3.0231, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.579185065397537e-05, | |
| "loss": 3.0161, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.554048279386996e-05, | |
| "loss": 3.0188, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.528911493376456e-05, | |
| "loss": 3.027, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.503774707365915e-05, | |
| "loss": 3.017, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.478637921355374e-05, | |
| "loss": 3.0173, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.453501135344834e-05, | |
| "loss": 3.0151, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 0.43330745167196466, | |
| "eval_loss": 3.034996747970581, | |
| "eval_runtime": 36.1826, | |
| "eval_samples_per_second": 309.983, | |
| "eval_steps_per_second": 2.598, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.428364349334294e-05, | |
| "loss": 3.0227, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.403227563323753e-05, | |
| "loss": 3.0163, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.378090777313212e-05, | |
| "loss": 3.0096, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.352953991302671e-05, | |
| "loss": 3.0147, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.32781720529213e-05, | |
| "loss": 3.0051, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 8.302680419281591e-05, | |
| "loss": 3.0201, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.277795001131154e-05, | |
| "loss": 3.0169, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.252658215120615e-05, | |
| "loss": 3.008, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.227521429110074e-05, | |
| "loss": 3.0117, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.202384643099533e-05, | |
| "loss": 3.0121, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.43354333516798277, | |
| "eval_loss": 3.033334493637085, | |
| "eval_runtime": 37.3178, | |
| "eval_samples_per_second": 300.553, | |
| "eval_steps_per_second": 2.519, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.177247857088992e-05, | |
| "loss": 3.014, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.15211107107845e-05, | |
| "loss": 3.0168, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.126974285067909e-05, | |
| "loss": 3.0092, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.10183749905737e-05, | |
| "loss": 3.0231, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.076700713046829e-05, | |
| "loss": 3.0133, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.051563927036288e-05, | |
| "loss": 3.0135, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.026427141025747e-05, | |
| "loss": 3.0188, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 8.001290355015206e-05, | |
| "loss": 3.0151, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.976153569004667e-05, | |
| "loss": 3.0211, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.951016782994126e-05, | |
| "loss": 3.0142, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.4337988756220023, | |
| "eval_loss": 3.032519817352295, | |
| "eval_runtime": 37.5602, | |
| "eval_samples_per_second": 298.614, | |
| "eval_steps_per_second": 2.503, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.925879996983585e-05, | |
| "loss": 3.0117, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.900743210973044e-05, | |
| "loss": 3.0092, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.875606424962505e-05, | |
| "loss": 3.0124, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.850469638951964e-05, | |
| "loss": 3.0104, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.825584220801528e-05, | |
| "loss": 3.0136, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.800447434790988e-05, | |
| "loss": 3.0186, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.775310648780447e-05, | |
| "loss": 3.0107, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.750173862769906e-05, | |
| "loss": 3.0129, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.725037076759365e-05, | |
| "loss": 3.0117, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.699900290748825e-05, | |
| "loss": 3.0088, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.4338164264773608, | |
| "eval_loss": 3.031200647354126, | |
| "eval_runtime": 36.6187, | |
| "eval_samples_per_second": 306.292, | |
| "eval_steps_per_second": 2.567, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.674763504738283e-05, | |
| "loss": 3.0107, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.649626718727742e-05, | |
| "loss": 3.0104, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.624489932717202e-05, | |
| "loss": 3.0141, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.59935314670666e-05, | |
| "loss": 3.0263, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.57421636069612e-05, | |
| "loss": 3.0093, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.54907957468558e-05, | |
| "loss": 3.0057, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.52394278867504e-05, | |
| "loss": 3.0104, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.498806002664499e-05, | |
| "loss": 3.0202, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.473669216653958e-05, | |
| "loss": 3.0118, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.448532430643417e-05, | |
| "loss": 3.0087, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.43394138856751324, | |
| "eval_loss": 3.0297725200653076, | |
| "eval_runtime": 36.7079, | |
| "eval_samples_per_second": 305.547, | |
| "eval_steps_per_second": 2.561, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.423395644632877e-05, | |
| "loss": 3.0163, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 7.398258858622336e-05, | |
| "loss": 3.0168, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.373122072611796e-05, | |
| "loss": 3.0145, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.347985286601255e-05, | |
| "loss": 3.0112, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.322848500590714e-05, | |
| "loss": 3.0094, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.297711714580173e-05, | |
| "loss": 3.0129, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.272574928569632e-05, | |
| "loss": 3.0033, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.247438142559092e-05, | |
| "loss": 3.0115, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.222301356548552e-05, | |
| "loss": 3.0075, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.197164570538011e-05, | |
| "loss": 3.0134, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.43423554090332145, | |
| "eval_loss": 3.0285885334014893, | |
| "eval_runtime": 36.591, | |
| "eval_samples_per_second": 306.523, | |
| "eval_steps_per_second": 2.569, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.172279152387576e-05, | |
| "loss": 3.019, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.147142366377035e-05, | |
| "loss": 3.0166, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.122005580366494e-05, | |
| "loss": 3.0114, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 7.097120162216059e-05, | |
| "loss": 3.015, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.071983376205518e-05, | |
| "loss": 3.0123, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.046846590194977e-05, | |
| "loss": 3.007, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 7.021709804184436e-05, | |
| "loss": 3.005, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.996573018173895e-05, | |
| "loss": 3.0122, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.971436232163356e-05, | |
| "loss": 3.0069, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.946299446152815e-05, | |
| "loss": 3.0136, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.43437735181461806, | |
| "eval_loss": 3.0268590450286865, | |
| "eval_runtime": 36.7262, | |
| "eval_samples_per_second": 305.395, | |
| "eval_steps_per_second": 2.559, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.921162660142274e-05, | |
| "loss": 3.0063, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.896025874131733e-05, | |
| "loss": 3.007, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.870889088121192e-05, | |
| "loss": 3.0132, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.845752302110651e-05, | |
| "loss": 3.0145, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.82061551610011e-05, | |
| "loss": 3.0116, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.79547873008957e-05, | |
| "loss": 3.0138, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.77034194407903e-05, | |
| "loss": 3.0075, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.745205158068489e-05, | |
| "loss": 3.0098, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.720068372057948e-05, | |
| "loss": 3.0058, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.694931586047407e-05, | |
| "loss": 3.0043, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.43468133262942704, | |
| "eval_loss": 3.0255324840545654, | |
| "eval_runtime": 36.1761, | |
| "eval_samples_per_second": 310.039, | |
| "eval_steps_per_second": 2.598, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.669794800036866e-05, | |
| "loss": 3.0167, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.644658014026327e-05, | |
| "loss": 3.0077, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.619521228015785e-05, | |
| "loss": 3.0087, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.594384442005244e-05, | |
| "loss": 3.0137, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.569499023854809e-05, | |
| "loss": 3.015, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.544362237844268e-05, | |
| "loss": 3.0046, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.519225451833728e-05, | |
| "loss": 3.0015, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 6.494088665823187e-05, | |
| "loss": 3.0074, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.468951879812646e-05, | |
| "loss": 3.0082, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.443815093802106e-05, | |
| "loss": 2.9995, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.43484701270401116, | |
| "eval_loss": 3.023953914642334, | |
| "eval_runtime": 36.328, | |
| "eval_samples_per_second": 308.742, | |
| "eval_steps_per_second": 2.588, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.418678307791566e-05, | |
| "loss": 3.0039, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.393541521781025e-05, | |
| "loss": 3.0095, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.368404735770483e-05, | |
| "loss": 3.0028, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.343267949759943e-05, | |
| "loss": 3.0082, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.318131163749403e-05, | |
| "loss": 3.0069, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.292994377738862e-05, | |
| "loss": 3.0004, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.267857591728321e-05, | |
| "loss": 3.0087, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.24272080571778e-05, | |
| "loss": 3.0062, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.21758401970724e-05, | |
| "loss": 3.0113, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 6.1924472336967e-05, | |
| "loss": 3.001, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_accuracy": 0.434945999528233, | |
| "eval_loss": 3.0230536460876465, | |
| "eval_runtime": 36.4523, | |
| "eval_samples_per_second": 307.69, | |
| "eval_steps_per_second": 2.579, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.167310447686157e-05, | |
| "loss": 3.0026, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.142173661675618e-05, | |
| "loss": 3.01, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.117036875665077e-05, | |
| "loss": 3.0073, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.091900089654536e-05, | |
| "loss": 3.0101, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.067014671504101e-05, | |
| "loss": 3.0013, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.04187788549356e-05, | |
| "loss": 3.004, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.01674109948302e-05, | |
| "loss": 3.0042, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.991604313472479e-05, | |
| "loss": 3.0081, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.966467527461938e-05, | |
| "loss": 3.0048, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.941582109311503e-05, | |
| "loss": 3.007, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.4351959237085379, | |
| "eval_loss": 3.02174973487854, | |
| "eval_runtime": 36.367, | |
| "eval_samples_per_second": 308.412, | |
| "eval_steps_per_second": 2.585, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.9164453233009627e-05, | |
| "loss": 3.006, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 5.891308537290422e-05, | |
| "loss": 3.0025, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.866171751279881e-05, | |
| "loss": 3.006, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.841034965269341e-05, | |
| "loss": 2.9956, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.815898179258799e-05, | |
| "loss": 2.9968, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.790761393248258e-05, | |
| "loss": 3.0023, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.765624607237718e-05, | |
| "loss": 3.0014, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.740487821227177e-05, | |
| "loss": 2.9961, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.715351035216637e-05, | |
| "loss": 3.0024, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.690214249206096e-05, | |
| "loss": 3.0035, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.43532720410661935, | |
| "eval_loss": 3.02020263671875, | |
| "eval_runtime": 37.453, | |
| "eval_samples_per_second": 299.469, | |
| "eval_steps_per_second": 2.51, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.665328831055661e-05, | |
| "loss": 3.0032, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.64019204504512e-05, | |
| "loss": 2.9961, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.61505525903458e-05, | |
| "loss": 3.0048, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.589918473024039e-05, | |
| "loss": 2.9939, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.565033054873604e-05, | |
| "loss": 2.9995, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.539896268863063e-05, | |
| "loss": 3.0067, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.514759482852523e-05, | |
| "loss": 2.9924, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.489622696841981e-05, | |
| "loss": 2.9997, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.46448591083144e-05, | |
| "loss": 3.0077, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.4393491248209e-05, | |
| "loss": 2.9966, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_accuracy": 0.43553711233670683, | |
| "eval_loss": 3.019421100616455, | |
| "eval_runtime": 36.419, | |
| "eval_samples_per_second": 307.971, | |
| "eval_steps_per_second": 2.581, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.414212338810359e-05, | |
| "loss": 3.0027, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.3890755527998184e-05, | |
| "loss": 3.0008, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.363938766789278e-05, | |
| "loss": 3.0019, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.338801980778737e-05, | |
| "loss": 2.9993, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.313665194768197e-05, | |
| "loss": 3.0025, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 5.2885284087576555e-05, | |
| "loss": 2.9987, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.263391622747115e-05, | |
| "loss": 3.0054, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.2382548367365745e-05, | |
| "loss": 3.0064, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.2131180507260336e-05, | |
| "loss": 3.0096, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1879812647154934e-05, | |
| "loss": 2.9881, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.4356613723926449, | |
| "eval_loss": 3.0177648067474365, | |
| "eval_runtime": 37.6095, | |
| "eval_samples_per_second": 298.223, | |
| "eval_steps_per_second": 2.499, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1628444787049525e-05, | |
| "loss": 3.0002, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1377076926944117e-05, | |
| "loss": 2.9966, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.1128222745439764e-05, | |
| "loss": 3.0012, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.0876854885334356e-05, | |
| "loss": 2.9964, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.0625487025228954e-05, | |
| "loss": 2.9981, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.037411916512354e-05, | |
| "loss": 2.9986, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.012275130501813e-05, | |
| "loss": 2.9981, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 4.987138344491273e-05, | |
| "loss": 3.0057, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.962001558480732e-05, | |
| "loss": 2.994, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.936864772470192e-05, | |
| "loss": 3.0028, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.43574631853258, | |
| "eval_loss": 3.0173962116241455, | |
| "eval_runtime": 36.2768, | |
| "eval_samples_per_second": 309.179, | |
| "eval_steps_per_second": 2.591, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.911727986459651e-05, | |
| "loss": 3.0028, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.8865912004491106e-05, | |
| "loss": 2.9969, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.86145441443857e-05, | |
| "loss": 3.0029, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.836317628428029e-05, | |
| "loss": 3.0033, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.811180842417488e-05, | |
| "loss": 2.9945, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.786044056406947e-05, | |
| "loss": 2.9985, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.760907270396406e-05, | |
| "loss": 2.9952, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.735770484385866e-05, | |
| "loss": 2.9859, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.710633698375325e-05, | |
| "loss": 2.9951, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.685496912364785e-05, | |
| "loss": 2.9933, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.4362117672166871, | |
| "eval_loss": 3.01594614982605, | |
| "eval_runtime": 36.0518, | |
| "eval_samples_per_second": 311.108, | |
| "eval_steps_per_second": 2.607, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.660360126354244e-05, | |
| "loss": 2.9979, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.635223340343704e-05, | |
| "loss": 2.9961, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.6100865543331624e-05, | |
| "loss": 3.0076, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5849497683226215e-05, | |
| "loss": 3.0, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.559812982312081e-05, | |
| "loss": 2.9964, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5346761963015404e-05, | |
| "loss": 2.9951, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.5095394102909996e-05, | |
| "loss": 2.9964, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.4844026242804594e-05, | |
| "loss": 3.0034, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.4592658382699185e-05, | |
| "loss": 2.994, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.434129052259378e-05, | |
| "loss": 3.0002, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 0.43605310748424636, | |
| "eval_loss": 3.01462721824646, | |
| "eval_runtime": 36.4761, | |
| "eval_samples_per_second": 307.489, | |
| "eval_steps_per_second": 2.577, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.408992266248837e-05, | |
| "loss": 2.9951, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 4.383855480238296e-05, | |
| "loss": 2.9959, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.358718694227756e-05, | |
| "loss": 2.9922, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.333581908217215e-05, | |
| "loss": 2.9888, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.3084451222066746e-05, | |
| "loss": 2.9951, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.283559704056239e-05, | |
| "loss": 2.994, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.2586742859058035e-05, | |
| "loss": 2.9969, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.2335374998952626e-05, | |
| "loss": 2.9959, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.2084007138847224e-05, | |
| "loss": 3.0063, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1832639278741816e-05, | |
| "loss": 2.9901, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.4364076347624878, | |
| "eval_loss": 3.0131843090057373, | |
| "eval_runtime": 36.6938, | |
| "eval_samples_per_second": 305.665, | |
| "eval_steps_per_second": 2.562, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1581271418636414e-05, | |
| "loss": 2.996, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1329903558531005e-05, | |
| "loss": 2.9928, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.1078535698425596e-05, | |
| "loss": 2.9981, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.082716783832019e-05, | |
| "loss": 2.9999, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.057579997821478e-05, | |
| "loss": 2.9879, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.032443211810937e-05, | |
| "loss": 2.9927, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.007306425800397e-05, | |
| "loss": 2.997, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.982169639789856e-05, | |
| "loss": 2.9899, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.957032853779316e-05, | |
| "loss": 3.0014, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.931896067768775e-05, | |
| "loss": 2.9895, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.4363837655992002, | |
| "eval_loss": 3.012049674987793, | |
| "eval_runtime": 36.7749, | |
| "eval_samples_per_second": 304.99, | |
| "eval_steps_per_second": 2.556, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.906759281758235e-05, | |
| "loss": 3.0, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.881622495747693e-05, | |
| "loss": 2.9981, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.856485709737152e-05, | |
| "loss": 2.9975, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.831600291586718e-05, | |
| "loss": 3.0007, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.806463505576176e-05, | |
| "loss": 2.9958, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 3.781326719565636e-05, | |
| "loss": 2.9939, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.756189933555095e-05, | |
| "loss": 2.9936, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.731053147544555e-05, | |
| "loss": 3.0004, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.705916361534014e-05, | |
| "loss": 2.9945, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.680779575523473e-05, | |
| "loss": 2.9882, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.4366730036955081, | |
| "eval_loss": 3.0106048583984375, | |
| "eval_runtime": 36.7107, | |
| "eval_samples_per_second": 305.524, | |
| "eval_steps_per_second": 2.561, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.655642789512932e-05, | |
| "loss": 2.9919, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.630506003502392e-05, | |
| "loss": 2.9894, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.605369217491851e-05, | |
| "loss": 3.0005, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.580483799341416e-05, | |
| "loss": 2.9884, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.555347013330875e-05, | |
| "loss": 2.9865, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.530210227320335e-05, | |
| "loss": 2.9909, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.5050734413097934e-05, | |
| "loss": 2.9961, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.479936655299253e-05, | |
| "loss": 2.9905, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.454799869288712e-05, | |
| "loss": 2.9913, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.429914451138277e-05, | |
| "loss": 2.9866, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_accuracy": 0.4369524133128152, | |
| "eval_loss": 3.008857250213623, | |
| "eval_runtime": 36.004, | |
| "eval_samples_per_second": 311.521, | |
| "eval_steps_per_second": 2.611, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.404777665127736e-05, | |
| "loss": 2.9893, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.379640879117196e-05, | |
| "loss": 2.989, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.354504093106655e-05, | |
| "loss": 2.9886, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.329367307096114e-05, | |
| "loss": 2.9835, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.3042305210855734e-05, | |
| "loss": 2.9918, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.279093735075033e-05, | |
| "loss": 2.9855, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.2539569490644923e-05, | |
| "loss": 2.9895, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.229071530914057e-05, | |
| "loss": 2.9791, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.203934744903516e-05, | |
| "loss": 2.9955, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.178797958892976e-05, | |
| "loss": 2.9961, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.43725920226448156, | |
| "eval_loss": 3.007978677749634, | |
| "eval_runtime": 36.4349, | |
| "eval_samples_per_second": 307.837, | |
| "eval_steps_per_second": 2.58, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.153661172882435e-05, | |
| "loss": 2.9921, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.128524386871894e-05, | |
| "loss": 2.9937, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.1033876008613534e-05, | |
| "loss": 2.9894, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.078250814850813e-05, | |
| "loss": 2.9919, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0531140288402724e-05, | |
| "loss": 2.9906, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.0279772428297315e-05, | |
| "loss": 2.9839, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.002840456819191e-05, | |
| "loss": 2.9871, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9777036708086504e-05, | |
| "loss": 2.9891, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9525668847981092e-05, | |
| "loss": 2.9898, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.9274300987875687e-05, | |
| "loss": 2.9876, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.4373946948678491, | |
| "eval_loss": 3.0067296028137207, | |
| "eval_runtime": 36.3734, | |
| "eval_samples_per_second": 308.357, | |
| "eval_steps_per_second": 2.584, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.902293312777028e-05, | |
| "loss": 2.9917, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8771565267664876e-05, | |
| "loss": 2.9916, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8520197407559464e-05, | |
| "loss": 2.9981, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.826882954745406e-05, | |
| "loss": 2.9817, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.8017461687348653e-05, | |
| "loss": 2.9875, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.7766093827243248e-05, | |
| "loss": 2.9904, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.751472596713784e-05, | |
| "loss": 2.9882, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.7265871785633487e-05, | |
| "loss": 2.9885, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.701450392552808e-05, | |
| "loss": 2.9898, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.6763136065422673e-05, | |
| "loss": 2.9873, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 0.43755826883979015, | |
| "eval_loss": 3.0054852962493896, | |
| "eval_runtime": 36.8956, | |
| "eval_samples_per_second": 303.993, | |
| "eval_steps_per_second": 2.548, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.6511768205317264e-05, | |
| "loss": 2.9921, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.626040034521186e-05, | |
| "loss": 2.9863, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 2.600903248510645e-05, | |
| "loss": 2.9902, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.5757664625001045e-05, | |
| "loss": 2.9823, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.5508810443496693e-05, | |
| "loss": 2.9978, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.5257442583391284e-05, | |
| "loss": 2.9859, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.500607472328588e-05, | |
| "loss": 2.9821, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.4754706863180473e-05, | |
| "loss": 2.9932, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.4503339003075065e-05, | |
| "loss": 2.9906, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.4251971142969656e-05, | |
| "loss": 2.9891, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.4375182528895728, | |
| "eval_loss": 3.004079580307007, | |
| "eval_runtime": 36.2219, | |
| "eval_samples_per_second": 309.647, | |
| "eval_steps_per_second": 2.595, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.400060328286425e-05, | |
| "loss": 2.9875, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3749235422758845e-05, | |
| "loss": 2.9859, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.349786756265344e-05, | |
| "loss": 2.9865, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.3246499702548028e-05, | |
| "loss": 2.994, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 2.2995131842442623e-05, | |
| "loss": 2.9817, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.2743763982337217e-05, | |
| "loss": 2.9915, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.2492396122231812e-05, | |
| "loss": 2.9927, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.22410282621264e-05, | |
| "loss": 2.9908, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1989660402020994e-05, | |
| "loss": 2.9897, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.173829254191559e-05, | |
| "loss": 2.9835, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.4377632628303773, | |
| "eval_loss": 3.0032153129577637, | |
| "eval_runtime": 36.5662, | |
| "eval_samples_per_second": 306.731, | |
| "eval_steps_per_second": 2.571, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1486924681810184e-05, | |
| "loss": 2.9787, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.123555682170477e-05, | |
| "loss": 2.9831, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.0984188961599366e-05, | |
| "loss": 2.9913, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.073282110149396e-05, | |
| "loss": 2.9904, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.0481453241388556e-05, | |
| "loss": 2.9842, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.0230085381283147e-05, | |
| "loss": 2.987, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 1.9978717521177738e-05, | |
| "loss": 2.9868, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9727349661072333e-05, | |
| "loss": 2.9887, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.9475981800966928e-05, | |
| "loss": 2.9844, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.922461394086152e-05, | |
| "loss": 2.9887, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.4380391622766127, | |
| "eval_loss": 3.0022435188293457, | |
| "eval_runtime": 36.4456, | |
| "eval_samples_per_second": 307.746, | |
| "eval_steps_per_second": 2.579, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8973246080756113e-05, | |
| "loss": 2.9792, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.8721878220650705e-05, | |
| "loss": 2.9813, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.84705103605453e-05, | |
| "loss": 2.9852, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.821914250043989e-05, | |
| "loss": 2.9927, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.797028831893554e-05, | |
| "loss": 2.9869, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7718920458830133e-05, | |
| "loss": 2.9798, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.7467552598724724e-05, | |
| "loss": 2.982, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.721618473861932e-05, | |
| "loss": 2.9787, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 1.6964816878513914e-05, | |
| "loss": 2.9891, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.671596269700956e-05, | |
| "loss": 2.9876, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.43829610679906095, | |
| "eval_loss": 3.0009684562683105, | |
| "eval_runtime": 37.5779, | |
| "eval_samples_per_second": 298.473, | |
| "eval_steps_per_second": 2.501, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6464594836904153e-05, | |
| "loss": 2.9809, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6213226976798747e-05, | |
| "loss": 2.9933, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.596185911669334e-05, | |
| "loss": 2.9868, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5710491256587933e-05, | |
| "loss": 2.9867, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5459123396482525e-05, | |
| "loss": 2.9831, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.5207755536377118e-05, | |
| "loss": 2.9857, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.495638767627171e-05, | |
| "loss": 2.9861, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4705019816166305e-05, | |
| "loss": 2.9797, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4453651956060897e-05, | |
| "loss": 2.9819, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.4202284095955491e-05, | |
| "loss": 2.9818, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.4384379177103575, | |
| "eval_loss": 2.9998745918273926, | |
| "eval_runtime": 36.4806, | |
| "eval_samples_per_second": 307.451, | |
| "eval_steps_per_second": 2.577, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.3950916235850083e-05, | |
| "loss": 2.9861, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3699548375744677e-05, | |
| "loss": 2.9859, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3448180515639268e-05, | |
| "loss": 2.9864, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.3196812655533863e-05, | |
| "loss": 2.9818, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2945444795428454e-05, | |
| "loss": 2.9732, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2694076935323049e-05, | |
| "loss": 2.9859, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.244270907521764e-05, | |
| "loss": 2.9828, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2191341215112235e-05, | |
| "loss": 2.9837, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1939973355006828e-05, | |
| "loss": 2.9748, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1688605494901421e-05, | |
| "loss": 2.9797, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.43843651364192887, | |
| "eval_loss": 2.999021291732788, | |
| "eval_runtime": 36.1681, | |
| "eval_samples_per_second": 310.108, | |
| "eval_steps_per_second": 2.599, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1437237634796014e-05, | |
| "loss": 2.9813, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.1185869774690607e-05, | |
| "loss": 2.978, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.09345019145852e-05, | |
| "loss": 2.9886, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0683134054479795e-05, | |
| "loss": 2.9744, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0431766194374386e-05, | |
| "loss": 2.9804, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.0182912012870034e-05, | |
| "loss": 2.984, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.931544152764628e-06, | |
| "loss": 2.985, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.68017629265922e-06, | |
| "loss": 2.9843, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.428808432553814e-06, | |
| "loss": 2.9809, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 9.177440572448405e-06, | |
| "loss": 2.9842, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.43876225751738235, | |
| "eval_loss": 2.9980885982513428, | |
| "eval_runtime": 36.1964, | |
| "eval_samples_per_second": 309.865, | |
| "eval_steps_per_second": 2.597, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.926072712342998e-06, | |
| "loss": 2.9702, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.674704852237591e-06, | |
| "loss": 2.9799, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.423336992132184e-06, | |
| "loss": 2.9825, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 8.171969132026777e-06, | |
| "loss": 2.9726, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 7.920601271921372e-06, | |
| "loss": 2.9788, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.669233411815965e-06, | |
| "loss": 2.988, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.417865551710557e-06, | |
| "loss": 2.9795, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 7.16649769160515e-06, | |
| "loss": 2.9797, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.915129831499744e-06, | |
| "loss": 2.9735, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.663761971394337e-06, | |
| "loss": 2.9739, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.43866397272737484, | |
| "eval_loss": 2.9972493648529053, | |
| "eval_runtime": 36.7078, | |
| "eval_samples_per_second": 305.548, | |
| "eval_steps_per_second": 2.561, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.41239411128893e-06, | |
| "loss": 2.9765, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 6.161026251183523e-06, | |
| "loss": 2.9859, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.909658391078116e-06, | |
| "loss": 2.9897, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.658290530972709e-06, | |
| "loss": 2.9855, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.406922670867302e-06, | |
| "loss": 2.9747, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 5.155554810761895e-06, | |
| "loss": 2.9732, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 4.9041869506564885e-06, | |
| "loss": 2.9796, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.6528190905510815e-06, | |
| "loss": 2.9782, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.4014512304456745e-06, | |
| "loss": 2.9841, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 4.150083370340268e-06, | |
| "loss": 2.9804, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.43883737517831667, | |
| "eval_loss": 2.9965155124664307, | |
| "eval_runtime": 36.1347, | |
| "eval_samples_per_second": 310.394, | |
| "eval_steps_per_second": 2.601, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.898715510234861e-06, | |
| "loss": 2.9836, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.6473476501294542e-06, | |
| "loss": 2.9815, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.398493468625101e-06, | |
| "loss": 2.9744, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.147125608519694e-06, | |
| "loss": 2.9847, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.8957577484142875e-06, | |
| "loss": 2.9733, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.6469035669099345e-06, | |
| "loss": 2.9766, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.395535706804528e-06, | |
| "loss": 2.9802, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.144167846699121e-06, | |
| "loss": 2.9757, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 1.8927999865937138e-06, | |
| "loss": 2.9775, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.641432126488307e-06, | |
| "loss": 2.9828, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.43901218169768724, | |
| "eval_loss": 2.995953321456909, | |
| "eval_runtime": 36.3994, | |
| "eval_samples_per_second": 308.137, | |
| "eval_steps_per_second": 2.582, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.3900642663829e-06, | |
| "loss": 2.9783, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.1386964062774932e-06, | |
| "loss": 2.9723, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 8.873285461720863e-07, | |
| "loss": 2.9817, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 6.359606860666794e-07, | |
| "loss": 2.9792, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3.8459282596127255e-07, | |
| "loss": 2.982, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 119547, | |
| "total_flos": 1.455921831670228e+20, | |
| "train_loss": 3.081914688561298, | |
| "train_runtime": 169290.0352, | |
| "train_samples_per_second": 169.48, | |
| "train_steps_per_second": 0.706 | |
| } | |
| ], | |
| "max_steps": 119547, | |
| "num_train_epochs": 1, | |
| "total_flos": 1.455921831670228e+20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |