{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.79247152055473,
"eval_steps": 500,
"global_step": 2000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00039623576027736503,
"grad_norm": 23.5,
"learning_rate": 2.6315789473684213e-07,
"loss": 1.1837007999420166,
"step": 1,
"token_acc": 0.8159329621764334
},
{
"epoch": 0.00396235760277365,
"grad_norm": 21.375,
"learning_rate": 2.631578947368421e-06,
"loss": 1.123257319132487,
"step": 10,
"token_acc": 0.8210087927828165
},
{
"epoch": 0.0079247152055473,
"grad_norm": 7.09375,
"learning_rate": 5.263157894736842e-06,
"loss": 0.9101140975952149,
"step": 20,
"token_acc": 0.8285831313786395
},
{
"epoch": 0.01188707280832095,
"grad_norm": 2.8125,
"learning_rate": 7.894736842105265e-06,
"loss": 0.5795128822326661,
"step": 30,
"token_acc": 0.8505745886743207
},
{
"epoch": 0.0158494304110946,
"grad_norm": 1.7578125,
"learning_rate": 1.0526315789473684e-05,
"loss": 0.42301692962646487,
"step": 40,
"token_acc": 0.8767900103104593
},
{
"epoch": 0.01981178801386825,
"grad_norm": 1.4921875,
"learning_rate": 1.3157894736842108e-05,
"loss": 0.35302650928497314,
"step": 50,
"token_acc": 0.8897820845537251
},
{
"epoch": 0.0237741456166419,
"grad_norm": 1.375,
"learning_rate": 1.578947368421053e-05,
"loss": 0.33079302310943604,
"step": 60,
"token_acc": 0.8967102736745091
},
{
"epoch": 0.02773650321941555,
"grad_norm": 1.25,
"learning_rate": 1.8421052631578947e-05,
"loss": 0.31292335987091063,
"step": 70,
"token_acc": 0.9010626512129326
},
{
"epoch": 0.0316988608221892,
"grad_norm": 1.109375,
"learning_rate": 1.999986824534997e-05,
"loss": 0.3141467094421387,
"step": 80,
"token_acc": 0.9005368650633087
},
{
"epoch": 0.03566121842496285,
"grad_norm": 1.296875,
"learning_rate": 1.9998386045408938e-05,
"loss": 0.29496400356292723,
"step": 90,
"token_acc": 0.9055861965123218
},
{
"epoch": 0.0396235760277365,
"grad_norm": 1.28125,
"learning_rate": 1.999525719713366e-05,
"loss": 0.2913074970245361,
"step": 100,
"token_acc": 0.9076149509114921
},
{
"epoch": 0.04358593363051015,
"grad_norm": 1.6015625,
"learning_rate": 1.999048221581858e-05,
"loss": 0.2880474805831909,
"step": 110,
"token_acc": 0.9073922051522615
},
{
"epoch": 0.0475482912332838,
"grad_norm": 1.390625,
"learning_rate": 1.9984061887862118e-05,
"loss": 0.27746291160583497,
"step": 120,
"token_acc": 0.9101783276777932
},
{
"epoch": 0.05151064883605745,
"grad_norm": 1.171875,
"learning_rate": 1.9975997270637172e-05,
"loss": 0.273817777633667,
"step": 130,
"token_acc": 0.909736600422787
},
{
"epoch": 0.0554730064388311,
"grad_norm": 1.2578125,
"learning_rate": 1.9966289692316944e-05,
"loss": 0.2767889976501465,
"step": 140,
"token_acc": 0.9082912026144594
},
{
"epoch": 0.05943536404160475,
"grad_norm": 1.4921875,
"learning_rate": 1.9954940751656245e-05,
"loss": 0.27089781761169435,
"step": 150,
"token_acc": 0.9099060425408418
},
{
"epoch": 0.0633977216443784,
"grad_norm": 1.171875,
"learning_rate": 1.994195231772815e-05,
"loss": 0.25421991348266604,
"step": 160,
"token_acc": 0.9162766481231006
},
{
"epoch": 0.06736007924715205,
"grad_norm": 1.359375,
"learning_rate": 1.9927326529616203e-05,
"loss": 0.2611961841583252,
"step": 170,
"token_acc": 0.9147679722152482
},
{
"epoch": 0.0713224368499257,
"grad_norm": 1.4296875,
"learning_rate": 1.9911065796062137e-05,
"loss": 0.264358377456665,
"step": 180,
"token_acc": 0.9137104702605277
},
{
"epoch": 0.07528479445269935,
"grad_norm": 1.2265625,
"learning_rate": 1.9893172795069144e-05,
"loss": 0.27645695209503174,
"step": 190,
"token_acc": 0.9085774438661551
},
{
"epoch": 0.079247152055473,
"grad_norm": 1.3125,
"learning_rate": 1.9873650473460862e-05,
"loss": 0.2564415693283081,
"step": 200,
"token_acc": 0.9148068228524455
},
{
"epoch": 0.08320950965824665,
"grad_norm": 1.2265625,
"learning_rate": 1.9852502046396035e-05,
"loss": 0.2584503650665283,
"step": 210,
"token_acc": 0.9148747112137906
},
{
"epoch": 0.0871718672610203,
"grad_norm": 1.1171875,
"learning_rate": 1.982973099683902e-05,
"loss": 0.25623598098754885,
"step": 220,
"token_acc": 0.916684382955295
},
{
"epoch": 0.09113422486379395,
"grad_norm": 1.4296875,
"learning_rate": 1.980534107498616e-05,
"loss": 0.2456662178039551,
"step": 230,
"token_acc": 0.9188118082346068
},
{
"epoch": 0.0950965824665676,
"grad_norm": 1.4609375,
"learning_rate": 1.977933629764817e-05,
"loss": 0.2530802249908447,
"step": 240,
"token_acc": 0.9152495545682131
},
{
"epoch": 0.09905894006934125,
"grad_norm": 1.3203125,
"learning_rate": 1.9751720947588603e-05,
"loss": 0.24223690032958983,
"step": 250,
"token_acc": 0.9186887231706855
},
{
"epoch": 0.1030212976721149,
"grad_norm": 1.21875,
"learning_rate": 1.9722499572818496e-05,
"loss": 0.23485193252563477,
"step": 260,
"token_acc": 0.9216265054055996
},
{
"epoch": 0.10698365527488855,
"grad_norm": 1.5,
"learning_rate": 1.969167698584738e-05,
"loss": 0.24744803905487062,
"step": 270,
"token_acc": 0.9177383756974582
},
{
"epoch": 0.1109460128776622,
"grad_norm": 1.234375,
"learning_rate": 1.9659258262890683e-05,
"loss": 0.25014376640319824,
"step": 280,
"token_acc": 0.9170167948905685
},
{
"epoch": 0.11490837048043585,
"grad_norm": 1.5078125,
"learning_rate": 1.9625248743033725e-05,
"loss": 0.23340215682983398,
"step": 290,
"token_acc": 0.9214856049225197
},
{
"epoch": 0.1188707280832095,
"grad_norm": 1.171875,
"learning_rate": 1.9589654027352412e-05,
"loss": 0.24289028644561766,
"step": 300,
"token_acc": 0.9185406963850078
},
{
"epoch": 0.12283308568598315,
"grad_norm": 1.0625,
"learning_rate": 1.9552479977990802e-05,
"loss": 0.24520406723022461,
"step": 310,
"token_acc": 0.9184403422069023
},
{
"epoch": 0.1267954432887568,
"grad_norm": 1.359375,
"learning_rate": 1.9513732717195638e-05,
"loss": 0.2427917242050171,
"step": 320,
"token_acc": 0.9178514285714285
},
{
"epoch": 0.13075780089153047,
"grad_norm": 0.9609375,
"learning_rate": 1.9473418626308086e-05,
"loss": 0.21972455978393554,
"step": 330,
"token_acc": 0.9259012550960103
},
{
"epoch": 0.1347201584943041,
"grad_norm": 1.3984375,
"learning_rate": 1.9431544344712776e-05,
"loss": 0.2463603973388672,
"step": 340,
"token_acc": 0.9171354320865818
},
{
"epoch": 0.13868251609707777,
"grad_norm": 1.25,
"learning_rate": 1.9388116768744344e-05,
"loss": 0.23121447563171388,
"step": 350,
"token_acc": 0.9208610209876757
},
{
"epoch": 0.1426448736998514,
"grad_norm": 1.0546875,
"learning_rate": 1.9343143050551684e-05,
"loss": 0.2372572898864746,
"step": 360,
"token_acc": 0.9205740491816241
},
{
"epoch": 0.14660723130262507,
"grad_norm": 1.6328125,
"learning_rate": 1.929663059692002e-05,
"loss": 0.23370888233184814,
"step": 370,
"token_acc": 0.9218769547078884
},
{
"epoch": 0.1505695889053987,
"grad_norm": 1.3515625,
"learning_rate": 1.924858706805112e-05,
"loss": 0.22563014030456544,
"step": 380,
"token_acc": 0.9239206109486627
},
{
"epoch": 0.15453194650817237,
"grad_norm": 1.2890625,
"learning_rate": 1.9199020376301666e-05,
"loss": 0.22754812240600586,
"step": 390,
"token_acc": 0.923770752222635
},
{
"epoch": 0.158494304110946,
"grad_norm": 1.2109375,
"learning_rate": 1.9147938684880213e-05,
"loss": 0.233451247215271,
"step": 400,
"token_acc": 0.9208578517882449
},
{
"epoch": 0.16245666171371967,
"grad_norm": 1.359375,
"learning_rate": 1.9095350406502736e-05,
"loss": 0.22117164134979247,
"step": 410,
"token_acc": 0.9251948698253339
},
{
"epoch": 0.1664190193164933,
"grad_norm": 1.265625,
"learning_rate": 1.9041264202007158e-05,
"loss": 0.23051214218139648,
"step": 420,
"token_acc": 0.9227009356565836
},
{
"epoch": 0.17038137691926697,
"grad_norm": 1.296875,
"learning_rate": 1.8985688978926972e-05,
"loss": 0.22384767532348632,
"step": 430,
"token_acc": 0.9254292644524351
},
{
"epoch": 0.1743437345220406,
"grad_norm": 0.96484375,
"learning_rate": 1.892863389002424e-05,
"loss": 0.22796776294708251,
"step": 440,
"token_acc": 0.9236655948553054
},
{
"epoch": 0.17830609212481427,
"grad_norm": 1.1015625,
"learning_rate": 1.887010833178222e-05,
"loss": 0.2255650520324707,
"step": 450,
"token_acc": 0.9233627684120709
},
{
"epoch": 0.1822684497275879,
"grad_norm": 1.3828125,
"learning_rate": 1.8810121942857848e-05,
"loss": 0.21253745555877684,
"step": 460,
"token_acc": 0.9272634714542769
},
{
"epoch": 0.18623080733036157,
"grad_norm": 1.171875,
"learning_rate": 1.8748684602494327e-05,
"loss": 0.22184033393859864,
"step": 470,
"token_acc": 0.9256473357586134
},
{
"epoch": 0.1901931649331352,
"grad_norm": 1.046875,
"learning_rate": 1.8685806428894113e-05,
"loss": 0.2163544178009033,
"step": 480,
"token_acc": 0.92641120988206
},
{
"epoch": 0.19415552253590887,
"grad_norm": 1.234375,
"learning_rate": 1.8621497777552508e-05,
"loss": 0.2326265335083008,
"step": 490,
"token_acc": 0.9219484631704639
},
{
"epoch": 0.1981178801386825,
"grad_norm": 1.1328125,
"learning_rate": 1.8555769239552232e-05,
"loss": 0.21914072036743165,
"step": 500,
"token_acc": 0.9266210447862321
},
{
"epoch": 0.20208023774145617,
"grad_norm": 1.546875,
"learning_rate": 1.848863163981914e-05,
"loss": 0.22959327697753906,
"step": 510,
"token_acc": 0.9215090641842234
},
{
"epoch": 0.2060425953442298,
"grad_norm": 1.109375,
"learning_rate": 1.8420096035339454e-05,
"loss": 0.21052975654602052,
"step": 520,
"token_acc": 0.9286930380232219
},
{
"epoch": 0.21000495294700347,
"grad_norm": 1.0703125,
"learning_rate": 1.8350173713338777e-05,
"loss": 0.22955830097198487,
"step": 530,
"token_acc": 0.9225931053342221
},
{
"epoch": 0.2139673105497771,
"grad_norm": 1.2890625,
"learning_rate": 1.827887618942318e-05,
"loss": 0.21942346096038817,
"step": 540,
"token_acc": 0.9257000477242205
},
{
"epoch": 0.21792966815255077,
"grad_norm": 1.234375,
"learning_rate": 1.8206215205682683e-05,
"loss": 0.21607930660247804,
"step": 550,
"token_acc": 0.9265396164644921
},
{
"epoch": 0.2218920257553244,
"grad_norm": 1.359375,
"learning_rate": 1.8132202728757428e-05,
"loss": 0.21843266487121582,
"step": 560,
"token_acc": 0.9258849850056328
},
{
"epoch": 0.22585438335809807,
"grad_norm": 1.3671875,
"learning_rate": 1.805685094786689e-05,
"loss": 0.21874871253967285,
"step": 570,
"token_acc": 0.9250736338016231
},
{
"epoch": 0.2298167409608717,
"grad_norm": 1.1640625,
"learning_rate": 1.7980172272802398e-05,
"loss": 0.22817540168762207,
"step": 580,
"token_acc": 0.9221536778365731
},
{
"epoch": 0.23377909856364537,
"grad_norm": 1.3671875,
"learning_rate": 1.790217933188336e-05,
"loss": 0.20628876686096193,
"step": 590,
"token_acc": 0.9291559217209775
},
{
"epoch": 0.237741456166419,
"grad_norm": 1.2265625,
"learning_rate": 1.7822884969877493e-05,
"loss": 0.22458946704864502,
"step": 600,
"token_acc": 0.9231406464867372
},
{
"epoch": 0.24170381376919267,
"grad_norm": 1.3046875,
"learning_rate": 1.7742302245885384e-05,
"loss": 0.20527830123901367,
"step": 610,
"token_acc": 0.9306424304540271
},
{
"epoch": 0.2456661713719663,
"grad_norm": 1.1171875,
"learning_rate": 1.766044443118978e-05,
"loss": 0.2055346965789795,
"step": 620,
"token_acc": 0.9294153185205075
},
{
"epoch": 0.24962852897473997,
"grad_norm": 1.1640625,
"learning_rate": 1.7577325007069927e-05,
"loss": 0.21000022888183595,
"step": 630,
"token_acc": 0.9276756514760238
},
{
"epoch": 0.2535908865775136,
"grad_norm": 1.2265625,
"learning_rate": 1.7492957662581297e-05,
"loss": 0.20726590156555175,
"step": 640,
"token_acc": 0.9288681287625508
},
{
"epoch": 0.25755324418028724,
"grad_norm": 1.1328125,
"learning_rate": 1.7407356292301134e-05,
"loss": 0.20893335342407227,
"step": 650,
"token_acc": 0.9287459199802928
},
{
"epoch": 0.26151560178306094,
"grad_norm": 1.46875,
"learning_rate": 1.7320534994040148e-05,
"loss": 0.2122333526611328,
"step": 660,
"token_acc": 0.9268251113697004
},
{
"epoch": 0.26547795938583457,
"grad_norm": 2.21875,
"learning_rate": 1.7232508066520702e-05,
"loss": 0.2119227170944214,
"step": 670,
"token_acc": 0.9272324174995067
},
{
"epoch": 0.2694403169886082,
"grad_norm": 1.2890625,
"learning_rate": 1.7143290007021942e-05,
"loss": 0.2144456148147583,
"step": 680,
"token_acc": 0.9266572858854115
},
{
"epoch": 0.27340267459138184,
"grad_norm": 1.3515625,
"learning_rate": 1.7052895508992236e-05,
"loss": 0.20908637046813966,
"step": 690,
"token_acc": 0.9279253384640653
},
{
"epoch": 0.27736503219415554,
"grad_norm": 1.2734375,
"learning_rate": 1.696133945962927e-05,
"loss": 0.21407780647277833,
"step": 700,
"token_acc": 0.9275297697109584
},
{
"epoch": 0.2813273897969292,
"grad_norm": 1.3984375,
"learning_rate": 1.6868636937428254e-05,
"loss": 0.20272161960601806,
"step": 710,
"token_acc": 0.9313989228518674
},
{
"epoch": 0.2852897473997028,
"grad_norm": 1.359375,
"learning_rate": 1.677480320969865e-05,
"loss": 0.20830063819885253,
"step": 720,
"token_acc": 0.9284670505715276
},
{
"epoch": 0.2892521050024765,
"grad_norm": 1.3046875,
"learning_rate": 1.6679853730049743e-05,
"loss": 0.20571448802947997,
"step": 730,
"token_acc": 0.9288137503522119
},
{
"epoch": 0.29321446260525014,
"grad_norm": 1.4375,
"learning_rate": 1.6583804135845582e-05,
"loss": 0.207275128364563,
"step": 740,
"token_acc": 0.9295052506473598
},
{
"epoch": 0.2971768202080238,
"grad_norm": 1.1640625,
"learning_rate": 1.648667024562963e-05,
"loss": 0.2059840202331543,
"step": 750,
"token_acc": 0.9303702716282313
},
{
"epoch": 0.3011391778107974,
"grad_norm": 1.4296875,
"learning_rate": 1.638846805651961e-05,
"loss": 0.20929555892944335,
"step": 760,
"token_acc": 0.9285013576720667
},
{
"epoch": 0.3051015354135711,
"grad_norm": 1.2265625,
"learning_rate": 1.62892137415729e-05,
"loss": 0.2164773464202881,
"step": 770,
"token_acc": 0.9268445872201972
},
{
"epoch": 0.30906389301634474,
"grad_norm": 1.078125,
"learning_rate": 1.6188923647122946e-05,
"loss": 0.20146725177764893,
"step": 780,
"token_acc": 0.9308608962964089
},
{
"epoch": 0.3130262506191184,
"grad_norm": 1.1796875,
"learning_rate": 1.608761429008721e-05,
"loss": 0.19116392135620117,
"step": 790,
"token_acc": 0.9360810066351728
},
{
"epoch": 0.316988608221892,
"grad_norm": 1.40625,
"learning_rate": 1.5985302355246932e-05,
"loss": 0.19471538066864014,
"step": 800,
"token_acc": 0.9334035945789697
},
{
"epoch": 0.3209509658246657,
"grad_norm": 1.3984375,
"learning_rate": 1.5882004692499324e-05,
"loss": 0.20449495315551758,
"step": 810,
"token_acc": 0.9296946281131374
},
{
"epoch": 0.32491332342743934,
"grad_norm": 1.4140625,
"learning_rate": 1.5777738314082514e-05,
"loss": 0.2058267116546631,
"step": 820,
"token_acc": 0.930226312581988
},
{
"epoch": 0.328875681030213,
"grad_norm": 1.4296875,
"learning_rate": 1.567252039177378e-05,
"loss": 0.19794673919677735,
"step": 830,
"token_acc": 0.931884692988862
},
{
"epoch": 0.3328380386329866,
"grad_norm": 1.390625,
"learning_rate": 1.5566368254061505e-05,
"loss": 0.20482149124145507,
"step": 840,
"token_acc": 0.9305290785274152
},
{
"epoch": 0.3368003962357603,
"grad_norm": 1.53125,
"learning_rate": 1.5459299383291347e-05,
"loss": 0.19639644622802735,
"step": 850,
"token_acc": 0.9322417158382036
},
{
"epoch": 0.34076275383853394,
"grad_norm": 1.4375,
"learning_rate": 1.5351331412787004e-05,
"loss": 0.2021495819091797,
"step": 860,
"token_acc": 0.9298179216523921
},
{
"epoch": 0.3447251114413076,
"grad_norm": 1.1875,
"learning_rate": 1.52424821239462e-05,
"loss": 0.20063307285308837,
"step": 870,
"token_acc": 0.9313979538110527
},
{
"epoch": 0.3486874690440812,
"grad_norm": 1.6328125,
"learning_rate": 1.5132769443312207e-05,
"loss": 0.20427477359771729,
"step": 880,
"token_acc": 0.9299313715863092
},
{
"epoch": 0.3526498266468549,
"grad_norm": 1.4609375,
"learning_rate": 1.5022211439621521e-05,
"loss": 0.20063276290893556,
"step": 890,
"token_acc": 0.9309864789183134
},
{
"epoch": 0.35661218424962854,
"grad_norm": 1.328125,
"learning_rate": 1.4910826320828085e-05,
"loss": 0.19403212070465087,
"step": 900,
"token_acc": 0.9340383217142124
},
{
"epoch": 0.3605745418524022,
"grad_norm": 1.21875,
"learning_rate": 1.4798632431104591e-05,
"loss": 0.1897117853164673,
"step": 910,
"token_acc": 0.9360307874252368
},
{
"epoch": 0.3645368994551758,
"grad_norm": 1.4296875,
"learning_rate": 1.4685648247821376e-05,
"loss": 0.19313969612121581,
"step": 920,
"token_acc": 0.9329953036961753
},
{
"epoch": 0.3684992570579495,
"grad_norm": 1.0859375,
"learning_rate": 1.457189237850332e-05,
"loss": 0.203882098197937,
"step": 930,
"token_acc": 0.9312272344443193
},
{
"epoch": 0.37246161466072314,
"grad_norm": 0.875,
"learning_rate": 1.4457383557765385e-05,
"loss": 0.1886841893196106,
"step": 940,
"token_acc": 0.9355444372139664
},
{
"epoch": 0.3764239722634968,
"grad_norm": 1.359375,
"learning_rate": 1.4342140644227151e-05,
"loss": 0.1905367612838745,
"step": 950,
"token_acc": 0.9352085303078055
},
{
"epoch": 0.3803863298662704,
"grad_norm": 1.140625,
"learning_rate": 1.4226182617406996e-05,
"loss": 0.19780998229980468,
"step": 960,
"token_acc": 0.9324879595849204
},
{
"epoch": 0.3843486874690441,
"grad_norm": 1.3203125,
"learning_rate": 1.41095285745963e-05,
"loss": 0.19177125692367553,
"step": 970,
"token_acc": 0.9343932834841926
},
{
"epoch": 0.38831104507181774,
"grad_norm": 1.7578125,
"learning_rate": 1.399219772771431e-05,
"loss": 0.1960275650024414,
"step": 980,
"token_acc": 0.9329073312723757
},
{
"epoch": 0.3922734026745914,
"grad_norm": 1.34375,
"learning_rate": 1.3874209400144092e-05,
"loss": 0.18507509231567382,
"step": 990,
"token_acc": 0.9359859759133133
},
{
"epoch": 0.396235760277365,
"grad_norm": 1.2734375,
"learning_rate": 1.3755583023550128e-05,
"loss": 0.1876603364944458,
"step": 1000,
"token_acc": 0.9350970511384845
},
{
"epoch": 0.4001981178801387,
"grad_norm": 1.28125,
"learning_rate": 1.3636338134678104e-05,
"loss": 0.17850277423858643,
"step": 1010,
"token_acc": 0.9377877376733048
},
{
"epoch": 0.40416047548291234,
"grad_norm": 1.234375,
"learning_rate": 1.3516494372137368e-05,
"loss": 0.1958215355873108,
"step": 1020,
"token_acc": 0.9318651647470785
},
{
"epoch": 0.408122833085686,
"grad_norm": 1.6640625,
"learning_rate": 1.3396071473166614e-05,
"loss": 0.18602523803710938,
"step": 1030,
"token_acc": 0.9359838557500786
},
{
"epoch": 0.4120851906884596,
"grad_norm": 1.2109375,
"learning_rate": 1.327508927038334e-05,
"loss": 0.18929693698883057,
"step": 1040,
"token_acc": 0.9350099237438629
},
{
"epoch": 0.4160475482912333,
"grad_norm": 1.03125,
"learning_rate": 1.3153567688517567e-05,
"loss": 0.18981436491012574,
"step": 1050,
"token_acc": 0.934143741104814
},
{
"epoch": 0.42000990589400694,
"grad_norm": 1.3359375,
"learning_rate": 1.3031526741130435e-05,
"loss": 0.1816575288772583,
"step": 1060,
"token_acc": 0.9370538611291369
},
{
"epoch": 0.4239722634967806,
"grad_norm": 1.3203125,
"learning_rate": 1.2908986527318121e-05,
"loss": 0.19676063060760499,
"step": 1070,
"token_acc": 0.932801285003426
},
{
"epoch": 0.4279346210995542,
"grad_norm": 1.2734375,
"learning_rate": 1.2785967228401688e-05,
"loss": 0.19254275560379028,
"step": 1080,
"token_acc": 0.9333315147712704
},
{
"epoch": 0.4318969787023279,
"grad_norm": 1.5078125,
"learning_rate": 1.266248910460341e-05,
"loss": 0.18717528581619264,
"step": 1090,
"token_acc": 0.9360305301291446
},
{
"epoch": 0.43585933630510154,
"grad_norm": 1.734375,
"learning_rate": 1.2538572491710079e-05,
"loss": 0.1824967622756958,
"step": 1100,
"token_acc": 0.9372006812944594
},
{
"epoch": 0.4398216939078752,
"grad_norm": 1.0078125,
"learning_rate": 1.2414237797723876e-05,
"loss": 0.17919249534606935,
"step": 1110,
"token_acc": 0.9387596071733562
},
{
"epoch": 0.4437840515106488,
"grad_norm": 1.296875,
"learning_rate": 1.2289505499501341e-05,
"loss": 0.18926095962524414,
"step": 1120,
"token_acc": 0.9342525248667318
},
{
"epoch": 0.4477464091134225,
"grad_norm": 1.28125,
"learning_rate": 1.2164396139381029e-05,
"loss": 0.20064361095428468,
"step": 1130,
"token_acc": 0.9315847075431296
},
{
"epoch": 0.45170876671619614,
"grad_norm": 1.484375,
"learning_rate": 1.2038930321800346e-05,
"loss": 0.1895804524421692,
"step": 1140,
"token_acc": 0.9349271790531848
},
{
"epoch": 0.4556711243189698,
"grad_norm": 1.5703125,
"learning_rate": 1.1913128709902182e-05,
"loss": 0.1807018995285034,
"step": 1150,
"token_acc": 0.9369057628872647
},
{
"epoch": 0.4596334819217434,
"grad_norm": 1.4140625,
"learning_rate": 1.1787012022131863e-05,
"loss": 0.1842559814453125,
"step": 1160,
"token_acc": 0.9362108645620739
},
{
"epoch": 0.4635958395245171,
"grad_norm": 1.3203125,
"learning_rate": 1.1660601028825013e-05,
"loss": 0.19840482473373414,
"step": 1170,
"token_acc": 0.9314812356169233
},
{
"epoch": 0.46755819712729074,
"grad_norm": 1.046875,
"learning_rate": 1.1533916548786856e-05,
"loss": 0.1772662878036499,
"step": 1180,
"token_acc": 0.9394712189028833
},
{
"epoch": 0.4715205547300644,
"grad_norm": 1.2734375,
"learning_rate": 1.1406979445863515e-05,
"loss": 0.18831554651260377,
"step": 1190,
"token_acc": 0.935608596292791
},
{
"epoch": 0.475482912332838,
"grad_norm": 1.34375,
"learning_rate": 1.127981062550595e-05,
"loss": 0.18489151000976561,
"step": 1200,
"token_acc": 0.9360608419277421
},
{
"epoch": 0.4794452699356117,
"grad_norm": 1.3984375,
"learning_rate": 1.1152431031326978e-05,
"loss": 0.17761152982711792,
"step": 1210,
"token_acc": 0.9386175400572799
},
{
"epoch": 0.48340762753838534,
"grad_norm": 3.109375,
"learning_rate": 1.102486164165207e-05,
"loss": 0.18663549423217773,
"step": 1220,
"token_acc": 0.9355476517845982
},
{
"epoch": 0.487369985141159,
"grad_norm": 1.25,
"learning_rate": 1.0897123466064376e-05,
"loss": 0.18886669874191284,
"step": 1230,
"token_acc": 0.9356319723508901
},
{
"epoch": 0.4913323427439326,
"grad_norm": 1.34375,
"learning_rate": 1.0769237541944639e-05,
"loss": 0.18777060508728027,
"step": 1240,
"token_acc": 0.9354588236528564
},
{
"epoch": 0.4952947003467063,
"grad_norm": 1.5,
"learning_rate": 1.0641224931006518e-05,
"loss": 0.17902556657791138,
"step": 1250,
"token_acc": 0.9375767442118891
},
{
"epoch": 0.49925705794947994,
"grad_norm": 1.15625,
"learning_rate": 1.0513106715827897e-05,
"loss": 0.18400684595108033,
"step": 1260,
"token_acc": 0.9370039916704695
},
{
"epoch": 0.5032194155522536,
"grad_norm": 1.40625,
"learning_rate": 1.0384903996378784e-05,
"loss": 0.17728078365325928,
"step": 1270,
"token_acc": 0.9389623546976645
},
{
"epoch": 0.5071817731550272,
"grad_norm": 1.21875,
"learning_rate": 1.02566378865463e-05,
"loss": 0.18042536973953247,
"step": 1280,
"token_acc": 0.9374939011828994
},
{
"epoch": 0.5111441307578009,
"grad_norm": 1.4609375,
"learning_rate": 1.0128329510657426e-05,
"loss": 0.18618935346603394,
"step": 1290,
"token_acc": 0.9355284924654325
},
{
"epoch": 0.5151064883605745,
"grad_norm": 1.453125,
"learning_rate": 1e-05,
"loss": 0.19038233757019044,
"step": 1300,
"token_acc": 0.9348978046934141
},
{
"epoch": 0.5190688459633482,
"grad_norm": 1.921875,
"learning_rate": 9.871670489342577e-06,
"loss": 0.18166159391403197,
"step": 1310,
"token_acc": 0.9376118246059261
},
{
"epoch": 0.5230312035661219,
"grad_norm": 1.3828125,
"learning_rate": 9.743362113453705e-06,
"loss": 0.18087191581726075,
"step": 1320,
"token_acc": 0.9368352123903884
},
{
"epoch": 0.5269935611688955,
"grad_norm": 1.7890625,
"learning_rate": 9.615096003621221e-06,
"loss": 0.17757006883621215,
"step": 1330,
"token_acc": 0.9385874468359324
},
{
"epoch": 0.5309559187716691,
"grad_norm": 1.5,
"learning_rate": 9.486893284172103e-06,
"loss": 0.1725843906402588,
"step": 1340,
"token_acc": 0.9396233946138856
},
{
"epoch": 0.5349182763744428,
"grad_norm": 1.1953125,
"learning_rate": 9.358775068993484e-06,
"loss": 0.17776031494140626,
"step": 1350,
"token_acc": 0.9395069576186172
},
{
"epoch": 0.5388806339772164,
"grad_norm": 1.2890625,
"learning_rate": 9.230762458055363e-06,
"loss": 0.18048588037490845,
"step": 1360,
"token_acc": 0.9376439779197635
},
{
"epoch": 0.54284299157999,
"grad_norm": 1.34375,
"learning_rate": 9.102876533935626e-06,
"loss": 0.1871953248977661,
"step": 1370,
"token_acc": 0.9352319496539852
},
{
"epoch": 0.5468053491827637,
"grad_norm": 1.8203125,
"learning_rate": 8.975138358347931e-06,
"loss": 0.17401375770568847,
"step": 1380,
"token_acc": 0.9395100063574127
},
{
"epoch": 0.5507677067855374,
"grad_norm": 1.5625,
"learning_rate": 8.847568968673025e-06,
"loss": 0.1821776032447815,
"step": 1390,
"token_acc": 0.9382589568204417
},
{
"epoch": 0.5547300643883111,
"grad_norm": 1.3203125,
"learning_rate": 8.720189374494055e-06,
"loss": 0.18482091426849365,
"step": 1400,
"token_acc": 0.9366791672453971
},
{
"epoch": 0.5586924219910847,
"grad_norm": 1.140625,
"learning_rate": 8.593020554136491e-06,
"loss": 0.17976686954498292,
"step": 1410,
"token_acc": 0.938686745381246
},
{
"epoch": 0.5626547795938583,
"grad_norm": 1.7734375,
"learning_rate": 8.466083451213145e-06,
"loss": 0.16887048482894898,
"step": 1420,
"token_acc": 0.9413505379807353
},
{
"epoch": 0.566617137196632,
"grad_norm": 1.53125,
"learning_rate": 8.339398971174987e-06,
"loss": 0.181710684299469,
"step": 1430,
"token_acc": 0.9381945571057755
},
{
"epoch": 0.5705794947994056,
"grad_norm": 1.421875,
"learning_rate": 8.212987977868138e-06,
"loss": 0.192651104927063,
"step": 1440,
"token_acc": 0.9346234811416059
},
{
"epoch": 0.5745418524021793,
"grad_norm": 1.4375,
"learning_rate": 8.086871290097822e-06,
"loss": 0.1725835084915161,
"step": 1450,
"token_acc": 0.9401547502340085
},
{
"epoch": 0.578504210004953,
"grad_norm": 1.390625,
"learning_rate": 7.961069678199658e-06,
"loss": 0.18463332653045655,
"step": 1460,
"token_acc": 0.9356090428523226
},
{
"epoch": 0.5824665676077266,
"grad_norm": 1.390625,
"learning_rate": 7.835603860618973e-06,
"loss": 0.18219418525695802,
"step": 1470,
"token_acc": 0.9381983863723681
},
{
"epoch": 0.5864289252105003,
"grad_norm": 1.3125,
"learning_rate": 7.710494500498662e-06,
"loss": 0.17673687934875487,
"step": 1480,
"token_acc": 0.9390315988583202
},
{
"epoch": 0.5903912828132739,
"grad_norm": 1.6796875,
"learning_rate": 7.585762202276129e-06,
"loss": 0.1698865532875061,
"step": 1490,
"token_acc": 0.9415788913714225
},
{
"epoch": 0.5943536404160475,
"grad_norm": 1.53125,
"learning_rate": 7.461427508289922e-06,
"loss": 0.17974636554718018,
"step": 1500,
"token_acc": 0.9385133263736498
},
{
"epoch": 0.5983159980188212,
"grad_norm": 1.0859375,
"learning_rate": 7.337510895396591e-06,
"loss": 0.1787565231323242,
"step": 1510,
"token_acc": 0.9384560906515581
},
{
"epoch": 0.6022783556215948,
"grad_norm": 1.4609375,
"learning_rate": 7.214032771598316e-06,
"loss": 0.1744428515434265,
"step": 1520,
"token_acc": 0.9401470564435646
},
{
"epoch": 0.6062407132243685,
"grad_norm": 1.265625,
"learning_rate": 7.091013472681883e-06,
"loss": 0.17123017311096192,
"step": 1530,
"token_acc": 0.9405837916975914
},
{
"epoch": 0.6102030708271422,
"grad_norm": 1.5234375,
"learning_rate": 6.968473258869566e-06,
"loss": 0.1690650463104248,
"step": 1540,
"token_acc": 0.941058213231226
},
{
"epoch": 0.6141654284299158,
"grad_norm": 1.453125,
"learning_rate": 6.846432311482436e-06,
"loss": 0.18313372135162354,
"step": 1550,
"token_acc": 0.9371285854342504
},
{
"epoch": 0.6181277860326895,
"grad_norm": 1.7890625,
"learning_rate": 6.724910729616665e-06,
"loss": 0.17572647333145142,
"step": 1560,
"token_acc": 0.939426531245842
},
{
"epoch": 0.6220901436354631,
"grad_norm": 1.25,
"learning_rate": 6.603928526833386e-06,
"loss": 0.16190264225006104,
"step": 1570,
"token_acc": 0.9443632366772048
},
{
"epoch": 0.6260525012382367,
"grad_norm": 1.265625,
"learning_rate": 6.483505627862632e-06,
"loss": 0.1694807767868042,
"step": 1580,
"token_acc": 0.9416789717779672
},
{
"epoch": 0.6300148588410104,
"grad_norm": 1.5,
"learning_rate": 6.363661865321898e-06,
"loss": 0.17748751640319824,
"step": 1590,
"token_acc": 0.9385861686705892
},
{
"epoch": 0.633977216443784,
"grad_norm": 1.3984375,
"learning_rate": 6.244416976449875e-06,
"loss": 0.17347029447555543,
"step": 1600,
"token_acc": 0.9403739289918152
},
{
"epoch": 0.6379395740465577,
"grad_norm": 1.171875,
"learning_rate": 6.125790599855912e-06,
"loss": 0.1826688289642334,
"step": 1610,
"token_acc": 0.9372320591550186
},
{
"epoch": 0.6419019316493314,
"grad_norm": 1.2890625,
"learning_rate": 6.007802272285693e-06,
"loss": 0.17403693199157716,
"step": 1620,
"token_acc": 0.9401551062440614
},
{
"epoch": 0.645864289252105,
"grad_norm": 1.625,
"learning_rate": 5.890471425403703e-06,
"loss": 0.18286362886428834,
"step": 1630,
"token_acc": 0.9368950000596794
},
{
"epoch": 0.6498266468548787,
"grad_norm": 1.4375,
"learning_rate": 5.773817382593008e-06,
"loss": 0.1804821014404297,
"step": 1640,
"token_acc": 0.9376396973396319
},
{
"epoch": 0.6537890044576523,
"grad_norm": 1.375,
"learning_rate": 5.65785935577285e-06,
"loss": 0.17369402647018434,
"step": 1650,
"token_acc": 0.9392859770259903
},
{
"epoch": 0.657751362060426,
"grad_norm": 1.3515625,
"learning_rate": 5.542616442234618e-06,
"loss": 0.1656261920928955,
"step": 1660,
"token_acc": 0.943150599230765
},
{
"epoch": 0.6617137196631996,
"grad_norm": 1.2890625,
"learning_rate": 5.428107621496681e-06,
"loss": 0.17441051006317138,
"step": 1670,
"token_acc": 0.9392566132136696
},
{
"epoch": 0.6656760772659732,
"grad_norm": 1.3125,
"learning_rate": 5.3143517521786255e-06,
"loss": 0.17141460180282592,
"step": 1680,
"token_acc": 0.9404770520787022
},
{
"epoch": 0.6696384348687469,
"grad_norm": 1.4921875,
"learning_rate": 5.201367568895408e-06,
"loss": 0.1779789924621582,
"step": 1690,
"token_acc": 0.9389050144048604
},
{
"epoch": 0.6736007924715206,
"grad_norm": 1.2890625,
"learning_rate": 5.089173679171922e-06,
"loss": 0.1696174383163452,
"step": 1700,
"token_acc": 0.9415787866940171
},
{
"epoch": 0.6775631500742942,
"grad_norm": 1.265625,
"learning_rate": 4.977788560378484e-06,
"loss": 0.17647080421447753,
"step": 1710,
"token_acc": 0.9402322070530992
},
{
"epoch": 0.6815255076770679,
"grad_norm": 1.3203125,
"learning_rate": 4.867230556687797e-06,
"loss": 0.17825334072113036,
"step": 1720,
"token_acc": 0.9382623548644003
},
{
"epoch": 0.6854878652798415,
"grad_norm": 1.390625,
"learning_rate": 4.7575178760538e-06,
"loss": 0.1728861927986145,
"step": 1730,
"token_acc": 0.939594911427579
},
{
"epoch": 0.6894502228826151,
"grad_norm": 1.5234375,
"learning_rate": 4.648668587212998e-06,
"loss": 0.179952073097229,
"step": 1740,
"token_acc": 0.9381945052060547
},
{
"epoch": 0.6934125804853888,
"grad_norm": 1.28125,
"learning_rate": 4.5407006167086575e-06,
"loss": 0.17567566633224488,
"step": 1750,
"token_acc": 0.9399701307689505
},
{
"epoch": 0.6973749380881624,
"grad_norm": 1.328125,
"learning_rate": 4.433631745938497e-06,
"loss": 0.17287354469299315,
"step": 1760,
"token_acc": 0.9405146011104378
},
{
"epoch": 0.7013372956909361,
"grad_norm": 1.5859375,
"learning_rate": 4.327479608226226e-06,
"loss": 0.17426562309265137,
"step": 1770,
"token_acc": 0.9401683220236025
},
{
"epoch": 0.7052996532937098,
"grad_norm": 1.375,
"learning_rate": 4.222261685917489e-06,
"loss": 0.1734224557876587,
"step": 1780,
"token_acc": 0.9401309334234104
},
{
"epoch": 0.7092620108964834,
"grad_norm": 1.2578125,
"learning_rate": 4.117995307500677e-06,
"loss": 0.17531417608261107,
"step": 1790,
"token_acc": 0.9409358352138655
},
{
"epoch": 0.7132243684992571,
"grad_norm": 1.1171875,
"learning_rate": 4.014697644753069e-06,
"loss": 0.17481131553649903,
"step": 1800,
"token_acc": 0.9396957170350632
},
{
"epoch": 0.7171867261020307,
"grad_norm": 1.3046875,
"learning_rate": 3.912385709912794e-06,
"loss": 0.17085225582122804,
"step": 1810,
"token_acc": 0.9413413462722593
},
{
"epoch": 0.7211490837048044,
"grad_norm": 1.765625,
"learning_rate": 3.8110763528770543e-06,
"loss": 0.18243337869644166,
"step": 1820,
"token_acc": 0.9380230355884426
},
{
"epoch": 0.725111441307578,
"grad_norm": 1.2109375,
"learning_rate": 3.7107862584271016e-06,
"loss": 0.16808085441589354,
"step": 1830,
"token_acc": 0.9410576758514462
},
{
"epoch": 0.7290737989103516,
"grad_norm": 1.28125,
"learning_rate": 3.6115319434803897e-06,
"loss": 0.16966335773468016,
"step": 1840,
"token_acc": 0.9421915175440875
},
{
"epoch": 0.7330361565131253,
"grad_norm": 1.484375,
"learning_rate": 3.5133297543703724e-06,
"loss": 0.18466969728469848,
"step": 1850,
"token_acc": 0.9361129341986922
},
{
"epoch": 0.736998514115899,
"grad_norm": 1.34375,
"learning_rate": 3.416195864154426e-06,
"loss": 0.17389074563980103,
"step": 1860,
"token_acc": 0.9401176608095999
},
{
"epoch": 0.7409608717186726,
"grad_norm": 1.8046875,
"learning_rate": 3.3201462699502606e-06,
"loss": 0.18031821250915528,
"step": 1870,
"token_acc": 0.9371710958652052
},
{
"epoch": 0.7449232293214463,
"grad_norm": 1.3828125,
"learning_rate": 3.2251967903013515e-06,
"loss": 0.16321947574615478,
"step": 1880,
"token_acc": 0.9434022207870669
},
{
"epoch": 0.7488855869242199,
"grad_norm": 1.40625,
"learning_rate": 3.1313630625717462e-06,
"loss": 0.165952730178833,
"step": 1890,
"token_acc": 0.9433088620870477
},
{
"epoch": 0.7528479445269936,
"grad_norm": 1.6953125,
"learning_rate": 3.0386605403707347e-06,
"loss": 0.17759935855865477,
"step": 1900,
"token_acc": 0.9391445944776249
},
{
"epoch": 0.7568103021297672,
"grad_norm": 1.5625,
"learning_rate": 2.947104491007766e-06,
"loss": 0.17080872058868407,
"step": 1910,
"token_acc": 0.9417546272928465
},
{
"epoch": 0.7607726597325408,
"grad_norm": 1.296875,
"learning_rate": 2.8567099929780596e-06,
"loss": 0.17588542699813842,
"step": 1920,
"token_acc": 0.9384443609064742
},
{
"epoch": 0.7647350173353145,
"grad_norm": 1.671875,
"learning_rate": 2.767491933479304e-06,
"loss": 0.17596354484558105,
"step": 1930,
"token_acc": 0.9390227163544026
},
{
"epoch": 0.7686973749380882,
"grad_norm": 1.4921875,
"learning_rate": 2.679465005959856e-06,
"loss": 0.1740294098854065,
"step": 1940,
"token_acc": 0.9401780685412244
},
{
"epoch": 0.7726597325408618,
"grad_norm": 1.359375,
"learning_rate": 2.5926437076988685e-06,
"loss": 0.16495332717895508,
"step": 1950,
"token_acc": 0.9424596695186467
},
{
"epoch": 0.7766220901436355,
"grad_norm": 1.359375,
"learning_rate": 2.507042337418707e-06,
"loss": 0.17244219779968262,
"step": 1960,
"token_acc": 0.940576109936575
},
{
"epoch": 0.7805844477464091,
"grad_norm": 1.7109375,
"learning_rate": 2.4226749929300774e-06,
"loss": 0.17762508392333984,
"step": 1970,
"token_acc": 0.9379922601444852
},
{
"epoch": 0.7845468053491828,
"grad_norm": 1.5078125,
"learning_rate": 2.339555568810221e-06,
"loss": 0.16768510341644288,
"step": 1980,
"token_acc": 0.9424666806336723
},
{
"epoch": 0.7885091629519564,
"grad_norm": 1.2734375,
"learning_rate": 2.2576977541146193e-06,
"loss": 0.1687544584274292,
"step": 1990,
"token_acc": 0.941843418155467
},
{
"epoch": 0.79247152055473,
"grad_norm": 1.3984375,
"learning_rate": 2.1771150301225097e-06,
"loss": 0.17961428165435792,
"step": 2000,
"token_acc": 0.9382880764646055
}
],
"logging_steps": 10,
"max_steps": 2524,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.3391808311045652e+19,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}