LedgerBERT-Market-Sentiment / trainer_state.json
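What follows is the raw Trainer state: per-step training entries (loss, grad_norm, learning_rate) and per-epoch eval entries inside "log_history". A minimal sketch of how this file could be inspected is shown below; it assumes the JSON is saved locally as trainer_state.json and only uses the keys visible in this file.

# Minimal sketch (not part of the repository): load the Trainer state and
# separate per-step training losses from the per-epoch eval entries.
# The local path "trainer_state.json" is an assumption.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]       # step-level training logs
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]   # epoch-level eval logs

print("best step:", state.get("best_global_step"), "best metric:", state.get("best_metric"))
for e in eval_logs:
    print(f"epoch {e['epoch']}: eval_loss={e['eval_loss']:.4f}, "
          f"accuracy={e['eval_accuracy']:.4f}, f1_macro={e['eval_f1_macro']:.4f}")
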
{
"best_global_step": 7998,
"best_metric": 1.0033386945724487,
"best_model_checkpoint": "./../../../models/LedgerBERT-SciBERT-base-v3-News-Class/2025-10-15_00-24-07/market_direction/checkpoint-7998",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 7998,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_accuracy": 0.37046413502109704,
"eval_f1_macro": 0.31648220525898246,
"eval_f1_weighted": 0.3428571794493407,
"eval_loss": 1.0956553220748901,
"eval_precision_macro": 0.3404493817232522,
"eval_precision_weighted": 0.3541600044961222,
"eval_recall_macro": 0.3400488233349732,
"eval_recall_weighted": 0.37046413502109704,
"eval_runtime": 5.1696,
"eval_samples_per_second": 458.454,
"eval_steps_per_second": 57.452,
"step": 0
},
{
"epoch": 0.00037509377344336085,
"grad_norm": 6.950562000274658,
"learning_rate": 0.0,
"loss": 1.0664,
"step": 1
},
{
"epoch": 0.0037509377344336083,
"grad_norm": 7.2157673835754395,
"learning_rate": 3.6e-07,
"loss": 1.1053,
"step": 10
},
{
"epoch": 0.007501875468867217,
"grad_norm": 7.101637840270996,
"learning_rate": 7.6e-07,
"loss": 1.081,
"step": 20
},
{
"epoch": 0.011252813203300824,
"grad_norm": 7.503627777099609,
"learning_rate": 1.1600000000000001e-06,
"loss": 1.1167,
"step": 30
},
{
"epoch": 0.015003750937734433,
"grad_norm": 6.733654975891113,
"learning_rate": 1.56e-06,
"loss": 1.1038,
"step": 40
},
{
"epoch": 0.018754688672168042,
"grad_norm": 6.987666130065918,
"learning_rate": 1.9600000000000003e-06,
"loss": 1.1025,
"step": 50
},
{
"epoch": 0.02250562640660165,
"grad_norm": 9.358382225036621,
"learning_rate": 2.3600000000000003e-06,
"loss": 1.1166,
"step": 60
},
{
"epoch": 0.02625656414103526,
"grad_norm": 6.5409040451049805,
"learning_rate": 2.7600000000000003e-06,
"loss": 1.0841,
"step": 70
},
{
"epoch": 0.030007501875468866,
"grad_norm": 7.030813217163086,
"learning_rate": 3.1600000000000002e-06,
"loss": 1.0583,
"step": 80
},
{
"epoch": 0.03375843960990248,
"grad_norm": 6.986401081085205,
"learning_rate": 3.5600000000000002e-06,
"loss": 1.1288,
"step": 90
},
{
"epoch": 0.037509377344336084,
"grad_norm": 5.53237247467041,
"learning_rate": 3.96e-06,
"loss": 1.0573,
"step": 100
},
{
"epoch": 0.04126031507876969,
"grad_norm": 14.836161613464355,
"learning_rate": 4.360000000000001e-06,
"loss": 1.0754,
"step": 110
},
{
"epoch": 0.0450112528132033,
"grad_norm": 8.877525329589844,
"learning_rate": 4.76e-06,
"loss": 1.0753,
"step": 120
},
{
"epoch": 0.04876219054763691,
"grad_norm": 10.311164855957031,
"learning_rate": 5.1600000000000006e-06,
"loss": 1.1246,
"step": 130
},
{
"epoch": 0.05251312828207052,
"grad_norm": 5.360109329223633,
"learning_rate": 5.560000000000001e-06,
"loss": 1.0218,
"step": 140
},
{
"epoch": 0.056264066016504126,
"grad_norm": 10.249133110046387,
"learning_rate": 5.9600000000000005e-06,
"loss": 1.0723,
"step": 150
},
{
"epoch": 0.06001500375093773,
"grad_norm": 7.881443500518799,
"learning_rate": 6.360000000000001e-06,
"loss": 1.0727,
"step": 160
},
{
"epoch": 0.06376594148537135,
"grad_norm": 5.892578601837158,
"learning_rate": 6.760000000000001e-06,
"loss": 1.0498,
"step": 170
},
{
"epoch": 0.06751687921980495,
"grad_norm": 6.164844512939453,
"learning_rate": 7.16e-06,
"loss": 1.1182,
"step": 180
},
{
"epoch": 0.07126781695423856,
"grad_norm": 6.351868629455566,
"learning_rate": 7.5600000000000005e-06,
"loss": 1.0735,
"step": 190
},
{
"epoch": 0.07501875468867217,
"grad_norm": 6.895458698272705,
"learning_rate": 7.960000000000002e-06,
"loss": 1.0503,
"step": 200
},
{
"epoch": 0.07876969242310577,
"grad_norm": 8.486842155456543,
"learning_rate": 8.36e-06,
"loss": 1.0965,
"step": 210
},
{
"epoch": 0.08252063015753938,
"grad_norm": 8.301511764526367,
"learning_rate": 8.76e-06,
"loss": 1.1157,
"step": 220
},
{
"epoch": 0.08627156789197299,
"grad_norm": 11.515487670898438,
"learning_rate": 9.16e-06,
"loss": 1.0854,
"step": 230
},
{
"epoch": 0.0900225056264066,
"grad_norm": 6.189631938934326,
"learning_rate": 9.56e-06,
"loss": 1.054,
"step": 240
},
{
"epoch": 0.09377344336084022,
"grad_norm": 4.8885393142700195,
"learning_rate": 9.960000000000001e-06,
"loss": 1.0693,
"step": 250
},
{
"epoch": 0.09752438109527382,
"grad_norm": 6.190073490142822,
"learning_rate": 1.036e-05,
"loss": 1.0786,
"step": 260
},
{
"epoch": 0.10127531882970743,
"grad_norm": 8.178174018859863,
"learning_rate": 1.0760000000000002e-05,
"loss": 1.0374,
"step": 270
},
{
"epoch": 0.10502625656414104,
"grad_norm": 5.824592113494873,
"learning_rate": 1.1160000000000002e-05,
"loss": 1.0829,
"step": 280
},
{
"epoch": 0.10877719429857464,
"grad_norm": 7.339807033538818,
"learning_rate": 1.156e-05,
"loss": 1.1085,
"step": 290
},
{
"epoch": 0.11252813203300825,
"grad_norm": 6.39154577255249,
"learning_rate": 1.196e-05,
"loss": 1.0505,
"step": 300
},
{
"epoch": 0.11627906976744186,
"grad_norm": 7.54710054397583,
"learning_rate": 1.236e-05,
"loss": 1.049,
"step": 310
},
{
"epoch": 0.12003000750187547,
"grad_norm": 10.610452651977539,
"learning_rate": 1.2760000000000001e-05,
"loss": 1.1105,
"step": 320
},
{
"epoch": 0.12378094523630907,
"grad_norm": 6.961548328399658,
"learning_rate": 1.3160000000000001e-05,
"loss": 1.0392,
"step": 330
},
{
"epoch": 0.1275318829707427,
"grad_norm": 8.800139427185059,
"learning_rate": 1.3560000000000002e-05,
"loss": 1.1473,
"step": 340
},
{
"epoch": 0.1312828207051763,
"grad_norm": 7.540011405944824,
"learning_rate": 1.396e-05,
"loss": 1.0891,
"step": 350
},
{
"epoch": 0.1350337584396099,
"grad_norm": 11.337075233459473,
"learning_rate": 1.4360000000000001e-05,
"loss": 1.0715,
"step": 360
},
{
"epoch": 0.13878469617404351,
"grad_norm": 5.6576457023620605,
"learning_rate": 1.4760000000000001e-05,
"loss": 1.0702,
"step": 370
},
{
"epoch": 0.14253563390847712,
"grad_norm": 8.98009967803955,
"learning_rate": 1.516e-05,
"loss": 1.0752,
"step": 380
},
{
"epoch": 0.14628657164291073,
"grad_norm": 4.932474613189697,
"learning_rate": 1.556e-05,
"loss": 1.0641,
"step": 390
},
{
"epoch": 0.15003750937734434,
"grad_norm": 6.130215644836426,
"learning_rate": 1.5960000000000003e-05,
"loss": 1.0133,
"step": 400
},
{
"epoch": 0.15378844711177794,
"grad_norm": 16.0273380279541,
"learning_rate": 1.636e-05,
"loss": 1.0442,
"step": 410
},
{
"epoch": 0.15753938484621155,
"grad_norm": 12.93301010131836,
"learning_rate": 1.6760000000000002e-05,
"loss": 1.1161,
"step": 420
},
{
"epoch": 0.16129032258064516,
"grad_norm": 9.27346420288086,
"learning_rate": 1.7160000000000002e-05,
"loss": 1.0539,
"step": 430
},
{
"epoch": 0.16504126031507876,
"grad_norm": 5.5671186447143555,
"learning_rate": 1.756e-05,
"loss": 0.9452,
"step": 440
},
{
"epoch": 0.16879219804951237,
"grad_norm": 7.939000606536865,
"learning_rate": 1.796e-05,
"loss": 1.0522,
"step": 450
},
{
"epoch": 0.17254313578394598,
"grad_norm": 9.265899658203125,
"learning_rate": 1.8360000000000004e-05,
"loss": 1.0866,
"step": 460
},
{
"epoch": 0.17629407351837958,
"grad_norm": 6.934913158416748,
"learning_rate": 1.876e-05,
"loss": 0.9723,
"step": 470
},
{
"epoch": 0.1800450112528132,
"grad_norm": 6.007977485656738,
"learning_rate": 1.916e-05,
"loss": 0.9742,
"step": 480
},
{
"epoch": 0.1837959489872468,
"grad_norm": 7.842029094696045,
"learning_rate": 1.9560000000000002e-05,
"loss": 0.9334,
"step": 490
},
{
"epoch": 0.18754688672168043,
"grad_norm": 10.53432559967041,
"learning_rate": 1.9960000000000002e-05,
"loss": 0.8706,
"step": 500
},
{
"epoch": 0.19129782445611404,
"grad_norm": 9.365771293640137,
"learning_rate": 1.997599359829288e-05,
"loss": 1.0399,
"step": 510
},
{
"epoch": 0.19504876219054765,
"grad_norm": 9.351228713989258,
"learning_rate": 1.99493198186183e-05,
"loss": 0.9525,
"step": 520
},
{
"epoch": 0.19879969992498125,
"grad_norm": 12.21917724609375,
"learning_rate": 1.992264603894372e-05,
"loss": 0.9793,
"step": 530
},
{
"epoch": 0.20255063765941486,
"grad_norm": 17.076719284057617,
"learning_rate": 1.9895972259269142e-05,
"loss": 1.0403,
"step": 540
},
{
"epoch": 0.20630157539384847,
"grad_norm": 6.928652286529541,
"learning_rate": 1.9869298479594562e-05,
"loss": 0.9047,
"step": 550
},
{
"epoch": 0.21005251312828208,
"grad_norm": 6.858879089355469,
"learning_rate": 1.984262469991998e-05,
"loss": 1.012,
"step": 560
},
{
"epoch": 0.21380345086271568,
"grad_norm": 5.987520217895508,
"learning_rate": 1.98159509202454e-05,
"loss": 0.9345,
"step": 570
},
{
"epoch": 0.2175543885971493,
"grad_norm": 12.161517143249512,
"learning_rate": 1.978927714057082e-05,
"loss": 0.9955,
"step": 580
},
{
"epoch": 0.2213053263315829,
"grad_norm": 9.229764938354492,
"learning_rate": 1.976260336089624e-05,
"loss": 0.998,
"step": 590
},
{
"epoch": 0.2250562640660165,
"grad_norm": 9.257465362548828,
"learning_rate": 1.973592958122166e-05,
"loss": 0.9882,
"step": 600
},
{
"epoch": 0.2288072018004501,
"grad_norm": 11.260259628295898,
"learning_rate": 1.970925580154708e-05,
"loss": 0.9727,
"step": 610
},
{
"epoch": 0.23255813953488372,
"grad_norm": 5.7551984786987305,
"learning_rate": 1.96825820218725e-05,
"loss": 0.9139,
"step": 620
},
{
"epoch": 0.23630907726931732,
"grad_norm": 7.264505863189697,
"learning_rate": 1.9655908242197922e-05,
"loss": 0.8718,
"step": 630
},
{
"epoch": 0.24006001500375093,
"grad_norm": 13.518917083740234,
"learning_rate": 1.9629234462523342e-05,
"loss": 1.0478,
"step": 640
},
{
"epoch": 0.24381095273818454,
"grad_norm": 7.133944034576416,
"learning_rate": 1.960256068284876e-05,
"loss": 0.951,
"step": 650
},
{
"epoch": 0.24756189047261815,
"grad_norm": 10.491629600524902,
"learning_rate": 1.957588690317418e-05,
"loss": 0.9271,
"step": 660
},
{
"epoch": 0.25131282820705175,
"grad_norm": 6.807431697845459,
"learning_rate": 1.95492131234996e-05,
"loss": 1.0804,
"step": 670
},
{
"epoch": 0.2550637659414854,
"grad_norm": 9.180730819702148,
"learning_rate": 1.9522539343825024e-05,
"loss": 0.9079,
"step": 680
},
{
"epoch": 0.25881470367591897,
"grad_norm": 6.459209442138672,
"learning_rate": 1.9495865564150443e-05,
"loss": 0.9989,
"step": 690
},
{
"epoch": 0.2625656414103526,
"grad_norm": 5.8546929359436035,
"learning_rate": 1.9469191784475863e-05,
"loss": 0.951,
"step": 700
},
{
"epoch": 0.2663165791447862,
"grad_norm": 10.301909446716309,
"learning_rate": 1.9442518004801282e-05,
"loss": 0.8549,
"step": 710
},
{
"epoch": 0.2700675168792198,
"grad_norm": 17.759777069091797,
"learning_rate": 1.9415844225126702e-05,
"loss": 1.1818,
"step": 720
},
{
"epoch": 0.2738184546136534,
"grad_norm": 7.105804920196533,
"learning_rate": 1.938917044545212e-05,
"loss": 1.034,
"step": 730
},
{
"epoch": 0.27756939234808703,
"grad_norm": 8.125602722167969,
"learning_rate": 1.936249666577754e-05,
"loss": 0.9509,
"step": 740
},
{
"epoch": 0.2813203300825206,
"grad_norm": 6.968907833099365,
"learning_rate": 1.933582288610296e-05,
"loss": 0.9292,
"step": 750
},
{
"epoch": 0.28507126781695424,
"grad_norm": 9.841052055358887,
"learning_rate": 1.930914910642838e-05,
"loss": 1.0401,
"step": 760
},
{
"epoch": 0.2888222055513878,
"grad_norm": 6.7177910804748535,
"learning_rate": 1.9282475326753804e-05,
"loss": 1.0079,
"step": 770
},
{
"epoch": 0.29257314328582146,
"grad_norm": 8.652711868286133,
"learning_rate": 1.9255801547079223e-05,
"loss": 0.8986,
"step": 780
},
{
"epoch": 0.29632408102025504,
"grad_norm": 7.266161918640137,
"learning_rate": 1.9229127767404643e-05,
"loss": 0.9805,
"step": 790
},
{
"epoch": 0.30007501875468867,
"grad_norm": 7.372107982635498,
"learning_rate": 1.9202453987730062e-05,
"loss": 1.0254,
"step": 800
},
{
"epoch": 0.3038259564891223,
"grad_norm": 6.467881202697754,
"learning_rate": 1.9175780208055482e-05,
"loss": 0.9931,
"step": 810
},
{
"epoch": 0.3075768942235559,
"grad_norm": 8.692418098449707,
"learning_rate": 1.9149106428380905e-05,
"loss": 0.8585,
"step": 820
},
{
"epoch": 0.3113278319579895,
"grad_norm": 7.981175422668457,
"learning_rate": 1.9122432648706325e-05,
"loss": 0.9164,
"step": 830
},
{
"epoch": 0.3150787696924231,
"grad_norm": 11.882697105407715,
"learning_rate": 1.9095758869031744e-05,
"loss": 1.0325,
"step": 840
},
{
"epoch": 0.31882970742685673,
"grad_norm": 10.736306190490723,
"learning_rate": 1.9069085089357164e-05,
"loss": 0.9888,
"step": 850
},
{
"epoch": 0.3225806451612903,
"grad_norm": 5.334744453430176,
"learning_rate": 1.9042411309682583e-05,
"loss": 0.9364,
"step": 860
},
{
"epoch": 0.32633158289572395,
"grad_norm": 6.579550743103027,
"learning_rate": 1.9015737530008003e-05,
"loss": 0.9395,
"step": 870
},
{
"epoch": 0.3300825206301575,
"grad_norm": 7.336994171142578,
"learning_rate": 1.8989063750333423e-05,
"loss": 0.9363,
"step": 880
},
{
"epoch": 0.33383345836459116,
"grad_norm": 9.523600578308105,
"learning_rate": 1.8962389970658842e-05,
"loss": 0.9405,
"step": 890
},
{
"epoch": 0.33758439609902474,
"grad_norm": 9.350625038146973,
"learning_rate": 1.8935716190984262e-05,
"loss": 1.0351,
"step": 900
},
{
"epoch": 0.3413353338334584,
"grad_norm": 9.00391674041748,
"learning_rate": 1.8909042411309685e-05,
"loss": 0.9721,
"step": 910
},
{
"epoch": 0.34508627156789196,
"grad_norm": 5.69331693649292,
"learning_rate": 1.8882368631635105e-05,
"loss": 0.8811,
"step": 920
},
{
"epoch": 0.3488372093023256,
"grad_norm": 6.127689361572266,
"learning_rate": 1.8855694851960524e-05,
"loss": 1.0079,
"step": 930
},
{
"epoch": 0.35258814703675917,
"grad_norm": 11.653777122497559,
"learning_rate": 1.8829021072285944e-05,
"loss": 0.9518,
"step": 940
},
{
"epoch": 0.3563390847711928,
"grad_norm": 7.30828332901001,
"learning_rate": 1.8802347292611363e-05,
"loss": 0.8464,
"step": 950
},
{
"epoch": 0.3600900225056264,
"grad_norm": 9.21927547454834,
"learning_rate": 1.8775673512936786e-05,
"loss": 1.0584,
"step": 960
},
{
"epoch": 0.36384096024006,
"grad_norm": 6.939789772033691,
"learning_rate": 1.8748999733262206e-05,
"loss": 0.9,
"step": 970
},
{
"epoch": 0.3675918979744936,
"grad_norm": 12.434165954589844,
"learning_rate": 1.8722325953587626e-05,
"loss": 1.015,
"step": 980
},
{
"epoch": 0.37134283570892723,
"grad_norm": 11.779828071594238,
"learning_rate": 1.8695652173913045e-05,
"loss": 0.9725,
"step": 990
},
{
"epoch": 0.37509377344336087,
"grad_norm": 12.166790962219238,
"learning_rate": 1.8668978394238465e-05,
"loss": 1.0591,
"step": 1000
},
{
"epoch": 0.37884471117779445,
"grad_norm": 8.87903881072998,
"learning_rate": 1.8642304614563884e-05,
"loss": 0.9767,
"step": 1010
},
{
"epoch": 0.3825956489122281,
"grad_norm": 5.176930904388428,
"learning_rate": 1.8615630834889304e-05,
"loss": 0.8934,
"step": 1020
},
{
"epoch": 0.38634658664666166,
"grad_norm": 7.772132396697998,
"learning_rate": 1.8588957055214724e-05,
"loss": 0.9488,
"step": 1030
},
{
"epoch": 0.3900975243810953,
"grad_norm": 10.097055435180664,
"learning_rate": 1.8562283275540143e-05,
"loss": 0.9725,
"step": 1040
},
{
"epoch": 0.3938484621155289,
"grad_norm": 10.014994621276855,
"learning_rate": 1.8535609495865566e-05,
"loss": 0.9432,
"step": 1050
},
{
"epoch": 0.3975993998499625,
"grad_norm": 10.885961532592773,
"learning_rate": 1.8508935716190986e-05,
"loss": 1.0393,
"step": 1060
},
{
"epoch": 0.4013503375843961,
"grad_norm": 7.621641635894775,
"learning_rate": 1.8482261936516406e-05,
"loss": 0.9801,
"step": 1070
},
{
"epoch": 0.4051012753188297,
"grad_norm": 6.268519878387451,
"learning_rate": 1.8455588156841825e-05,
"loss": 0.9922,
"step": 1080
},
{
"epoch": 0.4088522130532633,
"grad_norm": 6.714245796203613,
"learning_rate": 1.8428914377167245e-05,
"loss": 1.0355,
"step": 1090
},
{
"epoch": 0.41260315078769694,
"grad_norm": 11.643074035644531,
"learning_rate": 1.8402240597492668e-05,
"loss": 1.0575,
"step": 1100
},
{
"epoch": 0.4163540885221305,
"grad_norm": 6.439828395843506,
"learning_rate": 1.8375566817818087e-05,
"loss": 0.9101,
"step": 1110
},
{
"epoch": 0.42010502625656415,
"grad_norm": 6.833279609680176,
"learning_rate": 1.8348893038143507e-05,
"loss": 0.935,
"step": 1120
},
{
"epoch": 0.42385596399099773,
"grad_norm": 7.262381553649902,
"learning_rate": 1.8322219258468927e-05,
"loss": 0.977,
"step": 1130
},
{
"epoch": 0.42760690172543137,
"grad_norm": 5.480360984802246,
"learning_rate": 1.8295545478794346e-05,
"loss": 0.8673,
"step": 1140
},
{
"epoch": 0.43135783945986494,
"grad_norm": 8.4745454788208,
"learning_rate": 1.8268871699119766e-05,
"loss": 0.88,
"step": 1150
},
{
"epoch": 0.4351087771942986,
"grad_norm": 16.769878387451172,
"learning_rate": 1.8242197919445185e-05,
"loss": 0.9576,
"step": 1160
},
{
"epoch": 0.43885971492873216,
"grad_norm": 7.4179582595825195,
"learning_rate": 1.8215524139770605e-05,
"loss": 0.9263,
"step": 1170
},
{
"epoch": 0.4426106526631658,
"grad_norm": 11.899470329284668,
"learning_rate": 1.8188850360096028e-05,
"loss": 0.9328,
"step": 1180
},
{
"epoch": 0.4463615903975994,
"grad_norm": 8.113855361938477,
"learning_rate": 1.8162176580421448e-05,
"loss": 0.9684,
"step": 1190
},
{
"epoch": 0.450112528132033,
"grad_norm": 7.619154453277588,
"learning_rate": 1.8135502800746867e-05,
"loss": 0.918,
"step": 1200
},
{
"epoch": 0.45386346586646664,
"grad_norm": 7.7961602210998535,
"learning_rate": 1.8108829021072287e-05,
"loss": 0.8574,
"step": 1210
},
{
"epoch": 0.4576144036009002,
"grad_norm": 8.734787940979004,
"learning_rate": 1.8082155241397707e-05,
"loss": 0.9009,
"step": 1220
},
{
"epoch": 0.46136534133533386,
"grad_norm": 5.773232936859131,
"learning_rate": 1.8055481461723126e-05,
"loss": 1.0554,
"step": 1230
},
{
"epoch": 0.46511627906976744,
"grad_norm": 7.872585773468018,
"learning_rate": 1.802880768204855e-05,
"loss": 0.8688,
"step": 1240
},
{
"epoch": 0.46886721680420107,
"grad_norm": 7.2498602867126465,
"learning_rate": 1.800213390237397e-05,
"loss": 0.9726,
"step": 1250
},
{
"epoch": 0.47261815453863465,
"grad_norm": 11.007004737854004,
"learning_rate": 1.797546012269939e-05,
"loss": 0.9338,
"step": 1260
},
{
"epoch": 0.4763690922730683,
"grad_norm": 10.418313980102539,
"learning_rate": 1.7948786343024808e-05,
"loss": 0.9217,
"step": 1270
},
{
"epoch": 0.48012003000750186,
"grad_norm": 11.935880661010742,
"learning_rate": 1.7922112563350228e-05,
"loss": 0.8656,
"step": 1280
},
{
"epoch": 0.4838709677419355,
"grad_norm": 10.331807136535645,
"learning_rate": 1.789543878367565e-05,
"loss": 0.9948,
"step": 1290
},
{
"epoch": 0.4876219054763691,
"grad_norm": 7.979977607727051,
"learning_rate": 1.7868765004001067e-05,
"loss": 0.9068,
"step": 1300
},
{
"epoch": 0.4913728432108027,
"grad_norm": 7.865904808044434,
"learning_rate": 1.7842091224326486e-05,
"loss": 0.8362,
"step": 1310
},
{
"epoch": 0.4951237809452363,
"grad_norm": 11.6406888961792,
"learning_rate": 1.781541744465191e-05,
"loss": 1.0061,
"step": 1320
},
{
"epoch": 0.4988747186796699,
"grad_norm": 9.274069786071777,
"learning_rate": 1.778874366497733e-05,
"loss": 0.9448,
"step": 1330
},
{
"epoch": 0.5026256564141035,
"grad_norm": 9.999556541442871,
"learning_rate": 1.776206988530275e-05,
"loss": 0.9188,
"step": 1340
},
{
"epoch": 0.5063765941485371,
"grad_norm": 10.032958984375,
"learning_rate": 1.773539610562817e-05,
"loss": 0.9794,
"step": 1350
},
{
"epoch": 0.5101275318829708,
"grad_norm": 5.453114032745361,
"learning_rate": 1.7708722325953588e-05,
"loss": 1.0102,
"step": 1360
},
{
"epoch": 0.5138784696174044,
"grad_norm": 13.257373809814453,
"learning_rate": 1.7682048546279008e-05,
"loss": 0.9801,
"step": 1370
},
{
"epoch": 0.5176294073518379,
"grad_norm": 5.355706691741943,
"learning_rate": 1.765537476660443e-05,
"loss": 0.9126,
"step": 1380
},
{
"epoch": 0.5213803450862715,
"grad_norm": 9.768399238586426,
"learning_rate": 1.762870098692985e-05,
"loss": 0.9423,
"step": 1390
},
{
"epoch": 0.5251312828207052,
"grad_norm": 8.362143516540527,
"learning_rate": 1.760202720725527e-05,
"loss": 1.0289,
"step": 1400
},
{
"epoch": 0.5288822205551388,
"grad_norm": 10.58354377746582,
"learning_rate": 1.757535342758069e-05,
"loss": 0.9593,
"step": 1410
},
{
"epoch": 0.5326331582895724,
"grad_norm": 8.964977264404297,
"learning_rate": 1.754867964790611e-05,
"loss": 1.1002,
"step": 1420
},
{
"epoch": 0.536384096024006,
"grad_norm": 11.886764526367188,
"learning_rate": 1.7522005868231532e-05,
"loss": 0.842,
"step": 1430
},
{
"epoch": 0.5401350337584396,
"grad_norm": 9.155001640319824,
"learning_rate": 1.7495332088556948e-05,
"loss": 1.0402,
"step": 1440
},
{
"epoch": 0.5438859714928732,
"grad_norm": 7.865649223327637,
"learning_rate": 1.7468658308882368e-05,
"loss": 0.9502,
"step": 1450
},
{
"epoch": 0.5476369092273068,
"grad_norm": 8.232137680053711,
"learning_rate": 1.744198452920779e-05,
"loss": 0.9042,
"step": 1460
},
{
"epoch": 0.5513878469617405,
"grad_norm": 7.428460597991943,
"learning_rate": 1.741531074953321e-05,
"loss": 0.8664,
"step": 1470
},
{
"epoch": 0.5551387846961741,
"grad_norm": 6.769949913024902,
"learning_rate": 1.738863696985863e-05,
"loss": 0.9676,
"step": 1480
},
{
"epoch": 0.5588897224306076,
"grad_norm": 7.262323379516602,
"learning_rate": 1.736196319018405e-05,
"loss": 0.9461,
"step": 1490
},
{
"epoch": 0.5626406601650412,
"grad_norm": 7.46332311630249,
"learning_rate": 1.733528941050947e-05,
"loss": 0.9928,
"step": 1500
},
{
"epoch": 0.5663915978994749,
"grad_norm": 13.346348762512207,
"learning_rate": 1.7308615630834892e-05,
"loss": 0.9645,
"step": 1510
},
{
"epoch": 0.5701425356339085,
"grad_norm": 7.057946681976318,
"learning_rate": 1.7281941851160312e-05,
"loss": 0.872,
"step": 1520
},
{
"epoch": 0.5738934733683421,
"grad_norm": 11.920793533325195,
"learning_rate": 1.725526807148573e-05,
"loss": 0.9084,
"step": 1530
},
{
"epoch": 0.5776444111027756,
"grad_norm": 4.696298122406006,
"learning_rate": 1.722859429181115e-05,
"loss": 0.9184,
"step": 1540
},
{
"epoch": 0.5813953488372093,
"grad_norm": 9.623963356018066,
"learning_rate": 1.720192051213657e-05,
"loss": 0.8924,
"step": 1550
},
{
"epoch": 0.5851462865716429,
"grad_norm": 10.262091636657715,
"learning_rate": 1.717524673246199e-05,
"loss": 0.9476,
"step": 1560
},
{
"epoch": 0.5888972243060765,
"grad_norm": 10.587578773498535,
"learning_rate": 1.7148572952787413e-05,
"loss": 0.9443,
"step": 1570
},
{
"epoch": 0.5926481620405101,
"grad_norm": 8.189558029174805,
"learning_rate": 1.7121899173112833e-05,
"loss": 0.9245,
"step": 1580
},
{
"epoch": 0.5963990997749438,
"grad_norm": 7.582670211791992,
"learning_rate": 1.709522539343825e-05,
"loss": 0.8533,
"step": 1590
},
{
"epoch": 0.6001500375093773,
"grad_norm": 8.973713874816895,
"learning_rate": 1.7068551613763672e-05,
"loss": 0.9197,
"step": 1600
},
{
"epoch": 0.6039009752438109,
"grad_norm": 7.140238285064697,
"learning_rate": 1.7041877834089092e-05,
"loss": 0.8815,
"step": 1610
},
{
"epoch": 0.6076519129782446,
"grad_norm": 7.83927059173584,
"learning_rate": 1.701520405441451e-05,
"loss": 0.9782,
"step": 1620
},
{
"epoch": 0.6114028507126782,
"grad_norm": 6.876523494720459,
"learning_rate": 1.698853027473993e-05,
"loss": 0.9575,
"step": 1630
},
{
"epoch": 0.6151537884471118,
"grad_norm": 10.362568855285645,
"learning_rate": 1.696185649506535e-05,
"loss": 0.8977,
"step": 1640
},
{
"epoch": 0.6189047261815454,
"grad_norm": 9.509383201599121,
"learning_rate": 1.6935182715390774e-05,
"loss": 0.996,
"step": 1650
},
{
"epoch": 0.622655663915979,
"grad_norm": 5.023642539978027,
"learning_rate": 1.6908508935716193e-05,
"loss": 0.9131,
"step": 1660
},
{
"epoch": 0.6264066016504126,
"grad_norm": 6.320276260375977,
"learning_rate": 1.6881835156041613e-05,
"loss": 0.9765,
"step": 1670
},
{
"epoch": 0.6301575393848462,
"grad_norm": 10.261762619018555,
"learning_rate": 1.6855161376367033e-05,
"loss": 0.9057,
"step": 1680
},
{
"epoch": 0.6339084771192798,
"grad_norm": 8.115468978881836,
"learning_rate": 1.6828487596692452e-05,
"loss": 0.8892,
"step": 1690
},
{
"epoch": 0.6376594148537135,
"grad_norm": 10.657661437988281,
"learning_rate": 1.6801813817017875e-05,
"loss": 0.9186,
"step": 1700
},
{
"epoch": 0.641410352588147,
"grad_norm": 7.065814018249512,
"learning_rate": 1.6775140037343295e-05,
"loss": 0.8878,
"step": 1710
},
{
"epoch": 0.6451612903225806,
"grad_norm": 8.048439979553223,
"learning_rate": 1.6748466257668714e-05,
"loss": 0.946,
"step": 1720
},
{
"epoch": 0.6489122280570142,
"grad_norm": 10.228202819824219,
"learning_rate": 1.672179247799413e-05,
"loss": 0.838,
"step": 1730
},
{
"epoch": 0.6526631657914479,
"grad_norm": 10.011300086975098,
"learning_rate": 1.6695118698319554e-05,
"loss": 1.0565,
"step": 1740
},
{
"epoch": 0.6564141035258815,
"grad_norm": 8.266985893249512,
"learning_rate": 1.6668444918644973e-05,
"loss": 0.9523,
"step": 1750
},
{
"epoch": 0.660165041260315,
"grad_norm": 7.511131763458252,
"learning_rate": 1.6641771138970393e-05,
"loss": 1.0325,
"step": 1760
},
{
"epoch": 0.6639159789947486,
"grad_norm": 7.235232830047607,
"learning_rate": 1.6615097359295813e-05,
"loss": 0.9197,
"step": 1770
},
{
"epoch": 0.6676669167291823,
"grad_norm": 8.137916564941406,
"learning_rate": 1.6588423579621232e-05,
"loss": 0.886,
"step": 1780
},
{
"epoch": 0.6714178544636159,
"grad_norm": 7.320621013641357,
"learning_rate": 1.6561749799946655e-05,
"loss": 0.8866,
"step": 1790
},
{
"epoch": 0.6751687921980495,
"grad_norm": 8.104268074035645,
"learning_rate": 1.6535076020272075e-05,
"loss": 0.9554,
"step": 1800
},
{
"epoch": 0.6789197299324832,
"grad_norm": 8.669350624084473,
"learning_rate": 1.6508402240597494e-05,
"loss": 0.907,
"step": 1810
},
{
"epoch": 0.6826706676669168,
"grad_norm": 7.718722820281982,
"learning_rate": 1.6481728460922914e-05,
"loss": 0.9931,
"step": 1820
},
{
"epoch": 0.6864216054013503,
"grad_norm": 6.479692459106445,
"learning_rate": 1.6455054681248334e-05,
"loss": 0.9669,
"step": 1830
},
{
"epoch": 0.6901725431357839,
"grad_norm": 5.159636497497559,
"learning_rate": 1.6428380901573757e-05,
"loss": 1.0003,
"step": 1840
},
{
"epoch": 0.6939234808702176,
"grad_norm": 6.043707847595215,
"learning_rate": 1.6401707121899176e-05,
"loss": 0.894,
"step": 1850
},
{
"epoch": 0.6976744186046512,
"grad_norm": 8.509610176086426,
"learning_rate": 1.6375033342224596e-05,
"loss": 1.0656,
"step": 1860
},
{
"epoch": 0.7014253563390848,
"grad_norm": 10.496292114257812,
"learning_rate": 1.6348359562550015e-05,
"loss": 0.9162,
"step": 1870
},
{
"epoch": 0.7051762940735183,
"grad_norm": 9.357151985168457,
"learning_rate": 1.6321685782875435e-05,
"loss": 0.8575,
"step": 1880
},
{
"epoch": 0.708927231807952,
"grad_norm": 7.78256368637085,
"learning_rate": 1.6295012003200855e-05,
"loss": 0.7904,
"step": 1890
},
{
"epoch": 0.7126781695423856,
"grad_norm": 6.14832067489624,
"learning_rate": 1.6268338223526274e-05,
"loss": 0.8348,
"step": 1900
},
{
"epoch": 0.7164291072768192,
"grad_norm": 7.879366874694824,
"learning_rate": 1.6241664443851694e-05,
"loss": 0.8826,
"step": 1910
},
{
"epoch": 0.7201800450112528,
"grad_norm": 6.204752445220947,
"learning_rate": 1.6214990664177114e-05,
"loss": 0.9157,
"step": 1920
},
{
"epoch": 0.7239309827456865,
"grad_norm": 7.274019241333008,
"learning_rate": 1.6188316884502537e-05,
"loss": 0.8869,
"step": 1930
},
{
"epoch": 0.72768192048012,
"grad_norm": 5.929676055908203,
"learning_rate": 1.6161643104827956e-05,
"loss": 0.9372,
"step": 1940
},
{
"epoch": 0.7314328582145536,
"grad_norm": 9.161755561828613,
"learning_rate": 1.6134969325153376e-05,
"loss": 0.9211,
"step": 1950
},
{
"epoch": 0.7351837959489872,
"grad_norm": 5.079675674438477,
"learning_rate": 1.6108295545478795e-05,
"loss": 0.8084,
"step": 1960
},
{
"epoch": 0.7389347336834209,
"grad_norm": 8.15173053741455,
"learning_rate": 1.6081621765804215e-05,
"loss": 1.0033,
"step": 1970
},
{
"epoch": 0.7426856714178545,
"grad_norm": 6.805727005004883,
"learning_rate": 1.6054947986129638e-05,
"loss": 1.0074,
"step": 1980
},
{
"epoch": 0.746436609152288,
"grad_norm": 8.05391788482666,
"learning_rate": 1.6028274206455058e-05,
"loss": 0.9942,
"step": 1990
},
{
"epoch": 0.7501875468867217,
"grad_norm": 6.02817440032959,
"learning_rate": 1.6001600426780477e-05,
"loss": 1.0494,
"step": 2000
},
{
"epoch": 0.7539384846211553,
"grad_norm": 9.404801368713379,
"learning_rate": 1.5974926647105897e-05,
"loss": 0.9451,
"step": 2010
},
{
"epoch": 0.7576894223555889,
"grad_norm": 5.526783466339111,
"learning_rate": 1.5948252867431316e-05,
"loss": 0.9378,
"step": 2020
},
{
"epoch": 0.7614403600900225,
"grad_norm": 8.972588539123535,
"learning_rate": 1.5921579087756736e-05,
"loss": 0.9808,
"step": 2030
},
{
"epoch": 0.7651912978244562,
"grad_norm": 4.961981296539307,
"learning_rate": 1.5894905308082156e-05,
"loss": 0.9078,
"step": 2040
},
{
"epoch": 0.7689422355588897,
"grad_norm": 3.8509440422058105,
"learning_rate": 1.5868231528407575e-05,
"loss": 1.0518,
"step": 2050
},
{
"epoch": 0.7726931732933233,
"grad_norm": 7.673577785491943,
"learning_rate": 1.5841557748732995e-05,
"loss": 0.9075,
"step": 2060
},
{
"epoch": 0.7764441110277569,
"grad_norm": 8.731016159057617,
"learning_rate": 1.5814883969058418e-05,
"loss": 0.9208,
"step": 2070
},
{
"epoch": 0.7801950487621906,
"grad_norm": 6.979492664337158,
"learning_rate": 1.5788210189383838e-05,
"loss": 0.8977,
"step": 2080
},
{
"epoch": 0.7839459864966242,
"grad_norm": 8.666240692138672,
"learning_rate": 1.5761536409709257e-05,
"loss": 0.899,
"step": 2090
},
{
"epoch": 0.7876969242310577,
"grad_norm": 6.528694152832031,
"learning_rate": 1.5734862630034677e-05,
"loss": 0.844,
"step": 2100
},
{
"epoch": 0.7914478619654913,
"grad_norm": 7.253232479095459,
"learning_rate": 1.5708188850360096e-05,
"loss": 0.7766,
"step": 2110
},
{
"epoch": 0.795198799699925,
"grad_norm": 6.888519287109375,
"learning_rate": 1.568151507068552e-05,
"loss": 0.9393,
"step": 2120
},
{
"epoch": 0.7989497374343586,
"grad_norm": 6.408233165740967,
"learning_rate": 1.565484129101094e-05,
"loss": 1.0171,
"step": 2130
},
{
"epoch": 0.8027006751687922,
"grad_norm": 9.36056137084961,
"learning_rate": 1.562816751133636e-05,
"loss": 0.9127,
"step": 2140
},
{
"epoch": 0.8064516129032258,
"grad_norm": 11.695134162902832,
"learning_rate": 1.5601493731661778e-05,
"loss": 1.0232,
"step": 2150
},
{
"epoch": 0.8102025506376594,
"grad_norm": 6.716568470001221,
"learning_rate": 1.5574819951987198e-05,
"loss": 0.9904,
"step": 2160
},
{
"epoch": 0.813953488372093,
"grad_norm": 5.994268417358398,
"learning_rate": 1.5548146172312617e-05,
"loss": 0.8897,
"step": 2170
},
{
"epoch": 0.8177044261065266,
"grad_norm": 8.419204711914062,
"learning_rate": 1.5521472392638037e-05,
"loss": 0.8315,
"step": 2180
},
{
"epoch": 0.8214553638409603,
"grad_norm": 6.702762603759766,
"learning_rate": 1.5494798612963457e-05,
"loss": 0.9393,
"step": 2190
},
{
"epoch": 0.8252063015753939,
"grad_norm": 9.53264045715332,
"learning_rate": 1.5468124833288876e-05,
"loss": 1.0074,
"step": 2200
},
{
"epoch": 0.8289572393098275,
"grad_norm": 5.6720476150512695,
"learning_rate": 1.54414510536143e-05,
"loss": 0.7935,
"step": 2210
},
{
"epoch": 0.832708177044261,
"grad_norm": 7.338003158569336,
"learning_rate": 1.541477727393972e-05,
"loss": 0.898,
"step": 2220
},
{
"epoch": 0.8364591147786947,
"grad_norm": 6.529892444610596,
"learning_rate": 1.538810349426514e-05,
"loss": 0.8197,
"step": 2230
},
{
"epoch": 0.8402100525131283,
"grad_norm": 9.971487045288086,
"learning_rate": 1.5361429714590558e-05,
"loss": 0.9551,
"step": 2240
},
{
"epoch": 0.8439609902475619,
"grad_norm": 5.594128608703613,
"learning_rate": 1.5334755934915978e-05,
"loss": 1.1114,
"step": 2250
},
{
"epoch": 0.8477119279819955,
"grad_norm": 5.723794460296631,
"learning_rate": 1.53080821552414e-05,
"loss": 0.9341,
"step": 2260
},
{
"epoch": 0.8514628657164292,
"grad_norm": 5.728211879730225,
"learning_rate": 1.528140837556682e-05,
"loss": 0.9961,
"step": 2270
},
{
"epoch": 0.8552138034508627,
"grad_norm": 7.517919063568115,
"learning_rate": 1.525473459589224e-05,
"loss": 0.8542,
"step": 2280
},
{
"epoch": 0.8589647411852963,
"grad_norm": 4.70159387588501,
"learning_rate": 1.522806081621766e-05,
"loss": 1.0348,
"step": 2290
},
{
"epoch": 0.8627156789197299,
"grad_norm": 5.308437347412109,
"learning_rate": 1.5201387036543081e-05,
"loss": 0.9645,
"step": 2300
},
{
"epoch": 0.8664666166541636,
"grad_norm": 5.659054756164551,
"learning_rate": 1.5174713256868499e-05,
"loss": 0.8317,
"step": 2310
},
{
"epoch": 0.8702175543885972,
"grad_norm": 5.970462799072266,
"learning_rate": 1.5148039477193918e-05,
"loss": 0.9889,
"step": 2320
},
{
"epoch": 0.8739684921230307,
"grad_norm": 5.605343818664551,
"learning_rate": 1.512136569751934e-05,
"loss": 0.8545,
"step": 2330
},
{
"epoch": 0.8777194298574643,
"grad_norm": 9.641878128051758,
"learning_rate": 1.509469191784476e-05,
"loss": 1.0026,
"step": 2340
},
{
"epoch": 0.881470367591898,
"grad_norm": 9.36474323272705,
"learning_rate": 1.5068018138170179e-05,
"loss": 0.927,
"step": 2350
},
{
"epoch": 0.8852213053263316,
"grad_norm": 8.28822135925293,
"learning_rate": 1.50413443584956e-05,
"loss": 0.9955,
"step": 2360
},
{
"epoch": 0.8889722430607652,
"grad_norm": 7.714781284332275,
"learning_rate": 1.501467057882102e-05,
"loss": 0.9366,
"step": 2370
},
{
"epoch": 0.8927231807951987,
"grad_norm": 3.879307508468628,
"learning_rate": 1.498799679914644e-05,
"loss": 0.9002,
"step": 2380
},
{
"epoch": 0.8964741185296324,
"grad_norm": 5.898133754730225,
"learning_rate": 1.4961323019471861e-05,
"loss": 0.8564,
"step": 2390
},
{
"epoch": 0.900225056264066,
"grad_norm": 6.275933265686035,
"learning_rate": 1.493464923979728e-05,
"loss": 0.9471,
"step": 2400
},
{
"epoch": 0.9039759939984996,
"grad_norm": 6.680263519287109,
"learning_rate": 1.4907975460122702e-05,
"loss": 0.8609,
"step": 2410
},
{
"epoch": 0.9077269317329333,
"grad_norm": 7.0698676109313965,
"learning_rate": 1.4881301680448121e-05,
"loss": 0.7758,
"step": 2420
},
{
"epoch": 0.9114778694673669,
"grad_norm": 10.66848373413086,
"learning_rate": 1.4854627900773541e-05,
"loss": 0.8225,
"step": 2430
},
{
"epoch": 0.9152288072018004,
"grad_norm": 8.714693069458008,
"learning_rate": 1.4827954121098962e-05,
"loss": 0.8777,
"step": 2440
},
{
"epoch": 0.918979744936234,
"grad_norm": 31.062232971191406,
"learning_rate": 1.480128034142438e-05,
"loss": 1.0204,
"step": 2450
},
{
"epoch": 0.9227306826706677,
"grad_norm": 11.140453338623047,
"learning_rate": 1.47746065617498e-05,
"loss": 0.9509,
"step": 2460
},
{
"epoch": 0.9264816204051013,
"grad_norm": 6.338695526123047,
"learning_rate": 1.4747932782075221e-05,
"loss": 0.8125,
"step": 2470
},
{
"epoch": 0.9302325581395349,
"grad_norm": 8.720800399780273,
"learning_rate": 1.472125900240064e-05,
"loss": 0.8114,
"step": 2480
},
{
"epoch": 0.9339834958739685,
"grad_norm": 11.407164573669434,
"learning_rate": 1.469458522272606e-05,
"loss": 1.0623,
"step": 2490
},
{
"epoch": 0.9377344336084021,
"grad_norm": 6.310417652130127,
"learning_rate": 1.4667911443051482e-05,
"loss": 0.9014,
"step": 2500
},
{
"epoch": 0.9414853713428357,
"grad_norm": 5.94149923324585,
"learning_rate": 1.4641237663376901e-05,
"loss": 0.7657,
"step": 2510
},
{
"epoch": 0.9452363090772693,
"grad_norm": 9.478999137878418,
"learning_rate": 1.4614563883702323e-05,
"loss": 0.8412,
"step": 2520
},
{
"epoch": 0.9489872468117029,
"grad_norm": 8.735868453979492,
"learning_rate": 1.4587890104027742e-05,
"loss": 0.9043,
"step": 2530
},
{
"epoch": 0.9527381845461366,
"grad_norm": 6.766534328460693,
"learning_rate": 1.4561216324353162e-05,
"loss": 0.9538,
"step": 2540
},
{
"epoch": 0.9564891222805701,
"grad_norm": 18.577468872070312,
"learning_rate": 1.4534542544678583e-05,
"loss": 0.9458,
"step": 2550
},
{
"epoch": 0.9602400600150037,
"grad_norm": 9.248088836669922,
"learning_rate": 1.4507868765004003e-05,
"loss": 0.913,
"step": 2560
},
{
"epoch": 0.9639909977494373,
"grad_norm": 7.771203994750977,
"learning_rate": 1.4481194985329422e-05,
"loss": 0.931,
"step": 2570
},
{
"epoch": 0.967741935483871,
"grad_norm": 7.330334663391113,
"learning_rate": 1.4454521205654844e-05,
"loss": 0.9681,
"step": 2580
},
{
"epoch": 0.9714928732183046,
"grad_norm": 6.74515438079834,
"learning_rate": 1.4427847425980263e-05,
"loss": 0.9477,
"step": 2590
},
{
"epoch": 0.9752438109527382,
"grad_norm": 8.954100608825684,
"learning_rate": 1.4401173646305681e-05,
"loss": 0.8958,
"step": 2600
},
{
"epoch": 0.9789947486871718,
"grad_norm": 11.33262825012207,
"learning_rate": 1.4374499866631103e-05,
"loss": 0.7998,
"step": 2610
},
{
"epoch": 0.9827456864216054,
"grad_norm": 7.142065048217773,
"learning_rate": 1.4347826086956522e-05,
"loss": 0.9897,
"step": 2620
},
{
"epoch": 0.986496624156039,
"grad_norm": 8.922056198120117,
"learning_rate": 1.4321152307281942e-05,
"loss": 0.9172,
"step": 2630
},
{
"epoch": 0.9902475618904726,
"grad_norm": 5.288200378417969,
"learning_rate": 1.4294478527607363e-05,
"loss": 0.8836,
"step": 2640
},
{
"epoch": 0.9939984996249063,
"grad_norm": 10.067593574523926,
"learning_rate": 1.4267804747932783e-05,
"loss": 1.0019,
"step": 2650
},
{
"epoch": 0.9977494373593399,
"grad_norm": 5.186861515045166,
"learning_rate": 1.4241130968258204e-05,
"loss": 0.8005,
"step": 2660
},
{
"epoch": 1.0,
"eval_accuracy": 0.5890295358649789,
"eval_f1_macro": 0.5855792301386851,
"eval_f1_weighted": 0.5883403945261724,
"eval_loss": 0.9054797887802124,
"eval_precision_macro": 0.5964531108356991,
"eval_precision_weighted": 0.5920764019753845,
"eval_recall_macro": 0.5799936335134275,
"eval_recall_weighted": 0.5890295358649789,
"eval_runtime": 4.8377,
"eval_samples_per_second": 489.903,
"eval_steps_per_second": 61.393,
"step": 2666
},
{
"epoch": 1.0015003750937734,
"grad_norm": 7.454843044281006,
"learning_rate": 1.4214457188583624e-05,
"loss": 0.9261,
"step": 2670
},
{
"epoch": 1.005251312828207,
"grad_norm": 7.612959384918213,
"learning_rate": 1.4187783408909043e-05,
"loss": 0.8656,
"step": 2680
},
{
"epoch": 1.0090022505626406,
"grad_norm": 5.689546585083008,
"learning_rate": 1.4161109629234465e-05,
"loss": 0.8539,
"step": 2690
},
{
"epoch": 1.0127531882970742,
"grad_norm": 9.812941551208496,
"learning_rate": 1.4134435849559884e-05,
"loss": 0.8154,
"step": 2700
},
{
"epoch": 1.016504126031508,
"grad_norm": 6.9208550453186035,
"learning_rate": 1.4107762069885304e-05,
"loss": 0.8441,
"step": 2710
},
{
"epoch": 1.0202550637659416,
"grad_norm": 5.310056686401367,
"learning_rate": 1.4081088290210725e-05,
"loss": 0.9471,
"step": 2720
},
{
"epoch": 1.0240060015003751,
"grad_norm": 9.985223770141602,
"learning_rate": 1.4054414510536145e-05,
"loss": 0.853,
"step": 2730
},
{
"epoch": 1.0277569392348087,
"grad_norm": 21.524646759033203,
"learning_rate": 1.4027740730861563e-05,
"loss": 0.9408,
"step": 2740
},
{
"epoch": 1.0315078769692423,
"grad_norm": 9.250083923339844,
"learning_rate": 1.4001066951186984e-05,
"loss": 0.8023,
"step": 2750
},
{
"epoch": 1.0352588147036759,
"grad_norm": 6.028738975524902,
"learning_rate": 1.3974393171512404e-05,
"loss": 0.7849,
"step": 2760
},
{
"epoch": 1.0390097524381094,
"grad_norm": 9.787884712219238,
"learning_rate": 1.3947719391837823e-05,
"loss": 0.7474,
"step": 2770
},
{
"epoch": 1.042760690172543,
"grad_norm": 12.639663696289062,
"learning_rate": 1.3921045612163244e-05,
"loss": 0.8167,
"step": 2780
},
{
"epoch": 1.0465116279069768,
"grad_norm": 15.691644668579102,
"learning_rate": 1.3894371832488664e-05,
"loss": 0.7467,
"step": 2790
},
{
"epoch": 1.0502625656414104,
"grad_norm": 7.864928722381592,
"learning_rate": 1.3867698052814085e-05,
"loss": 0.9476,
"step": 2800
},
{
"epoch": 1.054013503375844,
"grad_norm": 8.662647247314453,
"learning_rate": 1.3841024273139505e-05,
"loss": 0.8529,
"step": 2810
},
{
"epoch": 1.0577644411102776,
"grad_norm": 8.244277954101562,
"learning_rate": 1.3814350493464925e-05,
"loss": 0.753,
"step": 2820
},
{
"epoch": 1.0615153788447111,
"grad_norm": 8.806965827941895,
"learning_rate": 1.3787676713790346e-05,
"loss": 0.7577,
"step": 2830
},
{
"epoch": 1.0652663165791447,
"grad_norm": 11.864466667175293,
"learning_rate": 1.3761002934115766e-05,
"loss": 0.8227,
"step": 2840
},
{
"epoch": 1.0690172543135783,
"grad_norm": 16.477638244628906,
"learning_rate": 1.3734329154441187e-05,
"loss": 0.8603,
"step": 2850
},
{
"epoch": 1.072768192048012,
"grad_norm": 10.029014587402344,
"learning_rate": 1.3707655374766607e-05,
"loss": 0.7507,
"step": 2860
},
{
"epoch": 1.0765191297824457,
"grad_norm": 42.02882766723633,
"learning_rate": 1.3680981595092026e-05,
"loss": 0.8731,
"step": 2870
},
{
"epoch": 1.0802700675168793,
"grad_norm": 11.340489387512207,
"learning_rate": 1.3654307815417447e-05,
"loss": 0.8736,
"step": 2880
},
{
"epoch": 1.0840210052513128,
"grad_norm": 10.736079216003418,
"learning_rate": 1.3627634035742865e-05,
"loss": 0.7387,
"step": 2890
},
{
"epoch": 1.0877719429857464,
"grad_norm": 12.158968925476074,
"learning_rate": 1.3600960256068285e-05,
"loss": 0.8563,
"step": 2900
},
{
"epoch": 1.09152288072018,
"grad_norm": 4.968686103820801,
"learning_rate": 1.3574286476393706e-05,
"loss": 0.8865,
"step": 2910
},
{
"epoch": 1.0952738184546136,
"grad_norm": 9.05169677734375,
"learning_rate": 1.3547612696719126e-05,
"loss": 0.9706,
"step": 2920
},
{
"epoch": 1.0990247561890472,
"grad_norm": 8.993448257446289,
"learning_rate": 1.3520938917044546e-05,
"loss": 0.7936,
"step": 2930
},
{
"epoch": 1.102775693923481,
"grad_norm": 9.852548599243164,
"learning_rate": 1.3494265137369967e-05,
"loss": 0.9188,
"step": 2940
},
{
"epoch": 1.1065266316579145,
"grad_norm": 8.509963035583496,
"learning_rate": 1.3467591357695386e-05,
"loss": 0.9182,
"step": 2950
},
{
"epoch": 1.1102775693923481,
"grad_norm": 9.74703311920166,
"learning_rate": 1.3440917578020806e-05,
"loss": 0.8979,
"step": 2960
},
{
"epoch": 1.1140285071267817,
"grad_norm": 11.76938247680664,
"learning_rate": 1.3414243798346227e-05,
"loss": 0.8431,
"step": 2970
},
{
"epoch": 1.1177794448612153,
"grad_norm": 8.194916725158691,
"learning_rate": 1.3387570018671647e-05,
"loss": 0.8794,
"step": 2980
},
{
"epoch": 1.1215303825956489,
"grad_norm": 5.259307861328125,
"learning_rate": 1.3360896238997068e-05,
"loss": 0.8688,
"step": 2990
},
{
"epoch": 1.1252813203300824,
"grad_norm": 8.892224311828613,
"learning_rate": 1.3334222459322488e-05,
"loss": 0.8924,
"step": 3000
},
{
"epoch": 1.129032258064516,
"grad_norm": 10.505491256713867,
"learning_rate": 1.3307548679647908e-05,
"loss": 0.8347,
"step": 3010
},
{
"epoch": 1.1327831957989498,
"grad_norm": 4.74807071685791,
"learning_rate": 1.3280874899973329e-05,
"loss": 0.7728,
"step": 3020
},
{
"epoch": 1.1365341335333834,
"grad_norm": 12.980900764465332,
"learning_rate": 1.3254201120298747e-05,
"loss": 0.7915,
"step": 3030
},
{
"epoch": 1.140285071267817,
"grad_norm": 12.24691104888916,
"learning_rate": 1.3227527340624166e-05,
"loss": 0.8422,
"step": 3040
},
{
"epoch": 1.1440360090022506,
"grad_norm": 6.215153217315674,
"learning_rate": 1.3200853560949588e-05,
"loss": 0.8067,
"step": 3050
},
{
"epoch": 1.1477869467366841,
"grad_norm": 15.73306941986084,
"learning_rate": 1.3174179781275007e-05,
"loss": 0.8135,
"step": 3060
},
{
"epoch": 1.1515378844711177,
"grad_norm": 12.068921089172363,
"learning_rate": 1.3147506001600427e-05,
"loss": 0.7305,
"step": 3070
},
{
"epoch": 1.1552888222055513,
"grad_norm": 6.1044464111328125,
"learning_rate": 1.3120832221925848e-05,
"loss": 0.9578,
"step": 3080
},
{
"epoch": 1.159039759939985,
"grad_norm": 10.416324615478516,
"learning_rate": 1.3094158442251268e-05,
"loss": 0.8362,
"step": 3090
},
{
"epoch": 1.1627906976744187,
"grad_norm": 13.548623085021973,
"learning_rate": 1.3067484662576687e-05,
"loss": 0.7862,
"step": 3100
},
{
"epoch": 1.1665416354088523,
"grad_norm": 9.015273094177246,
"learning_rate": 1.3040810882902109e-05,
"loss": 0.8432,
"step": 3110
},
{
"epoch": 1.1702925731432858,
"grad_norm": 4.893497467041016,
"learning_rate": 1.3014137103227528e-05,
"loss": 0.719,
"step": 3120
},
{
"epoch": 1.1740435108777194,
"grad_norm": 12.783862113952637,
"learning_rate": 1.298746332355295e-05,
"loss": 0.9088,
"step": 3130
},
{
"epoch": 1.177794448612153,
"grad_norm": 10.826465606689453,
"learning_rate": 1.296078954387837e-05,
"loss": 0.8758,
"step": 3140
},
{
"epoch": 1.1815453863465866,
"grad_norm": 9.32836627960205,
"learning_rate": 1.2934115764203789e-05,
"loss": 0.7643,
"step": 3150
},
{
"epoch": 1.1852963240810204,
"grad_norm": 9.504363059997559,
"learning_rate": 1.290744198452921e-05,
"loss": 0.8174,
"step": 3160
},
{
"epoch": 1.189047261815454,
"grad_norm": 12.839066505432129,
"learning_rate": 1.2880768204854628e-05,
"loss": 0.7992,
"step": 3170
},
{
"epoch": 1.1927981995498875,
"grad_norm": 9.912968635559082,
"learning_rate": 1.2854094425180048e-05,
"loss": 0.7793,
"step": 3180
},
{
"epoch": 1.196549137284321,
"grad_norm": 9.632975578308105,
"learning_rate": 1.2827420645505469e-05,
"loss": 0.8062,
"step": 3190
},
{
"epoch": 1.2003000750187547,
"grad_norm": 15.091144561767578,
"learning_rate": 1.2800746865830889e-05,
"loss": 0.8319,
"step": 3200
},
{
"epoch": 1.2040510127531883,
"grad_norm": 9.834930419921875,
"learning_rate": 1.2774073086156308e-05,
"loss": 0.7946,
"step": 3210
},
{
"epoch": 1.2078019504876218,
"grad_norm": 9.097467422485352,
"learning_rate": 1.274739930648173e-05,
"loss": 0.7423,
"step": 3220
},
{
"epoch": 1.2115528882220554,
"grad_norm": 7.097741603851318,
"learning_rate": 1.272072552680715e-05,
"loss": 0.7668,
"step": 3230
},
{
"epoch": 1.215303825956489,
"grad_norm": 16.66200828552246,
"learning_rate": 1.269405174713257e-05,
"loss": 0.8292,
"step": 3240
},
{
"epoch": 1.2190547636909228,
"grad_norm": 4.819615840911865,
"learning_rate": 1.266737796745799e-05,
"loss": 0.7641,
"step": 3250
},
{
"epoch": 1.2228057014253564,
"grad_norm": 12.379060745239258,
"learning_rate": 1.264070418778341e-05,
"loss": 0.7749,
"step": 3260
},
{
"epoch": 1.22655663915979,
"grad_norm": 10.446650505065918,
"learning_rate": 1.2614030408108831e-05,
"loss": 0.8306,
"step": 3270
},
{
"epoch": 1.2303075768942235,
"grad_norm": 13.330952644348145,
"learning_rate": 1.258735662843425e-05,
"loss": 0.7924,
"step": 3280
},
{
"epoch": 1.2340585146286571,
"grad_norm": 11.163646697998047,
"learning_rate": 1.256068284875967e-05,
"loss": 0.8505,
"step": 3290
},
{
"epoch": 1.2378094523630907,
"grad_norm": 10.235424995422363,
"learning_rate": 1.2534009069085092e-05,
"loss": 0.7111,
"step": 3300
},
{
"epoch": 1.2415603900975243,
"grad_norm": 9.529205322265625,
"learning_rate": 1.2507335289410511e-05,
"loss": 0.9559,
"step": 3310
},
{
"epoch": 1.245311327831958,
"grad_norm": 9.511346817016602,
"learning_rate": 1.2480661509735929e-05,
"loss": 1.0107,
"step": 3320
},
{
"epoch": 1.2490622655663917,
"grad_norm": 5.115582466125488,
"learning_rate": 1.245398773006135e-05,
"loss": 0.8128,
"step": 3330
},
{
"epoch": 1.2528132033008252,
"grad_norm": 10.270365715026855,
"learning_rate": 1.242731395038677e-05,
"loss": 0.7537,
"step": 3340
},
{
"epoch": 1.2565641410352588,
"grad_norm": 15.309682846069336,
"learning_rate": 1.240064017071219e-05,
"loss": 0.776,
"step": 3350
},
{
"epoch": 1.2603150787696924,
"grad_norm": 6.9617414474487305,
"learning_rate": 1.2373966391037611e-05,
"loss": 0.7818,
"step": 3360
},
{
"epoch": 1.264066016504126,
"grad_norm": 14.111533164978027,
"learning_rate": 1.234729261136303e-05,
"loss": 0.8766,
"step": 3370
},
{
"epoch": 1.2678169542385596,
"grad_norm": 15.513258934020996,
"learning_rate": 1.2320618831688452e-05,
"loss": 0.8124,
"step": 3380
},
{
"epoch": 1.2715678919729934,
"grad_norm": 10.617011070251465,
"learning_rate": 1.2293945052013872e-05,
"loss": 0.7367,
"step": 3390
},
{
"epoch": 1.275318829707427,
"grad_norm": 10.756956100463867,
"learning_rate": 1.2267271272339291e-05,
"loss": 0.9371,
"step": 3400
},
{
"epoch": 1.2790697674418605,
"grad_norm": 20.27239990234375,
"learning_rate": 1.2240597492664712e-05,
"loss": 0.7812,
"step": 3410
},
{
"epoch": 1.282820705176294,
"grad_norm": 13.26762580871582,
"learning_rate": 1.2213923712990132e-05,
"loss": 0.9214,
"step": 3420
},
{
"epoch": 1.2865716429107277,
"grad_norm": 6.740780830383301,
"learning_rate": 1.2187249933315552e-05,
"loss": 0.7254,
"step": 3430
},
{
"epoch": 1.2903225806451613,
"grad_norm": 8.460843086242676,
"learning_rate": 1.2160576153640973e-05,
"loss": 0.8793,
"step": 3440
},
{
"epoch": 1.2940735183795948,
"grad_norm": 8.37424373626709,
"learning_rate": 1.2133902373966393e-05,
"loss": 0.7178,
"step": 3450
},
{
"epoch": 1.2978244561140286,
"grad_norm": 9.57453441619873,
"learning_rate": 1.210722859429181e-05,
"loss": 0.9584,
"step": 3460
},
{
"epoch": 1.301575393848462,
"grad_norm": 15.27446460723877,
"learning_rate": 1.2080554814617232e-05,
"loss": 0.7314,
"step": 3470
},
{
"epoch": 1.3053263315828958,
"grad_norm": 16.266162872314453,
"learning_rate": 1.2053881034942651e-05,
"loss": 0.8651,
"step": 3480
},
{
"epoch": 1.3090772693173294,
"grad_norm": 9.161102294921875,
"learning_rate": 1.2027207255268071e-05,
"loss": 0.7086,
"step": 3490
},
{
"epoch": 1.312828207051763,
"grad_norm": 12.645145416259766,
"learning_rate": 1.2000533475593492e-05,
"loss": 0.874,
"step": 3500
},
{
"epoch": 1.3165791447861965,
"grad_norm": 9.018929481506348,
"learning_rate": 1.1973859695918912e-05,
"loss": 0.7457,
"step": 3510
},
{
"epoch": 1.32033008252063,
"grad_norm": 10.96903133392334,
"learning_rate": 1.1947185916244333e-05,
"loss": 0.865,
"step": 3520
},
{
"epoch": 1.3240810202550637,
"grad_norm": 15.08077621459961,
"learning_rate": 1.1920512136569753e-05,
"loss": 0.8127,
"step": 3530
},
{
"epoch": 1.3278319579894973,
"grad_norm": 6.171741962432861,
"learning_rate": 1.1893838356895173e-05,
"loss": 0.7038,
"step": 3540
},
{
"epoch": 1.331582895723931,
"grad_norm": 12.167604446411133,
"learning_rate": 1.1867164577220594e-05,
"loss": 0.7373,
"step": 3550
},
{
"epoch": 1.3353338334583646,
"grad_norm": 12.859063148498535,
"learning_rate": 1.1840490797546013e-05,
"loss": 0.8292,
"step": 3560
},
{
"epoch": 1.3390847711927982,
"grad_norm": 9.17769718170166,
"learning_rate": 1.1813817017871435e-05,
"loss": 0.8117,
"step": 3570
},
{
"epoch": 1.3428357089272318,
"grad_norm": 7.380620002746582,
"learning_rate": 1.1787143238196854e-05,
"loss": 0.7943,
"step": 3580
},
{
"epoch": 1.3465866466616654,
"grad_norm": 19.143110275268555,
"learning_rate": 1.1760469458522274e-05,
"loss": 0.7798,
"step": 3590
},
{
"epoch": 1.350337584396099,
"grad_norm": 14.915560722351074,
"learning_rate": 1.1733795678847695e-05,
"loss": 0.8568,
"step": 3600
},
{
"epoch": 1.3540885221305325,
"grad_norm": 16.487377166748047,
"learning_rate": 1.1707121899173113e-05,
"loss": 0.8586,
"step": 3610
},
{
"epoch": 1.3578394598649663,
"grad_norm": 9.255929946899414,
"learning_rate": 1.1680448119498533e-05,
"loss": 0.7957,
"step": 3620
},
{
"epoch": 1.3615903975994,
"grad_norm": 12.38227653503418,
"learning_rate": 1.1653774339823954e-05,
"loss": 0.7957,
"step": 3630
},
{
"epoch": 1.3653413353338335,
"grad_norm": 10.949649810791016,
"learning_rate": 1.1627100560149374e-05,
"loss": 0.6871,
"step": 3640
},
{
"epoch": 1.369092273068267,
"grad_norm": 7.265697956085205,
"learning_rate": 1.1600426780474793e-05,
"loss": 0.7341,
"step": 3650
},
{
"epoch": 1.3728432108027007,
"grad_norm": 12.582711219787598,
"learning_rate": 1.1573753000800215e-05,
"loss": 0.8242,
"step": 3660
},
{
"epoch": 1.3765941485371342,
"grad_norm": 12.345062255859375,
"learning_rate": 1.1547079221125634e-05,
"loss": 0.8768,
"step": 3670
},
{
"epoch": 1.3803450862715678,
"grad_norm": 8.697713851928711,
"learning_rate": 1.1520405441451054e-05,
"loss": 0.855,
"step": 3680
},
{
"epoch": 1.3840960240060016,
"grad_norm": 9.254758834838867,
"learning_rate": 1.1493731661776475e-05,
"loss": 0.909,
"step": 3690
},
{
"epoch": 1.387846961740435,
"grad_norm": 9.739770889282227,
"learning_rate": 1.1467057882101895e-05,
"loss": 0.8582,
"step": 3700
},
{
"epoch": 1.3915978994748688,
"grad_norm": 12.004996299743652,
"learning_rate": 1.1440384102427316e-05,
"loss": 0.7344,
"step": 3710
},
{
"epoch": 1.3953488372093024,
"grad_norm": 13.092066764831543,
"learning_rate": 1.1413710322752736e-05,
"loss": 0.8916,
"step": 3720
},
{
"epoch": 1.399099774943736,
"grad_norm": 12.259298324584961,
"learning_rate": 1.1387036543078155e-05,
"loss": 0.9096,
"step": 3730
},
{
"epoch": 1.4028507126781695,
"grad_norm": 8.312166213989258,
"learning_rate": 1.1360362763403577e-05,
"loss": 0.8647,
"step": 3740
},
{
"epoch": 1.406601650412603,
"grad_norm": 8.59150218963623,
"learning_rate": 1.1333688983728995e-05,
"loss": 0.9202,
"step": 3750
},
{
"epoch": 1.4103525881470367,
"grad_norm": 8.444820404052734,
"learning_rate": 1.1307015204054414e-05,
"loss": 0.7343,
"step": 3760
},
{
"epoch": 1.4141035258814703,
"grad_norm": 12.232796669006348,
"learning_rate": 1.1280341424379836e-05,
"loss": 0.7329,
"step": 3770
},
{
"epoch": 1.417854463615904,
"grad_norm": 9.038057327270508,
"learning_rate": 1.1253667644705255e-05,
"loss": 0.8751,
"step": 3780
},
{
"epoch": 1.4216054013503376,
"grad_norm": 5.729677200317383,
"learning_rate": 1.1226993865030675e-05,
"loss": 0.7319,
"step": 3790
},
{
"epoch": 1.4253563390847712,
"grad_norm": 7.777376651763916,
"learning_rate": 1.1200320085356096e-05,
"loss": 0.802,
"step": 3800
},
{
"epoch": 1.4291072768192048,
"grad_norm": 13.165481567382812,
"learning_rate": 1.1173646305681516e-05,
"loss": 0.7195,
"step": 3810
},
{
"epoch": 1.4328582145536384,
"grad_norm": 10.966960906982422,
"learning_rate": 1.1146972526006935e-05,
"loss": 0.8234,
"step": 3820
},
{
"epoch": 1.436609152288072,
"grad_norm": 8.237056732177734,
"learning_rate": 1.1120298746332357e-05,
"loss": 0.7832,
"step": 3830
},
{
"epoch": 1.4403600900225055,
"grad_norm": 10.419988632202148,
"learning_rate": 1.1093624966657776e-05,
"loss": 0.8292,
"step": 3840
},
{
"epoch": 1.4441110277569393,
"grad_norm": 14.655726432800293,
"learning_rate": 1.1066951186983198e-05,
"loss": 0.8523,
"step": 3850
},
{
"epoch": 1.447861965491373,
"grad_norm": 10.38304328918457,
"learning_rate": 1.1040277407308617e-05,
"loss": 0.856,
"step": 3860
},
{
"epoch": 1.4516129032258065,
"grad_norm": 13.249422073364258,
"learning_rate": 1.1013603627634037e-05,
"loss": 0.8403,
"step": 3870
},
{
"epoch": 1.45536384096024,
"grad_norm": 9.854536056518555,
"learning_rate": 1.0986929847959458e-05,
"loss": 0.794,
"step": 3880
},
{
"epoch": 1.4591147786946737,
"grad_norm": 11.48951530456543,
"learning_rate": 1.0960256068284876e-05,
"loss": 0.7569,
"step": 3890
},
{
"epoch": 1.4628657164291072,
"grad_norm": 8.955044746398926,
"learning_rate": 1.0933582288610296e-05,
"loss": 0.8064,
"step": 3900
},
{
"epoch": 1.4666166541635408,
"grad_norm": 16.088743209838867,
"learning_rate": 1.0906908508935717e-05,
"loss": 0.8518,
"step": 3910
},
{
"epoch": 1.4703675918979746,
"grad_norm": 9.207806587219238,
"learning_rate": 1.0880234729261137e-05,
"loss": 0.875,
"step": 3920
},
{
"epoch": 1.474118529632408,
"grad_norm": 18.738187789916992,
"learning_rate": 1.0853560949586556e-05,
"loss": 0.8164,
"step": 3930
},
{
"epoch": 1.4778694673668418,
"grad_norm": 10.138594627380371,
"learning_rate": 1.0826887169911977e-05,
"loss": 0.791,
"step": 3940
},
{
"epoch": 1.4816204051012754,
"grad_norm": 9.635621070861816,
"learning_rate": 1.0800213390237397e-05,
"loss": 0.7878,
"step": 3950
},
{
"epoch": 1.485371342835709,
"grad_norm": 9.569879531860352,
"learning_rate": 1.0773539610562818e-05,
"loss": 0.8404,
"step": 3960
},
{
"epoch": 1.4891222805701425,
"grad_norm": 9.855542182922363,
"learning_rate": 1.0746865830888238e-05,
"loss": 0.8726,
"step": 3970
},
{
"epoch": 1.492873218304576,
"grad_norm": 16.710786819458008,
"learning_rate": 1.0720192051213658e-05,
"loss": 0.8706,
"step": 3980
},
{
"epoch": 1.49662415603901,
"grad_norm": 13.603216171264648,
"learning_rate": 1.0693518271539079e-05,
"loss": 0.8437,
"step": 3990
},
{
"epoch": 1.5003750937734432,
"grad_norm": 11.3872652053833,
"learning_rate": 1.0666844491864499e-05,
"loss": 0.6512,
"step": 4000
},
{
"epoch": 1.504126031507877,
"grad_norm": 10.2975492477417,
"learning_rate": 1.0640170712189918e-05,
"loss": 0.8774,
"step": 4010
},
{
"epoch": 1.5078769692423106,
"grad_norm": 7.741751194000244,
"learning_rate": 1.061349693251534e-05,
"loss": 0.7528,
"step": 4020
},
{
"epoch": 1.5116279069767442,
"grad_norm": 9.902315139770508,
"learning_rate": 1.0586823152840759e-05,
"loss": 0.7995,
"step": 4030
},
{
"epoch": 1.5153788447111778,
"grad_norm": 11.541082382202148,
"learning_rate": 1.0560149373166177e-05,
"loss": 0.7694,
"step": 4040
},
{
"epoch": 1.5191297824456114,
"grad_norm": 8.56485366821289,
"learning_rate": 1.0533475593491598e-05,
"loss": 0.8002,
"step": 4050
},
{
"epoch": 1.5228807201800452,
"grad_norm": 8.866626739501953,
"learning_rate": 1.0506801813817018e-05,
"loss": 0.792,
"step": 4060
},
{
"epoch": 1.5266316579144785,
"grad_norm": 10.332854270935059,
"learning_rate": 1.0480128034142438e-05,
"loss": 0.7378,
"step": 4070
},
{
"epoch": 1.5303825956489123,
"grad_norm": 8.805913925170898,
"learning_rate": 1.0453454254467859e-05,
"loss": 0.8287,
"step": 4080
},
{
"epoch": 1.5341335333833457,
"grad_norm": 10.885342597961426,
"learning_rate": 1.0426780474793278e-05,
"loss": 0.8454,
"step": 4090
},
{
"epoch": 1.5378844711177795,
"grad_norm": 11.047041893005371,
"learning_rate": 1.04001066951187e-05,
"loss": 0.8955,
"step": 4100
},
{
"epoch": 1.541635408852213,
"grad_norm": 12.287060737609863,
"learning_rate": 1.037343291544412e-05,
"loss": 0.9106,
"step": 4110
},
{
"epoch": 1.5453863465866466,
"grad_norm": 7.6913628578186035,
"learning_rate": 1.0346759135769539e-05,
"loss": 0.8287,
"step": 4120
},
{
"epoch": 1.5491372843210802,
"grad_norm": 12.864625930786133,
"learning_rate": 1.032008535609496e-05,
"loss": 0.8176,
"step": 4130
},
{
"epoch": 1.5528882220555138,
"grad_norm": 17.12616539001465,
"learning_rate": 1.029341157642038e-05,
"loss": 0.8964,
"step": 4140
},
{
"epoch": 1.5566391597899476,
"grad_norm": 9.076611518859863,
"learning_rate": 1.02667377967458e-05,
"loss": 0.8222,
"step": 4150
},
{
"epoch": 1.560390097524381,
"grad_norm": 9.327693939208984,
"learning_rate": 1.0240064017071221e-05,
"loss": 0.9358,
"step": 4160
},
{
"epoch": 1.5641410352588148,
"grad_norm": 7.653916358947754,
"learning_rate": 1.021339023739664e-05,
"loss": 0.7721,
"step": 4170
},
{
"epoch": 1.5678919729932483,
"grad_norm": 10.110307693481445,
"learning_rate": 1.0186716457722058e-05,
"loss": 0.9346,
"step": 4180
},
{
"epoch": 1.571642910727682,
"grad_norm": 11.298696517944336,
"learning_rate": 1.016004267804748e-05,
"loss": 0.8456,
"step": 4190
},
{
"epoch": 1.5753938484621155,
"grad_norm": 13.459417343139648,
"learning_rate": 1.01333688983729e-05,
"loss": 0.7815,
"step": 4200
},
{
"epoch": 1.579144786196549,
"grad_norm": 16.08547592163086,
"learning_rate": 1.0106695118698319e-05,
"loss": 0.7656,
"step": 4210
},
{
"epoch": 1.5828957239309829,
"grad_norm": 8.995433807373047,
"learning_rate": 1.008002133902374e-05,
"loss": 0.8248,
"step": 4220
},
{
"epoch": 1.5866466616654162,
"grad_norm": 10.426254272460938,
"learning_rate": 1.005334755934916e-05,
"loss": 0.7957,
"step": 4230
},
{
"epoch": 1.59039759939985,
"grad_norm": 8.310003280639648,
"learning_rate": 1.0026673779674581e-05,
"loss": 0.8313,
"step": 4240
},
{
"epoch": 1.5941485371342836,
"grad_norm": 14.415204048156738,
"learning_rate": 1e-05,
"loss": 0.7711,
"step": 4250
},
{
"epoch": 1.5978994748687172,
"grad_norm": 8.948083877563477,
"learning_rate": 9.97332622032542e-06,
"loss": 0.7868,
"step": 4260
},
{
"epoch": 1.6016504126031508,
"grad_norm": 16.681766510009766,
"learning_rate": 9.946652440650842e-06,
"loss": 0.8633,
"step": 4270
},
{
"epoch": 1.6054013503375844,
"grad_norm": 11.883402824401855,
"learning_rate": 9.919978660976261e-06,
"loss": 0.8195,
"step": 4280
},
{
"epoch": 1.6091522880720182,
"grad_norm": 11.386548042297363,
"learning_rate": 9.893304881301681e-06,
"loss": 0.7621,
"step": 4290
},
{
"epoch": 1.6129032258064515,
"grad_norm": 13.255663871765137,
"learning_rate": 9.8666311016271e-06,
"loss": 1.0233,
"step": 4300
},
{
"epoch": 1.6166541635408853,
"grad_norm": 10.955714225769043,
"learning_rate": 9.839957321952522e-06,
"loss": 0.9456,
"step": 4310
},
{
"epoch": 1.6204051012753187,
"grad_norm": 7.624833583831787,
"learning_rate": 9.813283542277942e-06,
"loss": 0.9029,
"step": 4320
},
{
"epoch": 1.6241560390097525,
"grad_norm": 8.860147476196289,
"learning_rate": 9.786609762603361e-06,
"loss": 0.835,
"step": 4330
},
{
"epoch": 1.627906976744186,
"grad_norm": 13.29971981048584,
"learning_rate": 9.759935982928782e-06,
"loss": 0.848,
"step": 4340
},
{
"epoch": 1.6316579144786196,
"grad_norm": 10.151264190673828,
"learning_rate": 9.733262203254202e-06,
"loss": 0.7443,
"step": 4350
},
{
"epoch": 1.6354088522130532,
"grad_norm": 14.21789264678955,
"learning_rate": 9.706588423579622e-06,
"loss": 0.908,
"step": 4360
},
{
"epoch": 1.6391597899474868,
"grad_norm": 7.94905424118042,
"learning_rate": 9.679914643905041e-06,
"loss": 0.6919,
"step": 4370
},
{
"epoch": 1.6429107276819206,
"grad_norm": 8.60908031463623,
"learning_rate": 9.653240864230463e-06,
"loss": 0.7309,
"step": 4380
},
{
"epoch": 1.646661665416354,
"grad_norm": 15.03842544555664,
"learning_rate": 9.626567084555882e-06,
"loss": 0.9343,
"step": 4390
},
{
"epoch": 1.6504126031507877,
"grad_norm": 11.684754371643066,
"learning_rate": 9.599893304881302e-06,
"loss": 0.7532,
"step": 4400
},
{
"epoch": 1.6541635408852213,
"grad_norm": 6.24261999130249,
"learning_rate": 9.573219525206723e-06,
"loss": 0.8449,
"step": 4410
},
{
"epoch": 1.657914478619655,
"grad_norm": 5.580635070800781,
"learning_rate": 9.546545745532143e-06,
"loss": 0.644,
"step": 4420
},
{
"epoch": 1.6616654163540885,
"grad_norm": 13.382287979125977,
"learning_rate": 9.519871965857564e-06,
"loss": 0.8713,
"step": 4430
},
{
"epoch": 1.665416354088522,
"grad_norm": 11.218451499938965,
"learning_rate": 9.493198186182982e-06,
"loss": 0.6552,
"step": 4440
},
{
"epoch": 1.6691672918229559,
"grad_norm": 16.548782348632812,
"learning_rate": 9.466524406508403e-06,
"loss": 0.8024,
"step": 4450
},
{
"epoch": 1.6729182295573892,
"grad_norm": 17.210647583007812,
"learning_rate": 9.439850626833823e-06,
"loss": 0.7543,
"step": 4460
},
{
"epoch": 1.676669167291823,
"grad_norm": 13.630977630615234,
"learning_rate": 9.413176847159243e-06,
"loss": 0.8754,
"step": 4470
},
{
"epoch": 1.6804201050262566,
"grad_norm": 13.967558860778809,
"learning_rate": 9.386503067484664e-06,
"loss": 0.7699,
"step": 4480
},
{
"epoch": 1.6841710427606902,
"grad_norm": 11.707578659057617,
"learning_rate": 9.359829287810083e-06,
"loss": 0.8324,
"step": 4490
},
{
"epoch": 1.6879219804951238,
"grad_norm": 9.124420166015625,
"learning_rate": 9.333155508135505e-06,
"loss": 0.794,
"step": 4500
},
{
"epoch": 1.6916729182295573,
"grad_norm": 10.910788536071777,
"learning_rate": 9.306481728460923e-06,
"loss": 0.8241,
"step": 4510
},
{
"epoch": 1.6954238559639911,
"grad_norm": 13.6180419921875,
"learning_rate": 9.279807948786344e-06,
"loss": 0.8882,
"step": 4520
},
{
"epoch": 1.6991747936984245,
"grad_norm": 7.055276393890381,
"learning_rate": 9.253134169111764e-06,
"loss": 0.9011,
"step": 4530
},
{
"epoch": 1.7029257314328583,
"grad_norm": 14.100971221923828,
"learning_rate": 9.226460389437183e-06,
"loss": 0.8026,
"step": 4540
},
{
"epoch": 1.7066766691672917,
"grad_norm": 6.9184441566467285,
"learning_rate": 9.199786609762605e-06,
"loss": 0.6888,
"step": 4550
},
{
"epoch": 1.7104276069017255,
"grad_norm": 9.915225982666016,
"learning_rate": 9.173112830088024e-06,
"loss": 0.8456,
"step": 4560
},
{
"epoch": 1.714178544636159,
"grad_norm": 11.1101655960083,
"learning_rate": 9.146439050413445e-06,
"loss": 0.8979,
"step": 4570
},
{
"epoch": 1.7179294823705926,
"grad_norm": 11.128944396972656,
"learning_rate": 9.119765270738863e-06,
"loss": 0.8386,
"step": 4580
},
{
"epoch": 1.7216804201050264,
"grad_norm": 8.845916748046875,
"learning_rate": 9.093091491064285e-06,
"loss": 0.8375,
"step": 4590
},
{
"epoch": 1.7254313578394598,
"grad_norm": 12.3989839553833,
"learning_rate": 9.066417711389704e-06,
"loss": 0.7884,
"step": 4600
},
{
"epoch": 1.7291822955738936,
"grad_norm": 8.899964332580566,
"learning_rate": 9.039743931715126e-06,
"loss": 0.8391,
"step": 4610
},
{
"epoch": 1.732933233308327,
"grad_norm": 11.830737113952637,
"learning_rate": 9.013070152040545e-06,
"loss": 0.836,
"step": 4620
},
{
"epoch": 1.7366841710427607,
"grad_norm": 14.875555038452148,
"learning_rate": 8.986396372365965e-06,
"loss": 0.8148,
"step": 4630
},
{
"epoch": 1.7404351087771943,
"grad_norm": 8.44090461730957,
"learning_rate": 8.959722592691386e-06,
"loss": 0.7033,
"step": 4640
},
{
"epoch": 1.744186046511628,
"grad_norm": 7.954046726226807,
"learning_rate": 8.933048813016804e-06,
"loss": 0.8364,
"step": 4650
},
{
"epoch": 1.7479369842460615,
"grad_norm": 14.886021614074707,
"learning_rate": 8.906375033342225e-06,
"loss": 0.7641,
"step": 4660
},
{
"epoch": 1.751687921980495,
"grad_norm": 15.42341136932373,
"learning_rate": 8.879701253667645e-06,
"loss": 0.7152,
"step": 4670
},
{
"epoch": 1.7554388597149289,
"grad_norm": 18.62801742553711,
"learning_rate": 8.853027473993066e-06,
"loss": 0.9192,
"step": 4680
},
{
"epoch": 1.7591897974493622,
"grad_norm": 9.787707328796387,
"learning_rate": 8.826353694318486e-06,
"loss": 1.0479,
"step": 4690
},
{
"epoch": 1.762940735183796,
"grad_norm": 10.803950309753418,
"learning_rate": 8.799679914643906e-06,
"loss": 0.7217,
"step": 4700
},
{
"epoch": 1.7666916729182296,
"grad_norm": 5.519962787628174,
"learning_rate": 8.773006134969327e-06,
"loss": 0.8044,
"step": 4710
},
{
"epoch": 1.7704426106526632,
"grad_norm": 10.77694320678711,
"learning_rate": 8.746332355294745e-06,
"loss": 0.7759,
"step": 4720
},
{
"epoch": 1.7741935483870968,
"grad_norm": 8.671502113342285,
"learning_rate": 8.719658575620166e-06,
"loss": 0.834,
"step": 4730
},
{
"epoch": 1.7779444861215303,
"grad_norm": 10.25809097290039,
"learning_rate": 8.692984795945586e-06,
"loss": 0.7088,
"step": 4740
},
{
"epoch": 1.7816954238559641,
"grad_norm": 11.049978256225586,
"learning_rate": 8.666311016271007e-06,
"loss": 0.8777,
"step": 4750
},
{
"epoch": 1.7854463615903975,
"grad_norm": 6.090721130371094,
"learning_rate": 8.639637236596427e-06,
"loss": 0.8311,
"step": 4760
},
{
"epoch": 1.7891972993248313,
"grad_norm": 7.393324375152588,
"learning_rate": 8.612963456921846e-06,
"loss": 0.7623,
"step": 4770
},
{
"epoch": 1.7929482370592649,
"grad_norm": 9.985932350158691,
"learning_rate": 8.586289677247268e-06,
"loss": 0.7164,
"step": 4780
},
{
"epoch": 1.7966991747936985,
"grad_norm": 23.15224266052246,
"learning_rate": 8.559615897572687e-06,
"loss": 0.8139,
"step": 4790
},
{
"epoch": 1.800450112528132,
"grad_norm": 15.539804458618164,
"learning_rate": 8.532942117898107e-06,
"loss": 0.9038,
"step": 4800
},
{
"epoch": 1.8042010502625656,
"grad_norm": 20.424936294555664,
"learning_rate": 8.506268338223526e-06,
"loss": 0.9228,
"step": 4810
},
{
"epoch": 1.8079519879969994,
"grad_norm": 12.960927963256836,
"learning_rate": 8.479594558548948e-06,
"loss": 0.8129,
"step": 4820
},
{
"epoch": 1.8117029257314328,
"grad_norm": 12.578907012939453,
"learning_rate": 8.452920778874367e-06,
"loss": 0.7919,
"step": 4830
},
{
"epoch": 1.8154538634658666,
"grad_norm": 9.88344955444336,
"learning_rate": 8.426246999199787e-06,
"loss": 0.8888,
"step": 4840
},
{
"epoch": 1.8192048012003,
"grad_norm": 9.531432151794434,
"learning_rate": 8.399573219525208e-06,
"loss": 0.8074,
"step": 4850
},
{
"epoch": 1.8229557389347337,
"grad_norm": 10.701923370361328,
"learning_rate": 8.372899439850628e-06,
"loss": 0.8598,
"step": 4860
},
{
"epoch": 1.8267066766691673,
"grad_norm": 10.894915580749512,
"learning_rate": 8.346225660176047e-06,
"loss": 0.6588,
"step": 4870
},
{
"epoch": 1.8304576144036009,
"grad_norm": 9.2036714553833,
"learning_rate": 8.319551880501467e-06,
"loss": 0.8323,
"step": 4880
},
{
"epoch": 1.8342085521380345,
"grad_norm": 8.6634521484375,
"learning_rate": 8.292878100826888e-06,
"loss": 0.7526,
"step": 4890
},
{
"epoch": 1.837959489872468,
"grad_norm": 14.781025886535645,
"learning_rate": 8.266204321152308e-06,
"loss": 0.6999,
"step": 4900
},
{
"epoch": 1.8417104276069018,
"grad_norm": 12.273209571838379,
"learning_rate": 8.239530541477728e-06,
"loss": 0.6734,
"step": 4910
},
{
"epoch": 1.8454613653413352,
"grad_norm": 11.974825859069824,
"learning_rate": 8.212856761803149e-06,
"loss": 0.7195,
"step": 4920
},
{
"epoch": 1.849212303075769,
"grad_norm": 12.195642471313477,
"learning_rate": 8.186182982128569e-06,
"loss": 0.8301,
"step": 4930
},
{
"epoch": 1.8529632408102026,
"grad_norm": 6.2414751052856445,
"learning_rate": 8.159509202453988e-06,
"loss": 0.8528,
"step": 4940
},
{
"epoch": 1.8567141785446362,
"grad_norm": 9.026991844177246,
"learning_rate": 8.132835422779408e-06,
"loss": 0.8165,
"step": 4950
},
{
"epoch": 1.8604651162790697,
"grad_norm": 13.745824813842773,
"learning_rate": 8.106161643104829e-06,
"loss": 0.9866,
"step": 4960
},
{
"epoch": 1.8642160540135033,
"grad_norm": 8.861783027648926,
"learning_rate": 8.079487863430249e-06,
"loss": 0.9738,
"step": 4970
},
{
"epoch": 1.8679669917479371,
"grad_norm": 7.437354564666748,
"learning_rate": 8.052814083755668e-06,
"loss": 0.7223,
"step": 4980
},
{
"epoch": 1.8717179294823705,
"grad_norm": 14.148890495300293,
"learning_rate": 8.02614030408109e-06,
"loss": 0.8356,
"step": 4990
},
{
"epoch": 1.8754688672168043,
"grad_norm": 13.688013076782227,
"learning_rate": 7.99946652440651e-06,
"loss": 0.8949,
"step": 5000
},
{
"epoch": 1.8792198049512379,
"grad_norm": 16.709125518798828,
"learning_rate": 7.972792744731929e-06,
"loss": 0.8775,
"step": 5010
},
{
"epoch": 1.8829707426856714,
"grad_norm": 9.73661994934082,
"learning_rate": 7.946118965057348e-06,
"loss": 0.832,
"step": 5020
},
{
"epoch": 1.886721680420105,
"grad_norm": 10.575983047485352,
"learning_rate": 7.91944518538277e-06,
"loss": 0.7976,
"step": 5030
},
{
"epoch": 1.8904726181545386,
"grad_norm": 9.284303665161133,
"learning_rate": 7.89277140570819e-06,
"loss": 0.9656,
"step": 5040
},
{
"epoch": 1.8942235558889724,
"grad_norm": 6.543034553527832,
"learning_rate": 7.866097626033609e-06,
"loss": 0.7157,
"step": 5050
},
{
"epoch": 1.8979744936234058,
"grad_norm": 5.064873218536377,
"learning_rate": 7.83942384635903e-06,
"loss": 0.7334,
"step": 5060
},
{
"epoch": 1.9017254313578396,
"grad_norm": 16.654563903808594,
"learning_rate": 7.81275006668445e-06,
"loss": 0.7677,
"step": 5070
},
{
"epoch": 1.905476369092273,
"grad_norm": 20.614212036132812,
"learning_rate": 7.78607628700987e-06,
"loss": 0.8525,
"step": 5080
},
{
"epoch": 1.9092273068267067,
"grad_norm": 13.709310531616211,
"learning_rate": 7.75940250733529e-06,
"loss": 0.7237,
"step": 5090
},
{
"epoch": 1.9129782445611403,
"grad_norm": 17.662317276000977,
"learning_rate": 7.73272872766071e-06,
"loss": 0.8529,
"step": 5100
},
{
"epoch": 1.9167291822955739,
"grad_norm": 9.610177040100098,
"learning_rate": 7.70605494798613e-06,
"loss": 0.9447,
"step": 5110
},
{
"epoch": 1.9204801200300075,
"grad_norm": 19.19601821899414,
"learning_rate": 7.67938116831155e-06,
"loss": 0.8738,
"step": 5120
},
{
"epoch": 1.924231057764441,
"grad_norm": 8.228813171386719,
"learning_rate": 7.652707388636971e-06,
"loss": 0.8096,
"step": 5130
},
{
"epoch": 1.9279819954988748,
"grad_norm": 14.475564956665039,
"learning_rate": 7.626033608962391e-06,
"loss": 0.7235,
"step": 5140
},
{
"epoch": 1.9317329332333082,
"grad_norm": 17.313648223876953,
"learning_rate": 7.599359829287811e-06,
"loss": 0.7778,
"step": 5150
},
{
"epoch": 1.935483870967742,
"grad_norm": 6.775811672210693,
"learning_rate": 7.572686049613231e-06,
"loss": 0.7627,
"step": 5160
},
{
"epoch": 1.9392348087021756,
"grad_norm": 11.815681457519531,
"learning_rate": 7.54601226993865e-06,
"loss": 0.8978,
"step": 5170
},
{
"epoch": 1.9429857464366092,
"grad_norm": 13.653975486755371,
"learning_rate": 7.519338490264071e-06,
"loss": 0.7364,
"step": 5180
},
{
"epoch": 1.9467366841710427,
"grad_norm": 9.049905776977539,
"learning_rate": 7.492664710589491e-06,
"loss": 0.8631,
"step": 5190
},
{
"epoch": 1.9504876219054763,
"grad_norm": 14.149343490600586,
"learning_rate": 7.465990930914912e-06,
"loss": 0.8279,
"step": 5200
},
{
"epoch": 1.9542385596399101,
"grad_norm": 15.612215995788574,
"learning_rate": 7.439317151240331e-06,
"loss": 0.9058,
"step": 5210
},
{
"epoch": 1.9579894973743435,
"grad_norm": 11.682372093200684,
"learning_rate": 7.412643371565752e-06,
"loss": 0.8859,
"step": 5220
},
{
"epoch": 1.9617404351087773,
"grad_norm": 9.87074089050293,
"learning_rate": 7.385969591891171e-06,
"loss": 0.8733,
"step": 5230
},
{
"epoch": 1.9654913728432108,
"grad_norm": 9.963356971740723,
"learning_rate": 7.359295812216591e-06,
"loss": 0.7134,
"step": 5240
},
{
"epoch": 1.9692423105776444,
"grad_norm": 4.6800537109375,
"learning_rate": 7.3326220325420115e-06,
"loss": 0.7594,
"step": 5250
},
{
"epoch": 1.972993248312078,
"grad_norm": 13.148963928222656,
"learning_rate": 7.305948252867432e-06,
"loss": 0.947,
"step": 5260
},
{
"epoch": 1.9767441860465116,
"grad_norm": 10.073929786682129,
"learning_rate": 7.279274473192852e-06,
"loss": 0.8769,
"step": 5270
},
{
"epoch": 1.9804951237809454,
"grad_norm": 11.67326831817627,
"learning_rate": 7.252600693518272e-06,
"loss": 0.7545,
"step": 5280
},
{
"epoch": 1.9842460615153787,
"grad_norm": 7.498824119567871,
"learning_rate": 7.2259269138436925e-06,
"loss": 0.7997,
"step": 5290
},
{
"epoch": 1.9879969992498125,
"grad_norm": 9.357927322387695,
"learning_rate": 7.199253134169112e-06,
"loss": 0.8754,
"step": 5300
},
{
"epoch": 1.991747936984246,
"grad_norm": 12.50817584991455,
"learning_rate": 7.172579354494532e-06,
"loss": 0.79,
"step": 5310
},
{
"epoch": 1.9954988747186797,
"grad_norm": 14.613991737365723,
"learning_rate": 7.145905574819952e-06,
"loss": 0.8005,
"step": 5320
},
{
"epoch": 1.9992498124531133,
"grad_norm": 9.007129669189453,
"learning_rate": 7.119231795145373e-06,
"loss": 0.9009,
"step": 5330
},
{
"epoch": 2.0,
"eval_accuracy": 0.580168776371308,
"eval_f1_macro": 0.5788189436128865,
"eval_f1_weighted": 0.5800618837244829,
"eval_loss": 0.9064968228340149,
"eval_precision_macro": 0.5789782500874713,
"eval_precision_weighted": 0.5804785651892536,
"eval_recall_macro": 0.5792145494510413,
"eval_recall_weighted": 0.580168776371308,
"eval_runtime": 4.8637,
"eval_samples_per_second": 487.284,
"eval_steps_per_second": 61.065,
"step": 5332
},
{
"epoch": 2.003000750187547,
"grad_norm": 10.368429183959961,
"learning_rate": 7.092558015470793e-06,
"loss": 0.6842,
"step": 5340
},
{
"epoch": 2.0067516879219807,
"grad_norm": 10.329928398132324,
"learning_rate": 7.065884235796214e-06,
"loss": 0.7709,
"step": 5350
},
{
"epoch": 2.010502625656414,
"grad_norm": 13.128575325012207,
"learning_rate": 7.039210456121633e-06,
"loss": 0.7054,
"step": 5360
},
{
"epoch": 2.014253563390848,
"grad_norm": 10.884894371032715,
"learning_rate": 7.012536676447053e-06,
"loss": 0.8103,
"step": 5370
},
{
"epoch": 2.018004501125281,
"grad_norm": 17.327537536621094,
"learning_rate": 6.985862896772473e-06,
"loss": 0.6551,
"step": 5380
},
{
"epoch": 2.021755438859715,
"grad_norm": 9.725515365600586,
"learning_rate": 6.959189117097893e-06,
"loss": 0.6534,
"step": 5390
},
{
"epoch": 2.0255063765941483,
"grad_norm": 9.302525520324707,
"learning_rate": 6.932515337423313e-06,
"loss": 0.6741,
"step": 5400
},
{
"epoch": 2.029257314328582,
"grad_norm": 12.362338066101074,
"learning_rate": 6.905841557748734e-06,
"loss": 0.7026,
"step": 5410
},
{
"epoch": 2.033008252063016,
"grad_norm": 7.654306411743164,
"learning_rate": 6.879167778074154e-06,
"loss": 0.5962,
"step": 5420
},
{
"epoch": 2.0367591897974493,
"grad_norm": 14.547067642211914,
"learning_rate": 6.852493998399574e-06,
"loss": 0.5578,
"step": 5430
},
{
"epoch": 2.040510127531883,
"grad_norm": 12.792427062988281,
"learning_rate": 6.8258202187249935e-06,
"loss": 0.7636,
"step": 5440
},
{
"epoch": 2.0442610652663165,
"grad_norm": 8.322968482971191,
"learning_rate": 6.799146439050414e-06,
"loss": 0.5881,
"step": 5450
},
{
"epoch": 2.0480120030007503,
"grad_norm": 14.064526557922363,
"learning_rate": 6.772472659375834e-06,
"loss": 0.6907,
"step": 5460
},
{
"epoch": 2.0517629407351836,
"grad_norm": 11.318249702453613,
"learning_rate": 6.745798879701254e-06,
"loss": 0.6179,
"step": 5470
},
{
"epoch": 2.0555138784696174,
"grad_norm": 7.615289688110352,
"learning_rate": 6.7191251000266745e-06,
"loss": 0.5912,
"step": 5480
},
{
"epoch": 2.059264816204051,
"grad_norm": 20.249950408935547,
"learning_rate": 6.692451320352095e-06,
"loss": 0.7777,
"step": 5490
},
{
"epoch": 2.0630157539384846,
"grad_norm": 13.289349555969238,
"learning_rate": 6.665777540677515e-06,
"loss": 0.6271,
"step": 5500
},
{
"epoch": 2.0667666916729184,
"grad_norm": 14.625772476196289,
"learning_rate": 6.639103761002935e-06,
"loss": 0.7248,
"step": 5510
},
{
"epoch": 2.0705176294073517,
"grad_norm": 14.428004264831543,
"learning_rate": 6.612429981328355e-06,
"loss": 0.6791,
"step": 5520
},
{
"epoch": 2.0742685671417855,
"grad_norm": 21.052837371826172,
"learning_rate": 6.585756201653774e-06,
"loss": 0.6244,
"step": 5530
},
{
"epoch": 2.078019504876219,
"grad_norm": 17.523300170898438,
"learning_rate": 6.559082421979195e-06,
"loss": 0.6498,
"step": 5540
},
{
"epoch": 2.0817704426106527,
"grad_norm": 9.524145126342773,
"learning_rate": 6.532408642304615e-06,
"loss": 0.7792,
"step": 5550
},
{
"epoch": 2.085521380345086,
"grad_norm": 14.92676830291748,
"learning_rate": 6.505734862630036e-06,
"loss": 0.5748,
"step": 5560
},
{
"epoch": 2.08927231807952,
"grad_norm": 18.87467384338379,
"learning_rate": 6.479061082955455e-06,
"loss": 0.7199,
"step": 5570
},
{
"epoch": 2.0930232558139537,
"grad_norm": 10.356287002563477,
"learning_rate": 6.452387303280876e-06,
"loss": 0.7016,
"step": 5580
},
{
"epoch": 2.096774193548387,
"grad_norm": 11.189599990844727,
"learning_rate": 6.425713523606295e-06,
"loss": 0.6511,
"step": 5590
},
{
"epoch": 2.100525131282821,
"grad_norm": 12.267254829406738,
"learning_rate": 6.399039743931715e-06,
"loss": 0.6421,
"step": 5600
},
{
"epoch": 2.104276069017254,
"grad_norm": 19.524673461914062,
"learning_rate": 6.3723659642571354e-06,
"loss": 0.6963,
"step": 5610
},
{
"epoch": 2.108027006751688,
"grad_norm": 13.466742515563965,
"learning_rate": 6.345692184582556e-06,
"loss": 0.6727,
"step": 5620
},
{
"epoch": 2.1117779444861213,
"grad_norm": 20.707855224609375,
"learning_rate": 6.319018404907976e-06,
"loss": 0.6695,
"step": 5630
},
{
"epoch": 2.115528882220555,
"grad_norm": 15.425350189208984,
"learning_rate": 6.292344625233396e-06,
"loss": 0.673,
"step": 5640
},
{
"epoch": 2.119279819954989,
"grad_norm": 5.349853038787842,
"learning_rate": 6.2656708455588164e-06,
"loss": 0.6275,
"step": 5650
},
{
"epoch": 2.1230307576894223,
"grad_norm": 13.552290916442871,
"learning_rate": 6.238997065884236e-06,
"loss": 0.6945,
"step": 5660
},
{
"epoch": 2.126781695423856,
"grad_norm": 17.840105056762695,
"learning_rate": 6.212323286209656e-06,
"loss": 0.8054,
"step": 5670
},
{
"epoch": 2.1305326331582894,
"grad_norm": 21.012237548828125,
"learning_rate": 6.185649506535076e-06,
"loss": 0.7306,
"step": 5680
},
{
"epoch": 2.1342835708927232,
"grad_norm": 13.1303129196167,
"learning_rate": 6.158975726860497e-06,
"loss": 0.871,
"step": 5690
},
{
"epoch": 2.1380345086271566,
"grad_norm": 11.506791114807129,
"learning_rate": 6.132301947185917e-06,
"loss": 0.6722,
"step": 5700
},
{
"epoch": 2.1417854463615904,
"grad_norm": 9.709290504455566,
"learning_rate": 6.1056281675113375e-06,
"loss": 0.6695,
"step": 5710
},
{
"epoch": 2.145536384096024,
"grad_norm": 8.551689147949219,
"learning_rate": 6.078954387836757e-06,
"loss": 0.7001,
"step": 5720
},
{
"epoch": 2.1492873218304576,
"grad_norm": 12.69763469696045,
"learning_rate": 6.052280608162177e-06,
"loss": 0.6778,
"step": 5730
},
{
"epoch": 2.1530382595648914,
"grad_norm": 10.49093246459961,
"learning_rate": 6.025606828487597e-06,
"loss": 0.6671,
"step": 5740
},
{
"epoch": 2.1567891972993247,
"grad_norm": 7.214636325836182,
"learning_rate": 5.998933048813017e-06,
"loss": 0.616,
"step": 5750
},
{
"epoch": 2.1605401350337585,
"grad_norm": 8.58086109161377,
"learning_rate": 5.972259269138437e-06,
"loss": 0.6024,
"step": 5760
},
{
"epoch": 2.164291072768192,
"grad_norm": 7.856104373931885,
"learning_rate": 5.945585489463858e-06,
"loss": 0.6202,
"step": 5770
},
{
"epoch": 2.1680420105026257,
"grad_norm": 6.472407341003418,
"learning_rate": 5.918911709789278e-06,
"loss": 0.6141,
"step": 5780
},
{
"epoch": 2.1717929482370595,
"grad_norm": 6.612668991088867,
"learning_rate": 5.892237930114698e-06,
"loss": 0.7841,
"step": 5790
},
{
"epoch": 2.175543885971493,
"grad_norm": 9.869592666625977,
"learning_rate": 5.865564150440118e-06,
"loss": 0.5949,
"step": 5800
},
{
"epoch": 2.1792948237059266,
"grad_norm": 12.85415267944336,
"learning_rate": 5.838890370765538e-06,
"loss": 0.663,
"step": 5810
},
{
"epoch": 2.18304576144036,
"grad_norm": 22.380807876586914,
"learning_rate": 5.8122165910909575e-06,
"loss": 0.6532,
"step": 5820
},
{
"epoch": 2.186796699174794,
"grad_norm": 23.866607666015625,
"learning_rate": 5.785542811416378e-06,
"loss": 0.6704,
"step": 5830
},
{
"epoch": 2.190547636909227,
"grad_norm": 12.608299255371094,
"learning_rate": 5.7588690317417985e-06,
"loss": 0.6231,
"step": 5840
},
{
"epoch": 2.194298574643661,
"grad_norm": 27.60419464111328,
"learning_rate": 5.732195252067219e-06,
"loss": 0.6369,
"step": 5850
},
{
"epoch": 2.1980495123780943,
"grad_norm": 10.39966869354248,
"learning_rate": 5.7055214723926385e-06,
"loss": 0.5964,
"step": 5860
},
{
"epoch": 2.201800450112528,
"grad_norm": 23.611059188842773,
"learning_rate": 5.678847692718059e-06,
"loss": 0.7365,
"step": 5870
},
{
"epoch": 2.205551387846962,
"grad_norm": 10.59642505645752,
"learning_rate": 5.652173913043479e-06,
"loss": 0.6305,
"step": 5880
},
{
"epoch": 2.2093023255813953,
"grad_norm": 15.549806594848633,
"learning_rate": 5.625500133368898e-06,
"loss": 0.624,
"step": 5890
},
{
"epoch": 2.213053263315829,
"grad_norm": 17.546363830566406,
"learning_rate": 5.598826353694319e-06,
"loss": 0.7103,
"step": 5900
},
{
"epoch": 2.2168042010502624,
"grad_norm": 19.833606719970703,
"learning_rate": 5.572152574019739e-06,
"loss": 0.4821,
"step": 5910
},
{
"epoch": 2.2205551387846962,
"grad_norm": 18.05365562438965,
"learning_rate": 5.54547879434516e-06,
"loss": 0.6953,
"step": 5920
},
{
"epoch": 2.2243060765191296,
"grad_norm": 3.1533432006835938,
"learning_rate": 5.518805014670579e-06,
"loss": 0.6899,
"step": 5930
},
{
"epoch": 2.2280570142535634,
"grad_norm": 21.84452247619629,
"learning_rate": 5.492131234996e-06,
"loss": 0.8146,
"step": 5940
},
{
"epoch": 2.231807951987997,
"grad_norm": 20.791135787963867,
"learning_rate": 5.465457455321419e-06,
"loss": 0.4915,
"step": 5950
},
{
"epoch": 2.2355588897224306,
"grad_norm": 16.44775390625,
"learning_rate": 5.438783675646839e-06,
"loss": 0.5946,
"step": 5960
},
{
"epoch": 2.2393098274568644,
"grad_norm": 8.386981964111328,
"learning_rate": 5.412109895972259e-06,
"loss": 0.7348,
"step": 5970
},
{
"epoch": 2.2430607651912977,
"grad_norm": 26.47071075439453,
"learning_rate": 5.38543611629768e-06,
"loss": 0.6261,
"step": 5980
},
{
"epoch": 2.2468117029257315,
"grad_norm": 11.219141960144043,
"learning_rate": 5.3587623366231e-06,
"loss": 0.5324,
"step": 5990
},
{
"epoch": 2.250562640660165,
"grad_norm": 15.969422340393066,
"learning_rate": 5.33208855694852e-06,
"loss": 0.7459,
"step": 6000
},
{
"epoch": 2.2543135783945987,
"grad_norm": 20.990497589111328,
"learning_rate": 5.30541477727394e-06,
"loss": 0.5593,
"step": 6010
},
{
"epoch": 2.258064516129032,
"grad_norm": 10.82603645324707,
"learning_rate": 5.27874099759936e-06,
"loss": 0.6698,
"step": 6020
},
{
"epoch": 2.261815453863466,
"grad_norm": 19.865243911743164,
"learning_rate": 5.25206721792478e-06,
"loss": 0.732,
"step": 6030
},
{
"epoch": 2.2655663915978996,
"grad_norm": 25.37660026550293,
"learning_rate": 5.2253934382502e-06,
"loss": 0.5585,
"step": 6040
},
{
"epoch": 2.269317329332333,
"grad_norm": 19.796749114990234,
"learning_rate": 5.1987196585756205e-06,
"loss": 0.7108,
"step": 6050
},
{
"epoch": 2.273068267066767,
"grad_norm": 12.207030296325684,
"learning_rate": 5.172045878901041e-06,
"loss": 0.6683,
"step": 6060
},
{
"epoch": 2.2768192048012,
"grad_norm": 20.979265213012695,
"learning_rate": 5.1453720992264615e-06,
"loss": 0.6962,
"step": 6070
},
{
"epoch": 2.280570142535634,
"grad_norm": 13.058587074279785,
"learning_rate": 5.118698319551881e-06,
"loss": 0.6119,
"step": 6080
},
{
"epoch": 2.2843210802700673,
"grad_norm": 7.18276309967041,
"learning_rate": 5.092024539877301e-06,
"loss": 0.606,
"step": 6090
},
{
"epoch": 2.288072018004501,
"grad_norm": 21.568151473999023,
"learning_rate": 5.065350760202721e-06,
"loss": 0.6909,
"step": 6100
},
{
"epoch": 2.291822955738935,
"grad_norm": 28.49129867553711,
"learning_rate": 5.038676980528141e-06,
"loss": 0.6764,
"step": 6110
},
{
"epoch": 2.2955738934733683,
"grad_norm": 12.39367389678955,
"learning_rate": 5.012003200853561e-06,
"loss": 0.753,
"step": 6120
},
{
"epoch": 2.299324831207802,
"grad_norm": 17.55943489074707,
"learning_rate": 4.985329421178982e-06,
"loss": 0.7103,
"step": 6130
},
{
"epoch": 2.3030757689422354,
"grad_norm": 16.813745498657227,
"learning_rate": 4.958655641504402e-06,
"loss": 0.645,
"step": 6140
},
{
"epoch": 2.3068267066766692,
"grad_norm": 20.711591720581055,
"learning_rate": 4.931981861829822e-06,
"loss": 0.6337,
"step": 6150
},
{
"epoch": 2.3105776444111026,
"grad_norm": 5.449891567230225,
"learning_rate": 4.905308082155241e-06,
"loss": 0.6224,
"step": 6160
},
{
"epoch": 2.3143285821455364,
"grad_norm": 15.508672714233398,
"learning_rate": 4.878634302480662e-06,
"loss": 0.6718,
"step": 6170
},
{
"epoch": 2.31807951987997,
"grad_norm": 12.16860294342041,
"learning_rate": 4.8519605228060815e-06,
"loss": 0.6044,
"step": 6180
},
{
"epoch": 2.3218304576144035,
"grad_norm": 16.671234130859375,
"learning_rate": 4.825286743131502e-06,
"loss": 0.7397,
"step": 6190
},
{
"epoch": 2.3255813953488373,
"grad_norm": 27.95615577697754,
"learning_rate": 4.798612963456922e-06,
"loss": 0.6451,
"step": 6200
},
{
"epoch": 2.3293323330832707,
"grad_norm": 23.62805938720703,
"learning_rate": 4.771939183782343e-06,
"loss": 0.7978,
"step": 6210
},
{
"epoch": 2.3330832708177045,
"grad_norm": 17.226280212402344,
"learning_rate": 4.7452654041077625e-06,
"loss": 0.6442,
"step": 6220
},
{
"epoch": 2.336834208552138,
"grad_norm": 22.371273040771484,
"learning_rate": 4.718591624433183e-06,
"loss": 0.6885,
"step": 6230
},
{
"epoch": 2.3405851462865717,
"grad_norm": 12.560019493103027,
"learning_rate": 4.6919178447586026e-06,
"loss": 0.6033,
"step": 6240
},
{
"epoch": 2.3443360840210055,
"grad_norm": 14.103109359741211,
"learning_rate": 4.665244065084023e-06,
"loss": 0.6683,
"step": 6250
},
{
"epoch": 2.348087021755439,
"grad_norm": 11.051913261413574,
"learning_rate": 4.638570285409443e-06,
"loss": 0.7092,
"step": 6260
},
{
"epoch": 2.3518379594898726,
"grad_norm": 15.613760948181152,
"learning_rate": 4.611896505734863e-06,
"loss": 0.6974,
"step": 6270
},
{
"epoch": 2.355588897224306,
"grad_norm": 19.85428237915039,
"learning_rate": 4.5852227260602836e-06,
"loss": 0.6637,
"step": 6280
},
{
"epoch": 2.35933983495874,
"grad_norm": 15.703207015991211,
"learning_rate": 4.558548946385703e-06,
"loss": 0.6508,
"step": 6290
},
{
"epoch": 2.363090772693173,
"grad_norm": 11.342123985290527,
"learning_rate": 4.531875166711124e-06,
"loss": 0.7348,
"step": 6300
},
{
"epoch": 2.366841710427607,
"grad_norm": 11.049941062927246,
"learning_rate": 4.505201387036543e-06,
"loss": 0.6421,
"step": 6310
},
{
"epoch": 2.3705926481620407,
"grad_norm": 24.488731384277344,
"learning_rate": 4.478527607361964e-06,
"loss": 0.7123,
"step": 6320
},
{
"epoch": 2.374343585896474,
"grad_norm": 14.967778205871582,
"learning_rate": 4.451853827687383e-06,
"loss": 0.7142,
"step": 6330
},
{
"epoch": 2.378094523630908,
"grad_norm": 9.328021049499512,
"learning_rate": 4.425180048012804e-06,
"loss": 0.6251,
"step": 6340
},
{
"epoch": 2.3818454613653413,
"grad_norm": 17.42303466796875,
"learning_rate": 4.398506268338224e-06,
"loss": 0.6355,
"step": 6350
},
{
"epoch": 2.385596399099775,
"grad_norm": 15.201652526855469,
"learning_rate": 4.371832488663644e-06,
"loss": 0.7441,
"step": 6360
},
{
"epoch": 2.3893473368342084,
"grad_norm": 23.0561466217041,
"learning_rate": 4.345158708989064e-06,
"loss": 0.6641,
"step": 6370
},
{
"epoch": 2.393098274568642,
"grad_norm": 14.52270221710205,
"learning_rate": 4.318484929314484e-06,
"loss": 0.7073,
"step": 6380
},
{
"epoch": 2.396849212303076,
"grad_norm": 13.747902870178223,
"learning_rate": 4.291811149639904e-06,
"loss": 0.81,
"step": 6390
},
{
"epoch": 2.4006001500375094,
"grad_norm": 14.231673240661621,
"learning_rate": 4.265137369965324e-06,
"loss": 0.6939,
"step": 6400
},
{
"epoch": 2.404351087771943,
"grad_norm": 7.63701057434082,
"learning_rate": 4.2384635902907445e-06,
"loss": 0.6873,
"step": 6410
},
{
"epoch": 2.4081020255063765,
"grad_norm": 20.752126693725586,
"learning_rate": 4.211789810616165e-06,
"loss": 0.571,
"step": 6420
},
{
"epoch": 2.4118529632408103,
"grad_norm": 13.460418701171875,
"learning_rate": 4.185116030941585e-06,
"loss": 0.6506,
"step": 6430
},
{
"epoch": 2.4156039009752437,
"grad_norm": 8.838345527648926,
"learning_rate": 4.158442251267005e-06,
"loss": 0.5745,
"step": 6440
},
{
"epoch": 2.4193548387096775,
"grad_norm": 10.570659637451172,
"learning_rate": 4.131768471592425e-06,
"loss": 0.6607,
"step": 6450
},
{
"epoch": 2.423105776444111,
"grad_norm": 12.49052619934082,
"learning_rate": 4.105094691917845e-06,
"loss": 0.5026,
"step": 6460
},
{
"epoch": 2.4268567141785446,
"grad_norm": 9.46437931060791,
"learning_rate": 4.078420912243265e-06,
"loss": 0.6005,
"step": 6470
},
{
"epoch": 2.430607651912978,
"grad_norm": 29.9566593170166,
"learning_rate": 4.051747132568685e-06,
"loss": 0.6292,
"step": 6480
},
{
"epoch": 2.434358589647412,
"grad_norm": 12.318580627441406,
"learning_rate": 4.025073352894106e-06,
"loss": 0.7056,
"step": 6490
},
{
"epoch": 2.4381095273818456,
"grad_norm": 20.635848999023438,
"learning_rate": 3.998399573219526e-06,
"loss": 0.6663,
"step": 6500
},
{
"epoch": 2.441860465116279,
"grad_norm": 13.231310844421387,
"learning_rate": 3.971725793544946e-06,
"loss": 0.7405,
"step": 6510
},
{
"epoch": 2.4456114028507128,
"grad_norm": 16.560197830200195,
"learning_rate": 3.945052013870365e-06,
"loss": 0.6678,
"step": 6520
},
{
"epoch": 2.449362340585146,
"grad_norm": 21.45167350769043,
"learning_rate": 3.918378234195786e-06,
"loss": 0.6032,
"step": 6530
},
{
"epoch": 2.45311327831958,
"grad_norm": 37.360843658447266,
"learning_rate": 3.891704454521205e-06,
"loss": 0.8438,
"step": 6540
},
{
"epoch": 2.4568642160540133,
"grad_norm": 30.98585319519043,
"learning_rate": 3.865030674846626e-06,
"loss": 0.6035,
"step": 6550
},
{
"epoch": 2.460615153788447,
"grad_norm": 13.408466339111328,
"learning_rate": 3.838356895172046e-06,
"loss": 0.5181,
"step": 6560
},
{
"epoch": 2.464366091522881,
"grad_norm": 16.84627914428711,
"learning_rate": 3.8116831154974664e-06,
"loss": 0.6353,
"step": 6570
},
{
"epoch": 2.4681170292573142,
"grad_norm": 19.02153968811035,
"learning_rate": 3.785009335822886e-06,
"loss": 0.6052,
"step": 6580
},
{
"epoch": 2.471867966991748,
"grad_norm": 13.263850212097168,
"learning_rate": 3.7583355561483065e-06,
"loss": 0.8126,
"step": 6590
},
{
"epoch": 2.4756189047261814,
"grad_norm": 22.753215789794922,
"learning_rate": 3.731661776473727e-06,
"loss": 0.6449,
"step": 6600
},
{
"epoch": 2.479369842460615,
"grad_norm": 13.979212760925293,
"learning_rate": 3.704987996799147e-06,
"loss": 0.7421,
"step": 6610
},
{
"epoch": 2.4831207801950486,
"grad_norm": 23.614389419555664,
"learning_rate": 3.6783142171245666e-06,
"loss": 0.8168,
"step": 6620
},
{
"epoch": 2.4868717179294824,
"grad_norm": 7.810019493103027,
"learning_rate": 3.651640437449987e-06,
"loss": 0.6301,
"step": 6630
},
{
"epoch": 2.490622655663916,
"grad_norm": 17.90605926513672,
"learning_rate": 3.624966657775407e-06,
"loss": 0.6369,
"step": 6640
},
{
"epoch": 2.4943735933983495,
"grad_norm": 10.375251770019531,
"learning_rate": 3.598292878100827e-06,
"loss": 0.6254,
"step": 6650
},
{
"epoch": 2.4981245311327833,
"grad_norm": 15.813028335571289,
"learning_rate": 3.571619098426247e-06,
"loss": 0.7866,
"step": 6660
},
{
"epoch": 2.5018754688672167,
"grad_norm": 8.438957214355469,
"learning_rate": 3.5449453187516676e-06,
"loss": 0.7288,
"step": 6670
},
{
"epoch": 2.5056264066016505,
"grad_norm": 23.076040267944336,
"learning_rate": 3.5182715390770877e-06,
"loss": 0.6743,
"step": 6680
},
{
"epoch": 2.509377344336084,
"grad_norm": 14.966166496276855,
"learning_rate": 3.4915977594025073e-06,
"loss": 0.6408,
"step": 6690
},
{
"epoch": 2.5131282820705176,
"grad_norm": 19.553081512451172,
"learning_rate": 3.4649239797279277e-06,
"loss": 0.613,
"step": 6700
},
{
"epoch": 2.5168792198049514,
"grad_norm": 12.050764083862305,
"learning_rate": 3.4382502000533478e-06,
"loss": 0.6547,
"step": 6710
},
{
"epoch": 2.520630157539385,
"grad_norm": 14.52085018157959,
"learning_rate": 3.411576420378768e-06,
"loss": 0.7239,
"step": 6720
},
{
"epoch": 2.5243810952738186,
"grad_norm": 20.222137451171875,
"learning_rate": 3.384902640704188e-06,
"loss": 0.7656,
"step": 6730
},
{
"epoch": 2.528132033008252,
"grad_norm": 14.729280471801758,
"learning_rate": 3.3582288610296083e-06,
"loss": 0.6013,
"step": 6740
},
{
"epoch": 2.5318829707426858,
"grad_norm": 21.984832763671875,
"learning_rate": 3.3315550813550284e-06,
"loss": 0.6453,
"step": 6750
},
{
"epoch": 2.535633908477119,
"grad_norm": 19.643138885498047,
"learning_rate": 3.304881301680448e-06,
"loss": 0.8221,
"step": 6760
},
{
"epoch": 2.539384846211553,
"grad_norm": 17.281740188598633,
"learning_rate": 3.2782075220058684e-06,
"loss": 0.6348,
"step": 6770
},
{
"epoch": 2.5431357839459867,
"grad_norm": 17.821035385131836,
"learning_rate": 3.251533742331289e-06,
"loss": 0.5855,
"step": 6780
},
{
"epoch": 2.54688672168042,
"grad_norm": 14.015131950378418,
"learning_rate": 3.224859962656709e-06,
"loss": 0.5544,
"step": 6790
},
{
"epoch": 2.550637659414854,
"grad_norm": 10.391494750976562,
"learning_rate": 3.1981861829821286e-06,
"loss": 0.588,
"step": 6800
},
{
"epoch": 2.5543885971492872,
"grad_norm": 14.990039825439453,
"learning_rate": 3.171512403307549e-06,
"loss": 0.5782,
"step": 6810
},
{
"epoch": 2.558139534883721,
"grad_norm": 13.448775291442871,
"learning_rate": 3.144838623632969e-06,
"loss": 0.8525,
"step": 6820
},
{
"epoch": 2.5618904726181544,
"grad_norm": 13.461121559143066,
"learning_rate": 3.118164843958389e-06,
"loss": 0.6256,
"step": 6830
},
{
"epoch": 2.565641410352588,
"grad_norm": 13.295988082885742,
"learning_rate": 3.091491064283809e-06,
"loss": 0.7059,
"step": 6840
},
{
"epoch": 2.569392348087022,
"grad_norm": 14.871612548828125,
"learning_rate": 3.0648172846092296e-06,
"loss": 0.6338,
"step": 6850
},
{
"epoch": 2.5731432858214554,
"grad_norm": 30.46957778930664,
"learning_rate": 3.0381435049346496e-06,
"loss": 0.7072,
"step": 6860
},
{
"epoch": 2.5768942235558887,
"grad_norm": 20.661733627319336,
"learning_rate": 3.0114697252600693e-06,
"loss": 0.6634,
"step": 6870
},
{
"epoch": 2.5806451612903225,
"grad_norm": 10.35488224029541,
"learning_rate": 2.9847959455854897e-06,
"loss": 0.5709,
"step": 6880
},
{
"epoch": 2.5843960990247563,
"grad_norm": 22.383169174194336,
"learning_rate": 2.9581221659109098e-06,
"loss": 0.433,
"step": 6890
},
{
"epoch": 2.5881470367591897,
"grad_norm": 21.173015594482422,
"learning_rate": 2.93144838623633e-06,
"loss": 0.6109,
"step": 6900
},
{
"epoch": 2.5918979744936235,
"grad_norm": 25.366735458374023,
"learning_rate": 2.90477460656175e-06,
"loss": 0.8177,
"step": 6910
},
{
"epoch": 2.5956489122280573,
"grad_norm": 18.91875457763672,
"learning_rate": 2.8781008268871703e-06,
"loss": 0.7214,
"step": 6920
},
{
"epoch": 2.5993998499624906,
"grad_norm": 12.457830429077148,
"learning_rate": 2.8514270472125903e-06,
"loss": 0.553,
"step": 6930
},
{
"epoch": 2.603150787696924,
"grad_norm": 6.222160816192627,
"learning_rate": 2.82475326753801e-06,
"loss": 0.679,
"step": 6940
},
{
"epoch": 2.606901725431358,
"grad_norm": 8.99958324432373,
"learning_rate": 2.7980794878634304e-06,
"loss": 0.5929,
"step": 6950
},
{
"epoch": 2.6106526631657916,
"grad_norm": 11.063492774963379,
"learning_rate": 2.771405708188851e-06,
"loss": 0.5185,
"step": 6960
},
{
"epoch": 2.614403600900225,
"grad_norm": 10.320928573608398,
"learning_rate": 2.744731928514271e-06,
"loss": 0.5286,
"step": 6970
},
{
"epoch": 2.6181545386346587,
"grad_norm": 13.718670845031738,
"learning_rate": 2.7180581488396905e-06,
"loss": 0.6508,
"step": 6980
},
{
"epoch": 2.6219054763690925,
"grad_norm": 10.613819122314453,
"learning_rate": 2.691384369165111e-06,
"loss": 0.5805,
"step": 6990
},
{
"epoch": 2.625656414103526,
"grad_norm": 22.765199661254883,
"learning_rate": 2.664710589490531e-06,
"loss": 0.6691,
"step": 7000
},
{
"epoch": 2.6294073518379593,
"grad_norm": 12.34518051147461,
"learning_rate": 2.638036809815951e-06,
"loss": 0.6577,
"step": 7010
},
{
"epoch": 2.633158289572393,
"grad_norm": 15.861391067504883,
"learning_rate": 2.611363030141371e-06,
"loss": 0.5159,
"step": 7020
},
{
"epoch": 2.636909227306827,
"grad_norm": 7.271751880645752,
"learning_rate": 2.5846892504667916e-06,
"loss": 0.6844,
"step": 7030
},
{
"epoch": 2.64066016504126,
"grad_norm": 20.930856704711914,
"learning_rate": 2.5580154707922116e-06,
"loss": 0.7827,
"step": 7040
},
{
"epoch": 2.644411102775694,
"grad_norm": 28.042675018310547,
"learning_rate": 2.5313416911176312e-06,
"loss": 0.6383,
"step": 7050
},
{
"epoch": 2.6481620405101274,
"grad_norm": 25.815296173095703,
"learning_rate": 2.5046679114430517e-06,
"loss": 0.6866,
"step": 7060
},
{
"epoch": 2.651912978244561,
"grad_norm": 16.492206573486328,
"learning_rate": 2.4779941317684717e-06,
"loss": 0.5342,
"step": 7070
},
{
"epoch": 2.6556639159789945,
"grad_norm": 23.266910552978516,
"learning_rate": 2.4513203520938918e-06,
"loss": 0.5564,
"step": 7080
},
{
"epoch": 2.6594148537134283,
"grad_norm": 11.591928482055664,
"learning_rate": 2.424646572419312e-06,
"loss": 0.573,
"step": 7090
},
{
"epoch": 2.663165791447862,
"grad_norm": 14.71267032623291,
"learning_rate": 2.3979727927447323e-06,
"loss": 0.6456,
"step": 7100
},
{
"epoch": 2.6669167291822955,
"grad_norm": 7.238256454467773,
"learning_rate": 2.3712990130701523e-06,
"loss": 0.7731,
"step": 7110
},
{
"epoch": 2.6706676669167293,
"grad_norm": 38.71699523925781,
"learning_rate": 2.3446252333955723e-06,
"loss": 0.7189,
"step": 7120
},
{
"epoch": 2.6744186046511627,
"grad_norm": 24.029537200927734,
"learning_rate": 2.3179514537209924e-06,
"loss": 0.7435,
"step": 7130
},
{
"epoch": 2.6781695423855965,
"grad_norm": 17.704763412475586,
"learning_rate": 2.291277674046413e-06,
"loss": 0.838,
"step": 7140
},
{
"epoch": 2.68192048012003,
"grad_norm": 36.12045669555664,
"learning_rate": 2.2646038943718325e-06,
"loss": 0.7103,
"step": 7150
},
{
"epoch": 2.6856714178544636,
"grad_norm": 20.062591552734375,
"learning_rate": 2.237930114697253e-06,
"loss": 0.5748,
"step": 7160
},
{
"epoch": 2.6894223555888974,
"grad_norm": 9.567973136901855,
"learning_rate": 2.211256335022673e-06,
"loss": 0.7598,
"step": 7170
},
{
"epoch": 2.6931732933233308,
"grad_norm": 19.337631225585938,
"learning_rate": 2.184582555348093e-06,
"loss": 0.5945,
"step": 7180
},
{
"epoch": 2.6969242310577646,
"grad_norm": 11.189875602722168,
"learning_rate": 2.157908775673513e-06,
"loss": 0.77,
"step": 7190
},
{
"epoch": 2.700675168792198,
"grad_norm": 16.071062088012695,
"learning_rate": 2.131234995998933e-06,
"loss": 0.6758,
"step": 7200
},
{
"epoch": 2.7044261065266317,
"grad_norm": 11.37120532989502,
"learning_rate": 2.1045612163243535e-06,
"loss": 0.5912,
"step": 7210
},
{
"epoch": 2.708177044261065,
"grad_norm": 25.354324340820312,
"learning_rate": 2.0778874366497736e-06,
"loss": 0.6741,
"step": 7220
},
{
"epoch": 2.711927981995499,
"grad_norm": 11.246193885803223,
"learning_rate": 2.0512136569751936e-06,
"loss": 0.6073,
"step": 7230
},
{
"epoch": 2.7156789197299327,
"grad_norm": 9.01452350616455,
"learning_rate": 2.0245398773006137e-06,
"loss": 0.7363,
"step": 7240
},
{
"epoch": 2.719429857464366,
"grad_norm": 22.3641414642334,
"learning_rate": 1.9978660976260337e-06,
"loss": 0.6278,
"step": 7250
},
{
"epoch": 2.7231807951988,
"grad_norm": 14.206088066101074,
"learning_rate": 1.9711923179514537e-06,
"loss": 0.6676,
"step": 7260
},
{
"epoch": 2.726931732933233,
"grad_norm": 14.623751640319824,
"learning_rate": 1.9445185382768738e-06,
"loss": 0.6629,
"step": 7270
},
{
"epoch": 2.730682670667667,
"grad_norm": 15.682950019836426,
"learning_rate": 1.9178447586022942e-06,
"loss": 0.8008,
"step": 7280
},
{
"epoch": 2.7344336084021004,
"grad_norm": 16.56915855407715,
"learning_rate": 1.891170978927714e-06,
"loss": 0.8421,
"step": 7290
},
{
"epoch": 2.738184546136534,
"grad_norm": 20.514009475708008,
"learning_rate": 1.8644971992531343e-06,
"loss": 0.6755,
"step": 7300
},
{
"epoch": 2.741935483870968,
"grad_norm": 15.838664054870605,
"learning_rate": 1.8378234195785544e-06,
"loss": 0.6463,
"step": 7310
},
{
"epoch": 2.7456864216054013,
"grad_norm": 30.3530330657959,
"learning_rate": 1.8111496399039746e-06,
"loss": 0.6295,
"step": 7320
},
{
"epoch": 2.7494373593398347,
"grad_norm": 8.959320068359375,
"learning_rate": 1.7844758602293946e-06,
"loss": 0.6443,
"step": 7330
},
{
"epoch": 2.7531882970742685,
"grad_norm": 11.156110763549805,
"learning_rate": 1.757802080554815e-06,
"loss": 0.5971,
"step": 7340
},
{
"epoch": 2.7569392348087023,
"grad_norm": 21.744304656982422,
"learning_rate": 1.731128300880235e-06,
"loss": 0.5818,
"step": 7350
},
{
"epoch": 2.7606901725431356,
"grad_norm": 23.995845794677734,
"learning_rate": 1.7044545212056548e-06,
"loss": 0.6885,
"step": 7360
},
{
"epoch": 2.7644411102775694,
"grad_norm": 13.629135131835938,
"learning_rate": 1.677780741531075e-06,
"loss": 0.771,
"step": 7370
},
{
"epoch": 2.7681920480120032,
"grad_norm": 6.805270671844482,
"learning_rate": 1.651106961856495e-06,
"loss": 0.687,
"step": 7380
},
{
"epoch": 2.7719429857464366,
"grad_norm": 21.93046760559082,
"learning_rate": 1.6244331821819153e-06,
"loss": 0.5681,
"step": 7390
},
{
"epoch": 2.77569392348087,
"grad_norm": 22.271133422851562,
"learning_rate": 1.5977594025073353e-06,
"loss": 0.7504,
"step": 7400
},
{
"epoch": 2.7794448612153038,
"grad_norm": 19.411861419677734,
"learning_rate": 1.5710856228327556e-06,
"loss": 0.7141,
"step": 7410
},
{
"epoch": 2.7831957989497376,
"grad_norm": 21.990013122558594,
"learning_rate": 1.5444118431581756e-06,
"loss": 0.7941,
"step": 7420
},
{
"epoch": 2.786946736684171,
"grad_norm": 26.875274658203125,
"learning_rate": 1.5177380634835959e-06,
"loss": 0.7077,
"step": 7430
},
{
"epoch": 2.7906976744186047,
"grad_norm": 17.144861221313477,
"learning_rate": 1.491064283809016e-06,
"loss": 0.6153,
"step": 7440
},
{
"epoch": 2.7944486121530385,
"grad_norm": 18.100868225097656,
"learning_rate": 1.4643905041344357e-06,
"loss": 0.635,
"step": 7450
},
{
"epoch": 2.798199549887472,
"grad_norm": 17.497039794921875,
"learning_rate": 1.437716724459856e-06,
"loss": 0.7681,
"step": 7460
},
{
"epoch": 2.8019504876219052,
"grad_norm": 11.748749732971191,
"learning_rate": 1.411042944785276e-06,
"loss": 0.7916,
"step": 7470
},
{
"epoch": 2.805701425356339,
"grad_norm": 17.71030616760254,
"learning_rate": 1.3843691651106963e-06,
"loss": 0.6826,
"step": 7480
},
{
"epoch": 2.809452363090773,
"grad_norm": 15.269068717956543,
"learning_rate": 1.3576953854361163e-06,
"loss": 0.6008,
"step": 7490
},
{
"epoch": 2.813203300825206,
"grad_norm": 16.148839950561523,
"learning_rate": 1.3310216057615366e-06,
"loss": 0.7581,
"step": 7500
},
{
"epoch": 2.81695423855964,
"grad_norm": 7.341813564300537,
"learning_rate": 1.3043478260869566e-06,
"loss": 0.4343,
"step": 7510
},
{
"epoch": 2.8207051762940734,
"grad_norm": 11.722135543823242,
"learning_rate": 1.2776740464123769e-06,
"loss": 0.5226,
"step": 7520
},
{
"epoch": 2.824456114028507,
"grad_norm": 17.107776641845703,
"learning_rate": 1.251000266737797e-06,
"loss": 0.6466,
"step": 7530
},
{
"epoch": 2.8282070517629405,
"grad_norm": 15.833941459655762,
"learning_rate": 1.224326487063217e-06,
"loss": 0.7703,
"step": 7540
},
{
"epoch": 2.8319579894973743,
"grad_norm": 19.610742568969727,
"learning_rate": 1.197652707388637e-06,
"loss": 0.6359,
"step": 7550
},
{
"epoch": 2.835708927231808,
"grad_norm": 12.620158195495605,
"learning_rate": 1.1709789277140572e-06,
"loss": 0.6561,
"step": 7560
},
{
"epoch": 2.8394598649662415,
"grad_norm": 20.80132293701172,
"learning_rate": 1.1443051480394773e-06,
"loss": 0.7668,
"step": 7570
},
{
"epoch": 2.8432108027006753,
"grad_norm": 9.778907775878906,
"learning_rate": 1.1176313683648973e-06,
"loss": 0.7067,
"step": 7580
},
{
"epoch": 2.8469617404351086,
"grad_norm": 11.224839210510254,
"learning_rate": 1.0909575886903174e-06,
"loss": 0.5963,
"step": 7590
},
{
"epoch": 2.8507126781695424,
"grad_norm": 11.957784652709961,
"learning_rate": 1.0642838090157376e-06,
"loss": 0.7127,
"step": 7600
},
{
"epoch": 2.854463615903976,
"grad_norm": 17.465967178344727,
"learning_rate": 1.0376100293411576e-06,
"loss": 0.5896,
"step": 7610
},
{
"epoch": 2.8582145536384096,
"grad_norm": 22.074583053588867,
"learning_rate": 1.010936249666578e-06,
"loss": 0.664,
"step": 7620
},
{
"epoch": 2.8619654913728434,
"grad_norm": 45.1811408996582,
"learning_rate": 9.84262469991998e-07,
"loss": 0.6788,
"step": 7630
},
{
"epoch": 2.8657164291072768,
"grad_norm": 12.519074440002441,
"learning_rate": 9.57588690317418e-07,
"loss": 0.5208,
"step": 7640
},
{
"epoch": 2.8694673668417106,
"grad_norm": 14.533720016479492,
"learning_rate": 9.309149106428382e-07,
"loss": 0.6403,
"step": 7650
},
{
"epoch": 2.873218304576144,
"grad_norm": 6.502141952514648,
"learning_rate": 9.042411309682584e-07,
"loss": 0.6661,
"step": 7660
},
{
"epoch": 2.8769692423105777,
"grad_norm": 16.4246826171875,
"learning_rate": 8.775673512936784e-07,
"loss": 0.6631,
"step": 7670
},
{
"epoch": 2.880720180045011,
"grad_norm": 20.435749053955078,
"learning_rate": 8.508935716190984e-07,
"loss": 0.769,
"step": 7680
},
{
"epoch": 2.884471117779445,
"grad_norm": 9.382180213928223,
"learning_rate": 8.242197919445186e-07,
"loss": 0.5407,
"step": 7690
},
{
"epoch": 2.8882220555138787,
"grad_norm": 12.802393913269043,
"learning_rate": 7.975460122699387e-07,
"loss": 0.636,
"step": 7700
},
{
"epoch": 2.891972993248312,
"grad_norm": 5.997576713562012,
"learning_rate": 7.708722325953588e-07,
"loss": 0.561,
"step": 7710
},
{
"epoch": 2.895723930982746,
"grad_norm": 8.369012832641602,
"learning_rate": 7.441984529207789e-07,
"loss": 0.5721,
"step": 7720
},
{
"epoch": 2.899474868717179,
"grad_norm": 19.990249633789062,
"learning_rate": 7.175246732461991e-07,
"loss": 0.7267,
"step": 7730
},
{
"epoch": 2.903225806451613,
"grad_norm": 19.364540100097656,
"learning_rate": 6.908508935716192e-07,
"loss": 0.6867,
"step": 7740
},
{
"epoch": 2.9069767441860463,
"grad_norm": 10.638273239135742,
"learning_rate": 6.641771138970394e-07,
"loss": 0.5809,
"step": 7750
},
{
"epoch": 2.91072768192048,
"grad_norm": 24.913246154785156,
"learning_rate": 6.375033342224594e-07,
"loss": 0.5658,
"step": 7760
},
{
"epoch": 2.914478619654914,
"grad_norm": 6.1255412101745605,
"learning_rate": 6.108295545478795e-07,
"loss": 0.4796,
"step": 7770
},
{
"epoch": 2.9182295573893473,
"grad_norm": 13.97762680053711,
"learning_rate": 5.841557748732996e-07,
"loss": 0.6201,
"step": 7780
},
{
"epoch": 2.921980495123781,
"grad_norm": 24.56553840637207,
"learning_rate": 5.574819951987197e-07,
"loss": 0.5206,
"step": 7790
},
{
"epoch": 2.9257314328582145,
"grad_norm": 20.081579208374023,
"learning_rate": 5.308082155241398e-07,
"loss": 0.5697,
"step": 7800
},
{
"epoch": 2.9294823705926483,
"grad_norm": 11.358619689941406,
"learning_rate": 5.041344358495599e-07,
"loss": 0.6268,
"step": 7810
},
{
"epoch": 2.9332333083270816,
"grad_norm": 11.016149520874023,
"learning_rate": 4.7746065617498e-07,
"loss": 0.5753,
"step": 7820
},
{
"epoch": 2.9369842460615154,
"grad_norm": 17.64615249633789,
"learning_rate": 4.507868765004002e-07,
"loss": 0.7584,
"step": 7830
},
{
"epoch": 2.9407351837959492,
"grad_norm": 17.292207717895508,
"learning_rate": 4.2411309682582024e-07,
"loss": 0.6361,
"step": 7840
},
{
"epoch": 2.9444861215303826,
"grad_norm": 17.94815444946289,
"learning_rate": 3.974393171512404e-07,
"loss": 0.7208,
"step": 7850
},
{
"epoch": 2.948237059264816,
"grad_norm": 13.073601722717285,
"learning_rate": 3.7076553747666047e-07,
"loss": 0.7179,
"step": 7860
},
{
"epoch": 2.9519879969992497,
"grad_norm": 7.956513404846191,
"learning_rate": 3.440917578020806e-07,
"loss": 0.6109,
"step": 7870
},
{
"epoch": 2.9557389347336835,
"grad_norm": 18.16693687438965,
"learning_rate": 3.1741797812750066e-07,
"loss": 0.6499,
"step": 7880
},
{
"epoch": 2.959489872468117,
"grad_norm": 25.006132125854492,
"learning_rate": 2.907441984529208e-07,
"loss": 0.5358,
"step": 7890
},
{
"epoch": 2.9632408102025507,
"grad_norm": 20.937856674194336,
"learning_rate": 2.640704187783409e-07,
"loss": 0.6364,
"step": 7900
},
{
"epoch": 2.9669917479369845,
"grad_norm": 12.37922477722168,
"learning_rate": 2.3739663910376104e-07,
"loss": 0.4916,
"step": 7910
},
{
"epoch": 2.970742685671418,
"grad_norm": 8.240549087524414,
"learning_rate": 2.1072285942918113e-07,
"loss": 0.6811,
"step": 7920
},
{
"epoch": 2.974493623405851,
"grad_norm": 9.405010223388672,
"learning_rate": 1.8404907975460125e-07,
"loss": 0.5338,
"step": 7930
},
{
"epoch": 2.978244561140285,
"grad_norm": 13.773921966552734,
"learning_rate": 1.5737530008002134e-07,
"loss": 0.6314,
"step": 7940
},
{
"epoch": 2.981995498874719,
"grad_norm": 12.41072940826416,
"learning_rate": 1.3070152040544146e-07,
"loss": 0.5497,
"step": 7950
},
{
"epoch": 2.985746436609152,
"grad_norm": 17.232473373413086,
"learning_rate": 1.0402774073086158e-07,
"loss": 0.588,
"step": 7960
},
{
"epoch": 2.989497374343586,
"grad_norm": 27.516319274902344,
"learning_rate": 7.735396105628168e-08,
"loss": 0.7673,
"step": 7970
},
{
"epoch": 2.99324831207802,
"grad_norm": 16.864728927612305,
"learning_rate": 5.0680181381701795e-08,
"loss": 0.6883,
"step": 7980
},
{
"epoch": 2.996999249812453,
"grad_norm": 16.803760528564453,
"learning_rate": 2.40064017071219e-08,
"loss": 0.6337,
"step": 7990
},
{
"epoch": 3.0,
"eval_accuracy": 0.5919831223628692,
"eval_f1_macro": 0.5904844573730711,
"eval_f1_weighted": 0.5917816930917,
"eval_loss": 1.0033386945724487,
"eval_precision_macro": 0.5945695673493336,
"eval_precision_weighted": 0.5926704635628428,
"eval_recall_macro": 0.5877159391363334,
"eval_recall_weighted": 0.5919831223628692,
"eval_runtime": 4.8573,
"eval_samples_per_second": 487.928,
"eval_steps_per_second": 61.145,
"step": 7998
}
],
"logging_steps": 10,
"max_steps": 7998,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.68326808991488e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}