darklorddad commited on
Commit
e7ed158
·
verified ·
1 Parent(s): f28c315

Upload 13 files

Browse files
README.md CHANGED
@@ -1,3 +1,42 @@
 
1
  ---
2
- license: agpl-3.0
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
  ---
3
+ tags:
4
+ - autotrain
5
+ - transformers
6
+ - image-classification
7
+ base_model: microsoft/focalnet-base
8
+ widget:
9
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/tiger.jpg
10
+ example_title: Tiger
11
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/teapot.jpg
12
+ example_title: Teapot
13
+ - src: https://huggingface.co/datasets/mishig/sample_images/resolve/main/palace.jpg
14
+ example_title: Palace
15
  ---
16
+
17
+ # Model Trained Using AutoTrain
18
+
19
+ - Problem type: Image Classification
20
+
21
+ ## Validation Metrics
22
+ loss: 0.6602770090103149
23
+
24
+ f1_macro: 0.8156689398492805
25
+
26
+ f1_micro: 0.8262295081967214
27
+
28
+ f1_weighted: 0.8195565714293827
29
+
30
+ precision_macro: 0.8476504329004328
31
+
32
+ precision_micro: 0.8262295081967214
33
+
34
+ precision_weighted: 0.849387256641355
35
+
36
+ recall_macro: 0.8219166666666666
37
+
38
+ recall_micro: 0.8262295081967214
39
+
40
+ recall_weighted: 0.8262295081967214
41
+
42
+ accuracy: 0.8262295081967214
checkpoint-1224/config.json ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/focalnet-base",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "FocalNetForImageClassification"
6
+ ],
7
+ "depths": [
8
+ 2,
9
+ 2,
10
+ 18,
11
+ 2
12
+ ],
13
+ "drop_path_rate": 0.1,
14
+ "embed_dim": 128,
15
+ "encoder_stride": 32,
16
+ "focal_levels": [
17
+ 2,
18
+ 2,
19
+ 2,
20
+ 2
21
+ ],
22
+ "focal_windows": [
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3
27
+ ],
28
+ "hidden_act": "gelu",
29
+ "hidden_dropout_prob": 0.0,
30
+ "hidden_sizes": [
31
+ 192,
32
+ 384,
33
+ 768,
34
+ 768
35
+ ],
36
+ "id2label": {
37
+ "0": "acadian_flycatcher",
38
+ "1": "american_crow",
39
+ "2": "american_goldfinch",
40
+ "3": "american_pipit",
41
+ "4": "american_redstart",
42
+ "5": "american_three_toed_woodpecker",
43
+ "6": "anna_hummingbird",
44
+ "7": "artic_tern",
45
+ "8": "baird_sparrow",
46
+ "9": "baltimore_oriole",
47
+ "10": "bank_swallow",
48
+ "11": "barn_swallow",
49
+ "12": "bay_breasted_warbler",
50
+ "13": "belted_kingfisher",
51
+ "14": "bewick_wren",
52
+ "15": "black_and_white_warbler",
53
+ "16": "black_billed_cuckoo",
54
+ "17": "black_capped_vireo",
55
+ "18": "black_footed_albatross",
56
+ "19": "black_tern",
57
+ "20": "black_throated_blue_warbler",
58
+ "21": "black_throated_sparrow",
59
+ "22": "blue_grosbeak",
60
+ "23": "blue_headed_vireo",
61
+ "24": "blue_jay",
62
+ "25": "blue_winged_warbler",
63
+ "26": "boat_tailed_grackle",
64
+ "27": "bobolink",
65
+ "28": "bohemian_waxwing",
66
+ "29": "brandt_cormorant",
67
+ "30": "brewer_blackbird",
68
+ "31": "brewer_sparrow",
69
+ "32": "bronzed_cowbird",
70
+ "33": "brown_creeper",
71
+ "34": "brown_pelican",
72
+ "35": "brown_thrasher",
73
+ "36": "cactus_wren",
74
+ "37": "california_gull",
75
+ "38": "canada_warbler",
76
+ "39": "cape_glossy_starling",
77
+ "40": "cape_may_warbler",
78
+ "41": "cardinal",
79
+ "42": "carolina_wren",
80
+ "43": "caspian_tern",
81
+ "44": "cedar_waxwing",
82
+ "45": "cerulean_warbler",
83
+ "46": "chestnut_sided_warbler",
84
+ "47": "chipping_sparrow",
85
+ "48": "chuck_will_widow",
86
+ "49": "clark_nutcracker",
87
+ "50": "clay_colored_sparrow",
88
+ "51": "cliff_swallow",
89
+ "52": "common_raven",
90
+ "53": "common_tern",
91
+ "54": "common_yellowthroat",
92
+ "55": "crested_auklet",
93
+ "56": "dark_eyed_junco",
94
+ "57": "downy_woodpecker",
95
+ "58": "eared_grebe",
96
+ "59": "eastern_towhee",
97
+ "60": "elegant_tern",
98
+ "61": "european_goldfinch",
99
+ "62": "evening_grosbeak",
100
+ "63": "field_sparrow",
101
+ "64": "fish_crow",
102
+ "65": "florida_jay",
103
+ "66": "forsters_tern",
104
+ "67": "fox_sparrow",
105
+ "68": "frigatebird",
106
+ "69": "gadwall",
107
+ "70": "geococcyx",
108
+ "71": "glaucous_winged_gull",
109
+ "72": "golden_winged_warbler",
110
+ "73": "grasshopper_sparrow",
111
+ "74": "gray_catbird",
112
+ "75": "gray_crowned_rosy_finch",
113
+ "76": "gray_kingbird",
114
+ "77": "great_crested_flycatcher",
115
+ "78": "great_grey_shrike",
116
+ "79": "green_jay",
117
+ "80": "green_kingfisher",
118
+ "81": "green_tailed_towhee",
119
+ "82": "green_violetear",
120
+ "83": "groove_billed_ani",
121
+ "84": "harris_sparrow",
122
+ "85": "heermann_gull",
123
+ "86": "henslow_sparrow",
124
+ "87": "herring_gull",
125
+ "88": "hooded_merganser",
126
+ "89": "hooded_oriole",
127
+ "90": "hooded_warbler",
128
+ "91": "horned_grebe",
129
+ "92": "horned_lark",
130
+ "93": "horned_puffin",
131
+ "94": "house_sparrow",
132
+ "95": "house_wren",
133
+ "96": "indigo_bunting",
134
+ "97": "ivory_gull",
135
+ "98": "kentucky_warbler",
136
+ "99": "laysan_albatross",
137
+ "100": "lazuli_bunting",
138
+ "101": "le_conte_sparrow",
139
+ "102": "least_auklet",
140
+ "103": "least_flycatcher",
141
+ "104": "least_tern",
142
+ "105": "lincoln_sparrow",
143
+ "106": "loggerhead_shrike",
144
+ "107": "long_tailed_jaeger",
145
+ "108": "louisiana_waterthrush",
146
+ "109": "magnolia_warbler",
147
+ "110": "mallard",
148
+ "111": "mangrove_cuckoo",
149
+ "112": "marsh_wren",
150
+ "113": "mockingbird",
151
+ "114": "mourning_warbler",
152
+ "115": "myrtle_warbler",
153
+ "116": "nashville_warbler",
154
+ "117": "nelson_sharp_tailed_sparrow",
155
+ "118": "nighthawk",
156
+ "119": "northern_flicker",
157
+ "120": "northern_fulmar",
158
+ "121": "northern_waterthrush",
159
+ "122": "olive_sided_flycatcher",
160
+ "123": "orange_crowned_warbler",
161
+ "124": "orchard_oriole",
162
+ "125": "ovenbird",
163
+ "126": "pacific_loon",
164
+ "127": "painted_bunting",
165
+ "128": "palm_warbler",
166
+ "129": "parakeet_auklet",
167
+ "130": "pelagic_cormorant",
168
+ "131": "philadelphia_vireo",
169
+ "132": "pied_billed_grebe",
170
+ "133": "pied_kingfisher",
171
+ "134": "pigeon_guillemot",
172
+ "135": "pileated_woodpecker",
173
+ "136": "pine_grosbeak",
174
+ "137": "pine_warbler",
175
+ "138": "pomarine_jaeger",
176
+ "139": "prairie_warbler",
177
+ "140": "prothonotary_warbler",
178
+ "141": "purple_finch",
179
+ "142": "red_bellied_woodpecker",
180
+ "143": "red_breasted_merganser",
181
+ "144": "red_cockaded_woodpecker",
182
+ "145": "red_eyed_vireo",
183
+ "146": "red_faced_cormorant",
184
+ "147": "red_headed_woodpecker",
185
+ "148": "red_legged_kittiwake",
186
+ "149": "red_winged_blackbird",
187
+ "150": "rhinoceros_auklet",
188
+ "151": "ring_billed_gull",
189
+ "152": "ringed_kingfisher",
190
+ "153": "rock_wren",
191
+ "154": "rose_breasted_grosbeak",
192
+ "155": "ruby_throated_hummingbird",
193
+ "156": "rufous_hummingbird",
194
+ "157": "rusty_blackbird",
195
+ "158": "sage_thrasher",
196
+ "159": "savannah_sparrow",
197
+ "160": "sayornis",
198
+ "161": "scarlet_tanager",
199
+ "162": "scissor_tailed_flycatcher",
200
+ "163": "scott_oriole",
201
+ "164": "seaside_sparrow",
202
+ "165": "shiny_cowbird",
203
+ "166": "slaty_backed_gull",
204
+ "167": "song_sparrow",
205
+ "168": "sooty_albatross",
206
+ "169": "spotted_catbird",
207
+ "170": "summer_tanager",
208
+ "171": "swainson_warbler",
209
+ "172": "tennessee_warbler",
210
+ "173": "tree_sparrow",
211
+ "174": "tree_swallow",
212
+ "175": "tropical_kingbird",
213
+ "176": "vermilion_flycatcher",
214
+ "177": "vesper_sparrow",
215
+ "178": "warbling_vireo",
216
+ "179": "western_grebe",
217
+ "180": "western_gull",
218
+ "181": "western_meadowlark",
219
+ "182": "western_wood_pewee",
220
+ "183": "whip_poor_will",
221
+ "184": "white_breasted_kingfisher",
222
+ "185": "white_breasted_nuthatch",
223
+ "186": "white_crowned_sparrow",
224
+ "187": "white_eyed_vireo",
225
+ "188": "white_necked_raven",
226
+ "189": "white_pelican",
227
+ "190": "white_throated_sparrow",
228
+ "191": "wilson_warbler",
229
+ "192": "winter_wren",
230
+ "193": "worm_eating_warbler",
231
+ "194": "yellow_bellied_flycatcher",
232
+ "195": "yellow_billed_cuckoo",
233
+ "196": "yellow_breasted_chat",
234
+ "197": "yellow_headed_blackbird",
235
+ "198": "yellow_throated_vireo",
236
+ "199": "yellow_warbler"
237
+ },
238
+ "image_size": 224,
239
+ "initializer_range": 0.02,
240
+ "label2id": {
241
+ "acadian_flycatcher": 0,
242
+ "american_crow": 1,
243
+ "american_goldfinch": 2,
244
+ "american_pipit": 3,
245
+ "american_redstart": 4,
246
+ "american_three_toed_woodpecker": 5,
247
+ "anna_hummingbird": 6,
248
+ "artic_tern": 7,
249
+ "baird_sparrow": 8,
250
+ "baltimore_oriole": 9,
251
+ "bank_swallow": 10,
252
+ "barn_swallow": 11,
253
+ "bay_breasted_warbler": 12,
254
+ "belted_kingfisher": 13,
255
+ "bewick_wren": 14,
256
+ "black_and_white_warbler": 15,
257
+ "black_billed_cuckoo": 16,
258
+ "black_capped_vireo": 17,
259
+ "black_footed_albatross": 18,
260
+ "black_tern": 19,
261
+ "black_throated_blue_warbler": 20,
262
+ "black_throated_sparrow": 21,
263
+ "blue_grosbeak": 22,
264
+ "blue_headed_vireo": 23,
265
+ "blue_jay": 24,
266
+ "blue_winged_warbler": 25,
267
+ "boat_tailed_grackle": 26,
268
+ "bobolink": 27,
269
+ "bohemian_waxwing": 28,
270
+ "brandt_cormorant": 29,
271
+ "brewer_blackbird": 30,
272
+ "brewer_sparrow": 31,
273
+ "bronzed_cowbird": 32,
274
+ "brown_creeper": 33,
275
+ "brown_pelican": 34,
276
+ "brown_thrasher": 35,
277
+ "cactus_wren": 36,
278
+ "california_gull": 37,
279
+ "canada_warbler": 38,
280
+ "cape_glossy_starling": 39,
281
+ "cape_may_warbler": 40,
282
+ "cardinal": 41,
283
+ "carolina_wren": 42,
284
+ "caspian_tern": 43,
285
+ "cedar_waxwing": 44,
286
+ "cerulean_warbler": 45,
287
+ "chestnut_sided_warbler": 46,
288
+ "chipping_sparrow": 47,
289
+ "chuck_will_widow": 48,
290
+ "clark_nutcracker": 49,
291
+ "clay_colored_sparrow": 50,
292
+ "cliff_swallow": 51,
293
+ "common_raven": 52,
294
+ "common_tern": 53,
295
+ "common_yellowthroat": 54,
296
+ "crested_auklet": 55,
297
+ "dark_eyed_junco": 56,
298
+ "downy_woodpecker": 57,
299
+ "eared_grebe": 58,
300
+ "eastern_towhee": 59,
301
+ "elegant_tern": 60,
302
+ "european_goldfinch": 61,
303
+ "evening_grosbeak": 62,
304
+ "field_sparrow": 63,
305
+ "fish_crow": 64,
306
+ "florida_jay": 65,
307
+ "forsters_tern": 66,
308
+ "fox_sparrow": 67,
309
+ "frigatebird": 68,
310
+ "gadwall": 69,
311
+ "geococcyx": 70,
312
+ "glaucous_winged_gull": 71,
313
+ "golden_winged_warbler": 72,
314
+ "grasshopper_sparrow": 73,
315
+ "gray_catbird": 74,
316
+ "gray_crowned_rosy_finch": 75,
317
+ "gray_kingbird": 76,
318
+ "great_crested_flycatcher": 77,
319
+ "great_grey_shrike": 78,
320
+ "green_jay": 79,
321
+ "green_kingfisher": 80,
322
+ "green_tailed_towhee": 81,
323
+ "green_violetear": 82,
324
+ "groove_billed_ani": 83,
325
+ "harris_sparrow": 84,
326
+ "heermann_gull": 85,
327
+ "henslow_sparrow": 86,
328
+ "herring_gull": 87,
329
+ "hooded_merganser": 88,
330
+ "hooded_oriole": 89,
331
+ "hooded_warbler": 90,
332
+ "horned_grebe": 91,
333
+ "horned_lark": 92,
334
+ "horned_puffin": 93,
335
+ "house_sparrow": 94,
336
+ "house_wren": 95,
337
+ "indigo_bunting": 96,
338
+ "ivory_gull": 97,
339
+ "kentucky_warbler": 98,
340
+ "laysan_albatross": 99,
341
+ "lazuli_bunting": 100,
342
+ "le_conte_sparrow": 101,
343
+ "least_auklet": 102,
344
+ "least_flycatcher": 103,
345
+ "least_tern": 104,
346
+ "lincoln_sparrow": 105,
347
+ "loggerhead_shrike": 106,
348
+ "long_tailed_jaeger": 107,
349
+ "louisiana_waterthrush": 108,
350
+ "magnolia_warbler": 109,
351
+ "mallard": 110,
352
+ "mangrove_cuckoo": 111,
353
+ "marsh_wren": 112,
354
+ "mockingbird": 113,
355
+ "mourning_warbler": 114,
356
+ "myrtle_warbler": 115,
357
+ "nashville_warbler": 116,
358
+ "nelson_sharp_tailed_sparrow": 117,
359
+ "nighthawk": 118,
360
+ "northern_flicker": 119,
361
+ "northern_fulmar": 120,
362
+ "northern_waterthrush": 121,
363
+ "olive_sided_flycatcher": 122,
364
+ "orange_crowned_warbler": 123,
365
+ "orchard_oriole": 124,
366
+ "ovenbird": 125,
367
+ "pacific_loon": 126,
368
+ "painted_bunting": 127,
369
+ "palm_warbler": 128,
370
+ "parakeet_auklet": 129,
371
+ "pelagic_cormorant": 130,
372
+ "philadelphia_vireo": 131,
373
+ "pied_billed_grebe": 132,
374
+ "pied_kingfisher": 133,
375
+ "pigeon_guillemot": 134,
376
+ "pileated_woodpecker": 135,
377
+ "pine_grosbeak": 136,
378
+ "pine_warbler": 137,
379
+ "pomarine_jaeger": 138,
380
+ "prairie_warbler": 139,
381
+ "prothonotary_warbler": 140,
382
+ "purple_finch": 141,
383
+ "red_bellied_woodpecker": 142,
384
+ "red_breasted_merganser": 143,
385
+ "red_cockaded_woodpecker": 144,
386
+ "red_eyed_vireo": 145,
387
+ "red_faced_cormorant": 146,
388
+ "red_headed_woodpecker": 147,
389
+ "red_legged_kittiwake": 148,
390
+ "red_winged_blackbird": 149,
391
+ "rhinoceros_auklet": 150,
392
+ "ring_billed_gull": 151,
393
+ "ringed_kingfisher": 152,
394
+ "rock_wren": 153,
395
+ "rose_breasted_grosbeak": 154,
396
+ "ruby_throated_hummingbird": 155,
397
+ "rufous_hummingbird": 156,
398
+ "rusty_blackbird": 157,
399
+ "sage_thrasher": 158,
400
+ "savannah_sparrow": 159,
401
+ "sayornis": 160,
402
+ "scarlet_tanager": 161,
403
+ "scissor_tailed_flycatcher": 162,
404
+ "scott_oriole": 163,
405
+ "seaside_sparrow": 164,
406
+ "shiny_cowbird": 165,
407
+ "slaty_backed_gull": 166,
408
+ "song_sparrow": 167,
409
+ "sooty_albatross": 168,
410
+ "spotted_catbird": 169,
411
+ "summer_tanager": 170,
412
+ "swainson_warbler": 171,
413
+ "tennessee_warbler": 172,
414
+ "tree_sparrow": 173,
415
+ "tree_swallow": 174,
416
+ "tropical_kingbird": 175,
417
+ "vermilion_flycatcher": 176,
418
+ "vesper_sparrow": 177,
419
+ "warbling_vireo": 178,
420
+ "western_grebe": 179,
421
+ "western_gull": 180,
422
+ "western_meadowlark": 181,
423
+ "western_wood_pewee": 182,
424
+ "whip_poor_will": 183,
425
+ "white_breasted_kingfisher": 184,
426
+ "white_breasted_nuthatch": 185,
427
+ "white_crowned_sparrow": 186,
428
+ "white_eyed_vireo": 187,
429
+ "white_necked_raven": 188,
430
+ "white_pelican": 189,
431
+ "white_throated_sparrow": 190,
432
+ "wilson_warbler": 191,
433
+ "winter_wren": 192,
434
+ "worm_eating_warbler": 193,
435
+ "yellow_bellied_flycatcher": 194,
436
+ "yellow_billed_cuckoo": 195,
437
+ "yellow_breasted_chat": 196,
438
+ "yellow_headed_blackbird": 197,
439
+ "yellow_throated_vireo": 198,
440
+ "yellow_warbler": 199
441
+ },
442
+ "layer_norm_eps": 1e-05,
443
+ "layerscale_value": 0.0001,
444
+ "mlp_ratio": 4.0,
445
+ "model_type": "focalnet",
446
+ "normalize_modulator": false,
447
+ "num_channels": 3,
448
+ "out_features": [
449
+ "stage4"
450
+ ],
451
+ "out_indices": [
452
+ 4
453
+ ],
454
+ "patch_size": 4,
455
+ "problem_type": "single_label_classification",
456
+ "stage_names": [
457
+ "stem",
458
+ "stage1",
459
+ "stage2",
460
+ "stage3",
461
+ "stage4"
462
+ ],
463
+ "torch_dtype": "float32",
464
+ "transformers_version": "4.48.0",
465
+ "use_conv_embed": false,
466
+ "use_layerscale": false,
467
+ "use_post_layernorm": false,
468
+ "use_post_layernorm_in_modulation": false
469
+ }
checkpoint-1224/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9025dab054f623985993d46b69629d74fc04f5b47078c37411d1325cb1e73784
3
+ size 14244
checkpoint-1224/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f822abb8b4feee35b1c1eb5e0f6ef28ca4e994b5b569c7d1ec2820a81667c0c
3
+ size 1064
checkpoint-1224/trainer_state.json ADDED
@@ -0,0 +1,1692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6602770090103149,
3
+ "best_model_checkpoint": "Model-Focalnet-Base-\\checkpoint-1224",
4
+ "epoch": 24.0,
5
+ "eval_steps": 7,
6
+ "global_step": 1224,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.1390728476821192,
13
+ "grad_norm": 2.261648416519165,
14
+ "learning_rate": 7.000000000000001e-07,
15
+ "loss": 5.3125,
16
+ "step": 7
17
+ },
18
+ {
19
+ "epoch": 0.2781456953642384,
20
+ "grad_norm": 2.211456537246704,
21
+ "learning_rate": 1.4000000000000001e-06,
22
+ "loss": 5.3234,
23
+ "step": 14
24
+ },
25
+ {
26
+ "epoch": 0.41721854304635764,
27
+ "grad_norm": 2.1140072345733643,
28
+ "learning_rate": 2.1000000000000002e-06,
29
+ "loss": 5.308,
30
+ "step": 21
31
+ },
32
+ {
33
+ "epoch": 0.5562913907284768,
34
+ "grad_norm": 2.2730369567871094,
35
+ "learning_rate": 2.8000000000000003e-06,
36
+ "loss": 5.3189,
37
+ "step": 28
38
+ },
39
+ {
40
+ "epoch": 0.695364238410596,
41
+ "grad_norm": 2.2154030799865723,
42
+ "learning_rate": 3.5000000000000004e-06,
43
+ "loss": 5.2991,
44
+ "step": 35
45
+ },
46
+ {
47
+ "epoch": 0.8344370860927153,
48
+ "grad_norm": 1.887474536895752,
49
+ "learning_rate": 4.2000000000000004e-06,
50
+ "loss": 5.3036,
51
+ "step": 42
52
+ },
53
+ {
54
+ "epoch": 0.9735099337748344,
55
+ "grad_norm": 2.4402565956115723,
56
+ "learning_rate": 4.9000000000000005e-06,
57
+ "loss": 5.2965,
58
+ "step": 49
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.00819672131147541,
63
+ "eval_f1_macro": 0.00337801133638995,
64
+ "eval_f1_micro": 0.00819672131147541,
65
+ "eval_f1_weighted": 0.0035373101357153293,
66
+ "eval_loss": 5.291137218475342,
67
+ "eval_precision_macro": 0.0028020831663437863,
68
+ "eval_precision_micro": 0.00819672131147541,
69
+ "eval_precision_weighted": 0.002911205169282775,
70
+ "eval_recall_macro": 0.008023809523809523,
71
+ "eval_recall_micro": 0.00819672131147541,
72
+ "eval_recall_weighted": 0.00819672131147541,
73
+ "eval_runtime": 65.9936,
74
+ "eval_samples_per_second": 18.487,
75
+ "eval_steps_per_second": 0.303,
76
+ "step": 51
77
+ },
78
+ {
79
+ "epoch": 1.099337748344371,
80
+ "grad_norm": 2.17191219329834,
81
+ "learning_rate": 5.600000000000001e-06,
82
+ "loss": 4.7769,
83
+ "step": 56
84
+ },
85
+ {
86
+ "epoch": 1.23841059602649,
87
+ "grad_norm": 2.5151634216308594,
88
+ "learning_rate": 6.300000000000001e-06,
89
+ "loss": 5.2913,
90
+ "step": 63
91
+ },
92
+ {
93
+ "epoch": 1.3774834437086092,
94
+ "grad_norm": 2.353184938430786,
95
+ "learning_rate": 7.000000000000001e-06,
96
+ "loss": 5.2925,
97
+ "step": 70
98
+ },
99
+ {
100
+ "epoch": 1.5165562913907285,
101
+ "grad_norm": 2.138894557952881,
102
+ "learning_rate": 7.7e-06,
103
+ "loss": 5.2627,
104
+ "step": 77
105
+ },
106
+ {
107
+ "epoch": 1.6556291390728477,
108
+ "grad_norm": 2.234560012817383,
109
+ "learning_rate": 8.400000000000001e-06,
110
+ "loss": 5.2627,
111
+ "step": 84
112
+ },
113
+ {
114
+ "epoch": 1.794701986754967,
115
+ "grad_norm": 2.210279703140259,
116
+ "learning_rate": 9.100000000000001e-06,
117
+ "loss": 5.2633,
118
+ "step": 91
119
+ },
120
+ {
121
+ "epoch": 1.9337748344370862,
122
+ "grad_norm": 2.9447083473205566,
123
+ "learning_rate": 9.800000000000001e-06,
124
+ "loss": 5.2558,
125
+ "step": 98
126
+ },
127
+ {
128
+ "epoch": 2.0,
129
+ "eval_accuracy": 0.01557377049180328,
130
+ "eval_f1_macro": 0.006874468130470725,
131
+ "eval_f1_micro": 0.01557377049180328,
132
+ "eval_f1_weighted": 0.007013333548104455,
133
+ "eval_loss": 5.22589635848999,
134
+ "eval_precision_macro": 0.006156529662888035,
135
+ "eval_precision_micro": 0.01557377049180328,
136
+ "eval_precision_weighted": 0.006207445270776915,
137
+ "eval_recall_macro": 0.01500595238095238,
138
+ "eval_recall_micro": 0.01557377049180328,
139
+ "eval_recall_weighted": 0.01557377049180328,
140
+ "eval_runtime": 56.5165,
141
+ "eval_samples_per_second": 21.587,
142
+ "eval_steps_per_second": 0.354,
143
+ "step": 102
144
+ },
145
+ {
146
+ "epoch": 2.0596026490066226,
147
+ "grad_norm": 2.3659451007843018,
148
+ "learning_rate": 1.05e-05,
149
+ "loss": 4.7295,
150
+ "step": 105
151
+ },
152
+ {
153
+ "epoch": 2.198675496688742,
154
+ "grad_norm": 2.9902284145355225,
155
+ "learning_rate": 1.1200000000000001e-05,
156
+ "loss": 5.2129,
157
+ "step": 112
158
+ },
159
+ {
160
+ "epoch": 2.337748344370861,
161
+ "grad_norm": 4.189450740814209,
162
+ "learning_rate": 1.19e-05,
163
+ "loss": 5.1816,
164
+ "step": 119
165
+ },
166
+ {
167
+ "epoch": 2.47682119205298,
168
+ "grad_norm": 4.748580455780029,
169
+ "learning_rate": 1.2600000000000001e-05,
170
+ "loss": 5.1931,
171
+ "step": 126
172
+ },
173
+ {
174
+ "epoch": 2.6158940397350996,
175
+ "grad_norm": 3.579268455505371,
176
+ "learning_rate": 1.3300000000000001e-05,
177
+ "loss": 5.164,
178
+ "step": 133
179
+ },
180
+ {
181
+ "epoch": 2.7549668874172184,
182
+ "grad_norm": 4.054067611694336,
183
+ "learning_rate": 1.4000000000000001e-05,
184
+ "loss": 5.1483,
185
+ "step": 140
186
+ },
187
+ {
188
+ "epoch": 2.8940397350993377,
189
+ "grad_norm": 3.84291410446167,
190
+ "learning_rate": 1.47e-05,
191
+ "loss": 5.1257,
192
+ "step": 147
193
+ },
194
+ {
195
+ "epoch": 3.0,
196
+ "eval_accuracy": 0.051639344262295085,
197
+ "eval_f1_macro": 0.03261693581398834,
198
+ "eval_f1_micro": 0.051639344262295085,
199
+ "eval_f1_weighted": 0.03516077511642792,
200
+ "eval_loss": 5.062410354614258,
201
+ "eval_precision_macro": 0.03239256187924794,
202
+ "eval_precision_micro": 0.051639344262295085,
203
+ "eval_precision_weighted": 0.034858832269066796,
204
+ "eval_recall_macro": 0.047523809523809524,
205
+ "eval_recall_micro": 0.051639344262295085,
206
+ "eval_recall_weighted": 0.051639344262295085,
207
+ "eval_runtime": 63.3821,
208
+ "eval_samples_per_second": 19.248,
209
+ "eval_steps_per_second": 0.316,
210
+ "step": 153
211
+ },
212
+ {
213
+ "epoch": 3.019867549668874,
214
+ "grad_norm": 4.443902492523193,
215
+ "learning_rate": 1.54e-05,
216
+ "loss": 4.6257,
217
+ "step": 154
218
+ },
219
+ {
220
+ "epoch": 3.1589403973509933,
221
+ "grad_norm": 5.521849155426025,
222
+ "learning_rate": 1.6100000000000002e-05,
223
+ "loss": 5.0162,
224
+ "step": 161
225
+ },
226
+ {
227
+ "epoch": 3.2980132450331126,
228
+ "grad_norm": 6.407104969024658,
229
+ "learning_rate": 1.6800000000000002e-05,
230
+ "loss": 4.9824,
231
+ "step": 168
232
+ },
233
+ {
234
+ "epoch": 3.437086092715232,
235
+ "grad_norm": 5.278021335601807,
236
+ "learning_rate": 1.75e-05,
237
+ "loss": 4.96,
238
+ "step": 175
239
+ },
240
+ {
241
+ "epoch": 3.576158940397351,
242
+ "grad_norm": 7.5942182540893555,
243
+ "learning_rate": 1.8200000000000002e-05,
244
+ "loss": 4.898,
245
+ "step": 182
246
+ },
247
+ {
248
+ "epoch": 3.7152317880794703,
249
+ "grad_norm": 6.050070285797119,
250
+ "learning_rate": 1.8900000000000002e-05,
251
+ "loss": 4.8774,
252
+ "step": 189
253
+ },
254
+ {
255
+ "epoch": 3.8543046357615895,
256
+ "grad_norm": 6.589919567108154,
257
+ "learning_rate": 1.9600000000000002e-05,
258
+ "loss": 4.7924,
259
+ "step": 196
260
+ },
261
+ {
262
+ "epoch": 3.993377483443709,
263
+ "grad_norm": 8.232624053955078,
264
+ "learning_rate": 2.0300000000000002e-05,
265
+ "loss": 4.6994,
266
+ "step": 203
267
+ },
268
+ {
269
+ "epoch": 4.0,
270
+ "eval_accuracy": 0.11721311475409836,
271
+ "eval_f1_macro": 0.08082684992031455,
272
+ "eval_f1_micro": 0.11721311475409836,
273
+ "eval_f1_weighted": 0.08625745944487533,
274
+ "eval_loss": 4.516047477722168,
275
+ "eval_precision_macro": 0.0960521320476185,
276
+ "eval_precision_micro": 0.11721311475409836,
277
+ "eval_precision_weighted": 0.10289070402055932,
278
+ "eval_recall_macro": 0.1088095238095238,
279
+ "eval_recall_micro": 0.11721311475409836,
280
+ "eval_recall_weighted": 0.11721311475409836,
281
+ "eval_runtime": 81.067,
282
+ "eval_samples_per_second": 15.049,
283
+ "eval_steps_per_second": 0.247,
284
+ "step": 204
285
+ },
286
+ {
287
+ "epoch": 4.119205298013245,
288
+ "grad_norm": 7.674986362457275,
289
+ "learning_rate": 2.1e-05,
290
+ "loss": 4.1019,
291
+ "step": 210
292
+ },
293
+ {
294
+ "epoch": 4.258278145695364,
295
+ "grad_norm": 7.877310276031494,
296
+ "learning_rate": 2.1700000000000002e-05,
297
+ "loss": 4.4153,
298
+ "step": 217
299
+ },
300
+ {
301
+ "epoch": 4.397350993377484,
302
+ "grad_norm": 9.657820701599121,
303
+ "learning_rate": 2.2400000000000002e-05,
304
+ "loss": 4.2469,
305
+ "step": 224
306
+ },
307
+ {
308
+ "epoch": 4.5364238410596025,
309
+ "grad_norm": 11.347479820251465,
310
+ "learning_rate": 2.3100000000000002e-05,
311
+ "loss": 4.1469,
312
+ "step": 231
313
+ },
314
+ {
315
+ "epoch": 4.675496688741722,
316
+ "grad_norm": 12.215789794921875,
317
+ "learning_rate": 2.38e-05,
318
+ "loss": 4.0285,
319
+ "step": 238
320
+ },
321
+ {
322
+ "epoch": 4.814569536423841,
323
+ "grad_norm": 10.887558937072754,
324
+ "learning_rate": 2.45e-05,
325
+ "loss": 4.0269,
326
+ "step": 245
327
+ },
328
+ {
329
+ "epoch": 4.95364238410596,
330
+ "grad_norm": 12.850284576416016,
331
+ "learning_rate": 2.5200000000000003e-05,
332
+ "loss": 3.7643,
333
+ "step": 252
334
+ },
335
+ {
336
+ "epoch": 5.0,
337
+ "eval_accuracy": 0.2680327868852459,
338
+ "eval_f1_macro": 0.22529862929165922,
339
+ "eval_f1_micro": 0.2680327868852459,
340
+ "eval_f1_weighted": 0.23166885649403285,
341
+ "eval_loss": 3.3234214782714844,
342
+ "eval_precision_macro": 0.25987233164420576,
343
+ "eval_precision_micro": 0.2680327868852459,
344
+ "eval_precision_weighted": 0.26332140193762377,
345
+ "eval_recall_macro": 0.2577738095238095,
346
+ "eval_recall_micro": 0.2680327868852459,
347
+ "eval_recall_weighted": 0.2680327868852459,
348
+ "eval_runtime": 77.8338,
349
+ "eval_samples_per_second": 15.674,
350
+ "eval_steps_per_second": 0.257,
351
+ "step": 255
352
+ },
353
+ {
354
+ "epoch": 5.079470198675497,
355
+ "grad_norm": 11.808965682983398,
356
+ "learning_rate": 2.5900000000000003e-05,
357
+ "loss": 3.1734,
358
+ "step": 259
359
+ },
360
+ {
361
+ "epoch": 5.218543046357616,
362
+ "grad_norm": 17.217893600463867,
363
+ "learning_rate": 2.6600000000000003e-05,
364
+ "loss": 3.3163,
365
+ "step": 266
366
+ },
367
+ {
368
+ "epoch": 5.357615894039735,
369
+ "grad_norm": 14.96292495727539,
370
+ "learning_rate": 2.7300000000000003e-05,
371
+ "loss": 3.3328,
372
+ "step": 273
373
+ },
374
+ {
375
+ "epoch": 5.496688741721854,
376
+ "grad_norm": 11.553727149963379,
377
+ "learning_rate": 2.8000000000000003e-05,
378
+ "loss": 3.2036,
379
+ "step": 280
380
+ },
381
+ {
382
+ "epoch": 5.635761589403973,
383
+ "grad_norm": 12.452818870544434,
384
+ "learning_rate": 2.87e-05,
385
+ "loss": 3.1867,
386
+ "step": 287
387
+ },
388
+ {
389
+ "epoch": 5.774834437086093,
390
+ "grad_norm": 13.04163646697998,
391
+ "learning_rate": 2.94e-05,
392
+ "loss": 3.0558,
393
+ "step": 294
394
+ },
395
+ {
396
+ "epoch": 5.913907284768212,
397
+ "grad_norm": 12.779662132263184,
398
+ "learning_rate": 3.01e-05,
399
+ "loss": 2.9603,
400
+ "step": 301
401
+ },
402
+ {
403
+ "epoch": 6.0,
404
+ "eval_accuracy": 0.40327868852459015,
405
+ "eval_f1_macro": 0.3522325245599723,
406
+ "eval_f1_micro": 0.40327868852459015,
407
+ "eval_f1_weighted": 0.36290227384056034,
408
+ "eval_loss": 2.3593220710754395,
409
+ "eval_precision_macro": 0.4013160035627141,
410
+ "eval_precision_micro": 0.40327868852459015,
411
+ "eval_precision_weighted": 0.40977942860114985,
412
+ "eval_recall_macro": 0.38851190476190484,
413
+ "eval_recall_micro": 0.40327868852459015,
414
+ "eval_recall_weighted": 0.40327868852459015,
415
+ "eval_runtime": 71.8317,
416
+ "eval_samples_per_second": 16.984,
417
+ "eval_steps_per_second": 0.278,
418
+ "step": 306
419
+ },
420
+ {
421
+ "epoch": 6.039735099337748,
422
+ "grad_norm": 13.623518943786621,
423
+ "learning_rate": 3.08e-05,
424
+ "loss": 2.4678,
425
+ "step": 308
426
+ },
427
+ {
428
+ "epoch": 6.178807947019868,
429
+ "grad_norm": 13.266014099121094,
430
+ "learning_rate": 3.15e-05,
431
+ "loss": 2.6213,
432
+ "step": 315
433
+ },
434
+ {
435
+ "epoch": 6.317880794701987,
436
+ "grad_norm": 13.395142555236816,
437
+ "learning_rate": 3.2200000000000003e-05,
438
+ "loss": 2.4566,
439
+ "step": 322
440
+ },
441
+ {
442
+ "epoch": 6.456953642384106,
443
+ "grad_norm": 13.428766250610352,
444
+ "learning_rate": 3.29e-05,
445
+ "loss": 2.3462,
446
+ "step": 329
447
+ },
448
+ {
449
+ "epoch": 6.596026490066225,
450
+ "grad_norm": 11.362808227539062,
451
+ "learning_rate": 3.3600000000000004e-05,
452
+ "loss": 2.3357,
453
+ "step": 336
454
+ },
455
+ {
456
+ "epoch": 6.735099337748345,
457
+ "grad_norm": 11.982301712036133,
458
+ "learning_rate": 3.430000000000001e-05,
459
+ "loss": 2.2728,
460
+ "step": 343
461
+ },
462
+ {
463
+ "epoch": 6.874172185430464,
464
+ "grad_norm": 15.563032150268555,
465
+ "learning_rate": 3.5e-05,
466
+ "loss": 2.3091,
467
+ "step": 350
468
+ },
469
+ {
470
+ "epoch": 7.0,
471
+ "grad_norm": 10.777310371398926,
472
+ "learning_rate": 3.57e-05,
473
+ "loss": 1.9475,
474
+ "step": 357
475
+ },
476
+ {
477
+ "epoch": 7.0,
478
+ "eval_accuracy": 0.5336065573770492,
479
+ "eval_f1_macro": 0.5010502512573436,
480
+ "eval_f1_micro": 0.5336065573770492,
481
+ "eval_f1_weighted": 0.5078295641241183,
482
+ "eval_loss": 1.7169982194900513,
483
+ "eval_precision_macro": 0.570199926363626,
484
+ "eval_precision_micro": 0.5336065573770492,
485
+ "eval_precision_weighted": 0.5742672096804716,
486
+ "eval_recall_macro": 0.5233749999999999,
487
+ "eval_recall_micro": 0.5336065573770492,
488
+ "eval_recall_weighted": 0.5336065573770492,
489
+ "eval_runtime": 63.8109,
490
+ "eval_samples_per_second": 19.119,
491
+ "eval_steps_per_second": 0.313,
492
+ "step": 357
493
+ },
494
+ {
495
+ "epoch": 7.139072847682119,
496
+ "grad_norm": 12.829914093017578,
497
+ "learning_rate": 3.6400000000000004e-05,
498
+ "loss": 1.9122,
499
+ "step": 364
500
+ },
501
+ {
502
+ "epoch": 7.2781456953642385,
503
+ "grad_norm": 15.254327774047852,
504
+ "learning_rate": 3.71e-05,
505
+ "loss": 1.9511,
506
+ "step": 371
507
+ },
508
+ {
509
+ "epoch": 7.417218543046357,
510
+ "grad_norm": 13.248723030090332,
511
+ "learning_rate": 3.7800000000000004e-05,
512
+ "loss": 1.921,
513
+ "step": 378
514
+ },
515
+ {
516
+ "epoch": 7.556291390728477,
517
+ "grad_norm": 14.405394554138184,
518
+ "learning_rate": 3.85e-05,
519
+ "loss": 1.8447,
520
+ "step": 385
521
+ },
522
+ {
523
+ "epoch": 7.695364238410596,
524
+ "grad_norm": 13.432222366333008,
525
+ "learning_rate": 3.9200000000000004e-05,
526
+ "loss": 1.7079,
527
+ "step": 392
528
+ },
529
+ {
530
+ "epoch": 7.8344370860927155,
531
+ "grad_norm": 13.591761589050293,
532
+ "learning_rate": 3.99e-05,
533
+ "loss": 1.7888,
534
+ "step": 399
535
+ },
536
+ {
537
+ "epoch": 7.973509933774834,
538
+ "grad_norm": 12.760810852050781,
539
+ "learning_rate": 4.0600000000000004e-05,
540
+ "loss": 1.8494,
541
+ "step": 406
542
+ },
543
+ {
544
+ "epoch": 8.0,
545
+ "eval_accuracy": 0.6360655737704918,
546
+ "eval_f1_macro": 0.6127611312020431,
547
+ "eval_f1_micro": 0.6360655737704918,
548
+ "eval_f1_weighted": 0.6178432613234403,
549
+ "eval_loss": 1.343964695930481,
550
+ "eval_precision_macro": 0.6623227605727605,
551
+ "eval_precision_micro": 0.6360655737704918,
552
+ "eval_precision_weighted": 0.6653364258692127,
553
+ "eval_recall_macro": 0.6304523809523809,
554
+ "eval_recall_micro": 0.6360655737704918,
555
+ "eval_recall_weighted": 0.6360655737704918,
556
+ "eval_runtime": 79.4787,
557
+ "eval_samples_per_second": 15.35,
558
+ "eval_steps_per_second": 0.252,
559
+ "step": 408
560
+ },
561
+ {
562
+ "epoch": 8.099337748344372,
563
+ "grad_norm": 11.729964256286621,
564
+ "learning_rate": 4.13e-05,
565
+ "loss": 1.4108,
566
+ "step": 413
567
+ },
568
+ {
569
+ "epoch": 8.23841059602649,
570
+ "grad_norm": 12.144929885864258,
571
+ "learning_rate": 4.2e-05,
572
+ "loss": 1.5489,
573
+ "step": 420
574
+ },
575
+ {
576
+ "epoch": 8.37748344370861,
577
+ "grad_norm": 13.483667373657227,
578
+ "learning_rate": 4.27e-05,
579
+ "loss": 1.5863,
580
+ "step": 427
581
+ },
582
+ {
583
+ "epoch": 8.516556291390728,
584
+ "grad_norm": 16.043304443359375,
585
+ "learning_rate": 4.3400000000000005e-05,
586
+ "loss": 1.4405,
587
+ "step": 434
588
+ },
589
+ {
590
+ "epoch": 8.655629139072847,
591
+ "grad_norm": 15.305998802185059,
592
+ "learning_rate": 4.41e-05,
593
+ "loss": 1.4753,
594
+ "step": 441
595
+ },
596
+ {
597
+ "epoch": 8.794701986754967,
598
+ "grad_norm": 13.507715225219727,
599
+ "learning_rate": 4.4800000000000005e-05,
600
+ "loss": 1.4817,
601
+ "step": 448
602
+ },
603
+ {
604
+ "epoch": 8.933774834437086,
605
+ "grad_norm": 13.252425193786621,
606
+ "learning_rate": 4.55e-05,
607
+ "loss": 1.5227,
608
+ "step": 455
609
+ },
610
+ {
611
+ "epoch": 9.0,
612
+ "eval_accuracy": 0.6786885245901639,
613
+ "eval_f1_macro": 0.6571807258516,
614
+ "eval_f1_micro": 0.6786885245901639,
615
+ "eval_f1_weighted": 0.6634537879698879,
616
+ "eval_loss": 1.1470587253570557,
617
+ "eval_precision_macro": 0.7084700165031047,
618
+ "eval_precision_micro": 0.6786885245901639,
619
+ "eval_precision_weighted": 0.7094984540397994,
620
+ "eval_recall_macro": 0.6691130952380953,
621
+ "eval_recall_micro": 0.6786885245901639,
622
+ "eval_recall_weighted": 0.6786885245901639,
623
+ "eval_runtime": 69.8414,
624
+ "eval_samples_per_second": 17.468,
625
+ "eval_steps_per_second": 0.286,
626
+ "step": 459
627
+ },
628
+ {
629
+ "epoch": 9.059602649006623,
630
+ "grad_norm": 11.350573539733887,
631
+ "learning_rate": 4.6200000000000005e-05,
632
+ "loss": 1.352,
633
+ "step": 462
634
+ },
635
+ {
636
+ "epoch": 9.198675496688741,
637
+ "grad_norm": 11.896257400512695,
638
+ "learning_rate": 4.69e-05,
639
+ "loss": 1.2096,
640
+ "step": 469
641
+ },
642
+ {
643
+ "epoch": 9.33774834437086,
644
+ "grad_norm": 14.927756309509277,
645
+ "learning_rate": 4.76e-05,
646
+ "loss": 1.3018,
647
+ "step": 476
648
+ },
649
+ {
650
+ "epoch": 9.47682119205298,
651
+ "grad_norm": 14.38377571105957,
652
+ "learning_rate": 4.83e-05,
653
+ "loss": 1.2997,
654
+ "step": 483
655
+ },
656
+ {
657
+ "epoch": 9.6158940397351,
658
+ "grad_norm": 10.836702346801758,
659
+ "learning_rate": 4.9e-05,
660
+ "loss": 1.2053,
661
+ "step": 490
662
+ },
663
+ {
664
+ "epoch": 9.754966887417218,
665
+ "grad_norm": 13.384648323059082,
666
+ "learning_rate": 4.97e-05,
667
+ "loss": 1.2461,
668
+ "step": 497
669
+ },
670
+ {
671
+ "epoch": 9.894039735099337,
672
+ "grad_norm": 12.859415054321289,
673
+ "learning_rate": 4.995555555555556e-05,
674
+ "loss": 1.2476,
675
+ "step": 504
676
+ },
677
+ {
678
+ "epoch": 10.0,
679
+ "eval_accuracy": 0.7295081967213115,
680
+ "eval_f1_macro": 0.7136993719988303,
681
+ "eval_f1_micro": 0.7295081967213115,
682
+ "eval_f1_weighted": 0.7185265127973471,
683
+ "eval_loss": 0.9676371812820435,
684
+ "eval_precision_macro": 0.7655571405718464,
685
+ "eval_precision_micro": 0.7295081967213115,
686
+ "eval_precision_weighted": 0.7658903327466492,
687
+ "eval_recall_macro": 0.7218452380952379,
688
+ "eval_recall_micro": 0.7295081967213115,
689
+ "eval_recall_weighted": 0.7295081967213115,
690
+ "eval_runtime": 64.0687,
691
+ "eval_samples_per_second": 19.042,
692
+ "eval_steps_per_second": 0.312,
693
+ "step": 510
694
+ },
695
+ {
696
+ "epoch": 10.019867549668874,
697
+ "grad_norm": 11.940890312194824,
698
+ "learning_rate": 4.987777777777778e-05,
699
+ "loss": 1.0087,
700
+ "step": 511
701
+ },
702
+ {
703
+ "epoch": 10.158940397350994,
704
+ "grad_norm": 11.370889663696289,
705
+ "learning_rate": 4.9800000000000004e-05,
706
+ "loss": 1.0759,
707
+ "step": 518
708
+ },
709
+ {
710
+ "epoch": 10.298013245033113,
711
+ "grad_norm": 11.712719917297363,
712
+ "learning_rate": 4.972222222222223e-05,
713
+ "loss": 1.0388,
714
+ "step": 525
715
+ },
716
+ {
717
+ "epoch": 10.437086092715232,
718
+ "grad_norm": 15.134650230407715,
719
+ "learning_rate": 4.964444444444445e-05,
720
+ "loss": 1.0933,
721
+ "step": 532
722
+ },
723
+ {
724
+ "epoch": 10.57615894039735,
725
+ "grad_norm": 11.481903076171875,
726
+ "learning_rate": 4.956666666666667e-05,
727
+ "loss": 1.0236,
728
+ "step": 539
729
+ },
730
+ {
731
+ "epoch": 10.71523178807947,
732
+ "grad_norm": 11.978276252746582,
733
+ "learning_rate": 4.948888888888889e-05,
734
+ "loss": 1.1232,
735
+ "step": 546
736
+ },
737
+ {
738
+ "epoch": 10.85430463576159,
739
+ "grad_norm": 12.34005355834961,
740
+ "learning_rate": 4.9411111111111114e-05,
741
+ "loss": 1.0067,
742
+ "step": 553
743
+ },
744
+ {
745
+ "epoch": 10.993377483443709,
746
+ "grad_norm": 11.154061317443848,
747
+ "learning_rate": 4.933333333333334e-05,
748
+ "loss": 1.1001,
749
+ "step": 560
750
+ },
751
+ {
752
+ "epoch": 11.0,
753
+ "eval_accuracy": 0.7385245901639345,
754
+ "eval_f1_macro": 0.7282043296830448,
755
+ "eval_f1_micro": 0.7385245901639345,
756
+ "eval_f1_weighted": 0.732015719256241,
757
+ "eval_loss": 0.8772674798965454,
758
+ "eval_precision_macro": 0.7795788517038517,
759
+ "eval_precision_micro": 0.7385245901639345,
760
+ "eval_precision_weighted": 0.7814253801753802,
761
+ "eval_recall_macro": 0.733672619047619,
762
+ "eval_recall_micro": 0.7385245901639345,
763
+ "eval_recall_weighted": 0.7385245901639345,
764
+ "eval_runtime": 60.2253,
765
+ "eval_samples_per_second": 20.257,
766
+ "eval_steps_per_second": 0.332,
767
+ "step": 561
768
+ },
769
+ {
770
+ "epoch": 11.119205298013245,
771
+ "grad_norm": 8.575409889221191,
772
+ "learning_rate": 4.925555555555556e-05,
773
+ "loss": 0.8726,
774
+ "step": 567
775
+ },
776
+ {
777
+ "epoch": 11.258278145695364,
778
+ "grad_norm": 12.448003768920898,
779
+ "learning_rate": 4.917777777777778e-05,
780
+ "loss": 0.9765,
781
+ "step": 574
782
+ },
783
+ {
784
+ "epoch": 11.397350993377483,
785
+ "grad_norm": 10.99142837524414,
786
+ "learning_rate": 4.91e-05,
787
+ "loss": 0.8438,
788
+ "step": 581
789
+ },
790
+ {
791
+ "epoch": 11.536423841059603,
792
+ "grad_norm": 9.985913276672363,
793
+ "learning_rate": 4.9022222222222224e-05,
794
+ "loss": 0.863,
795
+ "step": 588
796
+ },
797
+ {
798
+ "epoch": 11.675496688741722,
799
+ "grad_norm": 14.102209091186523,
800
+ "learning_rate": 4.894444444444445e-05,
801
+ "loss": 0.9674,
802
+ "step": 595
803
+ },
804
+ {
805
+ "epoch": 11.814569536423841,
806
+ "grad_norm": 10.937699317932129,
807
+ "learning_rate": 4.886666666666667e-05,
808
+ "loss": 0.9521,
809
+ "step": 602
810
+ },
811
+ {
812
+ "epoch": 11.95364238410596,
813
+ "grad_norm": 10.190333366394043,
814
+ "learning_rate": 4.878888888888889e-05,
815
+ "loss": 0.8804,
816
+ "step": 609
817
+ },
818
+ {
819
+ "epoch": 12.0,
820
+ "eval_accuracy": 0.759016393442623,
821
+ "eval_f1_macro": 0.7427500998456881,
822
+ "eval_f1_micro": 0.759016393442623,
823
+ "eval_f1_weighted": 0.7456563548213297,
824
+ "eval_loss": 0.8271353840827942,
825
+ "eval_precision_macro": 0.7684717300243616,
826
+ "eval_precision_micro": 0.759016393442623,
827
+ "eval_precision_weighted": 0.7719776994647571,
828
+ "eval_recall_macro": 0.756702380952381,
829
+ "eval_recall_micro": 0.759016393442623,
830
+ "eval_recall_weighted": 0.759016393442623,
831
+ "eval_runtime": 58.6516,
832
+ "eval_samples_per_second": 20.801,
833
+ "eval_steps_per_second": 0.341,
834
+ "step": 612
835
+ },
836
+ {
837
+ "epoch": 12.079470198675496,
838
+ "grad_norm": 14.1576509475708,
839
+ "learning_rate": 4.871111111111111e-05,
840
+ "loss": 0.719,
841
+ "step": 616
842
+ },
843
+ {
844
+ "epoch": 12.218543046357617,
845
+ "grad_norm": 11.829643249511719,
846
+ "learning_rate": 4.8633333333333334e-05,
847
+ "loss": 0.9113,
848
+ "step": 623
849
+ },
850
+ {
851
+ "epoch": 12.357615894039736,
852
+ "grad_norm": 9.620296478271484,
853
+ "learning_rate": 4.855555555555556e-05,
854
+ "loss": 0.8671,
855
+ "step": 630
856
+ },
857
+ {
858
+ "epoch": 12.496688741721854,
859
+ "grad_norm": 10.44937801361084,
860
+ "learning_rate": 4.847777777777778e-05,
861
+ "loss": 0.8422,
862
+ "step": 637
863
+ },
864
+ {
865
+ "epoch": 12.635761589403973,
866
+ "grad_norm": 7.808290958404541,
867
+ "learning_rate": 4.8400000000000004e-05,
868
+ "loss": 0.8018,
869
+ "step": 644
870
+ },
871
+ {
872
+ "epoch": 12.774834437086092,
873
+ "grad_norm": 9.790284156799316,
874
+ "learning_rate": 4.832222222222223e-05,
875
+ "loss": 0.8626,
876
+ "step": 651
877
+ },
878
+ {
879
+ "epoch": 12.913907284768213,
880
+ "grad_norm": 12.296673774719238,
881
+ "learning_rate": 4.824444444444445e-05,
882
+ "loss": 0.9596,
883
+ "step": 658
884
+ },
885
+ {
886
+ "epoch": 13.0,
887
+ "eval_accuracy": 0.7622950819672131,
888
+ "eval_f1_macro": 0.7541482304589116,
889
+ "eval_f1_micro": 0.7622950819672131,
890
+ "eval_f1_weighted": 0.7581034870800643,
891
+ "eval_loss": 0.8282718062400818,
892
+ "eval_precision_macro": 0.7943097392803276,
893
+ "eval_precision_micro": 0.7622950819672131,
894
+ "eval_precision_weighted": 0.7971667340748826,
895
+ "eval_recall_macro": 0.7580535714285713,
896
+ "eval_recall_micro": 0.7622950819672131,
897
+ "eval_recall_weighted": 0.7622950819672131,
898
+ "eval_runtime": 59.927,
899
+ "eval_samples_per_second": 20.358,
900
+ "eval_steps_per_second": 0.334,
901
+ "step": 663
902
+ },
903
+ {
904
+ "epoch": 13.039735099337749,
905
+ "grad_norm": 18.717695236206055,
906
+ "learning_rate": 4.8166666666666674e-05,
907
+ "loss": 0.7906,
908
+ "step": 665
909
+ },
910
+ {
911
+ "epoch": 13.178807947019868,
912
+ "grad_norm": 14.046932220458984,
913
+ "learning_rate": 4.808888888888889e-05,
914
+ "loss": 0.7326,
915
+ "step": 672
916
+ },
917
+ {
918
+ "epoch": 13.317880794701987,
919
+ "grad_norm": 11.162008285522461,
920
+ "learning_rate": 4.8011111111111114e-05,
921
+ "loss": 0.8299,
922
+ "step": 679
923
+ },
924
+ {
925
+ "epoch": 13.456953642384105,
926
+ "grad_norm": 9.34903335571289,
927
+ "learning_rate": 4.793333333333334e-05,
928
+ "loss": 0.7046,
929
+ "step": 686
930
+ },
931
+ {
932
+ "epoch": 13.596026490066226,
933
+ "grad_norm": 8.978596687316895,
934
+ "learning_rate": 4.785555555555556e-05,
935
+ "loss": 0.672,
936
+ "step": 693
937
+ },
938
+ {
939
+ "epoch": 13.735099337748345,
940
+ "grad_norm": 9.649175643920898,
941
+ "learning_rate": 4.7777777777777784e-05,
942
+ "loss": 0.7706,
943
+ "step": 700
944
+ },
945
+ {
946
+ "epoch": 13.874172185430464,
947
+ "grad_norm": 9.140443801879883,
948
+ "learning_rate": 4.77e-05,
949
+ "loss": 0.7734,
950
+ "step": 707
951
+ },
952
+ {
953
+ "epoch": 14.0,
954
+ "grad_norm": 6.996921062469482,
955
+ "learning_rate": 4.7622222222222224e-05,
956
+ "loss": 0.6202,
957
+ "step": 714
958
+ },
959
+ {
960
+ "epoch": 14.0,
961
+ "eval_accuracy": 0.7754098360655738,
962
+ "eval_f1_macro": 0.765805670364494,
963
+ "eval_f1_micro": 0.7754098360655738,
964
+ "eval_f1_weighted": 0.7695095891286827,
965
+ "eval_loss": 0.7957718372344971,
966
+ "eval_precision_macro": 0.8098741258741259,
967
+ "eval_precision_micro": 0.7754098360655738,
968
+ "eval_precision_weighted": 0.8099575401829501,
969
+ "eval_recall_macro": 0.769452380952381,
970
+ "eval_recall_micro": 0.7754098360655738,
971
+ "eval_recall_weighted": 0.7754098360655738,
972
+ "eval_runtime": 58.8988,
973
+ "eval_samples_per_second": 20.713,
974
+ "eval_steps_per_second": 0.34,
975
+ "step": 714
976
+ },
977
+ {
978
+ "epoch": 14.139072847682119,
979
+ "grad_norm": 11.548070907592773,
980
+ "learning_rate": 4.754444444444445e-05,
981
+ "loss": 0.7968,
982
+ "step": 721
983
+ },
984
+ {
985
+ "epoch": 14.278145695364238,
986
+ "grad_norm": 11.0925874710083,
987
+ "learning_rate": 4.746666666666667e-05,
988
+ "loss": 0.6864,
989
+ "step": 728
990
+ },
991
+ {
992
+ "epoch": 14.417218543046358,
993
+ "grad_norm": 9.538455963134766,
994
+ "learning_rate": 4.7388888888888894e-05,
995
+ "loss": 0.6766,
996
+ "step": 735
997
+ },
998
+ {
999
+ "epoch": 14.556291390728477,
1000
+ "grad_norm": 7.995402812957764,
1001
+ "learning_rate": 4.731111111111111e-05,
1002
+ "loss": 0.7023,
1003
+ "step": 742
1004
+ },
1005
+ {
1006
+ "epoch": 14.695364238410596,
1007
+ "grad_norm": 10.825759887695312,
1008
+ "learning_rate": 4.7233333333333334e-05,
1009
+ "loss": 0.6883,
1010
+ "step": 749
1011
+ },
1012
+ {
1013
+ "epoch": 14.834437086092715,
1014
+ "grad_norm": 14.279191017150879,
1015
+ "learning_rate": 4.715555555555556e-05,
1016
+ "loss": 0.6533,
1017
+ "step": 756
1018
+ },
1019
+ {
1020
+ "epoch": 14.973509933774835,
1021
+ "grad_norm": 8.562923431396484,
1022
+ "learning_rate": 4.707777777777778e-05,
1023
+ "loss": 0.6466,
1024
+ "step": 763
1025
+ },
1026
+ {
1027
+ "epoch": 15.0,
1028
+ "eval_accuracy": 0.7967213114754098,
1029
+ "eval_f1_macro": 0.7874462737947056,
1030
+ "eval_f1_micro": 0.7967213114754098,
1031
+ "eval_f1_weighted": 0.7923798470661948,
1032
+ "eval_loss": 0.7445575594902039,
1033
+ "eval_precision_macro": 0.8216799295475766,
1034
+ "eval_precision_micro": 0.7967213114754098,
1035
+ "eval_precision_weighted": 0.8259746225862427,
1036
+ "eval_recall_macro": 0.7922261904761905,
1037
+ "eval_recall_micro": 0.7967213114754098,
1038
+ "eval_recall_weighted": 0.7967213114754098,
1039
+ "eval_runtime": 59.8003,
1040
+ "eval_samples_per_second": 20.401,
1041
+ "eval_steps_per_second": 0.334,
1042
+ "step": 765
1043
+ },
1044
+ {
1045
+ "epoch": 15.099337748344372,
1046
+ "grad_norm": 9.65889835357666,
1047
+ "learning_rate": 4.7e-05,
1048
+ "loss": 0.6024,
1049
+ "step": 770
1050
+ },
1051
+ {
1052
+ "epoch": 15.23841059602649,
1053
+ "grad_norm": 8.170406341552734,
1054
+ "learning_rate": 4.692222222222222e-05,
1055
+ "loss": 0.5263,
1056
+ "step": 777
1057
+ },
1058
+ {
1059
+ "epoch": 15.37748344370861,
1060
+ "grad_norm": 8.782620429992676,
1061
+ "learning_rate": 4.6844444444444444e-05,
1062
+ "loss": 0.552,
1063
+ "step": 784
1064
+ },
1065
+ {
1066
+ "epoch": 15.516556291390728,
1067
+ "grad_norm": 11.878396034240723,
1068
+ "learning_rate": 4.676666666666667e-05,
1069
+ "loss": 0.6127,
1070
+ "step": 791
1071
+ },
1072
+ {
1073
+ "epoch": 15.655629139072847,
1074
+ "grad_norm": 8.88171672821045,
1075
+ "learning_rate": 4.668888888888889e-05,
1076
+ "loss": 0.6756,
1077
+ "step": 798
1078
+ },
1079
+ {
1080
+ "epoch": 15.794701986754967,
1081
+ "grad_norm": 11.983383178710938,
1082
+ "learning_rate": 4.6611111111111114e-05,
1083
+ "loss": 0.664,
1084
+ "step": 805
1085
+ },
1086
+ {
1087
+ "epoch": 15.933774834437086,
1088
+ "grad_norm": 10.409689903259277,
1089
+ "learning_rate": 4.653333333333334e-05,
1090
+ "loss": 0.6436,
1091
+ "step": 812
1092
+ },
1093
+ {
1094
+ "epoch": 16.0,
1095
+ "eval_accuracy": 0.7918032786885246,
1096
+ "eval_f1_macro": 0.7815447427921685,
1097
+ "eval_f1_micro": 0.7918032786885246,
1098
+ "eval_f1_weighted": 0.7856156314459259,
1099
+ "eval_loss": 0.7297011017799377,
1100
+ "eval_precision_macro": 0.8101799866799867,
1101
+ "eval_precision_micro": 0.7918032786885246,
1102
+ "eval_precision_weighted": 0.8123722907329464,
1103
+ "eval_recall_macro": 0.7866488095238096,
1104
+ "eval_recall_micro": 0.7918032786885246,
1105
+ "eval_recall_weighted": 0.7918032786885246,
1106
+ "eval_runtime": 60.0895,
1107
+ "eval_samples_per_second": 20.303,
1108
+ "eval_steps_per_second": 0.333,
1109
+ "step": 816
1110
+ },
1111
+ {
1112
+ "epoch": 16.05960264900662,
1113
+ "grad_norm": 10.903715133666992,
1114
+ "learning_rate": 4.645555555555556e-05,
1115
+ "loss": 0.5591,
1116
+ "step": 819
1117
+ },
1118
+ {
1119
+ "epoch": 16.198675496688743,
1120
+ "grad_norm": 8.767610549926758,
1121
+ "learning_rate": 4.6377777777777784e-05,
1122
+ "loss": 0.5711,
1123
+ "step": 826
1124
+ },
1125
+ {
1126
+ "epoch": 16.337748344370862,
1127
+ "grad_norm": 8.273555755615234,
1128
+ "learning_rate": 4.630000000000001e-05,
1129
+ "loss": 0.5811,
1130
+ "step": 833
1131
+ },
1132
+ {
1133
+ "epoch": 16.47682119205298,
1134
+ "grad_norm": 12.013016700744629,
1135
+ "learning_rate": 4.6222222222222224e-05,
1136
+ "loss": 0.6443,
1137
+ "step": 840
1138
+ },
1139
+ {
1140
+ "epoch": 16.6158940397351,
1141
+ "grad_norm": 7.874364376068115,
1142
+ "learning_rate": 4.614444444444445e-05,
1143
+ "loss": 0.5073,
1144
+ "step": 847
1145
+ },
1146
+ {
1147
+ "epoch": 16.75496688741722,
1148
+ "grad_norm": 9.01498031616211,
1149
+ "learning_rate": 4.606666666666667e-05,
1150
+ "loss": 0.608,
1151
+ "step": 854
1152
+ },
1153
+ {
1154
+ "epoch": 16.894039735099337,
1155
+ "grad_norm": 9.848909378051758,
1156
+ "learning_rate": 4.5988888888888894e-05,
1157
+ "loss": 0.5929,
1158
+ "step": 861
1159
+ },
1160
+ {
1161
+ "epoch": 17.0,
1162
+ "eval_accuracy": 0.7959016393442623,
1163
+ "eval_f1_macro": 0.7867938321138785,
1164
+ "eval_f1_micro": 0.7959016393442623,
1165
+ "eval_f1_weighted": 0.7917754148114372,
1166
+ "eval_loss": 0.7077643871307373,
1167
+ "eval_precision_macro": 0.8185556526806528,
1168
+ "eval_precision_micro": 0.7959016393442623,
1169
+ "eval_precision_weighted": 0.8217451378312034,
1170
+ "eval_recall_macro": 0.7902916666666667,
1171
+ "eval_recall_micro": 0.7959016393442623,
1172
+ "eval_recall_weighted": 0.7959016393442623,
1173
+ "eval_runtime": 59.7504,
1174
+ "eval_samples_per_second": 20.418,
1175
+ "eval_steps_per_second": 0.335,
1176
+ "step": 867
1177
+ },
1178
+ {
1179
+ "epoch": 17.019867549668874,
1180
+ "grad_norm": 9.507264137268066,
1181
+ "learning_rate": 4.591111111111112e-05,
1182
+ "loss": 0.5247,
1183
+ "step": 868
1184
+ },
1185
+ {
1186
+ "epoch": 17.158940397350992,
1187
+ "grad_norm": 7.274167537689209,
1188
+ "learning_rate": 4.5833333333333334e-05,
1189
+ "loss": 0.5212,
1190
+ "step": 875
1191
+ },
1192
+ {
1193
+ "epoch": 17.29801324503311,
1194
+ "grad_norm": 8.040386199951172,
1195
+ "learning_rate": 4.575555555555556e-05,
1196
+ "loss": 0.4957,
1197
+ "step": 882
1198
+ },
1199
+ {
1200
+ "epoch": 17.437086092715234,
1201
+ "grad_norm": 10.34827709197998,
1202
+ "learning_rate": 4.567777777777778e-05,
1203
+ "loss": 0.4938,
1204
+ "step": 889
1205
+ },
1206
+ {
1207
+ "epoch": 17.576158940397352,
1208
+ "grad_norm": 9.062361717224121,
1209
+ "learning_rate": 4.5600000000000004e-05,
1210
+ "loss": 0.5341,
1211
+ "step": 896
1212
+ },
1213
+ {
1214
+ "epoch": 17.71523178807947,
1215
+ "grad_norm": 7.889723777770996,
1216
+ "learning_rate": 4.552222222222222e-05,
1217
+ "loss": 0.5407,
1218
+ "step": 903
1219
+ },
1220
+ {
1221
+ "epoch": 17.85430463576159,
1222
+ "grad_norm": 7.329662799835205,
1223
+ "learning_rate": 4.5444444444444444e-05,
1224
+ "loss": 0.5344,
1225
+ "step": 910
1226
+ },
1227
+ {
1228
+ "epoch": 17.99337748344371,
1229
+ "grad_norm": 10.251781463623047,
1230
+ "learning_rate": 4.536666666666667e-05,
1231
+ "loss": 0.5108,
1232
+ "step": 917
1233
+ },
1234
+ {
1235
+ "epoch": 18.0,
1236
+ "eval_accuracy": 0.8,
1237
+ "eval_f1_macro": 0.7904420722323199,
1238
+ "eval_f1_micro": 0.8,
1239
+ "eval_f1_weighted": 0.794189161749749,
1240
+ "eval_loss": 0.7119916081428528,
1241
+ "eval_precision_macro": 0.8223463203463203,
1242
+ "eval_precision_micro": 0.8,
1243
+ "eval_precision_weighted": 0.8258165377427673,
1244
+ "eval_recall_macro": 0.7962023809523809,
1245
+ "eval_recall_micro": 0.8,
1246
+ "eval_recall_weighted": 0.8,
1247
+ "eval_runtime": 58.9812,
1248
+ "eval_samples_per_second": 20.685,
1249
+ "eval_steps_per_second": 0.339,
1250
+ "step": 918
1251
+ },
1252
+ {
1253
+ "epoch": 18.119205298013245,
1254
+ "grad_norm": 7.837319374084473,
1255
+ "learning_rate": 4.528888888888889e-05,
1256
+ "loss": 0.4401,
1257
+ "step": 924
1258
+ },
1259
+ {
1260
+ "epoch": 18.258278145695364,
1261
+ "grad_norm": 7.545521259307861,
1262
+ "learning_rate": 4.5211111111111114e-05,
1263
+ "loss": 0.4821,
1264
+ "step": 931
1265
+ },
1266
+ {
1267
+ "epoch": 18.397350993377483,
1268
+ "grad_norm": 7.626832962036133,
1269
+ "learning_rate": 4.513333333333333e-05,
1270
+ "loss": 0.4991,
1271
+ "step": 938
1272
+ },
1273
+ {
1274
+ "epoch": 18.5364238410596,
1275
+ "grad_norm": 7.265345573425293,
1276
+ "learning_rate": 4.5055555555555554e-05,
1277
+ "loss": 0.5936,
1278
+ "step": 945
1279
+ },
1280
+ {
1281
+ "epoch": 18.67549668874172,
1282
+ "grad_norm": 6.648807525634766,
1283
+ "learning_rate": 4.497777777777778e-05,
1284
+ "loss": 0.4418,
1285
+ "step": 952
1286
+ },
1287
+ {
1288
+ "epoch": 18.814569536423843,
1289
+ "grad_norm": 6.413826942443848,
1290
+ "learning_rate": 4.49e-05,
1291
+ "loss": 0.4185,
1292
+ "step": 959
1293
+ },
1294
+ {
1295
+ "epoch": 18.95364238410596,
1296
+ "grad_norm": 9.378252029418945,
1297
+ "learning_rate": 4.4822222222222224e-05,
1298
+ "loss": 0.5109,
1299
+ "step": 966
1300
+ },
1301
+ {
1302
+ "epoch": 19.0,
1303
+ "eval_accuracy": 0.8106557377049181,
1304
+ "eval_f1_macro": 0.8023834074422309,
1305
+ "eval_f1_micro": 0.8106557377049181,
1306
+ "eval_f1_weighted": 0.8054703936104611,
1307
+ "eval_loss": 0.671293318271637,
1308
+ "eval_precision_macro": 0.8325211038961038,
1309
+ "eval_precision_micro": 0.8106557377049181,
1310
+ "eval_precision_weighted": 0.8349751023111679,
1311
+ "eval_recall_macro": 0.8078333333333333,
1312
+ "eval_recall_micro": 0.8106557377049181,
1313
+ "eval_recall_weighted": 0.8106557377049181,
1314
+ "eval_runtime": 60.0408,
1315
+ "eval_samples_per_second": 20.32,
1316
+ "eval_steps_per_second": 0.333,
1317
+ "step": 969
1318
+ },
1319
+ {
1320
+ "epoch": 19.079470198675498,
1321
+ "grad_norm": 6.43688440322876,
1322
+ "learning_rate": 4.474444444444445e-05,
1323
+ "loss": 0.401,
1324
+ "step": 973
1325
+ },
1326
+ {
1327
+ "epoch": 19.218543046357617,
1328
+ "grad_norm": 10.133489608764648,
1329
+ "learning_rate": 4.466666666666667e-05,
1330
+ "loss": 0.4449,
1331
+ "step": 980
1332
+ },
1333
+ {
1334
+ "epoch": 19.357615894039736,
1335
+ "grad_norm": 9.007479667663574,
1336
+ "learning_rate": 4.4588888888888894e-05,
1337
+ "loss": 0.5457,
1338
+ "step": 987
1339
+ },
1340
+ {
1341
+ "epoch": 19.496688741721854,
1342
+ "grad_norm": 10.912771224975586,
1343
+ "learning_rate": 4.451111111111112e-05,
1344
+ "loss": 0.5306,
1345
+ "step": 994
1346
+ },
1347
+ {
1348
+ "epoch": 19.635761589403973,
1349
+ "grad_norm": 6.615180492401123,
1350
+ "learning_rate": 4.443333333333334e-05,
1351
+ "loss": 0.4925,
1352
+ "step": 1001
1353
+ },
1354
+ {
1355
+ "epoch": 19.774834437086092,
1356
+ "grad_norm": 7.076197147369385,
1357
+ "learning_rate": 4.435555555555556e-05,
1358
+ "loss": 0.4787,
1359
+ "step": 1008
1360
+ },
1361
+ {
1362
+ "epoch": 19.91390728476821,
1363
+ "grad_norm": 7.040290832519531,
1364
+ "learning_rate": 4.427777777777778e-05,
1365
+ "loss": 0.4809,
1366
+ "step": 1015
1367
+ },
1368
+ {
1369
+ "epoch": 20.0,
1370
+ "eval_accuracy": 0.8139344262295082,
1371
+ "eval_f1_macro": 0.8081211352716771,
1372
+ "eval_f1_micro": 0.8139344262295082,
1373
+ "eval_f1_weighted": 0.8116663019924579,
1374
+ "eval_loss": 0.6667141914367676,
1375
+ "eval_precision_macro": 0.8430578726828728,
1376
+ "eval_precision_micro": 0.8139344262295082,
1377
+ "eval_precision_weighted": 0.8445454568200469,
1378
+ "eval_recall_macro": 0.8106190476190476,
1379
+ "eval_recall_micro": 0.8139344262295082,
1380
+ "eval_recall_weighted": 0.8139344262295082,
1381
+ "eval_runtime": 58.7404,
1382
+ "eval_samples_per_second": 20.769,
1383
+ "eval_steps_per_second": 0.34,
1384
+ "step": 1020
1385
+ },
1386
+ {
1387
+ "epoch": 20.039735099337747,
1388
+ "grad_norm": 9.023087501525879,
1389
+ "learning_rate": 4.4200000000000004e-05,
1390
+ "loss": 0.386,
1391
+ "step": 1022
1392
+ },
1393
+ {
1394
+ "epoch": 20.178807947019866,
1395
+ "grad_norm": 7.4928178787231445,
1396
+ "learning_rate": 4.412222222222223e-05,
1397
+ "loss": 0.4569,
1398
+ "step": 1029
1399
+ },
1400
+ {
1401
+ "epoch": 20.31788079470199,
1402
+ "grad_norm": 8.090821266174316,
1403
+ "learning_rate": 4.404444444444445e-05,
1404
+ "loss": 0.4778,
1405
+ "step": 1036
1406
+ },
1407
+ {
1408
+ "epoch": 20.456953642384107,
1409
+ "grad_norm": 8.650497436523438,
1410
+ "learning_rate": 4.396666666666667e-05,
1411
+ "loss": 0.4786,
1412
+ "step": 1043
1413
+ },
1414
+ {
1415
+ "epoch": 20.596026490066226,
1416
+ "grad_norm": 6.049080848693848,
1417
+ "learning_rate": 4.388888888888889e-05,
1418
+ "loss": 0.4975,
1419
+ "step": 1050
1420
+ },
1421
+ {
1422
+ "epoch": 20.735099337748345,
1423
+ "grad_norm": 10.202515602111816,
1424
+ "learning_rate": 4.3811111111111114e-05,
1425
+ "loss": 0.4035,
1426
+ "step": 1057
1427
+ },
1428
+ {
1429
+ "epoch": 20.874172185430464,
1430
+ "grad_norm": 7.0871429443359375,
1431
+ "learning_rate": 4.373333333333334e-05,
1432
+ "loss": 0.4274,
1433
+ "step": 1064
1434
+ },
1435
+ {
1436
+ "epoch": 21.0,
1437
+ "grad_norm": 6.111388206481934,
1438
+ "learning_rate": 4.3655555555555554e-05,
1439
+ "loss": 0.3576,
1440
+ "step": 1071
1441
+ },
1442
+ {
1443
+ "epoch": 21.0,
1444
+ "eval_accuracy": 0.8073770491803278,
1445
+ "eval_f1_macro": 0.7980818380535872,
1446
+ "eval_f1_micro": 0.8073770491803278,
1447
+ "eval_f1_weighted": 0.8027800592784986,
1448
+ "eval_loss": 0.6649746298789978,
1449
+ "eval_precision_macro": 0.8290474247974248,
1450
+ "eval_precision_micro": 0.8073770491803278,
1451
+ "eval_precision_weighted": 0.8307658143313881,
1452
+ "eval_recall_macro": 0.8019166666666666,
1453
+ "eval_recall_micro": 0.8073770491803278,
1454
+ "eval_recall_weighted": 0.8073770491803278,
1455
+ "eval_runtime": 60.057,
1456
+ "eval_samples_per_second": 20.314,
1457
+ "eval_steps_per_second": 0.333,
1458
+ "step": 1071
1459
+ },
1460
+ {
1461
+ "epoch": 21.13907284768212,
1462
+ "grad_norm": 9.059436798095703,
1463
+ "learning_rate": 4.357777777777778e-05,
1464
+ "loss": 0.4775,
1465
+ "step": 1078
1466
+ },
1467
+ {
1468
+ "epoch": 21.278145695364238,
1469
+ "grad_norm": 9.497885704040527,
1470
+ "learning_rate": 4.35e-05,
1471
+ "loss": 0.4531,
1472
+ "step": 1085
1473
+ },
1474
+ {
1475
+ "epoch": 21.417218543046356,
1476
+ "grad_norm": 10.471771240234375,
1477
+ "learning_rate": 4.3422222222222224e-05,
1478
+ "loss": 0.479,
1479
+ "step": 1092
1480
+ },
1481
+ {
1482
+ "epoch": 21.556291390728475,
1483
+ "grad_norm": 6.627233505249023,
1484
+ "learning_rate": 4.334444444444445e-05,
1485
+ "loss": 0.4332,
1486
+ "step": 1099
1487
+ },
1488
+ {
1489
+ "epoch": 21.695364238410598,
1490
+ "grad_norm": 9.046399116516113,
1491
+ "learning_rate": 4.3266666666666664e-05,
1492
+ "loss": 0.4767,
1493
+ "step": 1106
1494
+ },
1495
+ {
1496
+ "epoch": 21.834437086092716,
1497
+ "grad_norm": 6.7745513916015625,
1498
+ "learning_rate": 4.318888888888889e-05,
1499
+ "loss": 0.5137,
1500
+ "step": 1113
1501
+ },
1502
+ {
1503
+ "epoch": 21.973509933774835,
1504
+ "grad_norm": 8.061189651489258,
1505
+ "learning_rate": 4.311111111111111e-05,
1506
+ "loss": 0.4877,
1507
+ "step": 1120
1508
+ },
1509
+ {
1510
+ "epoch": 22.0,
1511
+ "eval_accuracy": 0.8114754098360656,
1512
+ "eval_f1_macro": 0.8045914526649821,
1513
+ "eval_f1_micro": 0.8114754098360656,
1514
+ "eval_f1_weighted": 0.8078734461991453,
1515
+ "eval_loss": 0.6778721809387207,
1516
+ "eval_precision_macro": 0.836376651126651,
1517
+ "eval_precision_micro": 0.8114754098360656,
1518
+ "eval_precision_weighted": 0.8366239998617048,
1519
+ "eval_recall_macro": 0.806404761904762,
1520
+ "eval_recall_micro": 0.8114754098360656,
1521
+ "eval_recall_weighted": 0.8114754098360656,
1522
+ "eval_runtime": 59.185,
1523
+ "eval_samples_per_second": 20.613,
1524
+ "eval_steps_per_second": 0.338,
1525
+ "step": 1122
1526
+ },
1527
+ {
1528
+ "epoch": 22.09933774834437,
1529
+ "grad_norm": 9.460957527160645,
1530
+ "learning_rate": 4.3033333333333334e-05,
1531
+ "loss": 0.4915,
1532
+ "step": 1127
1533
+ },
1534
+ {
1535
+ "epoch": 22.23841059602649,
1536
+ "grad_norm": 9.026511192321777,
1537
+ "learning_rate": 4.295555555555556e-05,
1538
+ "loss": 0.4157,
1539
+ "step": 1134
1540
+ },
1541
+ {
1542
+ "epoch": 22.37748344370861,
1543
+ "grad_norm": 9.733258247375488,
1544
+ "learning_rate": 4.287777777777778e-05,
1545
+ "loss": 0.3564,
1546
+ "step": 1141
1547
+ },
1548
+ {
1549
+ "epoch": 22.516556291390728,
1550
+ "grad_norm": 9.269991874694824,
1551
+ "learning_rate": 4.2800000000000004e-05,
1552
+ "loss": 0.4707,
1553
+ "step": 1148
1554
+ },
1555
+ {
1556
+ "epoch": 22.655629139072847,
1557
+ "grad_norm": 7.8387041091918945,
1558
+ "learning_rate": 4.272222222222223e-05,
1559
+ "loss": 0.4902,
1560
+ "step": 1155
1561
+ },
1562
+ {
1563
+ "epoch": 22.794701986754966,
1564
+ "grad_norm": 10.261953353881836,
1565
+ "learning_rate": 4.264444444444445e-05,
1566
+ "loss": 0.4656,
1567
+ "step": 1162
1568
+ },
1569
+ {
1570
+ "epoch": 22.933774834437084,
1571
+ "grad_norm": 9.317761421203613,
1572
+ "learning_rate": 4.2566666666666674e-05,
1573
+ "loss": 0.4705,
1574
+ "step": 1169
1575
+ },
1576
+ {
1577
+ "epoch": 23.0,
1578
+ "eval_accuracy": 0.8131147540983606,
1579
+ "eval_f1_macro": 0.8073761565232153,
1580
+ "eval_f1_micro": 0.8131147540983606,
1581
+ "eval_f1_weighted": 0.8111283224168953,
1582
+ "eval_loss": 0.6698673963546753,
1583
+ "eval_precision_macro": 0.8399364801864801,
1584
+ "eval_precision_micro": 0.8131147540983606,
1585
+ "eval_precision_weighted": 0.8420730703722508,
1586
+ "eval_recall_macro": 0.808672619047619,
1587
+ "eval_recall_micro": 0.8131147540983606,
1588
+ "eval_recall_weighted": 0.8131147540983606,
1589
+ "eval_runtime": 59.9539,
1590
+ "eval_samples_per_second": 20.349,
1591
+ "eval_steps_per_second": 0.334,
1592
+ "step": 1173
1593
+ },
1594
+ {
1595
+ "epoch": 23.05960264900662,
1596
+ "grad_norm": 9.809006690979004,
1597
+ "learning_rate": 4.248888888888889e-05,
1598
+ "loss": 0.3833,
1599
+ "step": 1176
1600
+ },
1601
+ {
1602
+ "epoch": 23.198675496688743,
1603
+ "grad_norm": 8.9915132522583,
1604
+ "learning_rate": 4.2411111111111114e-05,
1605
+ "loss": 0.4552,
1606
+ "step": 1183
1607
+ },
1608
+ {
1609
+ "epoch": 23.337748344370862,
1610
+ "grad_norm": 10.036259651184082,
1611
+ "learning_rate": 4.233333333333334e-05,
1612
+ "loss": 0.3869,
1613
+ "step": 1190
1614
+ },
1615
+ {
1616
+ "epoch": 23.47682119205298,
1617
+ "grad_norm": 10.57496166229248,
1618
+ "learning_rate": 4.225555555555556e-05,
1619
+ "loss": 0.4003,
1620
+ "step": 1197
1621
+ },
1622
+ {
1623
+ "epoch": 23.6158940397351,
1624
+ "grad_norm": 9.061355590820312,
1625
+ "learning_rate": 4.217777777777778e-05,
1626
+ "loss": 0.4654,
1627
+ "step": 1204
1628
+ },
1629
+ {
1630
+ "epoch": 23.75496688741722,
1631
+ "grad_norm": 7.108461380004883,
1632
+ "learning_rate": 4.21e-05,
1633
+ "loss": 0.4085,
1634
+ "step": 1211
1635
+ },
1636
+ {
1637
+ "epoch": 23.894039735099337,
1638
+ "grad_norm": 5.542710781097412,
1639
+ "learning_rate": 4.2022222222222223e-05,
1640
+ "loss": 0.4358,
1641
+ "step": 1218
1642
+ },
1643
+ {
1644
+ "epoch": 24.0,
1645
+ "eval_accuracy": 0.8262295081967214,
1646
+ "eval_f1_macro": 0.8156689398492805,
1647
+ "eval_f1_micro": 0.8262295081967214,
1648
+ "eval_f1_weighted": 0.8195565714293827,
1649
+ "eval_loss": 0.6602770090103149,
1650
+ "eval_precision_macro": 0.8476504329004328,
1651
+ "eval_precision_micro": 0.8262295081967214,
1652
+ "eval_precision_weighted": 0.849387256641355,
1653
+ "eval_recall_macro": 0.8219166666666666,
1654
+ "eval_recall_micro": 0.8262295081967214,
1655
+ "eval_recall_weighted": 0.8262295081967214,
1656
+ "eval_runtime": 58.671,
1657
+ "eval_samples_per_second": 20.794,
1658
+ "eval_steps_per_second": 0.341,
1659
+ "step": 1224
1660
+ }
1661
+ ],
1662
+ "logging_steps": 7,
1663
+ "max_steps": 5000,
1664
+ "num_input_tokens_seen": 0,
1665
+ "num_train_epochs": 100,
1666
+ "save_steps": 7,
1667
+ "stateful_callbacks": {
1668
+ "EarlyStoppingCallback": {
1669
+ "args": {
1670
+ "early_stopping_patience": 5,
1671
+ "early_stopping_threshold": 0.01
1672
+ },
1673
+ "attributes": {
1674
+ "early_stopping_patience_counter": 5
1675
+ }
1676
+ },
1677
+ "TrainerControl": {
1678
+ "args": {
1679
+ "should_epoch_stop": false,
1680
+ "should_evaluate": false,
1681
+ "should_log": false,
1682
+ "should_save": true,
1683
+ "should_training_stop": true
1684
+ },
1685
+ "attributes": {}
1686
+ }
1687
+ },
1688
+ "total_flos": 1.1899638530382496e+19,
1689
+ "train_batch_size": 32,
1690
+ "trial_name": null,
1691
+ "trial_params": null
1692
+ }
checkpoint-1224/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24779ba161a24fcf404cfce49654b0c18640007f7086a06316de954621441816
3
+ size 5304
config.json ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/focalnet-base",
3
+ "_num_labels": 200,
4
+ "architectures": [
5
+ "FocalNetForImageClassification"
6
+ ],
7
+ "depths": [
8
+ 2,
9
+ 2,
10
+ 18,
11
+ 2
12
+ ],
13
+ "drop_path_rate": 0.1,
14
+ "embed_dim": 128,
15
+ "encoder_stride": 32,
16
+ "focal_levels": [
17
+ 2,
18
+ 2,
19
+ 2,
20
+ 2
21
+ ],
22
+ "focal_windows": [
23
+ 3,
24
+ 3,
25
+ 3,
26
+ 3
27
+ ],
28
+ "hidden_act": "gelu",
29
+ "hidden_dropout_prob": 0.0,
30
+ "hidden_sizes": [
31
+ 192,
32
+ 384,
33
+ 768,
34
+ 768
35
+ ],
36
+ "id2label": {
37
+ "0": "acadian_flycatcher",
38
+ "1": "american_crow",
39
+ "2": "american_goldfinch",
40
+ "3": "american_pipit",
41
+ "4": "american_redstart",
42
+ "5": "american_three_toed_woodpecker",
43
+ "6": "anna_hummingbird",
44
+ "7": "artic_tern",
45
+ "8": "baird_sparrow",
46
+ "9": "baltimore_oriole",
47
+ "10": "bank_swallow",
48
+ "11": "barn_swallow",
49
+ "12": "bay_breasted_warbler",
50
+ "13": "belted_kingfisher",
51
+ "14": "bewick_wren",
52
+ "15": "black_and_white_warbler",
53
+ "16": "black_billed_cuckoo",
54
+ "17": "black_capped_vireo",
55
+ "18": "black_footed_albatross",
56
+ "19": "black_tern",
57
+ "20": "black_throated_blue_warbler",
58
+ "21": "black_throated_sparrow",
59
+ "22": "blue_grosbeak",
60
+ "23": "blue_headed_vireo",
61
+ "24": "blue_jay",
62
+ "25": "blue_winged_warbler",
63
+ "26": "boat_tailed_grackle",
64
+ "27": "bobolink",
65
+ "28": "bohemian_waxwing",
66
+ "29": "brandt_cormorant",
67
+ "30": "brewer_blackbird",
68
+ "31": "brewer_sparrow",
69
+ "32": "bronzed_cowbird",
70
+ "33": "brown_creeper",
71
+ "34": "brown_pelican",
72
+ "35": "brown_thrasher",
73
+ "36": "cactus_wren",
74
+ "37": "california_gull",
75
+ "38": "canada_warbler",
76
+ "39": "cape_glossy_starling",
77
+ "40": "cape_may_warbler",
78
+ "41": "cardinal",
79
+ "42": "carolina_wren",
80
+ "43": "caspian_tern",
81
+ "44": "cedar_waxwing",
82
+ "45": "cerulean_warbler",
83
+ "46": "chestnut_sided_warbler",
84
+ "47": "chipping_sparrow",
85
+ "48": "chuck_will_widow",
86
+ "49": "clark_nutcracker",
87
+ "50": "clay_colored_sparrow",
88
+ "51": "cliff_swallow",
89
+ "52": "common_raven",
90
+ "53": "common_tern",
91
+ "54": "common_yellowthroat",
92
+ "55": "crested_auklet",
93
+ "56": "dark_eyed_junco",
94
+ "57": "downy_woodpecker",
95
+ "58": "eared_grebe",
96
+ "59": "eastern_towhee",
97
+ "60": "elegant_tern",
98
+ "61": "european_goldfinch",
99
+ "62": "evening_grosbeak",
100
+ "63": "field_sparrow",
101
+ "64": "fish_crow",
102
+ "65": "florida_jay",
103
+ "66": "forsters_tern",
104
+ "67": "fox_sparrow",
105
+ "68": "frigatebird",
106
+ "69": "gadwall",
107
+ "70": "geococcyx",
108
+ "71": "glaucous_winged_gull",
109
+ "72": "golden_winged_warbler",
110
+ "73": "grasshopper_sparrow",
111
+ "74": "gray_catbird",
112
+ "75": "gray_crowned_rosy_finch",
113
+ "76": "gray_kingbird",
114
+ "77": "great_crested_flycatcher",
115
+ "78": "great_grey_shrike",
116
+ "79": "green_jay",
117
+ "80": "green_kingfisher",
118
+ "81": "green_tailed_towhee",
119
+ "82": "green_violetear",
120
+ "83": "groove_billed_ani",
121
+ "84": "harris_sparrow",
122
+ "85": "heermann_gull",
123
+ "86": "henslow_sparrow",
124
+ "87": "herring_gull",
125
+ "88": "hooded_merganser",
126
+ "89": "hooded_oriole",
127
+ "90": "hooded_warbler",
128
+ "91": "horned_grebe",
129
+ "92": "horned_lark",
130
+ "93": "horned_puffin",
131
+ "94": "house_sparrow",
132
+ "95": "house_wren",
133
+ "96": "indigo_bunting",
134
+ "97": "ivory_gull",
135
+ "98": "kentucky_warbler",
136
+ "99": "laysan_albatross",
137
+ "100": "lazuli_bunting",
138
+ "101": "le_conte_sparrow",
139
+ "102": "least_auklet",
140
+ "103": "least_flycatcher",
141
+ "104": "least_tern",
142
+ "105": "lincoln_sparrow",
143
+ "106": "loggerhead_shrike",
144
+ "107": "long_tailed_jaeger",
145
+ "108": "louisiana_waterthrush",
146
+ "109": "magnolia_warbler",
147
+ "110": "mallard",
148
+ "111": "mangrove_cuckoo",
149
+ "112": "marsh_wren",
150
+ "113": "mockingbird",
151
+ "114": "mourning_warbler",
152
+ "115": "myrtle_warbler",
153
+ "116": "nashville_warbler",
154
+ "117": "nelson_sharp_tailed_sparrow",
155
+ "118": "nighthawk",
156
+ "119": "northern_flicker",
157
+ "120": "northern_fulmar",
158
+ "121": "northern_waterthrush",
159
+ "122": "olive_sided_flycatcher",
160
+ "123": "orange_crowned_warbler",
161
+ "124": "orchard_oriole",
162
+ "125": "ovenbird",
163
+ "126": "pacific_loon",
164
+ "127": "painted_bunting",
165
+ "128": "palm_warbler",
166
+ "129": "parakeet_auklet",
167
+ "130": "pelagic_cormorant",
168
+ "131": "philadelphia_vireo",
169
+ "132": "pied_billed_grebe",
170
+ "133": "pied_kingfisher",
171
+ "134": "pigeon_guillemot",
172
+ "135": "pileated_woodpecker",
173
+ "136": "pine_grosbeak",
174
+ "137": "pine_warbler",
175
+ "138": "pomarine_jaeger",
176
+ "139": "prairie_warbler",
177
+ "140": "prothonotary_warbler",
178
+ "141": "purple_finch",
179
+ "142": "red_bellied_woodpecker",
180
+ "143": "red_breasted_merganser",
181
+ "144": "red_cockaded_woodpecker",
182
+ "145": "red_eyed_vireo",
183
+ "146": "red_faced_cormorant",
184
+ "147": "red_headed_woodpecker",
185
+ "148": "red_legged_kittiwake",
186
+ "149": "red_winged_blackbird",
187
+ "150": "rhinoceros_auklet",
188
+ "151": "ring_billed_gull",
189
+ "152": "ringed_kingfisher",
190
+ "153": "rock_wren",
191
+ "154": "rose_breasted_grosbeak",
192
+ "155": "ruby_throated_hummingbird",
193
+ "156": "rufous_hummingbird",
194
+ "157": "rusty_blackbird",
195
+ "158": "sage_thrasher",
196
+ "159": "savannah_sparrow",
197
+ "160": "sayornis",
198
+ "161": "scarlet_tanager",
199
+ "162": "scissor_tailed_flycatcher",
200
+ "163": "scott_oriole",
201
+ "164": "seaside_sparrow",
202
+ "165": "shiny_cowbird",
203
+ "166": "slaty_backed_gull",
204
+ "167": "song_sparrow",
205
+ "168": "sooty_albatross",
206
+ "169": "spotted_catbird",
207
+ "170": "summer_tanager",
208
+ "171": "swainson_warbler",
209
+ "172": "tennessee_warbler",
210
+ "173": "tree_sparrow",
211
+ "174": "tree_swallow",
212
+ "175": "tropical_kingbird",
213
+ "176": "vermilion_flycatcher",
214
+ "177": "vesper_sparrow",
215
+ "178": "warbling_vireo",
216
+ "179": "western_grebe",
217
+ "180": "western_gull",
218
+ "181": "western_meadowlark",
219
+ "182": "western_wood_pewee",
220
+ "183": "whip_poor_will",
221
+ "184": "white_breasted_kingfisher",
222
+ "185": "white_breasted_nuthatch",
223
+ "186": "white_crowned_sparrow",
224
+ "187": "white_eyed_vireo",
225
+ "188": "white_necked_raven",
226
+ "189": "white_pelican",
227
+ "190": "white_throated_sparrow",
228
+ "191": "wilson_warbler",
229
+ "192": "winter_wren",
230
+ "193": "worm_eating_warbler",
231
+ "194": "yellow_bellied_flycatcher",
232
+ "195": "yellow_billed_cuckoo",
233
+ "196": "yellow_breasted_chat",
234
+ "197": "yellow_headed_blackbird",
235
+ "198": "yellow_throated_vireo",
236
+ "199": "yellow_warbler"
237
+ },
238
+ "image_size": 224,
239
+ "initializer_range": 0.02,
240
+ "label2id": {
241
+ "acadian_flycatcher": 0,
242
+ "american_crow": 1,
243
+ "american_goldfinch": 2,
244
+ "american_pipit": 3,
245
+ "american_redstart": 4,
246
+ "american_three_toed_woodpecker": 5,
247
+ "anna_hummingbird": 6,
248
+ "artic_tern": 7,
249
+ "baird_sparrow": 8,
250
+ "baltimore_oriole": 9,
251
+ "bank_swallow": 10,
252
+ "barn_swallow": 11,
253
+ "bay_breasted_warbler": 12,
254
+ "belted_kingfisher": 13,
255
+ "bewick_wren": 14,
256
+ "black_and_white_warbler": 15,
257
+ "black_billed_cuckoo": 16,
258
+ "black_capped_vireo": 17,
259
+ "black_footed_albatross": 18,
260
+ "black_tern": 19,
261
+ "black_throated_blue_warbler": 20,
262
+ "black_throated_sparrow": 21,
263
+ "blue_grosbeak": 22,
264
+ "blue_headed_vireo": 23,
265
+ "blue_jay": 24,
266
+ "blue_winged_warbler": 25,
267
+ "boat_tailed_grackle": 26,
268
+ "bobolink": 27,
269
+ "bohemian_waxwing": 28,
270
+ "brandt_cormorant": 29,
271
+ "brewer_blackbird": 30,
272
+ "brewer_sparrow": 31,
273
+ "bronzed_cowbird": 32,
274
+ "brown_creeper": 33,
275
+ "brown_pelican": 34,
276
+ "brown_thrasher": 35,
277
+ "cactus_wren": 36,
278
+ "california_gull": 37,
279
+ "canada_warbler": 38,
280
+ "cape_glossy_starling": 39,
281
+ "cape_may_warbler": 40,
282
+ "cardinal": 41,
283
+ "carolina_wren": 42,
284
+ "caspian_tern": 43,
285
+ "cedar_waxwing": 44,
286
+ "cerulean_warbler": 45,
287
+ "chestnut_sided_warbler": 46,
288
+ "chipping_sparrow": 47,
289
+ "chuck_will_widow": 48,
290
+ "clark_nutcracker": 49,
291
+ "clay_colored_sparrow": 50,
292
+ "cliff_swallow": 51,
293
+ "common_raven": 52,
294
+ "common_tern": 53,
295
+ "common_yellowthroat": 54,
296
+ "crested_auklet": 55,
297
+ "dark_eyed_junco": 56,
298
+ "downy_woodpecker": 57,
299
+ "eared_grebe": 58,
300
+ "eastern_towhee": 59,
301
+ "elegant_tern": 60,
302
+ "european_goldfinch": 61,
303
+ "evening_grosbeak": 62,
304
+ "field_sparrow": 63,
305
+ "fish_crow": 64,
306
+ "florida_jay": 65,
307
+ "forsters_tern": 66,
308
+ "fox_sparrow": 67,
309
+ "frigatebird": 68,
310
+ "gadwall": 69,
311
+ "geococcyx": 70,
312
+ "glaucous_winged_gull": 71,
313
+ "golden_winged_warbler": 72,
314
+ "grasshopper_sparrow": 73,
315
+ "gray_catbird": 74,
316
+ "gray_crowned_rosy_finch": 75,
317
+ "gray_kingbird": 76,
318
+ "great_crested_flycatcher": 77,
319
+ "great_grey_shrike": 78,
320
+ "green_jay": 79,
321
+ "green_kingfisher": 80,
322
+ "green_tailed_towhee": 81,
323
+ "green_violetear": 82,
324
+ "groove_billed_ani": 83,
325
+ "harris_sparrow": 84,
326
+ "heermann_gull": 85,
327
+ "henslow_sparrow": 86,
328
+ "herring_gull": 87,
329
+ "hooded_merganser": 88,
330
+ "hooded_oriole": 89,
331
+ "hooded_warbler": 90,
332
+ "horned_grebe": 91,
333
+ "horned_lark": 92,
334
+ "horned_puffin": 93,
335
+ "house_sparrow": 94,
336
+ "house_wren": 95,
337
+ "indigo_bunting": 96,
338
+ "ivory_gull": 97,
339
+ "kentucky_warbler": 98,
340
+ "laysan_albatross": 99,
341
+ "lazuli_bunting": 100,
342
+ "le_conte_sparrow": 101,
343
+ "least_auklet": 102,
344
+ "least_flycatcher": 103,
345
+ "least_tern": 104,
346
+ "lincoln_sparrow": 105,
347
+ "loggerhead_shrike": 106,
348
+ "long_tailed_jaeger": 107,
349
+ "louisiana_waterthrush": 108,
350
+ "magnolia_warbler": 109,
351
+ "mallard": 110,
352
+ "mangrove_cuckoo": 111,
353
+ "marsh_wren": 112,
354
+ "mockingbird": 113,
355
+ "mourning_warbler": 114,
356
+ "myrtle_warbler": 115,
357
+ "nashville_warbler": 116,
358
+ "nelson_sharp_tailed_sparrow": 117,
359
+ "nighthawk": 118,
360
+ "northern_flicker": 119,
361
+ "northern_fulmar": 120,
362
+ "northern_waterthrush": 121,
363
+ "olive_sided_flycatcher": 122,
364
+ "orange_crowned_warbler": 123,
365
+ "orchard_oriole": 124,
366
+ "ovenbird": 125,
367
+ "pacific_loon": 126,
368
+ "painted_bunting": 127,
369
+ "palm_warbler": 128,
370
+ "parakeet_auklet": 129,
371
+ "pelagic_cormorant": 130,
372
+ "philadelphia_vireo": 131,
373
+ "pied_billed_grebe": 132,
374
+ "pied_kingfisher": 133,
375
+ "pigeon_guillemot": 134,
376
+ "pileated_woodpecker": 135,
377
+ "pine_grosbeak": 136,
378
+ "pine_warbler": 137,
379
+ "pomarine_jaeger": 138,
380
+ "prairie_warbler": 139,
381
+ "prothonotary_warbler": 140,
382
+ "purple_finch": 141,
383
+ "red_bellied_woodpecker": 142,
384
+ "red_breasted_merganser": 143,
385
+ "red_cockaded_woodpecker": 144,
386
+ "red_eyed_vireo": 145,
387
+ "red_faced_cormorant": 146,
388
+ "red_headed_woodpecker": 147,
389
+ "red_legged_kittiwake": 148,
390
+ "red_winged_blackbird": 149,
391
+ "rhinoceros_auklet": 150,
392
+ "ring_billed_gull": 151,
393
+ "ringed_kingfisher": 152,
394
+ "rock_wren": 153,
395
+ "rose_breasted_grosbeak": 154,
396
+ "ruby_throated_hummingbird": 155,
397
+ "rufous_hummingbird": 156,
398
+ "rusty_blackbird": 157,
399
+ "sage_thrasher": 158,
400
+ "savannah_sparrow": 159,
401
+ "sayornis": 160,
402
+ "scarlet_tanager": 161,
403
+ "scissor_tailed_flycatcher": 162,
404
+ "scott_oriole": 163,
405
+ "seaside_sparrow": 164,
406
+ "shiny_cowbird": 165,
407
+ "slaty_backed_gull": 166,
408
+ "song_sparrow": 167,
409
+ "sooty_albatross": 168,
410
+ "spotted_catbird": 169,
411
+ "summer_tanager": 170,
412
+ "swainson_warbler": 171,
413
+ "tennessee_warbler": 172,
414
+ "tree_sparrow": 173,
415
+ "tree_swallow": 174,
416
+ "tropical_kingbird": 175,
417
+ "vermilion_flycatcher": 176,
418
+ "vesper_sparrow": 177,
419
+ "warbling_vireo": 178,
420
+ "western_grebe": 179,
421
+ "western_gull": 180,
422
+ "western_meadowlark": 181,
423
+ "western_wood_pewee": 182,
424
+ "whip_poor_will": 183,
425
+ "white_breasted_kingfisher": 184,
426
+ "white_breasted_nuthatch": 185,
427
+ "white_crowned_sparrow": 186,
428
+ "white_eyed_vireo": 187,
429
+ "white_necked_raven": 188,
430
+ "white_pelican": 189,
431
+ "white_throated_sparrow": 190,
432
+ "wilson_warbler": 191,
433
+ "winter_wren": 192,
434
+ "worm_eating_warbler": 193,
435
+ "yellow_bellied_flycatcher": 194,
436
+ "yellow_billed_cuckoo": 195,
437
+ "yellow_breasted_chat": 196,
438
+ "yellow_headed_blackbird": 197,
439
+ "yellow_throated_vireo": 198,
440
+ "yellow_warbler": 199
441
+ },
442
+ "layer_norm_eps": 1e-05,
443
+ "layerscale_value": 0.0001,
444
+ "mlp_ratio": 4.0,
445
+ "model_type": "focalnet",
446
+ "normalize_modulator": false,
447
+ "num_channels": 3,
448
+ "out_features": [
449
+ "stage4"
450
+ ],
451
+ "out_indices": [
452
+ 4
453
+ ],
454
+ "patch_size": 4,
455
+ "problem_type": "single_label_classification",
456
+ "stage_names": [
457
+ "stem",
458
+ "stage1",
459
+ "stage2",
460
+ "stage3",
461
+ "stage4"
462
+ ],
463
+ "torch_dtype": "float32",
464
+ "transformers_version": "4.48.0",
465
+ "use_conv_embed": false,
466
+ "use_layerscale": false,
467
+ "use_post_layernorm": false,
468
+ "use_post_layernorm_in_modulation": false
469
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:564e2c9a878637323850cc43ab9863e2845c68e6c99a650d82b6000399eaf038
3
+ size 349364384
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.485,
13
+ 0.456,
14
+ 0.406
15
+ ],
16
+ "image_processor_type": "BitImageProcessor",
17
+ "image_std": [
18
+ 0.229,
19
+ 0.224,
20
+ 0.225
21
+ ],
22
+ "resample": 2,
23
+ "rescale_factor": 0.00392156862745098,
24
+ "size": {
25
+ "shortest_edge": 256
26
+ }
27
+ }
runs/Oct22_00-05-12_dld-laptop/events.out.tfevents.1761062713.dld-laptop.45160.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc69d38d4315ed562eca833f1457ccbe0e16b385404ef12e32ffbaef5699db95
3
+ size 74095
runs/Oct22_00-05-12_dld-laptop/events.out.tfevents.1761080455.dld-laptop.45160.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d99f5ed4e38d385488d58f7596f0079df134e46bdd766b56d72e7214518cd918
3
+ size 921
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24779ba161a24fcf404cfce49654b0c18640007f7086a06316de954621441816
3
+ size 5304
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "Model-Focalnet-Base-/autotrain-data",
3
+ "model": "microsoft/focalnet-base",
4
+ "username": "local",
5
+ "lr": 0.00005,
6
+ "epochs": 100,
7
+ "batch_size": 32,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 3,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.01,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "logging_steps": -1,
18
+ "project_name": "Model-Focalnet-Base-",
19
+ "auto_find_batch_size": false,
20
+ "mixed_precision": "bf16",
21
+ "save_total_limit": 1,
22
+ "token": null,
23
+ "push_to_hub": true,
24
+ "eval_strategy": "epoch",
25
+ "image_column": "autotrain_image",
26
+ "target_column": "autotrain_label",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }