merve HF Staff commited on
Commit
edf8732
·
verified ·
1 Parent(s): 50b5b8a

Delete DINOv3_FT.ipynb

Browse files
Files changed (1) hide show
  1. DINOv3_FT.ipynb +0 -1787
DINOv3_FT.ipynb DELETED
@@ -1,1787 +0,0 @@
1
- {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "machine_shape": "hm",
8
- "gpuType": "L4"
9
- },
10
- "kernelspec": {
11
- "name": "python3",
12
- "display_name": "Python 3"
13
- },
14
- "language_info": {
15
- "name": "python"
16
- },
17
- "accelerator": "GPU",
18
- "widgets": {
19
- "application/vnd.jupyter.widget-state+json": {
20
- "32138245d41348928cc5b5834b07cb7e": {
21
- "model_module": "@jupyter-widgets/controls",
22
- "model_name": "HBoxModel",
23
- "model_module_version": "1.5.0",
24
- "state": {
25
- "_dom_classes": [],
26
- "_model_module": "@jupyter-widgets/controls",
27
- "_model_module_version": "1.5.0",
28
- "_model_name": "HBoxModel",
29
- "_view_count": null,
30
- "_view_module": "@jupyter-widgets/controls",
31
- "_view_module_version": "1.5.0",
32
- "_view_name": "HBoxView",
33
- "box_style": "",
34
- "children": [
35
- "IPY_MODEL_df6de04fdb204d348767dd0b2d0e88f7",
36
- "IPY_MODEL_63a3800d62dd41d6b4a3f643a8930d95",
37
- "IPY_MODEL_49d67bd205184874a5cee04d318d91fe"
38
- ],
39
- "layout": "IPY_MODEL_f00ace964f96471b9eb839cce48ce378"
40
- }
41
- },
42
- "df6de04fdb204d348767dd0b2d0e88f7": {
43
- "model_module": "@jupyter-widgets/controls",
44
- "model_name": "HTMLModel",
45
- "model_module_version": "1.5.0",
46
- "state": {
47
- "_dom_classes": [],
48
- "_model_module": "@jupyter-widgets/controls",
49
- "_model_module_version": "1.5.0",
50
- "_model_name": "HTMLModel",
51
- "_view_count": null,
52
- "_view_module": "@jupyter-widgets/controls",
53
- "_view_module_version": "1.5.0",
54
- "_view_name": "HTMLView",
55
- "description": "",
56
- "description_tooltip": null,
57
- "layout": "IPY_MODEL_3ad0ac8def244930a3aff41d68a88a65",
58
- "placeholder": "​",
59
- "style": "IPY_MODEL_7464841c193d492685bb929b1c0d230c",
60
- "value": "preprocessor_config.json: 100%"
61
- }
62
- },
63
- "63a3800d62dd41d6b4a3f643a8930d95": {
64
- "model_module": "@jupyter-widgets/controls",
65
- "model_name": "FloatProgressModel",
66
- "model_module_version": "1.5.0",
67
- "state": {
68
- "_dom_classes": [],
69
- "_model_module": "@jupyter-widgets/controls",
70
- "_model_module_version": "1.5.0",
71
- "_model_name": "FloatProgressModel",
72
- "_view_count": null,
73
- "_view_module": "@jupyter-widgets/controls",
74
- "_view_module_version": "1.5.0",
75
- "_view_name": "ProgressView",
76
- "bar_style": "success",
77
- "description": "",
78
- "description_tooltip": null,
79
- "layout": "IPY_MODEL_5c16553a2ff34a37a2cb62b4a4c42a6f",
80
- "max": 585,
81
- "min": 0,
82
- "orientation": "horizontal",
83
- "style": "IPY_MODEL_34be83ddb4bf43e58cadbcbac5a606b7",
84
- "value": 585
85
- }
86
- },
87
- "49d67bd205184874a5cee04d318d91fe": {
88
- "model_module": "@jupyter-widgets/controls",
89
- "model_name": "HTMLModel",
90
- "model_module_version": "1.5.0",
91
- "state": {
92
- "_dom_classes": [],
93
- "_model_module": "@jupyter-widgets/controls",
94
- "_model_module_version": "1.5.0",
95
- "_model_name": "HTMLModel",
96
- "_view_count": null,
97
- "_view_module": "@jupyter-widgets/controls",
98
- "_view_module_version": "1.5.0",
99
- "_view_name": "HTMLView",
100
- "description": "",
101
- "description_tooltip": null,
102
- "layout": "IPY_MODEL_0ce7bd7e52074f29b446ef2d4dd0921a",
103
- "placeholder": "​",
104
- "style": "IPY_MODEL_7e2178d696c04d5787e736ace9ab57c0",
105
- "value": " 585/585 [00:00<00:00, 65.6kB/s]"
106
- }
107
- },
108
- "f00ace964f96471b9eb839cce48ce378": {
109
- "model_module": "@jupyter-widgets/base",
110
- "model_name": "LayoutModel",
111
- "model_module_version": "1.2.0",
112
- "state": {
113
- "_model_module": "@jupyter-widgets/base",
114
- "_model_module_version": "1.2.0",
115
- "_model_name": "LayoutModel",
116
- "_view_count": null,
117
- "_view_module": "@jupyter-widgets/base",
118
- "_view_module_version": "1.2.0",
119
- "_view_name": "LayoutView",
120
- "align_content": null,
121
- "align_items": null,
122
- "align_self": null,
123
- "border": null,
124
- "bottom": null,
125
- "display": null,
126
- "flex": null,
127
- "flex_flow": null,
128
- "grid_area": null,
129
- "grid_auto_columns": null,
130
- "grid_auto_flow": null,
131
- "grid_auto_rows": null,
132
- "grid_column": null,
133
- "grid_gap": null,
134
- "grid_row": null,
135
- "grid_template_areas": null,
136
- "grid_template_columns": null,
137
- "grid_template_rows": null,
138
- "height": null,
139
- "justify_content": null,
140
- "justify_items": null,
141
- "left": null,
142
- "margin": null,
143
- "max_height": null,
144
- "max_width": null,
145
- "min_height": null,
146
- "min_width": null,
147
- "object_fit": null,
148
- "object_position": null,
149
- "order": null,
150
- "overflow": null,
151
- "overflow_x": null,
152
- "overflow_y": null,
153
- "padding": null,
154
- "right": null,
155
- "top": null,
156
- "visibility": null,
157
- "width": null
158
- }
159
- },
160
- "3ad0ac8def244930a3aff41d68a88a65": {
161
- "model_module": "@jupyter-widgets/base",
162
- "model_name": "LayoutModel",
163
- "model_module_version": "1.2.0",
164
- "state": {
165
- "_model_module": "@jupyter-widgets/base",
166
- "_model_module_version": "1.2.0",
167
- "_model_name": "LayoutModel",
168
- "_view_count": null,
169
- "_view_module": "@jupyter-widgets/base",
170
- "_view_module_version": "1.2.0",
171
- "_view_name": "LayoutView",
172
- "align_content": null,
173
- "align_items": null,
174
- "align_self": null,
175
- "border": null,
176
- "bottom": null,
177
- "display": null,
178
- "flex": null,
179
- "flex_flow": null,
180
- "grid_area": null,
181
- "grid_auto_columns": null,
182
- "grid_auto_flow": null,
183
- "grid_auto_rows": null,
184
- "grid_column": null,
185
- "grid_gap": null,
186
- "grid_row": null,
187
- "grid_template_areas": null,
188
- "grid_template_columns": null,
189
- "grid_template_rows": null,
190
- "height": null,
191
- "justify_content": null,
192
- "justify_items": null,
193
- "left": null,
194
- "margin": null,
195
- "max_height": null,
196
- "max_width": null,
197
- "min_height": null,
198
- "min_width": null,
199
- "object_fit": null,
200
- "object_position": null,
201
- "order": null,
202
- "overflow": null,
203
- "overflow_x": null,
204
- "overflow_y": null,
205
- "padding": null,
206
- "right": null,
207
- "top": null,
208
- "visibility": null,
209
- "width": null
210
- }
211
- },
212
- "7464841c193d492685bb929b1c0d230c": {
213
- "model_module": "@jupyter-widgets/controls",
214
- "model_name": "DescriptionStyleModel",
215
- "model_module_version": "1.5.0",
216
- "state": {
217
- "_model_module": "@jupyter-widgets/controls",
218
- "_model_module_version": "1.5.0",
219
- "_model_name": "DescriptionStyleModel",
220
- "_view_count": null,
221
- "_view_module": "@jupyter-widgets/base",
222
- "_view_module_version": "1.2.0",
223
- "_view_name": "StyleView",
224
- "description_width": ""
225
- }
226
- },
227
- "5c16553a2ff34a37a2cb62b4a4c42a6f": {
228
- "model_module": "@jupyter-widgets/base",
229
- "model_name": "LayoutModel",
230
- "model_module_version": "1.2.0",
231
- "state": {
232
- "_model_module": "@jupyter-widgets/base",
233
- "_model_module_version": "1.2.0",
234
- "_model_name": "LayoutModel",
235
- "_view_count": null,
236
- "_view_module": "@jupyter-widgets/base",
237
- "_view_module_version": "1.2.0",
238
- "_view_name": "LayoutView",
239
- "align_content": null,
240
- "align_items": null,
241
- "align_self": null,
242
- "border": null,
243
- "bottom": null,
244
- "display": null,
245
- "flex": null,
246
- "flex_flow": null,
247
- "grid_area": null,
248
- "grid_auto_columns": null,
249
- "grid_auto_flow": null,
250
- "grid_auto_rows": null,
251
- "grid_column": null,
252
- "grid_gap": null,
253
- "grid_row": null,
254
- "grid_template_areas": null,
255
- "grid_template_columns": null,
256
- "grid_template_rows": null,
257
- "height": null,
258
- "justify_content": null,
259
- "justify_items": null,
260
- "left": null,
261
- "margin": null,
262
- "max_height": null,
263
- "max_width": null,
264
- "min_height": null,
265
- "min_width": null,
266
- "object_fit": null,
267
- "object_position": null,
268
- "order": null,
269
- "overflow": null,
270
- "overflow_x": null,
271
- "overflow_y": null,
272
- "padding": null,
273
- "right": null,
274
- "top": null,
275
- "visibility": null,
276
- "width": null
277
- }
278
- },
279
- "34be83ddb4bf43e58cadbcbac5a606b7": {
280
- "model_module": "@jupyter-widgets/controls",
281
- "model_name": "ProgressStyleModel",
282
- "model_module_version": "1.5.0",
283
- "state": {
284
- "_model_module": "@jupyter-widgets/controls",
285
- "_model_module_version": "1.5.0",
286
- "_model_name": "ProgressStyleModel",
287
- "_view_count": null,
288
- "_view_module": "@jupyter-widgets/base",
289
- "_view_module_version": "1.2.0",
290
- "_view_name": "StyleView",
291
- "bar_color": null,
292
- "description_width": ""
293
- }
294
- },
295
- "0ce7bd7e52074f29b446ef2d4dd0921a": {
296
- "model_module": "@jupyter-widgets/base",
297
- "model_name": "LayoutModel",
298
- "model_module_version": "1.2.0",
299
- "state": {
300
- "_model_module": "@jupyter-widgets/base",
301
- "_model_module_version": "1.2.0",
302
- "_model_name": "LayoutModel",
303
- "_view_count": null,
304
- "_view_module": "@jupyter-widgets/base",
305
- "_view_module_version": "1.2.0",
306
- "_view_name": "LayoutView",
307
- "align_content": null,
308
- "align_items": null,
309
- "align_self": null,
310
- "border": null,
311
- "bottom": null,
312
- "display": null,
313
- "flex": null,
314
- "flex_flow": null,
315
- "grid_area": null,
316
- "grid_auto_columns": null,
317
- "grid_auto_flow": null,
318
- "grid_auto_rows": null,
319
- "grid_column": null,
320
- "grid_gap": null,
321
- "grid_row": null,
322
- "grid_template_areas": null,
323
- "grid_template_columns": null,
324
- "grid_template_rows": null,
325
- "height": null,
326
- "justify_content": null,
327
- "justify_items": null,
328
- "left": null,
329
- "margin": null,
330
- "max_height": null,
331
- "max_width": null,
332
- "min_height": null,
333
- "min_width": null,
334
- "object_fit": null,
335
- "object_position": null,
336
- "order": null,
337
- "overflow": null,
338
- "overflow_x": null,
339
- "overflow_y": null,
340
- "padding": null,
341
- "right": null,
342
- "top": null,
343
- "visibility": null,
344
- "width": null
345
- }
346
- },
347
- "7e2178d696c04d5787e736ace9ab57c0": {
348
- "model_module": "@jupyter-widgets/controls",
349
- "model_name": "DescriptionStyleModel",
350
- "model_module_version": "1.5.0",
351
- "state": {
352
- "_model_module": "@jupyter-widgets/controls",
353
- "_model_module_version": "1.5.0",
354
- "_model_name": "DescriptionStyleModel",
355
- "_view_count": null,
356
- "_view_module": "@jupyter-widgets/base",
357
- "_view_module_version": "1.2.0",
358
- "_view_name": "StyleView",
359
- "description_width": ""
360
- }
361
- },
362
- "3ff80bc2f64948408757caa8715d0603": {
363
- "model_module": "@jupyter-widgets/controls",
364
- "model_name": "HBoxModel",
365
- "model_module_version": "1.5.0",
366
- "state": {
367
- "_dom_classes": [],
368
- "_model_module": "@jupyter-widgets/controls",
369
- "_model_module_version": "1.5.0",
370
- "_model_name": "HBoxModel",
371
- "_view_count": null,
372
- "_view_module": "@jupyter-widgets/controls",
373
- "_view_module_version": "1.5.0",
374
- "_view_name": "HBoxView",
375
- "box_style": "",
376
- "children": [
377
- "IPY_MODEL_12aa8675bca54f05a6deb7ec7a5def7a",
378
- "IPY_MODEL_31a74feac76f4744a0f34fbc99433831",
379
- "IPY_MODEL_bd51d97e739a4e78ad28083043f638d8"
380
- ],
381
- "layout": "IPY_MODEL_062d36b5d0c043a597eb9b3ebd35f313"
382
- }
383
- },
384
- "12aa8675bca54f05a6deb7ec7a5def7a": {
385
- "model_module": "@jupyter-widgets/controls",
386
- "model_name": "HTMLModel",
387
- "model_module_version": "1.5.0",
388
- "state": {
389
- "_dom_classes": [],
390
- "_model_module": "@jupyter-widgets/controls",
391
- "_model_module_version": "1.5.0",
392
- "_model_name": "HTMLModel",
393
- "_view_count": null,
394
- "_view_module": "@jupyter-widgets/controls",
395
- "_view_module_version": "1.5.0",
396
- "_view_name": "HTMLView",
397
- "description": "",
398
- "description_tooltip": null,
399
- "layout": "IPY_MODEL_2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
400
- "placeholder": "​",
401
- "style": "IPY_MODEL_f2c7be27f90b49a3abe51b5e3003c17d",
402
- "value": "config.json: 100%"
403
- }
404
- },
405
- "31a74feac76f4744a0f34fbc99433831": {
406
- "model_module": "@jupyter-widgets/controls",
407
- "model_name": "FloatProgressModel",
408
- "model_module_version": "1.5.0",
409
- "state": {
410
- "_dom_classes": [],
411
- "_model_module": "@jupyter-widgets/controls",
412
- "_model_module_version": "1.5.0",
413
- "_model_name": "FloatProgressModel",
414
- "_view_count": null,
415
- "_view_module": "@jupyter-widgets/controls",
416
- "_view_module_version": "1.5.0",
417
- "_view_name": "ProgressView",
418
- "bar_style": "success",
419
- "description": "",
420
- "description_tooltip": null,
421
- "layout": "IPY_MODEL_76d1f15c857640c3b06d98aef478f234",
422
- "max": 744,
423
- "min": 0,
424
- "orientation": "horizontal",
425
- "style": "IPY_MODEL_d43089f8240c44339c6881355ff0aee3",
426
- "value": 744
427
- }
428
- },
429
- "bd51d97e739a4e78ad28083043f638d8": {
430
- "model_module": "@jupyter-widgets/controls",
431
- "model_name": "HTMLModel",
432
- "model_module_version": "1.5.0",
433
- "state": {
434
- "_dom_classes": [],
435
- "_model_module": "@jupyter-widgets/controls",
436
- "_model_module_version": "1.5.0",
437
- "_model_name": "HTMLModel",
438
- "_view_count": null,
439
- "_view_module": "@jupyter-widgets/controls",
440
- "_view_module_version": "1.5.0",
441
- "_view_name": "HTMLView",
442
- "description": "",
443
- "description_tooltip": null,
444
- "layout": "IPY_MODEL_a139b85557a942b9b5d32b9d7def3e50",
445
- "placeholder": "​",
446
- "style": "IPY_MODEL_92043bfce97e4629bf9e4b268aa88c11",
447
- "value": " 744/744 [00:00<00:00, 93.7kB/s]"
448
- }
449
- },
450
- "062d36b5d0c043a597eb9b3ebd35f313": {
451
- "model_module": "@jupyter-widgets/base",
452
- "model_name": "LayoutModel",
453
- "model_module_version": "1.2.0",
454
- "state": {
455
- "_model_module": "@jupyter-widgets/base",
456
- "_model_module_version": "1.2.0",
457
- "_model_name": "LayoutModel",
458
- "_view_count": null,
459
- "_view_module": "@jupyter-widgets/base",
460
- "_view_module_version": "1.2.0",
461
- "_view_name": "LayoutView",
462
- "align_content": null,
463
- "align_items": null,
464
- "align_self": null,
465
- "border": null,
466
- "bottom": null,
467
- "display": null,
468
- "flex": null,
469
- "flex_flow": null,
470
- "grid_area": null,
471
- "grid_auto_columns": null,
472
- "grid_auto_flow": null,
473
- "grid_auto_rows": null,
474
- "grid_column": null,
475
- "grid_gap": null,
476
- "grid_row": null,
477
- "grid_template_areas": null,
478
- "grid_template_columns": null,
479
- "grid_template_rows": null,
480
- "height": null,
481
- "justify_content": null,
482
- "justify_items": null,
483
- "left": null,
484
- "margin": null,
485
- "max_height": null,
486
- "max_width": null,
487
- "min_height": null,
488
- "min_width": null,
489
- "object_fit": null,
490
- "object_position": null,
491
- "order": null,
492
- "overflow": null,
493
- "overflow_x": null,
494
- "overflow_y": null,
495
- "padding": null,
496
- "right": null,
497
- "top": null,
498
- "visibility": null,
499
- "width": null
500
- }
501
- },
502
- "2c2223a6ae3e4ff6be96a5f4e2d2d9b6": {
503
- "model_module": "@jupyter-widgets/base",
504
- "model_name": "LayoutModel",
505
- "model_module_version": "1.2.0",
506
- "state": {
507
- "_model_module": "@jupyter-widgets/base",
508
- "_model_module_version": "1.2.0",
509
- "_model_name": "LayoutModel",
510
- "_view_count": null,
511
- "_view_module": "@jupyter-widgets/base",
512
- "_view_module_version": "1.2.0",
513
- "_view_name": "LayoutView",
514
- "align_content": null,
515
- "align_items": null,
516
- "align_self": null,
517
- "border": null,
518
- "bottom": null,
519
- "display": null,
520
- "flex": null,
521
- "flex_flow": null,
522
- "grid_area": null,
523
- "grid_auto_columns": null,
524
- "grid_auto_flow": null,
525
- "grid_auto_rows": null,
526
- "grid_column": null,
527
- "grid_gap": null,
528
- "grid_row": null,
529
- "grid_template_areas": null,
530
- "grid_template_columns": null,
531
- "grid_template_rows": null,
532
- "height": null,
533
- "justify_content": null,
534
- "justify_items": null,
535
- "left": null,
536
- "margin": null,
537
- "max_height": null,
538
- "max_width": null,
539
- "min_height": null,
540
- "min_width": null,
541
- "object_fit": null,
542
- "object_position": null,
543
- "order": null,
544
- "overflow": null,
545
- "overflow_x": null,
546
- "overflow_y": null,
547
- "padding": null,
548
- "right": null,
549
- "top": null,
550
- "visibility": null,
551
- "width": null
552
- }
553
- },
554
- "f2c7be27f90b49a3abe51b5e3003c17d": {
555
- "model_module": "@jupyter-widgets/controls",
556
- "model_name": "DescriptionStyleModel",
557
- "model_module_version": "1.5.0",
558
- "state": {
559
- "_model_module": "@jupyter-widgets/controls",
560
- "_model_module_version": "1.5.0",
561
- "_model_name": "DescriptionStyleModel",
562
- "_view_count": null,
563
- "_view_module": "@jupyter-widgets/base",
564
- "_view_module_version": "1.2.0",
565
- "_view_name": "StyleView",
566
- "description_width": ""
567
- }
568
- },
569
- "76d1f15c857640c3b06d98aef478f234": {
570
- "model_module": "@jupyter-widgets/base",
571
- "model_name": "LayoutModel",
572
- "model_module_version": "1.2.0",
573
- "state": {
574
- "_model_module": "@jupyter-widgets/base",
575
- "_model_module_version": "1.2.0",
576
- "_model_name": "LayoutModel",
577
- "_view_count": null,
578
- "_view_module": "@jupyter-widgets/base",
579
- "_view_module_version": "1.2.0",
580
- "_view_name": "LayoutView",
581
- "align_content": null,
582
- "align_items": null,
583
- "align_self": null,
584
- "border": null,
585
- "bottom": null,
586
- "display": null,
587
- "flex": null,
588
- "flex_flow": null,
589
- "grid_area": null,
590
- "grid_auto_columns": null,
591
- "grid_auto_flow": null,
592
- "grid_auto_rows": null,
593
- "grid_column": null,
594
- "grid_gap": null,
595
- "grid_row": null,
596
- "grid_template_areas": null,
597
- "grid_template_columns": null,
598
- "grid_template_rows": null,
599
- "height": null,
600
- "justify_content": null,
601
- "justify_items": null,
602
- "left": null,
603
- "margin": null,
604
- "max_height": null,
605
- "max_width": null,
606
- "min_height": null,
607
- "min_width": null,
608
- "object_fit": null,
609
- "object_position": null,
610
- "order": null,
611
- "overflow": null,
612
- "overflow_x": null,
613
- "overflow_y": null,
614
- "padding": null,
615
- "right": null,
616
- "top": null,
617
- "visibility": null,
618
- "width": null
619
- }
620
- },
621
- "d43089f8240c44339c6881355ff0aee3": {
622
- "model_module": "@jupyter-widgets/controls",
623
- "model_name": "ProgressStyleModel",
624
- "model_module_version": "1.5.0",
625
- "state": {
626
- "_model_module": "@jupyter-widgets/controls",
627
- "_model_module_version": "1.5.0",
628
- "_model_name": "ProgressStyleModel",
629
- "_view_count": null,
630
- "_view_module": "@jupyter-widgets/base",
631
- "_view_module_version": "1.2.0",
632
- "_view_name": "StyleView",
633
- "bar_color": null,
634
- "description_width": ""
635
- }
636
- },
637
- "a139b85557a942b9b5d32b9d7def3e50": {
638
- "model_module": "@jupyter-widgets/base",
639
- "model_name": "LayoutModel",
640
- "model_module_version": "1.2.0",
641
- "state": {
642
- "_model_module": "@jupyter-widgets/base",
643
- "_model_module_version": "1.2.0",
644
- "_model_name": "LayoutModel",
645
- "_view_count": null,
646
- "_view_module": "@jupyter-widgets/base",
647
- "_view_module_version": "1.2.0",
648
- "_view_name": "LayoutView",
649
- "align_content": null,
650
- "align_items": null,
651
- "align_self": null,
652
- "border": null,
653
- "bottom": null,
654
- "display": null,
655
- "flex": null,
656
- "flex_flow": null,
657
- "grid_area": null,
658
- "grid_auto_columns": null,
659
- "grid_auto_flow": null,
660
- "grid_auto_rows": null,
661
- "grid_column": null,
662
- "grid_gap": null,
663
- "grid_row": null,
664
- "grid_template_areas": null,
665
- "grid_template_columns": null,
666
- "grid_template_rows": null,
667
- "height": null,
668
- "justify_content": null,
669
- "justify_items": null,
670
- "left": null,
671
- "margin": null,
672
- "max_height": null,
673
- "max_width": null,
674
- "min_height": null,
675
- "min_width": null,
676
- "object_fit": null,
677
- "object_position": null,
678
- "order": null,
679
- "overflow": null,
680
- "overflow_x": null,
681
- "overflow_y": null,
682
- "padding": null,
683
- "right": null,
684
- "top": null,
685
- "visibility": null,
686
- "width": null
687
- }
688
- },
689
- "92043bfce97e4629bf9e4b268aa88c11": {
690
- "model_module": "@jupyter-widgets/controls",
691
- "model_name": "DescriptionStyleModel",
692
- "model_module_version": "1.5.0",
693
- "state": {
694
- "_model_module": "@jupyter-widgets/controls",
695
- "_model_module_version": "1.5.0",
696
- "_model_name": "DescriptionStyleModel",
697
- "_view_count": null,
698
- "_view_module": "@jupyter-widgets/base",
699
- "_view_module_version": "1.2.0",
700
- "_view_name": "StyleView",
701
- "description_width": ""
702
- }
703
- },
704
- "f20b3989658642528f4ed91666320097": {
705
- "model_module": "@jupyter-widgets/controls",
706
- "model_name": "HBoxModel",
707
- "model_module_version": "1.5.0",
708
- "state": {
709
- "_dom_classes": [],
710
- "_model_module": "@jupyter-widgets/controls",
711
- "_model_module_version": "1.5.0",
712
- "_model_name": "HBoxModel",
713
- "_view_count": null,
714
- "_view_module": "@jupyter-widgets/controls",
715
- "_view_module_version": "1.5.0",
716
- "_view_name": "HBoxView",
717
- "box_style": "",
718
- "children": [
719
- "IPY_MODEL_3ee9921a635d44ec9b248e2155b5b243",
720
- "IPY_MODEL_caf0790dbf2544378cb04aa8eb3098c3",
721
- "IPY_MODEL_3ff0fc5ce62a44b9950dd8575d90bd21"
722
- ],
723
- "layout": "IPY_MODEL_77cdafc6dae44107a43a46ae19ed390a"
724
- }
725
- },
726
- "3ee9921a635d44ec9b248e2155b5b243": {
727
- "model_module": "@jupyter-widgets/controls",
728
- "model_name": "HTMLModel",
729
- "model_module_version": "1.5.0",
730
- "state": {
731
- "_dom_classes": [],
732
- "_model_module": "@jupyter-widgets/controls",
733
- "_model_module_version": "1.5.0",
734
- "_model_name": "HTMLModel",
735
- "_view_count": null,
736
- "_view_module": "@jupyter-widgets/controls",
737
- "_view_module_version": "1.5.0",
738
- "_view_name": "HTMLView",
739
- "description": "",
740
- "description_tooltip": null,
741
- "layout": "IPY_MODEL_65d8b73e3bdd46fca8a42b67739e27f9",
742
- "placeholder": "​",
743
- "style": "IPY_MODEL_b566321171044b0eb02ea3bd8c0472df",
744
- "value": "model.safetensors: 100%"
745
- }
746
- },
747
- "caf0790dbf2544378cb04aa8eb3098c3": {
748
- "model_module": "@jupyter-widgets/controls",
749
- "model_name": "FloatProgressModel",
750
- "model_module_version": "1.5.0",
751
- "state": {
752
- "_dom_classes": [],
753
- "_model_module": "@jupyter-widgets/controls",
754
- "_model_module_version": "1.5.0",
755
- "_model_name": "FloatProgressModel",
756
- "_view_count": null,
757
- "_view_module": "@jupyter-widgets/controls",
758
- "_view_module_version": "1.5.0",
759
- "_view_name": "ProgressView",
760
- "bar_style": "success",
761
- "description": "",
762
- "description_tooltip": null,
763
- "layout": "IPY_MODEL_62535e046f794a28b4002c3f34fe7ff7",
764
- "max": 3362432800,
765
- "min": 0,
766
- "orientation": "horizontal",
767
- "style": "IPY_MODEL_663aa65fdb4e4349b2815b6bafce4dcd",
768
- "value": 3362432800
769
- }
770
- },
771
- "3ff0fc5ce62a44b9950dd8575d90bd21": {
772
- "model_module": "@jupyter-widgets/controls",
773
- "model_name": "HTMLModel",
774
- "model_module_version": "1.5.0",
775
- "state": {
776
- "_dom_classes": [],
777
- "_model_module": "@jupyter-widgets/controls",
778
- "_model_module_version": "1.5.0",
779
- "_model_name": "HTMLModel",
780
- "_view_count": null,
781
- "_view_module": "@jupyter-widgets/controls",
782
- "_view_module_version": "1.5.0",
783
- "_view_name": "HTMLView",
784
- "description": "",
785
- "description_tooltip": null,
786
- "layout": "IPY_MODEL_8410c9d15bca4c9f8b3aab2b7d327211",
787
- "placeholder": "​",
788
- "style": "IPY_MODEL_fb359d0651a74fe790aaace9a5d0e329",
789
- "value": " 3.36G/3.36G [00:18<00:00, 296MB/s]"
790
- }
791
- },
792
- "77cdafc6dae44107a43a46ae19ed390a": {
793
- "model_module": "@jupyter-widgets/base",
794
- "model_name": "LayoutModel",
795
- "model_module_version": "1.2.0",
796
- "state": {
797
- "_model_module": "@jupyter-widgets/base",
798
- "_model_module_version": "1.2.0",
799
- "_model_name": "LayoutModel",
800
- "_view_count": null,
801
- "_view_module": "@jupyter-widgets/base",
802
- "_view_module_version": "1.2.0",
803
- "_view_name": "LayoutView",
804
- "align_content": null,
805
- "align_items": null,
806
- "align_self": null,
807
- "border": null,
808
- "bottom": null,
809
- "display": null,
810
- "flex": null,
811
- "flex_flow": null,
812
- "grid_area": null,
813
- "grid_auto_columns": null,
814
- "grid_auto_flow": null,
815
- "grid_auto_rows": null,
816
- "grid_column": null,
817
- "grid_gap": null,
818
- "grid_row": null,
819
- "grid_template_areas": null,
820
- "grid_template_columns": null,
821
- "grid_template_rows": null,
822
- "height": null,
823
- "justify_content": null,
824
- "justify_items": null,
825
- "left": null,
826
- "margin": null,
827
- "max_height": null,
828
- "max_width": null,
829
- "min_height": null,
830
- "min_width": null,
831
- "object_fit": null,
832
- "object_position": null,
833
- "order": null,
834
- "overflow": null,
835
- "overflow_x": null,
836
- "overflow_y": null,
837
- "padding": null,
838
- "right": null,
839
- "top": null,
840
- "visibility": null,
841
- "width": null
842
- }
843
- },
844
- "65d8b73e3bdd46fca8a42b67739e27f9": {
845
- "model_module": "@jupyter-widgets/base",
846
- "model_name": "LayoutModel",
847
- "model_module_version": "1.2.0",
848
- "state": {
849
- "_model_module": "@jupyter-widgets/base",
850
- "_model_module_version": "1.2.0",
851
- "_model_name": "LayoutModel",
852
- "_view_count": null,
853
- "_view_module": "@jupyter-widgets/base",
854
- "_view_module_version": "1.2.0",
855
- "_view_name": "LayoutView",
856
- "align_content": null,
857
- "align_items": null,
858
- "align_self": null,
859
- "border": null,
860
- "bottom": null,
861
- "display": null,
862
- "flex": null,
863
- "flex_flow": null,
864
- "grid_area": null,
865
- "grid_auto_columns": null,
866
- "grid_auto_flow": null,
867
- "grid_auto_rows": null,
868
- "grid_column": null,
869
- "grid_gap": null,
870
- "grid_row": null,
871
- "grid_template_areas": null,
872
- "grid_template_columns": null,
873
- "grid_template_rows": null,
874
- "height": null,
875
- "justify_content": null,
876
- "justify_items": null,
877
- "left": null,
878
- "margin": null,
879
- "max_height": null,
880
- "max_width": null,
881
- "min_height": null,
882
- "min_width": null,
883
- "object_fit": null,
884
- "object_position": null,
885
- "order": null,
886
- "overflow": null,
887
- "overflow_x": null,
888
- "overflow_y": null,
889
- "padding": null,
890
- "right": null,
891
- "top": null,
892
- "visibility": null,
893
- "width": null
894
- }
895
- },
896
- "b566321171044b0eb02ea3bd8c0472df": {
897
- "model_module": "@jupyter-widgets/controls",
898
- "model_name": "DescriptionStyleModel",
899
- "model_module_version": "1.5.0",
900
- "state": {
901
- "_model_module": "@jupyter-widgets/controls",
902
- "_model_module_version": "1.5.0",
903
- "_model_name": "DescriptionStyleModel",
904
- "_view_count": null,
905
- "_view_module": "@jupyter-widgets/base",
906
- "_view_module_version": "1.2.0",
907
- "_view_name": "StyleView",
908
- "description_width": ""
909
- }
910
- },
911
- "62535e046f794a28b4002c3f34fe7ff7": {
912
- "model_module": "@jupyter-widgets/base",
913
- "model_name": "LayoutModel",
914
- "model_module_version": "1.2.0",
915
- "state": {
916
- "_model_module": "@jupyter-widgets/base",
917
- "_model_module_version": "1.2.0",
918
- "_model_name": "LayoutModel",
919
- "_view_count": null,
920
- "_view_module": "@jupyter-widgets/base",
921
- "_view_module_version": "1.2.0",
922
- "_view_name": "LayoutView",
923
- "align_content": null,
924
- "align_items": null,
925
- "align_self": null,
926
- "border": null,
927
- "bottom": null,
928
- "display": null,
929
- "flex": null,
930
- "flex_flow": null,
931
- "grid_area": null,
932
- "grid_auto_columns": null,
933
- "grid_auto_flow": null,
934
- "grid_auto_rows": null,
935
- "grid_column": null,
936
- "grid_gap": null,
937
- "grid_row": null,
938
- "grid_template_areas": null,
939
- "grid_template_columns": null,
940
- "grid_template_rows": null,
941
- "height": null,
942
- "justify_content": null,
943
- "justify_items": null,
944
- "left": null,
945
- "margin": null,
946
- "max_height": null,
947
- "max_width": null,
948
- "min_height": null,
949
- "min_width": null,
950
- "object_fit": null,
951
- "object_position": null,
952
- "order": null,
953
- "overflow": null,
954
- "overflow_x": null,
955
- "overflow_y": null,
956
- "padding": null,
957
- "right": null,
958
- "top": null,
959
- "visibility": null,
960
- "width": null
961
- }
962
- },
963
- "663aa65fdb4e4349b2815b6bafce4dcd": {
964
- "model_module": "@jupyter-widgets/controls",
965
- "model_name": "ProgressStyleModel",
966
- "model_module_version": "1.5.0",
967
- "state": {
968
- "_model_module": "@jupyter-widgets/controls",
969
- "_model_module_version": "1.5.0",
970
- "_model_name": "ProgressStyleModel",
971
- "_view_count": null,
972
- "_view_module": "@jupyter-widgets/base",
973
- "_view_module_version": "1.2.0",
974
- "_view_name": "StyleView",
975
- "bar_color": null,
976
- "description_width": ""
977
- }
978
- },
979
- "8410c9d15bca4c9f8b3aab2b7d327211": {
980
- "model_module": "@jupyter-widgets/base",
981
- "model_name": "LayoutModel",
982
- "model_module_version": "1.2.0",
983
- "state": {
984
- "_model_module": "@jupyter-widgets/base",
985
- "_model_module_version": "1.2.0",
986
- "_model_name": "LayoutModel",
987
- "_view_count": null,
988
- "_view_module": "@jupyter-widgets/base",
989
- "_view_module_version": "1.2.0",
990
- "_view_name": "LayoutView",
991
- "align_content": null,
992
- "align_items": null,
993
- "align_self": null,
994
- "border": null,
995
- "bottom": null,
996
- "display": null,
997
- "flex": null,
998
- "flex_flow": null,
999
- "grid_area": null,
1000
- "grid_auto_columns": null,
1001
- "grid_auto_flow": null,
1002
- "grid_auto_rows": null,
1003
- "grid_column": null,
1004
- "grid_gap": null,
1005
- "grid_row": null,
1006
- "grid_template_areas": null,
1007
- "grid_template_columns": null,
1008
- "grid_template_rows": null,
1009
- "height": null,
1010
- "justify_content": null,
1011
- "justify_items": null,
1012
- "left": null,
1013
- "margin": null,
1014
- "max_height": null,
1015
- "max_width": null,
1016
- "min_height": null,
1017
- "min_width": null,
1018
- "object_fit": null,
1019
- "object_position": null,
1020
- "order": null,
1021
- "overflow": null,
1022
- "overflow_x": null,
1023
- "overflow_y": null,
1024
- "padding": null,
1025
- "right": null,
1026
- "top": null,
1027
- "visibility": null,
1028
- "width": null
1029
- }
1030
- },
1031
- "fb359d0651a74fe790aaace9a5d0e329": {
1032
- "model_module": "@jupyter-widgets/controls",
1033
- "model_name": "DescriptionStyleModel",
1034
- "model_module_version": "1.5.0",
1035
- "state": {
1036
- "_model_module": "@jupyter-widgets/controls",
1037
- "_model_module_version": "1.5.0",
1038
- "_model_name": "DescriptionStyleModel",
1039
- "_view_count": null,
1040
- "_view_module": "@jupyter-widgets/base",
1041
- "_view_module_version": "1.2.0",
1042
- "_view_name": "StyleView",
1043
- "description_width": ""
1044
- }
1045
- }
1046
- }
1047
- }
1048
- },
1049
- "cells": [
1050
- {
1051
- "cell_type": "markdown",
1052
- "source": [
1053
- "## DINOv3 Fine-tuning for Image Classification"
1054
- ],
1055
- "metadata": {
1056
- "id": "BCTUDjwiYn6T"
1057
- }
1058
- },
1059
- {
1060
- "cell_type": "code",
1061
- "source": [
1062
- "!pip install -q trackio git+https://github.com/huggingface/transformers.git"
1063
- ],
1064
- "metadata": {
1065
- "colab": {
1066
- "base_uri": "https://localhost:8080/"
1067
- },
1068
- "id": "Aa1zPoxo_JBf",
1069
- "outputId": "958700f5-189c-42ec-dfc5-9852e5efe368"
1070
- },
1071
- "execution_count": 1,
1072
- "outputs": [
1073
- {
1074
- "output_type": "stream",
1075
- "name": "stdout",
1076
- "text": [
1077
- " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
1078
- " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
1079
- " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
1080
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m838.5/838.5 kB\u001b[0m \u001b[31m41.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
1081
- "\u001b[?25h Building wheel for transformers (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n"
1082
- ]
1083
- }
1084
- ]
1085
- },
1086
- {
1087
- "cell_type": "markdown",
1088
- "source": [
1089
- "## Dataset"
1090
- ],
1091
- "metadata": {
1092
- "id": "5AJ3YVCE8S9Y"
1093
- }
1094
- },
1095
- {
1096
- "cell_type": "markdown",
1097
- "source": [
1098
- "We will do a very small run on the Food-101 dataset, using a random 10% of the train and validation splits."
1099
- ],
1100
- "metadata": {
1101
- "id": "s_Aabbb6VBZt"
1102
- }
1103
- },
1104
- {
1105
- "cell_type": "code",
1106
- "source": [
1107
- "from datasets import load_dataset\n",
1108
- "\n",
1109
- "ds = load_dataset(\"ethz/food101\")\n",
1110
- "\n",
1111
- "train_ds = ds[\"train\"]\n",
1112
- "train_ds = train_ds.shuffle().train_test_split(test_size=0.9)[\"train\"]\n",
1113
- "val_ds = ds[\"validation\"].shuffle().train_test_split(test_size=0.9)[\"train\"]"
1114
- ],
1115
- "metadata": {
1116
- "id": "Cxzbngbq4K31"
1117
- },
1118
- "execution_count": 7,
1119
- "outputs": []
1120
- },
1121
- {
1122
- "cell_type": "code",
1123
- "source": [
1124
- "train_ds"
1125
- ],
1126
- "metadata": {
1127
- "colab": {
1128
- "base_uri": "https://localhost:8080/"
1129
- },
1130
- "id": "g1wl86sp8L6C",
1131
- "outputId": "1b42f43f-df62-4eba-f469-54cabd232cf9"
1132
- },
1133
- "execution_count": 8,
1134
- "outputs": [
1135
- {
1136
- "output_type": "execute_result",
1137
- "data": {
1138
- "text/plain": [
1139
- "Dataset({\n",
1140
- " features: ['image', 'label'],\n",
1141
- " num_rows: 7575\n",
1142
- "})"
1143
- ]
1144
- },
1145
- "metadata": {},
1146
- "execution_count": 8
1147
- }
1148
- ]
1149
- },
1150
- {
1151
- "cell_type": "code",
1152
- "source": [
1153
- "val_ds"
1154
- ],
1155
- "metadata": {
1156
- "colab": {
1157
- "base_uri": "https://localhost:8080/"
1158
- },
1159
- "id": "Tq5OiKxvVj9k",
1160
- "outputId": "391489ba-d95f-498a-b4bb-f959e19686b0"
1161
- },
1162
- "execution_count": 9,
1163
- "outputs": [
1164
- {
1165
- "output_type": "execute_result",
1166
- "data": {
1167
- "text/plain": [
1168
- "Dataset({\n",
1169
- " features: ['image', 'label'],\n",
1170
- " num_rows: 2525\n",
1171
- "})"
1172
- ]
1173
- },
1174
- "metadata": {},
1175
- "execution_count": 9
1176
- }
1177
- ]
1178
- },
1179
- {
1180
- "cell_type": "code",
1181
- "source": [
1182
- "num_classes = train_ds.features[\"label\"].num_classes\n",
1183
- "id2label = {i: name for i, name in enumerate(train_ds.features[\"label\"].names)}\n",
1184
- "label2id = {v: k for k, v in id2label.items()}\n",
1185
- "print(f\"Classes: {num_classes}\")"
1186
- ],
1187
- "metadata": {
1188
- "colab": {
1189
- "base_uri": "https://localhost:8080/"
1190
- },
1191
- "id": "1JcvDPFK8Scd",
1192
- "outputId": "5c920e23-e96b-4c62-bf3a-7db183c97f48"
1193
- },
1194
- "execution_count": 10,
1195
- "outputs": [
1196
- {
1197
- "output_type": "stream",
1198
- "name": "stdout",
1199
- "text": [
1200
- "Classes: 101\n"
1201
- ]
1202
- }
1203
- ]
1204
- },
1205
- {
1206
- "cell_type": "markdown",
1207
- "source": [
1208
- "## Load Model\n",
1209
- "\n",
1210
- "This checkpoint is a bare backbone with no classification head, so we wrap it in a small module that adds a linear head on top of the CLS token."
1211
- ],
1212
- "metadata": {
1213
- "id": "_69A3AmO81c8"
1214
- }
1215
- },
1216
- {
1217
- "cell_type": "code",
1218
- "source": [
1219
- "import torch.nn as nn\n",
1220
- "import torch\n",
1221
- "from transformers import AutoImageProcessor, AutoModel, get_cosine_schedule_with_warmup\n",
1222
- "\n",
1223
- "MODEL_NAME = \"facebook/dinov3-vith16plus-pretrain-lvd1689m\"\n",
1224
- "\n",
1225
- "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
1226
- "\n",
1227
- "\n",
1228
- "image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)\n",
1229
- "backbone = AutoModel.from_pretrained(MODEL_NAME)\n",
1230
- "\n",
1231
- "hidden_size = getattr(backbone.config, \"hidden_size\", None)\n",
1232
- "\n",
1233
- "class DinoV3Linear(nn.Module):\n",
1234
- " def __init__(self, backbone: AutoModel, hidden_size: int, num_classes: int, freeze_backbone: bool = True):\n",
1235
- " super().__init__()\n",
1236
- " self.backbone = backbone\n",
1237
- " if freeze_backbone:\n",
1238
- " for p in self.backbone.parameters():\n",
1239
- " p.requires_grad = False\n",
1240
- " self.backbone.eval()\n",
1241
- "\n",
1242
- " self.head = nn.Linear(hidden_size, num_classes)\n",
1243
- "\n",
1244
- " def forward(self, pixel_values):\n",
1245
- " outputs = self.backbone(pixel_values=pixel_values)\n",
1246
- " last_hidden = outputs.last_hidden_state\n",
1247
- " cls = last_hidden[:, 0]\n",
1248
- " logits = self.head(cls)\n",
1249
- " return logits\n",
1250
- "\n",
1251
- "model = DinoV3Linear(backbone, hidden_size, num_classes, freeze_backbone=True).to(device) # we only train the head"
1252
- ],
1253
- "metadata": {
1254
- "colab": {
1255
- "base_uri": "https://localhost:8080/",
1256
- "height": 113,
1257
- "referenced_widgets": [
1258
- "32138245d41348928cc5b5834b07cb7e",
1259
- "df6de04fdb204d348767dd0b2d0e88f7",
1260
- "63a3800d62dd41d6b4a3f643a8930d95",
1261
- "49d67bd205184874a5cee04d318d91fe",
1262
- "f00ace964f96471b9eb839cce48ce378",
1263
- "3ad0ac8def244930a3aff41d68a88a65",
1264
- "7464841c193d492685bb929b1c0d230c",
1265
- "5c16553a2ff34a37a2cb62b4a4c42a6f",
1266
- "34be83ddb4bf43e58cadbcbac5a606b7",
1267
- "0ce7bd7e52074f29b446ef2d4dd0921a",
1268
- "7e2178d696c04d5787e736ace9ab57c0",
1269
- "3ff80bc2f64948408757caa8715d0603",
1270
- "12aa8675bca54f05a6deb7ec7a5def7a",
1271
- "31a74feac76f4744a0f34fbc99433831",
1272
- "bd51d97e739a4e78ad28083043f638d8",
1273
- "062d36b5d0c043a597eb9b3ebd35f313",
1274
- "2c2223a6ae3e4ff6be96a5f4e2d2d9b6",
1275
- "f2c7be27f90b49a3abe51b5e3003c17d",
1276
- "76d1f15c857640c3b06d98aef478f234",
1277
- "d43089f8240c44339c6881355ff0aee3",
1278
- "a139b85557a942b9b5d32b9d7def3e50",
1279
- "92043bfce97e4629bf9e4b268aa88c11",
1280
- "f20b3989658642528f4ed91666320097",
1281
- "3ee9921a635d44ec9b248e2155b5b243",
1282
- "caf0790dbf2544378cb04aa8eb3098c3",
1283
- "3ff0fc5ce62a44b9950dd8575d90bd21",
1284
- "77cdafc6dae44107a43a46ae19ed390a",
1285
- "65d8b73e3bdd46fca8a42b67739e27f9",
1286
- "b566321171044b0eb02ea3bd8c0472df",
1287
- "62535e046f794a28b4002c3f34fe7ff7",
1288
- "663aa65fdb4e4349b2815b6bafce4dcd",
1289
- "8410c9d15bca4c9f8b3aab2b7d327211",
1290
- "fb359d0651a74fe790aaace9a5d0e329"
1291
- ]
1292
- },
1293
- "id": "_oqXAu_y81H4",
1294
- "outputId": "7c4a4f6f-2301-4a43-eecb-50f1adb004b9"
1295
- },
1296
- "execution_count": 11,
1297
- "outputs": [
1298
- {
1299
- "output_type": "display_data",
1300
- "data": {
1301
- "text/plain": [
1302
- "preprocessor_config.json: 0%| | 0.00/585 [00:00<?, ?B/s]"
1303
- ],
1304
- "application/vnd.jupyter.widget-view+json": {
1305
- "version_major": 2,
1306
- "version_minor": 0,
1307
- "model_id": "32138245d41348928cc5b5834b07cb7e"
1308
- }
1309
- },
1310
- "metadata": {}
1311
- },
1312
- {
1313
- "output_type": "display_data",
1314
- "data": {
1315
- "text/plain": [
1316
- "config.json: 0%| | 0.00/744 [00:00<?, ?B/s]"
1317
- ],
1318
- "application/vnd.jupyter.widget-view+json": {
1319
- "version_major": 2,
1320
- "version_minor": 0,
1321
- "model_id": "3ff80bc2f64948408757caa8715d0603"
1322
- }
1323
- },
1324
- "metadata": {}
1325
- },
1326
- {
1327
- "output_type": "display_data",
1328
- "data": {
1329
- "text/plain": [
1330
- "model.safetensors: 0%| | 0.00/3.36G [00:00<?, ?B/s]"
1331
- ],
1332
- "application/vnd.jupyter.widget-view+json": {
1333
- "version_major": 2,
1334
- "version_minor": 0,
1335
- "model_id": "f20b3989658642528f4ed91666320097"
1336
- }
1337
- },
1338
- "metadata": {}
1339
- }
1340
- ]
1341
- },
1342
- {
1343
- "cell_type": "markdown",
1344
- "source": [
1345
- "Write a data collator that batches the inputs, then build the dataloaders for training and validation."
1346
- ],
1347
- "metadata": {
1348
- "id": "IfC3TFbw9SlZ"
1349
- }
1350
- },
1351
- {
1352
- "cell_type": "code",
1353
- "source": [
1354
- "from dataclasses import dataclass\n",
1355
- "from PIL import Image\n",
1356
- "import numpy as np\n",
1357
- "import torch\n",
1358
- "from transformers import AutoImageProcessor\n",
1359
- "\n",
1360
- "@dataclass\n",
1361
- "class Collator:\n",
1362
- " processor: AutoImageProcessor\n",
1363
- "\n",
1364
- " def __call__(self, batch):\n",
1365
- " raw_images = [x[\"image\"] for x in batch]\n",
1366
- " labels = torch.tensor([x[\"label\"] for x in batch], dtype=torch.long)\n",
1367
- "\n",
1368
- " rgb_images = []\n",
1369
- " # there's grayscale images in the dataset\n",
1370
- " for im in raw_images:\n",
1371
- " if isinstance(im, Image.Image):\n",
1372
- " rgb_images.append(im.convert(\"RGB\"))\n",
1373
- " continue\n",
1374
- "\n",
1375
- " inputs = self.processor(images=rgb_images, return_tensors=\"pt\")\n",
1376
- " return {\"pixel_values\": inputs[\"pixel_values\"], \"labels\": labels}\n",
1377
- "\n",
1378
- "collate_fn = Collator(image_processor)"
1379
- ],
1380
- "metadata": {
1381
- "id": "Wlo3_8qE9SVR"
1382
- },
1383
- "execution_count": 12,
1384
- "outputs": []
1385
- },
1386
- {
1387
- "cell_type": "code",
1388
- "source": [
1389
- "from torch.utils.data import DataLoader\n",
1390
- "import os\n",
1391
- "\n",
1392
- "BATCH_SIZE = 8\n",
1393
- "NUM_WORKERS = min(8, os.cpu_count() or 2)\n",
1394
- "\n",
1395
- "train_loader = DataLoader(\n",
1396
- " train_ds,\n",
1397
- " batch_size=BATCH_SIZE,\n",
1398
- " shuffle=True,\n",
1399
- " num_workers=NUM_WORKERS,\n",
1400
- " pin_memory=True,\n",
1401
- " collate_fn=collate_fn,\n",
1402
- ")\n",
1403
- "val_loader = DataLoader(\n",
1404
- " val_ds,\n",
1405
- " batch_size=BATCH_SIZE,\n",
1406
- " shuffle=False,\n",
1407
- " num_workers=NUM_WORKERS,\n",
1408
- " pin_memory=True,\n",
1409
- " collate_fn=collate_fn,\n",
1410
- ")"
1411
- ],
1412
- "metadata": {
1413
- "id": "Nou-Ct_e9zV5"
1414
- },
1415
- "execution_count": 13,
1416
- "outputs": []
1417
- },
1418
- {
1419
- "cell_type": "markdown",
1420
- "source": [
1421
- "## Training"
1422
- ],
1423
- "metadata": {
1424
- "id": "RblgS11W-Wuo"
1425
- }
1426
- },
1427
- {
1428
- "cell_type": "markdown",
1429
- "source": [
1430
- "The training configuration (hyperparameters, optimizer, scheduler and loss) is defined below."
1431
- ],
1432
- "metadata": {
1433
- "id": "25sCxjwG_tPo"
1434
- }
1435
- },
1436
- {
1437
- "cell_type": "code",
1438
- "source": [
1439
- "import math\n",
1440
- "import random\n",
1441
- "from typing import List, Dict, Any\n",
1442
- "\n",
1443
- "\n",
1444
- "EPOCHS = 5\n",
1445
- "LR = 5e-4\n",
1446
- "WEIGHT_DECAY = 1e-4\n",
1447
- "WARMUP_RATIO = 0.05\n",
1448
- "CHECKPOINT_DIR = \"./checkpoints_dinov3_food101\"\n",
1449
- "EVAL_EVERY_STEPS = 100\n",
1450
- "\n",
1451
- "optimizer = torch.optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=LR, weight_decay=WEIGHT_DECAY)\n",
1452
- "total_steps = EPOCHS * math.ceil(len(train_loader))\n",
1453
- "warmup_steps = int(WARMUP_RATIO * total_steps)\n",
1454
- "scheduler = get_cosine_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=total_steps)\n",
1455
- "criterion = nn.CrossEntropyLoss()\n",
1456
- "\n",
1457
- "scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())"
1458
- ],
1459
- "metadata": {
1460
- "colab": {
1461
- "base_uri": "https://localhost:8080/"
1462
- },
1463
- "id": "WWM8KLQD_sya",
1464
- "outputId": "1672c194-aad2-4af2-a9cf-e61aa0d558b9"
1465
- },
1466
- "execution_count": 14,
1467
- "outputs": [
1468
- {
1469
- "output_type": "stream",
1470
- "name": "stderr",
1471
- "text": [
1472
- "/tmp/ipython-input-593493728.py:19: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
1473
- " scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available())\n"
1474
- ]
1475
- }
1476
- ]
1477
- },
1478
- {
1479
- "cell_type": "code",
1480
- "source": [
1481
- "os.makedirs(\"./checkpoints_dinov3_food101\")"
1482
- ],
1483
- "metadata": {
1484
- "id": "OJPRRz09kxFT"
1485
- },
1486
- "execution_count": 15,
1487
- "outputs": []
1488
- },
1489
- {
1490
- "cell_type": "markdown",
1491
- "source": [
1492
- "We need to evaluate periodically during training, so let's define a helper that computes the validation loss and accuracy."
1493
- ],
1494
- "metadata": {
1495
- "id": "FHS5DSu1_22g"
1496
- }
1497
- },
1498
- {
1499
- "cell_type": "code",
1500
- "source": [
1501
- "def evaluate() -> Dict[str, float]:\n",
1502
- " model.eval()\n",
1503
- " correct, total, loss_sum = 0, 0, 0.0\n",
1504
- " with torch.no_grad():\n",
1505
- " for batch in val_loader:\n",
1506
- " pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
1507
- " labels = batch[\"labels\"].to(device, non_blocking=True)\n",
1508
- " logits = model(pixel_values)\n",
1509
- " loss = criterion(logits, labels)\n",
1510
- " loss_sum += loss.item() * labels.size(0)\n",
1511
- " preds = logits.argmax(dim=-1)\n",
1512
- " correct += (preds == labels).sum().item()\n",
1513
- " total += labels.size(0)\n",
1514
- " return {\n",
1515
- " \"val_loss\": loss_sum / max(total, 1),\n",
1516
- " \"val_acc\": correct / max(total, 1),\n",
1517
- " }"
1518
- ],
1519
- "metadata": {
1520
- "id": "TSD4tzZr_4i3"
1521
- },
1522
- "execution_count": 16,
1523
- "outputs": []
1524
- },
1525
- {
1526
- "cell_type": "markdown",
1527
- "source": [
1528
- "Let's write the training loop. We'll also use trackio for experiment tracking."
1529
- ],
1530
- "metadata": {
1531
- "id": "yakvOUOkAVcR"
1532
- }
1533
- },
1534
- {
1535
- "cell_type": "code",
1536
- "execution_count": 17,
1537
- "metadata": {
1538
- "colab": {
1539
- "base_uri": "https://localhost:8080/",
1540
- "height": 723
1541
- },
1542
- "id": "r-WQGd7UyN1s",
1543
- "outputId": "83e5aa69-4e4d-4c1d-c045-fc8ebad975ff"
1544
- },
1545
- "outputs": [
1546
- {
1547
- "output_type": "stream",
1548
- "name": "stdout",
1549
- "text": [
1550
- "* Running on public URL: https://3669a91d39321f7f86.gradio.live\n",
1551
- "* Trackio project initialized: dinov3\n",
1552
- "* Trackio metrics logged to: /root/.cache/huggingface/trackio\n",
1553
- "* View dashboard by running in your terminal:\n",
1554
- "\u001b[1m\u001b[93mtrackio show --project \"dinov3\"\u001b[0m\n",
1555
- "* or by running in Python: trackio.show(project=\"dinov3\")\n",
1556
- "[epoch 1 | step 100] train_loss=4.4878 val_loss=4.0990 val_acc=50.77%\n",
1557
- "[epoch 1 | step 200] train_loss=3.4722 val_loss=2.5605 val_acc=83.72%\n",
1558
- "[epoch 1 | step 300] train_loss=1.9046 val_loss=1.2049 val_acc=87.09%\n",
1559
- "[epoch 1 | step 400] train_loss=1.0664 val_loss=0.7385 val_acc=89.78%\n",
1560
- "[epoch 1 | step 500] train_loss=0.7269 val_loss=0.5500 val_acc=90.30%\n"
1561
- ]
1562
- },
1563
- {
1564
- "output_type": "stream",
1565
- "name": "stderr",
1566
- "text": [
1567
- "/usr/local/lib/python3.11/dist-packages/PIL/TiffImagePlugin.py:950: UserWarning: Truncated File Read\n",
1568
- " warnings.warn(str(msg))\n"
1569
- ]
1570
- },
1571
- {
1572
- "output_type": "stream",
1573
- "name": "stdout",
1574
- "text": [
1575
- "[epoch 1 | step 600] train_loss=0.6400 val_loss=0.4473 val_acc=91.92%\n",
1576
- "[epoch 1 | step 700] train_loss=0.5444 val_loss=0.3916 val_acc=92.44%\n",
1577
- "[epoch 1 | step 800] train_loss=0.5084 val_loss=0.3506 val_acc=92.08%\n"
1578
- ]
1579
- },
1580
- {
1581
- "output_type": "error",
1582
- "ename": "KeyboardInterrupt",
1583
- "evalue": "",
1584
- "traceback": [
1585
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1586
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
1587
- "\u001b[0;32m/tmp/ipython-input-3838929309.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mscale\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0mscaler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0mscheduler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1588
- "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36mstep\u001b[0;34m(self, optimizer, *args, **kwargs)\u001b[0m\n\u001b[1;32m 455\u001b[0m ), \"No inf checks were recorded for this optimizer.\"\n\u001b[1;32m 456\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 457\u001b[0;31m \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_maybe_opt_step\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moptimizer\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 458\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 459\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"stage\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mOptState\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSTEPPED\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1589
- "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m_maybe_opt_step\u001b[0;34m(self, optimizer, optimizer_state, *args, **kwargs)\u001b[0m\n\u001b[1;32m 349\u001b[0m ) -> Optional[float]:\n\u001b[1;32m 350\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 352\u001b[0m \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1590
- "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/amp/grad_scaler.py\u001b[0m in \u001b[0;36m<genexpr>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 349\u001b[0m ) -> Optional[float]:\n\u001b[1;32m 350\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 351\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0msum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitem\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0moptimizer_state\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"found_inf_per_device\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvalues\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 352\u001b[0m \u001b[0mretval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mretval\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1591
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
1592
- ]
1593
- }
1594
- ],
1595
- "source": [
1596
- "import trackio\n",
1597
- "\n",
1598
- "best_acc = 0.0\n",
1599
- "global_step = 0\n",
1600
- "\n",
1601
- "trackio.init(project=\"dinov3\", config={\n",
1602
- " \"epochs\": EPOCHS,\n",
1603
- " \"learning_rate\": LR,\n",
1604
- " \"batch_size\": BATCH_SIZE\n",
1605
- " })\n",
1606
- "\n",
1607
- "for epoch in range(1, EPOCHS + 1):\n",
1608
- " model.train()\n",
1609
- " model.backbone.eval() # comment out if you want to train the whole model\n",
1610
- "\n",
1611
- " running_loss = 0.0\n",
1612
- " for i, batch in enumerate(train_loader, start=1):\n",
1613
- " pixel_values = batch[\"pixel_values\"].to(device, non_blocking=True)\n",
1614
- " labels = batch[\"labels\"].to(device, non_blocking=True)\n",
1615
- "\n",
1616
- " optimizer.zero_grad(set_to_none=True)\n",
1617
- " logits = model(pixel_values)\n",
1618
- " loss = criterion(logits, labels)\n",
1619
- "\n",
1620
- " scaler.scale(loss).backward()\n",
1621
- " scaler.step(optimizer)\n",
1622
- " scaler.update()\n",
1623
- " scheduler.step()\n",
1624
- "\n",
1625
- " running_loss += loss.item()\n",
1626
- " global_step += 1\n",
1627
- "\n",
1628
- " if global_step % EVAL_EVERY_STEPS == 0:\n",
1629
- " metrics = evaluate()\n",
1630
- " print(\n",
1631
- " f\"[epoch {epoch} | step {global_step}] \"\n",
1632
- " f\"train_loss={running_loss / EVAL_EVERY_STEPS:.4f} \"\n",
1633
- " f\"val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}%\"\n",
1634
- " )\n",
1635
- " running_loss = 0.0\n",
1636
- "\n",
1637
- " trackio.log(\n",
1638
- " {\n",
1639
- " \"epoch\": epoch,\n",
1640
- " \"val_acc\": best_acc,\n",
1641
- " }\n",
1642
- " )\n",
1643
- "\n",
1644
- " if metrics[\"val_acc\"] > best_acc:\n",
1645
- " best_acc = metrics[\"val_acc\"]\n",
1646
- " ckpt_path = os.path.join(CHECKPOINT_DIR, f\"best_acc_{best_acc:.4f}.pt\")\n",
1647
- " torch.save(\n",
1648
- " {\n",
1649
- " \"model_state_dict\": model.state_dict(),\n",
1650
- " \"optimizer_state_dict\": optimizer.state_dict(),\n",
1651
- " \"scheduler_state_dict\": scheduler.state_dict(),\n",
1652
- " \"config\": {\n",
1653
- " \"model_name\": MODEL_NAME,\n",
1654
- " \"num_classes\": num_classes,\n",
1655
- " },\n",
1656
- " \"step\": global_step,\n",
1657
- " \"epoch\": epoch,\n",
1658
- " },\n",
1659
- " ckpt_path,\n",
1660
- " )\n",
1661
- "\n",
1662
- "\n",
1663
- " metrics = evaluate()\n",
1664
- " print(\n",
1665
- " f\"END EPOCH {epoch}: val_loss={metrics['val_loss']:.4f} val_acc={metrics['val_acc']*100:.2f}% \"\n",
1666
- " f\"(best_acc={best_acc*100:.2f}%)\"\n",
1667
- " )\n",
1668
- " trackio.finish()"
1669
- ]
1670
- },
1671
- {
1672
- "cell_type": "code",
1673
- "source": [
1674
- "!trackio show"
1675
- ],
1676
- "metadata": {
1677
- "id": "dX0kEHogATQ_"
1678
- },
1679
- "execution_count": null,
1680
- "outputs": []
1681
- },
1682
- {
1683
- "cell_type": "markdown",
1684
- "source": [
1685
- "Let's run inference with the model on a few in-the-wild images."
1686
- ],
1687
- "metadata": {
1688
- "id": "VKpGJ4L7bb2E"
1689
- }
1690
- },
1691
- {
1692
- "cell_type": "code",
1693
- "source": [
1694
- "import torch\n",
1695
- "from PIL import Image\n",
1696
- "from typing import List, Dict\n",
1697
- "\n",
1698
- "# --- Load checkpoint ---\n",
1699
- "ckpt_path = \"./checkpoints_dinov3_class/best_acc_0.9025.pt\"\n",
1700
- "\n",
1701
- "model = DinoV3Linear(backbone, hidden_size, num_classes, freeze_backbone=True).to(device)\n",
1702
- "checkpoint = torch.load(ckpt_path, map_location=device)\n",
1703
- "model.load_state_dict(checkpoint[\"model_state_dict\"])\n",
1704
- "model.eval()\n",
1705
- "\n",
1706
- "# --- Prepare images ---\n",
1707
- "images = [\"/content/pizza.jpg\", \"/content/spaghetti.JPG\"]\n",
1708
- "\n",
1709
- "pil_images = [Image.open(p).convert(\"RGB\") for p in images]\n",
1710
- "inputs = image_processor(images=pil_images, return_tensors=\"pt\").to(device)\n",
1711
- "\n",
1712
- "# --- Inference ---\n",
1713
- "with torch.no_grad():\n",
1714
- " logits = model(inputs[\"pixel_values\"])\n",
1715
- "\n",
1716
- "# take top 2 classes\n",
1717
- "probs = logits.softmax(dim=-1)\n",
1718
- "scores, indices = probs.topk(2, dim=-1)\n",
1719
- "\n",
1720
- "# --- Format results ---\n",
1721
- "results = []\n",
1722
- "for path, idxs, scs in zip(images, indices, scores):\n",
1723
- " preds = [\n",
1724
- " {\n",
1725
- " \"label_id\": int(i.item()),\n",
1726
- " \"label\": id2label.get(int(i.item()), f\"class_{int(i)}\"),\n",
1727
- " \"score\": float(s.item())\n",
1728
- " }\n",
1729
- " for i, s in zip(idxs, scs)\n",
1730
- " ]\n",
1731
- " results.append({\"image\": path, \"topk\": preds})\n",
1732
- "\n",
1733
- "print(results)\n"
1734
-
1735
- ],
1736
- "metadata": {
1737
- "id": "RGZntYQEaVbA"
1738
- },
1739
- "execution_count": 19,
1740
- "outputs": []
1741
- },
1742
- {
1743
- "cell_type": "markdown",
1744
- "source": [
1745
- "The model predicts correctly. This is expected: the backbone is strong, so even with it frozen and only the head trained, the model learned very fast. Feel free to try it on more challenging use cases."
1746
- ],
1747
- "metadata": {
1748
- "id": "bFoB-1Ebcab1"
1749
- }
1750
- },
1751
- {
1752
- "cell_type": "code",
1753
- "source": [
1754
- "results"
1755
- ],
1756
- "metadata": {
1757
- "colab": {
1758
- "base_uri": "https://localhost:8080/"
1759
- },
1760
- "id": "NrgtO2D1cXzj",
1761
- "outputId": "c972e7d0-ee78-45d3-e91f-7c68521d6a0b"
1762
- },
1763
- "execution_count": 20,
1764
- "outputs": [
1765
- {
1766
- "output_type": "execute_result",
1767
- "data": {
1768
- "text/plain": [
1769
- "[{'image': '/content/pizza.jpg',\n",
1770
- " 'topk': [{'label_id': 76, 'label': 'pizza', 'score': 0.7595003843307495},\n",
1771
- " {'label_id': 35, 'label': 'escargots', 'score': 0.013227012008428574}]},\n",
1772
- " {'image': '/content/spaghetti.JPG',\n",
1773
- " 'topk': [{'label_id': 91,\n",
1774
- " 'label': 'spaghetti_carbonara',\n",
1775
- " 'score': 0.6622196435928345},\n",
1776
- " {'label_id': 90,\n",
1777
- " 'label': 'spaghetti_bolognese',\n",
1778
- " 'score': 0.18182380497455597}]}]"
1779
- ]
1780
- },
1781
- "metadata": {},
1782
- "execution_count": 20
1783
- }
1784
- ]
1785
- }
1786
- ]
1787
- }