Spaces:
Running
Running
corrected csv
Browse files
app/src/content/assets/data/against_baselines_deduplicated.csv
CHANGED
|
@@ -1,3 +1,577 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
run,step,metric,value,stderr
|
| 2 |
+
FineVisionDD,1200,average,0.264341097123272,
|
| 3 |
+
FineVisionDD,1200,average_rank,2.5714285714285716,
|
| 4 |
+
FineVisionDD,1200,docvqa_val_anls,0.3715200680496628,0.005949832790823121
|
| 5 |
+
FineVisionDD,1200,infovqa_val_anls,0.19222676120723237,0.006565134600763451
|
| 6 |
+
FineVisionDD,1200,mme_total_score,743.1522609043617,
|
| 7 |
+
FineVisionDD,1200,mmmu_val_mmmu_acc,0.26222,
|
| 8 |
+
FineVisionDD,1200,mmstar_average,0.21525975348273643,
|
| 9 |
+
FineVisionDD,1200,ocrbench_ocrbench_accuracy,0.3,
|
| 10 |
+
FineVisionDD,1200,textvqa_val_exact_match,0.24482000000000004,0.005905726800471586
|
| 11 |
+
FineVisionDD,2400,average,0.3178775750923926,
|
| 12 |
+
FineVisionDD,2400,average_rank,2.4285714285714284,
|
| 13 |
+
FineVisionDD,2400,docvqa_val_anls,0.47030638473718095,0.006228583735740807
|
| 14 |
+
FineVisionDD,2400,infovqa_val_anls,0.20933736286426122,0.006709818578853176
|
| 15 |
+
FineVisionDD,2400,mme_total_score,1185.2899159663866,
|
| 16 |
+
FineVisionDD,2400,mmmu_val_mmmu_acc,0.25,
|
| 17 |
+
FineVisionDD,2400,mmstar_average,0.24490170295291339,
|
| 18 |
+
FineVisionDD,2400,ocrbench_ocrbench_accuracy,0.384,
|
| 19 |
+
FineVisionDD,2400,textvqa_val_exact_match,0.34872,0.00652553360559637
|
| 20 |
+
FineVisionDD,3600,average,0.34596783716441254,
|
| 21 |
+
FineVisionDD,3600,average_rank,2.4285714285714284,
|
| 22 |
+
FineVisionDD,3600,docvqa_val_anls,0.52073479618703,0.006284214687786431
|
| 23 |
+
FineVisionDD,3600,infovqa_val_anls,0.22809076679417026,0.006878849345111437
|
| 24 |
+
FineVisionDD,3600,mme_total_score,1168.4510804321728,
|
| 25 |
+
FineVisionDD,3600,mmmu_val_mmmu_acc,0.25667,
|
| 26 |
+
FineVisionDD,3600,mmstar_average,0.23323146000527503,
|
| 27 |
+
FineVisionDD,3600,ocrbench_ocrbench_accuracy,0.454,
|
| 28 |
+
FineVisionDD,3600,textvqa_val_exact_match,0.38308000000000003,0.0066477952252059665
|
| 29 |
+
FineVisionDD,4800,average,0.3549622071061929,
|
| 30 |
+
FineVisionDD,4800,average_rank,2.2857142857142856,
|
| 31 |
+
FineVisionDD,4800,docvqa_val_anls,0.5347116037470354,0.006161120918755636
|
| 32 |
+
FineVisionDD,4800,infovqa_val_anls,0.22616829864068178,0.006791811877573115
|
| 33 |
+
FineVisionDD,4800,mme_total_score,1067.0920368147258,
|
| 34 |
+
FineVisionDD,4800,mmmu_val_mmmu_acc,0.27444,
|
| 35 |
+
FineVisionDD,4800,mmstar_average,0.23307334024944037,
|
| 36 |
+
FineVisionDD,4800,ocrbench_ocrbench_accuracy,0.473,
|
| 37 |
+
FineVisionDD,4800,textvqa_val_exact_match,0.38837999999999995,0.006654731565618713
|
| 38 |
+
FineVisionDD,6000,average,0.3848921103122081,
|
| 39 |
+
FineVisionDD,6000,average_rank,2.142857142857143,
|
| 40 |
+
FineVisionDD,6000,docvqa_val_anls,0.5762794835718067,0.006247345256607651
|
| 41 |
+
FineVisionDD,6000,infovqa_val_anls,0.25437900510747613,0.007245969162163573
|
| 42 |
+
FineVisionDD,6000,mme_total_score,1182.3837535014004,
|
| 43 |
+
FineVisionDD,6000,mmmu_val_mmmu_acc,0.27222,
|
| 44 |
+
FineVisionDD,6000,mmstar_average,0.2747341731939661,
|
| 45 |
+
FineVisionDD,6000,ocrbench_ocrbench_accuracy,0.495,
|
| 46 |
+
FineVisionDD,6000,textvqa_val_exact_match,0.43673999999999996,0.006759376621735387
|
| 47 |
+
FineVisionDD,7200,average,0.3978156352765745,
|
| 48 |
+
FineVisionDD,7200,average_rank,1.8571428571428572,
|
| 49 |
+
FineVisionDD,7200,docvqa_val_anls,0.5914916761381446,0.006230792162717311
|
| 50 |
+
FineVisionDD,7200,infovqa_val_anls,0.2584115961449724,0.007214877478455323
|
| 51 |
+
FineVisionDD,7200,mme_total_score,1174.9931972789116,
|
| 52 |
+
FineVisionDD,7200,mmmu_val_mmmu_acc,0.28889,
|
| 53 |
+
FineVisionDD,7200,mmstar_average,0.30312053937633016,
|
| 54 |
+
FineVisionDD,7200,ocrbench_ocrbench_accuracy,0.501,
|
| 55 |
+
FineVisionDD,7200,textvqa_val_exact_match,0.44398000000000004,0.006765405092173878
|
| 56 |
+
FineVisionDD,8400,average,0.4059159035113804,
|
| 57 |
+
FineVisionDD,8400,average_rank,1.7142857142857142,
|
| 58 |
+
FineVisionDD,8400,docvqa_val_anls,0.6115548076222326,0.006189572923188405
|
| 59 |
+
FineVisionDD,8400,infovqa_val_anls,0.2617197889496108,0.007158591695868175
|
| 60 |
+
FineVisionDD,8400,mme_total_score,1252.2165866346538,
|
| 61 |
+
FineVisionDD,8400,mmmu_val_mmmu_acc,0.29444,
|
| 62 |
+
FineVisionDD,8400,mmstar_average,0.285260824496439,
|
| 63 |
+
FineVisionDD,8400,ocrbench_ocrbench_accuracy,0.52,
|
| 64 |
+
FineVisionDD,8400,textvqa_val_exact_match,0.4625200000000001,0.0067937236370175695
|
| 65 |
+
FineVisionDD,9600,average,0.41115899049749083,
|
| 66 |
+
FineVisionDD,9600,average_rank,1.5714285714285714,
|
| 67 |
+
FineVisionDD,9600,docvqa_val_anls,0.6213641622467091,0.006165172206432181
|
| 68 |
+
FineVisionDD,9600,infovqa_val_anls,0.2757908658532091,0.007363785243871019
|
| 69 |
+
FineVisionDD,9600,mme_total_score,1239.7746098439375,
|
| 70 |
+
FineVisionDD,9600,mmmu_val_mmmu_acc,0.29444,
|
| 71 |
+
FineVisionDD,9600,mmstar_average,0.2999389148850269,
|
| 72 |
+
FineVisionDD,9600,ocrbench_ocrbench_accuracy,0.519,
|
| 73 |
+
FineVisionDD,9600,textvqa_val_exact_match,0.45642000000000005,0.006788827170791062
|
| 74 |
+
FineVisionDD,10800,average,0.41894565175282533,
|
| 75 |
+
FineVisionDD,10800,average_rank,1.1428571428571428,
|
| 76 |
+
FineVisionDD,10800,docvqa_val_anls,0.6353621980573124,0.006124533744452508
|
| 77 |
+
FineVisionDD,10800,infovqa_val_anls,0.26751996667040645,0.0071172404352328284
|
| 78 |
+
FineVisionDD,10800,mme_total_score,1353.3499399759903,
|
| 79 |
+
FineVisionDD,10800,mmmu_val_mmmu_acc,0.29778,
|
| 80 |
+
FineVisionDD,10800,mmstar_average,0.325351745789233,
|
| 81 |
+
FineVisionDD,10800,ocrbench_ocrbench_accuracy,0.516,
|
| 82 |
+
FineVisionDD,10800,textvqa_val_exact_match,0.47165999999999997,0.0067931287489374085
|
| 83 |
+
FineVisionDD,12000,average,0.4208515127756214,
|
| 84 |
+
FineVisionDD,12000,average_rank,1.4285714285714286,
|
| 85 |
+
FineVisionDD,12000,docvqa_val_anls,0.6294351828158641,0.006169625021925361
|
| 86 |
+
FineVisionDD,12000,infovqa_val_anls,0.2797661440287805,0.007408513793528687
|
| 87 |
+
FineVisionDD,12000,mme_total_score,1091.6394557823128,
|
| 88 |
+
FineVisionDD,12000,mmmu_val_mmmu_acc,0.29556,
|
| 89 |
+
FineVisionDD,12000,mmstar_average,0.32114774980908367,
|
| 90 |
+
FineVisionDD,12000,ocrbench_ocrbench_accuracy,0.525,
|
| 91 |
+
FineVisionDD,12000,textvqa_val_exact_match,0.4742,0.006787465354400525
|
| 92 |
+
FineVisionDD,13200,average,0.42658753741516975,
|
| 93 |
+
FineVisionDD,13200,average_rank,1.5714285714285714,
|
| 94 |
+
FineVisionDD,13200,docvqa_val_anls,0.6427877927509281,0.006125147292514003
|
| 95 |
+
FineVisionDD,13200,infovqa_val_anls,0.2907270038093242,0.007372590798085613
|
| 96 |
+
FineVisionDD,13200,mme_total_score,1211.7135854341736,
|
| 97 |
+
FineVisionDD,13200,mmmu_val_mmmu_acc,0.28889,
|
| 98 |
+
FineVisionDD,13200,mmstar_average,0.30988042793076603,
|
| 99 |
+
FineVisionDD,13200,ocrbench_ocrbench_accuracy,0.546,
|
| 100 |
+
FineVisionDD,13200,textvqa_val_exact_match,0.48123999999999995,0.0068072667243212395
|
| 101 |
+
FineVisionDD,14400,average,0.4273536900736185,
|
| 102 |
+
FineVisionDD,14400,average_rank,1.5714285714285714,
|
| 103 |
+
FineVisionDD,14400,docvqa_val_anls,0.654480111743584,0.006079437400066777
|
| 104 |
+
FineVisionDD,14400,infovqa_val_anls,0.2776743812062677,0.007152404684338895
|
| 105 |
+
FineVisionDD,14400,mme_total_score,1211.577330932373,
|
| 106 |
+
FineVisionDD,14400,mmmu_val_mmmu_acc,0.28222,
|
| 107 |
+
FineVisionDD,14400,mmstar_average,0.32896764749185925,
|
| 108 |
+
FineVisionDD,14400,ocrbench_ocrbench_accuracy,0.527,
|
| 109 |
+
FineVisionDD,14400,textvqa_val_exact_match,0.49378,0.006791486374677893
|
| 110 |
+
FineVisionDD,15600,average,0.4373836230155283,
|
| 111 |
+
FineVisionDD,15600,average_rank,1.0,
|
| 112 |
+
FineVisionDD,15600,docvqa_val_anls,0.6587223702708729,0.0060724859630705355
|
| 113 |
+
FineVisionDD,15600,infovqa_val_anls,0.2954608342132971,0.007455706284703673
|
| 114 |
+
FineVisionDD,15600,mme_total_score,1196.3369347739094,
|
| 115 |
+
FineVisionDD,15600,mmmu_val_mmmu_acc,0.29333,
|
| 116 |
+
FineVisionDD,15600,mmstar_average,0.33750853360899963,
|
| 117 |
+
FineVisionDD,15600,ocrbench_ocrbench_accuracy,0.54,
|
| 118 |
+
FineVisionDD,15600,textvqa_val_exact_match,0.49927999999999995,0.0067965531666418525
|
| 119 |
+
FineVisionDD,16800,average,0.43378959957858315,
|
| 120 |
+
FineVisionDD,16800,average_rank,1.2857142857142858,
|
| 121 |
+
FineVisionDD,16800,docvqa_val_anls,0.6677987652181413,0.006012562319824571
|
| 122 |
+
FineVisionDD,16800,infovqa_val_anls,0.2813134865271826,0.007107230565585641
|
| 123 |
+
FineVisionDD,16800,mme_total_score,1303.9127651060423,
|
| 124 |
+
FineVisionDD,16800,mmmu_val_mmmu_acc,0.28111,
|
| 125 |
+
FineVisionDD,16800,mmstar_average,0.3315953457261746,
|
| 126 |
+
FineVisionDD,16800,ocrbench_ocrbench_accuracy,0.549,
|
| 127 |
+
FineVisionDD,16800,textvqa_val_exact_match,0.4919200000000001,0.006795246706011423
|
| 128 |
+
FineVisionDD,18000,average,0.4460242607466102,
|
| 129 |
+
FineVisionDD,18000,average_rank,1.1428571428571428,
|
| 130 |
+
FineVisionDD,18000,docvqa_val_anls,0.6719255126618523,0.006008621561058294
|
| 131 |
+
FineVisionDD,18000,infovqa_val_anls,0.29900934485493813,0.007466958171203317
|
| 132 |
+
FineVisionDD,18000,mme_total_score,1236.6654661864745,
|
| 133 |
+
FineVisionDD,18000,mmmu_val_mmmu_acc,0.3,
|
| 134 |
+
FineVisionDD,18000,mmstar_average,0.34327070696287054,
|
| 135 |
+
FineVisionDD,18000,ocrbench_ocrbench_accuracy,0.546,
|
| 136 |
+
FineVisionDD,18000,textvqa_val_exact_match,0.5159400000000001,0.006793085637800874
|
| 137 |
+
FineVisionDD,19200,average,0.44845865852995476,
|
| 138 |
+
FineVisionDD,19200,average_rank,1.0,
|
| 139 |
+
FineVisionDD,19200,docvqa_val_anls,0.6777684245254485,0.005985910291387732
|
| 140 |
+
FineVisionDD,19200,infovqa_val_anls,0.2877789783739627,0.007152893066126468
|
| 141 |
+
FineVisionDD,19200,mme_total_score,1240.2280912364945,
|
| 142 |
+
FineVisionDD,19200,mmmu_val_mmmu_acc,0.29778,
|
| 143 |
+
FineVisionDD,19200,mmstar_average,0.3473245482803175,
|
| 144 |
+
FineVisionDD,19200,ocrbench_ocrbench_accuracy,0.568,
|
| 145 |
+
FineVisionDD,19200,textvqa_val_exact_match,0.5121,0.006797143387603819
|
| 146 |
+
CauldronDD,1200,average,0.29972102969630693,
|
| 147 |
+
CauldronDD,1200,average_rank,1.5714285714285714,
|
| 148 |
+
CauldronDD,1200,docvqa_val_anls,0.3393747623503541,0.005393199870631087
|
| 149 |
+
CauldronDD,1200,infovqa_val_anls,0.14788475521512282,0.005517625394198703
|
| 150 |
+
CauldronDD,1200,mme_total_score,1237.1527611044417,
|
| 151 |
+
CauldronDD,1200,mmmu_val_mmmu_acc,0.28444,
|
| 152 |
+
CauldronDD,1200,mmstar_average,0.2961666606123647,
|
| 153 |
+
CauldronDD,1200,ocrbench_ocrbench_accuracy,0.324,
|
| 154 |
+
CauldronDD,1200,textvqa_val_exact_match,0.40646000000000004,0.006706135111196755
|
| 155 |
+
CauldronDD,2400,average,0.3338688722253544,
|
| 156 |
+
CauldronDD,2400,average_rank,1.8571428571428572,
|
| 157 |
+
CauldronDD,2400,docvqa_val_anls,0.4106908679403099,0.00557717705073105
|
| 158 |
+
CauldronDD,2400,infovqa_val_anls,0.16022819076638478,0.005740317063734872
|
| 159 |
+
CauldronDD,2400,mme_total_score,1243.3691476590636,
|
| 160 |
+
CauldronDD,2400,mmmu_val_mmmu_acc,0.27889,
|
| 161 |
+
CauldronDD,2400,mmstar_average,0.33588417464543163,
|
| 162 |
+
CauldronDD,2400,ocrbench_ocrbench_accuracy,0.366,
|
| 163 |
+
CauldronDD,2400,textvqa_val_exact_match,0.45152,0.006779965450229171
|
| 164 |
+
CauldronDD,3600,average,0.33947430615719726,
|
| 165 |
+
CauldronDD,3600,average_rank,2.2857142857142856,
|
| 166 |
+
CauldronDD,3600,docvqa_val_anls,0.43097255569855397,0.005587910026275849
|
| 167 |
+
CauldronDD,3600,infovqa_val_anls,0.1641426454649424,0.005800068910792727
|
| 168 |
+
CauldronDD,3600,mme_total_score,1310.0697278911566,
|
| 169 |
+
CauldronDD,3600,mmmu_val_mmmu_acc,0.28333,
|
| 170 |
+
CauldronDD,3600,mmstar_average,0.3259006357796873,
|
| 171 |
+
CauldronDD,3600,ocrbench_ocrbench_accuracy,0.36,
|
| 172 |
+
CauldronDD,3600,textvqa_val_exact_match,0.4725,0.006816571214960329
|
| 173 |
+
CauldronDD,4800,average,0.3474647210512976,
|
| 174 |
+
CauldronDD,4800,average_rank,2.142857142857143,
|
| 175 |
+
CauldronDD,4800,docvqa_val_anls,0.44347290757863167,0.005625752855686164
|
| 176 |
+
CauldronDD,4800,infovqa_val_anls,0.16073440834957092,0.00572812246049592
|
| 177 |
+
CauldronDD,4800,mme_total_score,1239.124949979992,
|
| 178 |
+
CauldronDD,4800,mmmu_val_mmmu_acc,0.31556,
|
| 179 |
+
CauldronDD,4800,mmstar_average,0.3157610103795831,
|
| 180 |
+
CauldronDD,4800,ocrbench_ocrbench_accuracy,0.378,
|
| 181 |
+
CauldronDD,4800,textvqa_val_exact_match,0.47125999999999996,0.00680373872603368
|
| 182 |
+
CauldronDD,6000,average,0.3400596935324955,
|
| 183 |
+
CauldronDD,6000,average_rank,2.0,
|
| 184 |
+
CauldronDD,6000,docvqa_val_anls,0.43150620522864996,0.005601817666455916
|
| 185 |
+
CauldronDD,6000,infovqa_val_anls,0.16804581718043338,0.005797914749544558
|
| 186 |
+
CauldronDD,6000,mme_total_score,1246.4825930372149,
|
| 187 |
+
CauldronDD,6000,mmmu_val_mmmu_acc,0.27667,
|
| 188 |
+
CauldronDD,6000,mmstar_average,0.34191613878588945,
|
| 189 |
+
CauldronDD,6000,ocrbench_ocrbench_accuracy,0.368,
|
| 190 |
+
CauldronDD,6000,textvqa_val_exact_match,0.45421999999999996,0.006799535650102248
|
| 191 |
+
CauldronDD,7200,average,0.3391609673818097,
|
| 192 |
+
CauldronDD,7200,average_rank,2.2857142857142856,
|
| 193 |
+
CauldronDD,7200,docvqa_val_anls,0.4285872356274967,0.005613450362222006
|
| 194 |
+
CauldronDD,7200,infovqa_val_anls,0.1673609356908039,0.0058332340615507815
|
| 195 |
+
CauldronDD,7200,mme_total_score,1225.8680472188876,
|
| 196 |
+
CauldronDD,7200,mmmu_val_mmmu_acc,0.28778,
|
| 197 |
+
CauldronDD,7200,mmstar_average,0.31851763297255725,
|
| 198 |
+
CauldronDD,7200,ocrbench_ocrbench_accuracy,0.378,
|
| 199 |
+
CauldronDD,7200,textvqa_val_exact_match,0.45472000000000007,0.006786512776907903
|
| 200 |
+
CauldronDD,8400,average,0.3431478061334871,
|
| 201 |
+
CauldronDD,8400,average_rank,2.5714285714285716,
|
| 202 |
+
CauldronDD,8400,docvqa_val_anls,0.440186698815653,0.005613446205499607
|
| 203 |
+
CauldronDD,8400,infovqa_val_anls,0.17029748604016814,0.005836597208873185
|
| 204 |
+
CauldronDD,8400,mme_total_score,1271.5840336134456,
|
| 205 |
+
CauldronDD,8400,mmmu_val_mmmu_acc,0.27778,
|
| 206 |
+
CauldronDD,8400,mmstar_average,0.32566265194510147,
|
| 207 |
+
CauldronDD,8400,ocrbench_ocrbench_accuracy,0.386,
|
| 208 |
+
CauldronDD,8400,textvqa_val_exact_match,0.45896000000000003,0.00681272532289869
|
| 209 |
+
CauldronDD,9600,average,0.3413459009956081,
|
| 210 |
+
CauldronDD,9600,average_rank,2.857142857142857,
|
| 211 |
+
CauldronDD,9600,docvqa_val_anls,0.4403774280666133,0.005612804160672664
|
| 212 |
+
CauldronDD,9600,infovqa_val_anls,0.16559694737276026,0.0058146690100803694
|
| 213 |
+
CauldronDD,9600,mme_total_score,1235.5730292116846,
|
| 214 |
+
CauldronDD,9600,mmmu_val_mmmu_acc,0.28,
|
| 215 |
+
CauldronDD,9600,mmstar_average,0.33264103053427463,
|
| 216 |
+
CauldronDD,9600,ocrbench_ocrbench_accuracy,0.383,
|
| 217 |
+
CauldronDD,9600,textvqa_val_exact_match,0.44646,0.006795434442760313
|
| 218 |
+
CauldronDD,10800,average,0.3380861972330776,
|
| 219 |
+
CauldronDD,10800,average_rank,3.142857142857143,
|
| 220 |
+
CauldronDD,10800,docvqa_val_anls,0.4402326817553441,0.005626934973411334
|
| 221 |
+
CauldronDD,10800,infovqa_val_anls,0.16122827030707865,0.005747720437259022
|
| 222 |
+
CauldronDD,10800,mme_total_score,1245.125650260104,
|
| 223 |
+
CauldronDD,10800,mmmu_val_mmmu_acc,0.29444,
|
| 224 |
+
CauldronDD,10800,mmstar_average,0.309516231336043,
|
| 225 |
+
CauldronDD,10800,ocrbench_ocrbench_accuracy,0.383,
|
| 226 |
+
CauldronDD,10800,textvqa_val_exact_match,0.4401,0.006786752537259658
|
| 227 |
+
CauldronDD,12000,average,0.33154594568198864,
|
| 228 |
+
CauldronDD,12000,average_rank,3.2857142857142856,
|
| 229 |
+
CauldronDD,12000,docvqa_val_anls,0.43508650222322015,0.00561327125316578
|
| 230 |
+
CauldronDD,12000,infovqa_val_anls,0.16563023539653135,0.0058079534236688945
|
| 231 |
+
CauldronDD,12000,mme_total_score,1240.7185874349739,
|
| 232 |
+
CauldronDD,12000,mmmu_val_mmmu_acc,0.27556,
|
| 233 |
+
CauldronDD,12000,mmstar_average,0.2978389364721804,
|
| 234 |
+
CauldronDD,12000,ocrbench_ocrbench_accuracy,0.375,
|
| 235 |
+
CauldronDD,12000,textvqa_val_exact_match,0.44016000000000005,0.006801256229349064
|
| 236 |
+
CauldronDD,13200,average,0.3323617201953493,
|
| 237 |
+
CauldronDD,13200,average_rank,3.2857142857142856,
|
| 238 |
+
CauldronDD,13200,docvqa_val_anls,0.4336687642519214,0.00561127691138422
|
| 239 |
+
CauldronDD,13200,infovqa_val_anls,0.16294964748823013,0.00577613475202133
|
| 240 |
+
CauldronDD,13200,mme_total_score,1232.6909763905562,
|
| 241 |
+
CauldronDD,13200,mmmu_val_mmmu_acc,0.27556,
|
| 242 |
+
CauldronDD,13200,mmstar_average,0.3120919094319445,
|
| 243 |
+
CauldronDD,13200,ocrbench_ocrbench_accuracy,0.37,
|
| 244 |
+
CauldronDD,13200,textvqa_val_exact_match,0.4398999999999999,0.006800709369586816
|
| 245 |
+
CauldronDD,14400,average,0.33686465162435447,
|
| 246 |
+
CauldronDD,14400,average_rank,3.0,
|
| 247 |
+
CauldronDD,14400,docvqa_val_anls,0.4346981780601323,0.005637000083152569
|
| 248 |
+
CauldronDD,14400,infovqa_val_anls,0.15117394150977184,0.005624727950317896
|
| 249 |
+
CauldronDD,14400,mme_total_score,1229.5749299719887,
|
| 250 |
+
CauldronDD,14400,mmmu_val_mmmu_acc,0.28444,
|
| 251 |
+
CauldronDD,14400,mmstar_average,0.3150357901762228,
|
| 252 |
+
CauldronDD,14400,ocrbench_ocrbench_accuracy,0.396,
|
| 253 |
+
CauldronDD,14400,textvqa_val_exact_match,0.43983999999999995,0.006801397406514065
|
| 254 |
+
CauldronDD,15600,average,0.32646326413760035,
|
| 255 |
+
CauldronDD,15600,average_rank,3.5714285714285716,
|
| 256 |
+
CauldronDD,15600,docvqa_val_anls,0.433995514472087,0.005646461618482555
|
| 257 |
+
CauldronDD,15600,infovqa_val_anls,0.1562018233604324,0.005700992835439662
|
| 258 |
+
CauldronDD,15600,mme_total_score,1122.3809523809523,
|
| 259 |
+
CauldronDD,15600,mmmu_val_mmmu_acc,0.26333,
|
| 260 |
+
CauldronDD,15600,mmstar_average,0.30641224699308284,
|
| 261 |
+
CauldronDD,15600,ocrbench_ocrbench_accuracy,0.366,
|
| 262 |
+
CauldronDD,15600,textvqa_val_exact_match,0.43283999999999995,0.006800820326359335
|
| 263 |
+
CauldronDD,16800,average,0.32818017568992097,
|
| 264 |
+
CauldronDD,16800,average_rank,3.2857142857142856,
|
| 265 |
+
CauldronDD,16800,docvqa_val_anls,0.43345387633219307,0.005602799050931306
|
| 266 |
+
CauldronDD,16800,infovqa_val_anls,0.16417934269316956,0.005815179007624968
|
| 267 |
+
CauldronDD,16800,mme_total_score,1197.6628651460585,
|
| 268 |
+
CauldronDD,16800,mmmu_val_mmmu_acc,0.27111,
|
| 269 |
+
CauldronDD,16800,mmstar_average,0.3091778351141632,
|
| 270 |
+
CauldronDD,16800,ocrbench_ocrbench_accuracy,0.36,
|
| 271 |
+
CauldronDD,16800,textvqa_val_exact_match,0.43116000000000004,0.006790215923404594
|
| 272 |
+
CauldronDD,18000,average,0.3341436139545066,
|
| 273 |
+
CauldronDD,18000,average_rank,3.2857142857142856,
|
| 274 |
+
CauldronDD,18000,docvqa_val_anls,0.4405469745279471,0.0056286501797814135
|
| 275 |
+
CauldronDD,18000,infovqa_val_anls,0.1660848313620339,0.005819813220995324
|
| 276 |
+
CauldronDD,18000,mme_total_score,1242.9980992396959,
|
| 277 |
+
CauldronDD,18000,mmmu_val_mmmu_acc,0.27778,
|
| 278 |
+
CauldronDD,18000,mmstar_average,0.31554987783705823,
|
| 279 |
+
CauldronDD,18000,ocrbench_ocrbench_accuracy,0.373,
|
| 280 |
+
CauldronDD,18000,textvqa_val_exact_match,0.4319,0.006790913141858027
|
| 281 |
+
CauldronDD,19200,average,0.33290606090591973,
|
| 282 |
+
CauldronDD,19200,average_rank,3.2857142857142856,
|
| 283 |
+
CauldronDD,19200,docvqa_val_anls,0.43616573848632056,0.005619579845927559
|
| 284 |
+
CauldronDD,19200,infovqa_val_anls,0.16528162106770297,0.005801061681754425
|
| 285 |
+
CauldronDD,19200,mme_total_score,1230.0974389755902,
|
| 286 |
+
CauldronDD,19200,mmmu_val_mmmu_acc,0.27,
|
| 287 |
+
CauldronDD,19200,mmstar_average,0.3266290058814946,
|
| 288 |
+
CauldronDD,19200,ocrbench_ocrbench_accuracy,0.374,
|
| 289 |
+
CauldronDD,19200,textvqa_val_exact_match,0.42536,0.006794218598284299
|
| 290 |
+
CambrianDD,1200,average,0.2568586004702917,
|
| 291 |
+
CambrianDD,1200,average_rank,2.857142857142857,
|
| 292 |
+
CambrianDD,1200,docvqa_val_anls,0.3316039842462008,0.0057785603046722
|
| 293 |
+
CambrianDD,1200,infovqa_val_anls,0.14630377786332374,0.005668585125239906
|
| 294 |
+
CambrianDD,1200,mme_total_score,1112.7626050420167,
|
| 295 |
+
CambrianDD,1200,mmmu_val_mmmu_acc,0.26111,
|
| 296 |
+
CambrianDD,1200,mmstar_average,0.21803384071222537,
|
| 297 |
+
CambrianDD,1200,ocrbench_ocrbench_accuracy,0.247,
|
| 298 |
+
CambrianDD,1200,textvqa_val_exact_match,0.3371,0.006460330113317322
|
| 299 |
+
CambrianDD,2400,average,0.30575373318860816,
|
| 300 |
+
CambrianDD,2400,average_rank,2.7142857142857144,
|
| 301 |
+
CambrianDD,2400,docvqa_val_anls,0.40422225671207945,0.006074261001968628
|
| 302 |
+
CambrianDD,2400,infovqa_val_anls,0.1523121409563817,0.005638329718892052
|
| 303 |
+
CambrianDD,2400,mme_total_score,1059.9440776310523,
|
| 304 |
+
CambrianDD,2400,mmmu_val_mmmu_acc,0.28444,
|
| 305 |
+
CambrianDD,2400,mmstar_average,0.3110480014631879,
|
| 306 |
+
CambrianDD,2400,ocrbench_ocrbench_accuracy,0.3,
|
| 307 |
+
CambrianDD,2400,textvqa_val_exact_match,0.38249999999999995,0.006625581458704827
|
| 308 |
+
CambrianDD,3600,average,0.3244376041867266,
|
| 309 |
+
CambrianDD,3600,average_rank,2.7142857142857144,
|
| 310 |
+
CambrianDD,3600,docvqa_val_anls,0.4477711985871837,0.006244212556452033
|
| 311 |
+
CambrianDD,3600,infovqa_val_anls,0.17166556922234352,0.006038401288152695
|
| 312 |
+
CambrianDD,3600,mme_total_score,1054.6183473389356,
|
| 313 |
+
CambrianDD,3600,mmmu_val_mmmu_acc,0.28778,
|
| 314 |
+
CambrianDD,3600,mmstar_average,0.3192288573108325,
|
| 315 |
+
CambrianDD,3600,ocrbench_ocrbench_accuracy,0.325,
|
| 316 |
+
CambrianDD,3600,textvqa_val_exact_match,0.39518000000000003,0.00666872160834278
|
| 317 |
+
CambrianDD,4800,average,0.33575298162563233,
|
| 318 |
+
CambrianDD,4800,average_rank,2.7142857142857144,
|
| 319 |
+
CambrianDD,4800,docvqa_val_anls,0.48021663592502906,0.006264475129046182
|
| 320 |
+
CambrianDD,4800,infovqa_val_anls,0.17732197564395005,0.005979359845801751
|
| 321 |
+
CambrianDD,4800,mme_total_score,984.9863945578231,
|
| 322 |
+
CambrianDD,4800,mmmu_val_mmmu_acc,0.29111,
|
| 323 |
+
CambrianDD,4800,mmstar_average,0.29772927818481454,
|
| 324 |
+
CambrianDD,4800,ocrbench_ocrbench_accuracy,0.346,
|
| 325 |
+
CambrianDD,4800,textvqa_val_exact_match,0.42214000000000007,0.0067477011177196344
|
| 326 |
+
CambrianDD,6000,average,0.32347651657813326,
|
| 327 |
+
CambrianDD,6000,average_rank,3.0,
|
| 328 |
+
CambrianDD,6000,docvqa_val_anls,0.46634507029121364,0.0062238629881778374
|
| 329 |
+
CambrianDD,6000,infovqa_val_anls,0.17940221095579675,0.006141333951799168
|
| 330 |
+
CambrianDD,6000,mme_total_score,1072.1291516606643,
|
| 331 |
+
CambrianDD,6000,mmmu_val_mmmu_acc,0.27667,
|
| 332 |
+
CambrianDD,6000,mmstar_average,0.31024181822178915,
|
| 333 |
+
CambrianDD,6000,ocrbench_ocrbench_accuracy,0.305,
|
| 334 |
+
CambrianDD,6000,textvqa_val_exact_match,0.4032,0.006697142849340224
|
| 335 |
+
CambrianDD,7200,average,0.3486686601177924,
|
| 336 |
+
CambrianDD,7200,average_rank,3.0,
|
| 337 |
+
CambrianDD,7200,docvqa_val_anls,0.5033994017292339,0.006211902263203208
|
| 338 |
+
CambrianDD,7200,infovqa_val_anls,0.1898192044728013,0.006149174628390649
|
| 339 |
+
CambrianDD,7200,mme_total_score,879.126550620248,
|
| 340 |
+
CambrianDD,7200,mmmu_val_mmmu_acc,0.27556,
|
| 341 |
+
CambrianDD,7200,mmstar_average,0.32559335450471943,
|
| 342 |
+
CambrianDD,7200,ocrbench_ocrbench_accuracy,0.365,
|
| 343 |
+
CambrianDD,7200,textvqa_val_exact_match,0.43263999999999997,0.006774430209318876
|
| 344 |
+
CambrianDD,8400,average,0.3566021934695403,
|
| 345 |
+
CambrianDD,8400,average_rank,2.7142857142857144,
|
| 346 |
+
CambrianDD,8400,docvqa_val_anls,0.49954330523768764,0.006213069198769485
|
| 347 |
+
CambrianDD,8400,infovqa_val_anls,0.199645571135255,0.006349349194468786
|
| 348 |
+
CambrianDD,8400,mme_total_score,1114.3343337334934,
|
| 349 |
+
CambrianDD,8400,mmmu_val_mmmu_acc,0.29556,
|
| 350 |
+
CambrianDD,8400,mmstar_average,0.3215042844442992,
|
| 351 |
+
CambrianDD,8400,ocrbench_ocrbench_accuracy,0.379,
|
| 352 |
+
CambrianDD,8400,textvqa_val_exact_match,0.4443599999999999,0.006777995745444597
|
| 353 |
+
CambrianDD,9600,average,0.3625887269392778,
|
| 354 |
+
CambrianDD,9600,average_rank,2.5714285714285716,
|
| 355 |
+
CambrianDD,9600,docvqa_val_anls,0.5209747359075046,0.006185627757446921
|
| 356 |
+
CambrianDD,9600,infovqa_val_anls,0.20779524694498724,0.006396756819481715
|
| 357 |
+
CambrianDD,9600,mme_total_score,881.3031212484995,
|
| 358 |
+
CambrianDD,9600,mmmu_val_mmmu_acc,0.29889,
|
| 359 |
+
CambrianDD,9600,mmstar_average,0.3216523787831747,
|
| 360 |
+
CambrianDD,9600,ocrbench_ocrbench_accuracy,0.378,
|
| 361 |
+
CambrianDD,9600,textvqa_val_exact_match,0.44822,0.006790212641555748
|
| 362 |
+
CambrianDD,10800,average,0.36225439567996837,
|
| 363 |
+
CambrianDD,10800,average_rank,2.857142857142857,
|
| 364 |
+
CambrianDD,10800,docvqa_val_anls,0.5360831553687384,0.0062503917855996835
|
| 365 |
+
CambrianDD,10800,infovqa_val_anls,0.20358054292257038,0.0063419401635538405
|
| 366 |
+
CambrianDD,10800,mme_total_score,1067.7270908363346,
|
| 367 |
+
CambrianDD,10800,mmmu_val_mmmu_acc,0.28333,
|
| 368 |
+
CambrianDD,10800,mmstar_average,0.3324526757885013,
|
| 369 |
+
CambrianDD,10800,ocrbench_ocrbench_accuracy,0.368,
|
| 370 |
+
CambrianDD,10800,textvqa_val_exact_match,0.45008000000000004,0.006781238512185797
|
| 371 |
+
CambrianDD,12000,average,0.37022690841525296,
|
| 372 |
+
CambrianDD,12000,average_rank,2.142857142857143,
|
| 373 |
+
CambrianDD,12000,docvqa_val_anls,0.5329231904501042,0.00621682474881696
|
| 374 |
+
CambrianDD,12000,infovqa_val_anls,0.2099071605782676,0.0064660431120906045
|
| 375 |
+
CambrianDD,12000,mme_total_score,1029.8929571828733,
|
| 376 |
+
CambrianDD,12000,mmmu_val_mmmu_acc,0.30444,
|
| 377 |
+
CambrianDD,12000,mmstar_average,0.322291099463146,
|
| 378 |
+
CambrianDD,12000,ocrbench_ocrbench_accuracy,0.402,
|
| 379 |
+
CambrianDD,12000,textvqa_val_exact_match,0.4498,0.006790199802853561
|
| 380 |
+
CambrianDD,13200,average,0.3705817479124603,
|
| 381 |
+
CambrianDD,13200,average_rank,2.5714285714285716,
|
| 382 |
+
CambrianDD,13200,docvqa_val_anls,0.5406674097008617,0.006220185507992941
|
| 383 |
+
CambrianDD,13200,infovqa_val_anls,0.21720675802877365,0.00650836938989414
|
| 384 |
+
CambrianDD,13200,mme_total_score,1134.0421168467387,
|
| 385 |
+
CambrianDD,13200,mmmu_val_mmmu_acc,0.27889,
|
| 386 |
+
CambrianDD,13200,mmstar_average,0.3148263197451263,
|
| 387 |
+
CambrianDD,13200,ocrbench_ocrbench_accuracy,0.409,
|
| 388 |
+
CambrianDD,13200,textvqa_val_exact_match,0.4629,0.006796348730841747
|
| 389 |
+
CambrianDD,14400,average,0.3623658612664291,
|
| 390 |
+
CambrianDD,14400,average_rank,2.5714285714285716,
|
| 391 |
+
CambrianDD,14400,docvqa_val_anls,0.5152099093312626,0.006100903397549162
|
| 392 |
+
CambrianDD,14400,infovqa_val_anls,0.21109380152234544,0.006429931358574082
|
| 393 |
+
CambrianDD,14400,mme_total_score,1050.657763105242,
|
| 394 |
+
CambrianDD,14400,mmmu_val_mmmu_acc,0.28778,
|
| 395 |
+
CambrianDD,14400,mmstar_average,0.32701145674496673,
|
| 396 |
+
CambrianDD,14400,ocrbench_ocrbench_accuracy,0.383,
|
| 397 |
+
CambrianDD,14400,textvqa_val_exact_match,0.4501,0.006783833877713699
|
| 398 |
+
CambrianDD,15600,average,0.37528695975168413,
|
| 399 |
+
CambrianDD,15600,average_rank,2.142857142857143,
|
| 400 |
+
CambrianDD,15600,docvqa_val_anls,0.5490540524723359,0.006271460845615347
|
| 401 |
+
CambrianDD,15600,infovqa_val_anls,0.2171513714875839,0.006549339354210817
|
| 402 |
+
CambrianDD,15600,mme_total_score,1127.4101640656263,
|
| 403 |
+
CambrianDD,15600,mmmu_val_mmmu_acc,0.28556,
|
| 404 |
+
CambrianDD,15600,mmstar_average,0.332896334550185,
|
| 405 |
+
CambrianDD,15600,ocrbench_ocrbench_accuracy,0.399,
|
| 406 |
+
CambrianDD,15600,textvqa_val_exact_match,0.46806000000000003,0.006792053715831151
|
| 407 |
+
CambrianDD,16800,average,0.378379686213323,
|
| 408 |
+
CambrianDD,16800,average_rank,2.142857142857143,
|
| 409 |
+
CambrianDD,16800,docvqa_val_anls,0.5508556858421052,0.006230983486378255
|
| 410 |
+
CambrianDD,16800,infovqa_val_anls,0.22644813810901007,0.0065684324248959204
|
| 411 |
+
CambrianDD,16800,mme_total_score,956.2077831132453,
|
| 412 |
+
CambrianDD,16800,mmmu_val_mmmu_acc,0.29444,
|
| 413 |
+
CambrianDD,16800,mmstar_average,0.34207429332882255,
|
| 414 |
+
CambrianDD,16800,ocrbench_ocrbench_accuracy,0.405,
|
| 415 |
+
CambrianDD,16800,textvqa_val_exact_match,0.45146000000000003,0.00677465518462557
|
| 416 |
+
CambrianDD,18000,average,0.37736657627182946,
|
| 417 |
+
CambrianDD,18000,average_rank,2.4285714285714284,
|
| 418 |
+
CambrianDD,18000,docvqa_val_anls,0.550171109156601,0.006266033692968377
|
| 419 |
+
CambrianDD,18000,infovqa_val_anls,0.2180520852784964,0.0064910045262362975
|
| 420 |
+
CambrianDD,18000,mme_total_score,1068.6598639455783,
|
| 421 |
+
CambrianDD,18000,mmmu_val_mmmu_acc,0.29,
|
| 422 |
+
CambrianDD,18000,mmstar_average,0.33205626319587944,
|
| 423 |
+
CambrianDD,18000,ocrbench_ocrbench_accuracy,0.409,
|
| 424 |
+
CambrianDD,18000,textvqa_val_exact_match,0.46492,0.0068105767385077025
|
| 425 |
+
CambrianDD,19200,average,0.37238254789618885,
|
| 426 |
+
CambrianDD,19200,average_rank,2.4285714285714284,
|
| 427 |
+
CambrianDD,19200,docvqa_val_anls,0.5332665411568654,0.006195231490784442
|
| 428 |
+
CambrianDD,19200,infovqa_val_anls,0.21571031377445513,0.006431739740859299
|
| 429 |
+
CambrianDD,19200,mme_total_score,1008.0998399359744,
|
| 430 |
+
CambrianDD,19200,mmmu_val_mmmu_acc,0.28444,
|
| 431 |
+
CambrianDD,19200,mmstar_average,0.33939843244581247,
|
| 432 |
+
CambrianDD,19200,ocrbench_ocrbench_accuracy,0.412,
|
| 433 |
+
CambrianDD,19200,textvqa_val_exact_match,0.44948,0.00679714544181831
|
| 434 |
+
LLaVaDD,1200,average,0.2509776310427157,
|
| 435 |
+
LLaVaDD,1200,average_rank,3.0,
|
| 436 |
+
LLaVaDD,1200,docvqa_val_anls,0.2444383475360029,0.005026540300329091
|
| 437 |
+
LLaVaDD,1200,infovqa_val_anls,0.15487600151177214,0.005600679946634536
|
| 438 |
+
LLaVaDD,1200,mme_total_score,860.4959983993598,
|
| 439 |
+
LLaVaDD,1200,mmmu_val_mmmu_acc,0.24667,
|
| 440 |
+
LLaVaDD,1200,mmstar_average,0.21306143720851922,
|
| 441 |
+
LLaVaDD,1200,ocrbench_ocrbench_accuracy,0.325,
|
| 442 |
+
LLaVaDD,1200,textvqa_val_exact_match,0.32182000000000005,0.006396230129691582
|
| 443 |
+
LLaVaDD,2400,average,0.29579280325109375,
|
| 444 |
+
LLaVaDD,2400,average_rank,3.0,
|
| 445 |
+
LLaVaDD,2400,docvqa_val_anls,0.31538339385878306,0.005424291843634001
|
| 446 |
+
LLaVaDD,2400,infovqa_val_anls,0.18261071688457164,0.0059828856978779545
|
| 447 |
+
LLaVaDD,2400,mme_total_score,744.4002601040415,
|
| 448 |
+
LLaVaDD,2400,mmmu_val_mmmu_acc,0.24889,
|
| 449 |
+
LLaVaDD,2400,mmstar_average,0.24909270876320772,
|
| 450 |
+
LLaVaDD,2400,ocrbench_ocrbench_accuracy,0.398,
|
| 451 |
+
LLaVaDD,2400,textvqa_val_exact_match,0.38077999999999995,0.006625050685037501
|
| 452 |
+
LLaVaDD,3600,average,0.32734135036250206,
|
| 453 |
+
LLaVaDD,3600,average_rank,2.5714285714285716,
|
| 454 |
+
LLaVaDD,3600,docvqa_val_anls,0.35235179662486144,0.005549556404054767
|
| 455 |
+
LLaVaDD,3600,infovqa_val_anls,0.18556296710855402,0.006043411346585987
|
| 456 |
+
LLaVaDD,3600,mme_total_score,835.5973389355743,
|
| 457 |
+
LLaVaDD,3600,mmmu_val_mmmu_acc,0.29778,
|
| 458 |
+
LLaVaDD,3600,mmstar_average,0.2915733384415969,
|
| 459 |
+
LLaVaDD,3600,ocrbench_ocrbench_accuracy,0.426,
|
| 460 |
+
LLaVaDD,3600,textvqa_val_exact_match,0.41078000000000003,0.0067073508951900115
|
| 461 |
+
LLaVaDD,4800,average,0.33013109358835874,
|
| 462 |
+
LLaVaDD,4800,average_rank,2.857142857142857,
|
| 463 |
+
LLaVaDD,4800,docvqa_val_anls,0.3502881859839653,0.005478097656928352
|
| 464 |
+
LLaVaDD,4800,infovqa_val_anls,0.19107082217989702,0.006085171603850096
|
| 465 |
+
LLaVaDD,4800,mme_total_score,733.0080032012804,
|
| 466 |
+
LLaVaDD,4800,mmmu_val_mmmu_acc,0.27,
|
| 467 |
+
LLaVaDD,4800,mmstar_average,0.32564755336629003,
|
| 468 |
+
LLaVaDD,4800,ocrbench_ocrbench_accuracy,0.424,
|
| 469 |
+
LLaVaDD,4800,textvqa_val_exact_match,0.41978000000000004,0.006734153256647549
|
| 470 |
+
LLaVaDD,6000,average,0.35016629838344665,
|
| 471 |
+
LLaVaDD,6000,average_rank,2.857142857142857,
|
| 472 |
+
LLaVaDD,6000,docvqa_val_anls,0.3972329845041029,0.005775860539243304
|
| 473 |
+
LLaVaDD,6000,infovqa_val_anls,0.2075063299082507,0.006269613699866996
|
| 474 |
+
LLaVaDD,6000,mme_total_score,793.4260704281713,
|
| 475 |
+
LLaVaDD,6000,mmmu_val_mmmu_acc,0.26778,
|
| 476 |
+
LLaVaDD,6000,mmstar_average,0.31483847588832625,
|
| 477 |
+
LLaVaDD,6000,ocrbench_ocrbench_accuracy,0.466,
|
| 478 |
+
LLaVaDD,6000,textvqa_val_exact_match,0.44764000000000004,0.006783751907166682
|
| 479 |
+
LLaVaDD,7200,average,0.34725325204788143,
|
| 480 |
+
LLaVaDD,7200,average_rank,2.857142857142857,
|
| 481 |
+
LLaVaDD,7200,docvqa_val_anls,0.38590528101197885,0.0056434459440418885
|
| 482 |
+
LLaVaDD,7200,infovqa_val_anls,0.20202261217969525,0.006207536626913416
|
| 483 |
+
LLaVaDD,7200,mme_total_score,806.6480592236894,
|
| 484 |
+
LLaVaDD,7200,mmmu_val_mmmu_acc,0.27778,
|
| 485 |
+
LLaVaDD,7200,mmstar_average,0.31109161909561434,
|
| 486 |
+
LLaVaDD,7200,ocrbench_ocrbench_accuracy,0.461,
|
| 487 |
+
LLaVaDD,7200,textvqa_val_exact_match,0.44572,0.006774357143149495
|
| 488 |
+
LLaVaDD,8400,average,0.36048790647619494,
|
| 489 |
+
LLaVaDD,8400,average_rank,3.0,
|
| 490 |
+
LLaVaDD,8400,docvqa_val_anls,0.41445027737785084,0.005825413484689958
|
| 491 |
+
LLaVaDD,8400,infovqa_val_anls,0.2172068852347218,0.006375888876018907
|
| 492 |
+
LLaVaDD,8400,mme_total_score,838.7092837134853,
|
| 493 |
+
LLaVaDD,8400,mmmu_val_mmmu_acc,0.29444,
|
| 494 |
+
LLaVaDD,8400,mmstar_average,0.31933027624459676,
|
| 495 |
+
LLaVaDD,8400,ocrbench_ocrbench_accuracy,0.473,
|
| 496 |
+
LLaVaDD,8400,textvqa_val_exact_match,0.4445,0.006768213334577188
|
| 497 |
+
LLaVaDD,9600,average,0.35282227960826557,
|
| 498 |
+
LLaVaDD,9600,average_rank,3.0,
|
| 499 |
+
LLaVaDD,9600,docvqa_val_anls,0.39757298714048717,0.005640323691319893
|
| 500 |
+
LLaVaDD,9600,infovqa_val_anls,0.2056866550403572,0.006279512757986692
|
| 501 |
+
LLaVaDD,9600,mme_total_score,760.0508203281312,
|
| 502 |
+
LLaVaDD,9600,mmmu_val_mmmu_acc,0.26556,
|
| 503 |
+
LLaVaDD,9600,mmstar_average,0.32465403546874877,
|
| 504 |
+
LLaVaDD,9600,ocrbench_ocrbench_accuracy,0.469,
|
| 505 |
+
LLaVaDD,9600,textvqa_val_exact_match,0.45446000000000003,0.006778466729448514
|
| 506 |
+
LLaVaDD,10800,average,0.36137323363878177,
|
| 507 |
+
LLaVaDD,10800,average_rank,2.857142857142857,
|
| 508 |
+
LLaVaDD,10800,docvqa_val_anls,0.408003869061574,0.005694760075750652
|
| 509 |
+
LLaVaDD,10800,infovqa_val_anls,0.21338055182077123,0.0063085701231859895
|
| 510 |
+
LLaVaDD,10800,mme_total_score,895.123949579832,
|
| 511 |
+
LLaVaDD,10800,mmmu_val_mmmu_acc,0.28444,
|
| 512 |
+
LLaVaDD,10800,mmstar_average,0.32415498095034523,
|
| 513 |
+
LLaVaDD,10800,ocrbench_ocrbench_accuracy,0.48,
|
| 514 |
+
LLaVaDD,10800,textvqa_val_exact_match,0.45826000000000006,0.0067923767383995465
|
| 515 |
+
LLaVaDD,12000,average,0.3604179175862374,
|
| 516 |
+
LLaVaDD,12000,average_rank,3.142857142857143,
|
| 517 |
+
LLaVaDD,12000,docvqa_val_anls,0.4168137297955176,0.005781419012200098
|
| 518 |
+
LLaVaDD,12000,infovqa_val_anls,0.20846969163165352,0.006257602143639074
|
| 519 |
+
LLaVaDD,12000,mme_total_score,947.7637054821929,
|
| 520 |
+
LLaVaDD,12000,mmmu_val_mmmu_acc,0.26778,
|
| 521 |
+
LLaVaDD,12000,mmstar_average,0.3158840840902534,
|
| 522 |
+
LLaVaDD,12000,ocrbench_ocrbench_accuracy,0.498,
|
| 523 |
+
LLaVaDD,12000,textvqa_val_exact_match,0.45556,0.006805283216437887
|
| 524 |
+
LLaVaDD,13200,average,0.3615047561957609,
|
| 525 |
+
LLaVaDD,13200,average_rank,2.5714285714285716,
|
| 526 |
+
LLaVaDD,13200,docvqa_val_anls,0.4019041370209914,0.005625248973963487
|
| 527 |
+
LLaVaDD,13200,infovqa_val_anls,0.20363163177627105,0.00622309778124376
|
| 528 |
+
LLaVaDD,13200,mme_total_score,874.9429771908764,
|
| 529 |
+
LLaVaDD,13200,mmmu_val_mmmu_acc,0.28111,
|
| 530 |
+
LLaVaDD,13200,mmstar_average,0.3177427683773028,
|
| 531 |
+
LLaVaDD,13200,ocrbench_ocrbench_accuracy,0.494,
|
| 532 |
+
LLaVaDD,13200,textvqa_val_exact_match,0.47063999999999995,0.006828303099939613
|
| 533 |
+
LLaVaDD,14400,average,0.35822126770736845,
|
| 534 |
+
LLaVaDD,14400,average_rank,2.857142857142857,
|
| 535 |
+
LLaVaDD,14400,docvqa_val_anls,0.40475932408589743,0.005711979175622161
|
| 536 |
+
LLaVaDD,14400,infovqa_val_anls,0.2054455223584203,0.006260304272567981
|
| 537 |
+
LLaVaDD,14400,mme_total_score,895.4330732292917,
|
| 538 |
+
LLaVaDD,14400,mmmu_val_mmmu_acc,0.24889,
|
| 539 |
+
LLaVaDD,14400,mmstar_average,0.3293727597998932,
|
| 540 |
+
LLaVaDD,14400,ocrbench_ocrbench_accuracy,0.486,
|
| 541 |
+
LLaVaDD,14400,textvqa_val_exact_match,0.47486000000000006,0.006809762892651316
|
| 542 |
+
LLaVaDD,15600,average,0.3531190433154545,
|
| 543 |
+
LLaVaDD,15600,average_rank,3.2857142857142856,
|
| 544 |
+
LLaVaDD,15600,docvqa_val_anls,0.39525955886140174,0.005587329122981871
|
| 545 |
+
LLaVaDD,15600,infovqa_val_anls,0.20744642424798548,0.006270716143292359
|
| 546 |
+
LLaVaDD,15600,mme_total_score,887.2177871148459,
|
| 547 |
+
LLaVaDD,15600,mmmu_val_mmmu_acc,0.25111,
|
| 548 |
+
LLaVaDD,15600,mmstar_average,0.3150982767833397,
|
| 549 |
+
LLaVaDD,15600,ocrbench_ocrbench_accuracy,0.483,
|
| 550 |
+
LLaVaDD,15600,textvqa_val_exact_match,0.4668000000000001,0.00682372806117965
|
| 551 |
+
LLaVaDD,16800,average,0.35105363138195517,
|
| 552 |
+
LLaVaDD,16800,average_rank,3.2857142857142856,
|
| 553 |
+
LLaVaDD,16800,docvqa_val_anls,0.41852303319453404,0.005850640721947784
|
| 554 |
+
LLaVaDD,16800,infovqa_val_anls,0.2060249552494562,0.006276240807887592
|
| 555 |
+
LLaVaDD,16800,mme_total_score,922.4671868747499,
|
| 556 |
+
LLaVaDD,16800,mmmu_val_mmmu_acc,0.26444,
|
| 557 |
+
LLaVaDD,16800,mmstar_average,0.2870137998477405,
|
| 558 |
+
LLaVaDD,16800,ocrbench_ocrbench_accuracy,0.476,
|
| 559 |
+
LLaVaDD,16800,textvqa_val_exact_match,0.45432,0.006822490512661711
|
| 560 |
+
LLaVaDD,18000,average,0.3577636224241274,
|
| 561 |
+
LLaVaDD,18000,average_rank,3.142857142857143,
|
| 562 |
+
LLaVaDD,18000,docvqa_val_anls,0.40703277772305824,0.005678461401864167
|
| 563 |
+
LLaVaDD,18000,infovqa_val_anls,0.20110975485759583,0.006181644423856455
|
| 564 |
+
LLaVaDD,18000,mme_total_score,810.5214085634254,
|
| 565 |
+
LLaVaDD,18000,mmmu_val_mmmu_acc,0.26444,
|
| 566 |
+
LLaVaDD,18000,mmstar_average,0.3233392019641104,
|
| 567 |
+
LLaVaDD,18000,ocrbench_ocrbench_accuracy,0.482,
|
| 568 |
+
LLaVaDD,18000,textvqa_val_exact_match,0.46865999999999997,0.006819241988099444
|
| 569 |
+
LLaVaDD,19200,average,0.35213697279154416,
|
| 570 |
+
LLaVaDD,19200,average_rank,3.2857142857142856,
|
| 571 |
+
LLaVaDD,19200,docvqa_val_anls,0.40393359954160324,0.0057202986837765315
|
| 572 |
+
LLaVaDD,19200,infovqa_val_anls,0.19769978171894423,0.006187032583796771
|
| 573 |
+
LLaVaDD,19200,mme_total_score,918.2750100040016,
|
| 574 |
+
LLaVaDD,19200,mmmu_val_mmmu_acc,0.26778,
|
| 575 |
+
LLaVaDD,19200,mmstar_average,0.31024845548871743,
|
| 576 |
+
LLaVaDD,19200,ocrbench_ocrbench_accuracy,0.478,
|
| 577 |
+
LLaVaDD,19200,textvqa_val_exact_match,0.45516,0.006808813910614232
|
app/src/content/assets/data/image_correspondence_filters.csv
CHANGED
|
@@ -227,18 +227,6 @@ Baseline,19000,mmstar_average,0.356220913822775,
|
|
| 227 |
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
|
| 228 |
Baseline,19000,seedbench_seed_all,0.554585881045025,
|
| 229 |
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
|
| 230 |
-
Baseline,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662
|
| 231 |
-
Baseline,20000,average,0.4873169067639118,
|
| 232 |
-
Baseline,20000,average_rank,2.1,
|
| 233 |
-
Baseline,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618
|
| 234 |
-
Baseline,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767
|
| 235 |
-
Baseline,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538
|
| 236 |
-
Baseline,20000,mme_total_score,1324.6738695478193,
|
| 237 |
-
Baseline,20000,mmmu_val_mmmu_acc,0.30111,
|
| 238 |
-
Baseline,20000,mmstar_average,0.33806766134497995,
|
| 239 |
-
Baseline,20000,ocrbench_ocrbench_accuracy,0.555,
|
| 240 |
-
Baseline,20000,seedbench_seed_all,0.5587548638132296,
|
| 241 |
-
Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
|
| 242 |
≥2,1000,ai2d_exact_match,0.25647668393782386,0.007859644922870102
|
| 243 |
≥2,1000,average,0.27425088839708317,
|
| 244 |
≥2,1000,average_rank,3.6,
|
|
@@ -467,18 +455,6 @@ Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
|
|
| 467 |
≥2,19000,ocrbench_ocrbench_accuracy,0.562,
|
| 468 |
≥2,19000,seedbench_seed_all,0.5588660366870484,
|
| 469 |
≥2,19000,textvqa_val_exact_match,0.5562600000000001,0.006734421501999508
|
| 470 |
-
≥2,20000,ai2d_exact_match,0.4805699481865285,0.008992356706334513
|
| 471 |
-
≥2,20000,average,0.49109872298543183,
|
| 472 |
-
≥2,20000,average_rank,1.7,
|
| 473 |
-
≥2,20000,chartqa_relaxed_overall,0.6464,0.009563650001989001
|
| 474 |
-
≥2,20000,docvqa_val_anls,0.6823974164165829,0.005959610876737005
|
| 475 |
-
≥2,20000,infovqa_val_anls,0.26825054401896686,0.007072214875698234
|
| 476 |
-
≥2,20000,mme_total_score,1187.1244497799119,
|
| 477 |
-
≥2,20000,mmmu_val_mmmu_acc,0.31,
|
| 478 |
-
≥2,20000,mmstar_average,0.3539436054730449,
|
| 479 |
-
≥2,20000,ocrbench_ocrbench_accuracy,0.568,
|
| 480 |
-
≥2,20000,seedbench_seed_all,0.5565869927737632,
|
| 481 |
-
≥2,20000,textvqa_val_exact_match,0.55374,0.006734617546282709
|
| 482 |
≥3,1000,ai2d_exact_match,0.2619818652849741,0.007914086941902848
|
| 483 |
≥3,1000,average,0.2794334029794183,
|
| 484 |
≥3,1000,average_rank,2.8,
|
|
@@ -707,18 +683,6 @@ Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
|
|
| 707 |
≥3,19000,ocrbench_ocrbench_accuracy,0.568,
|
| 708 |
≥3,19000,seedbench_seed_all,0.5493051695386326,
|
| 709 |
≥3,19000,textvqa_val_exact_match,0.56014,0.006731277597872481
|
| 710 |
-
≥3,20000,ai2d_exact_match,0.47117875647668395,0.008984191131586656
|
| 711 |
-
≥3,20000,average,0.4903196828425222,
|
| 712 |
-
≥3,20000,average_rank,2.2,
|
| 713 |
-
≥3,20000,chartqa_relaxed_overall,0.648,0.009553790345406665
|
| 714 |
-
≥3,20000,docvqa_val_anls,0.6902930502166585,0.0059096225576472155
|
| 715 |
-
≥3,20000,infovqa_val_anls,0.2637260616044305,0.007044756469416206
|
| 716 |
-
≥3,20000,mme_total_score,968.2636054421769,
|
| 717 |
-
≥3,20000,mmmu_val_mmmu_acc,0.29778,
|
| 718 |
-
≥3,20000,mmstar_average,0.3516103723377342,
|
| 719 |
-
≥3,20000,ocrbench_ocrbench_accuracy,0.568,
|
| 720 |
-
≥3,20000,seedbench_seed_all,0.5520289049471929,
|
| 721 |
-
≥3,20000,textvqa_val_exact_match,0.57026,0.0067066312154801
|
| 722 |
≥4,1000,ai2d_exact_match,0.24514248704663213,0.00774236194438642
|
| 723 |
≥4,1000,average,0.2886475913888803,
|
| 724 |
≥4,1000,average_rank,2.3,
|
|
|
|
| 227 |
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
|
| 228 |
Baseline,19000,seedbench_seed_all,0.554585881045025,
|
| 229 |
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
≥2,1000,ai2d_exact_match,0.25647668393782386,0.007859644922870102
|
| 231 |
≥2,1000,average,0.27425088839708317,
|
| 232 |
≥2,1000,average_rank,3.6,
|
|
|
|
| 455 |
≥2,19000,ocrbench_ocrbench_accuracy,0.562,
|
| 456 |
≥2,19000,seedbench_seed_all,0.5588660366870484,
|
| 457 |
≥2,19000,textvqa_val_exact_match,0.5562600000000001,0.006734421501999508
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 458 |
≥3,1000,ai2d_exact_match,0.2619818652849741,0.007914086941902848
|
| 459 |
≥3,1000,average,0.2794334029794183,
|
| 460 |
≥3,1000,average_rank,2.8,
|
|
|
|
| 683 |
≥3,19000,ocrbench_ocrbench_accuracy,0.568,
|
| 684 |
≥3,19000,seedbench_seed_all,0.5493051695386326,
|
| 685 |
≥3,19000,textvqa_val_exact_match,0.56014,0.006731277597872481
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 686 |
≥4,1000,ai2d_exact_match,0.24514248704663213,0.00774236194438642
|
| 687 |
≥4,1000,average,0.2886475913888803,
|
| 688 |
≥4,1000,average_rank,2.3,
|
app/src/content/assets/data/visual_dependency_filters.csv
CHANGED
|
@@ -227,18 +227,6 @@ Baseline,19000,mmstar_average,0.356220913822775,
|
|
| 227 |
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
|
| 228 |
Baseline,19000,seedbench_seed_all,0.554585881045025,
|
| 229 |
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
|
| 230 |
-
Baseline,20000,ai2d_exact_match,0.47571243523316065,0.00898853090258662
|
| 231 |
-
Baseline,20000,average,0.4873169067639118,
|
| 232 |
-
Baseline,20000,average_rank,1.1,
|
| 233 |
-
Baseline,20000,chartqa_relaxed_overall,0.6336,0.009638338810708618
|
| 234 |
-
Baseline,20000,docvqa_val_anls,0.6895214454380043,0.005896462073053767
|
| 235 |
-
Baseline,20000,infovqa_val_anls,0.2655657550458317,0.007033265532032538
|
| 236 |
-
Baseline,20000,mme_total_score,1324.6738695478193,
|
| 237 |
-
Baseline,20000,mmmu_val_mmmu_acc,0.30111,
|
| 238 |
-
Baseline,20000,mmstar_average,0.33806766134497995,
|
| 239 |
-
Baseline,20000,ocrbench_ocrbench_accuracy,0.555,
|
| 240 |
-
Baseline,20000,seedbench_seed_all,0.5587548638132296,
|
| 241 |
-
Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
|
| 242 |
≥2,1000,ai2d_exact_match,0.25777202072538863,0.00787260087439643
|
| 243 |
≥2,1000,average,0.29870004148945406,
|
| 244 |
≥2,1000,average_rank,1.6,
|
|
@@ -1151,15 +1139,3 @@ Baseline,20000,textvqa_val_exact_match,0.56852,0.006720151338087659
|
|
| 1151 |
≥5,19000,ocrbench_ocrbench_accuracy,0.569,
|
| 1152 |
≥5,19000,seedbench_seed_all,0.535408560311284,
|
| 1153 |
≥5,19000,textvqa_val_exact_match,0.52872,0.006772725173905718
|
| 1154 |
-
≥5,20000,ai2d_exact_match,0.40867875647668395,0.00884778289870743
|
| 1155 |
-
≥5,20000,average,0.4447757248308666,
|
| 1156 |
-
≥5,20000,average_rank,1.9,
|
| 1157 |
-
≥5,20000,chartqa_relaxed_overall,0.5368,0.009974873595254053
|
| 1158 |
-
≥5,20000,docvqa_val_anls,0.5881395593641573,0.00625433143624698
|
| 1159 |
-
≥5,20000,infovqa_val_anls,0.21756373662547837,0.006798638807266341
|
| 1160 |
-
≥5,20000,mme_total_score,1235.672769107643,
|
| 1161 |
-
≥5,20000,mmmu_val_mmmu_acc,0.28667,
|
| 1162 |
-
≥5,20000,mmstar_average,0.32944615805983984,
|
| 1163 |
-
≥5,20000,ocrbench_ocrbench_accuracy,0.57,
|
| 1164 |
-
≥5,20000,seedbench_seed_all,0.5339633129516398,
|
| 1165 |
-
≥5,20000,textvqa_val_exact_match,0.53172,0.006760466633437396
|
|
|
|
| 227 |
Baseline,19000,ocrbench_ocrbench_accuracy,0.577,
|
| 228 |
Baseline,19000,seedbench_seed_all,0.554585881045025,
|
| 229 |
Baseline,19000,textvqa_val_exact_match,0.57714,0.0066918487914812905
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
≥2,1000,ai2d_exact_match,0.25777202072538863,0.00787260087439643
|
| 231 |
≥2,1000,average,0.29870004148945406,
|
| 232 |
≥2,1000,average_rank,1.6,
|
|
|
|
| 1139 |
≥5,19000,ocrbench_ocrbench_accuracy,0.569,
|
| 1140 |
≥5,19000,seedbench_seed_all,0.535408560311284,
|
| 1141 |
≥5,19000,textvqa_val_exact_match,0.52872,0.006772725173905718
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|