Upload organize_model_results.json with huggingface_hub
Browse files- organize_model_results.json +67 -67
organize_model_results.json
CHANGED
|
@@ -17,20 +17,28 @@
|
|
| 17 |
"Qwen2-Audio-7B-Instruct": 79.0
|
| 18 |
}
|
| 19 |
},
|
| 20 |
-
"
|
| 21 |
-
"
|
| 22 |
-
"gpt-4o-audio":
|
| 23 |
-
"Qwen-Audio-Chat":
|
| 24 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION":
|
| 25 |
-
"Qwen2-Audio-7B-Instruct":
|
| 26 |
-
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct":
|
| 27 |
-
"
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
-
"SALMONN_7B":
|
| 31 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct":
|
| 32 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
"llama3_70b_judge": {
|
|
|
|
| 34 |
"Qwen-Audio-Chat": 53.6,
|
| 35 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
|
| 36 |
"Qwen2-Audio-7B-Instruct": 58.9,
|
|
@@ -41,25 +49,25 @@
|
|
| 41 |
"SALMONN_7B": 48.4,
|
| 42 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
|
| 43 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
"gpt4o_judge": {
|
| 45 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9,
|
| 46 |
"Qwen2-Audio-7B-Instruct": 53.0
|
| 47 |
}
|
| 48 |
},
|
| 49 |
"mmau_mini_music": {
|
| 50 |
-
"string_match": {
|
| 51 |
-
"gpt-4o-audio": 0.0,
|
| 52 |
-
"Qwen-Audio-Chat": 0.4311377245508982,
|
| 53 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
|
| 54 |
-
"Qwen2-Audio-7B-Instruct": 0.45808383233532934,
|
| 55 |
-
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
|
| 56 |
-
"gemini-1.5-flash": 0.2904191616766467,
|
| 57 |
-
"phi_4_multimodal_instruct": 0.6377245508982036,
|
| 58 |
-
"seallms_audio_7b": 0.6047904191616766,
|
| 59 |
-
"SALMONN_7B": 0.4820359281437126,
|
| 60 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
|
| 61 |
-
},
|
| 62 |
"llama3_70b_judge": {
|
|
|
|
| 63 |
"Qwen-Audio-Chat": 0.5958083832335329,
|
| 64 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
|
| 65 |
"Qwen2-Audio-7B-Instruct": 0.6017964071856288,
|
|
@@ -70,25 +78,25 @@
|
|
| 70 |
"SALMONN_7B": 0.5598802395209581,
|
| 71 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
|
| 72 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
"gpt4o_judge": {
|
| 74 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557,
|
| 75 |
"Qwen2-Audio-7B-Instruct": 0.5473684210526316
|
| 76 |
}
|
| 77 |
},
|
| 78 |
"mmau_mini_sound": {
|
| 79 |
-
"string_match": {
|
| 80 |
-
"gpt-4o-audio": 0.0,
|
| 81 |
-
"Qwen-Audio-Chat": 0.43543543543543545,
|
| 82 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
|
| 83 |
-
"Qwen2-Audio-7B-Instruct": 0.4744744744744745,
|
| 84 |
-
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
|
| 85 |
-
"gemini-1.5-flash": 0.3483483483483483,
|
| 86 |
-
"phi_4_multimodal_instruct": 0.5975975975975976,
|
| 87 |
-
"seallms_audio_7b": 0.5165165165165165,
|
| 88 |
-
"SALMONN_7B": 0.4594594594594595,
|
| 89 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
|
| 90 |
-
},
|
| 91 |
"llama3_70b_judge": {
|
|
|
|
| 92 |
"Qwen-Audio-Chat": 0.5945945945945946,
|
| 93 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
|
| 94 |
"Qwen2-Audio-7B-Instruct": 0.6306306306306306,
|
|
@@ -99,25 +107,25 @@
|
|
| 99 |
"SALMONN_7B": 0.5105105105105106,
|
| 100 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
|
| 101 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
"gpt4o_judge": {
|
| 103 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997,
|
| 104 |
"Qwen2-Audio-7B-Instruct": 0.5980392156862745
|
| 105 |
}
|
| 106 |
},
|
| 107 |
"mmau_mini_speech": {
|
| 108 |
-
"string_match": {
|
| 109 |
-
"gpt-4o-audio": 0.0,
|
| 110 |
-
"Qwen-Audio-Chat": 0.2882882882882883,
|
| 111 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
|
| 112 |
-
"Qwen2-Audio-7B-Instruct": 0.3993993993993994,
|
| 113 |
-
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
|
| 114 |
-
"gemini-1.5-flash": 0.3033033033033033,
|
| 115 |
-
"phi_4_multimodal_instruct": 0.3993993993993994,
|
| 116 |
-
"seallms_audio_7b": 0.42342342342342343,
|
| 117 |
-
"SALMONN_7B": 0.2732732732732733,
|
| 118 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
|
| 119 |
-
},
|
| 120 |
"llama3_70b_judge": {
|
|
|
|
| 121 |
"Qwen-Audio-Chat": 0.4174174174174174,
|
| 122 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
|
| 123 |
"Qwen2-Audio-7B-Instruct": 0.5345345345345346,
|
|
@@ -128,30 +136,22 @@
|
|
| 128 |
"SALMONN_7B": 0.3813813813813814,
|
| 129 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
|
| 130 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
"gpt4o_judge": {
|
| 132 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826,
|
| 133 |
"Qwen2-Audio-7B-Instruct": 0.44660194174757284
|
| 134 |
}
|
| 135 |
},
|
| 136 |
-
"slue_p2_sqa5_test": {
|
| 137 |
-
"llama3_70b_judge": {
|
| 138 |
-
"gpt-4o-audio": 89.41176470588235,
|
| 139 |
-
"Qwen-Audio-Chat": 79.36274509803921,
|
| 140 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 86.76470588235293,
|
| 141 |
-
"Qwen2-Audio-7B-Instruct": 80.04901960784315,
|
| 142 |
-
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.57843137254902,
|
| 143 |
-
"phi_4_multimodal_instruct": 88.33333333333334,
|
| 144 |
-
"seallms_audio_7b": 83.52941176470588,
|
| 145 |
-
"WavLLM_fairseq": 83.92156862745098,
|
| 146 |
-
"SALMONN_7B": 83.48039215686273,
|
| 147 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
| 148 |
-
},
|
| 149 |
-
"gpt4o_judge": {
|
| 150 |
-
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
| 151 |
-
"Qwen2-Audio-7B-Instruct": 84.86666666666666,
|
| 152 |
-
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 153 |
-
}
|
| 154 |
-
},
|
| 155 |
"voxceleb_accent_test": {
|
| 156 |
"llama3_70b_judge": {
|
| 157 |
"Qwen-Audio-Chat": 48.05088223225277,
|
|
|
|
| 17 |
"Qwen2-Audio-7B-Instruct": 79.0
|
| 18 |
}
|
| 19 |
},
|
| 20 |
+
"slue_p2_sqa5_test": {
|
| 21 |
+
"llama3_70b_judge": {
|
| 22 |
+
"gpt-4o-audio": 89.41176470588235,
|
| 23 |
+
"Qwen-Audio-Chat": 79.36274509803921,
|
| 24 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 86.76470588235293,
|
| 25 |
+
"Qwen2-Audio-7B-Instruct": 80.04901960784315,
|
| 26 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 88.57843137254902,
|
| 27 |
+
"phi_4_multimodal_instruct": 88.33333333333334,
|
| 28 |
+
"seallms_audio_7b": 83.52941176470588,
|
| 29 |
+
"WavLLM_fairseq": 83.92156862745098,
|
| 30 |
+
"SALMONN_7B": 83.48039215686273,
|
| 31 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 82.99019607843137
|
| 32 |
},
|
| 33 |
+
"gpt4o_judge": {
|
| 34 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 88.23529411764707,
|
| 35 |
+
"Qwen2-Audio-7B-Instruct": 84.86666666666666,
|
| 36 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 87.79411764705883
|
| 37 |
+
}
|
| 38 |
+
},
|
| 39 |
+
"mmau_mini": {
|
| 40 |
"llama3_70b_judge": {
|
| 41 |
+
"gpt-4o-audio": 40.6,
|
| 42 |
"Qwen-Audio-Chat": 53.6,
|
| 43 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 64.60000000000001,
|
| 44 |
"Qwen2-Audio-7B-Instruct": 58.9,
|
|
|
|
| 49 |
"SALMONN_7B": 48.4,
|
| 50 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 55.60000000000001
|
| 51 |
},
|
| 52 |
+
"string_match": {
|
| 53 |
+
"Qwen-Audio-Chat": 38.5,
|
| 54 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 60.5,
|
| 55 |
+
"Qwen2-Audio-7B-Instruct": 44.4,
|
| 56 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 46.7,
|
| 57 |
+
"gemini-1.5-flash": 31.4,
|
| 58 |
+
"phi_4_multimodal_instruct": 54.50000000000001,
|
| 59 |
+
"seallms_audio_7b": 51.5,
|
| 60 |
+
"SALMONN_7B": 40.5,
|
| 61 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 51.0
|
| 62 |
+
},
|
| 63 |
"gpt4o_judge": {
|
| 64 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 63.9,
|
| 65 |
"Qwen2-Audio-7B-Instruct": 53.0
|
| 66 |
}
|
| 67 |
},
|
| 68 |
"mmau_mini_music": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
"llama3_70b_judge": {
|
| 70 |
+
"gpt-4o-audio": 0.2904191616766467,
|
| 71 |
"Qwen-Audio-Chat": 0.5958083832335329,
|
| 72 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6437125748502994,
|
| 73 |
"Qwen2-Audio-7B-Instruct": 0.6017964071856288,
|
|
|
|
| 78 |
"SALMONN_7B": 0.5598802395209581,
|
| 79 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5359281437125748
|
| 80 |
},
|
| 81 |
+
"string_match": {
|
| 82 |
+
"Qwen-Audio-Chat": 0.4311377245508982,
|
| 83 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6077844311377245,
|
| 84 |
+
"Qwen2-Audio-7B-Instruct": 0.45808383233532934,
|
| 85 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.39520958083832336,
|
| 86 |
+
"gemini-1.5-flash": 0.2904191616766467,
|
| 87 |
+
"phi_4_multimodal_instruct": 0.6377245508982036,
|
| 88 |
+
"seallms_audio_7b": 0.6047904191616766,
|
| 89 |
+
"SALMONN_7B": 0.4820359281437126,
|
| 90 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5
|
| 91 |
+
},
|
| 92 |
"gpt4o_judge": {
|
| 93 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6347305389221557,
|
| 94 |
"Qwen2-Audio-7B-Instruct": 0.5473684210526316
|
| 95 |
}
|
| 96 |
},
|
| 97 |
"mmau_mini_sound": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
"llama3_70b_judge": {
|
| 99 |
+
"gpt-4o-audio": 0.3843843843843844,
|
| 100 |
"Qwen-Audio-Chat": 0.5945945945945946,
|
| 101 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.7027027027027027,
|
| 102 |
"Qwen2-Audio-7B-Instruct": 0.6306306306306306,
|
|
|
|
| 107 |
"SALMONN_7B": 0.5105105105105106,
|
| 108 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5105105105105106
|
| 109 |
},
|
| 110 |
+
"string_match": {
|
| 111 |
+
"Qwen-Audio-Chat": 0.43543543543543545,
|
| 112 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6606606606606606,
|
| 113 |
+
"Qwen2-Audio-7B-Instruct": 0.4744744744744745,
|
| 114 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.45045045045045046,
|
| 115 |
+
"gemini-1.5-flash": 0.3483483483483483,
|
| 116 |
+
"phi_4_multimodal_instruct": 0.5975975975975976,
|
| 117 |
+
"seallms_audio_7b": 0.5165165165165165,
|
| 118 |
+
"SALMONN_7B": 0.4594594594594595,
|
| 119 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.46546546546546547
|
| 120 |
+
},
|
| 121 |
"gpt4o_judge": {
|
| 122 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.6996996996996997,
|
| 123 |
"Qwen2-Audio-7B-Instruct": 0.5980392156862745
|
| 124 |
}
|
| 125 |
},
|
| 126 |
"mmau_mini_speech": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
"llama3_70b_judge": {
|
| 128 |
+
"gpt-4o-audio": 0.5435435435435435,
|
| 129 |
"Qwen-Audio-Chat": 0.4174174174174174,
|
| 130 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5915915915915916,
|
| 131 |
"Qwen2-Audio-7B-Instruct": 0.5345345345345346,
|
|
|
|
| 136 |
"SALMONN_7B": 0.3813813813813814,
|
| 137 |
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.6216216216216216
|
| 138 |
},
|
| 139 |
+
"string_match": {
|
| 140 |
+
"Qwen-Audio-Chat": 0.2882882882882883,
|
| 141 |
+
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5465465465465466,
|
| 142 |
+
"Qwen2-Audio-7B-Instruct": 0.3993993993993994,
|
| 143 |
+
"cascade_whisper_large_v2_gemma2_9b_cpt_sea_lionv3_instruct": 0.5555555555555556,
|
| 144 |
+
"gemini-1.5-flash": 0.3033033033033033,
|
| 145 |
+
"phi_4_multimodal_instruct": 0.3993993993993994,
|
| 146 |
+
"seallms_audio_7b": 0.42342342342342343,
|
| 147 |
+
"SALMONN_7B": 0.2732732732732733,
|
| 148 |
+
"cascade_whisper_large_v3_llama_3_8b_instruct": 0.5645645645645646
|
| 149 |
+
},
|
| 150 |
"gpt4o_judge": {
|
| 151 |
"MERaLiON-AudioLLM-Whisper-SEA-LION": 0.5825825825825826,
|
| 152 |
"Qwen2-Audio-7B-Instruct": 0.44660194174757284
|
| 153 |
}
|
| 154 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
"voxceleb_accent_test": {
|
| 156 |
"llama3_70b_judge": {
|
| 157 |
"Qwen-Audio-Chat": 48.05088223225277,
|