Clémentine
committed on
Commit
·
26ec6ee
1
Parent(s):
49f71ca
tmp
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app/public/finetasks +1 -0
- app/src/content/assets/finetasks/code.js +572 -198
- app/src/content/assets/finetasks/data/ar/acva_ara:_average_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/acva_ara:_average_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/arcd_ara_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/arcd_ara_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/boolq_ara_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/boolq_ara_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_stats.csv +3 -0
- app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_data.csv +3 -0
- app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_stats.csv +3 -0
app/public/finetasks
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
../src/content/assets/finetasks
|
app/src/content/assets/finetasks/code.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
|
|
| 1 |
import Papa from 'papaparse';
|
| 2 |
-
import
|
|
|
|
| 3 |
|
| 4 |
const languageMap = {
|
| 5 |
'Arabic': 'ar',
|
|
@@ -10,236 +12,608 @@ const languageMap = {
|
|
| 10 |
'Thai': 'th',
|
| 11 |
'Chinese': 'zh',
|
| 12 |
'French': 'fr',
|
| 13 |
-
'Hindi': 'hi'
|
| 14 |
};
|
| 15 |
|
| 16 |
-
const
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
]
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
]
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
const select = document.createElement('select');
|
|
|
|
| 36 |
options.forEach(option => {
|
| 37 |
const optionElement = document.createElement('option');
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
optionElement.textContent = option.label;
|
| 41 |
-
} else {
|
| 42 |
-
optionElement.value = option;
|
| 43 |
-
optionElement.textContent = option;
|
| 44 |
-
}
|
| 45 |
select.appendChild(optionElement);
|
| 46 |
});
|
| 47 |
-
select.addEventListener('change',
|
| 48 |
return select;
|
| 49 |
}
|
| 50 |
|
| 51 |
-
function
|
| 52 |
-
const
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
const columnNameMap = {
|
| 61 |
-
// 'Task': 'Task',
|
| 62 |
-
// 'Type': 'Type',
|
| 63 |
-
// 'f1': 'f1',
|
| 64 |
-
// 'prefix_match': 'prefix_match',
|
| 65 |
-
// 'acc': 'acc',
|
| 66 |
-
'acc_norm': 'acc_char',
|
| 67 |
-
'acc_norm_token': 'acc_token',
|
| 68 |
-
'acc_norm_pmi': 'acc_pmi',
|
| 69 |
-
'prefix_match': 'prefix'
|
| 70 |
-
};
|
| 71 |
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
const headerRow = header.insertRow();
|
| 83 |
-
columns.forEach(column => {
|
| 84 |
-
const th = document.createElement('th');
|
| 85 |
-
th.textContent = columnNameMap[column] || column;
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
}
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 94 |
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
}
|
| 121 |
-
td.textContent = value;
|
| 122 |
});
|
| 123 |
});
|
|
|
|
| 124 |
|
| 125 |
-
|
| 126 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
|
|
|
| 139 |
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
if (!response.ok) {
|
| 161 |
-
throw new Error(`HTTP error! status: ${response.status}`);
|
| 162 |
-
}
|
| 163 |
-
return response.text();
|
| 164 |
-
})
|
| 165 |
-
.then(csvText => {
|
| 166 |
-
const results = Papa.parse(csvText, { header: true }).data;
|
| 167 |
-
perTaskTableContainer.innerHTML = '';
|
| 168 |
-
const tableWrapper = createPerTaskResultsTable(results, tableType, metric);
|
| 169 |
-
perTaskTableContainer.appendChild(tableWrapper);
|
| 170 |
-
|
| 171 |
-
if (perTaskDataTable) {
|
| 172 |
-
perTaskDataTable.destroy();
|
| 173 |
-
}
|
| 174 |
-
|
| 175 |
-
perTaskDataTable = new DataTable('.fine-tasks-results-table', {
|
| 176 |
-
perPage: 10,
|
| 177 |
-
perPageSelect: false,
|
| 178 |
-
searchable: false,
|
| 179 |
-
sortable: true,
|
| 180 |
-
fixedHeight: true,
|
| 181 |
-
labels: {
|
| 182 |
-
info: '' // This removes the "Showing 1 to X of Y entries" text
|
| 183 |
-
}
|
| 184 |
-
});
|
| 185 |
-
|
| 186 |
-
})
|
| 187 |
-
.catch(error => {
|
| 188 |
-
console.error('Error fetching CSV:', error);
|
| 189 |
-
perTaskTableContainer.innerHTML = `<p>Error loading data: ${error.message}</p>`;
|
| 190 |
-
});
|
| 191 |
}
|
| 192 |
|
| 193 |
-
const
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
const
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
// Language control group
|
| 207 |
-
const languageControlGroup = document.createElement('div');
|
| 208 |
-
languageControlGroup.className = 'control-group';
|
| 209 |
-
const languageLabelPerTask = document.createElement('label');
|
| 210 |
-
languageLabelPerTask.textContent = 'Language: ';
|
| 211 |
-
const languageDropdownPerTask = createDropdown(Object.keys(languageMap), updatePerTaskResults);
|
| 212 |
-
languageControlGroup.appendChild(languageLabelPerTask);
|
| 213 |
-
languageControlGroup.appendChild(languageDropdownPerTask);
|
| 214 |
-
|
| 215 |
-
// Table type control group
|
| 216 |
-
const tableTypeControlGroup = document.createElement('div');
|
| 217 |
-
tableTypeControlGroup.className = 'control-group';
|
| 218 |
-
const tableTypeLabelPerTask = document.createElement('label');
|
| 219 |
-
tableTypeLabelPerTask.textContent = 'Type: ';
|
| 220 |
-
const tableTypeDropdownPerTask = createDropdown(tableTypes, updatePerTaskResults);
|
| 221 |
-
tableTypeControlGroup.appendChild(tableTypeLabelPerTask);
|
| 222 |
-
tableTypeControlGroup.appendChild(tableTypeDropdownPerTask);
|
| 223 |
-
|
| 224 |
-
// Metric control group
|
| 225 |
-
const metricControlGroup = document.createElement('div');
|
| 226 |
-
metricControlGroup.className = 'control-group';
|
| 227 |
-
const metricLabelPerTask = document.createElement('label');
|
| 228 |
-
metricLabelPerTask.textContent = 'Criteria: ';
|
| 229 |
-
const metricDropdownPerTask = createDropdown(metricTypes, updatePerTaskResults);
|
| 230 |
-
metricDropdownPerTask.value = 'max_score'; // Set default to Max Score
|
| 231 |
-
metricControlGroup.appendChild(metricLabelPerTask);
|
| 232 |
-
metricControlGroup.appendChild(metricDropdownPerTask);
|
| 233 |
-
|
| 234 |
-
perTaskControls.appendChild(taskFolderControlGroup);
|
| 235 |
-
perTaskControls.appendChild(languageControlGroup);
|
| 236 |
-
perTaskControls.appendChild(tableTypeControlGroup);
|
| 237 |
-
perTaskControls.appendChild(metricControlGroup);
|
| 238 |
-
|
| 239 |
-
container.appendChild(perTaskControls);
|
| 240 |
-
// container.appendChild(perTaskTitleElement);
|
| 241 |
-
container.appendChild(perTaskTableContainer);
|
| 242 |
-
|
| 243 |
-
// Initialize with default values
|
| 244 |
-
updatePerTaskResults();
|
| 245 |
}
|
|
|
|
|
|
| 1 |
+
import Plotly from 'plotly.js-basic-dist-min';
|
| 2 |
import Papa from 'papaparse';
|
| 3 |
+
import _ from 'lodash';
|
| 4 |
+
import { getColor } from './colors.mjs';
|
| 5 |
|
| 6 |
const languageMap = {
|
| 7 |
'Arabic': 'ar',
|
|
|
|
| 12 |
'Thai': 'th',
|
| 13 |
'Chinese': 'zh',
|
| 14 |
'French': 'fr',
|
| 15 |
+
'Hindi': 'hi'
|
| 16 |
};
|
| 17 |
|
| 18 |
+
/**
 * Human-readable display names keyed by internal run identifier.
 * NOTE(review): the consumer of this map is outside the visible part of the
 * file; presumably it labels plot traces — confirm against the caller.
 */
const runNameMap = {
    "orion": "Dataset-A",
    "helios": "Dataset-B",
    "lynx": "Dataset-C",
    "aquila": "Dataset-D",
    "commoncrawl": "CommonCrawl",
    "baseline": "Baseline"
};
|
| 26 |
+
|
| 27 |
+
/**
 * Task identifiers available per language code (the values of `languageMap`).
 * Each identifier is used verbatim to build the CSV paths
 * `data/<langCode>/<task>_data.csv` and `data/<langCode>/<task>_stats.csv`,
 * so the spelling here must match the file names on disk exactly
 * (e.g. `alfgahafa_mlqa_ara_cf` is spelled the same way as its CSV files).
 */
const taskLists = {
    ar: ['acva_ara:_average', 'alfgahafa_mlqa_ara_cf', 'alghafa_arc_ara_cf:easy', 'alghafa_facts_ara_cf', 'alghafa_meta_dialects_ara_cf', 'alghafa_mmlu_ara_cf:_average', 'alghafa_openbookqa_ara_cf', 'alghafa_piqa_ara_cf', 'alghafa_race_ara_cf', 'alghafa_rating_sentiment_ara_cf', 'alghafa_rating_sentiment_no_neutral_ara_cf', 'alghafa_sciqa_ara_cf', 'alghafa_sentiment_ara_cf', 'arcd_ara', 'belebele_arb_Arab_cf', 'boolq_ara', 'exams_ara_cf:_average', 'mkqa_ara:_average', 'mlmm_arc_ara_cf:challenge', 'mlmm_hellaswag_ara_cf', 'mlmm_mmlu_ara_cf:_average', 'mlmm_truthfulqa_ara_cf:mc1', 'mlmm_truthfulqa_ara_cf:mc2', 'mlqa_ara', 'mmlu_ara_cf:_average', 'soqal_ara_cf', 'toxigen_ara_cf', 'tydiqa_ara', 'xcodah_ara_cf', 'xcopa_ara_cf', 'xcsqa_ara_cf', 'xnli2.0_ara_cf', 'xnli_ara_cf', 'xquad_ara', 'xstory_cloze_ara_cf'],
    fr: ['belebele_fra_Latn_cf', 'community_boolq_fra_cf', 'exams_fra_cf:_average', 'fquadv2_fra', 'frenchbench_arc_fra_cf:challenge', 'frenchbench_hellaswag_fra_cf', 'meta_mmlu_fra_cf:_average', 'mintaka_fra', 'mkqa_fra:_average', 'mlmm_arc_fra_cf:challenge', 'mlmm_hellaswag_fra_cf', 'mlmm_mmlu_fra_cf:_average', 'mlmm_truthfulqa_fra_cf:mc1', 'mlmm_truthfulqa_fra_cf:mc2', 'pawsx_fra_cf', 'xcodah_fra_cf', 'xcsqa_fra_cf', 'xnli2.0_fra_cf', 'xwinograd_fra_cf'],
    hi: ['belebele_hin_Deva_cf', 'community_arc_hin_cf:challenge', 'community_arc_hin_cf:easy', 'community_boolq_hin', 'community_hellaswag_hin_cf', 'indicnxnli_hin_cf', 'indicqa_hin', 'indicxcopa_hin_cf', 'meta_mmlu_hin_cf:_average', 'mintaka_hin', 'mlmm_arc_hin_cf:challenge', 'mlmm_hellaswag_hin_cf', 'mlmm_mmlu_hin_cf:_average', 'mlmm_truthfulqa_hin_cf:mc1', 'mlmm_truthfulqa_hin_cf:mc2', 'mlqa_hin', 'xcodah_hin_cf', 'xcsqa_hin_cf', 'xnli2.0_hin_cf', 'xnli_hin_cf', 'xquad_hin', 'xstory_cloze_hin_cf'],
    ru: ['belebele_rus_Cyrl_cf', 'chegeka_rus', 'mathlogic_qa_rus_cf', 'mera_openbookqa_rus_cf', 'mera_worldtree_rus_cf', 'mkqa_rus:_average', 'mlmm_arc_rus_cf:challenge', 'mlmm_hellaswag_rus_cf', 'mlmm_mmlu_rus_cf:_average', 'mlmm_truthfulqa_rus_cf:mc1', 'mlmm_truthfulqa_rus_cf:mc2', 'parus_rus_cf', 'rcb_rus_cf', 'rummlu_rus_cf:_average', 'sber_squad_rus', 'tydiqa_rus', 'xcodah_rus_cf', 'xcsqa_rus_cf', 'xnli2.0_rus_cf', 'xquad_rus', 'xstory_cloze_rus_cf', 'xwinograd_rus_cf'],
    sw: ['afric_mmlu_swa_cf:_average', 'afric_xnli_swa_cf', 'belebele_swh_Latn_cf', 'community_arc_swa_cf:challenge', 'community_arc_swa_cf:easy', 'community_mmlu_swa_cf', 'kenswquad_swa', 'm3exams_swa_cf', 'openai_mmlu_swa_cf:_average', 'tydiqa_swa', 'xcodah_swa_cf', 'xcopa_swa_cf', 'xcsqa_swa_cf', 'xnli2.0_swa_cf', 'xnli_swa_cf', 'xstory_cloze_swa_cf'],
    te: ['belebele_tel_Telu_cf', 'community_hellaswag_tel_cf', 'indicnxnli_tel_cf', 'indicqa_tel', 'indicxcopa_tel_cf', 'mlmm_arc_tel_cf:challenge', 'mlmm_hellaswag_tel_cf', 'mlmm_mmlu_tel_cf:_average', 'mlmm_truthfulqa_tel_cf:mc1', 'mlmm_truthfulqa_tel_cf:mc2', 'tydiqa_tel', 'xstory_cloze_tel_cf'],
    th: ['belebele_tha_Thai_cf', 'community_hellaswag_tha_cf', 'm3exams_tha_cf', 'meta_mmlu_tha_cf:_average', 'mkqa_tha:_average', 'thai_exams_tha_cf:_average', 'thai_exams_tha_cf:tgat', 'thaiqa_tha', 'wsci_tha_cf', 'xcopa_tha_cf', 'xnli2.0_tha_cf', 'xnli_tha_cf', 'xquad_tha'],
    tr: ['belebele_tur_Latn_cf', 'community_arc_tur_cf:easy', 'community_hellaswag_tur_cf', 'community_mmlu_tur_cf:_average', 'community_truthfulqa_tur_cf:mc1', 'community_truthfulqa_tur_cf:mc2', 'community_xwinograd_tur_cf', 'exams_tur_cf:_average', 'mkqa_tur:_average', 'tquadv2_tur', 'xcopa_tur_cf', 'xnli2.0_tur_cf', 'xnli_tur_cf', 'xquad_tur'],
    zh: ['agieval_zho_cf:_average', 'belebele_zho_Hans_cf', 'c3_zho_cf', 'ceval_zho_cf:_average', 'chinese_squad_zho', 'cmath_zho_cf', 'cmmlu_zho_cf:_average', 'cmnli_zho_cf', 'cmrc2018_zho', 'm3exams_zho_cf', 'mkqa_zho:_average', 'mlmm_arc_zho_cf:challenge', 'mlmm_hellaswag_zho_cf', 'mlmm_mmlu_zho_cf:_average', 'mlmm_truthfulqa_zho_cf:mc1', 'mlmm_truthfulqa_zho_cf:mc2', 'ocnli_zho_cf', 'pawsx_zho_cf', 'xcodah_zho_cf', 'xcopa_zho_cf', 'xcsqa_zho_cf', 'xnli2.0_zho_cf', 'xnli_zho_cf', 'xquad_zho', 'xstory_cloze_zho_cf', 'xwinograd_zho_cf']
};
|
| 38 |
+
|
| 39 |
+
/**
 * Shared line/trace styling values.
 * NOTE(review): the code that applies these settings is outside the visible
 * part of the file; presumably they are spread into each Plotly trace.
 */
const LINE_SETTINGS = {
    width: 2.5,
    type: "scatter",
    mode: "lines+markers",
};
|
| 44 |
+
|
| 45 |
+
/**
 * Base Plotly layout shared by every plot. `plotData` deep-merges
 * per-plot overrides (title, axis ticks/ranges, width) on top of it.
 */
const DEFAULT_LAYOUT = {
    font: {
        // NOTE(review): the CSS keyword for the Apple system font is
        // `-apple-system` (leading dash); as written this name likely never
        // matches and Arial is used instead — confirm before changing.
        family: "apple-system, Arial, sans-serif",
    },
    title: {
        font: {
            size: 15,
        },
    },
    xaxis: {
        title: {
            text: "Training Tokens (billions)",
            font: {
                size: 14,
            },
        },
        tickfont: {
            size: 12,
        },
        showgrid: false,
        mirror: true,
        ticks: "outside",
        showline: true,
    },
    yaxis: {
        title: {
            font: {
                size: 14,
            },
            standoff: 10,
        },
        showgrid: false,
        mirror: true,
        ticks: "outside",
        showline: true,
        tickfont: {
            size: 12,
        },
    },
    height: 300, // Fixed plot height in pixels; width is set per container
    autosize: true,
    legend: {
        orientation: 'h', // Horizontal legend
        yanchor: 'bottom',
        y: 0, // Anchored to the bottom of the plot area
        xanchor: 'right',
        x: 1, // Anchored to the right edge of the plot area
        traceorder: 'normal',
        font: { size: 12 },
        tracegroupgap: 0, // No extra gap between legend groups
        bgcolor: 'rgba(255, 255, 255, 0.8)' // White background at 80% opacity (20% transparent)
    },
    margin: {
        t: 25,
        b: 60,
        l: 60,
        r: 40,
    },
};
|
| 104 |
+
|
| 105 |
+
/**
 * Entry point: initialises a plot applet in every element on the page that
 * carries the `task-signal-plot` class.
 */
export function initPlotApplets() {
    const plotContainers = document.querySelectorAll('.task-signal-plot');
    // Wrap the call so forEach's extra (index, list) arguments are not forwarded.
    plotContainers.forEach(container => {
        initPlotApplet(container);
    });
}
|
| 111 |
+
|
| 112 |
+
/**
 * Builds one applet inside `container`: the controls row, an (initially
 * empty) Plotly plot and a statistics area, then loads the initial data.
 *
 * Configuration is read from `data-*` attributes on the container:
 * `language` (default 'Arabic'), `task`, `metric`, `groupSeeds` and
 * `showControls` (both true only for the literal string 'true'), and
 * `taskMetrics` (comma-separated, default
 * 'monotonicity,snr,ordering,randomness').
 *
 * @param {HTMLElement} container - Host element for the applet.
 */
function initPlotApplet(container) {
    const defaultLanguage = container.dataset.language || 'Arabic';
    const defaultTask = container.dataset.task || '';
    const defaultMetric = container.dataset.metric || '';
    const groupSeeds = container.dataset.groupSeeds === 'true';
    const showControls = container.dataset.showControls === 'true';
    const taskMetrics = (container.dataset.taskMetrics || 'monotonicity,snr,ordering,randomness').split(",");

    // The controls are always created (other functions read their values);
    // they are merely hidden when the host does not want them shown.
    const controls = createControls(container, defaultLanguage, defaultTask, defaultMetric, taskMetrics);
    if (!showControls) {
        controls.style.display = 'none';
    }
    container.appendChild(controls);

    const plotContainer = document.createElement('div');
    plotContainer.className = 'plot-container';
    container.appendChild(plotContainer);

    const statsContainer = document.createElement('div');
    statsContainer.className = 'stats-container';
    container.appendChild(statsContainer);

    // Create an initial empty plot so that relayout has a target.
    Plotly.newPlot(plotContainer, []);

    // Keep the plot as wide as its host element.
    const resizePlot = () => {
        Plotly.relayout(plotContainer, { width: container.offsetWidth });
    };
    window.addEventListener('resize', resizePlot);
    resizePlot();

    // Load the initial data. The call is asynchronous; surface any rejection
    // instead of leaving the promise floating.
    Promise.resolve(updateLanguageTasks(container, defaultTask, defaultMetric, groupSeeds, taskMetrics))
        .catch(error => {
            console.error('Error initialising plot applet:', error);
        });
}
|
| 152 |
+
|
| 153 |
+
/**
 * Creates the controls row holding the language, task and metric dropdowns.
 *
 * The task and metric dropdowns start empty; they are populated later by
 * `updateLanguageTasks` / `updateMetrics`. Changing a dropdown re-runs the
 * corresponding update step with no default task/metric and with seed
 * grouping hard-coded to `true`.
 *
 * @param {HTMLElement} container - Applet host element, passed to the change handlers.
 * @param {string} defaultLanguage - Language name pre-selected in the language dropdown.
 * @param {string} defaultTask - Currently unused by this function.
 * @param {string} defaultMetric - Currently unused by this function.
 * @param {string[]} taskMetrics - Statistic keys forwarded to the change handlers.
 * @returns {HTMLDivElement} The `.controls` element (not yet attached to the DOM).
 */
function createControls(container, defaultLanguage, defaultTask, defaultMetric, taskMetrics) {
    const controls = document.createElement('div');
    controls.className = 'controls';

    const languageSelect = createSelect(
        'language',
        Object.keys(languageMap),
        () => updateLanguageTasks(container, '', '', true, taskMetrics)
    );
    languageSelect.value = defaultLanguage;

    const taskSelect = createSelect('task', [], () => updateMetrics(container, '', true, taskMetrics));
    const metricSelect = createSelect('metric', [], () => updatePlot(container, taskMetrics));

    controls.appendChild(createControlGroup('Language:', languageSelect));
    controls.appendChild(createControlGroup('Task:', taskSelect));
    controls.appendChild(createControlGroup('Metric:', metricSelect));

    return controls;
}
|
| 169 |
+
|
| 170 |
+
/**
 * Creates a `<select>` element with one `<option>` per entry of `options`
 * (each entry is used as both value and visible text) and registers
 * `onChangeHandler` for its `change` event.
 *
 * @param {string} id - Value assigned to the element's `id`.
 * @param {string[]} options - Option values/labels, in display order.
 * @param {Function} onChangeHandler - Listener for the `change` event.
 * @returns {HTMLSelectElement} The new, unattached select element.
 */
function createSelect(id, options, onChangeHandler) {
    const select = document.createElement('select');
    select.id = id;
    options.forEach(option => {
        const optionElement = document.createElement('option');
        optionElement.value = option;
        optionElement.textContent = option;
        select.appendChild(optionElement);
    });
    select.addEventListener('change', onChangeHandler);
    return select;
}
|
| 182 |
|
| 183 |
+
/**
 * Wraps an input element and a text label in a `.control-group` div.
 *
 * @param {string} labelText - Text shown in the label.
 * @param {HTMLElement} inputElement - Control placed after the label.
 * @returns {HTMLDivElement} The new, unattached group element.
 */
function createControlGroup(labelText, inputElement) {
    const group = document.createElement('div');
    group.className = 'control-group';

    const label = document.createElement('label');
    label.textContent = labelText;
    label.className = 'control-label';

    group.appendChild(label);
    group.appendChild(inputElement);

    return group;
}
|
| 196 |
|
| 197 |
+
/**
 * Repopulates the task dropdown for the currently selected language and then
 * refreshes the metric dropdown (which in turn refreshes the plot).
 *
 * While loading, and when there are no tasks or loading fails, the dropdown
 * shows a single placeholder option; in the latter two cases the plot is
 * cleared as well.
 *
 * @param {HTMLElement} container - Applet host containing `#language` and `#task`.
 * @param {string} [defaultTask=''] - Task to pre-select when it exists for the language; otherwise the first task is selected.
 * @param {string} [defaultMetric=''] - Forwarded to `updateMetrics`.
 * @param {boolean} groupSeeds - Forwarded to `updateMetrics`.
 * @param {string[]} taskMetrics - Forwarded to `updateMetrics`.
 * @returns {Promise<void>}
 */
async function updateLanguageTasks(container, defaultTask = '', defaultMetric = '', groupSeeds, taskMetrics) {
    const languageSelect = container.querySelector('#language');
    const taskSelect = container.querySelector('#task');
    const language = languageSelect.value;
    const langCode = languageMap[language];

    taskSelect.innerHTML = '<option value="">Loading tasks...</option>';

    try {
        const tasks = await getTasksForLanguage(langCode);

        taskSelect.innerHTML = '';
        if (tasks.length === 0) {
            taskSelect.innerHTML = '<option value="">No tasks available</option>';
            clearPlot(container);
            return;
        }

        tasks.forEach(task => {
            const option = document.createElement('option');
            option.value = task;
            option.textContent = truncateText(task, 25); // Keep the dropdown narrow
            option.title = task; // Full task name as tooltip
            taskSelect.appendChild(option);
        });

        if (defaultTask && tasks.includes(defaultTask)) {
            taskSelect.value = defaultTask;
        } else {
            taskSelect.selectedIndex = 0;
        }

        await updateMetrics(container, defaultMetric, groupSeeds, taskMetrics);
    } catch (error) {
        console.error('Error fetching tasks:', error);
        taskSelect.innerHTML = '<option value="">Error loading tasks</option>';
        clearPlot(container);
    }
}
|
| 235 |
+
|
| 236 |
+
/**
 * Looks up the task identifiers for a language code in `taskLists`.
 * Declared `async` so callers can `await` it, although the lookup itself is
 * synchronous.
 *
 * @param {string} langCode - Language code such as 'ar' or 'fr'.
 * @returns {Promise<string[]>} The tasks for that language, or an empty array when the code is unknown.
 */
async function getTasksForLanguage(langCode) {
    return taskLists[langCode] || [];
}
|
| 239 |
+
|
| 240 |
+
/**
 * Repopulates the metric dropdown for the currently selected language/task
 * and then redraws the plot.
 *
 * @param {HTMLElement} container - Applet host containing `#language`, `#task` and `#metric`.
 * @param {string} [defaultMetric=''] - Metric to pre-select when available; otherwise the first metric is selected.
 * @param {boolean} groupSeeds - Accepted for signature parity with `updateLanguageTasks`; not read here.
 * @param {string[]} taskMetrics - Forwarded to `updatePlot`.
 * @returns {Promise<void>}
 */
async function updateMetrics(container, defaultMetric = '', groupSeeds, taskMetrics) {
    const language = container.querySelector('#language').value;
    const task = container.querySelector('#task').value;
    const langCode = languageMap[language];
    const metricSelect = container.querySelector('#metric');

    metricSelect.innerHTML = '<option value="">Loading metrics...</option>';

    try {
        const metrics = await getMetricsForTask(langCode, task);

        metricSelect.innerHTML = '';
        metrics.forEach(metric => {
            const option = document.createElement('option');
            option.value = metric;
            option.textContent = metric;
            metricSelect.appendChild(option);
        });

        if (defaultMetric && metrics.includes(defaultMetric)) {
            metricSelect.value = defaultMetric;
        } else if (metricSelect.options.length > 0) {
            metricSelect.selectedIndex = 0;
        }

        await updatePlot(container, taskMetrics);
    } catch (error) {
        console.error('Error fetching metrics:', error);
        metricSelect.innerHTML = '<option value="">Error loading metrics</option>';
        clearPlot(container);
    }
}
|
| 272 |
+
|
| 273 |
+
/**
 * Downloads `data/<langCode>/<task>_stats.csv` and returns the distinct,
 * non-empty values of its `metric` column in first-seen order.
 *
 * @param {string} langCode - Language code used as the data sub-directory.
 * @param {string} task - Task identifier used as the file-name prefix.
 * @returns {Promise<string[]>} Resolves with the metric names; rejects with the Papa Parse error when the download or parse fails.
 */
async function getMetricsForTask(langCode, task) {
    // Papa.parse is callback-based, so adapt it to a promise.
    return new Promise((resolve, reject) => {
        Papa.parse(`data/${langCode}/${task}_stats.csv`, {
            download: true,
            header: true,
            complete: function(results) {
                const metrics = [...new Set(results.data.map(row => row.metric).filter(metric => metric))];
                resolve(metrics);
            },
            error: function(error) {
                console.error('Error fetching metrics:', error);
                reject(error);
            }
        });
    });
}
|
| 289 |
+
|
| 290 |
+
/**
 * Loads the data and stats CSVs for the current language/task selection and
 * redraws the plot for the selected metric.
 *
 * The returned promise settles once the plot has been drawn (or cleared
 * after a load failure), so callers that `await updatePlot(...)` really do
 * wait for the redraw. Load errors are logged and never reject the promise.
 *
 * @param {HTMLElement} container - Applet host containing `#language`, `#task` and `#metric`; `data-title` supplies the plot title.
 * @param {string[]} taskMetrics - Statistic keys forwarded to `plotData`.
 * @returns {Promise<void>}
 */
function updatePlot(container, taskMetrics) {
    const language = container.querySelector('#language').value;
    const task = container.querySelector('#task').value;
    const metric = container.querySelector('#metric').value;
    const title = container.dataset.title;
    const langCode = languageMap[language];

    // Nothing sensible to draw until all three selections are made.
    if (!langCode || !task || !metric) {
        clearPlot(container);
        return Promise.resolve();
    }

    // Papa.parse is callback-based; adapt it to a promise once for both files.
    const loadCsv = (url) => new Promise((resolve, reject) => {
        Papa.parse(url, {
            download: true,
            header: true,
            dynamicTyping: true,
            complete: resolve,
            error: reject
        });
    });

    const dataUrl = `data/${langCode}/${task}_data.csv`;
    const statsUrl = `data/${langCode}/${task}_stats.csv`;

    return Promise.all([loadCsv(dataUrl), loadCsv(statsUrl)])
        .then(([dataResult, statsResult]) => {
            plotData(container, dataResult.data, statsResult.data, metric, title, taskMetrics);
        })
        .catch(error => {
            console.error('Error parsing CSV:', error);
            clearPlot(container);
        });
}
|
| 333 |
+
|
| 334 |
+
/**
 * Turns raw task rows into Plotly traces (sort, group by run, interpolate,
 * smooth), draws them into the applet's `.plot-container`, and then renders
 * the statistics line for the selected metric.
 *
 * @param {HTMLElement} container - Applet host; `data-group-seeds="true"` enables seed grouping.
 * @param {Object[]} data - Parsed rows of the task data CSV.
 * @param {Object[]} stats - Parsed rows of the task stats CSV.
 * @param {string} metric - Name of the metric being plotted.
 * @param {string} [title] - Plot title; a missing title yields an empty title.
 * @param {string[]} taskMetrics - Statistic keys forwarded to `displayStatistics`.
 */
function plotData(container, data, stats, metric, title, taskMetrics) {
    const groupSeeds = container.dataset.groupSeeds === 'true';
    const sortedData = sortDataByTokens(data);
    const groupedData = groupDataByRunname(sortedData, groupSeeds, metric);
    const interpolatedData = interpolateData(groupedData, metric);
    const smoothedData = smoothData(interpolatedData, metric);
    const traces = createTraces(smoothedData, metric);

    const plotContainer = container.querySelector('.plot-container');

    // Only real numbers take part in the range: gaps (null/undefined/NaN)
    // would otherwise be coerced to 0 or poison the min/max.
    const scores = traces
        .flatMap(trace => trace.y)
        .filter(value => typeof value === 'number' && Number.isFinite(value));

    const yaxis = { title: { text: 'Score' } };
    if (scores.length > 0) {
        const lowest = scores.reduce((a, b) => Math.min(a, b));
        const highest = scores.reduce((a, b) => Math.max(a, b));
        // 5% padding away from the data on both sides; the factor flips for
        // negative bounds so the padding never cuts into the data.
        yaxis.range = [
            lowest >= 0 ? lowest * 0.95 : lowest * 1.05,
            highest >= 0 ? highest * 1.05 : highest * 0.95,
        ];
    }
    // With no numeric samples the range is left unset so Plotly autoranges.

    const layout = _.merge({}, DEFAULT_LAYOUT, {
        title: { text: title == null ? '' : `${title}` },
        xaxis: {
            title: { text: 'Training Tokens (billions)' },
            tickvals: [0, 5, 10, 15, 20, 25],
            ticktext: ['0', '5B', '10B', '15B', '20B', '25B'],
            tickangle: 45,
            range: [0, 30], // Axis spans 0 to 30B tokens
        },
        yaxis,
        width: container.offsetWidth,
    });

    Plotly.newPlot(plotContainer, traces, layout, {responsive: true});

    displayStatistics(container, stats, metric, taskMetrics);
}
|
| 365 |
+
|
| 366 |
+
/**
 * Renders the summary statistics for `metric` into the applet's
 * `.stats-container`. Only the statistics named in `taskMetrics` are shown;
 * a value that is missing or not a finite number is shown as "N/A" instead
 * of throwing.
 *
 * @param {HTMLElement} container - Applet host containing `.stats-container`.
 * @param {Object[]} stats - Parsed rows of the stats CSV (one row per metric).
 * @param {string} metric - Metric whose row should be displayed.
 * @param {string[]} taskMetrics - Any of 'monotonicity', 'snr', 'ordering', 'randomness'.
 */
function displayStatistics(container, stats, metric, taskMetrics) {
    const statsContainer = container.querySelector('.stats-container');
    const metricStats = stats.find(stat => stat && stat.metric === metric);

    if (!metricStats) {
        statsContainer.innerHTML = '<p>No statistics available for this metric.</p>';
        return;
    }

    // CSV cells parsed with dynamicTyping may be null or strings, so guard
    // before calling toFixed.
    const formatStat = (value) =>
        (typeof value === 'number' && Number.isFinite(value) ? value.toFixed(2) : 'N/A');

    // [taskMetrics key, tooltip, visible label, value] in display order.
    const entries = [
        ['monotonicity', 'Average Spearman Correlation', 'Monotonicity', metricStats.avg_spearman],
        ['snr', 'Average Signal-to-Noise Ratio', 'Signal-to-Noise', metricStats.avg_snr],
        ['ordering', 'Average Kendall Tau-a', 'Ordering Consistency', metricStats.avg_kendall_tau_a],
        ['randomness', 'Max N Standard Deviations', 'Non-Randomness', metricStats.max_n_std],
    ];

    const spans = entries
        .filter(([key]) => taskMetrics.includes(key))
        .map(([, tooltip, label, value]) => `<span title="${tooltip}">${label}: ${formatStat(value)}</span>`);

    // A different class is used when only one statistic is shown.
    const cssClass = `compact-stats${taskMetrics.length === 1 ? '-single' : ''}`;
    statsContainer.innerHTML = `
        <div class="${cssClass}">
            ${spans.join('\n            ')}
        </div>
    `;
}
|
| 382 |
+
|
| 383 |
+
/**
 * Thin out a list of token positions to a manageable number of axis ticks.
 *
 * Duplicates are dropped and the values sorted ascending. When more than
 * ten distinct values remain, every `stride`-th value is kept, starting
 * with the smallest.
 *
 * @param {Iterable<number>} tokens - Token positions (may contain duplicates).
 * @returns {number[]} Ascending, de-duplicated tick values.
 */
function getReducedTickValues(tokens) {
  const MAX_TICKS = 10; // Adjust this value to increase/decrease the number of ticks

  const distinct = Array.from(new Set(tokens));
  distinct.sort((lhs, rhs) => lhs - rhs);

  if (distinct.length <= MAX_TICKS) {
    return distinct;
  }

  const step = Math.ceil(distinct.length / MAX_TICKS);
  const picked = [];
  for (let position = 0; position < distinct.length; position += step) {
    picked.push(distinct[position]);
  }
  return picked;
}
|
| 395 |
+
|
| 396 |
+
/**
 * Format a number as a short tick label with a magnitude suffix.
 *
 * Values of at least 1e9 / 1e6 / 1e3 are divided by that magnitude, printed
 * with one decimal and suffixed with 'B' / 'M' / 'K'. Anything smaller is
 * returned through `toString()` unchanged.
 *
 * @param {number} value - Value to format.
 * @returns {string} Label text.
 */
function formatTickLabel(value) {
  // Ordered from largest to smallest so the first match wins.
  const magnitudes = [
    [1e9, 'B'],
    [1e6, 'M'],
    [1e3, 'K'],
  ];

  for (const [divisor, suffix] of magnitudes) {
    if (value >= divisor) {
      return (value / divisor).toFixed(1) + suffix;
    }
  }
  return value.toString();
}
|
| 406 |
+
|
| 407 |
+
/**
 * Compute summary statistics of `metric` across all non-baseline runs.
 *
 * The first key of `data` whose lower-cased name contains 'baseline' is the
 * baseline run; every other key is a run to be scored.
 *
 * Undefined contributions (a correlation that is NaN, a run too short to
 * produce a Kendall value, a zero or NaN standard deviation) are skipped,
 * i.e. they count as 0 in the averages, so that one degenerate run cannot
 * turn the whole result into NaN/Infinity. With no non-baseline run at all
 * the zero-initialised statistics are returned.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run name; each row
 *   is read for `tokens` and `[metric]`.
 * @param {string} metric - Name of the score property.
 * @returns {{avg_spearman: number, avg_kendall_tau_a: number, avg_snr: number, max_n_std: number}}
 */
function computeStatistics(data, metric) {
  const stats = {
    avg_spearman: 0,
    avg_kendall_tau_a: 0,
    avg_snr: 0,
    max_n_std: 0,
  };

  const runNames = Object.keys(data);
  const baselineRun = runNames.find((key) => key.toLowerCase().includes('baseline'));
  const nonBaselineRuns = runNames.filter((key) => key !== baselineRun);
  const numRuns = nonBaselineRuns.length;

  // Nothing to average: avoid the 0 / 0 = NaN of the final division.
  if (numRuns === 0) return stats;

  // Loop-invariant: the baseline mean does not depend on the scored run.
  const baselineMean = baselineRun
    ? _.mean(data[baselineRun].map((row) => row[metric]))
    : undefined;

  // Compute statistics for each non-baseline run
  nonBaselineRuns.forEach((run) => {
    const runData = data[run];
    const tokens = runData.map((row) => row.tokens);
    const scores = runData.map((row) => row[metric]);

    // Spearman correlation
    const spearman = spearmanCorrelation(tokens, scores);
    if (Number.isFinite(spearman)) stats.avg_spearman += spearman;

    // Kendall Tau-a
    // NOTE(review): both arguments are prefixes of the SAME score series and
    // kendallTauA only reads the first `x.length` entries, so this looks
    // like it compares a prefix with itself - confirm the intended inputs.
    const lastHalf = Math.floor(runData.length / 2);
    const kendallTauValues = [];
    for (let i = lastHalf; i < runData.length - 1; i++) {
      const tau = kendallTauA(scores.slice(0, i + 1), scores.slice(0, i + 2));
      if (Number.isFinite(tau)) kendallTauValues.push(tau);
    }
    // `_.mean([])` is NaN, so only add when there is something to average.
    if (kendallTauValues.length > 0) {
      stats.avg_kendall_tau_a += _.mean(kendallTauValues);
    }

    // SNR and max_n_std
    if (baselineRun) {
      const stdDev = standardDeviation(scores);
      // `stdDev > 0` is also false for NaN, which guards both divisions.
      if (stdDev > 0) {
        const snr = _.mean(scores) / stdDev;
        if (Number.isFinite(snr)) stats.avg_snr += snr;

        const nStd = (_.max(scores) - baselineMean) / stdDev;
        if (Number.isFinite(nStd)) stats.max_n_std = Math.max(stats.max_n_std, nStd);
      }
    }
  });

  // Average the statistics
  stats.avg_spearman /= numRuns;
  stats.avg_kendall_tau_a /= numRuns;
  stats.avg_snr /= numRuns;

  return stats;
}
|
| 452 |
+
|
| 453 |
+
/**
 * Spearman rank correlation coefficient of two equally indexed samples.
 *
 * Computed as the Pearson correlation of the average (fractional) ranks,
 * which stays correct when values are tied; the `1 - 6*sum(d^2)/(n(n^2-1))`
 * shortcut is only valid for tie-free data. Without ties both formulas
 * give the same result.
 *
 * @param {number[]} x - First sample.
 * @param {number[]} y - Second sample; only the first `min(x.length, y.length)`
 *   pairs are used.
 * @returns {number} Coefficient in [-1, 1]; 0 when it is undefined (fewer
 *   than two pairs, or one of the samples is constant).
 */
function spearmanCorrelation(x, y) {
  // Average ranks (1-based): tied values share the mean of their positions.
  const averageRanks = (values) => {
    const order = values
      .map((value, index) => ({ value, index }))
      .sort((a, b) => a.value - b.value);
    const ranks = new Array(values.length);
    let start = 0;
    while (start < order.length) {
      let end = start;
      while (end + 1 < order.length && order[end + 1].value === order[start].value) {
        end += 1;
      }
      const shared = (start + end) / 2 + 1;
      for (let k = start; k <= end; k++) ranks[order[k].index] = shared;
      start = end + 1;
    }
    return ranks;
  };

  const n = Math.min(x.length, y.length);
  if (n < 2) return 0; // correlation is undefined; avoid dividing by zero

  const rankX = averageRanks(x.slice(0, n));
  const rankY = averageRanks(y.slice(0, n));

  // Ranks always sum to n(n+1)/2, so both rank means are (n+1)/2.
  const meanRank = (n + 1) / 2;
  let covariance = 0;
  let varianceX = 0;
  let varianceY = 0;
  for (let i = 0; i < n; i++) {
    const dx = rankX[i] - meanRank;
    const dy = rankY[i] - meanRank;
    covariance += dx * dy;
    varianceX += dx * dx;
    varianceY += dy * dy;
  }

  const denominator = Math.sqrt(varianceX * varianceY);
  if (!(denominator > 0)) return 0; // a constant sample has no ordering
  return covariance / denominator;
}
|
| 466 |
+
|
| 467 |
+
/**
 * Rank the values of `data` in ascending order (1-based).
 *
 * Tied values receive the average of the positions they occupy (fractional
 * ranking), the convention rank correlations expect. The ranking is done
 * with a single sort instead of an `indexOf` lookup per element.
 *
 * @param {number[]} data - Values to rank; the input is not modified.
 * @returns {number[]} Rank of each element, in the input's order.
 */
function rankData(data) {
  const order = data
    .map((value, index) => ({ value, index }))
    .sort((a, b) => a.value - b.value);

  const ranks = new Array(data.length);
  let start = 0;
  while (start < order.length) {
    // Extend [start, end] over the run of equal values.
    let end = start;
    while (end + 1 < order.length && order[end + 1].value === order[start].value) {
      end += 1;
    }
    const sharedRank = (start + end) / 2 + 1;
    for (let k = start; k <= end; k++) {
      ranks[order[k].index] = sharedRank;
    }
    start = end + 1;
  }
  return ranks;
}
|
| 471 |
+
|
| 472 |
+
/**
 * Kendall tau-a rank correlation: (concordant - discordant) pairs divided
 * by the total number of pairs n(n-1)/2. Tied pairs count as neither.
 *
 * @param {number[]} x - First sample.
 * @param {number[]} y - Second sample; entries beyond the shorter of the two
 *   samples are ignored.
 * @returns {number} Coefficient in [-1, 1]; 0 when fewer than two pairs
 *   exist (there is no pair to compare, and n(n-1)/2 would be zero).
 */
function kendallTauA(x, y) {
  const n = Math.min(x.length, y.length);
  if (n < 2) return 0;

  let concordant = 0;
  let discordant = 0;

  for (let i = 0; i < n; i++) {
    for (let j = i + 1; j < n; j++) {
      const agreement = Math.sign(x[j] - x[i]) * Math.sign(y[j] - y[i]);
      if (agreement > 0) concordant++;
      else if (agreement < 0) discordant++;
    }
  }

  return (concordant - discordant) / ((n * (n - 1)) / 2);
}
|
| 488 |
+
|
| 489 |
+
/**
 * Population standard deviation: the square root of the mean squared
 * distance of each value from the mean of `values`.
 *
 * @param {number[]} values - Sample values.
 * @returns {number} Standard deviation (divides by the count, not count - 1).
 */
function standardDeviation(values) {
  const center = _.mean(values);
  const squaredDistances = values.map((entry) => {
    const delta = entry - center;
    return delta * delta;
  });
  return Math.sqrt(_.mean(squaredDistances));
}
|
| 498 |
+
|
| 499 |
+
/**
 * Resample every run onto the union of token positions seen in any run.
 *
 * For each position a run lacks, the metric is linearly interpolated
 * between the nearest rows below and above. Positions before the first or
 * after the last row of a run reuse that nearest row unchanged (flat
 * extrapolation). Rows that already exist are returned as-is.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run; each row has
 *   a `tokens` position and a `[metric]` value.
 * @param {string} metric - Name of the property to interpolate.
 * @returns {Object<string, Array<Object>>} Same keys, one row per token
 *   position, ascending. Interpolated rows copy the lower neighbour's other
 *   properties.
 */
function interpolateData(data, metric) {
  // The token union is identical for every run, so build it once instead of
  // once per run.
  const allTokens = _.uniq(
    _.flatMap(Object.values(data), (rows) => rows.map((r) => r.tokens))
  ).sort((a, b) => a - b);

  return _.mapValues(data, (rows) => {
    const sortedRows = _.sortBy(rows, 'tokens');

    return allTokens.map((token) => {
      const exactMatch = _.find(sortedRows, { tokens: token });
      if (exactMatch) return exactMatch;

      const lowerRow = _.findLast(sortedRows, (r) => r.tokens < token);
      const upperRow = _.find(sortedRows, (r) => r.tokens > token);

      if (!lowerRow) return { ...upperRow, tokens: token };
      if (!upperRow) return { ...lowerRow, tokens: token };

      // Coerce explicitly: with string values `+` would concatenate
      // ("0.2" + 0.1 -> "0.20.1") instead of adding.
      const lowerValue = Number(lowerRow[metric]);
      const upperValue = Number(upperRow[metric]);
      const ratio = (token - lowerRow.tokens) / (upperRow.tokens - lowerRow.tokens);
      const interpolatedMetric = lowerValue + (upperValue - lowerValue) * ratio;

      return {
        ...lowerRow,
        tokens: token,
        [metric]: interpolatedMetric,
      };
    });
  });
}
|
| 525 |
|
| 526 |
+
/**
 * Smooth the metric of every run with a trailing moving average.
 *
 * Each row's metric becomes the mean over that row and up to
 * `windowSize - 1` rows before it; the first rows simply average over the
 * rows available so far.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run.
 * @param {string} metric - Name of the property to smooth.
 * @param {number} [windowSize=3] - Number of rows in the averaging window.
 * @returns {Object<string, Array<Object>>} Copies of the rows with the
 *   smoothed metric; all other properties are carried over.
 */
function smoothData(data, metric, windowSize = 3) {
  const readMetric = (entry) => entry[metric];

  const smoothSeries = (series) =>
    series.map((entry, position) => {
      const firstIncluded = Math.max(0, position - windowSize + 1);
      const trailing = series.slice(firstIncluded, position + 1);
      return { ...entry, [metric]: _.meanBy(trailing, readMetric) };
    });

  return _.mapValues(data, (rows) => smoothSeries(rows));
}
|
| 535 |
|
| 536 |
+
/**
 * Return the rows ordered by their `tokens` property via lodash `_.sortBy`.
 *
 * @param {Array<Object>} data - Rows to order.
 * @returns {Array<Object>} The result of `_.sortBy(data, 'tokens')`.
 */
function sortDataByTokens(data) {
  const sortKey = 'tokens';
  const ordered = _.sortBy(data, sortKey);
  return ordered;
}
|
| 539 |
|
| 540 |
+
/**
 * Bucket the rows by run.
 *
 * Rows whose `runname` is null/undefined or the literal 'null_undefined'
 * are dropped first.
 *
 * - `groupSeeds` false: one bucket per "<processed run name>_<seed>", rows
 *   untouched.
 * - `groupSeeds` true: one bucket per processed run name; rows sharing the
 *   same `tokens` are collapsed into one row whose metric is the mean of
 *   `parseFloat(row[metric]) || 0` over them, all other properties taken
 *   from the first row of that group.
 *
 * @param {Array<Object>} data - Rows with `runname`, `seed`, `tokens` and `[metric]`.
 * @param {boolean} groupSeeds - Whether to average seeds of the same run.
 * @param {string} metric - Name of the property to average.
 * @returns {Object<string, Array<Object>>} Rows per bucket key.
 */
function groupDataByRunname(data, groupSeeds, metric) {
  // Remove null or undefined runs
  const validRows = data.filter(
    (row) => !(row.runname == null || row.runname === 'null_undefined')
  );

  if (groupSeeds) {
    const rowsByRun = _.groupBy(validRows, (row) => processRunName(row.runname));

    const averageAcrossSeeds = (runRows) => {
      const rowsByTokens = _.groupBy(runRows, 'tokens');
      return _.map(rowsByTokens, (sameStepRows) => {
        const averaged = _.meanBy(sameStepRows, (row) => parseFloat(row[metric]) || 0);
        return { ...sameStepRows[0], [metric]: averaged };
      });
    };

    return _.mapValues(rowsByRun, (runRows) => averageAcrossSeeds(runRows));
  }

  return _.groupBy(validRows, (row) => `${processRunName(row.runname)}_${row.seed}`);
}
|
| 561 |
|
| 562 |
+
/**
 * Translate a raw run name through `runNameMap`.
 *
 * The first key of `runNameMap` (in its own key order) that occurs as a
 * substring of `runname` decides the result; without any match the name is
 * returned unchanged.
 *
 * @param {string} runname - Raw run name.
 * @returns {*} The mapped value, or `runname` itself when no key matches.
 */
function processRunName(runname) {
  const matchedKey = Object.keys(runNameMap).find((candidate) => runname.includes(candidate));
  if (matchedKey === undefined) {
    return runname;
  }
  return runNameMap[matchedKey];
}
|
| 570 |
|
| 571 |
+
/**
 * Build one Plotly line+marker trace per run.
 *
 * Runs are ordered alphabetically (`localeCompare`), with every run whose
 * name contains 'baseline' placed after all others. Several baseline runs
 * are ordered alphabetically among themselves; the comparator is a
 * consistent ordering, which `Array.prototype.sort` requires.
 *
 * @param {Object<string, Array<Object>>} groupedData - Rows per run name.
 * @param {string} metric - Name of the row property used as the y value.
 * @returns {Array<Object>} Plotly trace objects (`x` = tokens, `y` = metric).
 */
function createTraces(groupedData, metric) {
  const colorsMapping = new Map();
  const isBaseline = (runname) => runname.includes('baseline');

  const sortedRunnames = Object.keys(groupedData).sort((a, b) => {
    const aIsBaseline = isBaseline(a);
    const bIsBaseline = isBaseline(b);
    // Baselines go last; within the same group fall back to name order.
    if (aIsBaseline !== bIsBaseline) return aIsBaseline ? 1 : -1;
    return a.localeCompare(b);
  });

  return sortedRunnames.map((runname, index) => {
    const color = getColorForTrace(runname, colorsMapping, index);
    const rows = groupedData[runname];
    return {
      x: rows.map((row) => row.tokens),
      y: rows.map((row) => row[metric]),
      name: runname,
      line: {
        color: color,
        shape: 'spline',
        // Spread last: LINE_SETTINGS may override the two keys above.
        ...LINE_SETTINGS,
      },
      marker: {
        color: color,
        size: 6,
      },
      mode: 'lines+markers',
    };
  });
}
|
| 598 |
|
| 599 |
+
/**
 * Look up the colour of a trace, assigning one on first use.
 *
 * A truthy colour already stored under `traceName` is reused. Otherwise
 * `getColor(index)` picks one, which is remembered in `colorsMapping` and
 * returned.
 *
 * @param {string} traceName - Name of the trace.
 * @param {Map<string, *>} colorsMapping - Cache of colours per trace name; updated in place.
 * @param {number} index - Index passed to `getColor` for a new trace.
 * @returns {*} The colour for the trace.
 */
function getColorForTrace(traceName, colorsMapping, index) {
  const cached = colorsMapping.get(traceName);
  if (!cached) {
    const assigned = getColor(index);
    colorsMapping.set(traceName, assigned);
    return assigned;
  }
  return cached;
}
|
| 609 |
+
|
| 610 |
+
/**
 * Remove the Plotly chart rendered inside the container's
 * `.plot-container` element, if that element exists.
 *
 * @param {HTMLElement} container - Element that may hold a `.plot-container`.
 */
function clearPlot(container) {
  const plotContainer = container.querySelector('.plot-container');
  // This runs from an error handler; with no plot element there is nothing
  // to purge, and handing null to Plotly would presumably raise a second
  // error from inside that handler.
  if (!plotContainer) return;
  Plotly.purge(plotContainer);
}
|
| 614 |
+
|
| 615 |
+
/**
 * Shorten `text` to at most `maxLength` characters, marking the cut with a
 * trailing '..'.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum length of the result, ellipsis included.
 * @returns {string} `text` itself when it already fits; otherwise its
 *   beginning followed by '..'. When `maxLength` leaves no room for any
 *   text, as much of the '..' marker as fits is returned so the result is
 *   never longer than `maxLength`.
 */
function truncateText(text, maxLength) {
  if (text.length <= maxLength) return text;

  const ellipsis = '..';
  if (maxLength <= ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, maxLength));
  }
  // `slice` replaces the deprecated `substr`; the end index is positive here.
  return text.slice(0, maxLength - ellipsis.length) + ellipsis;
}
|
| 619 |
+
|
app/src/content/assets/finetasks/data/ar/acva_ara:_average_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee86019600943234de0d00cb7f2cfb5f08adea529e281c47fb11ab39e904fa14
|
| 3 |
+
size 26104
|
app/src/content/assets/finetasks/data/ar/acva_ara:_average_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79551f7eeb2579538604681929741203205e6150f95187ea5319e3e9671f634e
|
| 3 |
+
size 1078
|
app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0753a9fb838808ff6855bfcce87eb7d716d406dff82985e64bd72abf3e0eeed6
|
| 3 |
+
size 20564
|
app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b42cd429953188f1e3a2f61a3bbcd3aa669421bac407a5f2843b9ad3bc287b9b
|
| 3 |
+
size 903
|
app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:810a68eb754b4f0a3acae2a34c311676c78d926fd88e34e1c0bb9be949e3aa20
|
| 3 |
+
size 18155
|
app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:642e379750d340963d86ff023426787891d7cb494bf135c33be48c0c9897519f
|
| 3 |
+
size 908
|
app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf7f6df15ba9e2c552c721bed4d292cf75a8bf6b3f3cbd5f65c9903b99e463d0
|
| 3 |
+
size 24386
|
app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9b25491622030909b5b075cd7744fcad61fabe7103253c14355710762cbdc6d6
|
| 3 |
+
size 928
|
app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3b1f369ae9a64e27702437a049456d90fff09c62133a0232cd146a19bfb1bba
|
| 3 |
+
size 17318
|
app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19159aa9195f4a9eebbc6a91431d65dce630ec97edf39e32fe3c0f8dc302e546
|
| 3 |
+
size 834
|
app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:714e3326ff11bfebb268366315fa4b5cf305b9cb8174c451db33773f5ac88d78
|
| 3 |
+
size 18138
|
app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7a37090de73b4fc41f1011e031ad56ff95e3883662275daf8c67656e166b5f9
|
| 3 |
+
size 935
|
app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6959492567e052a2f9251d092f449dda7ed9118daca1441f5c146e6d2761e10c
|
| 3 |
+
size 23032
|
app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f579eac42e69687634fa0d27ca738a80d5fd854b4dc33ef069210def32a7394
|
| 3 |
+
size 937
|
app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9fe35704efef4b670a7f3ec6f64d64aa2e14f387caaa12b7c5da0eda18c4078a
|
| 3 |
+
size 22998
|
app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e137221596d32ecd3f77c2f451c87e8ff8743a286816f974e7457290a2cfaec
|
| 3 |
+
size 925
|
app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a436d5d6e494aa37f2f9c4e4f14c2376d97d1c48ed116ca4f9c6f65caf0fbc3f
|
| 3 |
+
size 18478
|
app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5f01a72195ce586f94d8dad8210e5accd5459bf83712ad968b149701dfe4b9e8
|
| 3 |
+
size 880
|
app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80b15aea8264a8bdac278cf2ac0d07cbcc0e8c7ccac150ace4e26dd65471e6fc
|
| 3 |
+
size 18432
|
app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80ae78c9142cf89d1be55977578c2ca041838a5dbaa736b40958a13730e46ae9
|
| 3 |
+
size 893
|
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bdc28b21863e88fcc8fac6245d25da05db90d7da234708b341636400b2584769
|
| 3 |
+
size 18023
|
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3291cd3da2406bdcc358764cf42ae1a21cf9d1c07b4daf5998e7695a09317c37
|
| 3 |
+
size 936
|
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab37a3551307dadb3260bcd768b554954b7451493ce1e07bbaf8465d25f3e09b
|
| 3 |
+
size 16661
|
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20a08683aa5cdd6a8a64efcffcf942a4a26dcf2550a2d770b0ff46b8c40ecffe
|
| 3 |
+
size 970
|
app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a29f2dcacf4f8d53096dddfbeb1f5cb8eb8e8a5354dc2f979a1908c5541ebcd6
|
| 3 |
+
size 23819
|
app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e6cac24425904dec66f4bb38aa34d65d0c1a6bc539baf5ea5300c5f7bc362626
|
| 3 |
+
size 894
|
app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c69085a0ca2df0adc4f9ec3c3b9857adad82d6749f41d88e3d43ba16e6d936d3
|
| 3 |
+
size 17942
|
app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b1db01058cac603bcb5b0a991b39d9499a3537ab0da1ad36eb1c3b317c8d5ff4
|
| 3 |
+
size 903
|
app/src/content/assets/finetasks/data/ar/arcd_ara_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7f5b91b32d5c9a58a34ac035fd9e880de1256f5d0c47edfed7fe591abed789fa
|
| 3 |
+
size 15849
|
app/src/content/assets/finetasks/data/ar/arcd_ara_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d15a351c2e4cf3dcc3372637baafd4821397bb7ab00c81704d91ec8b55e6a31
|
| 3 |
+
size 478
|
app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6464f54a9a933b4b47c9c513c907ae358909518998ad5db01d8580578b77a1c6
|
| 3 |
+
size 23912
|
app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fda4577c55a3298b9dc6f6a2e148aaf6a4038f8604811ca339f88d3c3f6e7573
|
| 3 |
+
size 903
|
app/src/content/assets/finetasks/data/ar/boolq_ara_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54675b0af158ca756a5c08ea1b6315f757df93827a3a107b4208b135bdf6d8db
|
| 3 |
+
size 18834
|
app/src/content/assets/finetasks/data/ar/boolq_ara_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:32457679a2dc38045522dab504c13590c7199bcb452f65acfcf337fbbb3bdc2c
|
| 3 |
+
size 1042
|
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aabcf0d879390556fa664c0fb532afa47580407e37f2552026dbfffab89ebf57
|
| 3 |
+
size 469
|
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d213b818d04764e3bd9a0d0ad57ab9e0e38fe26d8db51942c12c1b8eb92f3636
|
| 3 |
+
size 449
|
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a7f3601c8b4750a98708960e1df4c574b2038821e4a5740837d52ad770bbbd3c
|
| 3 |
+
size 469
|
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f25f52a5fe47096977b9dd294e354c65f54225ec87a7cde264933b5229ca0a67
|
| 3 |
+
size 449
|
app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad546f00bd725bea998a5fc4c6a870f43a1a4e7457bda42b110096b13a029fd8
|
| 3 |
+
size 449
|
app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:af146e0409fc3332f8f250a36caabb270e01ad48ad5d04dd539de86bdc8529ff
|
| 3 |
+
size 36571
|
app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dbd86d8c6c5922af78b295bbc8c89f483049a17500be20cb625565a2e599242
|
| 3 |
+
size 1717
|
app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_data.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
|
| 3 |
+
size 14390
|
app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_stats.csv
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2da7b002e63a530df289c706c5c56113d16427105d18cff9dc556f25feb7e5e5
|
| 3 |
+
size 477
|