Clémentine committed on
Commit
26ec6ee
·
1 Parent(s): 49f71ca
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app/public/finetasks +1 -0
  2. app/src/content/assets/finetasks/code.js +572 -198
  3. app/src/content/assets/finetasks/data/ar/acva_ara:_average_data.csv +3 -0
  4. app/src/content/assets/finetasks/data/ar/acva_ara:_average_stats.csv +3 -0
  5. app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_data.csv +3 -0
  6. app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_stats.csv +3 -0
  7. app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_data.csv +3 -0
  8. app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_stats.csv +3 -0
  9. app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_data.csv +3 -0
  10. app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_stats.csv +3 -0
  11. app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_data.csv +3 -0
  12. app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_stats.csv +3 -0
  13. app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_data.csv +3 -0
  14. app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_stats.csv +3 -0
  15. app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_data.csv +3 -0
  16. app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_stats.csv +3 -0
  17. app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_data.csv +3 -0
  18. app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_stats.csv +3 -0
  19. app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_data.csv +3 -0
  20. app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_stats.csv +3 -0
  21. app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_data.csv +3 -0
  22. app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_stats.csv +3 -0
  23. app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_data.csv +3 -0
  24. app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_stats.csv +3 -0
  25. app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_data.csv +3 -0
  26. app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_stats.csv +3 -0
  27. app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_data.csv +3 -0
  28. app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_stats.csv +3 -0
  29. app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_data.csv +3 -0
  30. app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_stats.csv +3 -0
  31. app/src/content/assets/finetasks/data/ar/arcd_ara_data.csv +3 -0
  32. app/src/content/assets/finetasks/data/ar/arcd_ara_stats.csv +3 -0
  33. app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_data.csv +3 -0
  34. app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_stats.csv +3 -0
  35. app/src/content/assets/finetasks/data/ar/boolq_ara_data.csv +3 -0
  36. app/src/content/assets/finetasks/data/ar/boolq_ara_stats.csv +3 -0
  37. app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_data.csv +3 -0
  38. app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_stats.csv +3 -0
  39. app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_data.csv +3 -0
  40. app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_stats.csv +3 -0
  41. app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_data.csv +3 -0
  42. app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_stats.csv +3 -0
  43. app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_data.csv +3 -0
  44. app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_stats.csv +3 -0
  45. app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_data.csv +3 -0
  46. app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_stats.csv +3 -0
  47. app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_data.csv +3 -0
  48. app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_stats.csv +3 -0
  49. app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_data.csv +3 -0
  50. app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_stats.csv +3 -0
app/public/finetasks ADDED
@@ -0,0 +1 @@
 
 
1
+ ../src/content/assets/finetasks
app/src/content/assets/finetasks/code.js CHANGED
@@ -1,5 +1,7 @@
 
1
  import Papa from 'papaparse';
2
- import { DataTable } from 'simple-datatables';
 
3
 
4
  const languageMap = {
5
  'Arabic': 'ar',
@@ -10,236 +12,608 @@ const languageMap = {
10
  'Thai': 'th',
11
  'Chinese': 'zh',
12
  'French': 'fr',
13
- 'Hindi': 'hi',
14
  };
15
 
16
- const metricTypes = [
17
- { value: 'max_score', label: 'Max Score' },
18
- { value: 'avg_snr', label: 'Low Noise' },
19
- { value: 'avg_spearman', label: 'Monotonicity' },
20
- { value: 'max_n_std', label: 'Non-Randomness' },
21
- { value: 'avg_kendall_tau_a', label: 'Ordering Consistency' }
22
- ];
23
-
24
- const tableTypes = [
25
- { value: 'gen', label: 'Generative' },
26
- { value: 'mc', label: 'Multichoice' }
27
- ];
28
-
29
- const taskFolders = [
30
- { value: 'selected', label: 'FineTasks' },
31
- { value: 'non_selected', label: 'Non-Selected' }
32
- ];
33
-
34
- function createDropdown(options, onChange) {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  const select = document.createElement('select');
 
36
  options.forEach(option => {
37
  const optionElement = document.createElement('option');
38
- if (typeof option === 'object' && option.value && option.label) {
39
- optionElement.value = option.value;
40
- optionElement.textContent = option.label;
41
- } else {
42
- optionElement.value = option;
43
- optionElement.textContent = option;
44
- }
45
  select.appendChild(optionElement);
46
  });
47
- select.addEventListener('change', onChange);
48
  return select;
49
  }
50
 
51
- function createPerTaskResultsTable(data, tableType, metric) {
52
- const tableWrapper = document.createElement('div');
53
- tableWrapper.className = 'table-wrapper fine-tasks-table-wrapper';
 
 
 
 
 
 
 
 
 
 
54
 
55
- const table = document.createElement('table');
56
- table.className = 'results-table fine-tasks-results-table';
 
 
 
57
 
58
- const columns = ['Task', 'Type', ...(tableType === 'gen' ? ['f1', 'prefix_match'] : ['acc', 'acc_norm', 'acc_norm_token', 'acc_norm_pmi'])];
59
-
60
- const columnNameMap = {
61
- // 'Task': 'Task',
62
- // 'Type': 'Type',
63
- // 'f1': 'f1',
64
- // 'prefix_match': 'prefix_match',
65
- // 'acc': 'acc',
66
- 'acc_norm': 'acc_char',
67
- 'acc_norm_token': 'acc_token',
68
- 'acc_norm_pmi': 'acc_pmi',
69
- 'prefix_match': 'prefix'
70
- };
71
 
72
- const taskMetricMap = {
73
- 'max_score': 'score',
74
- 'avg_snr': 'snr',
75
- 'avg_spearman': 'monotonicity',
76
- 'max_n_std': 'non-randomness',
77
- 'avg_kendall_tau_a': 'ordering'
78
- // 'avg_spearman': 'monotonicity',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79
  }
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- const header = table.createTHead();
82
- const headerRow = header.insertRow();
83
- columns.forEach(column => {
84
- const th = document.createElement('th');
85
- th.textContent = columnNameMap[column] || column;
86
 
87
- if (th.textContent !== "Task" && th.textContent !== "Type") {
88
- th.textContent += " " + (taskMetricMap[metric] || metric);
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  }
90
- th.title = th.textContent;
91
- if (column === 'Type')
92
- th.style.width = '40px';
93
- headerRow.appendChild(th);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
- const body = table.createTBody();
97
- data.forEach(row => {
98
- if (Object.values(row).every(value => value === '' || value === undefined || value === null)) {
99
- return;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- const tr = body.insertRow();
103
- columns.forEach(column => {
104
- const td = tr.insertCell();
105
- let value = row[column];
106
- if (column === 'Task') {
107
- const fullTaskName = value; // Store the full task name
108
- const parts = value.split('|');
109
- value = parts.length > 1 ? parts[1] : value;
110
- value = value.split('_mcf')[0].split('_cf')[0];
111
- td.title = fullTaskName; // Set the title attribute to show the full name on hover
112
- } else if (column === 'Type') {
113
- // Keep the task type as is
114
- } else if (typeof value === 'number') {
115
- value = value.toFixed(2);
116
- } else if (value && !isNaN(parseFloat(value))) {
117
- value = parseFloat(value).toFixed(2);
118
- } else {
119
- value = '';
120
- }
121
- td.textContent = value;
122
  });
123
  });
 
124
 
125
- tableWrapper.appendChild(table);
126
- return tableWrapper;
 
 
 
 
 
 
127
  }
128
 
129
- export function initFineTasks(containerId) {
130
- const container = document.getElementById(containerId);
131
- if (!container) return;
132
 
133
- const perTaskTitleElement = document.createElement('h3');
134
- perTaskTitleElement.textContent = 'Task Results';
135
- perTaskTitleElement.className = 'fine-tasks-title';
136
 
137
- const perTaskTableContainer = document.createElement('div');
138
- perTaskTableContainer.className = 'table-container';
 
139
 
140
- let perTaskDataTable;
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
- function updatePerTaskResults() {
143
- const language = languageDropdownPerTask.value;
144
- const metric = metricDropdownPerTask.value;
145
- const tableType = tableTypeDropdownPerTask.value;
146
- const taskFolder = taskFolderDropdownPerTask.value;
 
 
 
147
 
148
- const languageCode = languageMap[language];
 
 
 
 
 
 
149
 
150
- if (!languageCode) {
151
- console.error(`Language code not found for ${language}`);
152
- perTaskTableContainer.innerHTML = `<p>Error: Language code not found for ${language}</p>`;
153
- return;
154
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- let url = `data/tasks/${taskFolder}/${languageCode}/${metric}/${tableType}_stats.csv`;
157
-
158
- fetch(url)
159
- .then(response => {
160
- if (!response.ok) {
161
- throw new Error(`HTTP error! status: ${response.status}`);
162
- }
163
- return response.text();
164
- })
165
- .then(csvText => {
166
- const results = Papa.parse(csvText, { header: true }).data;
167
- perTaskTableContainer.innerHTML = '';
168
- const tableWrapper = createPerTaskResultsTable(results, tableType, metric);
169
- perTaskTableContainer.appendChild(tableWrapper);
170
-
171
- if (perTaskDataTable) {
172
- perTaskDataTable.destroy();
173
- }
174
-
175
- perTaskDataTable = new DataTable('.fine-tasks-results-table', {
176
- perPage: 10,
177
- perPageSelect: false,
178
- searchable: false,
179
- sortable: true,
180
- fixedHeight: true,
181
- labels: {
182
- info: '' // This removes the "Showing 1 to X of Y entries" text
183
- }
184
- });
185
-
186
- })
187
- .catch(error => {
188
- console.error('Error fetching CSV:', error);
189
- perTaskTableContainer.innerHTML = `<p>Error loading data: ${error.message}</p>`;
190
- });
191
  }
192
 
193
- const perTaskControls = document.createElement('div');
194
- perTaskControls.className = 'controls fine-tasks-controls';
195
-
196
- // Task folder control group
197
- const taskFolderControlGroup = document.createElement('div');
198
- taskFolderControlGroup.className = 'control-group';
199
- const taskFolderLabelPerTask = document.createElement('label');
200
- taskFolderLabelPerTask.textContent = 'Task Set: ';
201
- const taskFolderDropdownPerTask = createDropdown(taskFolders, updatePerTaskResults);
202
- taskFolderDropdownPerTask.value = 'selected'; // Set default to FineTasks
203
- taskFolderControlGroup.appendChild(taskFolderLabelPerTask);
204
- taskFolderControlGroup.appendChild(taskFolderDropdownPerTask);
205
-
206
- // Language control group
207
- const languageControlGroup = document.createElement('div');
208
- languageControlGroup.className = 'control-group';
209
- const languageLabelPerTask = document.createElement('label');
210
- languageLabelPerTask.textContent = 'Language: ';
211
- const languageDropdownPerTask = createDropdown(Object.keys(languageMap), updatePerTaskResults);
212
- languageControlGroup.appendChild(languageLabelPerTask);
213
- languageControlGroup.appendChild(languageDropdownPerTask);
214
-
215
- // Table type control group
216
- const tableTypeControlGroup = document.createElement('div');
217
- tableTypeControlGroup.className = 'control-group';
218
- const tableTypeLabelPerTask = document.createElement('label');
219
- tableTypeLabelPerTask.textContent = 'Type: ';
220
- const tableTypeDropdownPerTask = createDropdown(tableTypes, updatePerTaskResults);
221
- tableTypeControlGroup.appendChild(tableTypeLabelPerTask);
222
- tableTypeControlGroup.appendChild(tableTypeDropdownPerTask);
223
-
224
- // Metric control group
225
- const metricControlGroup = document.createElement('div');
226
- metricControlGroup.className = 'control-group';
227
- const metricLabelPerTask = document.createElement('label');
228
- metricLabelPerTask.textContent = 'Criteria: ';
229
- const metricDropdownPerTask = createDropdown(metricTypes, updatePerTaskResults);
230
- metricDropdownPerTask.value = 'max_score'; // Set default to Max Score
231
- metricControlGroup.appendChild(metricLabelPerTask);
232
- metricControlGroup.appendChild(metricDropdownPerTask);
233
-
234
- perTaskControls.appendChild(taskFolderControlGroup);
235
- perTaskControls.appendChild(languageControlGroup);
236
- perTaskControls.appendChild(tableTypeControlGroup);
237
- perTaskControls.appendChild(metricControlGroup);
238
-
239
- container.appendChild(perTaskControls);
240
- // container.appendChild(perTaskTitleElement);
241
- container.appendChild(perTaskTableContainer);
242
-
243
- // Initialize with default values
244
- updatePerTaskResults();
245
  }
 
 
1
+ import Plotly from 'plotly.js-basic-dist-min';
2
  import Papa from 'papaparse';
3
+ import _ from 'lodash';
4
+ import { getColor } from './colors.mjs';
5
 
6
  const languageMap = {
7
  'Arabic': 'ar',
 
12
  'Thai': 'th',
13
  'Chinese': 'zh',
14
  'French': 'fr',
15
+ 'Hindi': 'hi'
16
  };
17
 
18
+ const runNameMap = {
19
+ "orion": "Dataset-A",
20
+ "helios": "Dataset-B",
21
+ "lynx": "Dataset-C",
22
+ "aquila": "Dataset-D",
23
+ "commoncrawl": "CommonCrawl",
24
+ "baseline": "Baseline"
25
+ };
26
+
27
+ const taskLists = {
28
+ ar: ['acva_ara:_average', 'alfgahafa_mlqa_ara_cf', 'alghafa_arc_ara_cf:easy', 'alghafa_facts_ara_cf', 'alghafa_meta_dialects_ara_cf', 'alghafa_mmlu_ara_cf:_average', 'alghafa_openbookqa_ara_cf', 'alghafa_piqa_ara_cf', 'alghafa_race_ara_cf', 'alghafa_rating_sentiment_ara_cf', 'alghafa_rating_sentiment_no_neutral_ara_cf', 'alghafa_sciqa_ara_cf', 'alghafa_sentiment_ara_cf', 'arcd_ara', 'belebele_arb_Arab_cf', 'boolq_ara', 'exams_ara_cf:_average', 'mkqa_ara:_average', 'mlmm_arc_ara_cf:challenge', 'mlmm_hellaswag_ara_cf', 'mlmm_mmlu_ara_cf:_average', 'mlmm_truthfulqa_ara_cf:mc1', 'mlmm_truthfulqa_ara_cf:mc2', 'mlqa_ara', 'mmlu_ara_cf:_average', 'soqal_ara_cf', 'toxigen_ara_cf', 'tydiqa_ara', 'xcodah_ara_cf', 'xcopa_ara_cf', 'xcsqa_ara_cf', 'xnli2.0_ara_cf', 'xnli_ara_cf', 'xquad_ara', 'xstory_cloze_ara_cf'],
29
+ fr: ['belebele_fra_Latn_cf', 'community_boolq_fra_cf', 'exams_fra_cf:_average', 'fquadv2_fra', 'frenchbench_arc_fra_cf:challenge', 'frenchbench_hellaswag_fra_cf', 'meta_mmlu_fra_cf:_average', 'mintaka_fra', 'mkqa_fra:_average', 'mlmm_arc_fra_cf:challenge', 'mlmm_hellaswag_fra_cf', 'mlmm_mmlu_fra_cf:_average', 'mlmm_truthfulqa_fra_cf:mc1', 'mlmm_truthfulqa_fra_cf:mc2', 'pawsx_fra_cf', 'xcodah_fra_cf', 'xcsqa_fra_cf', 'xnli2.0_fra_cf', 'xwinograd_fra_cf'],
30
+ hi: ['belebele_hin_Deva_cf', 'community_arc_hin_cf:challenge', 'community_arc_hin_cf:easy', 'community_boolq_hin', 'community_hellaswag_hin_cf', 'indicnxnli_hin_cf', 'indicqa_hin', 'indicxcopa_hin_cf', 'meta_mmlu_hin_cf:_average', 'mintaka_hin', 'mlmm_arc_hin_cf:challenge', 'mlmm_hellaswag_hin_cf', 'mlmm_mmlu_hin_cf:_average', 'mlmm_truthfulqa_hin_cf:mc1', 'mlmm_truthfulqa_hin_cf:mc2', 'mlqa_hin', 'xcodah_hin_cf', 'xcsqa_hin_cf', 'xnli2.0_hin_cf', 'xnli_hin_cf', 'xquad_hin', 'xstory_cloze_hin_cf'],
31
+ ru: ['belebele_rus_Cyrl_cf', 'chegeka_rus', 'mathlogic_qa_rus_cf', 'mera_openbookqa_rus_cf', 'mera_worldtree_rus_cf', 'mkqa_rus:_average', 'mlmm_arc_rus_cf:challenge', 'mlmm_hellaswag_rus_cf', 'mlmm_mmlu_rus_cf:_average', 'mlmm_truthfulqa_rus_cf:mc1', 'mlmm_truthfulqa_rus_cf:mc2', 'parus_rus_cf', 'rcb_rus_cf', 'rummlu_rus_cf:_average', 'sber_squad_rus', 'tydiqa_rus', 'xcodah_rus_cf', 'xcsqa_rus_cf', 'xnli2.0_rus_cf', 'xquad_rus', 'xstory_cloze_rus_cf', 'xwinograd_rus_cf'],
32
+ sw: ['afric_mmlu_swa_cf:_average', 'afric_xnli_swa_cf', 'belebele_swh_Latn_cf', 'community_arc_swa_cf:challenge', 'community_arc_swa_cf:easy', 'community_mmlu_swa_cf', 'kenswquad_swa', 'm3exams_swa_cf', 'openai_mmlu_swa_cf:_average', 'tydiqa_swa', 'xcodah_swa_cf', 'xcopa_swa_cf', 'xcsqa_swa_cf', 'xnli2.0_swa_cf', 'xnli_swa_cf', 'xstory_cloze_swa_cf'],
33
+ te: ['belebele_tel_Telu_cf', 'community_hellaswag_tel_cf', 'indicnxnli_tel_cf', 'indicqa_tel', 'indicxcopa_tel_cf', 'mlmm_arc_tel_cf:challenge', 'mlmm_hellaswag_tel_cf', 'mlmm_mmlu_tel_cf:_average', 'mlmm_truthfulqa_tel_cf:mc1', 'mlmm_truthfulqa_tel_cf:mc2', 'tydiqa_tel', 'xstory_cloze_tel_cf'],
34
+ th: ['belebele_tha_Thai_cf', 'community_hellaswag_tha_cf', 'm3exams_tha_cf', 'meta_mmlu_tha_cf:_average', 'mkqa_tha:_average', 'thai_exams_tha_cf:_average', 'thai_exams_tha_cf:tgat', 'thaiqa_tha', 'wsci_tha_cf', 'xcopa_tha_cf', 'xnli2.0_tha_cf', 'xnli_tha_cf', 'xquad_tha'],
35
+ tr: ['belebele_tur_Latn_cf', 'community_arc_tur_cf:easy', 'community_hellaswag_tur_cf', 'community_mmlu_tur_cf:_average', 'community_truthfulqa_tur_cf:mc1', 'community_truthfulqa_tur_cf:mc2', 'community_xwinograd_tur_cf', 'exams_tur_cf:_average', 'mkqa_tur:_average', 'tquadv2_tur', 'xcopa_tur_cf', 'xnli2.0_tur_cf', 'xnli_tur_cf', 'xquad_tur'],
36
+ zh: ['agieval_zho_cf:_average', 'belebele_zho_Hans_cf', 'c3_zho_cf', 'ceval_zho_cf:_average', 'chinese_squad_zho', 'cmath_zho_cf', 'cmmlu_zho_cf:_average', 'cmnli_zho_cf', 'cmrc2018_zho', 'm3exams_zho_cf', 'mkqa_zho:_average', 'mlmm_arc_zho_cf:challenge', 'mlmm_hellaswag_zho_cf', 'mlmm_mmlu_zho_cf:_average', 'mlmm_truthfulqa_zho_cf:mc1', 'mlmm_truthfulqa_zho_cf:mc2', 'ocnli_zho_cf', 'pawsx_zho_cf', 'xcodah_zho_cf', 'xcopa_zho_cf', 'xcsqa_zho_cf', 'xnli2.0_zho_cf', 'xnli_zho_cf', 'xquad_zho', 'xstory_cloze_zho_cf', 'xwinograd_zho_cf']
37
+ };
38
+
39
+ const LINE_SETTINGS = {
40
+ width: 2.5,
41
+ type: "scatter",
42
+ mode: "lines+markers",
43
+ };
44
+
45
+ const DEFAULT_LAYOUT = {
46
+ font: {
47
+ family: "apple-system, Arial, sans-serif",
48
+ },
49
+ title: {
50
+ font: {
51
+ size: 15,
52
+ },
53
+ },
54
+ xaxis: {
55
+ title: {
56
+ text: "Training Tokens (billions)",
57
+ font: {
58
+ size: 14,
59
+ },
60
+ },
61
+ tickfont: {
62
+ size: 12,
63
+ },
64
+ showgrid: false,
65
+ mirror: true,
66
+ ticks: "outside",
67
+ showline: true,
68
+ },
69
+ yaxis: {
70
+ title: {
71
+ font: {
72
+ size: 14,
73
+ },
74
+ standoff: 10,
75
+ },
76
+ showgrid: false,
77
+ mirror: true,
78
+ ticks: "outside",
79
+ showline: true,
80
+ tickfont: {
81
+ size: 12,
82
+ },
83
+ },
84
+ height: 300, // You can adjust this value
85
+ autosize: true,
86
+ legend: {
87
+ orientation: 'h', // Set to 'h' for horizontal legend (required for columns)
88
+ yanchor: 'bottom',
89
+ y: 0, // Position at the bottom
90
+ xanchor: 'right',
91
+ x: 1, // Position at the right
92
+ traceorder: 'normal',
93
+ font: { size: 12 },
94
+ tracegroupgap: 0, // Space between legend items
95
+ bgcolor: 'rgba(255, 255, 255, 0.8)' // White background at 80% opacity (alpha 0.8, i.e. 20% transparent)
96
+ },
97
+ margin: {
98
+ t: 25,
99
+ b: 60,
100
+ l: 60,
101
+ r: 40,
102
+ },
103
+ };
104
+
105
+ export function initPlotApplets() {
106
+ const plotContainers = document.querySelectorAll('.task-signal-plot');
107
+ plotContainers.forEach(container => {
108
+ initPlotApplet(container);
109
+ });
110
+ }
111
+
112
+ function initPlotApplet(container) {
113
+ const defaultLanguage = container.dataset.language || 'Arabic';
114
+ const defaultTask = container.dataset.task || '';
115
+ const defaultMetric = container.dataset.metric || '';
116
+ const groupSeeds = container.dataset.groupSeeds === 'true';
117
+ const showControls = container.dataset.showControls === 'true';
118
+ const taskMetrics = (container.dataset.taskMetrics || 'monotonicity,snr,ordering,randomness').split(",");
119
+
120
+ const controls = createControls(container, defaultLanguage, defaultTask, defaultMetric, taskMetrics);
121
+ if (!showControls)
122
+ controls.style.display = 'none';
123
+ container.appendChild(controls);
124
+
125
+ const plotContainer = document.createElement('div');
126
+ plotContainer.className = 'plot-container';
127
+ container.appendChild(plotContainer);
128
+
129
+ const statsContainer = document.createElement('div');
130
+ statsContainer.className = 'stats-container';
131
+ container.appendChild(statsContainer);
132
+
133
+
134
+ // Create an initial empty plot
135
+ Plotly.newPlot(plotContainer, []);
136
+
137
+ // Set up the resize function
138
+ const resizePlot = () => {
139
+ const width = container.offsetWidth;
140
+ Plotly.relayout(plotContainer, { width: width });
141
+ };
142
+
143
+ // Add resize listener
144
+ window.addEventListener('resize', resizePlot);
145
+
146
+ // Initial resize
147
+ resizePlot();
148
+
149
+ // Load the initial data
150
+ updateLanguageTasks(container, defaultTask, defaultMetric, groupSeeds, taskMetrics);
151
+ }
152
+
153
+ function createControls(container, defaultLanguage, defaultTask, defaultMetric, taskMetrics) {
154
+ const controls = document.createElement('div');
155
+ controls.className = 'controls';
156
+
157
+ const languageSelect = createSelect('language', Object.keys(languageMap), () => updateLanguageTasks(container, '', '', true, taskMetrics));
158
+ languageSelect.value = defaultLanguage;
159
+
160
+ const taskSelect = createSelect('task', [], () => updateMetrics(container, '', true, taskMetrics));
161
+ const metricSelect = createSelect('metric', [], () => updatePlot(container, taskMetrics));
162
+
163
+ controls.appendChild(createControlGroup('Language:', languageSelect));
164
+ controls.appendChild(createControlGroup('Task:', taskSelect));
165
+ controls.appendChild(createControlGroup('Metric:', metricSelect));
166
+
167
+ return controls;
168
+ }
169
+
170
+ function createSelect(id, options, onChangeHandler) {
171
  const select = document.createElement('select');
172
+ select.id = id;
173
  options.forEach(option => {
174
  const optionElement = document.createElement('option');
175
+ optionElement.value = option;
176
+ optionElement.textContent = option;
 
 
 
 
 
177
  select.appendChild(optionElement);
178
  });
179
+ select.addEventListener('change', onChangeHandler);
180
  return select;
181
  }
182
 
183
+ function createControlGroup(labelText, inputElement) {
184
+ const group = document.createElement('div');
185
+ group.className = 'control-group';
186
+
187
+ const label = document.createElement('label');
188
+ label.textContent = labelText;
189
+ label.className = 'control-label';
190
+
191
+ group.appendChild(label);
192
+ group.appendChild(inputElement);
193
+
194
+ return group;
195
+ }
196
 
197
+ async function updateLanguageTasks(container, defaultTask = '', defaultMetric = '', groupSeeds, taskMetrics) {
198
+ const languageSelect = container.querySelector('#language');
199
+ const taskSelect = container.querySelector('#task');
200
+ const language = languageSelect.value;
201
+ const langCode = languageMap[language];
202
 
203
+ taskSelect.innerHTML = '<option value="">Loading tasks...</option>';
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
+ try {
206
+ const tasks = await getTasksForLanguage(langCode);
207
+
208
+ taskSelect.innerHTML = '';
209
+ if (tasks.length > 0) {
210
+ tasks.forEach(task => {
211
+ const option = document.createElement('option');
212
+ option.value = task;
213
+ option.textContent = truncateText(task, 25); // Reduced from 30 to 25
214
+ option.title = task; // Set full task name as title for tooltip
215
+ taskSelect.appendChild(option);
216
+ });
217
+
218
+ if (defaultTask && tasks.includes(defaultTask)) {
219
+ taskSelect.value = defaultTask;
220
+ } else {
221
+ taskSelect.selectedIndex = 0;
222
+ }
223
+
224
+ await updateMetrics(container, defaultMetric, groupSeeds, taskMetrics);
225
+ } else {
226
+ taskSelect.innerHTML = '<option value="">No tasks available</option>';
227
+ clearPlot(container);
228
+ }
229
+ } catch (error) {
230
+ console.error('Error fetching tasks:', error);
231
+ taskSelect.innerHTML = '<option value="">Error loading tasks</option>';
232
+ clearPlot(container);
233
  }
234
+ }
235
+
236
+ async function getTasksForLanguage(langCode) {
237
+ return taskLists[langCode] || [];
238
+ }
239
+
240
+ async function updateMetrics(container, defaultMetric = '', groupSeeds, taskMetrics) {
241
+ const language = container.querySelector('#language').value;
242
+ const task = container.querySelector('#task').value;
243
+ const langCode = languageMap[language];
244
+ const metricSelect = container.querySelector('#metric');
245
 
246
+ metricSelect.innerHTML = '<option value="">Loading metrics...</option>';
 
 
 
 
247
 
248
+ try {
249
+ const metrics = await getMetricsForTask(langCode, task);
250
+
251
+ metricSelect.innerHTML = '';
252
+ metrics.forEach(metric => {
253
+ const option = document.createElement('option');
254
+ option.value = metric;
255
+ option.textContent = metric;
256
+ metricSelect.appendChild(option);
257
+ });
258
+
259
+ if (defaultMetric && metrics.includes(defaultMetric)) {
260
+ metricSelect.value = defaultMetric;
261
+ } else if (metricSelect.options.length > 0) {
262
+ metricSelect.selectedIndex = 0;
263
  }
264
+
265
+ await updatePlot(container, taskMetrics);
266
+ } catch (error) {
267
+ console.error('Error fetching metrics:', error);
268
+ metricSelect.innerHTML = '<option value="">Error loading metrics</option>';
269
+ clearPlot(container);
270
+ }
271
+ }
272
+
273
+ async function getMetricsForTask(langCode, task) {
274
+ return new Promise((resolve, reject) => {
275
+ Papa.parse(`data/${langCode}/${task}_stats.csv`, {
276
+ download: true,
277
+ header: true,
278
+ complete: function(results) {
279
+ const metrics = [...new Set(results.data.map(row => row.metric).filter(metric => metric))];
280
+ resolve(metrics);
281
+ },
282
+ error: function(error) {
283
+ console.error('Error fetching metrics:', error);
284
+ reject(error);
285
+ }
286
+ });
287
  });
288
+ }
289
+
290
+ function updatePlot(container, taskMetrics) {
291
+ const language = container.querySelector('#language').value;
292
+ const task = container.querySelector('#task').value;
293
+ const metric = container.querySelector('#metric').value;
294
+ const title = container.dataset.title;
295
+ const langCode = languageMap[language];
296
+
297
+ if (!langCode || !task || !metric) {
298
+ clearPlot(container);
299
+ return;
300
+ }
301
+
302
+ const dataUrl = `data/${langCode}/${task}_data.csv`;
303
+ const statsUrl = `data/${langCode}/${task}_stats.csv`;
304
+
305
+ Promise.all([
306
+ new Promise((resolve, reject) => {
307
+ Papa.parse(dataUrl, {
308
+ download: true,
309
+ header: true,
310
+ dynamicTyping: true,
311
+ complete: resolve,
312
+ error: reject
313
+ });
314
+ }),
315
+ new Promise((resolve, reject) => {
316
+ Papa.parse(statsUrl, {
317
+ download: true,
318
+ header: true,
319
+ dynamicTyping: true,
320
+ complete: resolve,
321
+ error: reject
322
+ });
323
+ })
324
+ ]).then(([dataResult, statsResult]) => {
325
+ const taskData = dataResult.data;
326
+ const statsData = statsResult.data;
327
+ plotData(container, taskData, statsData, metric, title, taskMetrics);
328
+ }).catch(error => {
329
+ console.error('Error parsing CSV:', error);
330
+ clearPlot(container);
331
+ });
332
+ }
333
+
334
+ function plotData(container, data, stats, metric, title, taskMetrics) {
335
+ const groupSeeds = container.dataset.groupSeeds === 'true';
336
+ const sortedData = sortDataByTokens(data);
337
+ const groupedData = groupDataByRunname(sortedData, groupSeeds, metric);
338
+ const interpolatedData = interpolateData(groupedData, metric);
339
+ const smoothedData = smoothData(interpolatedData, metric);
340
+ const traces = createTraces(smoothedData, metric);
341
+
342
+ const plotContainer = container.querySelector('.plot-container');
343
+
344
+ const layout = _.merge({}, DEFAULT_LAYOUT, {
345
+ title: { text: `${title}` },
346
+ xaxis: {
347
+ title: { text: 'Training Tokens (billions)' },
348
+ tickvals: [0, 5, 10, 15, 20, 25],
349
+ ticktext: ['0', '5B', '10B', '15B', '20B', '25B'],
350
+ tickangle: 45,
351
+ range: [0, 30], // Set the range to start from 0 and end at 30B
352
+ },
353
+ yaxis: {
354
+ title: { text: 'Score' },
355
+ range: [Math.min(...traces.flatMap(trace => trace.y)) * 0.95, Math.max(...traces.flatMap(trace => trace.y)) * 1.05], // Add 5% padding to the top and bottom
356
+ },
357
+ width: container.offsetWidth,
358
+ });
359
+
360
+ Plotly.newPlot(plotContainer, traces, layout, {responsive: true});
361
 
362
+ // Display statistics
363
+ displayStatistics(container, stats, metric, taskMetrics);
364
+ }
365
+
366
+ function displayStatistics(container, stats, metric, taskMetrics) {
367
+ const statsContainer = container.querySelector('.stats-container');
368
+ const metricStats = stats.find(stat => stat.metric === metric);
369
+ if (metricStats) {
370
+ statsContainer.innerHTML = `
371
+ <div class="compact-stats${taskMetrics.length === 1 ? '-single' : ''}">
372
+ ${taskMetrics.includes('monotonicity') ? '<span title="Average Spearman Correlation">Monotonicity: ' + metricStats.avg_spearman.toFixed(2) + '</span>' : ''}
373
+ ${taskMetrics.includes('snr') ? '<span title="Average Signal-to-Noise Ratio">Signal-to-Noise: ' + metricStats.avg_snr.toFixed(2) + '</span>' : ''}
374
+ ${taskMetrics.includes('ordering') ? '<span title="Average Kendall Tau-a">Ordering Consistency: ' + metricStats.avg_kendall_tau_a.toFixed(2) + '</span>' : ''}
375
+ ${taskMetrics.includes('randomness') ? '<span title="Max N Standard Deviations">Non-Randomness: ' + metricStats.max_n_std.toFixed(2) + '</span>' : ''}
376
+ </div>
377
+ `;
378
+ } else {
379
+ statsContainer.innerHTML = '<p>No statistics available for this metric.</p>';
380
+ }
381
+ }
382
+
383
+ function getReducedTickValues(tokens) {
384
+ const uniqueTokens = [...new Set(tokens)].sort((a, b) => a - b);
385
+ const tokenCount = uniqueTokens.length;
386
+ const targetTickCount = 10; // Adjust this value to increase/decrease the number of ticks
387
+
388
+ if (tokenCount <= targetTickCount) {
389
+ return uniqueTokens;
390
+ }
391
+
392
+ const stride = Math.ceil(tokenCount / targetTickCount);
393
+ return uniqueTokens.filter((_, index) => index % stride === 0);
394
+ }
395
+
396
+ function formatTickLabel(value) {
397
+ if (value >= 1e9) {
398
+ return (value / 1e9).toFixed(1) + 'B';
399
+ } else if (value >= 1e6) {
400
+ return (value / 1e6).toFixed(1) + 'M';
401
+ } else if (value >= 1e3) {
402
+ return (value / 1e3).toFixed(1) + 'K';
403
+ }
404
+ return value.toString();
405
+ }
406
+
407
/**
 * Computes summary statistics of `metric` across all non-baseline runs.
 *
 * The baseline run is the first key of `data` whose lower-cased name contains
 * 'baseline'; every other key is treated as a regular run.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run name; each row is
 *   read for `tokens` and `[metric]`.
 * @param {string} metric - Row property holding the score.
 * @returns {{avg_spearman: number, avg_kendall_tau_a: number, avg_snr: number,
 *   max_n_std: number}} Averages over the non-baseline runs (and the maximum
 *   for `max_n_std`). All values are 0 when there are no non-baseline runs.
 *   `avg_snr` and `max_n_std` stay 0 when no baseline run exists.
 */
function computeStatistics(data, metric) {
  const stats = {
    avg_spearman: 0,
    avg_kendall_tau_a: 0,
    avg_snr: 0,
    max_n_std: 0,
  };

  const runNames = Object.keys(data);
  const baselineRun = runNames.find((key) => key.toLowerCase().includes('baseline'));
  const nonBaselineRuns = runNames.filter((key) => key !== baselineRun);
  const numRuns = nonBaselineRuns.length;

  // Nothing to average: return zeros rather than dividing by zero below.
  if (numRuns === 0) {
    return stats;
  }

  // The baseline mean does not depend on the run, so compute it once.
  const baselineMean = baselineRun
    ? _.mean(data[baselineRun].map((row) => row[metric]))
    : null;

  nonBaselineRuns.forEach((run) => {
    const runData = data[run];
    const tokens = runData.map((row) => row.tokens);
    const scores = runData.map((row) => row[metric]);

    // Spearman correlation; a non-finite result would poison the average.
    const spearman = spearmanCorrelation(tokens, scores);
    if (Number.isFinite(spearman)) {
      stats.avg_spearman += spearman;
    }

    // Kendall Tau-a over growing prefixes taken from the second half of the run.
    // NOTE(review): both arguments are prefixes of the same score series, so the
    // compared pairs are identical over the shared length; confirm this is the
    // intended definition of ordering consistency.
    const lastHalf = Math.floor(runData.length / 2);
    const kendallTauValues = [];
    for (let i = lastHalf; i < runData.length - 1; i++) {
      kendallTauValues.push(kendallTauA(scores.slice(0, i + 1), scores.slice(0, i + 2)));
    }
    // Runs with fewer than two rows produce no values; the mean of an empty
    // list would be NaN, so they contribute 0.
    if (kendallTauValues.length > 0) {
      stats.avg_kendall_tau_a += _.mean(kendallTauValues);
    }

    // SNR and max_n_std are only computed when a baseline run exists.
    if (baselineRun) {
      const stdDev = standardDeviation(scores);
      // A flat run has zero deviation; dividing by it would give Infinity or NaN.
      if (stdDev > 0) {
        stats.avg_snr += _.mean(scores) / stdDev;
        const nStd = (_.max(scores) - baselineMean) / stdDev;
        if (Number.isFinite(nStd)) {
          stats.max_n_std = Math.max(stats.max_n_std, nStd);
        }
      }
    }
  });

  // Average the accumulated statistics.
  stats.avg_spearman /= numRuns;
  stats.avg_kendall_tau_a /= numRuns;
  stats.avg_snr /= numRuns;

  return stats;
}
452
+
453
/**
 * Spearman rank correlation of two equally long series, computed with the
 * sum-of-squared-rank-differences formula `1 - 6 * sum(d^2) / (n * (n^2 - 1))`.
 *
 * @param {number[]} x - First series.
 * @param {number[]} y - Second series; the first `x.length` entries are used.
 * @returns {number} Correlation, or 0 when fewer than two points are given
 *   (the formula would otherwise divide by zero).
 */
function spearmanCorrelation(x, y) {
  const n = x.length;
  if (n < 2) {
    return 0;
  }

  const rankX = rankData(x);
  const rankY = rankData(y);

  let sumSquaredDiffs = 0;
  for (let i = 0; i < n; i++) {
    const d = rankX[i] - rankY[i];
    sumSquaredDiffs += d * d;
  }

  return 1 - (6 * sumSquaredDiffs) / (n * (n * n - 1));
}
466
+
467
/**
 * Assigns each value its 1-based position in the ascending sort of `data`.
 * Tied values all receive the rank of their first occurrence in sorted order.
 *
 * @param {number[]} data - Values to rank; the array is not modified.
 * @returns {number[]} Ranks, aligned index-for-index with `data`.
 */
function rankData(data) {
  const sorted = [...data].sort((a, b) => a - b);

  // Record the first sorted position of every value once, instead of scanning
  // the sorted array for each element.
  const firstRank = new Map();
  sorted.forEach((value, index) => {
    // NaN is skipped so it keeps the rank 0 that an `indexOf` scan would give.
    if (!Number.isNaN(value) && !firstRank.has(value)) {
      firstRank.set(value, index + 1);
    }
  });

  return data.map((value) => (firstRank.has(value) ? firstRank.get(value) : 0));
}
471
+
472
/**
 * Kendall tau-a of two series: (concordant - discordant) pairs divided by the
 * total number of pairs. Pairs tied in either series count as neither.
 *
 * @param {number[]} x - First series.
 * @param {number[]} y - Second series.
 * @returns {number} Tau-a over the first `min(x.length, y.length)` entries, or
 *   0 when fewer than two entries are available (there are no pairs to compare).
 */
function kendallTauA(x, y) {
  // Only indices present in both series can form comparable pairs.
  const n = Math.min(x.length, y.length);
  if (n < 2) {
    return 0;
  }

  let concordant = 0;
  let discordant = 0;

  for (let i = 0; i < n; i++) {
    for (let j = i + 1; j < n; j++) {
      const agreement = Math.sign(x[j] - x[i]) * Math.sign(y[j] - y[i]);
      if (agreement > 0) {
        concordant++;
      } else if (agreement < 0) {
        discordant++;
      }
    }
  }

  return (concordant - discordant) / (n * (n - 1) / 2);
}
488
+
489
/**
 * Population standard deviation: the square root of the mean squared
 * deviation from the mean.
 *
 * @param {number[]} values - Samples.
 * @returns {number} Standard deviation of `values`.
 */
function standardDeviation(values) {
  const center = _.mean(values);
  const variance = _.mean(values.map((sample) => (sample - center) * (sample - center)));
  return Math.sqrt(variance);
}
498
+
499
/**
 * Re-samples every run onto the union of token values seen across all runs.
 *
 * For each run and each token value:
 *  - an existing row with that token value is returned as is;
 *  - a value before the run's first row or after its last row copies the
 *    nearest row and overrides its `tokens`;
 *  - otherwise `[metric]` is linearly interpolated between the neighbouring
 *    rows, and all other properties are copied from the lower neighbour.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run name.
 * @param {string} metric - Row property to interpolate.
 * @returns {Object<string, Array<Object>>} Same keys as `data`; each run has
 *   one row per distinct token value, ascending.
 */
function interpolateData(data, metric) {
  // The token grid is shared by all runs, so build it once up front.
  const allTokens = _.uniq(
    _.flatMap(Object.values(data), (runRows) => runRows.map((r) => r.tokens)),
  ).sort((a, b) => a - b);

  return _.mapValues(data, (rows) => {
    const sortedRows = _.sortBy(rows, 'tokens');

    return allTokens.map((token) => {
      const exactMatch = _.find(sortedRows, { tokens: token });
      if (exactMatch) {
        return exactMatch;
      }

      const lowerRow = _.findLast(sortedRows, (r) => r.tokens < token);
      const upperRow = _.find(sortedRows, (r) => r.tokens > token);

      // Outside the run's own token range: hold the nearest value.
      if (!lowerRow) {
        return { ...upperRow, tokens: token };
      }
      if (!upperRow) {
        return { ...lowerRow, tokens: token };
      }

      const ratio = (token - lowerRow.tokens) / (upperRow.tokens - lowerRow.tokens);
      const interpolatedMetric = lowerRow[metric] + (upperRow[metric] - lowerRow[metric]) * ratio;

      return {
        ...lowerRow,
        tokens: token,
        [metric]: interpolatedMetric,
      };
    });
  });
}
525
 
526
/**
 * Applies a trailing moving average to `metric` in every run.
 *
 * Each row's value becomes the mean over itself and up to `windowSize - 1`
 * preceding rows; rows near the start use however many rows are available.
 *
 * @param {Object<string, Array<Object>>} data - Rows per run name.
 * @param {string} metric - Row property to smooth.
 * @param {number} [windowSize=3] - Maximum number of rows averaged per point.
 * @returns {Object<string, Array<Object>>} Copies of the rows with `[metric]`
 *   replaced by the smoothed value.
 */
function smoothData(data, metric, windowSize = 3) {
  const smoothRun = (rows) => rows.map((row, position) => {
    const windowStart = Math.max(0, position - windowSize + 1);
    const trailingRows = rows.slice(windowStart, position + 1);
    return { ...row, [metric]: _.meanBy(trailingRows, (r) => r[metric]) };
  });

  return _.mapValues(data, (rows) => smoothRun(rows));
}
535
 
536
/**
 * Returns the rows ordered by ascending `tokens`.
 *
 * @param {Array<Object>} data - Rows with a `tokens` property.
 * @returns {Array<Object>} Sorted array as produced by `_.sortBy`.
 */
function sortDataByTokens(data) {
  const rowsByTokens = _.sortBy(data, 'tokens');
  return rowsByTokens;
}
539
 
540
/**
 * Groups rows by their processed run name, dropping rows whose `runname` is
 * null, undefined or the literal string 'null_undefined'.
 *
 * @param {Array<Object>} data - Rows with `runname`, `seed`, `tokens` and
 *   `[metric]` properties.
 * @param {boolean} groupSeeds - When falsy, rows are keyed by
 *   `<processed run name>_<seed>` and returned unchanged. When truthy, rows are
 *   keyed by processed run name and rows sharing a `tokens` value are collapsed
 *   into one row.
 * @param {string} metric - Row property averaged when collapsing seeds.
 * @returns {Object<string, Array<Object>>} Rows per group key. A collapsed row
 *   copies the first row of its step and replaces `[metric]` with the mean,
 *   where values that do not parse to a non-zero number count as 0.
 */
function groupDataByRunname(data, groupSeeds, metric) {
  // Discard rows that carry no usable run name.
  const namedRows = data.filter(
    (row) => row.runname != null && row.runname !== 'null_undefined',
  );

  if (groupSeeds) {
    const rowsByRun = _.groupBy(namedRows, (row) => processRunName(row.runname));

    return _.mapValues(rowsByRun, (runRows) => {
      const rowsByStep = _.groupBy(runRows, 'tokens');
      return _.map(rowsByStep, (seedRows) => ({
        ...seedRows[0],
        [metric]: _.meanBy(seedRows, (row) => parseFloat(row[metric]) || 0),
      }));
    });
  }

  return _.groupBy(namedRows, (row) => `${processRunName(row.runname)}_${row.seed}`);
}
561
 
562
/**
 * Maps a raw run name to its display name.
 *
 * Returns the value of the first `runNameMap` entry whose key occurs as a
 * substring of `runname`; the name is returned unchanged when no key matches.
 *
 * @param {string} runname - Raw run name.
 * @returns {*} Mapped value from `runNameMap`, or `runname` itself.
 */
function processRunName(runname) {
  const matchingEntry = Object.entries(runNameMap).find(
    ([fragment]) => runname.includes(fragment),
  );
  return matchingEntry ? matchingEntry[1] : runname;
}
570
 
571
/**
 * Builds one trace object per run for plotting.
 *
 * Runs are ordered with `localeCompare`, except that runs whose name contains
 * 'baseline' (in any letter case) are placed after all others.
 *
 * @param {Object<string, Array<Object>>} groupedData - Rows per run name.
 * @param {string} metric - Row property used for the y values.
 * @returns {Array<Object>} Traces with `x` (tokens), `y` (metric values),
 *   `name`, `line`, `marker` and `mode` set.
 */
function createTraces(groupedData, metric) {
  const colorsMapping = new Map();
  const isBaseline = (runname) => runname.toLowerCase().includes('baseline');

  // Compare the baseline flags first so the comparator stays consistent when
  // both names are baselines, then fall back to alphabetical order.
  const sortedRunnames = Object.keys(groupedData).sort((a, b) => {
    const aIsBaseline = isBaseline(a);
    const bIsBaseline = isBaseline(b);
    if (aIsBaseline !== bIsBaseline) {
      return aIsBaseline ? 1 : -1;
    }
    return a.localeCompare(b);
  });

  return sortedRunnames.map((runname, index) => {
    const color = getColorForTrace(runname, colorsMapping, index);
    const rows = groupedData[runname];
    return {
      x: rows.map((row) => row.tokens),
      y: rows.map((row) => row[metric]),
      name: runname,
      line: {
        color,
        shape: 'spline',
        // Spread last so shared settings can override the defaults above.
        ...LINE_SETTINGS,
      },
      marker: {
        color,
        size: 6,
      },
      mode: 'lines+markers',
    };
  });
}
598
 
599
/**
 * Returns the color for a trace, reusing the one already stored for that name.
 *
 * @param {string} traceName - Trace name used as the cache key.
 * @param {Map<string, *>} colorsMapping - Cache of name -> color; updated when a
 *   new color is assigned.
 * @param {number} index - Index passed to `getColor` when no truthy color is
 *   cached yet.
 * @returns {*} The cached color, or the newly assigned one.
 */
function getColorForTrace(traceName, colorsMapping, index) {
  const cachedColor = colorsMapping.get(traceName);
  if (!cachedColor) {
    const assignedColor = getColor(index);
    colorsMapping.set(traceName, assignedColor);
    return assignedColor;
  }
  return cachedColor;
}
609
+
610
/**
 * Purges the Plotly plot hosted in the `.plot-container` element of `container`.
 *
 * @param {Element} container - Element that contains the `.plot-container` child.
 */
function clearPlot(container) {
  Plotly.purge(container.querySelector('.plot-container'));
}
614
+
615
/**
 * Shortens `text` to at most `maxLength` characters, ending with '..' when it
 * had to be cut.
 *
 * @param {string} text - Text to shorten.
 * @param {number} maxLength - Maximum length of the result. For values below 2
 *   the result is just '..', which is longer than `maxLength`.
 * @returns {string} `text` unchanged when it already fits, otherwise its first
 *   `maxLength - 2` characters followed by '..'.
 */
function truncateText(text, maxLength) {
  if (text.length <= maxLength) {
    return text;
  }
  // `slice` replaces the deprecated `substr`. The clamp keeps a negative
  // length meaning "no characters" rather than counting from the end.
  const keptLength = Math.max(0, maxLength - 2);
  return `${text.slice(0, keptLength)}..`;
}
619
+
app/src/content/assets/finetasks/data/ar/acva_ara:_average_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee86019600943234de0d00cb7f2cfb5f08adea529e281c47fb11ab39e904fa14
3
+ size 26104
app/src/content/assets/finetasks/data/ar/acva_ara:_average_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79551f7eeb2579538604681929741203205e6150f95187ea5319e3e9671f634e
3
+ size 1078
app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0753a9fb838808ff6855bfcce87eb7d716d406dff82985e64bd72abf3e0eeed6
3
+ size 20564
app/src/content/assets/finetasks/data/ar/alfgahafa_mlqa_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b42cd429953188f1e3a2f61a3bbcd3aa669421bac407a5f2843b9ad3bc287b9b
3
+ size 903
app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810a68eb754b4f0a3acae2a34c311676c78d926fd88e34e1c0bb9be949e3aa20
3
+ size 18155
app/src/content/assets/finetasks/data/ar/alghafa_arc_ara_cf:easy_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:642e379750d340963d86ff023426787891d7cb494bf135c33be48c0c9897519f
3
+ size 908
app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf7f6df15ba9e2c552c721bed4d292cf75a8bf6b3f3cbd5f65c9903b99e463d0
3
+ size 24386
app/src/content/assets/finetasks/data/ar/alghafa_exams_ara_cf:_average_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b25491622030909b5b075cd7744fcad61fabe7103253c14355710762cbdc6d6
3
+ size 928
app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3b1f369ae9a64e27702437a049456d90fff09c62133a0232cd146a19bfb1bba
3
+ size 17318
app/src/content/assets/finetasks/data/ar/alghafa_facts_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19159aa9195f4a9eebbc6a91431d65dce630ec97edf39e32fe3c0f8dc302e546
3
+ size 834
app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:714e3326ff11bfebb268366315fa4b5cf305b9cb8174c451db33773f5ac88d78
3
+ size 18138
app/src/content/assets/finetasks/data/ar/alghafa_meta_dialects_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a37090de73b4fc41f1011e031ad56ff95e3883662275daf8c67656e166b5f9
3
+ size 935
app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6959492567e052a2f9251d092f449dda7ed9118daca1441f5c146e6d2761e10c
3
+ size 23032
app/src/content/assets/finetasks/data/ar/alghafa_mmlu_ara_cf:_average_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f579eac42e69687634fa0d27ca738a80d5fd854b4dc33ef069210def32a7394
3
+ size 937
app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fe35704efef4b670a7f3ec6f64d64aa2e14f387caaa12b7c5da0eda18c4078a
3
+ size 22998
app/src/content/assets/finetasks/data/ar/alghafa_openbookqa_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e137221596d32ecd3f77c2f451c87e8ff8743a286816f974e7457290a2cfaec
3
+ size 925
app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a436d5d6e494aa37f2f9c4e4f14c2376d97d1c48ed116ca4f9c6f65caf0fbc3f
3
+ size 18478
app/src/content/assets/finetasks/data/ar/alghafa_piqa_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f01a72195ce586f94d8dad8210e5accd5459bf83712ad968b149701dfe4b9e8
3
+ size 880
app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b15aea8264a8bdac278cf2ac0d07cbcc0e8c7ccac150ace4e26dd65471e6fc
3
+ size 18432
app/src/content/assets/finetasks/data/ar/alghafa_race_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80ae78c9142cf89d1be55977578c2ca041838a5dbaa736b40958a13730e46ae9
3
+ size 893
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc28b21863e88fcc8fac6245d25da05db90d7da234708b341636400b2584769
3
+ size 18023
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3291cd3da2406bdcc358764cf42ae1a21cf9d1c07b4daf5998e7695a09317c37
3
+ size 936
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab37a3551307dadb3260bcd768b554954b7451493ce1e07bbaf8465d25f3e09b
3
+ size 16661
app/src/content/assets/finetasks/data/ar/alghafa_rating_sentiment_no_neutral_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20a08683aa5cdd6a8a64efcffcf942a4a26dcf2550a2d770b0ff46b8c40ecffe
3
+ size 970
app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a29f2dcacf4f8d53096dddfbeb1f5cb8eb8e8a5354dc2f979a1908c5541ebcd6
3
+ size 23819
app/src/content/assets/finetasks/data/ar/alghafa_sciqa_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6cac24425904dec66f4bb38aa34d65d0c1a6bc539baf5ea5300c5f7bc362626
3
+ size 894
app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c69085a0ca2df0adc4f9ec3c3b9857adad82d6749f41d88e3d43ba16e6d936d3
3
+ size 17942
app/src/content/assets/finetasks/data/ar/alghafa_sentiment_ara_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1db01058cac603bcb5b0a991b39d9499a3537ab0da1ad36eb1c3b317c8d5ff4
3
+ size 903
app/src/content/assets/finetasks/data/ar/arcd_ara_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f5b91b32d5c9a58a34ac035fd9e880de1256f5d0c47edfed7fe591abed789fa
3
+ size 15849
app/src/content/assets/finetasks/data/ar/arcd_ara_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d15a351c2e4cf3dcc3372637baafd4821397bb7ab00c81704d91ec8b55e6a31
3
+ size 478
app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6464f54a9a933b4b47c9c513c907ae358909518998ad5db01d8580578b77a1c6
3
+ size 23912
app/src/content/assets/finetasks/data/ar/belebele_arb_Arab_cf_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda4577c55a3298b9dc6f6a2e148aaf6a4038f8604811ca339f88d3c3f6e7573
3
+ size 903
app/src/content/assets/finetasks/data/ar/boolq_ara_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54675b0af158ca756a5c08ea1b6315f757df93827a3a107b4208b135bdf6d8db
3
+ size 18834
app/src/content/assets/finetasks/data/ar/boolq_ara_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32457679a2dc38045522dab504c13590c7199bcb452f65acfcf337fbbb3bdc2c
3
+ size 1042
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:challenge_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aabcf0d879390556fa664c0fb532afa47580407e37f2552026dbfffab89ebf57
3
+ size 469
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/community_arc_hin_cf:easy_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d213b818d04764e3bd9a0d0ad57ab9e0e38fe26d8db51942c12c1b8eb92f3636
3
+ size 449
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:challenge_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7f3601c8b4750a98708960e1df4c574b2038821e4a5740837d52ad770bbbd3c
3
+ size 469
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/community_arc_swa_cf:easy_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f25f52a5fe47096977b9dd294e354c65f54225ec87a7cde264933b5229ca0a67
3
+ size 449
app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/community_arc_tur_cf:easy_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad546f00bd725bea998a5fc4c6a870f43a1a4e7457bda42b110096b13a029fd8
3
+ size 449
app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af146e0409fc3332f8f250a36caabb270e01ad48ad5d04dd539de86bdc8529ff
3
+ size 36571
app/src/content/assets/finetasks/data/ar/exams_ara_cf:_average_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dbd86d8c6c5922af78b295bbc8c89f483049a17500be20cb625565a2e599242
3
+ size 1717
app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_data.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2649f9802e39e19da555d2e42851281cca18826534d23246bffd8b15a43e326a
3
+ size 14390
app/src/content/assets/finetasks/data/ar/frenchbench_arc_fra_cf:challenge_stats.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2da7b002e63a530df289c706c5c56113d16427105d18cff9dc556f25feb7e5e5
3
+ size 477