Spaces:
Sleeping
Sleeping
Commit
·
4e425e4
1
Parent(s):
6b89838
Fix wrong GED (grammatical-error-detection) answer. Load all datasets at startup
Browse files
quiz.py
CHANGED
|
@@ -42,6 +42,15 @@ BENCHMARKS = {
|
|
| 42 |
},
|
| 43 |
}
|
| 44 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
# Dataset specific preprocessing and standardization
|
| 47 |
def winogrande_preprocessing(sample):
|
|
@@ -65,7 +74,7 @@ def icelandic_sentence_gec_preprocessing(sample):
|
|
| 65 |
f"Inniheldur eftirfarandi málsgrein villu?<p style='margin-left: 25px;'><i>{sample['sentence']}</i></p>"
|
| 66 |
)
|
| 67 |
new_sample["options"] = "Villa", "Engin villa"
|
| 68 |
-
new_sample["answer"] = "Engin villa" if sample["correct"] else "Villa"
|
| 69 |
new_sample["instruction"] = "Valkostir"
|
| 70 |
return new_sample
|
| 71 |
|
|
@@ -161,12 +170,9 @@ class BenchmarkQuiz:
|
|
| 161 |
return self.state
|
| 162 |
|
| 163 |
def load_benchmark(self, benchmark_name: str) -> List[Dict[str, Any]]:
|
| 164 |
-
dataset =
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
split=BENCHMARKS[benchmark_name].get("split", "train"),
|
| 168 |
-
)
|
| 169 |
-
samples = random.sample(list(dataset), 5)
|
| 170 |
if benchmark_name == "icelandic-winogrande":
|
| 171 |
samples = [winogrande_preprocessing(sample) for sample in samples]
|
| 172 |
elif benchmark_name == "grammatical-error-detection":
|
|
|
|
| 42 |
},
|
| 43 |
}
|
| 44 |
|
| 45 |
+
DATASETS = {
|
| 46 |
+
dataset_name: load_dataset(
|
| 47 |
+
BENCHMARKS[dataset_name]["path"],
|
| 48 |
+
name=BENCHMARKS[dataset_name].get("config_name"),
|
| 49 |
+
split=BENCHMARKS[dataset_name].get("split", "train"),
|
| 50 |
+
)
|
| 51 |
+
for dataset_name in BENCHMARKS
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
|
| 55 |
# Dataset specific preprocessing and standardization
|
| 56 |
def winogrande_preprocessing(sample):
|
|
|
|
| 74 |
f"Inniheldur eftirfarandi málsgrein villu?<p style='margin-left: 25px;'><i>{sample['sentence']}</i></p>"
|
| 75 |
)
|
| 76 |
new_sample["options"] = "Villa", "Engin villa"
|
| 77 |
+
new_sample["answer"] = "Engin villa" if sample["correct"] == "false" else "Villa"
|
| 78 |
new_sample["instruction"] = "Valkostir"
|
| 79 |
return new_sample
|
| 80 |
|
|
|
|
| 170 |
return self.state
|
| 171 |
|
| 172 |
def load_benchmark(self, benchmark_name: str) -> List[Dict[str, Any]]:
|
| 173 |
+
dataset = DATASETS[benchmark_name]
|
| 174 |
+
random_indices = random.sample(range(len(dataset)), 5)
|
| 175 |
+
samples = dataset.select(random_indices)
|
|
|
|
|
|
|
|
|
|
| 176 |
if benchmark_name == "icelandic-winogrande":
|
| 177 |
samples = [winogrande_preprocessing(sample) for sample in samples]
|
| 178 |
elif benchmark_name == "grammatical-error-detection":
|