Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alexis Gobé
committed on
Commit
·
71fffd4
1
Parent(s):
4a5f8bc
Change GPQA metric
Browse files
backend/app/services/leaderboard.py
CHANGED
|
@@ -91,7 +91,7 @@ class Task:
|
|
| 91 |
col_name: str
|
| 92 |
|
| 93 |
class Tasks(Enum):
|
| 94 |
-
task0 = Task("community|gpqa-fr|0", "
|
| 95 |
task1 = Task("community|ifeval-fr|0", "prompt_level_strict_acc", "IFEval-fr") # FIXME norm_acc should be acc # et "Suivi d'instructions"
|
| 96 |
task2 = Task("community|bac-fr|0", "bac-fr-qem", "bac-fr") # et "Suivi d'instructions"
|
| 97 |
|
|
@@ -187,7 +187,7 @@ class EvalResult:
|
|
| 187 |
#FIXME postprocessing of metrics is done here ftm
|
| 188 |
display = True # Do not display models evaluation if something went wrong (missing task, 0 score, ...)
|
| 189 |
if(task.col_name == "GPQA-fr"):
|
| 190 |
-
accs = np.array([v.get("
|
| 191 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
| 192 |
display = False
|
| 193 |
continue
|
|
@@ -268,7 +268,8 @@ class LeaderboardService:
|
|
| 268 |
|
| 269 |
for root, _, files in os.walk(results_path):
|
| 270 |
#FIXME We will remove this check when results we be homogeneous
|
| 271 |
-
folderName = "clearML-sprint1
|
|
|
|
| 272 |
normalized_root = os.path.normpath(root)
|
| 273 |
path_components = normalized_root.split(os.sep)
|
| 274 |
if folderName in path_components:
|
|
|
|
| 91 |
col_name: str
|
| 92 |
|
| 93 |
class Tasks(Enum):
|
| 94 |
+
task0 = Task("community|gpqa-fr|0", "new_acc", "GPQA-fr") # On pourrait vouloir mettre "Connaissances"
|
| 95 |
task1 = Task("community|ifeval-fr|0", "prompt_level_strict_acc", "IFEval-fr") # FIXME norm_acc should be acc # et "Suivi d'instructions"
|
| 96 |
task2 = Task("community|bac-fr|0", "bac-fr-qem", "bac-fr") # et "Suivi d'instructions"
|
| 97 |
|
|
|
|
| 187 |
#FIXME postprocessing of metrics is done here ftm
|
| 188 |
display = True # Do not display models evaluation if something went wrong (missing task, 0 score, ...)
|
| 189 |
if(task.col_name == "GPQA-fr"):
|
| 190 |
+
accs = np.array([v.get("new_acc", None) for k, v in data["results"].items() if task.benchmark == k])
|
| 191 |
if accs.size == 0 or any([acc is None for acc in accs]):
|
| 192 |
display = False
|
| 193 |
continue
|
|
|
|
| 268 |
|
| 269 |
for root, _, files in os.walk(results_path):
|
| 270 |
#FIXME We will remove this check when results we be homogeneous
|
| 271 |
+
folderName = "clearML-sprint1.5"
|
| 272 |
+
|
| 273 |
normalized_root = os.path.normpath(root)
|
| 274 |
path_components = normalized_root.split(os.sep)
|
| 275 |
if folderName in path_components:
|