Ahmed Ahmed committed
Commit 25de5ef · 1 Parent(s): c1fc4e2
consolidate
Files changed:
- src/display/utils.py (+2 -3)
- src/leaderboard/read_evals.py (+6 -2)
src/display/utils.py
CHANGED
@@ -28,9 +28,8 @@ auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "ma
 #Scores
 auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
 for task in Tasks:
-    #
-
-    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"{task.value.col_name} {arrow}", "number", True)])
+    # All perplexity scores show with ⬇️ since lower is better
+    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"{task.value.col_name} ⬇️", "number", True)])
 # Model information
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
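For context on this change: the label stored in each ColumnContent becomes the dataframe column name that read_evals.py must reproduce exactly, arrow emoji included. Below is a minimal, runnable sketch of that pattern, assuming ColumnContent and the make_dataclass construction follow the standard leaderboard-template layout; the Task class and the single "wikitext" entry are invented for illustration and are not this repo's code.

# Minimal sketch of the column-declaration pattern; ColumnContent, Task and the
# "wikitext" entry are illustrative assumptions, not code from this repository.
from dataclasses import dataclass, make_dataclass
from enum import Enum


@dataclass(frozen=True)
class ColumnContent:
    name: str                    # label shown in the leaderboard table
    type: str                    # dataframe dtype ("str", "number", ...)
    displayed_by_default: bool


@dataclass(frozen=True)
class Task:
    benchmark: str
    metric: str
    col_name: str


class Tasks(Enum):
    wikitext = Task("wikitext", "perplexity", "WikiText")


auto_eval_column_dict = []
auto_eval_column_dict.append(["average", ColumnContent, ColumnContent("Average ⬆️", "number", True)])
for task in Tasks:
    # All perplexity scores show with ⬇️ since lower is better
    auto_eval_column_dict.append([task.name, ColumnContent, ColumnContent(f"{task.value.col_name} ⬇️", "number", True)])

# The rest of the app reads its column labels from this generated class.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

print(AutoEvalColumn.wikitext.name)  # "WikiText ⬇️" -- the key to_dict() must now emit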
src/leaderboard/read_evals.py
CHANGED
@@ -100,6 +100,10 @@ class EvalResult:
             AutoEvalColumn.revision.name: self.revision,
             AutoEvalColumn.average.name: average,
             AutoEvalColumn.still_on_hub.name: self.still_on_hub,
+            # Add missing columns with default values
+            AutoEvalColumn.license.name: "Unknown",  # Default license
+            AutoEvalColumn.params.name: 0,  # Default params
+            AutoEvalColumn.likes.name: 0,  # Default likes
         }

         for task in Tasks:
@@ -107,9 +111,9 @@
             if benchmark in self.results:
                 score = self.results[benchmark]
                 # Store original perplexity score (lower is better)
-                data_dict[task.value.col_name] = score
+                data_dict[f"{task.value.col_name} ⬇️"] = score
             else:
-                data_dict[task.value.col_name] = None
+                data_dict[f"{task.value.col_name} ⬇️"] = None

         return data_dict
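The default License/Params/Likes entries in the first hunk matter because the display table is typically assembled by selecting every declared column from each result's to_dict() output. The sketch below illustrates that consumer side; the column labels, the pandas selection step, and the failure mode are assumptions for illustration, not code from this repository.

# Sketch of why the default entries matter; labels and the pandas selection
# step are assumptions about how the Space builds its display dataframe.
import pandas as pd

COLUMNS = ["Average ⬆️", "WikiText ⬇️", "License", "#Params (B)", "Likes"]

def rows_to_dataframe(rows: list[dict]) -> pd.DataFrame:
    df = pd.DataFrame.from_records(rows)
    # Selecting every declared column raises a KeyError if any row's
    # to_dict() output omits one of them -- hence the "Unknown"/0 defaults.
    return df[COLUMNS]

row = {
    "Average ⬆️": 11.2,
    "WikiText ⬇️": 11.2,   # perplexity, lower is better
    "License": "Unknown",  # default added in this commit
    "#Params (B)": 0,      # default added in this commit
    "Likes": 0,            # default added in this commit
}
print(rows_to_dataframe([row]))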