Commit f976f1c · Parent: 5639a81
Add NPM field

Files changed:
- src/display/utils.py +14 -5
- src/leaderboard/read_evals.py +2 -2
src/display/utils.py
CHANGED

@@ -51,7 +51,7 @@ for task in Tasks:
 auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
 auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
 auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
-auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str",
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
 auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
 auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
 auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])

@@ -107,11 +107,15 @@ baseline_row = {
 }

 baseline_list = []
+npm = []
 for task in Tasks:
     baseline_row[task.value.col_name] = task.value.baseline
-
-
+    res = task.value.baseline
+    if res is not None and (isinstance(res, float) or isinstance(res, int)):
+        baseline_list.append(res)
+        npm.append((res - task.value.baseline) / (100 - task.value.baseline))
 baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
+baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)

 #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
 baseline_row["🤗 Leaderboard Average"] = None

@@ -151,11 +155,16 @@ human_baseline_row = {
 }

 baseline_list = []
+npm = []
 for task in Tasks:
     human_baseline_row[task.value.col_name] = task.value.human_baseline
-
-
+    res = task.value.human_baseline
+    if res is None or not (isinstance(res, float) or isinstance(res, int)):
+        res = 95.0
+    baseline_list.append(res)
+    npm.append((res - task.value.baseline) / (100 - task.value.baseline))
 human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
+human_baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)
 #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
 human_baseline_row["🤗 Leaderboard Average"] = None
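Judging by the formula in the hunks above, NPM is a baseline-normalized score: 0 when a score only matches the task's random baseline, rising toward the full headroom above it as the score approaches perfect. Note that for baseline_row the loop feeds the baseline itself in as res, so every term is (res - res) / (100 - baseline) = 0 and that row's NPM comes out as 0.0 by construction, while human_baseline_row substitutes 95.0 for any missing or non-numeric human baseline before normalizing. Below is a minimal sketch of the computation, assuming 0-100 task scores; the task names and numbers are hypothetical stand-ins for the real Tasks enum, not values from this commit.

# Hypothetical tasks standing in for the Tasks enum; scores are on a 0-100 scale.
tasks = [
    {"name": "ARC", "baseline": 25.0, "score": 64.6},
    {"name": "HellaSwag", "baseline": 25.0, "score": 85.9},
]

# utils.py variant of the formula (no *100.0 factor): each term is the fraction
# of the headroom between the random baseline and a perfect score.
npm = [(t["score"] - t["baseline"]) / (100 - t["baseline"]) for t in tasks]
print(round(sum(npm) / len(npm), 2))  # 0.67
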
src/leaderboard/read_evals.py
CHANGED

@@ -166,8 +166,8 @@ class EvalResult:
                 continue
             average.append(res)
             npm.append((res-task.value.baseline)*100.0 / (100.0-task.value.baseline))
-        average = sum(average)/len(average)
-        npm = sum(npm)/len(npm)
+        average = round(sum(average)/len(average), 2)
+        npm = round(sum(npm)/len(npm), 2)

         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
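
The per-model formula here multiplies by 100.0, putting model NPM on a 0-100 scale; the utils.py rows above omit that factor and land on 0-1, so the two scales differ in the committed code. The change in this hunk itself only adds rounding, so the stored average and NPM keep two decimals. A worked instance of the formula with hypothetical numbers:

# Hypothetical per-task result and random baseline, both in percent.
res, baseline = 62.5, 25.0
npm_task = (res - baseline) * 100.0 / (100.0 - baseline)
print(round(npm_task, 2))  # 50.0: halfway between the baseline and a perfect score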