Commit be645c2
1 Parent(s): 9fc574b
add

src/leaderboard/read_evals.py CHANGED
@@ -44,7 +44,7 @@ class EvalResult:
         precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = data.get("model_name", config.get("model_name", config.get("model_args", None)))
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
@@ -66,7 +66,7 @@ class EvalResult:
         if architectures:
             architecture = ";".join(architectures)
 
-        print(data["results"])
+        # print(data["results"])
         # Extract results available in this file (some results are split in several files)
         results = {}
         for task in Tasks:
@@ -74,7 +74,7 @@ class EvalResult:
 
             # We average all scores of a given metric (not all metrics are present in all files)
             accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
-            print(f"{task}: {accs}")
+            # print(f"{task}: {accs}")
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -177,12 +177,12 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
         # Creation of result
-        print(f"Model result filepath: {model_result_filepath}")
+        # print(f"Model result filepath: {model_result_filepath}")
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
-        print(eval_result.results)
+        # print(eval_result.results)
         # print(eval_result)
         eval_result.update_with_request_file(requests_path)
-        print(eval_result.results)
+        # print(eval_result.results)
 
         # Store results of same eval together
         eval_name = eval_result.eval_name