Robert committed · Commit 8fe5a80 · Parent(s): 2827202

Small calculation fixes. Current exact match: 0.02, F1-score: 0.12

Files changed:
- base_model/main.py (+2, -2)
- base_model/retriever.py (+4, -6)
base_model/main.py CHANGED

@@ -15,6 +15,6 @@ if __name__ == '__main__':
     print() # Newline
 
     # Compute overall performance
-    exact_match, f1_score
-    print(f"Exact match: {exact_match}
+    exact_match, f1_score = r.evaluate()
+    print(f"Exact match: {exact_match:.02f}\n"
           f"F1-score: {f1_score:.02f}")
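For reference, a minimal stand-alone sketch of the fixed print formatting (the stand-in values mirror the scores quoted in the commit message; in the real script they come from r.evaluate()):

exact_match, f1_score = 0.02, 0.12  # stand-in for r.evaluate()
print(f"Exact match: {exact_match:.02f}\n"  # adjacent f-strings concatenate
      f"F1-score: {f1_score:.02f}")
# Output:
# Exact match: 0.02
# F1-score: 0.12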
base_model/retriever.py CHANGED

@@ -7,7 +7,6 @@ from transformers import (
 from datasets import load_dataset
 import torch
 import os.path
-import numpy
 
 import evaluate
 
@@ -125,9 +124,8 @@ class Retriever:
         entire dataset.
 
         Returns:
-
+            float: overall exact match
             float: overall F1-score
-            int: total amount of questions handled
         """
         questions_ds = load_dataset("GroNLP/ik-nlp-22_slp", name="questions")['test']
         questions = questions_ds['question']
@@ -142,7 +140,7 @@ class Retriever:
             scores += score[0]
             predictions.append(result['text'][0])
 
-
-
+        exact_matches = [evaluate.compute_exact_match(predictions[i], answers[i]) for i in range(len(answers))]
+        f1_scores = [evaluate.compute_f1(predictions[i], answers[i]) for i in range(len(answers))]
 
-        return
+        return sum(exact_matches) / len(exact_matches), sum(f1_scores) / len(f1_scores)
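The local evaluate module imported above is not part of this diff, so the sketch below is an assumption about its contents: compute_exact_match and compute_f1 are taken to be the standard SQuAD-style QA metrics, and the normalize_text helper name is hypothetical.

import re
import string
from collections import Counter

def normalize_text(s):
    # Lowercase, drop punctuation and articles, collapse whitespace (SQuAD convention).
    s = "".join(ch for ch in s.lower() if ch not in string.punctuation)
    s = re.sub(r"\b(a|an|the)\b", " ", s)
    return " ".join(s.split())

def compute_exact_match(prediction, truth):
    # 1 if the normalized strings are identical, else 0.
    return int(normalize_text(prediction) == normalize_text(truth))

def compute_f1(prediction, truth):
    # Token-level F1 between prediction and gold answer.
    pred_tokens = normalize_text(prediction).split()
    truth_tokens = normalize_text(truth).split()
    if not pred_tokens or not truth_tokens:
        # Both empty counts as a match; otherwise no overlap is possible.
        return float(pred_tokens == truth_tokens)
    common = Counter(pred_tokens) & Counter(truth_tokens)
    num_same = sum(common.values())
    if num_same == 0:
        return 0.0
    precision = num_same / len(pred_tokens)
    recall = num_same / len(truth_tokens)
    return 2 * precision * recall / (precision + recall)

Under that reading, the new return line averages the per-question scores into corpus-level metrics: sum(exact_matches) / len(exact_matches) is the fraction of questions answered exactly right (0.02 in this commit), and the mean of f1_scores is the overall F1 (0.12).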