Spaces:
Sleeping
Sleeping
Catch wrong key for unordered_list or missing dict.
Browse files
- test_of_time_accuracy.py +7 -2
- tests.py +4 -2
test_of_time_accuracy.py
CHANGED
|
@@ -120,6 +120,11 @@ class TestOfTimeAccuracy(evaluate.Metric):
|
|
| 120 |
except (ValueError, SyntaxError):
|
| 121 |
return None
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
def _compute(
|
| 124 |
self,
|
| 125 |
predictions,
|
|
@@ -139,8 +144,8 @@ class TestOfTimeAccuracy(evaluate.Metric):
|
|
| 139 |
accuracy = []
|
| 140 |
for i, j in zip(predictions, references):
|
| 141 |
if subset == "arithmetic" and "unordered_list" in j:
|
| 142 |
-
i =
|
| 143 |
-
j =
|
| 144 |
accuracy.append(
|
| 145 |
str(i) == str(j)
|
| 146 |
) # Semantic subset answer JSON sometimes has int as value. Label is string.
|
|
|
|
| 120 |
except (ValueError, SyntaxError):
|
| 121 |
return None
|
| 122 |
|
| 123 |
+
def _sort_unordered_list(self, d):
|
| 124 |
+
if isinstance(d, dict) and "unordered_list" in d:
|
| 125 |
+
return sorted(d["unordered_list"])
|
| 126 |
+
return d
|
| 127 |
+
|
| 128 |
def _compute(
|
| 129 |
self,
|
| 130 |
predictions,
|
|
|
|
| 144 |
accuracy = []
|
| 145 |
for i, j in zip(predictions, references):
|
| 146 |
if subset == "arithmetic" and "unordered_list" in j:
|
| 147 |
+
i = self._sort_unordered_list(i)
|
| 148 |
+
j = self._sort_unordered_list(j)
|
| 149 |
accuracy.append(
|
| 150 |
str(i) == str(j)
|
| 151 |
) # Semantic subset answer JSON sometimes has int as value. Label is string.
|
tests.py
CHANGED
|
@@ -6,15 +6,17 @@ arithmetic_test_cases = {
|
|
| 6 |
"predictions": [
|
| 7 |
'JSON = {"explanation": "The war began in 360 BC. Since BC years count backwards, adding 8 years to 360 BC means subtracting 8 from 360, resulting in 352 BC.", "answer": "352 BC"}',
|
| 8 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": ["Berlin","London"]\n}\n```',
|
|
|
|
| 9 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 10 |
],
|
| 11 |
"references": [
|
| 12 |
'{"answer": "352 BC"}',
|
| 13 |
'{"unordered_list": ["London", "Berlin"]}',
|
|
|
|
| 14 |
"{'answer': '2005-04-07'}",
|
| 15 |
],
|
| 16 |
-
"result": {"accuracy":
|
| 17 |
-
"per_item_accuracy": [True, True, False],
|
| 18 |
}
|
| 19 |
|
| 20 |
semantic_test_cases = {
|
|
|
|
| 6 |
"predictions": [
|
| 7 |
'JSON = {"explanation": "The war began in 360 BC. Since BC years count backwards, adding 8 years to 360 BC means subtracting 8 from 360, resulting in 352 BC.", "answer": "352 BC"}',
|
| 8 |
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "unordered_list": ["Berlin","London"]\n}\n```',
|
| 9 |
+
'```json\n{\n "explanation": "The dates provided are March 2012, September 2011, June 2017, September 2019, and June 2015. These correspond to visits to Miami, Sydney, Tokyo, London, and Nairobi respectively. The latest date among these is September 2019, which is associated with London. Therefore, London is the last city visited.",\n "malformed_unordered_list": ["Berlin","London"]\n}\n```',
|
| 10 |
' "To find the date of the second most important game, we need to subtract 7 days from the date of the most important game. We can do this by counting back 7 days from April 14, 2005. April 14 - 7 days = April 7, 2005", "answer": "2005-04-07"}',
|
| 11 |
],
|
| 12 |
"references": [
|
| 13 |
'{"answer": "352 BC"}',
|
| 14 |
'{"unordered_list": ["London", "Berlin"]}',
|
| 15 |
+
'{"unordered_list": ["London", "Berlin"]}',
|
| 16 |
"{'answer': '2005-04-07'}",
|
| 17 |
],
|
| 18 |
+
"result": {"accuracy": 0.5},
|
| 19 |
+
"per_item_accuracy": [True, True, False, False],
|
| 20 |
}
|
| 21 |
|
| 22 |
semantic_test_cases = {
|