huangshiyu committed · Commit 6169a19 · 1 Parent(s): 3c598b1
update

Files changed:
- app.py +22 -6
- compute_accuracy.py +47 -0
- constants.py +13 -0
- eval_final_results.py +11 -0
app.py CHANGED

@@ -3,9 +3,12 @@ __all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissi
 import gradio as gr
 import pandas as pd
 import json
+import traceback
 
 from constants import *
 from huggingface_hub import Repository
+from eval_final_results import eval_final
+
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
@@ -37,11 +40,19 @@ def add_new_eval(
     if input_file is None:
         return "Error! Empty file!"
 
-    upload_data = json.loads(input_file)
+    # upload_data = json.loads(input_file)
+
     submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN,
                                  repo_type="dataset",git_user="auto-uploader",git_email="uploader@163.com")
     submission_repo.git_pull()
     csv_data = pd.read_csv(CSV_DIR)
+    try:
+        upload_data = eval_final(test_answer_file,dev_answer_file, input_file)
+    except:
+        error_message = traceback.format_exc()
+        print("Error:", error_message)
+        return
+
 
     if LLM_type == 'Other':
         LLM_name = LLM_name_textbox
@@ -72,11 +83,16 @@ def add_new_eval(
         model_date,
         model_link
     ]
-
-
-
-
-
+    try:
+        for key in TASK_INFO:
+            if key in upload_data:
+                new_data.append(round(100*upload_data[key_map[key]],1))
+            else:
+                new_data.append(0)
+    except:
+        error_message = traceback.format_exc()
+        print("Error:", error_message)
+        return
     # print(new_data)
     # print(csv_data.loc[col-1])
     csv_data.loc[col] = new_data
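For orientation, a minimal sketch (illustrative, not the committed code) of how a score dict from the evaluator can be mapped into a leaderboard row through TASK_INFO and key_map. The scores are made up, and this sketch tests membership on the mapped long name, whereas the diff above tests the short key directly; the full key_map is defined in constants.py below.

# Illustrative sketch: building a leaderboard row from a (hypothetical) score dict.
TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
key_map = {"Dev Avg": "dev avg", "Test Avg": "test avg", "MR": "Motion Recognition",
           "LM": "Location-related Motion", "CM": "Camera Motion",
           "MO": "Motion-related Objects", "AO": "Action Order", "RC": "Repetition Count"}

upload_data = {"dev avg": 0.512, "test avg": 0.498, "Motion Recognition": 0.610}  # made-up scores

row = []
for key in TASK_INFO:
    long_name = key_map[key]
    # metrics missing from the result dict fall back to 0, mirroring the else branch in the diff
    row.append(round(100 * upload_data[long_name], 1) if long_name in upload_data else 0)

print(row)  # [51.2, 49.8, 61.0, 0, 0, 0, 0, 0]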
compute_accuracy.py ADDED

@@ -0,0 +1,47 @@
+import json
+import jsonlines
+from collections import defaultdict
+
+
+def compute_accuracy(answer_file: str, video_meta_file: str):
+    total_qa_num = 0
+    total_answered_num = 0
+    right_num = 0
+
+    category_right = defaultdict(float)
+    category_total = defaultdict(float)
+    category_acc = defaultdict(float)
+
+    with open(answer_file) as f:
+        model_answers = json.load(f)
+
+    with jsonlines.open(video_meta_file) as reader:
+        video_meta = list(reader)
+    for meta_data in video_meta:
+        for qa in meta_data['qa']:
+            uid = str(qa["uid"])
+            if uid in model_answers:
+                total_answered_num += 1
+                model_answer = model_answers[uid]
+
+                meta_data['question_type'] = [meta_data['question_type']]
+                if qa["answer"] == "NA":
+                    continue
+                for category in meta_data['question_type']:
+                    category_total[category] += 1
+                    if model_answer == qa["answer"]:
+                        category_right[category] += 1
+
+                if model_answer == qa["answer"]:
+                    right_num += 1
+            total_qa_num += 1
+
+    for key in category_total:
+        category_acc[key] = category_right[key] / category_total[key]
+
+    acc = float(right_num) / total_qa_num
+    answered_acc = float(right_num) / total_answered_num
+    category_acc.update({"acc": acc, "answered_acc": answered_acc, "total_qa_num": total_qa_num,
+                         "total_answered_num": total_answered_num, "right_num": right_num})
+    return category_acc
+
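A small usage sketch of the new helper, showing the two input shapes it expects: a JSON file of {uid: answer} predictions and a JSONL meta file whose records carry a question_type string and a qa list. File names, uids, and answers below are made up.

# Illustrative only: tiny toy inputs for compute_accuracy.
import json
import jsonlines

from compute_accuracy import compute_accuracy

answers = {"1001": "A", "2001": "C"}   # model predictions keyed by str(uid)
meta = [
    {"question_type": "Motion Recognition",
     "qa": [{"uid": 1001, "answer": "A"}, {"uid": 1002, "answer": "B"}]},
    {"question_type": "Camera Motion",
     "qa": [{"uid": 2001, "answer": "D"}]},
]

with open("toy_answers.json", "w") as f:
    json.dump(answers, f)
with jsonlines.open("toy_meta.jsonl", "w") as writer:
    writer.write_all(meta)

result = compute_accuracy("toy_answers.json", "toy_meta.jsonl")
# With the toy data above:
# result["acc"]          -> 1/3  (right answers over all QA pairs)
# result["answered_acc"] -> 1/2  (right answers over answered QA pairs)
# result["Motion Recognition"] -> 1.0, result["Camera Motion"] -> 0.0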
constants.py CHANGED

@@ -5,12 +5,25 @@ MODEL_INFO = ["Model", "Language Model", "Date"]
 TASK_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
 AVG_INFO = ["Dev Avg", "Test Avg", "MR", "LM", "CM", "MO", "AO", "RC"]
 
+key_map = {
+    "Dev Avg": "dev avg",
+    "Test Avg": "test avg",
+    "MR": "Motion Recognition",
+    "LM": "Location-related Motion",
+    "CM": "Camera Motion",
+    "MO": "Motion-related Objects",
+    "AO": "Action Order",
+    "RC": "Repetition Count"
+}
+
 DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number',
                     'number', 'number']
 
 SUBMISSION_NAME = "MotionBench_submission"
 SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/THUDM/", SUBMISSION_NAME)
 CSV_DIR = "./MotionBench_submission/result.csv"
+test_answer_file = "./MotionBench_submission/test_ans_video_info.meta.jsonl"
+dev_answer_file = "./MotionBench_submission/dev_ans_video_info.meta.jsonl"
 
 COLUMN_NAMES = MODEL_INFO + TASK_INFO
 
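A quick consistency check one could run against these constants (illustrative, not part of the commit): every short column name in TASK_INFO should resolve through key_map, and the 3 model-info plus 8 task columns should line up with the 11 entries in DATA_TITILE_TYPE.

# Illustrative sanity check, not in the repository.
from constants import MODEL_INFO, TASK_INFO, DATA_TITILE_TYPE, key_map

assert all(key in key_map for key in TASK_INFO)
assert len(MODEL_INFO) + len(TASK_INFO) == len(DATA_TITILE_TYPE) == 11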
eval_final_results.py ADDED

@@ -0,0 +1,11 @@
+from compute_accuracy import compute_accuracy
+
+def eval_final(test_metafile,dev_metafile,to_eval):
+    print("Computing accuracy...")
+    result_test = compute_accuracy(to_eval, test_metafile)
+    result_dev = compute_accuracy(to_eval, dev_metafile)
+
+    output = {"dev avg": result_dev['answered_acc'],
+              "test avg": result_test['answered_acc'],
+              **result_test}
+
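The hunk above ends at the dict literal; the sketch below assumes eval_final goes on to return that output dict, which is how app.py consumes it. The meta-file paths come from constants.py, while the submission file name is hypothetical.

# Illustrative call, assuming eval_final returns the `output` dict it builds.
from constants import test_answer_file, dev_answer_file
from eval_final_results import eval_final

scores = eval_final(test_answer_file, dev_answer_file, "my_model_answers.json")  # hypothetical submission file
print(scores["dev avg"], scores["test avg"])  # answered-accuracy on the dev / test splits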