Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,457 Bytes
46861c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
import argparse
import json
import os
"""
question:
{
"question_id": 1,
"image": "000000104486.jpg",
"category": "natural_box",
"text": "Please analyze the relationship between all marked regions in the image.",
"annotation": {
"bbox": [[157.23, 341.07, 10.67, 2.08]],
"segmentation": []
}
}
answer:
{
"question_id": 1,
"image": "000000104486.jpg",
"category": "natural_box",
"text": "<Region 1>: This region includes an individual who is caught in a moment that seems to involve some sort of task or activity. The person is engaged with a luggage cart, which suggests they might be arriving or departing from a location that offers such amenities, possibly a hotel. The cart holds luggage indicating travel or transit. The man's expression and attire provide clues to his role or state at the moment, such as potentially being a guest handling his luggage. The other individual seen partially in the background creates a sense of movement or interaction, but their relationship to the man or the context is unclear.\n"
}
predictions:
{
"question_id": 1,
"image": "000000104486.jpg",
"category": "natural_box",
"text": "<Region 1>: The marked region does not appear to have any direct relationship with other marked regions, as there are no other marks to compare or contrast with.\n"
}
"""
def _load_json(path):
    """Return the parsed JSON document stored at *path* (read as UTF-8)."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _save_json(path, payload):
    """Write *payload* to *path* as indented UTF-8 JSON and print a notice."""
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 explicitly instead of relying on the locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4, ensure_ascii=False)
    print(f"{path} saved successfully!")


def _build_answers_and_predictions(results, mask_meta):
    """Pair each result record with its mask metadata entry by position.

    Returns ``(answers, predictions)``: two parallel lists of dicts with the
    keys ``question_id`` (1-based), ``image``, ``category`` and ``text``.
    ``text`` is the record's ``"gt"`` for answers and its ``"caption"`` for
    predictions.

    Raises:
        ValueError: if a record's ``"gt"`` differs from the ``"caption"`` of
            the metadata entry at the same index.
    """
    answers = []
    predictions = []
    for index, item in enumerate(results):
        meta = mask_meta[index]
        # Explicit check instead of `assert`, which is stripped under -O.
        if meta["caption"] != item["gt"]:
            raise ValueError(
                f"record {index}: ground truth does not match the caption "
                "stored in annotations/mdvp_caption_mask.json"
            )
        # Records carry the image under "image_path" or, failing that,
        # under "file_name".
        if "image_path" in item:
            image_path = item["image_path"]
        else:
            image_path = item["file_name"]
        shared = {
            "question_id": index + 1,
            "image": image_path,
            "category": meta["dataset_name"],
        }
        answers.append({**shared, "text": item["gt"]})
        predictions.append({**shared, "text": item["caption"]})
    return answers, predictions


def main(args):
    """Write answer/prediction/question JSON files for every caption phase.

    Reads the results file at ``args.output_path`` and
    ``annotations/mdvp_caption_mask.json``, then for each phase writes
    ``answer.json``, ``prediction.json`` and ``question.json`` into
    ``mdvp_for_gpt4v_eval/<phase>/``. Questions are read from
    ``data/<domain>/<domain>_box.json``, where ``<domain>`` is the phase name
    without its ``_box`` suffix. All paths are relative to the current
    working directory.

    Args:
        args: object with an ``output_path`` attribute naming the results
            JSON file.

    Raises:
        ValueError: if a result's ``"gt"`` does not match the corresponding
            mask annotation caption.
    """
    vp = "bbox"
    phases = (
        "android_detailed_caption_box",
        "multipanel_detailed_caption_box",
        "natural_detailed_caption_box",
        "ocr_doc_detailed_caption_box",
        "ocr_spotting_detailed_caption_box",
        "web_detailed_caption_box",
    )

    # Answers and predictions do not depend on the phase, so load the inputs
    # and build the lists once instead of once per phase.
    results = _load_json(args.output_path)
    mask_meta = _load_json("annotations/mdvp_caption_mask.json")
    answers, predictions = _build_answers_and_predictions(results, mask_meta)

    for phase in phases:
        domain = phase.split("_box")[0]  # e.g. android_detailed_caption
        out_dir = f"mdvp_for_gpt4v_eval/{phase}"
        # makedirs also creates the parent directory on a first run.
        os.makedirs(out_dir, exist_ok=True)

        _save_json(f"{out_dir}/answer.json", answers)
        _save_json(f"{out_dir}/prediction.json", predictions)

        questions = [
            {
                "question_id": index + 1,
                "image": item["image_name"],
                "category": phase,
                "text": item["question"],
                "annotation": {vp: item[vp], "segmentation": []},
            }
            for index, item in enumerate(
                _load_json(f"data/{domain}/{domain}_box.json")
            )
        ]
        _save_json(f"{out_dir}/question.json", questions)
if __name__ == "__main__":
    # Command-line entry point: --output_path is the only (required) option;
    # the parsed namespace is handed straight to main().
    cli = argparse.ArgumentParser(description="Process args.")
    cli.add_argument(
        "--output_path",
        required=True,
        type=str,
        help="Path to output results",
    )
    main(cli.parse_args())
|