"""
Reference: https://github.com/haotian-liu/LLaVA/blob/main/llava/eval/eval_gpt_review.py
"""
import argparse
import json
import os
import time

import cv2
import numpy as np
import openai
import requests
from tqdm import tqdm

from paint_util import encode_image, paint_text_box, paint_text_point
| # Define Azure OpenAI details | |
| model_name = "gpt-4o-2024-11-20" | |
| max_tokens = 1000 # range: [1, 4095] | |
| # Initialize the Azure client | |
| client = openai.AzureOpenAI( | |
| azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), | |
| api_key=os.getenv("AZURE_OPENAI_KEY"), | |
| api_version="2024-03-01-preview", | |
| ) | |
| def get_eval(content: str, max_tokens: int): | |
| while True: | |
| try: | |
| messages = [ | |
| { | |
| "role": "system", | |
| "content": "You are a helpful and precise assistant for checking the quality of the answer.", | |
| }, | |
| { | |
| "role": "user", | |
| "content": content, | |
| }, | |
| ] | |
| completion = client.chat.completions.create( | |
| model=model_name, | |
| messages=messages, | |
| max_tokens=max_tokens, | |
| temperature=0, | |
| ) | |
| ret = completion.choices[0].message.content | |
| break | |
| except Exception as e: | |
| print(e) | |
| time.sleep(1) | |
| return ret | |
| def parse_score(review): | |
| try: | |
| score_pair = review.split("\n")[0] | |
| score_pair = score_pair.replace(",", " ") | |
| sp = score_pair.split(" ") | |
| print("score_pair:", score_pair, sp) | |
| return [float(sp[0]), float(sp[1])] | |
| except Exception as e: | |
| print(e) | |
| print("error", review) | |
| return [-1, -1] | |
| def main(args): | |
| phase = args.phase # android_QA_box | |
| domain = phase.split("_box")[0] # android_QA | |
| if "natural" in phase: | |
| context_str = "The image is a natural image." | |
| elif "ocr" in phase: | |
| context_str = "The image contains text, and the user wishes to know the content of the text." | |
| elif "screen" in phase: | |
| context_str = "The image is a screenshot from a mobile phone or webpage." | |
| elif "panel" in phase: | |
| context_str = "The image is a multi-panel figure." | |
| elif "android" in phase: | |
| context_str = "The image is an andriod screenshot." | |
| elif "web" in phase: | |
| context_str = "The image is a webpage screenshot." | |
| question_path = f"mdvp_for_gpt4v_eval/{phase}/question.json" | |
| args.question = question_path | |
| # parser.add_argument('--question', default=question_path, help='path to question file') | |
| answer_list_path = [ | |
| f"mdvp_for_gpt4v_eval/{phase}/answer.json", | |
| f"mdvp_for_gpt4v_eval/{phase}/prediction.json", | |
| ] | |
| args.answer_list = answer_list_path | |
| # parser.add_argument('--answer-list', nargs='+', default=answer_list_path, help='gpt answer and model answer json files') | |
| rule_path = f"annotations/rule.json" | |
| args.rule = rule_path | |
| # parser.add_argument('--rule', default=rule_path ,help='gpt rule') | |
| f_q = json.load(open(os.path.expanduser(args.question))) | |
| f_ans1 = json.load(open(os.path.expanduser(args.answer_list[0]))) | |
| f_ans2 = json.load(open(os.path.expanduser(args.answer_list[1]))) | |
| rule_dict = json.load(open(os.path.expanduser(args.rule), "r")) | |
| os.makedirs("./result", exist_ok=True) | |
| if os.path.isfile(os.path.expanduser(args.output)): | |
| cur_reviews = [ | |
| json.loads(line) for line in open(os.path.expanduser(args.output)) | |
| ] | |
| else: | |
| cur_reviews = [] | |
| review_file = open(f"{args.output}", "a") | |
| idx = 0 | |
| for ques, ans1, ans2 in tqdm(zip(f_q, f_ans1, f_ans2)): | |
| # paint som mark on image | |
| image_name = ques["image"] | |
| image_path = f"data/{domain}/images/" + image_name | |
| # print("loading image from {}".format(image_path)) | |
| image = cv2.imread(image_path) | |
| height, width, channels = image.shape | |
| (width, height) | |
| if "bbox" in ques["annotation"]: | |
| bbox = ques["annotation"]["bbox"] | |
| paint_image_path = paint_text_box(image_path, bbox) | |
| rule = rule_dict["box"] | |
| elif "points" in ques["annotation"]: | |
| points = ques["annotation"]["points"] | |
| paint_image_path = paint_text_point(image_path, points) | |
| rule = rule_dict["point"] | |
| base64_image = encode_image(paint_image_path) | |
| prompt = rule["prompt"] | |
| role = rule["role"] | |
| content_text = ( | |
| f"[Context]\{context_str}\n\n" | |
| f'[Question]\n{ques["text"]}\n\n' | |
| f'[{role} 1]\n{ans1["text"]}\n\n[End of {role} 1]\n\n' | |
| f'[{role} 2]\n{ans2["text"]}\n\n[End of {role} 2]\n\n' | |
| f"[System]\n{prompt}\n\n" | |
| ) | |
| content = [ | |
| { | |
| "type": "text", | |
| "text": content_text, | |
| }, | |
| { | |
| "type": "image_url", | |
| "image_url": { | |
| "url": f"data:image/jpeg;base64,{base64_image}", | |
| "detail": "high", | |
| }, | |
| }, | |
| ] | |
| cur_js = { | |
| "id": idx + 1, | |
| "question_id": ques["question_id"], | |
| "answer1_id": ans1.get("answer_id", ans1["question_id"]), | |
| "answer2_id": ans2.get("answer_id", ans2["question_id"]), | |
| "category": phase, | |
| } | |
| # pdb.set_trace() | |
| if idx >= len(cur_reviews): | |
| review = get_eval(content, args.max_tokens) | |
| # print(review) | |
| scores = parse_score(review) | |
| cur_js["content"] = review | |
| cur_js["tuple"] = scores | |
| cur_js["answer1"] = ans1["text"] | |
| cur_js["answer2"] = ans2["text"] | |
| review_file.write(json.dumps(cur_js) + "\n") | |
| review_file.flush() | |
| else: | |
| print(f"Skipping {idx} as we already have it.") | |
| idx += 1 | |
| print(idx) | |
| review_file.close() | |
| if __name__ == "__main__": | |
| parser = argparse.ArgumentParser(description="ChatGPT-based QA evaluation.") | |
| parser.add_argument( | |
| "--phase", help="MDVP domain", type=str, required=True | |
| ) # android_QA_box | |
| parser.add_argument( | |
| "--max-tokens", | |
| type=int, | |
| default=1024, | |
| help="maximum number of tokens produced in the output", | |
| ) | |
| parser.add_argument( | |
| "--output", default=f"result/gpt_score.jsonl", help="output json dir" | |
| ) | |
| args = parser.parse_args() | |
| main(args) | |