#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Export the failed cases of an evaluation spreadsheet to a new Excel file.

Rows of the evaluation spreadsheet whose ``correct`` cell is boolean False are
printed to stdout and written, together with the ``prompt`` and ``response``
of the matching dataset record (joined on ``idx``), to an output workbook.
"""
import argparse
import json

import pandas as pd

from project_settings import environment, project_path

# Column order of the output workbook. "op" and "remark" are written empty
# (presumably to be filled in later by a reviewer -- confirm with the owner).
_OUTPUT_COLUMNS = [
    "idx",
    "conversation",
    "expected",
    "actual_label",
    "actual_reason",
    "note",
    "prompt",
    "response",
    "op",
    "remark",
]


def get_args():
    """Parse the command-line arguments.

    Returns:
        argparse.Namespace with:
            filename: path of the evaluation spreadsheet to read.
            dataset: path of the JSONL dataset to join against.
            output: path of the Excel file to write.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--filename",
        default="evaluation_results_max7.xlsx",
        type=str,
    )
    parser.add_argument(
        "--dataset",
        default=(project_path / "data/dataset/agent-lingoace-zh-400-choice.jsonl").as_posix(),
        type=str,
    )
    # Previously hard-coded in main(); the default keeps the old behaviour.
    parser.add_argument(
        "--output",
        default="result.xlsx",
        type=str,
    )
    return parser.parse_args()


def _load_dataset(path):
    """Read a JSONL file and return its records keyed by their ``idx`` field.

    Blank lines are skipped. If several records share an ``idx`` the last one
    wins.

    Raises:
        KeyError: if a record has no ``idx`` field.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    dataset = {}
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # A trailing or stray empty line would make json.loads raise.
            if not line.strip():
                continue
            record = json.loads(line)
            dataset[record["idx"]] = record
    return dataset


def _collect_failures(df, dataset, filename, dataset_path):
    """Print and collect every row of ``df`` whose ``correct`` cell is False.

    Args:
        df: evaluation results with the columns ``idx``, ``conversation``,
            ``expected``, ``actual_label``, ``actual_reason``, ``correct``
            and ``note``.
        dataset: mapping from ``idx`` to the dataset record, which must
            provide ``prompt`` and ``response``.
        filename: spreadsheet path, used only in error messages.
        dataset_path: dataset path, used only in error messages.

    Returns:
        A list of dicts, one per failed row, with the keys listed in
        ``_OUTPUT_COLUMNS``.

    Raises:
        KeyError: if a failed row's ``idx`` has no record in ``dataset``.
    """
    failures = []
    for _, row in df.iterrows():
        idx = row["idx"]
        conversation = row["conversation"]
        expected = row["expected"]
        actual_label = row["actual_label"]
        actual_reason = row["actual_reason"]
        correct = row["correct"]
        note = row["note"]

        # ``correct is False`` misses numpy.bool_ values, which pandas can
        # hand back instead of the built-in bool. is_bool() accepts both and
        # still rejects NaN, numbers and strings, so only genuine boolean
        # False cells are treated as failures.
        if not (pd.api.types.is_bool(correct) and not correct):
            continue

        print(idx)
        print(conversation)
        print(expected, actual_label)
        print(actual_reason)
        print(note)
        print("+" * 150)

        try:
            record = dataset[idx]
        except KeyError:
            raise KeyError(
                f"idx {idx!r} from {filename} not found in dataset {dataset_path}"
            ) from None
        prompt = record["prompt"]
        response = record["response"]
        print(prompt)
        print(response)
        print("-" * 150)

        failures.append({
            "idx": idx,
            "conversation": conversation,
            "expected": expected,
            "actual_label": actual_label,
            "actual_reason": actual_reason,
            "note": note,
            "prompt": prompt,
            "response": response,
            "op": None,
            "remark": None,
        })
    return failures


def main():
    """Read the dataset and the evaluation results, then write the failures."""
    args = get_args()

    dataset = _load_dataset(args.dataset)
    df = pd.read_excel(args.filename)
    failures = _collect_failures(df, dataset, args.filename, args.dataset)

    # Passing the columns explicitly keeps the header row even when there
    # are no failures at all.
    result = pd.DataFrame(failures, columns=_OUTPUT_COLUMNS)
    result.to_excel(args.output, index=False)


if __name__ == "__main__":
    main()