import argparse
import json
import os
| |
|
def _parse_jsonl(lines):
    """Yield one decoded JSON value for every non-blank line in *lines*."""
    for line in lines:
        # Blank lines (commonly a trailing empty line) are not JSON documents;
        # passing them to json.loads would raise JSONDecodeError.
        if line.strip():
            yield json.loads(line)


def _load_mapping(mapping_paths):
    """Build a ``question -> {"context", "answers"}`` dict from mapping files.

    Files are read in the given order; when the same question appears more
    than once, the last occurrence wins. Records without a "question" key
    (or with a null question) are ignored.
    """
    mapping = {}
    for path in mapping_paths:
        with open(path, 'r', encoding='utf-8') as f_map:
            for obj in _parse_jsonl(f_map):
                question = obj.get("question")
                if question is None:
                    continue

                # Accept either the plural "answers" key or the singular
                # "answer" key, and always store the result as a list.
                raw_ans = obj.get("answers", obj.get("answer", []))
                answers = raw_ans if isinstance(raw_ans, list) else [raw_ans]

                mapping[question] = {
                    "context": obj.get("context", ""),
                    "answers": answers,
                }
    return mapping


def _merge_records(orig_path, out_path, mapping):
    """Copy records from *orig_path* to *out_path*, enriching matched ones.

    A record whose "input" value is a key of *mapping* gets its "context"
    and "answers" fields set from the mapping entry; every other record is
    written through unchanged.
    """
    # The input is opened first so a missing input file fails before the
    # output file is created or truncated.
    with open(orig_path, 'r', encoding='utf-8') as f_in, \
            open(out_path, 'w', encoding='utf-8') as f_out:
        for item in _parse_jsonl(f_in):
            entry = mapping.get(item.get("input"))
            if entry is not None:
                item["context"] = entry["context"]
                item["answers"] = entry["answers"]
            f_out.write(json.dumps(item, ensure_ascii=False) + "\n")


def main():
    """Merge question/answer mappings into a JSONL file (CLI entry point).

    Command-line arguments:
        orig_path      JSONL file whose records carry an "input" field.
        out_path       JSONL file to write the merged records to.
        mapping_paths  One or more JSONL files with "question", optional
                       "context" and "answers"/"answer" fields.

    Exits with an argparse usage error when ``out_path`` resolves to the same
    file as ``orig_path``. Malformed JSON lines raise ``json.JSONDecodeError``.
    """
    parser = argparse.ArgumentParser(description='Convert and merge JSONL files with question-answer mappings')
    parser.add_argument('orig_path', help='Path to the original JSONL file')
    parser.add_argument('out_path', help='Path to the output JSONL file')
    parser.add_argument('mapping_paths', nargs='+', help='Path(s) to mapping JSONL file(s)')

    args = parser.parse_args()

    # Opening the output with mode 'w' truncates it immediately; if it is the
    # same file as the input, the input would be wiped before it is read.
    if os.path.realpath(args.orig_path) == os.path.realpath(args.out_path):
        parser.error('out_path must be different from orig_path')

    mapping = _load_mapping(args.mapping_paths)
    _merge_records(args.orig_path, args.out_path, mapping)

    print(f"Merge completed, output file: {args.out_path}")
| |
|
# Run the merge only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()