shashaNYU's picture
Upload 5 files
dbaef7a verified
import json
# Location of the source dataset (a JSON file) that this script flattens.
file_path = '/root/dev-zen-v1.0.json'
# Separator token; not used by the loading code below.
SEP_TOKEN = "<sep>"
# Flat list of records, one per (question, answer) pair. Each record is a dict
# with the keys 'title', 'context', 'question' and 'answer'.
data_loader = []

# Decode explicitly as UTF-8 so that reading does not depend on the platform's
# default locale encoding.
with open(file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Walk the nesting the file is expected to have:
#   data['data'][*] -> 'title', 'paragraphs'[*] -> 'context', 'qas'[*]
#   -> 'question', 'answers'[*] -> 'text'
for content in data['data']:
    title = content['title']
    paragraphs = content['paragraphs']
    for paragraph in paragraphs:
        context = paragraph['context']
        qas = paragraph['qas']
        for qa_pair in qas:
            question = qa_pair.get('question')
            if question is None:
                # Without a question there is nothing to pair the answers with.
                continue
            # A missing or null 'answers' entry is treated as "no answers"
            # rather than crashing on iteration over None.
            answers = qa_pair.get('answers') or []
            for answer in answers:
                answer_text = answer.get('text')
                # Skip answers whose text is missing or empty.
                if answer_text:
                    data_loader.append({
                        'title': title,
                        'context': context,
                        'question': question,
                        'answer': answer_text,
                    })
# Fixed instruction text (Chinese) attached to every record. Roughly: "Based on
# the context in the input below, generate question-answer pairs related to the
# context and write them to output." It contains no format placeholders.
prompt_templatae = """根据下面input的上下文,生成和上下文有关的问答对,并输出到output中。"""

# One prediction record per entry of data_loader: the context goes into
# 'input' and 'output' is left empty.
# NOTE(review): data_loader holds one entry per (question, answer) pair, so the
# same context is emitted once per answer -- confirm the duplicates are intended.
prompt_chunk = [
    {"instruction": prompt_templatae, "input": record['context'], "output": ''}
    for record in data_loader
]

# ensure_ascii=False writes the Chinese text as-is, so the file must be opened
# with an explicit UTF-8 encoding instead of the platform default.
with open('prompt_chunk_predict.json', 'w', encoding='utf-8') as f:
    json.dump(prompt_chunk, f, ensure_ascii=False, indent=4)