import random
import shutil
# Seed the global RNG so the shuffle below gives the same order on every run
# for a given input file.
random.seed(42)

# Load every line of the source file into memory, line endings included.
with open('train_data_link.jsonl', encoding='utf-8') as source:
    data = list(source)

all_data_len = len(data)
print(all_data_len)

# Shuffle in place, then take the first 10% (rounded down) as the validation
# set and leave the remainder for training.
random.shuffle(data)
holdout_size = int(all_data_len * 0.1)
valid_data, train_data = data[:holdout_size], data[holdout_size:]
print(len(train_data), len(valid_data))
def save_data(file_name, data):
    """Write the strings in ``data`` to ``file_name``, one per line.

    The file is opened in text mode with UTF-8 encoding and is created or
    truncated.

    Args:
        file_name: Path of the output file.
        data: Iterable of strings to write, in order.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    with open(file_name, 'w', encoding='utf-8') as f:
        # ``writelines`` adds no separators itself. Lines obtained from
        # ``readlines()`` normally end in '\n', except possibly the file's
        # final line; once the list is shuffled that line can land anywhere,
        # and writing it verbatim would fuse it with the following record.
        # Terminate any non-empty line that lacks a newline; leave the rest
        # untouched so already well-formed input is written byte-for-byte.
        f.writelines(
            line if line.endswith('\n') or not line else line + '\n'
            for line in data
        )
# Write the training split first, then the validation split.
for out_name, rows in (
    ('train_data.jsonl', train_data),
    ('valid_data.jsonl', valid_data),
):
    save_data(out_name, rows)

# The test file is not split or shuffled; it is copied as-is under a new name.
shutil.copyfile('test_data_link.jsonl', 'test_data.jsonl')