#!/usr/bin/env python
"""Load the sharded Python JSON Lines files with ``datasets.load_dataset``.

Importing this module builds ``dataset`` immediately (module-level side
effect); running it as a script additionally prints the loaded object.
"""
from datasets import load_dataset

_DATA_DIR = 'data'

# Shard paths per split, built from a shard count for each split.
# NOTE(review): 'validation' and 'test' use range(0), so their lists are
# empty. If a single shard with index 0 exists for them, the count should
# presumably be 1 -- confirm against the contents of the data directory.
splits = {
    'train': [f'{_DATA_DIR}/train/python_train_{i}.jsonl' for i in range(14)],
    'validation': [f'{_DATA_DIR}/valid/python_valid_{i}.jsonl' for i in range(0)],
    'test': [f'{_DATA_DIR}/test/python_test_{i}.jsonl' for i in range(0)],
}

# Pass only splits that actually list files: an empty list gives the loader
# no examples for that split (expected to break schema inference in
# `datasets` -- verify against the installed version). `splits` itself is
# left untouched so code that reads it still sees every key.
dataset = load_dataset(
    'json',
    data_files={name: files for name, files in splits.items() if files},
)

if __name__ == '__main__':
    print(dataset)