| import csv | |
| import json | |
# Raise the csv module's per-field size cap to 10 GiB (10 * 1024**3 bytes).
# The stock cap is 131072 bytes; presumably the input files carry fields far
# larger than that (whole source files / long model responses) -- confirm.
try:
    csv.field_size_limit(10 * 1024 * 1024 * 1024)
except OverflowError:
    # The limit is stored as a C long. Where a C long is 32 bits wide the
    # 10 GiB value does not fit, so fall back to the largest 32-bit value
    # instead of failing at import time.
    csv.field_size_limit(2 ** 31 - 1)

# Shared table keyed by column 0 of the score CSV. load_score() stores the
# parsed JSON list per key; load_code_file() later replaces a matched entry
# with a {'code_file': ..., 'score_json': ...} dict.
score_dict = {}
def _parse_embedded_list(text):
    """Return the JSON array starting at the first '[' in *text*, else None."""
    start = text.find('[')
    if start == -1:
        return None
    try:
        # raw_decode stops at the bracket that closes the array, so arrays
        # nested inside the objects and any text after the array are both
        # tolerated (cutting at the first ']' would truncate such input).
        value, _ = json.JSONDecoder().raw_decode(text, start)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError: the text is not valid JSON.
        return None
    return value if isinstance(value, list) else None


def _exceeds_one(value):
    """Return True when *value* is not None and converts to an int above 1."""
    if value is None:
        return False
    try:
        return int(value) > 1
    except (TypeError, ValueError, OverflowError):
        # Non-numeric strings, containers, NaN and infinities do not qualify.
        return False


def load_score(path='res2.csv', scores=None):
    """Collect rows of a score CSV that contain at least one usable item.

    Column 0 of each row is used as the key and column 1 is searched for an
    embedded JSON array.  An item of that array qualifies when it is a dict
    holding both ``relevance_score`` and ``function_start_line`` and both
    values convert to integers greater than 1.  For every qualifying item
    the counter is incremented and the row's whole parsed array is stored
    under the row's key.

    Rows that are too short, carry no parsable array, or hold malformed
    items are skipped silently (best-effort loading).

    Args:
        path: CSV file to read.  Defaults to ``'res2.csv'``.
        scores: Dict to fill.  Defaults to the module-level ``score_dict``.

    Returns:
        The number of qualifying items (also printed).  Note this counts
        items, not rows, so it can exceed the number of keys stored.
    """
    if scores is None:
        scores = score_dict
    amount = 0
    # newline='' lets the csv module handle line endings inside quoted fields.
    with open(path, 'r', newline='') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            items = _parse_embedded_list(row[1])
            if items is None:
                continue
            for item in items:
                if not isinstance(item, dict):
                    continue
                if 'relevance_score' not in item or 'function_start_line' not in item:
                    continue
                if (_exceeds_one(item['relevance_score'])
                        and _exceeds_one(item['function_start_line'])):
                    amount += 1
                    scores[row[0]] = items
    print(amount)
    return amount
# Default location of the merged dataset CSV read by load_code_file().
_DATASET_CSV = (
    '/home/weifengsun/tangou1/domain_code/src/datasets/data_merged/'
    'dataset_all.csv'
)


def load_code_file(path=_DATASET_CSV, scores=None):
    """Attach the first matching dataset row to its entry in the score table.

    The dataset CSV is scanned in order.  For the first row whose column 0
    is a key of the score table, that entry is replaced by
    ``{'code_file': <row>, 'score_json': <previous entry>}``, the new entry
    is printed, and scanning stops.

    NOTE(review): stopping after the first match looks like a debugging
    leftover; confirm whether every matching row should be merged.

    Args:
        path: Dataset CSV to read (UTF-8).  Defaults to the merged dataset.
        scores: Dict to update.  Defaults to the module-level ``score_dict``.
    """
    if scores is None:
        scores = score_dict
    # newline='' keeps line endings inside quoted (multi-line) fields intact.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            if not row:
                # csv.reader yields [] for blank lines; row[0] would raise.
                continue
            key = row[0]
            if key in scores:
                scores[key] = {'code_file': row, 'score_json': scores[key]}
                print(scores[key])
                break
def main():
    """Run both loading stages in order: scores first, then the code rows."""
    load_score()
    load_code_file()


if __name__ == '__main__':
    main()