File size: 1,636 Bytes
f2670ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import csv
import json

# Raise the csv module's per-field size cap so very large cells can be parsed.
# NOTE: 10 * 1024 * 1024 * 1024 bytes is 10 GiB, not 10 MB.
csv.field_size_limit(10 * 1024 * 1024 * 1024)  # 10 GiB

# Filled by load_score(): maps row[0] of the score CSV to the JSON list parsed
# from row[1]. load_code_file() later replaces a matched entry with a dict
# holding 'code_file' and 'score_json'.
score_dict = {}

def _extract_json_list(text):
    """Return the JSON array that starts at the first '[' in *text*, or None.

    The array is decoded with ``raw_decode`` so that nested arrays and ']'
    characters inside strings are handled, and any text after the array is
    ignored.
    """
    start = text.find('[')
    if start == -1:
        return None
    try:
        parsed, _ = json.JSONDecoder().raw_decode(text[start:])
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        return None
    return parsed if isinstance(parsed, list) else None


def _is_relevant(entry):
    """Return True if *entry* is a dict whose 'relevance_score' and
    'function_start_line' are both present, non-None and int()-convertible
    to a value greater than 1."""
    if not isinstance(entry, dict):
        return False
    score = entry.get('relevance_score')
    start_line = entry.get('function_start_line')
    if score is None or start_line is None:
        return False
    try:
        return int(score) > 1 and int(start_line) > 1
    except (TypeError, ValueError, OverflowError):
        # Non-numeric or non-finite values: treat the entry as not relevant.
        return False


def load_score(path='res2.csv'):
    """Populate the module-level ``score_dict`` from a score CSV.

    For every row, the second column is searched for a JSON array. If the
    array contains at least one relevant entry (see ``_is_relevant``), the
    whole array is stored in ``score_dict`` under the row's first column.
    Rows with fewer than two columns, or without a parsable array, are
    skipped.

    Finally prints the number of relevant *entries* seen (one row can
    contribute several), not the number of rows stored.

    Args:
        path: CSV file to read. Defaults to 'res2.csv'.
    """
    amount = 0
    # newline='' lets the csv module handle newlines embedded in quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            if len(row) < 2:
                continue
            entries = _extract_json_list(row[1])
            if entries is None:
                continue
            hits = sum(1 for entry in entries if _is_relevant(entry))
            if hits:
                amount += hits
                score_dict[row[0]] = entries
    print(amount)


_DEFAULT_DATASET_PATH = (
    '/home/weifengsun/tangou1/domain_code/src/datasets/data_merged/dataset_all.csv'
)


def load_code_file(path=_DEFAULT_DATASET_PATH):
    """Attach the dataset row to the first ``score_dict`` entry it matches.

    Scans the CSV at *path*; for the first row whose first column is a key of
    the module-level ``score_dict``, replaces that entry with
    ``{'code_file': <row>, 'score_json': <previous value>}``, prints it, and
    stops reading.

    Args:
        path: Dataset CSV to read. Defaults to the original hard-coded
            location.
    """
    # newline='' lets the csv module handle newlines embedded in quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for blank lines; row[0] would raise there.
            if not row or row[0] not in score_dict:
                continue
            score_dict[row[0]] = {'code_file': row, 'score_json': score_dict[row[0]]}
            print(score_dict[row[0]])
            # NOTE(review): only the first match is processed; this looks like
            # a debugging leftover -- confirm before removing the break.
            break
    
            
        


if __name__ == '__main__':
    # Order matters: load_score() fills score_dict, which load_code_file()
    # then consults while scanning the code dataset.
    load_score()
    load_code_file()
    # print(len(score_dict))