|
|
|
|
|
import csv |
|
|
import json |
|
|
|
|
|
|
|
|
# Which data row of the CSV to display. Rows are counted from 1 and the
# header line is not counted (csv.DictReader consumes it as field names).
TARGET_ROW = 57

print(f"Reading row {TARGET_ROW} from CSV...")
# newline='' hands line-ending handling to the csv module, as its
# documentation requires, so newlines inside quoted fields parse correctly.
with open('function_dataset_v2.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    for row_number, row in enumerate(reader, start=1):
        if row_number == TARGET_ROW:
            print(f"Row {row_number}:")
            print(f" original_index: {row['original_index']}")
            print(f" repo_name: '{row['repo_name']}'")
            print(f" path: '{row['path']}'")
            print(f" language: '{row['language']}'")
            print(f" function_name: '{row['function_name']}'")
            # Nothing after the target row is needed; stop reading.
            break
    else:
        # The reader was exhausted before reaching the target row; say so
        # instead of silently printing nothing.
        print(f"Row {TARGET_ROW} not found: the CSV has fewer than {TARGET_ROW} data rows")
|
|
|
|
|
|
|
|
print("\n\nChecking first JSONL entry...")
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# rather than left to the platform default.
with open('programming_problems.jsonl', 'r', encoding='utf-8') as f:
    first_line = f.readline()

if first_line.strip():
    # Only the first record is inspected; its 'metadata' object holds the
    # fields printed below.
    data = json.loads(first_line)
    metadata = data['metadata']
    print(f"original_index: {metadata['original_index']}")
    print(f"function_name: {metadata['function_name']}")
    print(f"Current repo_name: '{metadata['repo_name']}'")
    print(f"Current path: '{metadata['path']}'")
    print(f"Current language: '{metadata['language']}'")
else:
    # readline() returns '' at end of file; a blank first line is equally
    # unparseable. Report it clearly instead of raising a JSONDecodeError.
    print("No JSONL entry found: programming_problems.jsonl is empty or starts with a blank line")
|
|
|
|
|
|
|
|
print("\n\nCounting CSV rows with complete metadata...")
# newline='' hands line-ending handling to the csv module, as its
# documentation requires, so newlines inside quoted fields parse correctly.
with open('function_dataset_v2.csv', 'r', newline='') as f:
    reader = csv.DictReader(f)
    total = 0  # stays 0 when the file has no data rows
    complete = 0
    for total, row in enumerate(reader, start=1):
        # A row is complete when all three metadata columns are non-empty.
        # all() stops at the first empty value, in the listed order.
        if all(row[column] for column in ('repo_name', 'path', 'language')):
            complete += 1

print(f"Total CSV rows: {total}")
print(f"Rows with complete metadata: {complete}")
print(f"Rows with missing metadata: {total - complete}")
|
|
|