Spaces:
Paused
Paused
File size: 5,669 Bytes
d153774 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import os
import json
# Detect a file's text encoding from its byte-order mark.
def checkEncoding(filepath):
    """Guess the text encoding of a file from its leading byte-order mark.

    Only the BOM itself is inspected, so detection does not depend on what
    the first character of the file happens to be.

    Args:
        filepath: Path of the file to inspect.

    Returns:
        ``"utf_16_be"`` or ``"utf_16_le"`` when the file starts with the
        corresponding UTF-16 BOM, ``"utf_8_sig"`` when it starts with the
        UTF-8 BOM, and ``"utf_8"`` otherwise (including for an empty file).

    Raises:
        OSError: If the file cannot be opened for reading.
    """
    with open(filepath, "rb") as encode_check:
        # read() instead of readline(): a newline byte right after the BOM
        # must not cut the probe short.
        head = encode_check.read(3)

    if head.startswith(b"\xfe\xff"):
        return "utf_16_be"
    if head.startswith(b"\xff\xfe"):
        return "utf_16_le"
    if head.startswith(b"\xef\xbb\xbf"):
        # utf_8_sig decodes like utf_8 but consumes the leading BOM.
        return "utf_8_sig"
    return "utf_8"
# Read a text file into a list of lines.
def readTextFile(filepath):
    """Read a text file and return its lines.

    The encoding is chosen by ``checkEncoding``; if the content cannot be
    decoded with it, the file is re-read as ``latin_1``.

    Args:
        filepath: Path of the file to read.

    Returns:
        The list of lines (line endings kept, as produced by
        ``readlines()``), or ``None`` when ``filepath`` is not an existing
        regular file.

    Raises:
        OSError: If the file exists but cannot be opened.
    """
    # isfile() rather than exists(): a directory exists but cannot be opened.
    if not os.path.isfile(filepath):
        return None

    file_encoding = checkEncoding(filepath)
    try:
        with open(filepath, "rt", encoding=file_encoding) as f_in:
            lines = f_in.readlines()
    except UnicodeError:
        # Only decoding failures trigger the fallback; latin_1 maps every
        # byte to a character, so the second read cannot fail on content.
        with open(filepath, "rt", encoding="latin_1") as f_in:
            lines = f_in.readlines()

    # The endian-specific UTF-16 codecs and plain utf_8 leave the BOM in the
    # decoded text; drop it so the first token of the file compares cleanly.
    if lines and lines[0].startswith("\ufeff"):
        lines[0] = lines[0][1:]
    return lines
# Process the lines of a single LDR file.
def _extract_main_section(lines):
    """Return the lines that precede the first section boundary.

    A boundary is either a ``0 NOFILE`` line or a ``0 FILE <name>`` line that
    is not the very first line. Without a boundary, all lines are returned.
    """
    for index, line in enumerate(lines):
        tokens = line.split()
        if len(tokens) < 2 or tokens[0] != "0":
            continue
        # A bare "0 NOFILE" has only two tokens, so it is checked without
        # the three-token requirement that applies to "0 FILE <name>".
        if tokens[1] == "NOFILE":
            return lines[:index]
        if tokens[1] == "FILE" and len(tokens) > 2 and index > 0:
            return lines[:index]
    return lines[:]


def process_ldr_data(lines, label_mapping, label_inverse_mapping, label_frequency, label_counter):
    """Collect part labels and usage counts from one file's main section.

    Only lines before the first section boundary are scanned (see
    ``_extract_main_section``). Every line whose first token is ``1`` and
    that has at least 15 tokens contributes the part filename found in its
    15th token; a ``.DAT`` suffix is rewritten to ``.dat``.

    Args:
        lines: List of text lines of one file.
        label_mapping: Dict of part filename -> integer label; updated in
            place with any filename not seen before.
        label_inverse_mapping: Dict of integer label -> part filename;
            updated in place alongside ``label_mapping``.
        label_frequency: Dict of integer label -> usage count; updated in
            place.
        label_counter: Next unused integer label.

    Returns:
        A tuple ``(label_mapping, label_inverse_mapping, label_frequency,
        label_counter)`` holding the same (mutated) dicts and the next
        unused label after processing.
    """
    for line in _extract_main_section(lines):
        # Tokenise before checking the line type so indented lines are
        # recognised the same way the section scan recognises them.
        parts = line.split()
        if len(parts) < 15 or parts[0] != "1":
            continue

        # NOTE(review): only the first whitespace-delimited token of the
        # filename is used, so names containing spaces are truncated.
        part_filename = parts[14].replace(".DAT", ".dat")

        if part_filename not in label_mapping:
            label_mapping[part_filename] = label_counter
            label_inverse_mapping[label_counter] = part_filename
            label_counter += 1

        current_label = label_mapping[part_filename]
        label_frequency[current_label] = label_frequency.get(current_label, 0) + 1

    return label_mapping, label_inverse_mapping, label_frequency, label_counter
# Process every LDR file found under a folder.
def process_all_ldr_in_folder(folder_path):
    """Build label tables from all ``.ldr`` files under ``folder_path``.

    The folder is walked recursively. Each file whose name ends with
    ``.ldr`` (case-insensitive) is read with ``readTextFile`` and fed to
    ``process_ldr_data``; files that cannot be read are reported and skipped.
    Directories and files are visited in sorted order so that the integer
    labels assigned to parts do not depend on filesystem enumeration order.

    Args:
        folder_path: Root folder to search.

    Returns:
        A tuple ``(label_mapping, label_inverse_mapping, label_frequency)``:
        part filename -> label, label -> part filename, and label -> usage
        count. All three are empty when no ``.ldr`` file is found.
    """
    overall_label_mapping = {}
    overall_label_inverse_mapping = {}
    overall_label_frequency = {}
    label_counter = 0

    for root, dirs, files in os.walk(folder_path):
        # Sorting in place makes os.walk descend into subfolders in a
        # stable order.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith('.ldr'):
                continue

            file_path = os.path.join(root, file)
            print(f"正在处理: {file_path}")

            lines = readTextFile(file_path)
            if lines is None:
                print(f"⚠️ 无法读取文件 {file_path},已跳过")
                continue

            (
                overall_label_mapping,
                overall_label_inverse_mapping,
                overall_label_frequency,
                label_counter,
            ) = process_ldr_data(
                lines,
                overall_label_mapping,
                overall_label_inverse_mapping,
                overall_label_frequency,
                label_counter,
            )

    return overall_label_mapping, overall_label_inverse_mapping, overall_label_frequency
# Save the mapping tables and the frequency table sorted by usage.
def save_results(label_mapping, label_inverse_mapping, label_frequency, output_dir):
    """Write the label tables to JSON files inside ``output_dir``.

    Three files are produced, in this order:

    * ``label_mapping.json`` - ``label_mapping`` as given.
    * ``label_inverse_mapping.json`` - ``label_inverse_mapping`` as given.
    * ``label_frequency.json`` - a list of ``label_id`` / ``part_name`` /
      ``usage_count`` records, ordered by ``usage_count`` from highest to
      lowest.

    Args:
        label_mapping: Dict of part filename -> label.
        label_inverse_mapping: Dict of label -> part filename; also used to
            look up the part name of each frequency record.
        label_frequency: Dict of label -> usage count.
        output_dir: Destination folder; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    def _write_json(payload, filename):
        # All three outputs share the same encoding and JSON formatting.
        target = os.path.join(output_dir, filename)
        with open(target, 'w', encoding='utf-8') as handle:
            json.dump(payload, handle, indent=4, ensure_ascii=False)

    _write_json(label_mapping, 'label_mapping.json')
    _write_json(label_inverse_mapping, 'label_inverse_mapping.json')

    records = [
        {
            "label_id": label_id,
            "part_name": label_inverse_mapping.get(label_id, "未知零件"),
            "usage_count": count,
        }
        for label_id, count in label_frequency.items()
    ]
    # Stable descending sort: records with equal counts keep their
    # original relative order.
    ranked = sorted(records, key=lambda record: record["usage_count"], reverse=True)

    _write_json(ranked, 'label_frequency.json')
# Script entry point.
if __name__ == "__main__":
    import argparse

    # The defaults are the previously hard-coded locations, so running the
    # script without arguments behaves exactly as before.
    parser = argparse.ArgumentParser(
        description="Build part-label mapping and frequency tables from LDR files."
    )
    parser.add_argument(
        "input_folder",
        nargs="?",
        default='/public/home/wangshuo/gap/assembly/data/car_1k/subset_self/ldr_l30_rotrans_expand_wom',
        help="folder searched recursively for .ldr files",
    )
    parser.add_argument(
        "output_folder",
        nargs="?",
        default='/public/home/wangshuo/gap/assembly/data/car_1k/subset_self',
        help="folder that receives the JSON result files",
    )
    args = parser.parse_args()

    INPUT_FOLDER = args.input_folder
    OUTPUT_FOLDER = args.output_folder

    label_mapping, label_inverse_mapping, label_frequency = process_all_ldr_in_folder(INPUT_FOLDER)
    save_results(label_mapping, label_inverse_mapping, label_frequency, OUTPUT_FOLDER)

    # Summary of the run for the operator.
    print(f"\n✅ 处理完成!结果已保存到: {OUTPUT_FOLDER}")
    print("📊 统计摘要:")
    print(f" - 总唯一标签数: {len(label_mapping)}")
    print(f" - 总使用次数: {sum(label_frequency.values())}")
    print(" - label_frequency.json已按使用频率从高到低排序")
|