from typing import List, Dict
import re

# A period with a start year/month, a dash-like separator and an optional end,
# e.g. "2015.04 - 2018.03", "2019年4 〜 現在", or an open-ended "2019/04 -".
_PERIOD_RE = re.compile(
    r"(\d{4}[./年]\d{1,2})\s*[-〜~]\s*(\d{4}[./年]?\d{0,2}|現在|至今)?"
)

# First year/month pair in a string, e.g. "2019.04", "2019/4" or "2019年4".
_YEAR_MONTH_RE = re.compile(r"(\d{4})([./年])(\d{1,2})")


def _period_key(item_text: str) -> str:
    """Return the period substring of *item_text* for use in a dedupe key.

    Falls back to the first 50 characters of the text when no period is
    found. ``None`` is treated as an empty string.
    """
    item_text = item_text or ""
    match = _PERIOD_RE.search(item_text)
    return match.group(0) if match else item_text[:50]


def _recency_sort_key(entry: Dict):
    """Sort key placing the most recent year/month first and undated last.

    The date is read from ``entry["period"]`` when that is non-empty,
    otherwise from ``entry["text"]``.
    """
    source = entry.get("period") or entry.get("text") or ""
    match = _YEAR_MONTH_RE.search(source)
    if match is None:
        # Dated keys are negative, so (0, 1) sorts after every dated entry.
        return (0, 1)
    # Both groups are pure digit runs, so int() cannot fail here.
    year, month = int(match.group(1)), int(match.group(3))
    return (-(year * 100 + month), 0)


def _append_unique_by_text(items, seen: set, target: list) -> None:
    """Append items whose non-empty ``"text"`` has not been seen yet."""
    for item in items or []:
        text = item.get("text") or ""
        if text and text not in seen:
            seen.add(text)
            target.append(item)


def merge_normalized_records(records: List[Dict]) -> Dict:
    """Merge several normalized record dicts into a single record.

    Each input record may carry the keys ``work_experience``, ``education``,
    ``certifications`` (lists of dicts with a ``"text"`` field), ``skills``
    (a list of values) and ``raw_sections`` (a dict of strings). Missing keys
    and keys whose value is ``None`` are treated as empty.

    Merge rules:
      * work_experience: de-duplicated on the period found in the text plus
        the first 80 characters of the text, then sorted so the most recent
        year/month comes first and entries without a date come last
        (the sort is stable, so ties keep their input order).
      * education / certifications: de-duplicated on the exact text; entries
        with empty text are dropped.
      * skills: falsy values dropped, de-duplicated, returned sorted.
      * raw_sections: values with the same key are joined with a newline and
        stripped.

    Args:
        records: The records to merge; ``None`` is treated as an empty list.

    Returns:
        A new dict with the five keys listed above. The entry dicts inside
        the lists are the same objects as in the inputs (not copies).
    """
    merged = {
        "work_experience": [],
        "education": [],
        "certifications": [],
        "skills": [],
        "raw_sections": {},
    }
    seen_work, seen_edu, seen_cert = set(), set(), set()
    skill_set = set()
    raw_sections = merged["raw_sections"]

    for record in records or []:
        if not record:
            # None or an empty record contributes nothing.
            continue

        for job in record.get("work_experience") or []:
            text = job.get("text") or ""
            key = _period_key(text) + "|" + text[:80]
            if key not in seen_work:
                seen_work.add(key)
                merged["work_experience"].append(job)

        _append_unique_by_text(
            record.get("education"), seen_edu, merged["education"]
        )
        _append_unique_by_text(
            record.get("certifications"), seen_cert, merged["certifications"]
        )

        skill_set.update(s for s in record.get("skills") or [] if s)

        for name, content in (record.get("raw_sections") or {}).items():
            previous = raw_sections.get(name, "")
            # A None section body is treated as empty rather than crashing.
            raw_sections[name] = (previous + "\n" + (content or "")).strip()

    merged["work_experience"].sort(key=_recency_sort_key)
    merged["skills"] = sorted(skill_set)
    return merged