Spaces:
Sleeping
Sleeping
| from typing import List, Dict | |
| import re | |
| def _period_key(item_text: str) -> str: | |
| m = re.search(r"(\d{4}[./年]\d{1,2})\s*[-〜~]\s*(\d{4}[./年]?\d{0,2}|現在|至今)?", item_text) | |
| return m.group(0) if m else item_text[:50] | |
def merge_normalized_records(records: List[Dict]) -> Dict:
    """Merge several normalized records into one de-duplicated record.

    Each input record is a dict that may contain the keys
    ``"work_experience"``, ``"education"``, ``"certifications"`` (lists of
    dicts carrying a ``"text"`` field), ``"skills"`` (a list of values) and
    ``"raw_sections"`` (a dict of section name -> text). Missing keys and
    ``None`` values are treated as empty.

    Merging rules:
        * work_experience: de-duplicated on the period key of the text plus
          the first 80 characters of the text, then sorted newest first by
          the first ``YYYY<sep>MM`` date found in ``"period"`` (or in
          ``"text"`` when ``"period"`` is empty). Entries without a date
          are placed last, in their original relative order.
        * education / certifications: de-duplicated on the exact text;
          entries with empty text are dropped.
        * skills: falsy values dropped, duplicates removed, result sorted.
        * raw_sections: texts of the same section are joined with a newline
          in input order and stripped.

    Args:
        records: The records to merge. ``None`` is treated as an empty list
            and falsy records are skipped.

    Returns:
        A new dict with the five keys listed above. The item dicts inside
        the lists are the same objects as in the input (not copied).
    """
    merged = {
        "work_experience": [],
        "education": [],
        "certifications": [],
        "skills": [],
        "raw_sections": {},
    }
    seen_work, seen_edu, seen_cert = set(), set(), set()
    skill_set = set()

    def _add_unique_by_text(items, seen, target):
        # Append items whose non-empty "text" has not been seen before.
        for item in items or []:
            text = item.get("text") or ""
            if text and text not in seen:
                seen.add(text)
                target.append(item)

    for record in records or []:
        if not record:
            continue

        for work in record.get("work_experience") or []:
            text = work.get("text") or ""
            # NOTE(review): entries with empty text all map to the key "|",
            # so only the first such entry is kept even if their "period"
            # fields differ -- confirm this is intended.
            key = _period_key(text) + "|" + text[:80]
            if key not in seen_work:
                seen_work.add(key)
                merged["work_experience"].append(work)

        _add_unique_by_text(
            record.get("education"), seen_edu, merged["education"]
        )
        _add_unique_by_text(
            record.get("certifications"), seen_cert, merged["certifications"]
        )

        for skill in record.get("skills") or []:
            if skill:
                skill_set.add(skill)

        raw_sections = merged["raw_sections"]
        for name, body in (record.get("raw_sections") or {}).items():
            combined = raw_sections.get(name, "") + "\n" + (body or "")
            raw_sections[name] = combined.strip()

    # Compiled once here rather than on every key computation.
    start_date_re = re.compile(r"(\d{4})([./年])(\d{1,2})")

    def _sort_key(work):
        source = work.get("period") or work.get("text") or ""
        match = start_date_re.search(source)
        if match:
            # Negated YYYYMM so that an ascending sort yields newest first.
            # int() cannot fail here: \d only matches decimal digits.
            year, month = int(match.group(1)), int(match.group(3))
            return (-(year * 100 + month), 0)
        # Undated entries compare greater than every (negative) dated key.
        return (0, 1)

    merged["work_experience"].sort(key=_sort_key)
    merged["skills"] = sorted(skill_set)
    return merged