File size: 1,474 Bytes
893163e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
from typing import Dict, List

# Employment period such as "2020.04 - 2022.03", "2020/4〜現在" or
# "2020年4月 ～ 2022年3月". The end part is optional so that open-ended
# ranges ("2020.04 -") are still recognised.
_PERIOD_RE = re.compile(
    # Start: 4-digit year, separator, 1-2 digit month, optional "月".
    r"(\d{4}[./年]\d{1,2}月?)"
    # Range separator: ASCII hyphen/tilde, wave dash, full-width tilde,
    # en dash, em dash, full-width hyphen.
    r"\s*[-〜~～–—－]\s*"
    # End: year with optional separator/month/"月", or a "present" keyword.
    r"(\d{4}[./年]?\d{0,2}月?|現在|至今)?"
)

# Skills are separated by an ideographic comma, an ASCII comma or a newline.
_SKILL_SEP_RE = re.compile(r"[、,\n]\s*")


def _section_text(sections_dict: Dict[str, str], key: str) -> str:
    """Return the value under *key*, else under ``<key>_raw``, else ``""``."""
    return sections_dict.get(key) or sections_dict.get(key + "_raw") or ""


def _text_items(text: str) -> List[Dict]:
    """Turn each non-blank line of *text* into a ``{"text": ...}`` item."""
    return [{"text": line.strip()} for line in text.splitlines() if line.strip()]


def normalize_resume(sections_dict: Dict[str, str]) -> Dict[str, List[Dict]]:
    """Convert raw resume section text into structured lists.

    For ``work_experience``, ``education`` and ``certifications`` the text is
    read from the key itself, falling back to ``<key>_raw`` when the first is
    missing or empty. Missing or ``None`` values are treated as empty text.

    Args:
        sections_dict: Mapping of section name to section text. The
            ``skills`` entry may alternatively already be a list, in which
            case it is returned unchanged.

    Returns:
        A dict with the keys:

        * ``work_experience``: one ``{"period", "text"}`` item per line that
          contains a date range; lines without a date range are skipped.
        * ``education`` / ``certifications``: one ``{"text"}`` item per
          non-blank line.
        * ``skills``: list of skill strings split on ``、``, ``,`` or newline.
        * ``raw_sections``: the unprocessed text of the three text sections.
    """
    raw_work = _section_text(sections_dict, "work_experience")
    raw_education = _section_text(sections_dict, "education")
    raw_certifications = _section_text(sections_dict, "certifications")

    work_items = []
    for line in raw_work.splitlines():
        match = _PERIOD_RE.search(line)
        if match:
            # rstrip: when the end date is absent, the separator's trailing
            # ``\s*`` would otherwise leave whitespace at the end of the period.
            work_items.append(
                {"period": match.group(0).rstrip(), "text": line.strip()}
            )

    skills_raw = sections_dict.get("skills")
    if isinstance(skills_raw, list):
        skill_items = skills_raw
    else:
        # ``or ""`` guards against a missing key or an explicit None.
        skill_items = [
            part.strip()
            for part in _SKILL_SEP_RE.split(skills_raw or "")
            if part.strip()
        ]

    return {
        "work_experience": work_items,
        "education": _text_items(raw_education),
        "certifications": _text_items(raw_certifications),
        "skills": skill_items,
        "raw_sections": {
            "work_experience": raw_work,
            "education": raw_education,
            "certifications": raw_certifications,
        },
    }