Spaces:

Corin1998
/

HFResumeIntakeSystem_DC

Sleeping

Create parsing.py

893163e verified 20 days ago

1.47 kB

	import re
	from typing import Dict, List

	# Employment period inside a work-experience line, e.g. "2019.04 - 2021.03",
	# "2021/4〜現在" or "2018年4月 ~ 2020年3月".  The end part is optional so that an
	# open-ended "2020.01 -" is still recognised as a period.
	_PERIOD_RE = re.compile(
	    r"(\d{4}[./年]\d{1,2})月?"  # start: year, separator, month (optional 月)
	    r"\s*[-–—〜～~]\s*"  # range separator with optional surrounding spaces
	    r"(\d{4}[./年]?\d{0,2}月?|現在|至今)?"  # end: date, or a "present" marker
	)

	# Separators accepted between individual skills in a free-text skills field.
	_SKILL_SPLIT_RE = re.compile(r"[、,\n]\s*")


	def _section_text(sections_dict: Dict[str, str], key: str) -> str:
	    """Return the text stored under *key*, falling back to ``<key>_raw``.

	    Missing keys and falsy values (``None``, ``""``) yield an empty string so
	    callers can always call string methods on the result.
	    """
	    return sections_dict.get(key) or sections_dict.get(f"{key}_raw") or ""


	def _non_empty_lines(text: str) -> List[Dict[str, str]]:
	    """Wrap every non-blank line of *text* as ``{"text": <stripped line>}``."""
	    return [{"text": line.strip()} for line in text.splitlines() if line.strip()]


	def normalize_resume(sections_dict: Dict[str, str]) -> Dict[str, List[Dict]]:
	    """Turn raw resume section texts into a structured dictionary.

	    Args:
	        sections_dict: Mapping of section name to raw text.  The keys read are
	            ``work_experience``, ``education`` and ``certifications`` (each
	            with a ``<name>_raw`` fallback) and ``skills``.  ``skills`` may be
	            either a string or an already-split list.  Missing keys and
	            ``None`` values are treated as empty text.

	    Returns:
	        A dict with these entries:

	        * ``work_experience``: one ``{"period", "text"}`` dict per line that
	          contains a recognisable employment period; other lines are skipped.
	        * ``education`` / ``certifications``: one ``{"text"}`` dict per
	          non-blank line.
	        * ``skills``: the list passed in, or the string split on ``、``,
	          ``,`` and newlines with blanks removed.
	        * ``raw_sections``: the unmodified text used for the three text
	          sections.
	    """
	    raw_work = _section_text(sections_dict, "work_experience")
	    raw_education = _section_text(sections_dict, "education")
	    raw_certifications = _section_text(sections_dict, "certifications")

	    work_items = []
	    for line in raw_work.splitlines():
	        match = _PERIOD_RE.search(line)
	        if match:
	            # strip(): an open-ended period would otherwise keep the
	            # whitespace that followed the range separator.
	            work_items.append({"period": match.group(0).strip(), "text": line.strip()})

	    skills_raw = sections_dict.get("skills") or ""
	    if isinstance(skills_raw, list):
	        skill_items = skills_raw
	    else:
	        skill_items = [s.strip() for s in _SKILL_SPLIT_RE.split(skills_raw) if s.strip()]

	    return {
	        "work_experience": work_items,
	        "education": _non_empty_lines(raw_education),
	        "certifications": _non_empty_lines(raw_certifications),
	        "skills": skill_items,
	        "raw_sections": {
	            "work_experience": raw_work,
	            "education": raw_education,
	            "certifications": raw_certifications,
	        },
	    }