# Corin1998's picture
# Update pipelines/scoring.py
# 4a8273c verified
import re
from typing import Dict
def _coverage_flags(normalized: Dict) -> Dict[str, bool]:
return {
"has_work": bool(normalized.get("work_experience")),
"has_edu": bool(normalized.get("education")),
"has_certs": bool(normalized.get("certifications")),
"has_skills": bool(normalized.get("skills")),
}
def _blank_gaps(normalized: Dict) -> int:
periods = [w.get("period", "") for w in normalized.get("work_experience", [])]
return max(0, 0 if not periods else len([p for p in periods if ("~" in p or "-" in p) and "現在" not in p]))
def _lexical_diversity(text: str) -> float:
tokens = re.findall(r"\w+", text.lower())
if not tokens:
return 0.0
uniq = len(set(tokens))
return round(uniq / len(tokens), 4)
def compute_quality_score(text: str, normalized: Dict) -> Dict:
    """Combine section coverage, period count and lexical diversity into a score.

    The total is ``0.5 * coverage + 0.1 * gap_term + 0.4 * lexical_diversity``,
    rounded to four decimals, where ``coverage`` is the fraction of coverage
    flags that are set and ``gap_term`` is ``0.5 - min(0.5, gap_count * 0.05)``.

    Args:
        text: Raw text passed to ``_lexical_diversity``.
        normalized: Structured data passed to ``_coverage_flags`` and
            ``_blank_gaps``.

    Returns:
        A dict with the keys ``coverage_flags``, ``gap_count``,
        ``lexical_diversity`` and ``total_score``.
    """
    flags = _coverage_flags(normalized)
    # Divide by the actual number of flags so the ratio stays correct if the
    # set of coverage flags changes.
    coverage = sum(flags.values()) / len(flags)
    gaps = _blank_gaps(normalized)
    lexdiv = _lexical_diversity(text)
    # NOTE(review): gap_term lies in [0, 0.5], so its weighted share is at
    # most 0.05 and total_score tops out at 0.95 rather than 1.0. Kept as-is
    # to avoid shifting existing scores; confirm whether this is intended.
    gap_term = max(0.0, 0.5 - min(0.5, gaps * 0.05))
    total = round(0.5 * coverage + 0.1 * gap_term + 0.4 * lexdiv, 4)
    return {
        "coverage_flags": flags,
        "gap_count": gaps,
        "lexical_diversity": lexdiv,
        "total_score": total,
    }