Spaces:

Corin1998
/

HFResumeIntakeSystem_DC

Sleeping

Corin1998 committed 20 days ago

Commit

63c77bb

verified ·

1 Parent(s): c198598

Update pipelines/skills.py

Files changed (1) hide show

pipelines/skills.py CHANGED Viewed

@@ -2,17 +2,15 @@ import re
 from typing import Dict, List
 SKILL_LEXICON = [
-    "Python","C++","Java","Go","Rust","JavaScript","TypeScript","SQL","R",
-    "PyTorch","TensorFlow","Keras","scikit-learn","Hugging Face","Transformers",
-    "FastAPI","Django","Flask","React","Vue","Next.js","Node.js",
-    "Spark","Hadoop","Airflow","dbt","Kafka",
-    "AWS","GCP","Azure","Docker","Kubernetes",
-    "Tableau","Power BI","Looker",
-    "Git","Linux","Terraform","OpenAPI",
 ]
-NAME_HINTS = ["氏名", "Name"]
 def _regex_ner_contacts(text: str) -> Dict[str, List[str]]:
     emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", text)
     phones = re.findall(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}", text)
@@ -20,12 +18,10 @@ def _regex_ner_contacts(text: str) -> Dict[str, List[str]]:
 def extract_skills(text: str, sections: Dict[str, str]) -> Dict:
     contacts = _regex_ner_contacts(text)
-    name_lines = []
-    for hint in NAME_HINTS:
-        name_lines += [l.strip() for l in text.splitlines() if hint in l][:3]
     found = []
     text_lower = text.lower()
     for s in SKILL_LEXICON:
         if s.lower() in text_lower:
             found.append(s)
-    return {"skills": sorted(list(set(found))), "contacts": contacts, "name_candidates": name_lines}

 from typing import Dict, List
 SKILL_LEXICON = [
+    "Python", "C++", "Java", "Go", "Rust", "JavaScript", "TypeScript", "SQL", "R",
+    "PyTorch", "TensorFlow", "scikit-learn", "Hugging Face", "Transformers",
+    "FastAPI", "Django", "Flask", "React", "Vue", "Next.js", "Node.js",
+    "Spark", "Airflow", "Kafka",
+    "AWS", "GCP", "Azure", "Docker", "Kubernetes",
+    "Tableau", "Power BI", "Looker",
+    "Git", "Linux", "Terraform", "OpenAPI",
 ]
 def _regex_ner_contacts(text: str) -> Dict[str, List[str]]:
     emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", text)
     phones = re.findall(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}", text)
 def extract_skills(text: str, sections: Dict[str, str]) -> Dict:
     contacts = _regex_ner_contacts(text)
     found = []
     text_lower = text.lower()
     for s in SKILL_LEXICON:
         if s.lower() in text_lower:
             found.append(s)
+    return {"skills": sorted(list(set(found))), "contacts": contacts}