Spaces:
Sleeping
Sleeping
| import re | |
| from typing import Dict, List | |
# Canonical skill names searched for in resume text. The spelling and casing
# used here is what gets reported back to callers.
SKILL_LEXICON = [
    # Programming and query languages
    "Python",
    "C++",
    "Java",
    "Go",
    "Rust",
    "JavaScript",
    "TypeScript",
    "SQL",
    "R",
    # Machine-learning libraries
    "PyTorch",
    "TensorFlow",
    "scikit-learn",
    "Hugging Face",
    "Transformers",
    # Web frameworks and runtimes
    "FastAPI",
    "Django",
    "Flask",
    "React",
    "Vue",
    "Next.js",
    "Node.js",
    # Data pipelines and streaming
    "Spark",
    "Airflow",
    "Kafka",
    # Cloud platforms and containers
    "AWS",
    "GCP",
    "Azure",
    "Docker",
    "Kubernetes",
    # Business-intelligence tools
    "Tableau",
    "Power BI",
    "Looker",
    # General tooling
    "Git",
    "Linux",
    "Terraform",
    "OpenAPI",
]
| def _regex_ner_contacts(text: str) -> Dict[str, List[str]]: | |
| emails = re.findall(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}", text) | |
| phones = re.findall(r"(?:\+?\d{1,3}[ -]?)?(?:\(\d{2,4}\)[ -]?)?\d{2,4}[ -]?\d{2,4}[ -]?\d{3,4}", text) | |
| return {"EMAIL": list(set(emails)), "PHONE": list(set(phones))} | |
def extract_skills(text: str, sections: Dict[str, str]) -> Dict:
    """Find known skills and contact details in *text*.

    Each entry of ``SKILL_LEXICON`` is searched for case-insensitively as a
    standalone term: it must not be preceded by a letter or digit, nor
    followed by a letter. This avoids the false positives of plain substring
    matching (e.g. "R" inside any word, "Go" inside "google", "Java" inside
    "JavaScript") while still accepting version suffixes such as "Python3"
    and punctuation such as "C++," or "Node.js.".

    Args:
        text: Full text to scan.
        sections: Accepted for interface compatibility; not used by this
            function.

    Returns:
        A dict with ``"skills"`` (alphabetically sorted list of the lexicon
        entries found, in their canonical spelling) and ``"contacts"`` (the
        result of ``_regex_ner_contacts(text)``).
    """
    contacts = _regex_ner_contacts(text)
    found = {
        skill
        for skill in SKILL_LEXICON
        if re.search(
            # re.escape is required: entries like "C++" and "Next.js"
            # contain regex metacharacters.
            r"(?<![A-Za-z0-9])" + re.escape(skill) + r"(?![A-Za-z])",
            text,
            flags=re.IGNORECASE,
        )
    }
    return {"skills": sorted(found), "contacts": contacts}