| import re | |
| import string | |
def remove_articles(text):
    """Replace each standalone English article in ``text`` with one space.

    The words ``a``, ``an`` and ``the`` are matched only as whole words
    (delimited by ``\\b`` word boundaries) and only in lowercase, because
    the pattern is case-sensitive. Surrounding whitespace is left as-is,
    so the result may contain runs of consecutive spaces.

    Args:
        text: The string to process.

    Returns:
        A new string with every matched article replaced by ``" "``.
    """
    article_pattern = re.compile(r"\b(a|an|the)\b", re.UNICODE)
    replacement = " "
    return article_pattern.sub(replacement, text)
def white_space_fix(text):
    """Collapse every run of whitespace in ``text`` to a single space.

    Leading and trailing whitespace is dropped as a consequence of
    splitting on whitespace and re-joining the pieces.

    Args:
        text: The string to normalize.

    Returns:
        The whitespace-separated tokens of ``text`` joined by ``" "``;
        an empty string when ``text`` has no non-whitespace content.
    """
    tokens = text.split()
    separator = " "
    return separator.join(tokens)
def remove_punc(text):
    """Return ``text`` with every ASCII punctuation character removed.

    The characters removed are exactly those in ``string.punctuation``;
    non-ASCII punctuation (for example ``¿``) is left in place.

    Args:
        text: The string to strip punctuation from.

    Returns:
        A new string containing the characters of ``text`` that are not
        in ``string.punctuation``, in their original order.
    """
    # A translation table with only a "delete" set removes the characters
    # in one pass inside str.translate, instead of building a set and
    # filtering character by character in Python on every call.
    delete_punctuation = str.maketrans("", "", string.punctuation)
    return text.translate(delete_punctuation)
def lower(text):
    """Return the lowercase form of ``text``.

    Args:
        text: The string to convert.

    Returns:
        The result of calling ``text.lower()``.
    """
    lowered = text.lower()
    return lowered