Spaces:
Sleeping
Sleeping
| import torch | |
| import torchtext | |
| import re | |
def clean_text(text):
    """Normalise raw text before tokenisation.

    The text is lowercased, stripped of punctuation, and its whitespace is
    normalised so that words are separated by exactly one space with no
    leading or trailing whitespace.

    Args:
        text: The input string to clean.

    Returns:
        The cleaned string. Underscores are kept, because ``\\w`` treats
        ``_`` as a word character.
    """
    text = text.lower()
    # Drop everything that is neither a word character nor whitespace.
    text = re.sub(r'[^\w\s]', '', text)
    # Collapse whitespace only *after* punctuation removal: deleting a
    # standalone punctuation token (e.g. the dash in "a - b") would otherwise
    # leave a double space or a dangling leading/trailing space behind.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()
def get_preprocess(vocab_path):
    """Build a text-preprocessing callable backed by a saved vocabulary.

    Args:
        vocab_path: Location of the serialized vocabulary object, passed
            straight to ``torch.load``.

    Returns:
        A one-argument callable that cleans the given text with
        ``clean_text``, tokenises it with torchtext's ``basic_english``
        tokenizer, and passes the tokens to the loaded vocabulary object.
        Presumably the result is a list of token indices (assumes the file
        holds a torchtext ``Vocab`` -- TODO confirm).
    """
    # NOTE(review): torch.load unpickles the file, so only load vocabulary
    # files from a trusted source.
    vocabulary = torch.load(vocab_path)
    tokenize = torchtext.data.utils.get_tokenizer('basic_english')

    def preprocess(text):
        cleaned = clean_text(text)
        tokens = tokenize(cleaned)
        return vocabulary(tokens)

    return preprocess