Spaces:
Build error
Build error
| import pandas as pd | |
| import torch | |
| import re | |
| from llm import TransformerRegrModel | |
class VacancyAnalyzer:
    """Classify one vacancy description with a saved transformer model.

    The text fields of a single vacancy are concatenated, cleaned and, when
    the result is long enough, passed to a ``TransformerRegrModel`` whose
    weights are loaded from ``transformer_path``.
    """

    # Columns removed from the inputs when they are present.
    _DROPPED_COLUMNS = ['conversion', 'conversion_class', 'id']
    # Cleaned descriptions shorter than this are not sent to the model.
    _MIN_DESCRIPTION_LENGTH = 100

    def __init__(self, transformer_path: str, inputs: dict):
        """Store the checkpoint path and turn ``inputs`` into a one-row frame.

        Args:
            transformer_path: Path of the saved state dict given to
                ``torch.load`` in :meth:`classify`.
            inputs: Mapping of column name to value for a single vacancy.
                The columns listed in ``_DROPPED_COLUMNS`` are removed if
                present; their absence is not an error.
        """
        self.transformer_path = transformer_path
        # errors='ignore' lets inputs without the dropped columns through
        # instead of raising KeyError.
        self.inputs = pd.DataFrame(inputs, index=[0]).drop(
            columns=self._DROPPED_COLUMNS, errors='ignore'
        )
        self.cat_features = ['profession', 'grade', 'location']
        self.text_features = [
            'emp_brand', 'mandatory', 'additional', 'comp_stages', 'work_conditions'
        ]
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    def __cleaner__(self, txt: str) -> str:
        """Return ``txt`` without ``_..._`` spans and without newlines/tabs.

        Underscore-delimited spans are removed first (non-greedy, within a
        single line because ``.`` does not match a newline), then every
        newline and tab character is deleted.
        """
        txt = re.sub(r'\_(.*?)\_', r'', txt)
        # '+' instead of '*' avoids substituting empty matches at every
        # position; the resulting string is the same.
        txt = re.sub(r'[\n\t]+', r'', txt)
        return txt

    def _build_description(self) -> str:
        """Join the text features of the single row into one string.

        Missing values (``None``/NaN) contribute an empty string. Every
        field is followed by one space, so the result ends with a space.
        """
        row = self.inputs[self.text_features].iloc[0]
        parts = ['' if pd.isna(value) else value for value in row]
        return ' '.join(parts) + ' '

    def classify(self) -> tuple:
        """Run the model on the cleaned description.

        Returns:
            ``('Too short text', 'unknown')`` when the cleaned description
            has fewer than ``_MIN_DESCRIPTION_LENGTH`` characters; otherwise
            ``('Text analyzing finished', prediction)`` where ``prediction``
            is the NumPy array produced by ``argmax`` over dimension 1 of
            the model's first output.
        """
        description = self.__cleaner__(self._build_description())
        if len(description) < self._MIN_DESCRIPTION_LENGTH:
            return 'Too short text', 'unknown'

        tbert = TransformerRegrModel('rubert', 3)
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source (consider weights_only=True where supported).
        state_dict = torch.load(self.transformer_path, map_location=self.device)
        tbert.load_state_dict(state_dict)
        tbert.to(self.device)
        tbert.eval()
        with torch.no_grad():
            outputs, _, _ = tbert(description)
            prediction = torch.argmax(outputs, 1).cpu().numpy()
        return 'Text analyzing finished', prediction