File size: 1,915 Bytes
cd10708 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
#1 привести полученный текст к приемлемому виду
#2 подать текст на вход к модели и получить результат
from functools import lru_cache

import spacy
from joblib import load
def predict_sentiment():
    """Build a sentiment predictor backed by the ``ml_binary.joblib`` model.

    The model is loaded once, when this factory is called; the returned
    callable reuses it for every prediction.

    Returns:
        A function that takes a raw text string, preprocesses it with
        ``preprocess_text`` and returns a dict with two keys:
        ``"labels"`` is ``"positive"`` when the model predicts ``1`` and
        ``"negative"`` otherwise; ``"probs"`` is the raw predicted value.
    """
    classifier = load("ml_binary.joblib")

    def _inner(text: str):
        cleaned = preprocess_text(text)
        # The model takes a batch; send a single-item batch and unwrap it.
        prediction = classifier.predict([cleaned])[0]
        if prediction == 1:
            sentiment = "positive"
        else:
            sentiment = "negative"
        return {"labels": sentiment, "probs": prediction}

    return _inner
def predict_category():
    """Build a single-category predictor backed by ``ml_category.joblib``.

    The model is loaded once, when this factory is called; the returned
    callable reuses it for every prediction.

    Returns:
        A function that takes a raw text string, preprocesses it with
        ``preprocess_text`` and returns a dict with two keys:
        ``"labels"`` is the list of the four category names; ``"probs"`` is
        a list of the same length holding ``1`` at the predicted index and
        ``0`` elsewhere.
    """
    classifier = load("ml_category.joblib")

    def _inner(text: str):
        cleaned = preprocess_text(text)
        # The prediction is used directly as an index into the label list.
        class_index = classifier.predict([cleaned])[0]
        category_names = ["политика", "экономика", "спорт", "культура"]
        one_hot = [0] * len(category_names)
        one_hot[class_index] = 1
        return {"labels": category_names, "probs": one_hot}

    return _inner
def predict_categorys():
    """Build a category predictor backed by the ``ml_categorys.joblib`` model.

    The model is loaded once, when this factory is called; the returned
    callable reuses it for every prediction.

    Returns:
        A function that takes a raw text string, preprocesses it with
        ``preprocess_text`` and returns a dict with two keys:
        ``"labels"`` is the list of the four category names; ``"probs"`` is
        the model's prediction for the text, passed through unchanged
        (presumably one value per category - confirm against the model).
    """
    classifier = load("ml_categorys.joblib")

    def _inner(text: str):
        cleaned = preprocess_text(text)
        # The model takes a batch; send a single-item batch and unwrap it.
        prediction = classifier.predict([cleaned])[0]
        category_names = ["политика", "экономика", "спорт", "культура"]
        return {"labels": category_names, "probs": prediction}

    return _inner
@lru_cache(maxsize=1)
def _load_nlp():
    """Load the ``ru_core_news_md`` spaCy pipeline once and reuse it."""
    # Loading a spaCy pipeline reads the whole model from disk, so doing it
    # on every call would dominate the cost of each prediction.
    return spacy.load("ru_core_news_md", disable=["ner"])


def preprocess_text(text: str) -> str:
    """Normalize raw text into a space-separated string of lemmas.

    Whitespace runs are collapsed, the text is lower-cased and then run
    through the ``ru_core_news_md`` spaCy pipeline (with ``ner`` disabled).
    Stop words, punctuation, whitespace tokens and lemmas of one character
    or less are dropped.

    Args:
        text: Raw input text. ``None`` is tolerated and treated as empty.

    Returns:
        The remaining lemmas joined by single spaces, or ``""`` when the
        input is ``None``, empty, or contains only whitespace.
    """
    if text is None:
        return ""

    normalized = " ".join(text.split()).lower()
    if not normalized:
        # Nothing to analyse, so avoid loading the pipeline at all.
        return ""

    doc = _load_nlp()(normalized)
    lemmas = (
        token.lemma_.strip()
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    )
    return " ".join(lemma for lemma in lemmas if len(lemma) > 1)