Spaces:

ivanlau
/

IntelliLabel

Runtime error

App Files Files Community

ivanlau committed on Mar 8, 2022

Commit

f0726f1

1 Parent(s): a3858c0

added app.py

Browse files

Files changed (1) hide show

app.py +89 -0

app.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import neattext.functions as nfx
+import re
+import torch
+import streamlit as st
+# labels
+labels = [
+    'bug',
+    'enhancement',
+    'question'
+]
+# Model path
+# LOCAL
+# MODEL_DIR = "./model/distil-bert-uncased-finetuned-github-issues/"
+# REMOTE
+MODEL_DIR = "ivanlau/distil-bert-uncased-finetuned-github-issues"
+@st.cache(allow_output_mutation=True, show_spinner=False)
+def load_model():
+    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR)
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
+    return model, tokenizer
+# Helpers
+reg_obj = re.compile(r'[^\u0000-\u007F]+', re.UNICODE)
+def is_english_text(text):
+    return (False if reg_obj.match(text) else True)
+# remove the stopwords, emojis from the text and convert it into lower case
+def neatify_text(text):
+    text = str(text).lower()
+    text = nfx.remove_stopwords(text)
+    text = nfx.remove_emojis(text)
+    return text
+def main():
+    # st UI setting
+    st.set_page_config(
+        page_title="IntelliLabel",
+        page_icon="🏷",
+        layout="centered",
+        initial_sidebar_state="auto",
+    )
+    st.title("IntelliLabel")
+    st.write("IntelliLabel is a github issue classification app. It classifies issue into 3 categories (Bug, Enhancement, Question).")
+    # load model
+    with st.spinner("Downloading model (takes ~1 min)"):
+        model, tokenizer = load_model()
+    default_text = "Unable to run Speech2Text example in documentation"
+    text = st.text_area('Enter text here:', value=default_text)
+    submit = st.button('Predict 🏷')
+    if submit:
+        text = text.strip(" \n\t")
+        if is_english_text(text):
+            text = neatify_text(text)
+            tokenized_sentence = tokenizer(text, return_tensors='pt')
+            output = model(**tokenized_sentence)
+            predictions = torch.nn.functional.softmax(output.logits, dim=-1)
+            _, preds = torch.max(predictions, dim=-1)
+            predicted = labels[preds.item()]
+            predictions = predictions.tolist()[0]
+            c1, c2, c3 = st.columns(3)
+            c1.metric(label="Bug", value=round(predictions[0],3))
+            c2.metric(label="Enhancement", value=round(predictions[1],3))
+            c3.metric(label="Question", value=round(predictions[2],3))
+            st.info("Prediction")
+            st.write(predicted.capitalize())
+        else:
+            st.error(str("Please input english text."))
+if __name__ == '__main__':
+	main()