Spaces:
Build error
Build error
# Install dependencies first (shell command, not Python): pip install streamlit pandas numpy scikit-learn nltk
import re
import string

import numpy as np
import pandas as pd
import streamlit as st
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
# Download NLTK resources
import nltk

# Only contact the NLTK download server when the stopword corpus is not
# already installed locally, instead of on every run of this script.
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')

# Load stopwords (a set gives O(1) membership tests in clean())
stopword = set(stopwords.words('english'))

# Load dataset
data = pd.read_csv("https://raw.githubusercontent.com/amankharwal/Website-data/master/twitter.csv")

# Map the numeric "class" column to human-readable labels
data["labels"] = data["class"].map({
    0: "Hate Speech",
    1: "Offensive Language",
    2: "No Hate and Offensive",
})

# Select relevant columns. The explicit copy makes `data` an independent
# frame, so assigning to data["tweet"] later does not write to a derived
# selection (which triggers pandas' SettingWithCopyWarning).
data = data[["tweet", "labels"]].copy()
# Clean text function
stemmer = SnowballStemmer("english")

# Patterns are compiled once at import time and written as raw strings so
# that regex escapes such as \[ and \S are not treated as (invalid) string
# escapes by the Python parser.
_BRACKETED_RE = re.compile(r'\[.*?\]')
_URL_RE = re.compile(r'https?://\S+|www\.\S+')
_HTML_TAG_RE = re.compile(r'<.*?>+')
_WORD_WITH_DIGIT_RE = re.compile(r'\w*\d\w*')
# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


def clean(text):
    """Normalise a piece of text for the bag-of-words model.

    Steps, in order: lowercase; drop ``[...]`` spans; drop URLs; drop
    ``<...>`` tags; delete ASCII punctuation; delete newlines; drop any
    token containing a digit; remove English stopwords; stem every
    remaining token with the Snowball stemmer.

    Args:
        text: Any value; it is converted with ``str()`` first.

    Returns:
        The cleaned text as a single space-joined string.
    """
    text = str(text).lower()
    text = _BRACKETED_RE.sub('', text)
    text = _URL_RE.sub('', text)
    text = _HTML_TAG_RE.sub('', text)
    text = text.translate(_PUNCTUATION_TABLE)
    # Newlines are deleted (not replaced by a space), matching the
    # original behaviour.
    text = text.replace('\n', '')
    text = _WORD_WITH_DIGIT_RE.sub('', text)
    # Split on a single space (not arbitrary whitespace) so tokenisation
    # matches the original; stopword filtering and stemming are done in
    # one pass.
    return " ".join(
        stemmer.stem(word) for word in text.split(' ') if word not in stopword
    )
# Apply text cleaning
data["tweet"] = data["tweet"].apply(clean)

# Prepare data for model: cleaned tweets are the features, labels the target
x = np.array(data["tweet"])
y = np.array(data["labels"])

# Bag-of-words vectoriser; the fitted instance is reused at prediction time
# so user input is encoded with the same vocabulary.
cv = CountVectorizer()
X = cv.fit_transform(x)  # Fit the Data

# NOTE: X_test / y_test are produced here but not read anywhere in this script.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Train the model. random_state is fixed (matching the seeded split above)
# so that fitting is deterministic and the same input gets the same
# prediction from one run of the script to the next.
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
# Streamlit app
st.title("Sentiment Analysis App")

# User input
sample = st.text_area("Enter a sentence for sentiment analysis:")

# Predict and display result
if st.button("Predict"):
    if not sample.strip():
        # Without this guard a blank submission is vectorised to an all-zero
        # row and still gets a (meaningless) label.
        st.warning("Please enter a sentence before predicting.")
    else:
        # Apply the same cleaning and vocabulary used for the training data.
        sample_cleaned = clean(sample)
        data_sample = cv.transform([sample_cleaned]).toarray()
        prediction = clf.predict(data_sample)[0]
        st.success(f"Sentiment: {prediction}")

# Display dataset (first rows of the cleaned tweets with their labels)
st.subheader("Dataset")
st.write(data.head())
# Run the app from a shell with: streamlit run sentiment_analysis_app.py