# Insurance-Churn-Predictor / src/streamlit_app.py
# DeepSoft-Tech's picture
# Update src/streamlit_app.py
# aa416cd verified
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import streamlit_authenticator as stauth
# Where the trained model is persisted as a ``(model, feature_names)`` tuple.
MODEL_FILENAME = "/tmp/insurance_churn_model.pkl"
# Training data bundled with the app; the path is relative to the working directory.
DATASET_PATH = "src/insurance.csv"


def coerce_feature_inputs(raw_inputs, feature_names):
    """Turn the raw text-box values into a one-row numeric feature frame.

    ``st.text_input`` always yields strings, so the values must be converted
    to numbers before they reach the model. One-hot encoding the strings
    instead would produce columns such as ``age_35`` that never match the
    training columns, and the model would end up predicting on zeros.

    Args:
        raw_inputs: Mapping of feature name to the text the user typed.
        feature_names: Feature names in the column order the model expects.

    Returns:
        A ``(frame, invalid)`` tuple. ``invalid`` lists the features whose
        value is missing, blank, non-numeric or non-finite. ``frame`` is a
        single-row DataFrame with columns in ``feature_names`` order, or
        ``None`` when ``invalid`` is not empty.
    """
    names = list(feature_names)
    texts = [str(raw_inputs.get(name, "")).strip() for name in names]
    # errors="coerce" maps blank / unparsable text to NaN instead of raising,
    # so every bad field can be reported at once.
    numbers = pd.to_numeric(pd.Series(texts, dtype="object"), errors="coerce").astype(float)
    finite = np.isfinite(numbers.to_numpy())
    invalid = [name for name, ok in zip(names, finite) if not ok]
    if invalid:
        return None, invalid
    return pd.DataFrame([numbers.tolist()], columns=names), invalid


def render_training_page():
    """Render the "Train Model" page.

    Shows a preview and summary of the bundled dataset, lets the user pick the
    target and feature columns, and, once "Continue Training" is ticked,
    trains a random forest, reports its hold-out metrics and saves
    ``(model, feature_names)`` to ``MODEL_FILENAME``.
    """
    try:
        data = pd.read_csv(DATASET_PATH)
    except FileNotFoundError:
        st.error(f"Dataset not found at {DATASET_PATH}.")
        st.stop()

    st.subheader("Dataset Preview")
    st.dataframe(data.head())
    st.subheader("Summary Statistics")
    st.write(data.describe())

    has_churn = "churn" in data.columns
    if has_churn:
        st.subheader("Churn Distribution")
        fig, ax = plt.subplots()
        sns.countplot(x="churn", data=data, ax=ax)
        st.pyplot(fig)
        # The script is re-executed on every interaction; pyplot keeps each
        # figure alive until it is closed, so close it once it is rendered.
        plt.close(fig)

    st.subheader("Model Training")
    default_target = data.columns.get_loc("churn") if has_churn else 0
    target_column = st.selectbox(
        "Select Target Column", options=data.columns, index=default_target
    )
    feature_columns = st.multiselect(
        "Select Feature Columns",
        options=[col for col in data.columns if col != target_column],
    )

    if not st.checkbox("Continue Training"):
        return
    if not feature_columns:
        # Fitting on a zero-column frame would fail inside scikit-learn.
        st.warning("Select at least one feature column before training.")
        return

    X = data[feature_columns]
    y = data[target_column]
    st.write("Great! ML Model Training Started..")
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        # Seeded like the split so the reported metrics are reproducible.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
    except ValueError as exc:
        # Raised by scikit-learn for unusable input, e.g. feature columns that
        # cannot be converted to numbers or a dataset too small to split.
        st.error(f"Model training failed: {exc}")
        return

    st.subheader("Model Performance")
    st.write("Accuracy:", accuracy_score(y_test, y_pred))
    st.text("Classification Report:")
    st.text(classification_report(y_test, y_pred))
    joblib.dump((model, X.columns.tolist()), MODEL_FILENAME)
    st.success(f"Model trained and saved as {MODEL_FILENAME}")


def render_prediction_page():
    """Render the "Predict Churn" page.

    Loads the saved ``(model, feature_names)`` tuple, shows one text box per
    feature and, when the button is pressed, validates the inputs and displays
    the model's prediction.
    """
    st.header("Insurance Churn Predictor")
    st.markdown("To use Predictor, Please Train the ML Model if not done yet! ")

    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code. MODEL_FILENAME lives in /tmp, so only run this where
        # that directory is not writable by untrusted users.
        model, feature_names = joblib.load(MODEL_FILENAME)
    except FileNotFoundError:
        st.error("Model not found. Please train the model first.")
        st.stop()
    except Exception as exc:
        # A corrupt or incompatible file is a different problem from a missing
        # one; surface the real reason instead of "not found".
        st.error(f"Could not load the saved model: {exc}")
        st.stop()
    st.success("Model loaded successfully.")

    st.subheader("Enter Customer Details")
    input_data = {}
    for feature in feature_names:
        input_data[feature] = st.text_input(f"{feature}", "")

    if not st.button("Predict Churn"):
        return

    input_df, invalid = coerce_feature_inputs(input_data, feature_names)
    if invalid:
        fields = ", ".join(str(name) for name in invalid)
        st.error(f"Error in prediction: enter a numeric value for {fields}")
        return

    try:
        prediction = model.predict(input_df)[0]
    except Exception as e:
        st.error(f"Error in prediction: {e}")
        return

    st.subheader("Prediction Result")
    # NOTE(review): assumes the positive class is encoded as 1 in the chosen
    # target column -- confirm against the dataset.
    st.write(f"Churn: {'Yes' if prediction == 1 else 'No'}")


def main():
    """Draw the app title and sidebar navigation, then render the chosen page."""
    st.title("Insurance Churn Prediction App")
    menu = st.sidebar.radio("Navigation", ["Predict Churn", "Train Model"])
    if menu == "Train Model":
        render_training_page()
    elif menu == "Predict Churn":
        render_prediction_page()


# `streamlit run` executes this file as the `__main__` module.
if __name__ == "__main__":
    main()