"""Streamlit app for training and querying an insurance churn classifier.

The sidebar switches between two pages:

* "Train Model" reads the CSV at DATASET_PATH, shows a preview and summary
  statistics, fits a RandomForestClassifier on the columns chosen in the UI
  and stores ``(model, feature_names)`` with joblib at MODEL_FILENAME.
* "Predict Churn" loads that artifact, asks for one value per stored feature
  name and displays the predicted class.
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import streamlit_authenticator as stauth

MODEL_FILENAME = "/tmp/insurance_churn_model.pkl"
DATASET_PATH = "src/insurance.csv"


def _parse_numeric_inputs(raw_inputs, feature_names):
    """Convert the text typed into the form into a one-row numeric frame.

    Args:
        raw_inputs: Mapping of feature name to the string entered by the user.
        feature_names: Feature names in the order the model was trained on.

    Returns:
        A ``(frame, invalid)`` tuple. ``frame`` is a one-row DataFrame whose
        columns follow ``feature_names``; ``invalid`` lists the features whose
        text was empty or could not be parsed as a number (those cells are
        NaN in ``frame``).
    """
    columns = list(feature_names)
    cleaned = {name: str(raw_inputs.get(name, "")).strip() for name in columns}
    # errors="coerce" turns unparsable text into NaN so that every bad field
    # can be reported at once instead of failing on the first one.
    values = pd.to_numeric(
        pd.Series(cleaned, index=columns, dtype=object), errors="coerce"
    )
    invalid = values.index[values.isna().to_numpy()].tolist()
    return values.to_frame().T, invalid


def _render_training_page():
    """Show the dataset, let the user pick columns, then train and save."""
    try:
        data = pd.read_csv(DATASET_PATH)
    except FileNotFoundError:
        st.error(f"Dataset not found at {DATASET_PATH}.")
        return

    st.subheader("Dataset Preview")
    st.dataframe(data.head())

    st.subheader("Summary Statistics")
    st.write(data.describe())

    has_churn = "churn" in data.columns
    if has_churn:
        st.subheader("Churn Distribution")
        fig, ax = plt.subplots()
        sns.countplot(x="churn", data=data, ax=ax)
        st.pyplot(fig)
        # The script is re-executed on every widget interaction; without an
        # explicit close each run would leave another figure registered in
        # pyplot's global state.
        plt.close(fig)

    st.subheader("Model Training")
    target_column = st.selectbox(
        "Select Target Column",
        options=data.columns,
        index=data.columns.get_loc("churn") if has_churn else 0,
    )
    feature_columns = st.multiselect(
        "Select Feature Columns",
        options=[col for col in data.columns if col != target_column],
    )

    agree = st.checkbox("Continue Training")
    if not agree:
        return
    if not feature_columns:
        # Fitting on zero columns can only fail; say so instead of crashing.
        st.warning("Select at least one feature column before training.")
        return

    X = data[feature_columns]
    y = data[target_column]

    st.write("Great! ML Model Training Started..")
    model = RandomForestClassifier()
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        model.fit(X_train, y_train)
    except (ValueError, TypeError) as exc:
        # Raised for data the estimator cannot use, e.g. text-valued feature
        # columns or a dataset too small to split.
        st.error(f"Training failed: {exc}")
        return

    y_pred = model.predict(X_test)

    st.subheader("Model Performance")
    st.write("Accuracy:", accuracy_score(y_test, y_pred))
    st.text("Classification Report:")
    st.text(classification_report(y_test, y_pred))

    # The feature order is stored next to the model so that the prediction
    # page can rebuild its input row in the same column order.
    joblib.dump((model, X.columns.tolist()), MODEL_FILENAME)
    st.success(f"Model trained and saved as {MODEL_FILENAME}")


def _render_prediction_page():
    """Load the saved model, collect feature values and show the prediction."""
    st.header("Insurance Churn Predictor")
    st.markdown("To use Predictor, Please Train the ML Model if not done yet! ")

    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. MODEL_FILENAME lives under /tmp, so confirm that only
    # this app can write to that path in the deployment environment.
    try:
        model, feature_names = joblib.load(MODEL_FILENAME)
    except FileNotFoundError:
        st.error("Model not found. Please train the model first.")
        st.stop()
    except Exception as exc:
        # A bare ``except:`` would also trap BaseException subclasses such as
        # KeyboardInterrupt; catch ordinary errors only and report the cause.
        st.error(
            f"Saved model could not be loaded: {exc}. "
            "Please train the model again."
        )
        st.stop()
    st.success("Model loaded successfully.")

    st.subheader("Enter Customer Details")
    input_data = {}
    for feature in feature_names:
        input_data[feature] = st.text_input(f"{feature}", "")

    if not st.button("Predict Churn"):
        return

    # Text inputs arrive as strings. They must be converted to numbers before
    # reaching the model; one-hot encoding them would create columns such as
    # "age_35" that the model never saw and leave every real feature at 0.
    input_df, invalid = _parse_numeric_inputs(input_data, feature_names)
    if invalid:
        fields = ", ".join(str(name) for name in invalid)
        st.error(f"Error in prediction: enter a numeric value for {fields}")
        return

    try:
        prediction = model.predict(input_df)[0]
    except Exception as e:
        st.error(f"Error in prediction: {e}")
        return

    st.subheader("Prediction Result")
    # NOTE(review): assumes the churned class is encoded as 1 in the target
    # column; any other label is displayed as "No" -- confirm against the data.
    st.write(f"Churn: {'Yes' if prediction == 1 else 'No'}")


st.title("Insurance Churn Prediction App")

menu = st.sidebar.radio("Navigation", ["Predict Churn", "Train Model"])

if menu == "Train Model":
    _render_training_page()
elif menu == "Predict Churn":
    _render_prediction_page()