# Hugging Face Spaces status banner captured with the page: Sleeping
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
import streamlit_authenticator as stauth
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split
# Streamlit app with two pages selected from the sidebar:
#   * "Train Model"   - loads the bundled CSV, shows a preview, trains a
#                       RandomForestClassifier and saves it with joblib.
#   * "Predict Churn" - loads the saved model and predicts from values typed
#                       into one text field per trained feature.

MODEL_FILENAME = "/tmp/insurance_churn_model.pkl"
DATASET_PATH = "src/insurance.csv"

# Label spellings treated as churn / no churn when rendering a prediction.
_POSITIVE_LABELS = {"1", "1.0", "true", "yes"}
_NEGATIVE_LABELS = {"0", "0.0", "false", "no"}


def _parse_numeric_inputs(raw_inputs):
    """Convert the raw text-field values to finite floats.

    Args:
        raw_inputs: Mapping of feature name to the text typed by the user.

    Returns:
        A ``(values, invalid)`` tuple: ``values`` maps each successfully
        parsed feature name to a float, ``invalid`` lists the feature names
        whose text was empty, non-numeric, NaN or infinite.
    """
    values = {}
    invalid = []
    for name, text in raw_inputs.items():
        try:
            number = float(str(text).strip())
        except ValueError:
            invalid.append(name)
            continue
        # float() accepts "nan"/"inf"; those are not usable model inputs.
        if np.isfinite(number):
            values[name] = number
        else:
            invalid.append(name)
    return values, invalid


def _churn_label(prediction):
    """Return the display text for a predicted class label.

    0/1, booleans and yes/no spellings map to "No"/"Yes"; any other label is
    shown as-is instead of being reported as "No".
    """
    normalized = str(prediction).strip().lower()
    if normalized in _POSITIVE_LABELS:
        return "Yes"
    if normalized in _NEGATIVE_LABELS:
        return "No"
    return str(prediction)


st.title("Insurance Churn Prediction App")
menu = st.sidebar.radio("Navigation", ["Predict Churn", "Train Model"])

if menu == "Train Model":
    # NOTE: the dataset is read from a fixed path; a CSV uploader existed
    # here previously but was disabled.
    try:
        data = pd.read_csv(DATASET_PATH)
    except (FileNotFoundError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"Could not load dataset from {DATASET_PATH}: {exc}")
        st.stop()

    st.subheader("Dataset Preview")
    st.dataframe(data.head())
    st.subheader("Summary Statistics")
    st.write(data.describe())

    if 'churn' in data.columns:
        st.subheader("Churn Distribution")
        fig, ax = plt.subplots()
        sns.countplot(x='churn', data=data, ax=ax)
        st.pyplot(fig)
        # Close the figure so it is not kept alive by pyplot after rendering.
        plt.close(fig)

    st.subheader("Model Training")
    default_target_index = data.columns.get_loc('churn') if 'churn' in data.columns else 0
    target_column = st.selectbox(
        "Select Target Column",
        options=data.columns,
        index=default_target_index,
    )
    feature_columns = st.multiselect(
        "Select Feature Columns",
        options=[col for col in data.columns if col != target_column],
    )

    agree = st.checkbox("Continue Training")
    if agree:
        # The multiselect starts empty; fitting on zero columns would raise.
        if not feature_columns:
            st.warning("Select at least one feature column before training.")
            st.stop()

        # One-hot encode non-numeric columns so they can be fed to the
        # classifier; numeric columns pass through unchanged. The saved
        # feature list is the encoded column list, so the prediction form
        # asks for one numeric value (0/1 for indicator columns) per entry.
        X = pd.get_dummies(data[feature_columns])
        y = data[target_column]

        st.write("Great! ML Model Training Started..")
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
        # Seeded so the reported metrics are reproducible, like the split.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        st.subheader("Model Performance")
        st.write("Accuracy:", accuracy_score(y_test, y_pred))
        st.text("Classification Report:")
        st.text(classification_report(y_test, y_pred))

        joblib.dump((model, X.columns.tolist()), MODEL_FILENAME)
        st.success(f"Model trained and saved as {MODEL_FILENAME}")

elif menu == "Predict Churn":
    st.header("Insurance Churn Predictor")
    st.markdown("To use Predictor, Please Train the ML Model if not done yet! ")

    try:
        # NOTE(security): joblib.load unpickles the file, which can execute
        # arbitrary code, and MODEL_FILENAME lives in /tmp. Only safe if
        # nothing untrusted can write to that path - review for deployment.
        model, feature_names = joblib.load(MODEL_FILENAME)
    except FileNotFoundError:
        st.error("Model not found. Please train the model first.")
        st.stop()
    except Exception as exc:
        # A file that exists but cannot be loaded is a different problem
        # from a missing model; surface the real cause.
        st.error(f"Could not load the saved model: {exc}")
        st.stop()
    st.success("Model loaded successfully.")

    st.subheader("Enter Customer Details")
    input_data = {
        feature: st.text_input(f"{feature}", "") for feature in feature_names
    }

    if st.button("Predict Churn"):
        # Text inputs are strings; convert them to numbers explicitly.
        # One-hot encoding the typed strings would discard the values and
        # leave every model feature at 0.
        values, invalid = _parse_numeric_inputs(input_data)
        if invalid:
            st.error(
                "Please enter a valid number for: "
                + ", ".join(str(name) for name in invalid)
            )
        else:
            try:
                # columns= keeps the exact column order used at training time.
                input_df = pd.DataFrame([values], columns=feature_names)
                prediction = model.predict(input_df)[0]
            except Exception as e:
                st.error(f"Error in prediction: {e}")
            else:
                st.subheader("Prediction Result")
                st.write(f"Churn: {_churn_label(prediction)}")