import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from ucimlrepo import fetch_ucirepo
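
# NOTE: OneHotEncoder(sparse_output=False) below assumes scikit-learn >= 1.2;
# older releases spell the argument `sparse=False` instead.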

# Page configuration
st.set_page_config(
    page_title="Car Evaluation Analysis",
    page_icon="🚗",
    layout="wide"
)

# Title and introduction
st.title("🚗 Car Evaluation Analysis Dashboard")
st.markdown("""
This dashboard analyzes car evaluation data using different machine learning models.
The dataset includes various car attributes and their evaluation classifications.
""")

# Load and prepare data (cached so the UCI dataset is fetched only once per session)
@st.cache_data
def load_data():
    car_evaluation = fetch_ucirepo(id=19)
    X, y = car_evaluation.data.features, car_evaluation.data.targets
    df = pd.concat([X, y], axis=1)
    return df, X, y

df, X, y = load_data()
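
# The UCI Car Evaluation dataset (id=19) has 1,728 records, six categorical
# features (buying, maint, doors, persons, lug_boot, safety) and a
# four-class target: unacc, acc, good, vgood.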

# Sidebar navigation
st.sidebar.header("Navigation")
page = st.sidebar.radio("Go to", ["Data Overview", "Exploratory Analysis", "Model Training", "Model Comparison"])

# Data Overview Page
if page == "Data Overview":
    st.header("Dataset Overview")

    # Display metrics in cards
    col1, col2, col3, col4 = st.columns(4)
    with col1:
        st.metric(label="Total Records", value=f"{len(df):,}")
    with col2:
        st.metric(label="Features", value=len(df.columns) - 1)
    with col3:
        st.metric(label="Target Classes", value=df['class'].nunique())
    with col4:
        st.metric(label="Missing Values", value=int(df.isnull().sum().sum()))
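
    # The Car Evaluation dataset ships with no missing values, so the
    # "Missing Values" metric above is expected to read 0.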
    st.write("")

    # Sample Data
    st.subheader("Sample Data")
    st.dataframe(df.head(), use_container_width=True, height=230)

    # Target Class Distribution
    # NOTE: the classes are heavily imbalanced -- "unacc" alone accounts for
    # roughly 70% of the records.
    st.subheader("Target Class Distribution")
    col1, col2 = st.columns([2, 1])
    with col1:
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.countplot(data=df, x='class', palette='viridis', ax=ax)
        ax.set_title('Distribution of Car Evaluations')
        st.pyplot(fig)
    with col2:
        st.write("")
        st.write("")
        class_distribution = df['class'].value_counts()
        for class_name, count in class_distribution.items():
            st.metric(label=class_name, value=int(count))

# Exploratory Analysis Page
elif page == "Exploratory Analysis":
    st.header("Exploratory Data Analysis")

    # Feature Distribution
    st.subheader("Feature Distributions")
    feature_to_plot = st.selectbox("Select Feature", df.columns[:-1])
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(data=df, x=feature_to_plot, palette='coolwarm', ax=ax)
    ax.set_title(f'Distribution of {feature_to_plot}')
    plt.xticks(rotation=45)
    st.pyplot(fig)

    # Feature vs Target
    st.subheader("Feature vs Target Class")
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.countplot(data=df, x=feature_to_plot, hue='class', palette='Set2', ax=ax)
    ax.set_title(f'{feature_to_plot} Distribution by Class')
    plt.xticks(rotation=45)
    st.pyplot(fig)

    # Correlation Heatmap: every column is categorical, so the frame is one-hot
    # encoded first and the correlations are between the resulting dummy columns.
    st.subheader("Correlation Heatmap")
    encoded_df = pd.get_dummies(df, drop_first=True)
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(encoded_df.corr(), annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
    ax.set_title('Correlation Heatmap of Encoded Features')
    st.pyplot(fig)

# Model Training Page
elif page == "Model Training":
    st.header("Model Training and Evaluation")

    # Data preprocessing: one-hot encode the six categorical features
    encoder = OneHotEncoder(sparse_output=False)
    X_encoded = encoder.fit_transform(X)
    y_encoded = y.values.ravel()
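    # e.g. 'buying' with values vhigh/high/med/low becomes four 0/1 columns:
    # buying_vhigh, buying_high, buying_med, buying_low.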

    # Train-test split (stratified so class proportions are preserved in both splits)
    test_size = st.slider("Select Test Size", 0.1, 0.4, 0.2, 0.05)
    X_train, X_test, y_train, y_test = train_test_split(
        X_encoded, y_encoded, test_size=test_size, random_state=42, stratify=y_encoded
    )

    # Model selection
    model_choice = st.selectbox(
        "Select Model",
        ["Support Vector Machine", "Random Forest", "Logistic Regression"]
    )

    if st.button("Train Model"):
        with st.spinner("Training model..."):
            if model_choice == "Support Vector Machine":
                model = SVC(kernel='linear', random_state=42)
            elif model_choice == "Random Forest":
                model = RandomForestClassifier(n_estimators=100, random_state=42)
            else:
                model = LogisticRegression(max_iter=500, random_state=42)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
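
        # NOTE: `with` blocks do not introduce a new scope in Python, so `model`
        # and `y_pred` trained inside the spinner remain available below.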
        # Display results
        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Model Performance")
            accuracy = accuracy_score(y_test, y_pred)
            st.metric(label="Accuracy", value=f"{accuracy:.4f}")
            st.text("Classification Report:")
            st.text(classification_report(y_test, y_pred))
        with col2:
            st.subheader("Confusion Matrix")
            fig, ax = plt.subplots(figsize=(8, 6))
            sns.heatmap(
                confusion_matrix(y_test, y_pred),
                annot=True,
                fmt='d',
                cmap='Blues',
                xticklabels=np.unique(y_test),
                yticklabels=np.unique(y_test),
                ax=ax
            )
            ax.set_title(f'{model_choice} Confusion Matrix')
            ax.set_xlabel('Predicted')
            ax.set_ylabel('Actual')
            st.pyplot(fig)

        # Feature importance for Random Forest
        if model_choice == "Random Forest":
            st.subheader("Feature Importance")
            feature_importance = pd.DataFrame({
                'feature': encoder.get_feature_names_out(),
                'importance': model.feature_importances_
            })
            feature_importance = feature_importance.sort_values(
                'importance', ascending=False
            ).head(10)
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.barplot(data=feature_importance, x='importance', y='feature', ax=ax)
            ax.set_title('Top 10 Most Important Features')
            st.pyplot(fig)

# Model Comparison Page
else:
    st.header("Model Comparison")

    if st.button("Compare All Models"):
        with st.spinner("Training all models..."):
            # Data preprocessing
            encoder = OneHotEncoder(sparse_output=False)
            X_encoded = encoder.fit_transform(X)
            y_encoded = y.values.ravel()

            # Train-test split (stratified, same as the Model Training page)
            X_train, X_test, y_train, y_test = train_test_split(
                X_encoded, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
            )
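
            # All three models share this split, so the accuracies reported
            # below are directly comparable.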
            # Train all models
            models = {
                "SVM": SVC(kernel='linear', random_state=42),
                "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
                "Logistic Regression": LogisticRegression(max_iter=500, random_state=42)
            }
            results = {}
            for name, model in models.items():
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                results[name] = {
                    'accuracy': accuracy_score(y_test, y_pred),
                    'predictions': y_pred
                }

        # Display comparison results
        st.subheader("Accuracy Comparison")
        accuracy_df = pd.DataFrame({
            'Model': list(results.keys()),
            'Accuracy': [results[name]['accuracy'] for name in results]
        })
        col1, col2 = st.columns(2)
        with col1:
            st.dataframe(accuracy_df)
        with col2:
            fig, ax = plt.subplots(figsize=(10, 6))
            sns.barplot(data=accuracy_df, x='Model', y='Accuracy', palette='viridis', ax=ax)
            ax.set_title('Model Accuracy Comparison')
            ax.set_ylim(0, 1)
            st.pyplot(fig)

        # Detailed model comparison
        st.subheader("Detailed Model Performance")
        for name in results:
            st.write(f"\n{name}:")
            st.text(classification_report(y_test, results[name]['predictions']))
            fig, ax = plt.subplots(figsize=(8, 6))
            sns.heatmap(
                confusion_matrix(y_test, results[name]['predictions']),
                annot=True,
                fmt='d',
                cmap='Blues',
                xticklabels=np.unique(y_test),
                yticklabels=np.unique(y_test),
                ax=ax
            )
            ax.set_title(f'{name} Confusion Matrix')
            ax.set_xlabel('Predicted')
            ax.set_ylabel('Actual')
            st.pyplot(fig)

# Footer
st.markdown("""
---
Created with ❤️ using Streamlit
""")