import gradio as gr
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
import io
import base64
import warnings
from PIL import Image  # Pillow ships with Gradio; used to return rendered charts to gr.Image

warnings.filterwarnings('ignore')

class BusinessAnalystGPT:
    def __init__(self):
        self.df = None
        self.analysis_results = ""

    def analyze_dataset(self, file):
        """Analyze the uploaded dataset and provide comprehensive insights."""
        if file is None:
            return "Please upload a CSV or Excel file first."
        try:
            # Read the dataset
            if file.name.endswith('.csv'):
                self.df = pd.read_csv(file.name)
            elif file.name.endswith(('.xlsx', '.xls')):
                self.df = pd.read_excel(file.name)
            else:
                return "Error: Please upload a CSV or Excel file."

            # Basic dataset info
            analysis = f"""
# DATASET ANALYSIS REPORT

## Basic Information
- **Dataset Shape**: {self.df.shape[0]} rows × {self.df.shape[1]} columns
- **Memory Usage**: {self.df.memory_usage(deep=True).sum() / 1024:.2f} KB
- **Missing Values**: {self.df.isnull().sum().sum()} total

## Column Information
"""
            # Column details
            for i, col in enumerate(self.df.columns):
                dtype = str(self.df[col].dtype)
                missing = self.df[col].isnull().sum()
                unique_vals = self.df[col].nunique()
                analysis += f"\n**{i + 1}. {col}**\n"
                analysis += f" - Data Type: {dtype}\n"
                analysis += f" - Missing Values: {missing} ({missing / len(self.df) * 100:.1f}%)\n"
                analysis += f" - Unique Values: {unique_vals}\n"
                if dtype in ['int64', 'float64']:
                    analysis += f" - Range: {self.df[col].min():.2f} to {self.df[col].max():.2f}\n"
                    analysis += f" - Mean: {self.df[col].mean():.2f}\n"
                elif dtype == 'object':
                    top_values = self.df[col].value_counts().head(3)
                    analysis += f" - Top Values: {list(top_values.index)}\n"

            # Add ML model recommendations
            analysis += self._get_ml_recommendations()

            # Add visualization recommendations
            analysis += self._get_visualization_recommendations()

            self.analysis_results = analysis
            return analysis
        except Exception as e:
            return f"Error analyzing dataset: {str(e)}"
    def _get_ml_recommendations(self):
        """Analyze the dataset and recommend suitable ML models with variable suggestions."""
        if self.df is None:
            return ""

        ml_analysis = "\n\n## MACHINE LEARNING MODEL RECOMMENDATIONS\n\n"

        # Identify variable types
        numeric_cols = self.df.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = self.df.select_dtypes(include=['object']).columns.tolist()

        ml_analysis += "### Potential Target Variables (Dependent Variables):\n"

        # Suggest target variables based on data characteristics
        target_suggestions = []
        for col in numeric_cols:
            unique_ratio = self.df[col].nunique() / len(self.df)
            if unique_ratio < 0.1 and self.df[col].nunique() <= 10:
                target_suggestions.append((col, "Classification", f"Has {self.df[col].nunique()} unique values - good for classification"))
            elif unique_ratio > 0.1:
                target_suggestions.append((col, "Regression", "Continuous values - suitable for regression"))

        for col in categorical_cols:
            if self.df[col].nunique() <= 10:
                target_suggestions.append((col, "Classification", f"Categorical with {self.df[col].nunique()} classes"))

        if target_suggestions:
            for var, task_type, reason in target_suggestions:
                ml_analysis += f"- **{var}** ({task_type}): {reason}\n"
        else:
            ml_analysis += "- No clear target variables identified. Please specify based on your business objective.\n"

        ml_analysis += "\n### Feature Variables (Independent Variables):\n"

        # List potential feature variables
        if len(numeric_cols) > 0:
            ml_analysis += f"- **Numeric Features**: {', '.join(numeric_cols)}\n"
        if len(categorical_cols) > 0:
            ml_analysis += f"- **Categorical Features**: {', '.join(categorical_cols)}\n"

        # Model recommendations based on data characteristics
        ml_analysis += "\n### Recommended Models & Expected Performance:\n\n"

        # Classification models
        if any("Classification" in suggestion[1] for suggestion in target_suggestions):
            ml_analysis += "#### For Classification Tasks:\n"
            ml_analysis += """
1. **Random Forest Classifier** ⭐⭐⭐⭐⭐
   - Expected Accuracy: 85-95%
   - Best for: Mixed data types, feature importance
   - Pros: Handles mixed feature types, less prone to overfitting than a single tree

2. **Logistic Regression** ⭐⭐⭐⭐
   - Expected Accuracy: 75-85%
   - Best for: Linear relationships, interpretability
   - Pros: Fast, interpretable coefficients

3. **Decision Tree** ⭐⭐⭐
   - Expected Accuracy: 70-80%
   - Best for: Rule-based decisions, interpretability
   - Pros: Easy to understand and visualize

4. **Support Vector Machine (SVM)** ⭐⭐⭐⭐
   - Expected Accuracy: 80-90%
   - Best for: High-dimensional data, small datasets
   - Pros: Effective for complex patterns

5. **K-Nearest Neighbors (KNN)** ⭐⭐⭐
   - Expected Accuracy: 70-85%
   - Best for: Simple patterns, small datasets
   - Pros: Simple, no assumptions about data
"""

        # Regression models
        if any("Regression" in suggestion[1] for suggestion in target_suggestions):
            ml_analysis += "\n#### For Regression Tasks:\n"
            ml_analysis += """
1. **Random Forest Regressor** ⭐⭐⭐⭐⭐
   - Expected R² Score: 0.80-0.95
   - Best for: Non-linear relationships, feature importance
   - Pros: Robust, handles outliers well

2. **Linear Regression** ⭐⭐⭐⭐
   - Expected R² Score: 0.70-0.85
   - Best for: Linear relationships, interpretability
   - Pros: Fast, interpretable, baseline model

3. **Support Vector Regression (SVR)** ⭐⭐⭐⭐
   - Expected R² Score: 0.75-0.90
   - Best for: Non-linear patterns, robust predictions
   - Pros: Effective for complex relationships

4. **Decision Tree Regressor** ⭐⭐⭐
   - Expected R² Score: 0.65-0.80
   - Best for: Non-linear, interpretable rules
   - Pros: Easy to understand decision path
"""

        # Data preprocessing recommendations
        ml_analysis += "\n### Data Preprocessing Recommendations:\n"
        missing_data = self.df.isnull().sum().sum()
        if missing_data > 0:
            ml_analysis += f"- **Handle Missing Data**: {missing_data} missing values need attention\n"
        if len(categorical_cols) > 0:
            ml_analysis += "- **Encode Categorical Variables**: Use Label Encoding or One-Hot Encoding\n"
        if len(numeric_cols) > 1:
            ml_analysis += "- **Feature Scaling**: Consider StandardScaler for SVM/KNN models\n"

        # Flag columns where a noticeable share of values fall outside the IQR fences
        outliers_detected = False
        for col in numeric_cols:
            Q1 = self.df[col].quantile(0.25)
            Q3 = self.df[col].quantile(0.75)
            IQR = Q3 - Q1
            outliers = ((self.df[col] < (Q1 - 1.5 * IQR)) | (self.df[col] > (Q3 + 1.5 * IQR))).sum()
            if outliers > len(self.df) * 0.05:  # more than 5% outliers
                outliers_detected = True
                break

        if outliers_detected:
            ml_analysis += "- **Handle Outliers**: Detected outliers that may affect model performance\n"

        return ml_analysis
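    # Sketch (not called by the app): how one of the recommended models could be
    # trained with the imports above, assuming the user picks a categorical target
    # column — the names `X`, `y`, and "target" are illustrative only.
    #
    #   X = self.df.select_dtypes(include=[np.number]).drop(columns=["target"], errors="ignore")
    #   y = self.df["target"]
    #   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    #   model = RandomForestClassifier(n_estimators=100, random_state=42)
    #   model.fit(X_train, y_train)
    #   print("Accuracy:", accuracy_score(y_test, model.predict(X_test)))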
    def _get_visualization_recommendations(self):
        """Provide specific chart recommendations for variables."""
        if self.df is None:
            return ""

        viz_analysis = "\n\n## DATA VISUALIZATION RECOMMENDATIONS\n\n"

        numeric_cols = self.df.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = self.df.select_dtypes(include=['object']).columns.tolist()

        # Single variable visualizations
        viz_analysis += "### Single Variable Analysis:\n\n"
        for col in numeric_cols:
            viz_analysis += f"**{col}** (Numeric):\n"
            viz_analysis += f"- **Histogram**: Show distribution of {col}\n"
            viz_analysis += f"- **Box Plot**: Identify outliers in {col}\n"
            viz_analysis += f"- **Density Plot**: Smooth distribution curve for {col}\n\n"

        for col in categorical_cols:
            unique_count = self.df[col].nunique()
            viz_analysis += f"**{col}** (Categorical - {unique_count} categories):\n"
            if unique_count <= 10:
                viz_analysis += f"- **Bar Chart**: Count of each category in {col}\n"
                viz_analysis += f"- **Pie Chart**: Proportion of categories in {col}\n"
            else:
                viz_analysis += f"- **Bar Chart**: Top 10 categories in {col}\n"
                viz_analysis += f"- **Donut Chart**: Alternative to pie chart for {col}\n\n"

        # Two variable relationships
        if len(self.df.columns) > 1:
            viz_analysis += "### Two Variable Relationships:\n\n"

            # Numeric vs Numeric
            if len(numeric_cols) >= 2:
                viz_analysis += "**Numeric vs Numeric Combinations:**\n"
                for i in range(len(numeric_cols)):
                    for j in range(i + 1, len(numeric_cols)):
                        col1, col2 = numeric_cols[i], numeric_cols[j]
                        viz_analysis += f"- **Scatter Plot**: {col1} (X-axis) vs {col2} (Y-axis)\n"
                        viz_analysis += f"- **Correlation Heatmap**: Relationship strength between {col1} and {col2}\n"
                viz_analysis += "\n"

            # Categorical vs Numeric
            if len(categorical_cols) > 0 and len(numeric_cols) > 0:
                viz_analysis += "**Categorical vs Numeric Combinations:**\n"
                for cat_col in categorical_cols:
                    for num_col in numeric_cols:
                        viz_analysis += f"- **Box Plot**: {cat_col} (X-axis) vs {num_col} (Y-axis)\n"
                        viz_analysis += f"- **Violin Plot**: Distribution of {num_col} across {cat_col} categories\n"
                        viz_analysis += f"- **Bar Plot**: Average {num_col} by {cat_col}\n"
                viz_analysis += "\n"

            # Categorical vs Categorical
            if len(categorical_cols) >= 2:
                viz_analysis += "**Categorical vs Categorical Combinations:**\n"
                for i in range(len(categorical_cols)):
                    for j in range(i + 1, len(categorical_cols)):
                        col1, col2 = categorical_cols[i], categorical_cols[j]
                        viz_analysis += f"- **Stacked Bar Chart**: {col1} (X-axis) stacked by {col2}\n"
                        viz_analysis += f"- **Heatmap**: Cross-tabulation of {col1} vs {col2}\n"
                        viz_analysis += f"- **Grouped Bar Chart**: {col1} grouped by {col2}\n"
                viz_analysis += "\n"

        # Advanced visualizations
        if len(self.df.columns) >= 3:
            viz_analysis += "### Advanced Multi-Variable Analysis:\n\n"
            if len(numeric_cols) >= 3:
                viz_analysis += "**For 3+ Numeric Variables:**\n"
                viz_analysis += f"- **3D Scatter Plot**: {numeric_cols[0]} (X) vs {numeric_cols[1]} (Y) vs {numeric_cols[2]} (Z)\n"
                viz_analysis += "- **Pair Plot**: All numeric variables against each other\n"
                viz_analysis += "- **Correlation Matrix**: Heatmap of all numeric correlations\n\n"
            if len(numeric_cols) >= 2 and len(categorical_cols) >= 1:
                viz_analysis += "**For Mixed Variable Types:**\n"
                viz_analysis += f"- **Scatter Plot with Color**: {numeric_cols[0]} vs {numeric_cols[1]} colored by {categorical_cols[0]}\n"
                viz_analysis += f"- **Bubble Chart**: {numeric_cols[0]} (X) vs {numeric_cols[1]} (Y) with bubble size from another variable\n\n"

        # Dashboard recommendations
        viz_analysis += "### Dashboard Layout Suggestions:\n\n"
        viz_analysis += "**Top Row**: Overview metrics and key KPIs\n"
        viz_analysis += "**Middle Section**: Main analysis charts (2-3 key visualizations)\n"
        viz_analysis += "**Bottom Section**: Detailed breakdowns and filters\n"
        viz_analysis += "**Side Panel**: Interactive filters and controls\n"

        return viz_analysis
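    # Sketch (not wired into the UI): the "Correlation Matrix" suggestion above
    # could be rendered with the already-imported seaborn, for example:
    #
    #   corr = self.df.select_dtypes(include=[np.number]).corr()
    #   sns.heatmap(corr, annot=True, cmap="coolwarm")
    #   plt.title("Correlation Matrix")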
    def generate_business_insights(self, question):
        """Generate business insights based on the question and dataset."""
        if self.df is None:
            return "Please upload a dataset first to generate insights."

        insights = f"""
# BUSINESS INSIGHTS & RECOMMENDATIONS

## Question: {question}

## Data-Driven Analysis:
"""
        # Basic statistics
        numeric_cols = self.df.select_dtypes(include=[np.number]).columns.tolist()
        categorical_cols = self.df.select_dtypes(include=['object']).columns.tolist()

        if len(numeric_cols) > 0:
            insights += "\n### Key Metrics:\n"
            for col in numeric_cols[:5]:  # Show top 5 numeric columns
                mean_val = self.df[col].mean()
                median_val = self.df[col].median()
                std_val = self.df[col].std()
                insights += f"- **{col}**: Mean = {mean_val:.2f}, Median = {median_val:.2f}, Std = {std_val:.2f}\n"

        if len(categorical_cols) > 0:
            insights += "\n### Category Distribution:\n"
            for col in categorical_cols[:3]:  # Show top 3 categorical columns
                top_category = self.df[col].mode()[0]
                category_count = self.df[col].value_counts().iloc[0]
                total_count = len(self.df)
                percentage = (category_count / total_count) * 100
                insights += f"- **{col}**: Most common = '{top_category}' ({category_count}/{total_count} = {percentage:.1f}%)\n"

        # Generate recommendations based on question keywords
        question_lower = question.lower()

        if any(word in question_lower for word in ['revenue', 'sales', 'profit', 'income']):
            insights += "\n### Revenue/Sales Insights:\n"
            insights += "- Focus on high-performing segments identified in the data\n"
            insights += "- Analyze seasonal trends if time data is available\n"
            insights += "- Consider customer segmentation based on purchase behavior\n"
        elif any(word in question_lower for word in ['customer', 'client', 'user']):
            insights += "\n### Customer Insights:\n"
            insights += "- Segment customers based on key characteristics\n"
            insights += "- Identify high-value customer profiles\n"
            insights += "- Analyze customer retention patterns\n"
        elif any(word in question_lower for word in ['marketing', 'campaign', 'advertising']):
            insights += "\n### Marketing Insights:\n"
            insights += "- Evaluate campaign performance metrics\n"
            insights += "- Identify most effective channels\n"
            insights += "- Optimize targeting based on demographic data\n"
        elif any(word in question_lower for word in ['predict', 'forecast', 'future']):
            insights += "\n### Predictive Insights:\n"
            insights += "- Use historical patterns for forecasting\n"
            insights += "- Apply machine learning models for predictions\n"
            insights += "- Consider external factors that might influence outcomes\n"
        else:
            insights += "\n### General Business Recommendations:\n"
            insights += "- Identify key performance indicators from your data\n"
            insights += "- Look for correlations between important variables\n"
            insights += "- Consider segmentation strategies based on data patterns\n"

        # Add data quality assessment
        missing_data_pct = (self.df.isnull().sum().sum() / (self.df.shape[0] * self.df.shape[1])) * 100
        insights += "\n### Data Quality Notes:\n"
        insights += f"- Missing data: {missing_data_pct:.1f}% of total data points\n"
        insights += f"- Data completeness: {100 - missing_data_pct:.1f}%\n"
        if missing_data_pct > 10:
            insights += "- **Recommendation**: Address missing data before making critical decisions\n"

        return insights
    def create_visualization(self, chart_type, x_column, y_column):
        """Create a chart based on the user's selection."""
        if self.df is None:
            return "Please upload a dataset first."
        try:
            plt.figure(figsize=(10, 6))
            plt.style.use('default')

            if chart_type == "Scatter Plot":
                plt.scatter(self.df[x_column], self.df[y_column], alpha=0.6)
                plt.xlabel(x_column)
                plt.ylabel(y_column)
                plt.title(f'Scatter Plot: {x_column} vs {y_column}')
            elif chart_type == "Line Chart":
                plt.plot(self.df[x_column], self.df[y_column])
                plt.xlabel(x_column)
                plt.ylabel(y_column)
                plt.title(f'Line Chart: {x_column} vs {y_column}')
            elif chart_type == "Bar Chart":
                if self.df[x_column].dtype == 'object':
                    value_counts = self.df[x_column].value_counts().head(10)
                    plt.bar(value_counts.index, value_counts.values)
                    plt.xlabel(x_column)
                    plt.ylabel('Count')
                    plt.title(f'Bar Chart: {x_column}')
                    plt.xticks(rotation=45)
                else:
                    plt.bar(self.df[x_column], self.df[y_column])
                    plt.xlabel(x_column)
                    plt.ylabel(y_column)
                    plt.title(f'Bar Chart: {x_column} vs {y_column}')
            elif chart_type == "Histogram":
                plt.hist(self.df[x_column], bins=30, alpha=0.7)
                plt.xlabel(x_column)
                plt.ylabel('Frequency')
                plt.title(f'Histogram: {x_column}')
            elif chart_type == "Box Plot":
                if y_column and self.df[y_column].dtype == 'object':
                    self.df.boxplot(column=x_column, by=y_column)
                    plt.title(f'Box Plot: {x_column} by {y_column}')
                else:
                    plt.boxplot(self.df[x_column].dropna())
                    plt.ylabel(x_column)
                    plt.title(f'Box Plot: {x_column}')

            plt.tight_layout()

            # Render to an in-memory PNG and return it as a PIL image, which the
            # Gradio Image component accepts (raw PNG bytes are not supported).
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=150, bbox_inches='tight')
            img_buffer.seek(0)
            plt.close('all')
            return Image.open(img_buffer)
        except Exception as e:
            plt.close('all')
            return f"Error creating visualization: {str(e)}"

# Initialize the Business Analyst GPT
analyst = BusinessAnalystGPT()


# Wrapper functions for the Gradio interface
def analyze_file(file):
    return analyst.analyze_dataset(file)


def generate_insights(question):
    return analyst.generate_business_insights(question)


def create_chart(chart_type, x_col, y_col):
    result = analyst.create_visualization(chart_type, x_col, y_col)
    # Errors come back as strings; the Image output can only display images,
    # so return nothing in that case.
    if isinstance(result, str):
        return None
    return result


def get_columns():
    if analyst.df is not None:
        cols = list(analyst.df.columns)
        return gr.update(choices=cols), gr.update(choices=cols)
    return gr.update(choices=[]), gr.update(choices=[])

# Create the Gradio interface
with gr.Blocks(title="Business Analyst GPT", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# Business Analyst GPT
### Your AI-Powered Data Analysis Assistant
Upload your dataset and get comprehensive business insights, ML model recommendations, and visualization suggestions!
""")

    with gr.Tab("Dataset Analysis"):
        with gr.Row():
            file_input = gr.File(label="Upload your dataset (CSV or Excel)", file_types=[".csv", ".xlsx", ".xls"])
            analyze_btn = gr.Button("Analyze Dataset", variant="primary")
        analysis_output = gr.Markdown(label="Analysis Results")
        analyze_btn.click(analyze_file, inputs=[file_input], outputs=[analysis_output])

    with gr.Tab("Business Insights"):
        with gr.Row():
            question_input = gr.Textbox(
                label="Ask a business question about your data",
                placeholder="e.g., How can I increase revenue? What are the key customer segments?",
                lines=2
            )
            insights_btn = gr.Button("Generate Insights", variant="primary")
        insights_output = gr.Markdown(label="Business Insights")
        insights_btn.click(generate_insights, inputs=[question_input], outputs=[insights_output])

    with gr.Tab("Data Visualization"):
        with gr.Row():
            chart_type = gr.Dropdown(
                choices=["Scatter Plot", "Line Chart", "Bar Chart", "Histogram", "Box Plot"],
                label="Chart Type",
                value="Scatter Plot"
            )
            refresh_cols = gr.Button("Refresh Columns")
        with gr.Row():
            x_column = gr.Dropdown(choices=[], label="X-axis Column")
            y_column = gr.Dropdown(choices=[], label="Y-axis Column (optional for some charts)")
        create_viz_btn = gr.Button("Create Visualization", variant="primary")
        viz_output = gr.Image(label="Visualization")
        refresh_cols.click(get_columns, outputs=[x_column, y_column])
        create_viz_btn.click(create_chart, inputs=[chart_type, x_column, y_column], outputs=[viz_output])

    with gr.Tab("How to Use"):
        gr.Markdown("""
## How to Use Business Analyst GPT

### Step 1: Upload Your Dataset
- Click on the "Dataset Analysis" tab
- Upload a CSV or Excel file containing your business data
- Click "Analyze Dataset" to get comprehensive insights

### Step 2: Get ML Model Recommendations
After uploading, you'll receive:
- **Target Variable Suggestions**: Which columns can be predicted
- **Feature Variable Identification**: Which columns to use as predictors
- **Model Recommendations**: Best ML algorithms for your data
- **Expected Performance**: Accuracy estimates for each model

### Step 3: Get Specific Visualization Ideas
The analysis will provide:
- **Single Variable Charts**: Best charts for each column
- **Two Variable Relationships**: Specific X-axis and Y-axis recommendations
- **Advanced Visualizations**: Multi-variable analysis suggestions
- **Dashboard Layout**: How to organize your charts

### Step 4: Generate Business Insights
- Ask specific business questions about your data
- Get data-driven recommendations and insights
- Receive actionable business strategies

### Step 5: Create Visualizations
- Choose from various chart types
- Select specific columns for the X and Y axes
- Generate publication-ready charts

## Supported File Types
- CSV files (.csv)
- Excel files (.xlsx, .xls)

## Best Practices
1. **Clean Data**: Ensure your dataset has clear column headers
2. **Relevant Questions**: Ask specific business questions for better insights
3. **Column Selection**: Choose appropriate columns for visualizations
4. **Data Size**: Larger datasets provide more reliable ML recommendations
""")

# Launch the app
if __name__ == "__main__":
    demo.launch()
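# Optional launch settings (sketch, not part of the original app): demo.launch(share=True)
# creates a temporary public link, and demo.launch(server_name="0.0.0.0", server_port=7860)
# binds the app to a specific host/port when running outside Hugging Face Spaces.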