Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| import numpy as np | |
| import google.generativeai as genai | |
| import os | |
| from io import StringIO | |
| import json | |
# Page-level settings: use the full browser width and set the tab title.
st.set_page_config(
    page_title="Dynamic Data Dashboard",
    layout="wide",
)
def main():
    """Render the dashboard page for an uploaded CSV file.

    Shows the page title and intro, asks for an optional Gemini API key in
    the sidebar (falling back to the ``GEMINI_API_KEY`` environment
    variable), and waits for a CSV upload. Once a file is uploaded it shows
    a preview, basic shape/dtype metrics, the AI-generated dashboard (only
    when a key is available) and finally the standard visualizations.

    Any exception raised while processing the file is reported on the page
    with ``st.error`` instead of propagating.
    """
    st.title("Dynamic Data Dashboard Generator")
    st.markdown(
        "\n"
        "Upload your CSV file to generate an interactive dashboard tailored to your data.\n"
        "The application uses AI to analyze your data and create relevant visualizations.\n"
    )

    # A key typed into the sidebar wins over the environment variable.
    sidebar_key = st.sidebar.text_input(
        "Enter your Gemini API key for more power", type="password"
    )
    api_key = sidebar_key or os.getenv("GEMINI_API_KEY")

    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        # Nothing to render until the user provides a file.
        return

    try:
        df = pd.read_csv(uploaded_file)

        # First ten rows as a quick sanity check of the upload.
        with st.expander("Preview Data", expanded=True):
            st.dataframe(df.head(10))

        st.subheader("Data Overview")
        n_rows, n_cols = df.shape
        left, right = st.columns(2)
        with left:
            st.metric("Rows", n_rows)
            st.metric("Columns", n_cols)
        with right:
            # Every non-numeric column is counted as "categorical" here.
            numeric_count = df.select_dtypes(include=np.number).shape[1]
            st.metric("Numerical Columns", numeric_count)
            other_count = df.select_dtypes(exclude=np.number).shape[1]
            st.metric("Categorical Columns", other_count)

        # The AI section is optional; its failures must not block the
        # standard charts rendered below.
        if api_key:
            st.subheader("AI-Powered Dashboard")
            with st.spinner("Analyzing your data and generating visualizations..."):
                try:
                    generate_ai_dashboard(df, api_key)
                except Exception as e:
                    st.error(f"Error generating AI dashboard: {e}")

        st.subheader("Standard Visualizations")
        generate_standard_dashboard(df)
    except Exception as e:
        st.error(f"Error processing your file: {e}")
def generate_standard_dashboard(df):
    """Render the fixed set of exploratory charts for ``df``.

    Sections, in order: missing-value percentages per column, histogram and
    box plot of a user-selected numerical column, top-10 value counts of a
    user-selected categorical column, and (when at least two numerical
    columns exist) a correlation heatmap plus an interactive scatter plot.

    Columns are split with ``select_dtypes``: numeric dtypes are
    "numerical", every other dtype is treated as "categorical".

    Args:
        df: The uploaded data as a pandas DataFrame.
    """
    numerical_cols = df.select_dtypes(include=np.number).columns.tolist()
    categorical_cols = df.select_dtypes(exclude=np.number).columns.tolist()

    # --- Data completeness -------------------------------------------------
    st.subheader("Data Completeness")
    null_counts = df.isnull().sum()
    missing_data = pd.DataFrame({
        'column': df.columns,
        'missing_values': null_counts,
        'percentage': (null_counts / len(df) * 100).round(2),
    })
    completeness_fig = px.bar(
        missing_data,
        x='column',
        y='percentage',
        title='Missing Values Percentage',
        labels={'percentage': 'Missing Values (%)', 'column': 'Column'},
    )
    st.plotly_chart(completeness_fig, use_container_width=True)

    # --- Numerical distributions ------------------------------------------
    if numerical_cols:
        st.subheader("Numerical Distributions")
        selected_num_col = st.selectbox("Select a numerical column", numerical_cols)
        hist_slot, box_slot = st.columns(2)
        with hist_slot:
            hist_fig = px.histogram(
                df, x=selected_num_col, title=f'Distribution of {selected_num_col}'
            )
            st.plotly_chart(hist_fig, use_container_width=True)
        with box_slot:
            box_fig = px.box(
                df, y=selected_num_col, title=f'Box Plot of {selected_num_col}'
            )
            st.plotly_chart(box_fig, use_container_width=True)

    # --- Categorical distributions ----------------------------------------
    if categorical_cols:
        st.subheader("Categorical Distributions")
        selected_cat_col = st.selectbox("Select a categorical column", categorical_cols)
        # Only the ten most frequent categories are plotted, for readability.
        top_counts = df[selected_cat_col].value_counts().nlargest(10)
        cat_fig = px.bar(
            x=top_counts.index,
            y=top_counts.values,
            title=f'Top 10 Categories in {selected_cat_col}',
            labels={'x': selected_cat_col, 'y': 'Count'},
        )
        st.plotly_chart(cat_fig, use_container_width=True)

    # The remaining sections compare numerical columns pairwise, so they
    # need at least two of them.
    if len(numerical_cols) < 2:
        return

    # --- Correlation heatmap ----------------------------------------------
    st.subheader("Correlation Between Numerical Variables")
    corr = df[numerical_cols].corr()
    corr_fig = px.imshow(corr, text_auto=True, aspect="auto", title="Correlation Heatmap")
    st.plotly_chart(corr_fig, use_container_width=True)

    # --- Scatter plot for exploring relationships ---------------------------
    st.subheader("Explore Relationships")
    x_slot, y_slot = st.columns(2)
    with x_slot:
        x_col = st.selectbox("X-axis", numerical_cols, index=0)
    with y_slot:
        # Default the Y axis to the second numerical column.
        y_col = st.selectbox("Y-axis", numerical_cols, index=1)

    color_col = None
    if categorical_cols:
        # 'None' is a sentinel entry meaning "do not color the points".
        choice = st.selectbox("Color by (optional)", ['None'] + categorical_cols)
        color_col = None if choice == 'None' else choice

    scatter_fig = px.scatter(
        df,
        x=x_col,
        y=y_col,
        color=color_col,
        title=f'{y_col} vs {x_col}',
        opacity=0.7,
    )
    st.plotly_chart(scatter_fig, use_container_width=True)
# Prompt sent to Gemini. ``{{`` / ``}}`` are the literal braces of the JSON
# template; the single-brace fields are filled in with ``str.format``.
_AI_PROMPT_TEMPLATE = """
Analyze the following dataset and suggest visualizations that would be insightful:
Dataset Summary:
- Rows: {rows}
- Columns: {columns}
Column Information:
{column_info}
Please provide visualization recommendations in the following JSON format:
{{
"insights": [
"Key insight about the data",
"Another insight about the data"
],
"visualizations": [
{{
"title": "Visualization Title",
"description": "What this visualization shows",
"type": "bar|line|scatter|pie|histogram|box|heatmap",
"x_column": "column_name_for_x_axis",
"y_column": "column_name_for_y_axis",
"color_column": "optional_column_for_color",
"facet_column": "optional_column_for_faceting"
}}
]
}}
Return ONLY the JSON, no other text.
"""


def _summarize_columns(df):
    """Return a JSON-serializable summary (dtype, counts, sample) per column."""
    summary = {}
    for col in df.columns:
        non_null = df[col].dropna()
        # Size the sample from the non-null values: asking for
        # min(5, len(df)) items from a sparser column raises ValueError.
        sample_size = min(5, len(non_null))
        sample = non_null.sample(sample_size).tolist() if sample_size else []
        summary[col] = {
            'dtype': str(df[col].dtype),
            'unique_values': int(df[col].nunique()),
            'missing_values': int(df[col].isna().sum()),
            'sample': [str(x) for x in sample],
        }
    return summary


def _strip_code_fences(text):
    """Return the payload of the first Markdown code fence, or ``text`` as-is."""
    for fence in ("```json", "```"):
        if fence in text:
            return text.split(fence)[1].split("```")[0].strip()
    return text


def _optional_column(df, name):
    """Return ``name`` if it is a column of ``df``, otherwise ``None``."""
    try:
        return name if name in df.columns else None
    except TypeError:
        # Unhashable value (e.g. a list) returned by the model.
        return None


def _build_ai_figure(df, viz, title):
    """Create the Plotly figure described by one recommendation dict."""
    x_col = viz.get("x_column")
    y_col = viz.get("y_column")
    requested_color = viz.get("color_column")
    # Color is cosmetic: a placeholder or unknown name must not cost the
    # whole chart, so it is dropped instead of being passed to Plotly.
    color_col = _optional_column(df, requested_color)
    viz_type = (viz.get("type") or "bar").lower()

    if viz_type == "line":
        return px.line(df, x=x_col, y=y_col, color=color_col, title=title)
    if viz_type == "scatter":
        return px.scatter(df, x=x_col, y=y_col, color=color_col, title=title)
    if viz_type == "pie":
        return px.pie(df, names=x_col, values=y_col, title=title)
    if viz_type == "histogram":
        return px.histogram(df, x=x_col, color=color_col, title=title)
    if viz_type == "box":
        return px.box(df, y=y_col, x=x_col, color=color_col, title=title)
    if viz_type == "heatmap":
        # Here the "color" column defines the pivot's columns, so the value
        # requested by the model is used unchanged.
        pivot_table = pd.pivot_table(
            df, values=y_col, index=x_col, columns=requested_color, aggfunc='mean'
        )
        return px.imshow(pivot_table, title=title)
    if viz_type == "bar":
        return px.bar(df, x=x_col, y=y_col, color=color_col, title=title)
    # Unknown chart type: fall back to an uncolored bar chart.
    return px.bar(df, x=x_col, y=y_col, title=title)


def generate_ai_dashboard(df, api_key):
    """Ask Gemini for chart recommendations and render them with Streamlit.

    A per-column summary of ``df`` (dtype, unique/missing counts and up to
    five sampled non-null values) is sent to the ``gemini-2.0-flash`` model,
    which is asked to answer with a JSON object holding ``insights`` and
    ``visualizations``. Insights are shown as info boxes; each visualization
    is rendered in its own expander.

    Args:
        df: The uploaded data as a pandas DataFrame.
        api_key: Gemini API key used to configure the client.

    Raises:
        Exception: Whatever the Gemini client raises while configuring or
            generating content. Failures while reading, parsing or rendering
            the response are reported on the page instead of being raised.
    """
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel('gemini-2.0-flash')

    full_prompt = _AI_PROMPT_TEMPLATE.format(
        rows=df.shape[0],
        columns=df.shape[1],
        column_info=json.dumps(_summarize_columns(df), indent=2),
    )

    response = model.generate_content(
        full_prompt,
        generation_config={"temperature": 0.3}
    )

    # Kept outside the try so the error path can show the raw reply without
    # touching ``response.text`` again (the accessor itself may raise).
    raw_text = None
    try:
        raw_text = response.text
        recommendations = json.loads(_strip_code_fences(raw_text))

        st.subheader("AI Insights")
        for insight in recommendations.get("insights", []):
            st.info(insight)

        st.subheader("AI Recommended Visualizations")
        for viz in recommendations.get("visualizations", []):
            title = viz.get("title", "Visualization")
            with st.expander(title, expanded=True):
                description = viz.get("description")
                if description:
                    st.write(description)
                # One bad recommendation must not hide the others.
                try:
                    fig = _build_ai_figure(df, viz, title)
                    st.plotly_chart(fig, use_container_width=True)
                except Exception as e:
                    st.error(f"Could not create this visualization: {e}")
    except Exception as e:
        st.error(f"Could not parse AI recommendations: {e}")
        if raw_text is not None:
            st.code(raw_text, language="json")
# Start the dashboard only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()