""" Batch Analysis page for Smartwatch Normative Z-Score Calculator. Upload multiple patient records for bulk z-score analysis. """ import streamlit as st import pandas as pd import sys import os from io import BytesIO # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS import normalizer_model st.set_page_config( page_title="Batch Analysis - Smartwatch Z-Score Calculator", page_icon="📊", layout="wide", ) # Load normative data DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "Table_1_summary_measure.csv") @st.cache_data def get_normative_data(): try: return normalizer_model.load_normative_table(DATA_PATH) except Exception as e: st.error(f"Could not load normative data: {e}") return None normative_df = get_normative_data() st.title("📊 Batch Analysis") st.markdown("**Upload multiple patient records for bulk smartwatch biomarker analysis**") st.info( "Upload an Excel or CSV file with patient data. Each row will be analyzed and " "z-scores will be calculated for all available biomarkers." ) col1, col2 = st.columns(2) with col1: st.subheader("📥 Download Template") st.markdown("Use this template to prepare your data in the correct format.") template_df = get_batch_template_df() # Create downloadable Excel template output = BytesIO() with pd.ExcelWriter(output, engine='xlsxwriter') as writer: template_df.to_excel(writer, index=False, sheet_name='Patient Data') workbook = writer.book worksheet = writer.sheets['Patient Data'] # Orange-themed header format header_format = workbook.add_format({ 'bold': True, 'bg_color': '#e67e22', 'font_color': 'white', 'border': 1 }) for col_num, value in enumerate(template_df.columns.values): worksheet.write(0, col_num, value, header_format) worksheet.set_column(col_num, col_num, 18) st.download_button( label="⬇️ Download Excel Template", data=output.getvalue(), file_name="smartwatch_zscore_template.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) st.markdown("#### Required Columns:") st.markdown(""" | Column | Description | Example | |--------|-------------|---------| | patient_id | Unique identifier | P001 | | age | Age in years | 45 | | gender | Man/Woman | Man | | region | Geographic region | Western Europe | | bmi | Body Mass Index | 24.5 | """) st.markdown("#### Biomarker Columns (optional):") biomarker_table = "| Column | Description |\n|--------|-------------|\n" for code in AVAILABLE_BIOMARKERS: label = BIOMARKER_LABELS.get(code, code) biomarker_table += f"| {code} | {label} |\n" st.markdown(biomarker_table) st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*") with col2: st.subheader("📤 Upload Data") uploaded_file = st.file_uploader( "Choose an Excel or CSV file", type=['xlsx', 'xls', 'csv'], help="Upload a file with patient data following the template format" ) if uploaded_file is not None: try: if uploaded_file.name.endswith('.csv'): df = pd.read_csv(uploaded_file) else: df = pd.read_excel(uploaded_file) st.success(f"✅ Loaded {len(df)} patient records") # Detect available biomarkers in the uploaded data detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS] if detected_biomarkers: st.markdown(f"**Detected biomarkers:** {', '.join([BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers])}") else: st.warning("No recognized biomarker columns found. Please check your column names.") with st.expander("Preview uploaded data"): st.dataframe(df, use_container_width=True) except Exception as e: st.error(f"Error reading file: {str(e)}") df = None st.markdown("---") # Processing section if uploaded_file is not None and 'df' in dir() and df is not None and normative_df is not None: # Biomarker selection st.subheader("Select Biomarkers to Analyze") detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS] if detected_biomarkers: selected_biomarkers = st.multiselect( "Choose biomarkers to include in analysis", options=detected_biomarkers, default=detected_biomarkers, format_func=lambda x: BIOMARKER_LABELS.get(x, x) ) if st.button("🔬 Process Batch Data", type="primary"): if not selected_biomarkers: st.error("Please select at least one biomarker to analyze.") else: with st.spinner("Processing patient data..."): results_df = process_batch_data(df, normative_df, selected_biomarkers) st.success("✅ Processing complete!") # Results section st.subheader("Results") # Build display columns dynamically base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi'] display_cols = [c for c in base_cols if c in results_df.columns] for bm in selected_biomarkers: if bm in results_df.columns: display_cols.append(bm) if f'{bm}_z' in results_df.columns: display_cols.append(f'{bm}_z') if f'{bm}_percentile' in results_df.columns: display_cols.append(f'{bm}_percentile') if f'{bm}_interpretation' in results_df.columns: display_cols.append(f'{bm}_interpretation') available_cols = [c for c in display_cols if c in results_df.columns] # Style function for interpretation columns def highlight_interpretation(val): if pd.isna(val) or val == 'N/A' or val == 'No data': return '' val_str = str(val).lower() if 'average' in val_str and 'below' not in val_str and 'above' not in val_str: return 'background-color: #90EE90' # Green elif 'below' in val_str: return 'background-color: #87CEEB' # Light blue elif 'above' in val_str: return 'background-color: #FFD700' # Gold elif 'very low' in val_str: return 'background-color: #ADD8E6' # Light blue elif 'very high' in val_str: return 'background-color: #FF6B6B' # Red return '' # Apply styling to interpretation columns interp_cols = [c for c in available_cols if 'interpretation' in c] if interp_cols: styled_df = results_df[available_cols].style.applymap( highlight_interpretation, subset=interp_cols ) st.dataframe(styled_df, use_container_width=True) else: st.dataframe(results_df[available_cols], use_container_width=True) # Summary Statistics st.subheader("Summary Statistics") # Create columns for each biomarker if len(selected_biomarkers) > 0: cols = st.columns(min(len(selected_biomarkers), 3)) for idx, bm in enumerate(selected_biomarkers[:3]): with cols[idx]: st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**") z_col = f'{bm}_z' if z_col in results_df.columns: # Filter out non-numeric values z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna() if len(z_values) > 0: st.metric("Mean Z-Score", f"{z_values.mean():.2f}") st.metric("Patients Analyzed", len(z_values)) # Distribution of interpretations interp_col = f'{bm}_interpretation' if interp_col in results_df.columns: interp_counts = results_df[interp_col].value_counts() st.bar_chart(interp_counts) # Export Results st.subheader("📥 Export Results") output = BytesIO() with pd.ExcelWriter(output, engine='xlsxwriter') as writer: results_df.to_excel(writer, index=False, sheet_name='Results') workbook = writer.book worksheet = writer.sheets['Results'] # Orange-themed header header_format = workbook.add_format({ 'bold': True, 'bg_color': '#e67e22', 'font_color': 'white', 'border': 1 }) for col_num, value in enumerate(results_df.columns.values): worksheet.write(0, col_num, value, header_format) worksheet.set_column(col_num, col_num, 18) st.download_button( label="⬇️ Download Results as Excel", data=output.getvalue(), file_name="smartwatch_zscore_results.xlsx", mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" ) else: st.warning( "No recognized biomarker columns found in your data. " "Please ensure your columns match the template format." ) # Z-Score Classification Guide st.markdown("---") with st.expander("📊 Z-Score Classification Guide"): st.markdown(""" **How to interpret Z-Scores:** | Z-Score Range | Classification | Percentile Range | |:-------------:|:--------------:|:----------------:| | z < -2.0 | Very Low | < 2.3% | | -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% | | **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** | | 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% | | z ≥ 2.0 | Very High | > 97.7% | **Context matters:** - For **steps, sleep duration, and active minutes**: Higher values are generally better ✓ - For **heart rate**: Lower resting values are generally better ✓ *A z-score of 0 means you are exactly at the population average for your demographic group.* """) # Footer st.markdown("---") st.markdown( "*Batch analysis calculates z-scores relative to the Withings normative population, " "stratified by region, gender, age group, and BMI category.*" ) st.markdown( "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026." )