Withings_Normalization_App / pages /1_Batch_Analysis.py
Lars Masanneck
Adding explanatory notes to app and exports
a3309b8
"""
Batch Analysis page for Smartwatch Normative Z-Score Calculator.
Upload multiple patient records for bulk z-score analysis.
"""
import streamlit as st
import pandas as pd
import sys
import os
from io import BytesIO
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS
import normalizer_model
# Browser-tab title, icon and wide layout for this page.
_PAGE_SETTINGS = {
    "page_title": "Batch Analysis - Smartwatch Z-Score Calculator",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# Load normative data
# The CSV is expected in the parent of the directory that contains this file.
_APP_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(_APP_ROOT, "Table_1_summary_measure.csv")


@st.cache_data
def _load_normative_table(path):
    """Load the normative table from *path*; only successful loads are cached.

    Any exception raised by the loader propagates to the caller instead of
    being converted into a return value, so a failed load is retried on the
    next script run rather than being served from the cache.
    """
    return normalizer_model.load_normative_table(path)


def get_normative_data():
    """Return the normative table, or ``None`` when it cannot be loaded.

    On failure an error message is shown in the page and ``None`` is
    returned, so callers can simply test the result against ``None``.
    """
    try:
        return _load_normative_table(DATA_PATH)
    except Exception as e:  # UI boundary: report any loader failure to the user
        st.error(f"Could not load normative data: {e}")
        return None


normative_df = get_normative_data()
# Page heading followed by a short description of the batch workflow.
st.title("📊 Batch Analysis")
st.markdown("**Upload multiple patient records for bulk smartwatch biomarker analysis**")

intro_text = (
    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
    "z-scores will be calculated for all available biomarkers."
)
st.info(intro_text)

# Two side-by-side panels: template download (col1) and data upload (col2).
col1, col2 = st.columns(2)
with col1:
    st.subheader("📥 Download Template")
    st.markdown("Use this template to prepare your data in the correct format.")

    # Build the Excel template in memory so it can be handed to the download button.
    template_df = get_batch_template_df()
    template_buffer = BytesIO()
    with pd.ExcelWriter(template_buffer, engine='xlsxwriter') as writer:
        sheet_name = 'Patient Data'
        template_df.to_excel(writer, index=False, sheet_name=sheet_name)
        worksheet = writer.sheets[sheet_name]

        # Orange-themed header format
        header_format = writer.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1,
        })

        # Re-write every header cell with the themed format and widen its column.
        for col_num, column_name in enumerate(template_df.columns):
            worksheet.write(0, col_num, column_name, header_format)
            worksheet.set_column(col_num, col_num, 18)

    st.download_button(
        label="⬇️ Download Excel Template",
        data=template_buffer.getvalue(),
        file_name="smartwatch_zscore_template.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    st.markdown("#### Required Columns:")
    st.markdown("""
| Column | Description | Example |
|--------|-------------|---------|
| patient_id | Unique identifier | P001 |
| age | Age in years | 45 |
| gender | Man/Woman | Man |
| region | Geographic region | Western Europe |
| bmi | Body Mass Index | 24.5 |
""")

    st.markdown("#### Biomarker Columns (optional):")
    # One markdown table row per known biomarker code, labelled from BIOMARKER_LABELS
    # (falling back to the code itself when no label is registered).
    biomarker_rows = [
        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |\n"
        for code in AVAILABLE_BIOMARKERS
    ]
    biomarker_table = (
        "| Column | Description |\n|--------|-------------|\n" + "".join(biomarker_rows)
    )
    st.markdown(biomarker_table)
    st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*")
with col2:
    st.subheader("📤 Upload Data")
    uploaded_file = st.file_uploader(
        "Choose an Excel or CSV file",
        type=['xlsx', 'xls', 'csv'],
        help="Upload a file with patient data following the template format"
    )

    # Always bind df so later sections can test it against None even when
    # nothing has been uploaded yet.
    df = None

    if uploaded_file is not None:
        try:
            # Compare the extension case-insensitively so a file such as
            # "DATA.CSV" is not handed to the Excel reader.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            st.success(f"✅ Loaded {len(df)} patient records")

            # Detect available biomarkers in the uploaded data
            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]
            if detected_biomarkers:
                st.markdown(f"**Detected biomarkers:** {', '.join([BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers])}")
            else:
                st.warning("No recognized biomarker columns found. Please check your column names.")

            with st.expander("Preview uploaded data"):
                st.dataframe(df, use_container_width=True)

        except Exception as e:  # UI boundary: any read/preview failure is shown to the user
            st.error(f"Error reading file: {str(e)}")
            # Discard a partially handled upload so the processing section is skipped.
            df = None
st.markdown("---")

# Session-state slot that keeps the last batch result alive across reruns.
_RESULTS_STATE_KEY = "batch_analysis_results"


def highlight_interpretation(val):
    """Return the CSS background style for one interpretation cell.

    Missing values and the placeholders 'N/A' / 'No data' get no styling.
    Otherwise the lower-cased text is matched against the classification
    keywords; an empty string is returned when nothing matches.
    """
    if pd.isna(val) or val == 'N/A' or val == 'No data':
        return ''
    val_str = str(val).lower()
    # Plain "average" only: "below average" / "above average" are handled next.
    if 'average' in val_str and 'below' not in val_str and 'above' not in val_str:
        return 'background-color: #90EE90'  # Green
    elif 'below' in val_str:
        return 'background-color: #87CEEB'  # Sky blue
    elif 'above' in val_str:
        return 'background-color: #FFD700'  # Gold
    elif 'very low' in val_str:
        return 'background-color: #ADD8E6'  # Light blue
    elif 'very high' in val_str:
        return 'background-color: #FF6B6B'  # Red
    return ''


# Processing section
# 'df' in dir() guards against df never having been bound (e.g. no upload yet).
if uploaded_file is not None and 'df' in dir() and df is not None and normative_df is not None:
    # Biomarker selection
    st.subheader("Select Biomarkers to Analyze")
    detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

    if detected_biomarkers:
        selected_biomarkers = st.multiselect(
            "Choose biomarkers to include in analysis",
            options=detected_biomarkers,
            default=detected_biomarkers,
            format_func=lambda x: BIOMARKER_LABELS.get(x, x)
        )

        # Fingerprint of the current upload, so results computed for a
        # previously uploaded file are never displayed for a new one.
        upload_key = (
            uploaded_file.name,
            getattr(uploaded_file, "size", None),
            getattr(uploaded_file, "file_id", None),
        )

        if st.button("🔬 Process Batch Data", type="primary"):
            if not selected_biomarkers:
                st.error("Please select at least one biomarker to analyze.")
                # Do not keep showing results from an earlier, valid run.
                if _RESULTS_STATE_KEY in st.session_state:
                    del st.session_state[_RESULTS_STATE_KEY]
            else:
                with st.spinner("Processing patient data..."):
                    results_df = process_batch_data(df, normative_df, selected_biomarkers)
                st.success("✅ Processing complete!")
                # st.button is only True on the run triggered by the click, so
                # the results are stored; otherwise they would vanish on the
                # next rerun (for example when the download button is clicked).
                st.session_state[_RESULTS_STATE_KEY] = {
                    "upload": upload_key,
                    "biomarkers": list(selected_biomarkers),
                    "results": results_df,
                }

        stored = None
        if _RESULTS_STATE_KEY in st.session_state:
            stored = st.session_state[_RESULTS_STATE_KEY]

        if stored is not None and stored["upload"] == upload_key:
            results_df = stored["results"]
            # Biomarkers the stored results were computed for (may differ from
            # the current multiselect state until "Process" is clicked again).
            analyzed_biomarkers = stored["biomarkers"]

            # Results section
            st.subheader("Results")

            # Build display columns dynamically: demographics first, then for
            # each biomarker its raw value and derived columns, when present.
            base_cols = ['patient_id', 'age', 'gender', 'region', 'bmi']
            display_cols = [c for c in base_cols if c in results_df.columns]
            for bm in analyzed_biomarkers:
                candidates = (bm, f'{bm}_z', f'{bm}_percentile', f'{bm}_interpretation')
                display_cols.extend(c for c in candidates if c in results_df.columns)

            # Apply styling to interpretation columns
            interp_cols = [c for c in display_cols if 'interpretation' in c]
            if interp_cols:
                styler = results_df[display_cols].style
                # Styler.applymap was renamed to Styler.map in pandas 2.1;
                # use the new name when available, the old one otherwise.
                apply_cellwise = getattr(styler, "map", None) or styler.applymap
                styled_df = apply_cellwise(highlight_interpretation, subset=interp_cols)
                st.dataframe(styled_df, use_container_width=True)
            else:
                st.dataframe(results_df[display_cols], use_container_width=True)

            # Summary Statistics
            st.subheader("Summary Statistics")

            # One panel per biomarker, laid out in rows of at most three so
            # that no analyzed biomarker is left out of the summary.
            if analyzed_biomarkers:
                per_row = min(len(analyzed_biomarkers), 3)
                for start in range(0, len(analyzed_biomarkers), per_row):
                    row_biomarkers = analyzed_biomarkers[start:start + per_row]
                    row_cols = st.columns(per_row)
                    for panel, bm in zip(row_cols, row_biomarkers):
                        with panel:
                            st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**")
                            z_col = f'{bm}_z'
                            if z_col in results_df.columns:
                                # Filter out non-numeric values
                                z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
                                if len(z_values) > 0:
                                    st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
                                    st.metric("Patients Analyzed", len(z_values))

                            # Distribution of interpretations
                            interp_col = f'{bm}_interpretation'
                            if interp_col in results_df.columns:
                                interp_counts = results_df[interp_col].value_counts()
                                st.bar_chart(interp_counts)

            # Export Results
            st.subheader("📥 Export Results")

            output = BytesIO()
            with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
                results_df.to_excel(writer, index=False, sheet_name='Results')
                workbook = writer.book
                worksheet = writer.sheets['Results']

                # Orange-themed header
                header_format = workbook.add_format({
                    'bold': True,
                    'bg_color': '#e67e22',
                    'font_color': 'white',
                    'border': 1
                })

                # Re-write every header cell with the themed format and widen its column.
                for col_num, value in enumerate(results_df.columns.values):
                    worksheet.write(0, col_num, value, header_format)
                    worksheet.set_column(col_num, col_num, 18)

            st.download_button(
                label="⬇️ Download Results as Excel",
                data=output.getvalue(),
                file_name="smartwatch_zscore_results.xlsx",
                mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            )
    else:
        st.warning(
            "No recognized biomarker columns found in your data. "
            "Please ensure your columns match the template format."
        )
# Z-Score Classification Guide
# Static reference text, kept in a constant so the expander body stays short.
_ZSCORE_GUIDE_MARKDOWN = """
**How to interpret Z-Scores:**
| Z-Score Range | Classification | Percentile Range |
|:-------------:|:--------------:|:----------------:|
| z < -2.0 | Very Low | < 2.3% |
| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
| **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
| z ≥ 2.0 | Very High | > 97.7% |
**Context matters:**
- For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
- For **heart rate**: Lower resting values are generally better ✓
*A z-score of 0 means you are exactly at the population average for your demographic group.*
"""

st.markdown("---")
with st.expander("📊 Z-Score Classification Guide"):
    st.markdown(_ZSCORE_GUIDE_MARKDOWN)
# Footer
# Methodology note and attribution, rendered below a horizontal rule.
_FOOTER_METHOD_NOTE = (
    "*Batch analysis calculates z-scores relative to the Withings normative population, "
    "stratified by region, gender, age group, and BMI category.*"
)
_FOOTER_CREDIT = "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026."

st.markdown("---")
for footer_line in (_FOOTER_METHOD_NOTE, _FOOTER_CREDIT):
    st.markdown(footer_line)