|
|
""" |
|
|
Batch Analysis page for Smartwatch Normative Z-Score Calculator. |
|
|
|
|
|
Upload multiple patient records for bulk z-score analysis. |
|
|
""" |
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import sys |
|
|
import os |
|
|
from io import BytesIO |
|
|
|
|
|
|
|
|
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
from batch_utils import get_batch_template_df, process_batch_data, BIOMARKER_LABELS, AVAILABLE_BIOMARKERS |
|
|
import normalizer_model |
|
|
|
|
|
# Browser-tab title/icon and wide layout for the batch-analysis page.
_PAGE_OPTIONS = dict(
    page_title="Batch Analysis - Smartwatch Z-Score Calculator",
    page_icon="📊",
    layout="wide",
)
st.set_page_config(**_PAGE_OPTIONS)
|
|
|
|
|
|
|
|
# The normative summary CSV sits one directory above the folder holding this page.
_APP_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_PATH = os.path.join(_APP_ROOT, "Table_1_summary_measure.csv")
|
|
|
|
|
@st.cache_data
def _load_normative_table_cached(path):
    """Load the normative table from *path*; only successful loads are cached.

    Exceptions propagate to the caller, so a failed attempt leaves nothing in
    the cache and the next rerun tries again.
    """
    return normalizer_model.load_normative_table(path)


def get_normative_data():
    """Return the normative table at ``DATA_PATH``, or ``None`` if it cannot be loaded.

    Error handling lives outside the cached loader on purpose: when the
    ``try``/``except`` sat inside the cached function, the ``None`` result of
    a failed load was itself cached, so the page stayed broken until the
    cache was cleared even after the underlying file had been fixed.

    Returns:
        Whatever ``normalizer_model.load_normative_table`` returns on success,
        otherwise ``None`` (after showing an error message on the page).
    """
    try:
        return _load_normative_table_cached(DATA_PATH)
    except Exception as e:  # page-level boundary: report instead of crashing the page
        st.error(f"Could not load normative data: {e}")
        return None
|
|
|
|
|
# None here means the normative table could not be loaded.
normative_df = get_normative_data()

# Page heading and one-line description.
st.title("📊 Batch Analysis")
st.markdown("**Upload multiple patient records for bulk smartwatch biomarker analysis**")

# Short explanation of what the page does with an uploaded file.
_INTRO_TEXT = (
    "Upload an Excel or CSV file with patient data. Each row will be analyzed and "
    "z-scores will be calculated for all available biomarkers."
)
st.info(_INTRO_TEXT)
|
|
|
|
|
# Markdown reference table for the demographic columns listed as required.
_REQUIRED_COLUMNS_MD = """
| Column | Description | Example |
|--------|-------------|---------|
| patient_id | Unique identifier | P001 |
| age | Age in years | 45 |
| gender | Man/Woman | Man |
| region | Geographic region | Western Europe |
| bmi | Body Mass Index | 24.5 |
"""


def _template_workbook_bytes(frame):
    """Serialize *frame* to an in-memory .xlsx file with a styled header row."""
    sheet_name = 'Patient Data'
    buffer = BytesIO()
    with pd.ExcelWriter(buffer, engine='xlsxwriter') as xlsx:
        frame.to_excel(xlsx, index=False, sheet_name=sheet_name)
        sheet = xlsx.sheets[sheet_name]
        header_style = xlsx.book.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1,
        })
        # Re-write every header cell with the style and widen its column.
        for position, title in enumerate(frame.columns.values):
            sheet.write(0, position, title, header_style)
            sheet.set_column(position, position, 18)
    # Read the buffer only after the writer context has exited.
    return buffer.getvalue()


# Two side-by-side panels: template download (left) and file upload (right).
col1, col2 = st.columns(2)

with col1:
    st.subheader("📥 Download Template")
    st.markdown("Use this template to prepare your data in the correct format.")

    template_df = get_batch_template_df()

    st.download_button(
        label="⬇️ Download Excel Template",
        data=_template_workbook_bytes(template_df),
        file_name="smartwatch_zscore_template.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )

    st.markdown("#### Required Columns:")
    st.markdown(_REQUIRED_COLUMNS_MD)

    st.markdown("#### Biomarker Columns (optional):")
    # One markdown row per known biomarker code; fall back to the code when no label exists.
    biomarker_rows = [
        f"| {code} | {BIOMARKER_LABELS.get(code, code)} |\n"
        for code in AVAILABLE_BIOMARKERS
    ]
    st.markdown(
        "| Column | Description |\n|--------|-------------|\n" + "".join(biomarker_rows)
    )

    st.markdown("*Note: Include only the biomarkers you have data for. Leave cells blank if not measured.*")
|
|
|
|
|
# Bind `df` up front so it is always defined, even when nothing has been
# uploaded yet; None means "no usable data".
df = None

with col2:
    st.subheader("📤 Upload Data")

    uploaded_file = st.file_uploader(
        "Choose an Excel or CSV file",
        type=['xlsx', 'xls', 'csv'],
        help="Upload a file with patient data following the template format",
    )

    if uploaded_file is not None:
        try:
            # Compare the extension case-insensitively so a file named
            # "DATA.CSV" is not handed to the Excel reader by mistake.
            if uploaded_file.name.lower().endswith('.csv'):
                df = pd.read_csv(uploaded_file)
            else:
                df = pd.read_excel(uploaded_file)

            st.success(f"✅ Loaded {len(df)} patient records")

            # Only columns whose names match a known biomarker code can be analyzed.
            detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

            if detected_biomarkers:
                detected_labels = ', '.join(BIOMARKER_LABELS.get(b, b) for b in detected_biomarkers)
                st.markdown(f"**Detected biomarkers:** {detected_labels}")
            else:
                st.warning("No recognized biomarker columns found. Please check your column names.")

            with st.expander("Preview uploaded data"):
                st.dataframe(df, use_container_width=True)

        except Exception as e:  # any parse/display failure: report it and discard the data
            st.error(f"Error reading file: {e}")
            df = None
|
|
|
|
|
st.markdown("---")

# Session-state slot holding the most recent batch results.
_RESULTS_STATE_KEY = "batch_results"
# Number of biomarker summary cards placed side by side.
_SUMMARY_COLUMNS_PER_ROW = 3


def highlight_interpretation(val):
    """Return the CSS background for one interpretation cell.

    Missing values and the placeholders 'N/A' / 'No data' get no styling
    (empty string). Otherwise the colour is chosen by case-insensitive
    substring match on the cell text.
    """
    if pd.isna(val) or val == 'N/A' or val == 'No data':
        return ''
    val_str = str(val).lower()
    # Plain "average" only; "below average" / "above average" are handled next.
    if 'average' in val_str and 'below' not in val_str and 'above' not in val_str:
        return 'background-color: #90EE90'
    elif 'below' in val_str:
        return 'background-color: #87CEEB'
    elif 'above' in val_str:
        return 'background-color: #FFD700'
    elif 'very low' in val_str:
        return 'background-color: #ADD8E6'
    elif 'very high' in val_str:
        return 'background-color: #FF6B6B'
    return ''


def _result_display_columns(results_df, biomarkers):
    """List demographic columns, then each biomarker's value/z/percentile/interpretation columns.

    Only columns actually present in *results_df* are returned.
    """
    wanted = ['patient_id', 'age', 'gender', 'region', 'bmi']
    for bm in biomarkers:
        wanted.extend([bm, f'{bm}_z', f'{bm}_percentile', f'{bm}_interpretation'])
    return [c for c in wanted if c in results_df.columns]


def _render_results_table(results_df, biomarkers):
    """Show the results table, colouring interpretation cells when any exist."""
    columns = _result_display_columns(results_df, biomarkers)
    interp_cols = [c for c in columns if 'interpretation' in c]
    table = results_df[columns]
    if interp_cols:
        styler = table.style
        # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
        # name is deprecated; use the new one when this pandas provides it.
        cellwise = styler.map if hasattr(styler, "map") else styler.applymap
        table = cellwise(highlight_interpretation, subset=interp_cols)
    st.dataframe(table, use_container_width=True)


def _render_summary(results_df, biomarkers):
    """Show mean z-score, patient count and interpretation counts per biomarker.

    Biomarkers are laid out in rows of ``_SUMMARY_COLUMNS_PER_ROW`` so that
    every selected biomarker gets a card, not just the first three.
    """
    width = min(len(biomarkers), _SUMMARY_COLUMNS_PER_ROW)
    for start in range(0, len(biomarkers), _SUMMARY_COLUMNS_PER_ROW):
        slots = st.columns(width)
        row = biomarkers[start:start + _SUMMARY_COLUMNS_PER_ROW]
        for slot, bm in zip(slots, row):
            with slot:
                st.markdown(f"**{BIOMARKER_LABELS.get(bm, bm)}**")

                z_col = f'{bm}_z'
                if z_col in results_df.columns:
                    # Non-numeric entries are coerced to NaN and dropped.
                    z_values = pd.to_numeric(results_df[z_col], errors='coerce').dropna()
                    if len(z_values) > 0:
                        st.metric("Mean Z-Score", f"{z_values.mean():.2f}")
                        st.metric("Patients Analyzed", len(z_values))

                interp_col = f'{bm}_interpretation'
                if interp_col in results_df.columns:
                    st.bar_chart(results_df[interp_col].value_counts())


def _render_export(results_df):
    """Offer *results_df* as an .xlsx download with a styled header row."""
    output = BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        results_df.to_excel(writer, index=False, sheet_name='Results')
        workbook = writer.book
        worksheet = writer.sheets['Results']

        header_format = workbook.add_format({
            'bold': True,
            'bg_color': '#e67e22',
            'font_color': 'white',
            'border': 1,
        })
        for col_num, value in enumerate(results_df.columns.values):
            worksheet.write(0, col_num, value, header_format)
            worksheet.set_column(col_num, col_num, 18)

    st.download_button(
        label="⬇️ Download Results as Excel",
        data=output.getvalue(),
        file_name="smartwatch_zscore_results.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


# `df` is always bound once a file has been uploaded (the upload section sets
# it to a DataFrame or to None), so a plain None check is sufficient here.
if uploaded_file is not None and df is not None and normative_df is not None:
    st.subheader("Select Biomarkers to Analyze")
    detected_biomarkers = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]

    if detected_biomarkers:
        selected_biomarkers = st.multiselect(
            "Choose biomarkers to include in analysis",
            options=detected_biomarkers,
            default=detected_biomarkers,
            format_func=lambda x: BIOMARKER_LABELS.get(x, x),
        )

        # Identifies the current upload so results from a previous file are not shown.
        upload_signature = (
            uploaded_file.name,
            getattr(uploaded_file, "size", None),
            getattr(uploaded_file, "file_id", None),
        )

        if st.button("🔬 Process Batch Data", type="primary"):
            if not selected_biomarkers:
                st.error("Please select at least one biomarker to analyze.")
                st.session_state.pop(_RESULTS_STATE_KEY, None)
            else:
                with st.spinner("Processing patient data..."):
                    results_df = process_batch_data(df, normative_df, selected_biomarkers)

                st.success("✅ Processing complete!")

                # Keep the results across reruns: the button is True only on
                # the run triggered by the click, and clicking the download
                # button below triggers another run that would otherwise
                # make the whole results section disappear.
                st.session_state[_RESULTS_STATE_KEY] = {
                    "upload": upload_signature,
                    "biomarkers": list(selected_biomarkers),
                    "results": results_df,
                }

        stored = st.session_state.get(_RESULTS_STATE_KEY)
        if stored is not None and stored["upload"] == upload_signature:
            st.subheader("Results")
            _render_results_table(stored["results"], stored["biomarkers"])

            st.subheader("Summary Statistics")
            _render_summary(stored["results"], stored["biomarkers"])

            st.subheader("📥 Export Results")
            _render_export(stored["results"])
    else:
        st.warning(
            "No recognized biomarker columns found in your data. "
            "Please ensure your columns match the template format."
        )
|
|
|
|
|
|
|
|
# Reference text shown inside the collapsible classification guide.
_ZSCORE_GUIDE_MD = """
**How to interpret Z-Scores:**

| Z-Score Range | Classification | Percentile Range |
|:-------------:|:--------------:|:----------------:|
| z < -2.0 | Very Low | < 2.3% |
| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
| **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
| z ≥ 2.0 | Very High | > 97.7% |

**Context matters:**
- For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
- For **heart rate**: Lower resting values are generally better ✓

*A z-score of 0 means you are exactly at the population average for your demographic group.*
"""

# Divider, method note and credit line rendered at the bottom of the page.
_FOOTER_LINES = (
    "---",
    "*Batch analysis calculates z-scores relative to the Withings normative population, "
    "stratified by region, gender, age group, and BMI category.*",
    "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026.",
)

st.markdown("---")
with st.expander("📊 Z-Score Classification Guide"):
    st.markdown(_ZSCORE_GUIDE_MD)

for _footer_line in _FOOTER_LINES:
    st.markdown(_footer_line)
|
|
|
|
|
|
|
|
|