"""Streamlit app: normative z-score calculator for smartwatch biomarkers.

The script loads a normative summary table, collects cohort parameters and a
measured value from the sidebar, and delegates the actual statistics to
``normalizer_model``. Everything in this file is presentation logic.
"""

from typing import NamedTuple, Union

import altair as alt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from scipy.stats import norm

import normalizer_model

# Configure the Streamlit page before other commands
st.set_page_config(
    page_title="Smartwatch Normative Z-Score Calculator",
    layout="wide",
)


@st.cache_data
def load_norm_df(path: str):
    """Load the normative table from *path*, cached via ``st.cache_data``."""
    return normalizer_model.load_normative_table(path)


# Load dataset
norm_df = load_norm_df("Table_1_summary_measure.csv")

# Friendly biomarker labels
BIOMARKER_LABELS = {
    "nb_steps": "Number of Steps",
    "max_steps": "Maximum Steps",
    "mean_active_time": "Mean Active Time",
    "sbp": "Systolic Blood Pressure",
    "dbp": "Diastolic Blood Pressure",
    "sleep_duration": "Sleep Duration",
    "avg_night_hr": "Average Night Heart Rate",
    "nb_moderate_active_minutes": "Moderate Active Minutes",
    "nb_vigorous_active_minutes": "Vigorous Active Minutes",
    "weight": "Weight",
    "pwv": "Pulse Wave Velocity",
    # add any others here
}

# Biomarkers temporarily disabled in the UI. Remove from this set to re-enable.
DISABLED_BIOMARKERS = {"weight", "sbp", "dbp", "pwv", "nb_vigorous_active_minutes"}

# Preferred initial selections; both fall back to the first available option.
_DEFAULT_REGION = "Western Europe"
_DEFAULT_BIOMARKER_LABEL = "Number of Steps"

_SKEW_GUIDE_MD = """
**Pearson Skewness Interpretation:**

- ≈ 0: Symmetric distribution
- ±0.1 to ±0.5: Slight/moderate skew
- ±0.5 to ±1: Noticeable skew
- larger than±1: Strong skew
- Positive values: Right skew (longer tail on right)
- Negative values: Left skew (longer tail on left)
"""

_ZSCORE_GUIDE_MD = """
**How to interpret Z-Scores:**

| Z-Score Range | Classification | Percentile Range |
|:-------------:|:--------------:|:----------------:|
| z < -2.0 | Very Low | < 2.3% |
| -2.0 ≤ z < -0.5 | Below Average | 2.3% - 30.9% |
| **-0.5 ≤ z < 0.5** | **Average** | **30.9% - 69.1%** |
| 0.5 ≤ z < 2.0 | Above Average | 69.1% - 97.7% |
| z ≥ 2.0 | Very High | > 97.7% |

**Context matters:**

- For **steps, sleep duration, and active minutes**: Higher values are generally better ✓
- For **heart rate**: Lower resting values are generally better ✓

*A z-score of 0 means you are exactly at the population average for your demographic group.*
"""


class SidebarInputs(NamedTuple):
    """Values collected from the sidebar on the current script run."""

    region: str
    gender: str
    age_param: Union[int, str]    # years (number input) or age-group label
    bmi_param: Union[float, str]  # BMI value or BMI-category label
    biomarker: str                # biomarker code as stored in the table
    value: float                  # measured value entered by the user
    submitted: bool               # True when the compute button was clicked


def _collect_sidebar_inputs() -> SidebarInputs:
    """Render all sidebar widgets and return the user's current selections."""
    st.sidebar.header("Input Parameters")

    # Region with default Western Europe
    regions = sorted(norm_df["area"].unique())
    default_region = _DEFAULT_REGION if _DEFAULT_REGION in regions else regions[0]
    region = st.sidebar.selectbox(
        "Region",
        regions,
        index=regions.index(default_region),
    )

    # Gender selection
    gender = st.sidebar.selectbox(
        "Gender",
        sorted(norm_df["gender"].unique()),
    )

    # Age input: choose between years or group
    st.sidebar.subheader("Age Input")
    age_input_mode = st.sidebar.radio("Age input mode", ("Years", "Group"))
    if age_input_mode == "Years":
        age_param = st.sidebar.number_input(
            "Age (years)",
            min_value=0,
            max_value=120,
            value=30,
            step=1,
        )
    else:
        # Order groups by the number before the first "-" in each label.
        age_groups = sorted(
            norm_df["Age"].unique(),
            key=lambda label: int(label.split("-")[0]),
        )
        age_param = st.sidebar.selectbox("Age group", [""] + age_groups)

    # BMI input: choose between value or category
    st.sidebar.subheader("BMI Input")
    bmi_input_mode = st.sidebar.radio("BMI input mode", ("Value", "Category"))
    if bmi_input_mode == "Value":
        bmi_param = st.sidebar.number_input(
            "BMI",
            min_value=0.0,
            max_value=100.0,
            value=24.0,
            step=0.1,
            format="%.1f",
        )
    else:
        bmi_cats = sorted(norm_df["Bmi"].unique())
        bmi_param = st.sidebar.selectbox("BMI category", [""] + bmi_cats)

    # Biomarker selection with friendly labels
    codes = sorted(
        c for c in norm_df["Biomarkers"].unique() if c not in DISABLED_BIOMARKERS
    )
    friendly = [BIOMARKER_LABELS.get(c, c.title()) for c in codes]
    # The preferred default may be missing from the table or disabled; fall
    # back to the first entry instead of raising ValueError.
    if _DEFAULT_BIOMARKER_LABEL in friendly:
        default_idx = friendly.index(_DEFAULT_BIOMARKER_LABEL)
    else:
        default_idx = 0
    selected_label = st.sidebar.selectbox(
        "Biomarker",
        friendly,
        index=default_idx,
    )
    biomarker = codes[friendly.index(selected_label)]

    # Value input with consistent float types
    default_value = 6500.0 if biomarker == "nb_steps" else 0.0
    # Determine upper bound from normative data
    mask = norm_df["Biomarkers"].str.lower() == biomarker.lower()
    max_val = float(norm_df.loc[mask, "max"].max())
    if np.isfinite(max_val) and max_val >= 0.0:
        upper_bound = max_val
        # number_input rejects an initial value above max_value.
        default_value = min(default_value, upper_bound)
    else:
        # No usable maximum in the table: leave the input unbounded above.
        upper_bound = None
    value = st.sidebar.number_input(
        f"{selected_label} value",
        min_value=0.0,
        max_value=upper_bound,
        value=default_value,
        step=1.0,
    )

    submitted = st.sidebar.button("Compute Normative Z-Score")

    return SidebarInputs(
        region=region,
        gender=gender,
        age_param=age_param,
        bmi_param=bmi_param,
        biomarker=biomarker,
        value=value,
        submitted=submitted,
    )


def _render_summary(res, inputs: SidebarInputs) -> None:
    """Show the headline metrics and the cohort the calculation is based on."""
    st.subheader("Results")
    m1, m2, m3, m4, m5 = st.columns(5)
    m1.metric("Z-Score", f"{res['z_score']:.2f}")
    m2.metric("Percentile", f"{res['percentile']:.2f}")
    m3.metric("Mean", f"{res['mean']:.2f}")
    m4.metric("SD", f"{res['sd']:.2f}")
    m5.metric("Sample Size", res["n"])

    # Compute actual age group and BMI category for cohort summary
    age_group_str = normalizer_model._categorize_age(inputs.age_param, norm_df)
    bmi_cat = normalizer_model.categorize_bmi(inputs.bmi_param)
    # Each value gets its own bold span so the markers pair up correctly.
    st.markdown(
        f"**Basis of calculation:** Data from region **{inputs.region}**, "
        f"gender **{inputs.gender}**, age group **{age_group_str}**, "
        f"and BMI category **{bmi_cat}**. "
        f"Sample size: **{res['n']}**."
    )


def _render_detailed_stats(res) -> None:
    """Show the full statistics table and the normality-assumption note."""
    st.subheader("Detailed Statistics")
    nan = float("nan")
    stats_df = pd.DataFrame(
        {
            "Statistic": [
                "Z-Score",
                "Percentile",
                "Mean",
                "SD",
                "Sample Size",
                "Median",
                "Q1",
                "Q3",
                "IQR",
                "MAD",
                "SE",
                "CI",
            ],
            "Value": [
                f"{res['z_score']:.2f}",
                f"{res['percentile']:.2f}",
                f"{res['mean']:.2f}",
                f"{res['sd']:.2f}",
                res.get("n", "N/A"),
                f"{res.get('median', nan):.2f}",
                f"{res.get('q1', nan):.2f}",
                f"{res.get('q3', nan):.2f}",
                f"{res.get('iqr', nan):.2f}",
                f"{res.get('mad', nan):.2f}",
                f"{res.get('se', nan):.2f}",
                f"{res.get('ci', nan):.2f}",
            ],
        }
    )
    st.table(stats_df)

    # Normality assumption note
    st.write(
        "*Note: Percentile and z-score estimation assume a normal "
        "distribution based on global Withings user data stratified by "
        "the parameters entered.*"
    )


def _describe_skew(skew: float) -> str:
    """Return the flag text for a Pearson skewness value."""
    magnitude = abs(skew)
    if magnitude <= 0.1:
        return "Symmetric (OK)"
    direction = "Right" if skew > 0 else "Left"
    if magnitude <= 0.5:
        return f"{direction} slight skew (usually OK)"
    if magnitude <= 1.0:
        return f"{direction} noticeable skew"
    return f"{direction} strong skew"


def _render_normality_heuristics(res) -> None:
    """Show heuristic normality checks and warn when any of them is flagged."""
    # Imported here, as in the original, so the module is only needed once
    # results are actually rendered.
    import normality_checks as nc

    tail_ratio = nc.iqr_tail_heaviness(res["iqr"], res["sd"])
    q1_z, q3_z = nc.quartile_z_scores(
        res["mean"],
        res["sd"],
        res["q1"],
        res["q3"],
    )
    skew = nc.pearson_skewness(res["mean"], res["median"], res["sd"])

    st.subheader("Normality Heuristics")

    if tail_ratio > 1.5:
        tail_flag = "Heavier tails"
    elif tail_ratio < 1.2:
        tail_flag = "Lighter tails"
    else:
        tail_flag = "OK"

    norm_checks = pd.DataFrame(
        {
            "Check": [
                "IQR/SD",
                "Q1 z-score",
                "Q3 z-score",
                "Pearson Skewness",
            ],
            "Value": [
                f"{tail_ratio:.2f}",
                f"{q1_z:.2f}",
                f"{q3_z:.2f}",
                f"{skew:.2f}",
            ],
            "Flag": [
                tail_flag,
                # The quartile z-scores are compared against -0.6745 / +0.6745.
                "Deviation" if abs(q1_z + 0.6745) > 0.1 else "OK",
                "Deviation" if abs(q3_z - 0.6745) > 0.1 else "OK",
                _describe_skew(skew),
            ],
        }
    )
    st.table(norm_checks)

    # Add skewness interpretation guide
    st.markdown(_SKEW_GUIDE_MD)

    # Warning if heuristic checks indicate non-normality. Acceptable flags all
    # contain the substring "OK" (including "(usually OK)").
    if any("OK" not in flag for flag in norm_checks["Flag"]):
        st.warning(
            "Warning: Heuristic checks indicate possible deviations "
            "from normality; interpret z-score and percentiles with "
            "caution."
        )


def _render_skew_corrected(res, value: float) -> None:
    """Show the optional skew-corrected z-score and percentile in an expander."""
    with st.expander("Optional: Skew-Corrected Results"):
        st.write("Adjusts for skew via Pearson Type III back-transform.")
        st.write("Error often <1 percentile point when |skew| ≤ 0.5.")
        st.write("Usually more useful for stronger skewed distributions.")
        st.write("Note: This is a heuristic and may not always be accurate.")
        res_skew = normalizer_model.compute_skew_corrected_position(
            value=value,
            mean=res["mean"],
            sd=res["sd"],
            median=res["median"],
        )
        sc1, sc2 = st.columns(2)
        sc1.metric(
            "Skew-Corrected Z-Score",
            f"{res_skew['z_skew_corrected']:.2f}",
        )
        sc2.metric(
            "Skew-Corrected Percentile",
            f"{res_skew['percentile_skew_corrected']:.2f}",
        )


def _render_visualizations(res) -> None:
    """Draw the shaded standard-normal chart and the bullet gauge for z."""
    st.markdown("---")
    st.subheader("Visualizations")

    z_score = res["z_score"]

    # Prepare data for normal distribution
    z_vals = np.linspace(-4, 4, 400)
    df_chart = pd.DataFrame({"z": z_vals, "density": norm.pdf(z_vals)})

    # Shade area up to observed z-score
    area = (
        alt.Chart(df_chart)
        .mark_area(color="orange", opacity=0.3)
        .transform_filter(alt.datum.z <= z_score)
        .encode(
            x=alt.X("z:Q", title="z-score"),
            y=alt.Y("density:Q", title="Density"),
        )
    )
    # Plot distribution line
    line = (
        alt.Chart(df_chart)
        .mark_line(color="orange")
        .encode(x="z:Q", y="density:Q")
    )
    # Vertical line at observed z
    vline = (
        alt.Chart(pd.DataFrame({"z": [z_score]}))
        .mark_rule(color="orange")
        .encode(x="z:Q")
    )
    chart = (area + line + vline).properties(
        width=600,
        height=300,
        title="Standard Normal Distribution",
    )
    st.altair_chart(chart, use_container_width=True)

    # Text summary
    st.write(
        f"Your value is z = {z_score:.2f}, which places you in "
        f"the {res['percentile']:.1f}th percentile of a normal "
        f"distribution."
    )

    # Bullet chart showing z-score location
    # Using a horizontal bullet gauge from -3 to 3 SD
    bullet = go.Figure(
        go.Indicator(
            mode="number+gauge",
            value=z_score,
            number={"suffix": " SD"},
            gauge={
                "shape": "bullet",
                "axis": {
                    "range": [-3, 3],
                    "tickmode": "linear",
                    "dtick": 0.5,
                },
                "bar": {"color": "orange"},
            },
        )
    )
    bullet.update_layout(
        height=150,
        margin={"t": 20, "b": 20, "l": 20, "r": 20},
    )
    st.plotly_chart(bullet, use_container_width=True)

    # Show percentile text
    st.write(f"Percentile: {res['percentile']:.1f}%")


def _render_results(res, inputs: SidebarInputs) -> None:
    """Render every result section, in display order, for a computed result."""
    _render_summary(res, inputs)
    _render_detailed_stats(res)
    _render_normality_heuristics(res)
    _render_skew_corrected(res, inputs.value)
    _render_visualizations(res)


def _render_guide_and_footer() -> None:
    """Render the z-score classification guide and the page footer."""
    # Z-Score Classification Guide (always visible)
    st.markdown("---")
    with st.expander("📊 Z-Score Classification Guide"):
        st.markdown(_ZSCORE_GUIDE_MD)

    # Footer
    st.markdown("---")
    st.markdown(
        "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026. "
        "Thanks to Withings for sharing this data openly."
    )
    st.markdown(
        "*This tool is part of the publication "
        "\"Population-Normalised Wearable Metrics Quantify Real-World Disability "
        "in Multiple Sclerosis\" currently in review.*"
    )


def main():
    """Run the app: collect inputs, compute the normative position, render it.

    A failure in the normative computation is reported with ``st.error``; the
    classification guide and footer are rendered in every case.
    """
    # The disclaimer is displayed only on runs where the session flag is unset.
    if "disclaimer_shown" not in st.session_state:
        st.info(
            "These calculations are dedicated for scientific purposes only. "
            "For detailed questions regarding personal health data contact your "
            "healthcare professionals."
        )
        st.session_state.disclaimer_shown = True

    st.title("Smartwatch Normative Z-Score Calculator")

    inputs = _collect_sidebar_inputs()

    if inputs.submitted:
        try:
            res = normalizer_model.compute_normative_position(
                value=inputs.value,
                biomarker=inputs.biomarker,
                age_group=inputs.age_param,
                region=inputs.region,
                gender=inputs.gender,
                bmi=inputs.bmi_param,
                normative_df=norm_df,
            )
        except Exception as exc:  # UI boundary: report the failure to the user
            st.error(f"Error: {exc}")
        else:
            _render_results(res, inputs)
    else:
        st.sidebar.info(
            "Fill in all inputs and click Compute "
            "to get normative Z-score."
        )

    _render_guide_and_footer()


if __name__ == "__main__":
    main()