Lars Masanneck committed
Commit 04428af · 1 Parent(s): 96a206a

Proper initial commit
Dockerfile ADDED
@@ -0,0 +1,17 @@
+ FROM python:3.11-slim
+
+ # Set working directory
+ WORKDIR /app
+
+ # Copy and install dependencies
+ COPY requirements.txt ./
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy application code
+ COPY . ./
+
+ # Expose Streamlit default port
+ EXPOSE 8501
+
+ # Run Streamlit app
+ CMD ["streamlit", "run", "app.py", "--server.address=0.0.0.0", "--server.port=8501"]
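Note (not part of the commit): with this Dockerfile the app would typically be built and started with `docker build -t smartwatch-zscore .` followed by `docker run -p 8501:8501 smartwatch-zscore`; the image name is only a placeholder, and the 8501 port mapping matches the EXPOSE and CMD lines above.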
Table_1_summary_measure.csv ADDED
The diff for this file is too large to render. See raw diff
 
app.py ADDED
@@ -0,0 +1,404 @@
+ import streamlit as st
+ import normalizer_model
+ import numpy as np
+ import pandas as pd
+ import altair as alt
+ import plotly.graph_objects as go
+ from scipy.stats import norm
+
+ # Configure the Streamlit page before other commands
+ st.set_page_config(
+     page_title="Smartwatch Normative Z-Score Calculator",
+     layout="wide",
+ )
+
+
+ # Cache the normative DataFrame load
+ def load_norm_df(path: str):
+     return normalizer_model.load_normative_table(path)
+
+
+ load_norm_df = st.cache_data(load_norm_df)
+
+ # Load dataset
+ norm_df = load_norm_df("Table_1_summary_measure.csv")
+
+ # Friendly biomarker labels
+ BIOMARKER_LABELS = {
+     "nb_steps": "Number of Steps",
+     "max_steps": "Maximum Steps",
+     "mean_active_time": "Mean Active Time",
+     "sbp": "Systolic Blood Pressure",
+     "dbp": "Diastolic Blood Pressure",
+     "sleep_duration": "Sleep Duration",
+     "avg_night_hr": "Average Night Heart Rate",
+     "nb_moderate_active_minutes": "Moderate Active Minutes",
+     "nb_vigorous_active_minutes": "Vigorous Active Minutes",
+     "weight": "Weight",
+     "pwv": "Pulse Wave Velocity",
+     # add any others here
+ }
+
+
+ def main():
+     if "disclaimer_shown" not in st.session_state:
+         st.info(
+             "These calculations are intended for scientific purposes only. "
+             "For detailed questions regarding personal health data, please "
+             "contact your healthcare professional."
+         )
+         st.session_state.disclaimer_shown = True
+     st.title("Smartwatch Normative Z-Score Calculator")
+     st.sidebar.header("Input Parameters")
+
+     # Region with default Western Europe
+     regions = sorted(norm_df["area"].unique())
+     if "Western Europe" in regions:
+         default_region = "Western Europe"
+     else:
+         default_region = regions[0]
+     region = st.sidebar.selectbox(
+         "Region",
+         regions,
+         index=regions.index(default_region),
+     )
+
+     # Gender selection
+     gender = st.sidebar.selectbox(
+         "Gender",
+         sorted(norm_df["gender"].unique()),
+     )
+
+     # Age input: choose between years or group
+     st.sidebar.subheader("Age Input")
+     age_input_mode = st.sidebar.radio(
+         "Age input mode",
+         ("Years", "Group"),
+     )
+     if age_input_mode == "Years":
+         age_years = st.sidebar.number_input(
+             "Age (years)",
+             min_value=0,
+             max_value=120,
+             value=30,
+             step=1,
+         )
+         age_param = age_years
+     else:
+         age_groups = sorted(
+             norm_df["Age"].unique(),
+             key=lambda x: int(x.split("-")[0]),
+         )
+         age_group = st.sidebar.selectbox("Age group", [""] + age_groups)
+         age_param = age_group
+
+     # BMI input: choose between value or category
+     st.sidebar.subheader("BMI Input")
+     bmi_input_mode = st.sidebar.radio(
+         "BMI input mode",
+         ("Value", "Category"),
+     )
+     if bmi_input_mode == "Value":
+         bmi_val = st.sidebar.number_input(
+             "BMI",
+             min_value=0.0,
+             max_value=100.0,
+             value=24.0,
+             step=0.1,
+             format="%.1f",
+         )
+         bmi_param = bmi_val
+     else:
+         bmi_cats = sorted(norm_df["Bmi"].unique())
+         bmi_cat = st.sidebar.selectbox("BMI category", [""] + bmi_cats)
+         bmi_param = bmi_cat
+
+     # Biomarker selection with friendly labels
+     codes = sorted(norm_df["Biomarkers"].unique())
+     friendly = [BIOMARKER_LABELS.get(c, c.title()) for c in codes]
+     default_idx = friendly.index("Number of Steps")
+     selected_label = st.sidebar.selectbox(
+         "Biomarker",
+         friendly,
+         index=default_idx,
+     )
+     biomarker = codes[friendly.index(selected_label)]
+
+     # Value input with consistent float types
+     default_value = 6500.0 if biomarker == "nb_steps" else 0.0
+     # Determine upper bound from normative data
+     mask = norm_df["Biomarkers"].str.lower() == biomarker.lower()
+     max_val = float(norm_df.loc[mask, "max"].max())
+     value = st.sidebar.number_input(
+         f"{selected_label} value",
+         min_value=0.0,
+         max_value=max_val,
+         value=default_value,
+         step=1.0,
+     )
+
+     # Compute
+     norm_button = st.sidebar.button("Compute Normative Z-Score")
+     if norm_button:
+         try:
+             res = normalizer_model.compute_normative_position(
+                 value=value,
+                 biomarker=biomarker,
+                 age_group=age_param,
+                 region=region,
+                 gender=gender,
+                 bmi=bmi_param,
+                 normative_df=norm_df,
+             )
+         except Exception as e:
+             st.error(f"Error: {e}")
+             return
+
+         # Show metrics
+         st.subheader("Results")
+         m1, m2, m3, m4, m5 = st.columns(5)
+         m1.metric("Z-Score", f"{res['z_score']:.2f}")
+         m2.metric("Percentile", f"{res['percentile']:.2f}")
+         m3.metric("Mean", f"{res['mean']:.2f}")
+         m4.metric("SD", f"{res['sd']:.2f}")
+         m5.metric("Sample Size", res["n"])
+
+         # Compute actual age group and BMI category for cohort summary
+         age_group_str = normalizer_model._categorize_age(age_param, norm_df)
+         bmi_cat = normalizer_model.categorize_bmi(bmi_param)
+         st.markdown(
+             f"**Basis of calculation:** Data from region **{region}**, "
+             f"gender **{gender}**, age group **{age_group_str}**, "
+             f"and BMI category **{bmi_cat}**. "
+             f"Sample size: {res['n']}."
+         )
+
+         # Detailed statistics table
+         st.subheader("Detailed Statistics")
+         stats_df = pd.DataFrame(
+             {
+                 "Statistic": [
+                     "Z-Score",
+                     "Percentile",
+                     "Mean",
+                     "SD",
+                     "Sample Size",
+                     "Median",
+                     "Q1",
+                     "Q3",
+                     "IQR",
+                     "MAD",
+                     "SE",
+                     "CI",
+                 ],
+                 "Value": [
+                     f"{res['z_score']:.2f}",
+                     f"{res['percentile']:.2f}",
+                     f"{res['mean']:.2f}",
+                     f"{res['sd']:.2f}",
+                     res.get("n", "N/A"),
+                     f"{res.get('median', float('nan')):.2f}",
+                     f"{res.get('q1', float('nan')):.2f}",
+                     f"{res.get('q3', float('nan')):.2f}",
+                     f"{res.get('iqr', float('nan')):.2f}",
+                     f"{res.get('mad', float('nan')):.2f}",
+                     f"{res.get('se', float('nan')):.2f}",
+                     f"{res.get('ci', float('nan')):.2f}",
+                 ],
+             }
+         )
+         st.table(stats_df)
+
+         # Normality assumption note
+         note = (
+             "*Note: Percentile and z-score estimation assume a normal "
+             "distribution based on global Withings user data stratified by "
+             "the parameters entered.*"
+         )
+         st.write(note)
+
+         # Normality checks
+         import normality_checks as nc
+
+         R = nc.iqr_tail_heaviness(res["iqr"], res["sd"])
+         q1_z, q3_z = nc.quartile_z_scores(
+             res["mean"],
+             res["sd"],
+             res["q1"],
+             res["q3"],
+         )
+         skew = nc.pearson_skewness(res["mean"], res["median"], res["sd"])
+         st.subheader("Normality Heuristics")
+
+         # Determine skewness interpretation
+         if abs(skew) <= 0.1:
+             skew_interp = "Symmetric (OK)"
+         elif abs(skew) <= 0.5:
+             skew_interp = f"{'Right' if skew > 0 else 'Left'} slight skew (usually OK)"
+         elif abs(skew) <= 1.0:
+             skew_interp = f"{'Right' if skew > 0 else 'Left'} noticeable skew"
+         else:
+             skew_interp = f"{'Right' if skew > 0 else 'Left'} strong skew"
+
+         norm_checks = pd.DataFrame(
+             {
+                 "Check": [
+                     "IQR/SD",
+                     "Q1 z-score",
+                     "Q3 z-score",
+                     "Pearson Skewness",
+                 ],
+                 "Value": [
+                     f"{R:.2f}",
+                     f"{q1_z:.2f}",
+                     f"{q3_z:.2f}",
+                     f"{skew:.2f}",
+                 ],
+                 "Flag": [
+                     (
+                         # IQR/SD is ~1.35 for a normal distribution; heavy tails
+                         # inflate the SD and push the ratio down, light tails push it up.
+                         "Lighter tails"
+                         if R > 1.5
+                         else "Heavier tails" if R < 1.2 else "OK"
+                     ),
+                     "Deviation" if abs(q1_z + 0.6745) > 0.1 else "OK",
+                     "Deviation" if abs(q3_z - 0.6745) > 0.1 else "OK",
+                     skew_interp,
+                 ],
+             }
+         )
+         st.table(norm_checks)
+
+         # Add skewness interpretation guide
+         st.markdown(
+             """
+ **Pearson Skewness Interpretation:**
+ - ≈ 0: Symmetric distribution
+ - ±0.1 to ±0.5: Slight/moderate skew
+ - ±0.5 to ±1: Noticeable skew
+ - Greater than ±1: Strong skew
+
+ - Positive values: Right skew (longer tail on right)
+ - Negative values: Left skew (longer tail on left)
+ """
+         )
+
+         # Warning if heuristic checks indicate non-normality
+         if any(("OK" not in val) for val in norm_checks["Flag"]):
+             st.warning(
+                 "Warning: Heuristic checks indicate possible deviations "
+                 "from normality; interpret z-score and percentiles with "
+                 "caution."
+             )
+
+         # Skew-Corrected Results (optional)
+         with st.expander("Optional: Skew-Corrected Results"):
+             st.write("Adjusts for skew via Pearson Type III back-transform.")
+             st.write("Error often <1 percentile point when |skew| ≤ 0.5.")
+             st.write("Usually more useful for strongly skewed distributions.")
+             st.write("Note: This is a heuristic and may not always be accurate.")
+             res_skew = normalizer_model.compute_skew_corrected_position(
+                 value=value,
+                 mean=res["mean"],
+                 sd=res["sd"],
+                 median=res["median"],
+             )
+             pct_skew = f"{res_skew['percentile_skew_corrected']:.2f}"
+             sc1, sc2 = st.columns(2)
+             sc1.metric(
+                 "Skew-Corrected Z-Score",
+                 f"{res_skew['z_skew_corrected']:.2f}",
+             )
+             sc2.metric(
+                 "Skew-Corrected Percentile",
+                 pct_skew,
+             )
+
+         st.markdown("---")
+         st.subheader("Visualizations")
+         # Prepare data for normal distribution
+         z_vals = np.linspace(-4, 4, 400)
+         density = norm.pdf(z_vals)
+         df_chart = pd.DataFrame({"z": z_vals, "density": density})
+         # Shade area up to observed z-score
+         area = (
+             alt.Chart(df_chart)
+             .mark_area(color="orange", opacity=0.3)
+             .transform_filter(alt.datum.z <= res["z_score"])
+             .encode(
+                 x=alt.X(
+                     "z:Q",
+                     title="z-score",
+                 ),
+                 y=alt.Y(
+                     "density:Q",
+                     title="Density",
+                 ),
+             )
+         )
+         # Plot distribution line
+         line = (
+             alt.Chart(df_chart)
+             .mark_line(color="orange")
+             .encode(
+                 x="z:Q",
+                 y="density:Q",
+             )
+         )
+         # Vertical line at observed z
+         vline = (
+             alt.Chart(pd.DataFrame({"z": [res["z_score"]]}))
+             .mark_rule(color="orange")
+             .encode(x="z:Q")
+         )
+         chart = (area + line + vline).properties(
+             width=600,
+             height=300,
+             title="Standard Normal Distribution",
+         )
+         st.altair_chart(chart, use_container_width=True)
+         # Text summary
+         st.write(
+             f"Your value is z = {res['z_score']:.2f}, which places you in "
+             f"the {res['percentile']:.1f}th percentile of a normal "
+             f"distribution."
+         )
+         # Bullet chart showing z-score location
+         # Using a horizontal bullet gauge from -3 to 3 SD
+         bullet = go.Figure(
+             go.Indicator(
+                 mode="number+gauge",
+                 value=res["z_score"],
+                 number={"suffix": " SD"},
+                 gauge={
+                     "shape": "bullet",
+                     "axis": {
+                         "range": [-3, 3],
+                         "tickmode": "linear",
+                         "dtick": 0.5,
+                     },
+                     "bar": {"color": "orange"},
+                 },
+             )
+         )
+         bullet.update_layout(
+             height=150,
+             margin={"t": 20, "b": 20, "l": 20, "r": 20},
+         )
+         st.plotly_chart(bullet, use_container_width=True)
+         # Show percentile text
+         st.write(f"Percentile: {res['percentile']:.1f}%")
+     else:
+         st.sidebar.info(
+             "Fill in all inputs and click Compute to get the normative Z-score."
+         )
+
+     # Footer
+     st.markdown("---")
+     st.markdown(
+         "Built with ❤️ in Düsseldorf. © Lars Masanneck 2025. "
+         "Thanks to Withings for sharing this data openly."
+     )
+
+
+ if __name__ == "__main__":
+     main()
normality_checks.py ADDED
@@ -0,0 +1,31 @@
+ """
+ normality_checks.py
+
+ Module for normality check heuristics.
+ """
+
+ from typing import Tuple
+
+
+ def iqr_tail_heaviness(iqr: float, sd: float) -> float:
+     """Return ratio R = IQR/SD for tail heaviness checking."""
+     return iqr / sd if sd != 0 else float("nan")
+
+
+ def quartile_z_scores(
+     mean: float,
+     sd: float,
+     q1: float,
+     q3: float,
+ ) -> Tuple[float, float]:
+     """Return observed z-scores for Q1 and Q3."""
+     if sd == 0:
+         return (float("nan"), float("nan"))
+     q1_z = (q1 - mean) / sd
+     q3_z = (q3 - mean) / sd
+     return q1_z, q3_z
+
+
+ def pearson_skewness(mean: float, median: float, sd: float) -> float:
+     """Return Pearson's second (median-based) skewness coefficient, 3 * (mean - median) / SD."""
+     return 3 * (mean - median) / sd if sd != 0 else float("nan")
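Note (not part of the commit): the reference values that app.py compares these heuristics against come from the standard normal distribution, where Q1 ≈ −0.6745, Q3 ≈ +0.6745, and hence IQR/SD ≈ 1.35. A minimal sanity-check sketch using only the functions above:

    import normality_checks as nc

    # Exact quartiles of a standard normal N(0, 1)
    mean, sd, median = 0.0, 1.0, 0.0
    q1, q3 = -0.6745, 0.6745

    print(nc.iqr_tail_heaviness(q3 - q1, sd))      # ~1.35, the normal reference ratio
    print(nc.quartile_z_scores(mean, sd, q1, q3))  # (-0.6745, 0.6745)
    print(nc.pearson_skewness(mean, median, sd))   # 0.0 for a symmetric distribution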
normalizer_model.py ADDED
@@ -0,0 +1,414 @@
+ """
+ normalizer_model.py - v2
+
+ Utility functions for computing z-scores and percentiles for any biomarker
+ contained in *Table_1_summary_measure.csv* (or an equivalent Excel export).
+
+ Author: Lars Masanneck 06-05-2025
+ """
+
+ from __future__ import annotations
+
+ import math
+ import pathlib
+ import warnings
+ from typing import Dict, Iterable, List, Sequence, Union
+
+ import pandas as pd
+ from scipy import stats
+ from datetime import datetime
+
+
+ ###############################################################################
+ # Public API (re-exported in __all__)
+ ###############################################################################
+
+ __all__ = [
+     "load_normative_table",
+     "compute_normative_position",
+     "add_normative_columns",
+     "categorize_bmi",
+     "compute_skew_corrected_position",
+ ]
+
+ ###############################################################################
+ # Constant category mappings
+ ###############################################################################
+
+ # BMI categories (WHO definition)
+ _BMI_BOUNDS: List[tuple[float, float, str]] = [
+     (0, 18.5, "Underweight"),
+     (18.5, 25, "Healthy"),
+     (25, 30, "Overweight"),
+     (30, math.inf, "Obesity"),
+ ]
+
+ ###############################################################################
+ # Helper functions – categories & loading
+ ###############################################################################
+
+
+ def _categorize(value: float, bounds: Sequence[tuple]) -> str:
+     """Return category *label* for *value* given (lower, upper, label) tuples."""
+     for lower, upper, label in bounds:
+         if lower <= value < upper:
+             return label
+     raise ValueError(f"{value} outside defined bounds.")
+
+
+ def categorize_bmi(bmi: Union[str, float]) -> str:
+     """Map numeric BMI to the table's BMI category strings."""
+     if isinstance(bmi, str):
+         return bmi.strip().capitalize()
+     return _categorize(float(bmi), _BMI_BOUNDS)
+
+
+ def _categorize_age(age: Union[str, int], normative_df: pd.DataFrame) -> str:
+     """Return an age-group string for a numeric age, or pass through if already a string."""
+     if isinstance(age, str):
+         return age.strip()
+     for grp in normative_df["Age"].unique():
+         grp = grp.strip()
+         if "-" in grp:
+             lo, hi = grp.split("-", 1)
+             try:
+                 lo_i, hi_i = int(lo), int(hi)
+             except ValueError:
+                 continue
+             if lo_i <= age <= hi_i:
+                 return grp
+         elif grp.endswith("+"):
+             try:
+                 lo_i = int(grp[:-1])
+             except ValueError:
+                 continue
+             if age >= lo_i:
+                 return grp
+     raise ValueError(f"No normative age group found for age {age!r}.")
+
+
+ def load_normative_table(path):
+     path = pathlib.Path(path)
+     if not path.exists():
+         raise FileNotFoundError(path)
+     # columns to keep as strings
+     str_cols = ["Age", "area", "gender", "Bmi", "Biomarkers", "nb_category"]
+     # columns to cast to floats (recovering numbers from any date-formatted cells)
+     float_cols = [
+         "min",
+         "max",
+         "median",
+         "q1",
+         "q3",
+         "iqr",
+         "mad",
+         "mean",
+         "sd",
+         "se",
+         "ci",
+     ]
+
+     def parse_num(x):
+         # Excel-formatted dates get parsed into datetime; map back to original float:
+         if isinstance(x, datetime):
+             # if year is in the future (e.g. 3183 → original was 3183.xx),
+             # treat year as integer part and month as two-digit fractional
+             if x.year > datetime.now().year:
+                 return x.year + x.month / 100
+             # otherwise (small numbers like 5.06 → parsed as 2025-06-05),
+             # use day as integer and month as two-digit fractional
+             return x.day + x.month / 100
+         # non-dates: just a normal float cast (coerce errors to NA)
+         try:
+             return float(x)
+         except Exception:
+             return pd.NA
+
+     # build the converters
+     converters = {col: str for col in str_cols}
+     converters.update({col: parse_num for col in float_cols})
+
+     # read the normative table (Excel or CSV) with the converters
+     if path.suffix.lower() == ".csv":
+         df = pd.read_csv(path, converters=converters)
+     else:
+         df = pd.read_excel(path, converters=converters)
+
+     # ensure string cols are truly str dtype
+     for c in str_cols:
+         df[c] = df[c].astype(str)
+     df.columns = df.columns.str.strip()
+
+     return df
+
+
+ ###############################################################################
+ # Core calculations
+ ###############################################################################
+
+
+ def _extract_stats(
+     normative_df: pd.DataFrame,
+     biomarker: str,
+     age_group: str,
+     region: str,
+     gender: str,
+     bmi_category: str,
+ ) -> Dict[str, Union[float, str]]:
+     """Return all summary statistics for the requested stratum."""
+     mask = (
+         (normative_df["Biomarkers"].str.lower() == biomarker.lower())
+         & (normative_df["Age"].str.lower() == age_group.lower())
+         & (normative_df["area"].str.lower() == region.lower())
+         & (normative_df["gender"].str.lower() == gender.lower())
+         & (normative_df["Bmi"].str.lower() == bmi_category.lower())
+     )
+     subset = normative_df.loc[mask]
+     if subset.empty:
+         raise KeyError("No normative stats found for the specified stratum.")
+     if len(subset) > 1:
+         warnings.warn(
+             "Multiple normative rows found; using the first one (check your table)."
+         )
+     row = subset.iloc[0]
+     # Some versions of the table label sample size as "n" instead of "nb_category"
+     n_col = "nb_category" if "nb_category" in row else "n"
+     n = str(row[n_col])
+
+     return {
+         "median": float(row["median"]),
+         "q1": float(row["q1"]),
+         "q3": float(row["q3"]),
+         "iqr": float(row["iqr"]),
+         "mad": float(row["mad"]),
+         "mean": float(row["mean"]),
+         "sd": float(row["sd"]),
+         "se": float(row["se"]),
+         "ci": float(row["ci"]),
+         "n": n,
+     }
+
+
+ def z_score(value: float, mean: float, sd: float) -> float:
+     """Compute z-score; returns NaN if SD is 0."""
+     if sd == 0:
+         return float("nan")
+     return (value - mean) / sd
+
+
+ def percentile_from_z(z: float) -> float:
+     """Convert z-score to percentile (0-100)."""
+     return float(stats.norm.cdf(z) * 100)
+
+
+ def compute_normative_position(
+     *,
+     value: float,
+     biomarker: str,
+     age_group: Union[str, int],
+     region: str,
+     gender: str,
+     bmi: Union[str, float],
+     normative_df: pd.DataFrame,
+ ) -> Dict[str, Union[float, str]]:
+     """
+     Compute where a single measurement falls relative to a normative distribution.
+
+     Parameters
+     ----------
+     value : float
+         Raw measurement for the specified biomarker.
+     biomarker : str
+         Name of the biomarker (must match a value in the "Biomarkers" column
+         of `normative_df`).
+     age_group : Union[str, int]
+         Either:
+         - A string age-group label (e.g. "40-49") matching `normative_df["Age"]`, or
+         - An integer age, which will be mapped into the correct age-group bracket.
+     region : str
+         Region name matching `normative_df["area"]` (case-insensitive).
+     gender : str
+         Gender label matching `normative_df["gender"]` (case-insensitive).
+     bmi : Union[str, float]
+         Either:
+         - A string BMI category (e.g. "Healthy"), or
+         - A numeric BMI value, which will be bucketed into WHO categories.
+     normative_df : pd.DataFrame
+         Table of normative summary statistics as returned by `load_normative_table`.
+
+     Returns
+     -------
+     Dict[str, Union[float, str]]
+         A dictionary containing:
+         - "z_score" (float): the computed z-score,
+         - "percentile" (float): the percentile (0–100),
+         - "mean" (float): the normative mean,
+         - "sd" (float): the normative standard deviation,
+         - "n" (str): the sample-size category string from the normative table,
+         - "median" (float): the normative median,
+         - "q1" (float): the first quartile,
+         - "q3" (float): the third quartile,
+         - "iqr" (float): the interquartile range,
+         - "mad" (float): the median absolute deviation,
+         - "se" (float): the standard error,
+         - "ci" (float): the confidence interval.
+
+     Raises
+     ------
+     KeyError
+         If no matching stratum is found in `normative_df`.
+     ValueError
+         If an integer `age_group` cannot be mapped to any age bracket.
+     """
+     # allow numeric age inputs by mapping them to the correct "Age" group
+     age_group_str = _categorize_age(age_group, normative_df)
+     bmi_cat = categorize_bmi(bmi)
+     stats_d = _extract_stats(
+         normative_df=normative_df,
+         biomarker=biomarker,
+         age_group=age_group_str,
+         region=region,
+         gender=gender,
+         bmi_category=bmi_cat,
+     )
+     z = z_score(value, stats_d["mean"], stats_d["sd"])
+     pct = percentile_from_z(z)
+     return {
+         "z_score": z,
+         "percentile": pct,
+         "mean": stats_d["mean"],
+         "sd": stats_d["sd"],
+         "n": stats_d["n"],
+         "median": stats_d["median"],
+         "q1": stats_d["q1"],
+         "q3": stats_d["q3"],
+         "iqr": stats_d["iqr"],
+         "mad": stats_d["mad"],
+         "se": stats_d["se"],
+         "ci": stats_d["ci"],
+     }
+
+
+ ###############################################################################
+ # Batch processing helper
+ ###############################################################################
+
+
+ def _compute_for_row(
+     row: pd.Series,
+     biomarker: str,
+     normative_df: pd.DataFrame,
+     age_col: str,
+     region_col: str,
+     gender_col: str,
+     bmi_col: str,
+     value_col: str,
+ ):
+     try:
+         res = compute_normative_position(
+             value=row[value_col],
+             biomarker=biomarker,
+             age_group=row[age_col],
+             region=row[region_col],
+             gender=row[gender_col],
+             bmi=row[bmi_col],
+             normative_df=normative_df,
+         )
+         return pd.Series(
+             [res["z_score"], res["percentile"]],
+             index=[f"{biomarker}_z", f"{biomarker}_pct"],
+         )
+     except Exception as exc:  # pragma: no cover
+         warnings.warn(str(exc))
+         return pd.Series(
+             [float("nan"), float("nan")], index=[f"{biomarker}_z", f"{biomarker}_pct"]
+         )
+
+
+ def add_normative_columns(
+     df: pd.DataFrame,
+     *,
+     biomarkers: Iterable[str],
+     normative_df: pd.DataFrame,
+     age_col: str = "Age",
+     region_col: str = "area",
+     gender_col: str = "gender",
+     bmi_col: str = "Bmi",
+     value_cols: dict[str, str] | None = None,
+     output_prefixes: dict[str, str] | None = None,
+ ) -> pd.DataFrame:
+     """
+     Append z-score and percentile columns for multiple biomarkers, with optional
+     custom prefixes for the output column names.
+
+     Parameters
+     ----------
+     df : pd.DataFrame
+         Participant-level data; must include demographic columns and raw biomarker
+         values.
+     biomarkers : Iterable[str]
+         List of biomarker names to process.
+     normative_df : pd.DataFrame
+         Normative summary table as loaded by `load_normative_table`.
+     age_col : str, default "Age"
+         Column in `df` containing age-group labels or integer ages.
+     region_col : str, default "area"
+         Column in `df` matching the "area" field in `normative_df`.
+     gender_col : str, default "gender"
+         Column in `df` matching the "gender" field in `normative_df`.
+     bmi_col : str, default "Bmi"
+         Column in `df` containing BMI values or categories.
+     value_cols : dict[str, str], optional
+         Mapping from each biomarker name to the column in `df` that holds its
+         raw numeric value. Defaults to identity mapping.
+     output_prefixes : dict[str, str], optional
+         Mapping from each biomarker name to the prefix to use for the output
+         columns. Defaults to using the biomarker name itself.
+
+     Returns
+     -------
+     pd.DataFrame
+         A copy of `df` with two new columns for each biomarker:
+         `<prefix>_z` and `<prefix>_pct`.
+     """
+     value_cols = value_cols or {bm: bm for bm in biomarkers}
+     output_prefixes = output_prefixes or {}
+     out = df.copy()
+
+     for bm in biomarkers:
+         prefix = output_prefixes.get(bm, bm)
+         out[[f"{prefix}_z", f"{prefix}_pct"]] = df.apply(
+             _compute_for_row,
+             axis=1,
+             biomarker=bm,
+             normative_df=normative_df,
+             age_col=age_col,
+             region_col=region_col,
+             gender_col=gender_col,
+             bmi_col=bmi_col,
+             value_col=value_cols[bm],
+         )
+
+     return out
+
+
+ # Skew-corrected z-score calculation
+ def compute_skew_corrected_position(
+     value: float, mean: float, sd: float, median: float
+ ) -> dict[str, float]:
+     """Compute skew-corrected z-score and percentile using a Pearson Type III distribution."""
+     # Pearson's second (median-based) skewness coefficient
+     if sd == 0:
+         skewness = float("nan")
+     else:
+         skewness = 3 * (mean - median) / sd
+     # Build Pearson Type III distribution (gamma-based)
+     dist = stats.pearson3(skewness, loc=mean, scale=sd)
+     # Compute percentile under skewed model
+     p = dist.cdf(value)
+     # Back-transform to standard normal z-score
+     z_corr = stats.norm.ppf(p)
+     return {"z_skew_corrected": z_corr, "percentile_skew_corrected": float(p * 100)}
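Note (not part of the commit): a minimal usage sketch of the public API above; the stratum labels below ("Western Europe", "male") are assumptions and must match values that actually occur in the table's "area" and "gender" columns:

    import normalizer_model

    norm_df = normalizer_model.load_normative_table("Table_1_summary_measure.csv")

    # Position a single step-count reading within its normative stratum;
    # numeric age and BMI are mapped to the table's age brackets / WHO categories.
    res = normalizer_model.compute_normative_position(
        value=6500,
        biomarker="nb_steps",
        age_group=42,
        region="Western Europe",
        gender="male",
        bmi=23.4,
        normative_df=norm_df,
    )
    print(res["z_score"], res["percentile"])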
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ streamlit==1.26.0
+ pycountry==22.3.5
+ scipy==1.11.3
+ numpy==1.26.0
+ pandas==2.1.0
+ matplotlib==3.8.0
+ seaborn==0.13.0
+ openpyxl==3.1.2
+ altair==5.5.0
+ plotly==5.21.0
static/.gitkeep ADDED
@@ -0,0 +1 @@
+ # static files directory (for CSS, JS, images)