File size: 18,510 Bytes
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523afc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee7a14d
ae420f7
523afc6
 
 
 
 
 
ee7a14d
523afc6
ee7a14d
523afc6
 
 
 
 
ae420f7
523afc6
 
 
ee7a14d
523afc6
ee7a14d
523afc6
 
 
 
 
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523afc6
 
 
ae420f7
 
 
 
 
 
 
523afc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ee7a14d
523afc6
 
 
 
 
 
ee7a14d
523afc6
ee7a14d
523afc6
 
 
 
 
ae420f7
523afc6
 
 
ee7a14d
523afc6
ee7a14d
523afc6
 
 
 
 
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523afc6
ae420f7
 
 
523afc6
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
a3309b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ae420f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
"""
Batch processing and PDF generation utilities for Smartwatch Normative Z-Score Calculator.

Author: Lars Masanneck 2026
"""
import pandas as pd
import numpy as np
from io import BytesIO
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.graphics.shapes import Drawing, Rect, Line, String

# Import from the main normalizer model
import normalizer_model

# Single table describing every known biomarker: (code, display label, favourable direction).
# The label column matches the friendly names used in the main app.
_BIOMARKER_TABLE = (
    ("nb_steps", "Number of Steps", "higher"),
    ("max_steps", "Maximum Steps", "higher"),
    ("mean_active_time", "Mean Active Time", "higher"),
    ("sbp", "Systolic Blood Pressure", "lower"),
    ("dbp", "Diastolic Blood Pressure", "lower"),
    ("sleep_duration", "Sleep Duration", "higher"),
    ("avg_night_hr", "Average Night Heart Rate", "lower"),
    ("nb_moderate_active_minutes", "Moderate Active Minutes", "higher"),
    ("nb_vigorous_active_minutes", "Vigorous Active Minutes", "higher"),
    ("weight", "Weight", "lower"),
    ("pwv", "Pulse Wave Velocity", "lower"),
)

# Display label for each biomarker code
BIOMARKER_LABELS = {code: label for code, label, _ in _BIOMARKER_TABLE}

# Codes where a larger value is the good direction:
# high z-scores are drawn green, low z-scores get the concerning colors
HIGHER_IS_BETTER = {
    code for code, _, direction in _BIOMARKER_TABLE if direction == "higher"
}

# Codes where a smaller value is the good direction:
# low z-scores are drawn green, high z-scores get the concerning colors
LOWER_IS_BETTER = {
    code for code, _, direction in _BIOMARKER_TABLE if direction == "lower"
}

# Codes offered for batch processing (disabled biomarkers are left out);
# the order here is the order used when auto-detecting columns
AVAILABLE_BIOMARKERS = [
    "nb_steps", "max_steps", "mean_active_time",
    "sleep_duration", "avg_night_hr", "nb_moderate_active_minutes",
]


def get_batch_template_df():
    """Build the example table offered as a batch-upload template.

    Returns
    -------
    pd.DataFrame
        Three sample patients with demographic columns (patient_id, age,
        gender, region, bmi) followed by three biomarker columns
        (nb_steps, sleep_duration, avg_night_hr).
    """
    column_names = (
        "patient_id", "age", "gender", "region", "bmi",
        "nb_steps", "sleep_duration", "avg_night_hr",
    )
    # One tuple per example patient, in the same order as column_names
    example_rows = (
        ("P001", 45, "Man", "Western Europe", 24.5, 7500, 7.2, 62),
        ("P002", 62, "Woman", "Western Europe", 28.1, 4200, 6.5, 68),
        ("P003", 38, "Man", "North America", 22.3, 9800, 8.1, 58),
    )
    # Transpose the rows into per-column lists so pandas infers one dtype per column
    by_column = {
        name: list(values)
        for name, values in zip(column_names, zip(*example_rows))
    }
    return pd.DataFrame(by_column)


def _batch_interpretation(z: float, higher_is_better: bool) -> str:
    """Return the batch-result label for a z-score (Average = -0.5 to 0.5).

    The cut points are -2, -0.5, 0.5 and 2; only the wording and the
    check/warning marks depend on whether higher values are favourable.
    """
    if higher_is_better:
        # For steps, sleep, activity: high is good
        labels = ('Very Low ⚠️', 'Below Average', 'Average',
                  'Above Average ✓', 'Excellent ✓✓')
    else:
        # For HR, BP, PWV: low is good
        labels = ('Very Low ✓✓', 'Below Average ✓', 'Average',
                  'Above Average', 'Elevated ⚠️')
    for upper_bound, label in zip((-2, -0.5, 0.5, 2), labels):
        if z < upper_bound:
            return label
    return labels[-1]


def _batch_value_or_default(row, key, default):
    """Return ``row[key]``, or ``default`` when the column is absent or the cell is missing."""
    value = row.get(key)
    return value if pd.notna(value) else default


def process_batch_data(df: pd.DataFrame, normative_df: pd.DataFrame, 
                       biomarkers_to_process: list = None) -> pd.DataFrame:
    """
    Process batch data and add z-score and percentile columns for selected biomarkers.

    For every row and every selected biomarker three columns are added:
    ``<biomarker>_z``, ``<biomarker>_percentile`` and
    ``<biomarker>_interpretation``. Missing measurements yield 'N/A' /
    'No data'; any error raised while scoring a value is recorded in the
    interpretation column (truncated to 30 characters) instead of aborting
    the batch.

    Parameters
    ----------
    df : pd.DataFrame
        Input data with patient demographics and biomarker values
    normative_df : pd.DataFrame
        Normative reference table
    biomarkers_to_process : list, optional
        List of biomarker columns to process. If None, auto-detect from data
        (columns of ``df`` that appear in ``AVAILABLE_BIOMARKERS``).

    Returns
    -------
    pd.DataFrame
        Results with z-scores and percentiles added. For an empty input the
        result is empty but still carries the input columns plus the derived
        result columns.
    """
    # Auto-detect biomarkers if not specified
    if biomarkers_to_process is None:
        biomarkers_to_process = [col for col in df.columns if col in AVAILABLE_BIOMARKERS]
    # Materialize once so a one-shot iterable is not exhausted after the first row
    biomarkers = list(biomarkers_to_process)

    results = []
    for _, row in df.iterrows():
        result = row.to_dict()

        for biomarker in biomarkers:
            z_key = f'{biomarker}_z'
            pct_key = f'{biomarker}_percentile'
            interp_key = f'{biomarker}_interpretation'

            if pd.isna(row.get(biomarker)):
                result[z_key] = 'N/A'
                result[pct_key] = 'N/A'
                result[interp_key] = 'No data'
                continue

            try:
                # Missing demographics (absent column OR empty cell) all fall
                # back to the same defaults; conversions stay inside the try
                # so a malformed cell is reported per row.
                res = normalizer_model.compute_normative_position(
                    value=float(row[biomarker]),
                    biomarker=biomarker,
                    age_group=int(_batch_value_or_default(row, 'age', 45)),
                    region=_batch_value_or_default(row, 'region', 'Western Europe'),
                    gender=_batch_value_or_default(row, 'gender', 'Man'),
                    bmi=float(_batch_value_or_default(row, 'bmi', 24.0)),
                    normative_df=normative_df,
                )
                result[z_key] = round(res['z_score'], 2)
                result[pct_key] = round(res['percentile'], 1)
                result[interp_key] = _batch_interpretation(
                    res['z_score'], biomarker in HIGHER_IS_BETTER
                )
            except Exception as e:
                # Deliberate best-effort: one bad value must not abort the batch
                result[z_key] = 'N/A'
                result[pct_key] = 'N/A'
                result[interp_key] = f'Error: {str(e)[:30]}'

        results.append(result)

    if not results:
        # Keep a stable schema for empty uploads so callers can still
        # reference the derived columns.
        derived = [
            f'{biomarker}{suffix}'
            for biomarker in biomarkers
            for suffix in ('_z', '_percentile', '_interpretation')
        ]
        return pd.DataFrame(columns=list(dict.fromkeys([*df.columns, *derived])))

    return pd.DataFrame(results)


def create_z_score_gauge(z_score: float, label: str, biomarker: str = None, 
                         width: float = 350, height: float = 100) -> Drawing:
    """Draw a horizontal six-zone gauge with a marker at the given z-score.

    The bar covers z = -3 .. 3 in six equally wide colored zones. When
    ``biomarker`` is in ``HIGHER_IS_BETTER`` the zones run red -> green from
    left to right; for any other (or no) biomarker they run green -> red.
    The marker position is clamped to the bar, while the printed "Z = ..."
    text shows the unclamped value.
    """
    # Bar geometry: 50 units of margin on each side of the drawing
    bar_x, bar_y = 50, 35
    bar_w, bar_h = width - 100, 25

    # Palette listed from "concerning" (red) to "favourable" (green)
    palette = ['#c0392b', '#e74c3c', '#f39c12', '#f1c40f', '#2ecc71', '#27ae60']
    favourable_high = bool(biomarker) and biomarker in HIGHER_IS_BETTER
    if not favourable_high:
        # For BP, HR, PWV (and unknown biomarkers): low is good, high is bad
        palette.reverse()

    gauge = Drawing(width, height)

    # Colored zones, left to right
    segment_w = bar_w / 6
    for idx, hex_code in enumerate(palette):
        gauge.add(Rect(bar_x + idx * segment_w, bar_y, segment_w, bar_h,
                       fillColor=colors.HexColor(hex_code), strokeColor=None))

    # Outline drawn over the zones
    gauge.add(Rect(bar_x, bar_y, bar_w, bar_h,
                   fillColor=None, strokeColor=colors.black, strokeWidth=1))

    # Vertical marker, kept inside the bar by clamping z to [-3, 3]
    bounded = max(-3, min(3, z_score))
    pointer_x = bar_x + ((bounded + 3) / 6) * bar_w
    gauge.add(Line(pointer_x, bar_y - 8, pointer_x, bar_y + bar_h + 8,
                   strokeColor=colors.black, strokeWidth=3))

    # Tick labels -3 .. 3 underneath the bar
    for tick in range(-3, 4):
        tick_x = bar_x + ((tick + 3) / 6) * bar_w
        gauge.add(String(tick_x, bar_y - 15, str(tick), fontSize=9, textAnchor='middle'))

    # Biomarker name along the top edge
    centre_x = width / 2
    gauge.add(String(centre_x, height - 8, label, fontSize=11,
                     textAnchor='middle', fontName='Helvetica-Bold'))

    # Numeric z-score just above the bar
    gauge.add(String(centre_x, bar_y + bar_h + 18, f"Z = {z_score:.2f}",
                     fontSize=10, textAnchor='middle', fontName='Helvetica-Bold'))

    return gauge


def _report_label(biomarker):
    """Return the friendly label for a biomarker code, or a title-cased fallback."""
    return BIOMARKER_LABELS.get(biomarker, biomarker.replace('_', ' ').title())


def _report_interpretation(z, higher_is_better):
    """Return the report label for a z-score (Average = -0.5 to 0.5)."""
    # NOTE(review): the check/warning glyphs may not be available in the
    # built-in Helvetica font used for table cells - confirm they render.
    if higher_is_better:
        # For steps, sleep, activity: high is good
        labels = ("Very Low ⚠️", "Below Average", "Average",
                  "Above Average ✓", "Excellent ✓✓")
    else:
        # For HR, BP, PWV: low is good
        labels = ("Very Low ✓✓", "Below Average ✓", "Average",
                  "Above Average", "Elevated ⚠️")
    for upper_bound, label in zip((-2, -0.5, 0.5, 2), labels):
        if z < upper_bound:
            return label
    return labels[-1]


def _report_key_value_table(rows, col_widths):
    """Build a borderless two-column table whose first column is bold."""
    table = Table(rows, colWidths=col_widths)
    table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
    ]))
    return table


def _report_grid_table(rows, col_widths, extra_commands=()):
    """Build a centered grid table with an orange header row, plus optional extra style commands."""
    table = Table(rows, colWidths=col_widths)
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e67e22')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 6),
        ('TOPPADDING', (0, 0), (-1, -1), 6),
        *extra_commands,
    ]))
    return table


def generate_pdf_report(patient_info: dict, measurements: dict, z_scores: dict = None) -> BytesIO:
    """
    Generate a PDF report for a patient with Z-scores and graphs.

    The report contains, in order: title, demographics, measurements (if
    any), the z-score table with one gauge per biomarker (if any z-scores
    are given), a reference-population note, the classification guide, a
    disclaimer and a footer.

    Parameters
    ----------
    patient_info : dict
        Patient demographics (age, gender, region, bmi). Missing keys are
        shown as 'N/A'; None is treated as an empty dict.
    measurements : dict
        Biomarker measurements (biomarker_code: value). None is treated as
        an empty dict.
    z_scores : dict
        Z-score results for each biomarker. Only entries that are dicts
        containing a 'z_score' key are reported; their 'percentile' value is
        shown as 'N/A' when absent.

    Returns
    -------
    BytesIO
        PDF buffer ready for download (rewound to position 0)
    """
    # Tolerate None so the .get() lookups below cannot raise AttributeError
    patient_info = patient_info or {}
    measurements = measurements or {}

    buffer = BytesIO()
    doc = SimpleDocTemplate(buffer, pagesize=A4, topMargin=0.5*inch, bottomMargin=0.5*inch)

    styles = getSampleStyleSheet()

    # Orange-themed styles
    title_style = ParagraphStyle(
        'Title',
        parent=styles['Heading1'],
        fontSize=18,
        spaceAfter=12,
        alignment=1,
        textColor=colors.HexColor('#d35400'),
    )
    heading_style = ParagraphStyle(
        'Heading',
        parent=styles['Heading2'],
        fontSize=14,
        spaceAfter=8,
        spaceBefore=12,
        textColor=colors.HexColor('#e67e22'),
    )
    normal_style = styles['Normal']

    elements = []

    # Title
    elements.append(Paragraph("Smartwatch Normative Z-Score Report", title_style))
    elements.append(Spacer(1, 0.2*inch))

    # Patient Information
    elements.append(Paragraph("Demographics", heading_style))
    patient_data = [
        ["Age:", f"{patient_info.get('age', 'N/A')} years"],
        ["Gender:", patient_info.get('gender', 'N/A')],
        ["Region:", patient_info.get('region', 'N/A')],
        ["BMI:", f"{patient_info.get('bmi', 'N/A')}"],
    ]
    elements.append(_report_key_value_table(patient_data, [2*inch, 4*inch]))
    elements.append(Spacer(1, 0.2*inch))

    # Measurements
    if measurements:
        elements.append(Paragraph("Measurements", heading_style))
        measurements_data = [
            [f"{_report_label(biomarker)}:", f"{value}"]
            for biomarker, value in measurements.items()
        ]
        elements.append(_report_key_value_table(measurements_data, [2.5*inch, 3.5*inch]))
        elements.append(Spacer(1, 0.2*inch))

    # Z-Score Analysis
    if z_scores:
        elements.append(Paragraph("Z-Score Analysis", heading_style))
        elements.append(Paragraph(
            "Z-scores indicate how many standard deviations a measurement is from the population mean. "
            "Values between -2 and +2 are typically considered within normal range.",
            ParagraphStyle('ZInfo', parent=normal_style, fontSize=9, textColor=colors.grey, spaceAfter=8)
        ))

        # Only well-formed entries are reported; the same selection feeds
        # both the table and the gauges so they always agree.
        scored = [
            (biomarker, data)
            for biomarker, data in z_scores.items()
            if isinstance(data, dict) and 'z_score' in data
        ]

        if scored:
            # Z-score table
            z_data = [["Biomarker", "Value", "Z-Score", "Percentile", "Interpretation"]]
            for biomarker, data in scored:
                z = data['z_score']
                pct = data.get('percentile')
                pct_text = f"{pct:.1f}%" if pct is not None else 'N/A'
                value = measurements.get(biomarker, 'N/A')
                interp = _report_interpretation(z, biomarker in HIGHER_IS_BETTER)
                z_data.append([_report_label(biomarker), str(value), f"{z:.2f}", pct_text, interp])

            elements.append(_report_grid_table(
                z_data, [1.5*inch, 1*inch, 0.8*inch, 1*inch, 1.2*inch]
            ))
            elements.append(Spacer(1, 0.15*inch))

            # Add Z-score gauges with context-aware coloring
            for biomarker, data in scored:
                gauge = create_z_score_gauge(
                    data['z_score'], _report_label(biomarker), biomarker=biomarker
                )
                elements.append(gauge)
                elements.append(Spacer(1, 0.1*inch))

        elements.append(Spacer(1, 0.2*inch))

    # Cohort Information
    elements.append(Paragraph("Reference Population", heading_style))
    cohort_text = (
        f"Z-scores calculated using normative data from Withings users in "
        f"{patient_info.get('region', 'Western Europe')}, filtered by gender "
        f"({patient_info.get('gender', 'N/A')}), age group, and BMI category."
    )
    elements.append(Paragraph(cohort_text, normal_style))
    elements.append(Spacer(1, 0.2*inch))

    # Z-Score Classification Guide
    elements.append(Paragraph("Z-Score Classification Guide", heading_style))

    classification_data = [
        ["Z-Score Range", "Classification", "Percentile"],
        ["z < -2.0", "Very Low", "< 2.3%"],
        ["-2.0 ≤ z < -0.5", "Below Average", "2.3% - 30.9%"],
        ["-0.5 ≤ z < 0.5", "Average", "30.9% - 69.1%"],
        ["0.5 ≤ z < 2.0", "Above Average", "69.1% - 97.7%"],
        ["z ≥ 2.0", "Very High", "> 97.7%"],
    ]
    elements.append(_report_grid_table(
        classification_data,
        [1.8*inch, 1.5*inch, 1.5*inch],
        extra_commands=[
            # Highlight the "Average" row
            ('BACKGROUND', (0, 3), (-1, 3), colors.HexColor('#fef9e7')),
        ],
    ))
    elements.append(Spacer(1, 0.1*inch))

    context_note = Paragraph(
        "<b>Context:</b> For steps, sleep, and activity - higher is better. "
        "For heart rate - lower resting values are better. "
        "A z-score of 0 = population average for your demographic group.",
        ParagraphStyle('ContextNote', parent=normal_style, fontSize=8, textColor=colors.HexColor('#555555'))
    )
    elements.append(context_note)
    elements.append(Spacer(1, 0.2*inch))

    # Disclaimer
    disclaimer = Paragraph(
        "<i>This report is for educational and research purposes only. Z-scores are based on "
        "Withings population data and may not reflect clinical reference ranges. For detailed "
        "questions regarding personal health data, contact your healthcare professionals.</i>",
        ParagraphStyle('Disclaimer', parent=normal_style, fontSize=8, textColor=colors.grey)
    )
    elements.append(disclaimer)

    # Footer
    elements.append(Spacer(1, 0.2*inch))
    footer = Paragraph(
        "Built with ❤️ in Düsseldorf. © Lars Masanneck 2026.",
        ParagraphStyle('Footer', parent=normal_style, fontSize=8, textColor=colors.grey, alignment=1)
    )
    elements.append(footer)

    doc.build(elements)
    # Rewind so callers can read the PDF bytes from the start
    buffer.seek(0)
    return buffer