Spaces:
Sleeping
Sleeping
File size: 40,392 Bytes
2a9dafb 4fd13fd 2a9dafb 714f068 2a9dafb b559b25 4fd13fd 2a9dafb b559b25 4fd13fd 2a9dafb 4fd13fd b559b25 5ced715 b559b25 2a9dafb 714f068 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd b559b25 4fd13fd 5ced715 4fd13fd 2a9dafb 4fd13fd 2a9dafb 4fd13fd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 |
"""
LangChain Chain Implementation for HBV Assessment
Implements hybrid approach: Deterministic Logic (Phase 1) + LLM Generation (Phase 2)
"""
import json
import logging
import math
import re
from typing import Any, Dict, Optional

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

from .config import get_llm
logger = logging.getLogger(__name__)
def clean_json_string(json_str: str) -> str:
    """
    Escape raw control characters that sit inside JSON string values.

    LLM output may contain literal newlines, tabs and similar characters
    inside quoted values, which ``json.loads`` rejects. The text is scanned
    once while tracking whether the cursor is inside a double-quoted string;
    control characters found there are replaced by their JSON escape
    sequences. Everything outside of strings (including the newlines that
    separate key/value pairs) is copied through unchanged.

    Args:
        json_str: Raw JSON text that may contain unescaped control characters

    Returns:
        The same text with control characters inside string values escaped
    """
    # Control characters that have a dedicated two-character JSON escape.
    short_escapes = {
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\b": "\\b",
        "\f": "\\f",
    }

    pieces = []
    inside_quotes = False
    pending_escape = False

    for ch in json_str:
        if pending_escape:
            # The character right after a backslash is copied verbatim, so an
            # escaped quote (\") never toggles the in-string state.
            pieces.append(ch)
            pending_escape = False
        elif ch == "\\":
            pieces.append(ch)
            pending_escape = True
        elif ch == '"':
            inside_quotes = not inside_quotes
            pieces.append(ch)
        elif inside_quotes and ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif inside_quotes and ord(ch) < 32:
            # Remaining control characters get the generic \uXXXX form.
            pieces.append(f"\\u{ord(ch):04x}")
        else:
            pieces.append(ch)

    return "".join(pieces)
# SASLT 2021 guideline excerpts that are embedded verbatim into the LLM prompt.
# Bracketed [Page N] markers are part of the text the model is asked to cite.
# Symbols (hyphens, the en dash in the title, >=, x, warning sign) are stored as
# proper Unicode/ASCII characters; mis-decoded byte sequences must not be
# pasted in here because the model receives this text unchanged.
SASLT_GUIDELINES = """
===== SASLT 2021 GUIDELINES: TREATMENT & MANAGEMENT =====
[Extracted from: SASLT practice guidelines for the management of Hepatitis B virus – An update,
Saudi J Gastroenterol 2021;27:115-26]
### 1. TREATMENT INDICATIONS [SASLT 2021, Page 6]
**RECOMMENDATIONS FOR INITIATION OF TREATMENT:**
- All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A) [Page 6]
- Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A) [Page 6]
- Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B) [Page 6]
- Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D) [Page 6]
- Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D) [Page 6]
**DETAILED TREATMENT CRITERIA [Page 6]:**
Non-cirrhotic patients should be considered for treatment if they have HBV DNA levels >2,000 IU/mL, serum ALT >~40 IU/L and severity of liver disease assessed by liver biopsy showing at least moderate necroinflammation and/or at least moderate fibrosis.
Patients with HBV DNA greater than 20,000 IU/mL and ALT greater than 2x ULN can begin treatment without a liver biopsy.
Patients with HBV DNA >2,000 IU/mL and at least moderate fibrosis may initiate treatment even if ALT levels are normal.
Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations.
**CRITICAL INTERPRETATION:**
- HBV DNA > 2,000 IU/mL is REQUIRED for all standard treatment criteria
- Exception: Cirrhosis (F4) requires only "any detectable HBV DNA level"
- Exception: Special populations (HIV coinfection, immunosuppression, pregnancy) have different thresholds
### 2. MONITORING OF UNTREATED PATIENTS [SASLT 2021, Page 6-7]
- Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B) [Page 7]
- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B) [Page 7]
- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D) [Page 7]
### 3. TREATMENT OF CHRONIC HEPATITIS B [SASLT 2021, Page 8]
**RECOMMENDATIONS:**
- The treatment of choice is the long-term administration of a potent NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A) [Page 8]
- Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A) [Page 8]
- LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A) [Page 8]
**ABOUT TAF vs TDF [Page 8]:**
TAF has demonstrated superior renal and bone density safety profiles compared with TDF in head-to-head trials. International guidelines recommend switching individuals at high risk for bone or renal disease from TDF to either TAF or ETV. TAF maintains a better safety profile unless the patient's creatinine clearance (CrCl) is less than 15 mL/minute.
### 4. HBV-HCV COINFECTION [SASLT 2021, Page 8-9]
**RECOMMENDATIONS:**
- Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A) [Page 9]
- HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D) [Page 9]
- ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D) [Page 9]
### 5. HBV-HIV COINFECTION [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION
**CRITICAL: This is an ABSOLUTE indication for treatment regardless of ALT, HBV DNA level, fibrosis stage, or necroinflammatory activity.**
"Patients with HBV-HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver-related mortality." [Page 9]
"The prevalence of HBV in patients with HIV coinfection in Saudi Arabia is 3%, which is much higher than the general population." [Page 9]
"All patients with HBV-HIV coinfection should receive antiretroviral therapy (ART)." [Page 9]
"Patients must be followed closely after initiation of ART, given the risk of immune reconstitution syndrome, which may lead to HBV flare." [Page 9]
"The regimen must include tenofovir with either formulation TDF or TAF. TAF has a better safety profile and is preferred over TDF unless the patient has CrCl < 15 mL/minute. Emtricitabine and LAM should be included in the ART regimen." [Page 9]
**RECOMMENDATIONS:**
- All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A) [Page 9]
- HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A) [Page 9]
### 6. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION
"Hepatitis B flare during chemotherapy treatment or treatment with other immunosuppressive agents is potentially life threatening. The risk is very high, particularly with the use of CD20 depleting agents." [Page 9]
"Therefore, all patients undergoing immunosuppressive treatment or chemotherapy, even short-term courses, should be screened for HBsAg, anti-HBc, and anti-HBs (and HBV DNA, if HBsAg is already positive)." [Page 9]
**RECOMMENDATIONS:**
- Prophylaxis for all patients with positive HBsAg should be done before initiating chemotherapy or other immunosuppressive agents (Grade A) [Page 9]
- HBsAg-negative/anti-HBc-positive patients should undergo HBV prophylaxis if they are candidates for anti CD20 or are undergoing stem cell transplantation. HBV prophylaxis should continue for at least six months after completion of immunosuppressive treatment and for twelve months if taking anti CD20 (Grade D) [Page 9]
- We recommend starting HBV prophylaxis for HBsAg or anti-HBc positive patients undergoing treatment with tumor necrosis factor (TNF) inhibitors [Page 9]
- We recommend HBV prophylaxis for all patients who are HBsAg or anti-HBc positive before initiation of immunotherapy such as anti-programmed cell death (PD-1) and anti-programmed cell death-ligand 1 (PD-L1) therapy [Page 9]
### 7. HBV AND PREGNANCY [SASLT 2021, Page 9-10]
"The most effective way to prevent mother-to-child transmission is to detect HBV early in pregnancy. Therefore, all pregnant women must be screened for HBV during the first trimester." [Page 9]
"Pregnant women should be treated if they meet the standard indication of therapy. We recommend HBV treatment if HBV DNA is greater than 100,000 IU/mL in the late second trimester (between 24-28 weeks of gestation)." [Page 9]
"TDF is the drug of choice during pregnancy. However, more recently, a multi-center experience from China reported no mother-to-child transmission or developmental anomalies in 71 infants born to mothers who received TAF during the last trimester of pregnancy." [Page 9]
**RECOMMENDATIONS:**
- All pregnant women must be screened for HBV during the first trimester (Grade A) [Page 10]
- All pregnant women with HBV DNA greater than 100,000 IU/mL in the late second trimester (between 24-28 weeks of gestation) should start antiviral prophylaxis with TDF, or TAF as an alternative (Grade D) [Page 10]
- Switch to TDF or TAF is recommended if the patient is receiving ETV, ADV, or interferon during pregnancy (Grade D) [Page 10]
- Breastfeeding is not contraindicated in HBsAg-positive untreated women or on TDF-based treatment or prophylaxis (Grade B) [Page 10]
### KEY DEFINITIONS [From Table 2, Page 3 and text]
**ALT (Alanine Aminotransferase):**
- Upper Limit of Normal (ULN) = ~40 IU/L [Page 6]
- 2×ULN = ~80 IU/L
**Necroinflammatory Activity Grades:**
- A1 = mild
- A2 = moderate
- A3 = severe
**Liver Fibrosis Stages:**
- F0 = no fibrosis
- F1 = mild fibrosis, pericellular collagen deposits
- F2 = moderate fibrosis, beginning bridging fibrosis
- F3 = severe fibrosis, defined as presence of numerous bridges and septa
- F4 = cirrhosis
**HBV DNA Thresholds [From Table 2, Page 3]:**
- Phase 3 (Inactive carrier): <2,000 IU/mL
- Phase 4 (HBeAg-negative chronic hepatitis): >2,000 IU/mL (fluctuating levels)
- Phase 1 (Immune tolerant): >10^7 IU/mL (very high)
"""
def extract_eligibility_from_text(recommendations: str) -> Optional[bool]:
    """
    Extract the eligibility decision stated in the recommendations text.

    An explicit "Decision:" statement always wins. It is recognised with or
    without markdown emphasis, e.g. "*Decision:* ELIGIBLE",
    "**Decision:** NOT ELIGIBLE" or "Decision: ELIGIBLE".

    Without an explicit decision the text is scanned for indicator phrases.
    Positive and negative phrases are evaluated independently; if both kinds
    occur the text is treated as ambiguous, because phrases such as
    "absolute indication" also appear in section headings and in negated
    sentences ("no absolute indication") of a NOT ELIGIBLE assessment.

    Args:
        recommendations: Recommendations text string (may contain literal
            backslash-n sequences instead of real newlines)

    Returns:
        True if the text indicates ELIGIBLE, False if NOT ELIGIBLE, None if
        the text is empty, has no recognisable statement, or is ambiguous
    """
    if not recommendations:
        return None

    # Case-insensitive matching; literal "\n" sequences become real newlines
    # so that \s in the patterns can span them.
    normalized = recommendations.replace("\\n", "\n").upper()

    # "NOT ELIGIBLE" is listed first so the alternation never stops at the
    # shorter "ELIGIBLE"; \** tolerates zero or more markdown asterisks on
    # either side of the colon.
    decision_match = re.search(
        r"DECISION\**\s*:\s*\**\s*(NOT\s+ELIGIBLE|ELIGIBLE)\b", normalized
    )
    if decision_match:
        return not decision_match.group(1).startswith("NOT")

    eligible_indicators = (
        r"PATIENT\s+IS\s+ELIGIBLE",
        r"TREATMENT\s+IS\s+RECOMMENDED",
        r"ABSOLUTE\s+INDICATION",
        r"AUTOMATICALLY\s+ELIGIBLE",
        r"REQUIRES\s+TREATMENT",
        r"SHOULD\s+RECEIVE\s+TREATMENT",
        r"PROPHYLAXIS\s+IS\s+REQUIRED",
    )
    not_eligible_indicators = (
        r"PATIENT\s+IS\s+NOT\s+ELIGIBLE",
        r"NOT\s+ELIGIBLE",
        r"DOES\s+NOT\s+MEET\s+CRITERIA",
        r"REQUIRES\s+MONITORING\s+ONLY",
    )

    says_eligible = any(re.search(p, normalized) for p in eligible_indicators)
    says_not_eligible = any(
        re.search(p, normalized) for p in not_eligible_indicators
    )

    # Conflicting signals: refuse to guess so the caller keeps its own value.
    if says_eligible and says_not_eligible:
        return None
    if says_eligible:
        return True
    if says_not_eligible:
        return False
    return None
def validate_eligibility_consistency(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation Node: reconcile the JSON 'eligible' flag with the decision
    written in the recommendations text.

    The decision is read from ``parsed_result["recommendations"]`` via
    ``extract_eligibility_from_text``. When the text yields no decision the
    data is returned untouched. When text and flag disagree, the flag is
    overwritten with the text decision (the text is authoritative) and the
    correction is logged.

    Args:
        patient_data: Patient data containing a "parsed_result" dict

    Returns:
        The same patient data dict, with "eligible" corrected if needed
    """
    logger.info("π [PHASE 2] Eligibility Consistency Validation Node")

    llm_output = patient_data["parsed_result"]
    flag_in_json = llm_output.get("eligible")
    decision_in_text = extract_eligibility_from_text(
        llm_output.get("recommendations", "")
    )

    # No decision recoverable from the text: keep whatever the JSON says.
    if decision_in_text is None:
        logger.warning(
            "β οΈ Could not extract eligibility from recommendations text - using JSON value"
        )
        return patient_data

    mismatch = flag_in_json != decision_in_text
    if not mismatch:
        logger.info(f"β Eligibility consistent: {flag_in_json}")
        return patient_data

    logger.warning("β οΈ INCONSISTENCY DETECTED:")
    logger.warning(f" JSON 'eligible': {flag_in_json}")
    logger.warning(f" Text decision: {decision_in_text}")
    logger.warning(
        " Correcting JSON to match text decision (text is authoritative)"
    )

    # Overwrite the flag in place and re-attach the result dict.
    llm_output["eligible"] = decision_in_text
    patient_data["parsed_result"] = llm_output
    logger.info(f"β Corrected eligibility: {decision_in_text}")
    return patient_data
def normalize_recommendations(text: str, max_len: int = 2500) -> str:
    """
    Normalize recommendations text - preserve intentional formatting.

    - Replace escaped newlines (literal backslash-n) with actual newlines
    - Put exactly one blank line before every line that starts with "**"
      (section headers)
    - Collapse runs of three or more newlines into a single blank line
    - Trim leading/trailing whitespace
    - Soft-cap the result at ``max_len`` characters

    The header rule consumes all newlines in front of the marker, so a header
    that already has a blank line above it is left as is and running the
    function on its own output changes nothing.

    Args:
        text: Raw recommendations string, possibly with escaped newlines
        max_len: Maximum number of characters kept (default 2500)

    Returns:
        Normalized recommendations string; "" for empty/None input
    """
    if not text:
        return ""

    normalized = text.replace("\\n", "\n")

    # \n+ (not a single \n) so an existing blank line is not widened to two.
    normalized = re.sub(r"\n+\*\*", "\n\n**", normalized)

    # Remove excessive blank lines (more than 2 consecutive newlines).
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)

    normalized = normalized.strip()

    # Soft cap length to avoid overly long outputs.
    if len(normalized) > max_len:
        normalized = normalized[:max_len].rstrip()
    return normalized
def normalize_patient_summary(text: str) -> str:
    """
    Tidy up the patient summary text.

    Literal backslash-n sequences become real newlines, runs of three or more
    newlines are reduced to one blank line, surrounding whitespace is
    stripped, and the result is cut to at most 800 characters (trailing
    whitespace left by the cut is removed). Empty input yields "".
    """
    if not text:
        return ""

    summary_limit = 800
    collapsed = re.sub(r"\n{3,}", "\n\n", text.replace("\\n", "\n")).strip()

    if len(collapsed) <= summary_limit:
        return collapsed
    return collapsed[:summary_limit].rstrip()
# ============================================================================
# PHASE 1: DETERMINISTIC ELIGIBILITY & DATA PREPARATION
# ============================================================================
def _coerce_lab_value(raw: Any) -> float:
    """Convert a lab value (number or free-text string) to a finite float; 0.0 if unparseable."""
    if isinstance(raw, str):
        # First number in the text, with optional thousands separators and an
        # optional exponent written as "e6", "x 10^6" or a bare "^7".
        found = re.search(
            r"(?P<mant>\d[\d,]*(?:\.\d+)?|\.\d+)"
            r"(?:[eE](?P<exp>[+-]?\d+)"
            r"|\s*[xX×*]\s*10\s*\^\s*(?P<pow10>[+-]?\d+)"
            r"|\s*\^\s*(?P<power>[+-]?\d+))?",
            raw,
        )
        if found is None:
            return 0.0
        mantissa = found.group("mant").replace(",", "")
        exponent = found.group("exp") or found.group("pow10")
        try:
            if exponent is not None:
                raw = float(f"{mantissa}e{exponent}")
            elif found.group("power") is not None:
                raw = float(mantissa) ** int(found.group("power"))
            else:
                raw = float(mantissa)
        except (ValueError, OverflowError, ZeroDivisionError):
            return 0.0
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return 0.0
    # NaN/inf would make every threshold comparison meaningless.
    return value if math.isfinite(value) else 0.0


def validate_and_clean_input(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation & Cleaning Node:
    - Converts HBV DNA and ALT (numbers or free-text strings) to floats
    - Handles missing/unparseable data by falling back to 0.0
    - Records how HBV DNA compares with the 2000 IU/mL threshold

    String values may carry units, thousands separators, comparison signs or
    an exponent ("2,000 IU/mL", ">20000", "1.5e6", "3 x 10^5", "10^7").

    The input dict is updated in place with:
    - "hbv_dna_level_numeric": float
    - "hbv_dna_2000_comparison": ">", "<" or "="
    - "alt_level_numeric": float

    Args:
        patient_data: Raw patient data dictionary

    Returns:
        The same dictionary with the derived keys added
    """
    logger.info("π [PHASE 1] Validation & Cleaning Node")

    hbv_dna_numeric = _coerce_lab_value(patient_data.get("hbv_dna_level", 0))
    patient_data["hbv_dna_level_numeric"] = hbv_dna_numeric

    # Numeric ALT is stored alongside the raw value; the raw key is untouched.
    patient_data["alt_level_numeric"] = _coerce_lab_value(
        patient_data.get("alt_level", 0)
    )

    if hbv_dna_numeric > 2000:
        hbv_dna_2000_comparison = ">"
    elif hbv_dna_numeric < 2000:
        hbv_dna_2000_comparison = "<"
    else:
        hbv_dna_2000_comparison = "="
    patient_data["hbv_dna_2000_comparison"] = hbv_dna_2000_comparison

    logger.info(
        f"β HBV DNA normalized: {hbv_dna_numeric} {hbv_dna_2000_comparison} 2000 IU/mL"
    )
    return patient_data
def _prompt_number(value: Any) -> float:
    """Coerce a raw lab value to float for the prompt's threshold checks (0.0 if unparseable)."""
    if isinstance(value, str):
        # Keep digits and the decimal point only, so "25,000 IU/mL" -> 25000.
        value = re.sub(r"[^\d.]", "", value)
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _yes_no_mark(condition: bool) -> str:
    """Render a pre-computed threshold check the way the prompt displays it."""
    return "YES ✅" if condition else "NO ❌"


def assemble_llm_prompt(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prompt Assembly Node:
    - Constructs the final, complete prompt for LLM
    - LLM is solely responsible for eligibility determination
    - Uses comprehensive yet concise format with visual indicators

    Threshold checks shown in the prompt (HBV DNA > 2,000 / > 20,000,
    ALT > 40 / > 80) are pre-computed on numeric values: HBV DNA uses
    "hbv_dna_level_numeric" when that key is present and otherwise the
    coerced raw value; ALT is coerced from the raw value. The raw values are
    still what is displayed, so string inputs no longer raise TypeError.

    Page citations in the decision tree use the same page numbers as the
    embedded guideline reference (Page 6-10).

    Args:
        patient_data: Cleaned patient data

    Returns:
        The same dict with the prompt stored under "llm_prompt"
    """
    logger.info("π [PHASE 1] Prompt Assembly Node")
    hbv_dna_2000_comparison = patient_data.get("hbv_dna_2000_comparison", "N/A")

    # Extract patient parameters
    sex = patient_data.get("sex", "Male")
    age = patient_data.get("age", "N/A")
    pregnancy_status = patient_data.get("pregnancy_status", "N/A")
    hbsag_status = patient_data.get("hbsag_status", "N/A")
    duration_hbsag = patient_data.get("duration_hbsag_months", "N/A")
    hbeag_status = patient_data.get("hbeag_status", "N/A")
    alt_level = patient_data.get("alt_level", 0)
    fibrosis_stage = patient_data.get("fibrosis_stage", "N/A")
    necroinflammatory = patient_data.get("necroinflammatory_activity", "N/A")
    extrahepatic = patient_data.get("extrahepatic_manifestations", False)
    immunosuppression = patient_data.get("immunosuppression_status", "None")
    coinfections = patient_data.get("coinfections") or []
    family_history = patient_data.get("family_history_cirrhosis_hcc", False)
    comorbidities = patient_data.get("other_comorbidities") or []
    hbv_dna = patient_data.get("hbv_dna_level", 0)

    # Numeric values used only for the pre-computed YES/NO marks.
    stored_numeric = patient_data.get("hbv_dna_level_numeric")
    hbv_dna_value = _prompt_number(hbv_dna if stored_numeric is None else stored_numeric)
    alt_value = _prompt_number(alt_level)

    dna_over_20000 = _yes_no_mark(hbv_dna_value > 20000)
    dna_over_2000 = _yes_no_mark(hbv_dna_value > 2000)
    alt_over_80 = _yes_no_mark(alt_value > 80)
    alt_over_40 = _yes_no_mark(alt_value > 40)

    # str() guards against non-string list entries.
    coinfections_text = ", ".join(str(c) for c in coinfections) if coinfections else "None"
    comorbidities_text = ", ".join(str(c) for c in comorbidities) if comorbidities else "None"

    # Check for special absolute indications
    has_hiv = any(str(c).strip().upper() == "HIV" for c in coinfections)

    # "\\n" below is a literal backslash-n: it shows the model the escaped
    # line-break form it must use inside the JSON string.
    hiv_absolute_indication = "- ✅ **HBV-HIV coinfection: ABSOLUTE INDICATION** [SASLT 2021, Page 9, Grade A]\\n"
    no_hiv_line = "- ❌ No HIV coinfection\\n"
    hiv_treatment_section = "**HBV-HIV Coinfection Treatment (Grade A):**\\n- All HIV-positive patients with HBV coinfection should start ART immediately, irrespective of CD4 count [SASLT 2021, Page 9]\\n- Regimen MUST include TDF or TAF (preferably TAF for better renal/bone safety) [SASLT 2021, Page 9]\\n- Include Emtricitabine or Lamivudine as part of ART regimen\\n- Monitor for immune reconstitution syndrome (may cause HBV flare in first 3-6 months)\\n- HBV DNA and ALT at 3, 6, 12 months, then every 6-12 months\\n- HIV viral load every 3-6 months\\n- Annual HCC surveillance (ultrasound ± AFP)\\n\\n"

    hiv_plan_block = hiv_treatment_section if has_hiv else ""
    hiv_indication_line = hiv_absolute_indication if has_hiv else ""
    no_hiv_status_line = "" if has_hiv else no_hiv_line

    # Build analysis prompt with mandatory eligibility decision tree
    analysis_prompt = f"""You are an expert hepatologist providing HBV treatment eligibility assessments based on SASLT 2021 guidelines.
PATIENT DATA:
- Sex: {sex}
- Age: {age} years
- Pregnancy Status: {pregnancy_status}
- HBsAg Status: {hbsag_status}
- HBsAg Duration: {duration_hbsag} months
- HBV DNA Level: {hbv_dna} IU/mL ({hbv_dna_2000_comparison} 2000 IU/mL)
- HBeAg Status: {hbeag_status}
- ALT Level: {alt_level} IU/L
- Fibrosis Stage: {fibrosis_stage}
- Necroinflammatory Activity: {necroinflammatory}
- Extrahepatic Manifestations: {extrahepatic}
- Immunosuppression: {immunosuppression}
- Coinfections: {coinfections_text}
- Family History (Cirrhosis/HCC): {family_history}
- Other Comorbidities: {comorbidities_text}
SASLT 2021 GUIDELINES REFERENCE:
{SASLT_GUIDELINES}
⚠️ MANDATORY ELIGIBILITY DECISION TREE - FOLLOW THIS EXACT SEQUENCE:
**STEP 1: Check ABSOLUTE INDICATIONS (these override ALL standard criteria):**
1a. **HBV-HIV Coinfection** [Page 9, Grade A]:
- Does patient have HIV coinfection? Check: {coinfections_text}
- If YES → **AUTOMATICALLY ELIGIBLE** (no other criteria needed)
- Rationale: "Patients with HBV-HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver-related mortality"
- Treatment: TDF- or TAF-based ART regimen irrespective of CD4 count
1b. **Cirrhosis (F4)** [Page 6, Grade A]:
- Does patient have cirrhosis? Check: {fibrosis_stage}
- Does patient have ANY detectable HBV DNA? Check: {hbv_dna} IU/mL
- If BOTH YES → **AUTOMATICALLY ELIGIBLE**
1c. **Immunosuppression/Chemotherapy** [Page 9, Grade A]:
- Is patient undergoing immunosuppression? Check: {immunosuppression}
- Is HBsAg positive? Check: {hbsag_status}
- If BOTH YES → **AUTOMATICALLY ELIGIBLE** (prophylaxis required)
1d. **Pregnancy with High Viral Load** [Page 10, Grade D]:
- Is patient pregnant? Check: {pregnancy_status}
- Is HBV DNA > 100,000 IU/mL? Check: {hbv_dna} vs 100,000
- If BOTH YES → **AUTOMATICALLY ELIGIBLE**
→ If ANY absolute indication is met, STOP HERE and return ELIGIBLE = true
**STEP 2: If NO absolute indications, check STANDARD CRITERIA:**
2a. **High Viral Load + High ALT** [Page 6, Grade B]:
- HBV DNA > 20,000 IU/mL? → {hbv_dna} vs 20,000 = {dna_over_20000}
- ALT > 2×ULN (80 IU/L)? → {alt_level} vs 80 = {alt_over_80}
- If BOTH YES → ELIGIBLE (fibrosis stage irrelevant)
2b. **Standard Triple Criteria** [Page 6, Grade A]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
- F2+ OR A2+? → {fibrosis_stage} and {necroinflammatory} = [Check if F2+ OR A2+]
- If ALL THREE YES → ELIGIBLE
2c. **Moderate Fibrosis Exception** [Page 6]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- F2+ fibrosis? → {fibrosis_stage} = [Check if F2+]
- If BOTH YES → ELIGIBLE (even if ALT normal)
2d. **HBeAg Positive >30 years** [Page 6, Grade D]:
- HBeAg positive? → {hbeag_status}
- Age > 30? → {age} vs 30
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000
- If ALL THREE YES → ELIGIBLE
2e. **Family History** [Page 6, Grade D]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
- Family history HCC/cirrhosis? → {family_history}
- If ALL THREE YES → ELIGIBLE
**STEP 3: If NONE of the above criteria met:**
→ **NOT ELIGIBLE**
→ Patient requires monitoring per Page 7 guidelines
**CRITICAL RULES YOU MUST FOLLOW:**
1. ⚠️ **HIV COINFECTION = AUTOMATIC ELIGIBILITY** - This overrides ALL other parameters including normal ALT, low HBV DNA, mild fibrosis
2. ⚠️ **HBV DNA > 2,000 IU/mL is MANDATORY** for all standard criteria EXCEPT:
- Cirrhosis (needs only detectable HBV DNA)
- HIV coinfection (no HBV DNA threshold)
- Immunosuppression (no HBV DNA threshold)
3. **If HBV DNA ≤ 2,000 IU/mL:**
- Check for cirrhosis, HIV, immunosuppression
- If none present → AUTOMATICALLY NOT ELIGIBLE
- Elevated ALT + moderate fibrosis is NOT sufficient without HBV DNA >2,000
4. **Direct quotes from guidelines must be cited with [Page X]**
5. **Never hallucinate criteria** - use ONLY what's explicitly stated in guidelines above
6. ⚠️ **CRITICAL: CONSISTENCY REQUIREMENT** - The JSON "eligible" field MUST match the "Decision:" statement in your recommendations text:
- If you write "*Decision:* ELIGIBLE" in recommendations → JSON "eligible" MUST be true
- If you write "*Decision:* NOT ELIGIBLE" in recommendations → JSON "eligible" MUST be false
- These two fields MUST be perfectly consistent - any mismatch will be automatically corrected
RESPONSE FORMAT (JSON ONLY - NO MARKDOWN):
{{
"eligible": true or false,
"recommendations": "Start with clinical recommendations/decision, then discussion/rationale, and end with eligibility analysis. Use \\n for new lines. Do NOT include the patient summary here. Do NOT include editorial phrases like '(start with this section)', '(keep second)', or '(place last)'.",
"patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis stage, immunosuppression, coinfections). Use \\n for new lines."
}}
STRUCTURE OF "recommendations" FIELD:
Use \\n for line breaks (NOT literal newlines). Format as follows:
**Clinical Recommendation**\\n
*Decision:* [ELIGIBLE/NOT ELIGIBLE]\\n
*Immediate Plan:*\\n
{hiv_plan_block}
- If eligible (standard criteria): Preferred agents ETV/TDF/TAF with brief dosing note [SASLT 2021, Page 8, Grade A]\\n
- If not eligible: State monitoring cadence (ALT q3-6mo, HBV DNA q6-12mo, fibrosis yearly) [SASLT 2021, Page 7]\\n
\\n
*Key Factors:*\\n
- 3-5 concise bullets of the main clinical considerations driving the decision\\n
\\n
**Discussion & Rationale**\\n
- Concise narrative explaining why the patient is (not) eligible with SASLT page citations\\n
- Highlight any caveats or follow-up steps\\n
- Explicitly remind that HIV coinfection is an absolute indication if present\\n
\\n
**Eligibility Analysis**\\n
\\n
*Absolute Indications Check (Priority):*\\n
{hiv_indication_line}
{no_hiv_status_line}
- Cirrhosis (F4): [Check and mark ✅ or ❌]\\n
- Immunosuppression: [Check and mark ✅ or ❌]\\n
- Pregnancy with high viral load: [Check and mark ✅ or ❌]\\n
\\n
*Standard Criteria Assessment (if no absolute indications):*\\n
- HBV DNA >2000 IU/mL: [✅ or ❌]\\n
- ALT >ULN (40 IU/L): [✅ or ❌]\\n
- Moderate necroinflammation/fibrosis (F2+/A2+): [✅ or ❌]\\n
\\n
*Special Considerations:*\\n
- Note any additional factors: family history, age >30, extrahepatic manifestations\\n
- Cite specific SASLT guideline provisions\\n
\\n
Do NOT include patient summary text in "recommendations". Place it only in "patient_summary".
Return ONLY the JSON object, nothing else."""

    patient_data["llm_prompt"] = analysis_prompt
    logger.info("β LLM prompt assembled")
    if has_hiv:
        logger.info("β οΈ HIV coinfection detected - absolute treatment indication")
    return patient_data
# ============================================================================
# PHASE 2: LLM GENERATION AND POST-PROCESSING
# ============================================================================
def invoke_llm_for_assessment(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    LLM Generation Node (R-Node): send the assembled prompt to the LLM.

    The model is obtained from ``get_llm()`` and invoked with
    ``patient_data["llm_prompt"]``. The reply's ``content`` attribute is used
    when the reply has one, otherwise ``str(reply)``; string replies are
    stripped of surrounding whitespace.

    Args:
        patient_data: Patient data with the prompt under "llm_prompt"

    Returns:
        The same dict with the reply stored under "llm_response_raw"
    """
    logger.info("π€ [PHASE 2] LLM Generation Node")

    model = get_llm()
    assembled_prompt = patient_data["llm_prompt"]

    logger.info("Sending prompt to LLM...")
    reply = model.invoke(assembled_prompt)
    logger.info("LLM response received")

    # Fall back to the string form for replies without a .content attribute.
    if hasattr(reply, "content"):
        raw_text = reply.content
    else:
        raw_text = str(reply)

    if isinstance(raw_text, str):
        raw_text = raw_text.strip()

    patient_data["llm_response_raw"] = raw_text
    logger.info(f"β LLM response (first 200 chars): {raw_text[:200]}...")
    return patient_data
def parse_structured_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Structured Output Parser Node (P-Node).

    Extracts the span from the first ``{`` to the last ``}`` of the raw LLM
    response, repairs a few common formatting problems, passes the text
    through ``clean_json_string`` and parses it with ``json.loads``. The
    parsed object is stored under ``patient_data["parsed_result"]``.

    This node only logs the ``eligible`` value; it does not modify it.

    Args:
        patient_data: Patient data with the raw LLM response in
            ``llm_response_raw``.

    Returns:
        The same dictionary, with ``parsed_result`` set to the parsed JSON
        object.

    Raises:
        KeyError: If ``llm_response_raw`` is missing from ``patient_data``.
        ValueError: If the response is not text, contains no JSON object, or
            cannot be parsed into a JSON object. The original error is
            attached as ``__cause__``.
    """
    logger.info("π [PHASE 2] Structured Output Parser Node")
    response_text = patient_data["llm_response_raw"]
    try:
        if not isinstance(response_text, str):
            raise ValueError("LLM response is not text")

        # Extract JSON from response (handle markdown code blocks)
        json_start = response_text.find("{")
        json_end = response_text.rfind("}") + 1
        if json_start == -1 or json_end == 0:
            raise ValueError("No JSON object found in response")
        json_str = response_text[json_start:json_end].strip()

        # Strip surrounding markdown fences if present. After the slice above
        # the text normally starts with "{" and ends with "}", so this is a
        # defensive step only.
        json_str = re.sub(r"^```(?:json)?\s*", "", json_str)
        json_str = re.sub(r"\s*```$", "", json_str)

        # Fix common LLM formatting issue: missing comma before patient summary field
        json_str = re.sub(
            r'("recommendations"\s*:\s*"(?:[^"\\]|\\.)*")\s*(?="patient[_ ]summary")',
            r'\1, ',
            json_str,
            flags=re.S,
        )

        # Drop zero-width characters, and turn non-breaking spaces into
        # ordinary spaces: deleting them would glue adjacent words together
        # inside string values.
        json_str = json_str.translate(
            {
                0x200B: None,  # zero width space
                0x200C: None,  # zero width non-joiner
                0x200D: None,  # zero width joiner
                0xFEFF: None,  # byte order mark / zero width no-break space
                0x00A0: " ",   # non-breaking space
            }
        )

        # Clean JSON string
        cleaned_json_str = clean_json_string(json_str)

        # Parse JSON
        result = json.loads(cleaned_json_str)
        if not isinstance(result, dict):
            raise ValueError("Parsed JSON is not an object")

        logger.info("β Successfully parsed JSON response")
        logger.info(f"β LLM determined eligibility: {result.get('eligible')}")
        patient_data["parsed_result"] = result
        return patient_data
    except (json.JSONDecodeError, ValueError) as e:
        logger.error(f"β Failed to parse LLM response as JSON: {e}")
        logger.error(f"Response text: {response_text}")
        # Chain the cause so the underlying parse error stays in the traceback.
        raise ValueError(f"Failed to parse LLM response: {str(e)}") from e
def normalize_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Final Normalization Node.

    Reads ``patient_data["parsed_result"]``, runs ``normalize_recommendations``
    on its recommendations text and ``normalize_patient_summary`` on its
    patient summary (accepting either the ``patient_summary`` or the
    ``patient summary`` key), and stores the final
    ``{eligible, recommendations, patient_summary}`` dictionary under
    ``patient_data["assessment_result"]``. The ``eligible`` value is copied
    through unchanged.

    Args:
        patient_data: Patient data with the parsed result.

    Returns:
        The same dictionary, with ``assessment_result`` set.

    Raises:
        KeyError: If ``parsed_result`` is missing from ``patient_data``.
    """
    logger.info("π [PHASE 2] Final Normalization Node")
    parsed_result = patient_data["parsed_result"]

    # Treat a missing or null field as empty text for both fields, so the
    # normalizers are never handed None.
    recommendations = parsed_result.get("recommendations") or ""
    patient_summary_raw = (
        parsed_result.get("patient_summary")
        or parsed_result.get("patient summary")
        or ""
    )

    normalized_recs = normalize_recommendations(recommendations)
    normalized_summary = normalize_patient_summary(patient_summary_raw)

    assessment_result = {
        "eligible": parsed_result.get("eligible"),
        "recommendations": normalized_recs,
        "patient_summary": normalized_summary,
    }
    patient_data["assessment_result"] = assessment_result
    logger.info(
        "β Output normalized: recommendations=%d chars, patient_summary=%d chars",
        len(normalized_recs),
        len(normalized_summary),
    )
    return patient_data
# ============================================================================
# CHAIN ASSEMBLY
# ============================================================================
def build_prompt_from_raw_text(raw_text: str) -> str:
    """
    Construct the LLM prompt used when the user provides raw free-form text.

    The prompt embeds the stripped clinical note and the ``SASLT_GUIDELINES``
    text, and instructs the LLM to emit a JSON object with the keys
    ``eligible``, ``recommendations`` and ``patient_summary``.

    The section order described in the JSON schema (rule 3) matches the
    mandatory order in rule 4: clinical recommendation, then
    discussion/rationale, then eligibility analysis.

    Args:
        raw_text: Free-form clinical note; surrounding whitespace is stripped.

    Returns:
        The full prompt text.
    """
    cleaned_text = raw_text.strip()
    prompt = f"""You are an expert hepatologist. Read the free-form clinical note
below, infer the relevant HBV parameters, and determine treatment eligibility
STRICTLY according to SASLT 2021 guidelines.
RAW CLINICAL NOTE (verbatim):
\"\"\"{cleaned_text}\"\"\"
SASLT 2021 KEY EXCERPTS:
{SASLT_GUIDELINES}
REASONING & OUTPUT RULES:
1. First, internally extract the patient's HBV DNA, ALT, fibrosis, HBeAg, age,
pregnancy status, immunosuppression, coinfections, family history, and other
factors mentioned.
2. Apply the EXACT eligibility decision tree from the guidelines: absolute
indications first (HIV coinfection, cirrhosis, immunosuppression, pregnancy
with high viral load), then standard criteria (HBV DNA/ALT/fibrosis combos).
3. Return ONLY valid JSON (no markdown) using this schema:
{{
"eligible": true or false,
"recommendations": "Start with the clinical recommendation/decision, then a short discussion/rationale, and end with the eligibility analysis. Use \\n for new lines. Do NOT include the patient summary here.",
"patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis, immunosuppression, coinfections). Use \\n for new lines."
}}
4. Recommendations order (must follow this sequence and DO NOT include editorial hints like '(start with this section)', '(keep second)', or '(place last)' in the output):
- Clinical Recommendation section FIRST with *Decision:* ELIGIBLE/NOT ELIGIBLE, immediate plan, and monitoring/treatment steps.
- Discussion/Rationale section SECOND with concise explanation and any caveats.
- Eligibility Analysis section LAST referencing specific SASLT criteria with citations.
5. The patient_summary field must only contain the patient summary text (no clinical recommendation content).
6. The JSON MUST be the only output. Do not include explanations outside the JSON.
Return the JSON object now."""
    return prompt
def create_hbv_assessment_chain():
    """
    Build the end-to-end HBV assessment chain.

    The chain starts with a ``RunnablePassthrough`` and then pipes the input
    through each node below, in order, each wrapped in a ``RunnableLambda``.

    Returns:
        Runnable chain that processes patient data end-to-end.
    """
    logger.info("π Building HBV Assessment Chain...")

    # Phase 1: Input Validation & Preparation
    # Phase 2: LLM-Based Eligibility Determination & Assessment
    pipeline_nodes = (
        validate_and_clean_input,
        assemble_llm_prompt,
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )

    # Compose left to right, exactly as a literal ``a | b | c`` expression would.
    pipeline = RunnablePassthrough()
    for node in pipeline_nodes:
        pipeline = pipeline | RunnableLambda(node)

    logger.info("β Chain built successfully")
    return pipeline
def create_hbv_assessment_chain_from_prompt():
    """
    Build an HBV assessment chain that begins at the LLM invocation step.

    This variant expects the caller to have already placed the full prompt
    text under ``llm_prompt``; it contains no input validation or prompt
    assembly nodes. The post-processing nodes are the same ones used after
    the LLM call in the standard chain, so the final output has the same
    structure.

    Returns:
        Runnable chain that starts from a prepared prompt.
    """
    logger.info("π Building HBV Assessment Chain (from prompt)...")

    pipeline_nodes = (
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )

    # Compose left to right, exactly as a literal ``a | b | c`` expression would.
    pipeline = RunnablePassthrough()
    for node in pipeline_nodes:
        pipeline = pipeline | RunnableLambda(node)

    logger.info("β Prompt-based chain built successfully")
    return pipeline
def run_assessment_chain(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute the HBV Assessment Chain.

    Builds the chain with ``create_hbv_assessment_chain()``, invokes it on
    ``patient_data`` and returns the ``assessment_result`` entry of the
    chain output.

    Args:
        patient_data: Patient data dictionary.

    Returns:
        Assessment result; its ``eligible`` and ``recommendations`` entries
        are logged before returning.

    Raises:
        Exception: Any error raised while building or running the chain is
            logged and re-raised unchanged.
    """
    logger.info("=" * 80)
    logger.info("π STARTING HBV ASSESSMENT CHAIN")
    logger.info("=" * 80)
    try:
        chain = create_hbv_assessment_chain()
        result = chain.invoke(patient_data)
        assessment = result["assessment_result"]
        logger.info("=" * 80)
        logger.info("✅ CHAIN EXECUTION COMPLETE")
        logger.info("=" * 80)
        logger.info(f"Eligible: {assessment['eligible']}")
        logger.info(
            f"Recommendations length: {len(assessment['recommendations'])} characters"
        )
        logger.info("=" * 80)
        return assessment
    except Exception as e:
        # Top-level boundary: log the failure, then let the caller handle it.
        logger.error(f"β Chain execution failed: {str(e)}")
        logger.error("=" * 80)
        raise
def run_assessment_chain_from_prompt(prompt_text: str) -> Dict[str, Any]:
    """
    Execute the HBV Assessment Chain starting from a raw LLM prompt.

    Intended for text-based assessment: the given text is sent to the LLM
    as-is, skipping the deterministic preprocessing nodes. The chain is built
    with ``create_hbv_assessment_chain_from_prompt()`` and invoked with a
    payload whose only key is ``llm_prompt``.

    Args:
        prompt_text: Full prompt text to send to the LLM.

    Returns:
        Assessment result; its ``eligible`` and ``recommendations`` entries
        are logged before returning.

    Raises:
        Exception: Any error raised while building or running the chain is
            logged and re-raised unchanged.
    """
    logger.info("=" * 80)
    logger.info("π STARTING HBV ASSESSMENT CHAIN (FROM PROMPT)")
    logger.info("=" * 80)
    try:
        chain = create_hbv_assessment_chain_from_prompt()
        # The downstream nodes expect a dict with ``llm_prompt`` key
        initial_payload: Dict[str, Any] = {"llm_prompt": prompt_text}
        result = chain.invoke(initial_payload)
        assessment = result["assessment_result"]
        logger.info("=" * 80)
        logger.info("✅ PROMPT-BASED CHAIN EXECUTION COMPLETE")
        logger.info("=" * 80)
        logger.info(f"Eligible: {assessment['eligible']}")
        logger.info(
            f"Recommendations length: {len(assessment['recommendations'])} characters"
        )
        logger.info("=" * 80)
        return assessment
    except Exception as e:
        # Top-level boundary: log the failure, then let the caller handle it.
        logger.error(f"β Prompt-based chain execution failed: {str(e)}")
        logger.error("=" * 80)
        raise
|