Spaces:

moazx
/

HBV_AI_Assistant

Sleeping

File size: 40,392 Bytes

"""
LangChain Chain Implementation for HBV Assessment
Implements hybrid approach: Deterministic Logic (Phase 1) + LLM Generation (Phase 2)
"""

import json
import logging
import re
from typing import Dict, Any, Optional

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

from .config import get_llm

logger = logging.getLogger(__name__)


def clean_json_string(json_str: str) -> str:
    """
    Escape raw control characters that sit inside JSON string values.

    LLMs frequently emit JSON whose string values contain literal newlines,
    tabs and similar characters, which strict JSON parsers reject. The text
    is scanned once while tracking whether the cursor is inside a
    double-quoted string; only there are control characters rewritten.
    Whitespace between tokens is left untouched.

    Args:
        json_str: Raw JSON text that may contain unescaped control characters

    Returns:
        The same text with control characters inside string values escaped
    """
    # Control characters that have a short JSON escape form.
    short_escapes = {
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\b": "\\b",
        "\f": "\\f",
    }

    pieces = []
    inside_quotes = False
    pending_escape = False

    for ch in json_str:
        if pending_escape:
            # Character following a backslash is copied verbatim.
            pending_escape = False
            pieces.append(ch)
        elif ch == "\\":
            pending_escape = True
            pieces.append(ch)
        elif ch == '"':
            # Every unescaped quote opens or closes a string.
            inside_quotes = not inside_quotes
            pieces.append(ch)
        elif inside_quotes and ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif inside_quotes and ord(ch) < 32:
            # Remaining control characters use the generic \uXXXX form.
            pieces.append(f"\\u{ord(ch):04x}")
        else:
            pieces.append(ch)

    return "".join(pieces)


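# SASLT 2021 Guidelines - Extracted directly from official PDF.
# NOTE(review): the "[Page N]" citations inside this text appear to count pages
# from the start of the article PDF (the article spans journal pages 115-26, so
# "Page 6" would correspond to journal page 120) - confirm against the source PDF.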
SASLT_GUIDELINES = """
===== SASLT 2021 GUIDELINES: TREATMENT & MANAGEMENT =====
[Extracted from: SASLT practice guidelines for the management of Hepatitis B virus – An update, 
Saudi J Gastroenterol 2021;27:115-26]

### 1. TREATMENT INDICATIONS [SASLT 2021, Page 6]

**RECOMMENDATIONS FOR INITIATION OF TREATMENT:**

- All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A) [Page 6]

- Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A) [Page 6]

- Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B) [Page 6]

- Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D) [Page 6]

- Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D) [Page 6]

**DETAILED TREATMENT CRITERIA [Page 6]:**

Non‑cirrhotic patients should be considered for treatment if they have HBV DNA levels >2,000 IU/mL, serum ALT >~40 IU/L and severity of liver disease assessed by liver biopsy showing at least moderate necroinflammation and/or at least moderate fibrosis.

Patients with HBV DNA greater than 20,000 IU/mL and ALT greater than 2x ULN can begin treatment without a liver biopsy.

Patients with HBV DNA >2,000 IU/mL and at least moderate fibrosis may initiate treatment even if ALT levels are normal.

Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations.

**CRITICAL INTERPRETATION:**
- HBV DNA > 2,000 IU/mL is REQUIRED for all standard treatment criteria
- Exception: Cirrhosis (F4) requires only "any detectable HBV DNA level"
- Exception: Special populations (HIV coinfection, immunosuppression, pregnancy) have different thresholds


### 2. MONITORING OF UNTREATED PATIENTS [SASLT 2021, Page 6-7]

- Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B) [Page 7]

- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B) [Page 7]

- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D) [Page 7]


### 3. TREATMENT OF CHRONIC HEPATITIS B [SASLT 2021, Page 8]

**RECOMMENDATIONS:**

- The treatment of choice is the long-term administration of a potent NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A) [Page 8]

- Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A) [Page 8]

- LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A) [Page 8]

**ABOUT TAF vs TDF [Page 8]:**

TAF has demonstrated superior renal and bone density safety profiles compared with TDF in head-to-head trials. International guidelines recommend switching individuals at high risk for bone or renal disease from TDF to either TAF or ETV. TAF maintains a better safety profile unless the patient's creatinine clearance (CrCl) is less than 15 mL/minute.


### 4. HBV-HCV COINFECTION [SASLT 2021, Page 8-9]

**RECOMMENDATIONS:**

- Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A) [Page 9]

- HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D) [Page 9]

- ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D) [Page 9]


### 5. HBV-HIV COINFECTION [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION

**CRITICAL: This is an ABSOLUTE indication for treatment regardless of ALT, HBV DNA level, fibrosis stage, or necroinflammatory activity.**

"Patients with HBV‑HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver‑related mortality." [Page 9]

"The prevalence of HBV in patients with HIV coinfection in Saudi Arabia is 3%, which is much higher than the general population." [Page 9]

"All patients with HBV‑HIV coinfection should receive antiretroviral therapy (ART)." [Page 9]

"Patients must be followed closely after initiation of ART, given the risk of immune reconstitution syndrome, which may lead to HBV flare." [Page 9]

"The regimen must include tenofovir with either formulation TDF or TAF. TAF has a better safety profile and is preferred over TDF unless the patient has CrCl < 15 mL/minute. Emtricitabine and LAM should be included in the ART regimen." [Page 9]

**RECOMMENDATIONS:**

- All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A) [Page 9]

- HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A) [Page 9]


### 6. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION

"Hepatitis B flare during chemotherapy treatment or treatment with other immunosuppressive agents is potentially life threatening. The risk is very high, particularly with the use of CD20 depleting agents." [Page 9]

"Therefore, all patients undergoing immunosuppressive treatment or chemotherapy, even short‑term courses, should be screened for HBsAg, anti‑HBc, and anti‑HBs (and HBV DNA, if HBsAg is already positive)." [Page 9]

**RECOMMENDATIONS:**

- Prophylaxis for all patients with positive HBsAg should be done before initiating chemotherapy or other immunosuppressive agents (Grade A) [Page 9]

- HBsAg-negative/anti-HBc-positive patients should undergo HBV prophylaxis if they are candidates for anti CD20 or are undergoing stem cell transplantation. HBV prophylaxis should continue for at least six months after completion of immunosuppressive treatment and for twelve months if taking anti CD20 (Grade D) [Page 9]

- We recommend starting HBV prophylaxis for HBsAg or anti‑HBc positive patients undergoing treatment with tumor necrosis factor (TNF) inhibitors [Page 9]

- We recommend HBV prophylaxis for all patients who are HBsAg or anti-HBc positive before initiation of immunotherapy such as anti‑programmed cell death (PD‑1) and anti‑programmed cell death‑ligand 1 (PD‑L1) therapy [Page 9]


### 7. HBV AND PREGNANCY [SASLT 2021, Page 9-10]

"The most effective way to prevent mother‑to‑child transmission is to detect HBV early in pregnancy. Therefore, all pregnant women must be screened for HBV during the first trimester." [Page 9]

"Pregnant women should be treated if they meet the standard indication of therapy. We recommend HBV treatment if HBV DNA is greater than 100,000 IU/mL in the late second trimester (between 24‑28 weeks of gestation)." [Page 9]

"TDF is the drug of choice during pregnancy. However, more recently, a multi‑center experience from China reported no mother‑to‑child transmission or developmental anomalies in 71 infants born to mothers who received TAF during the last trimester of pregnancy." [Page 9]

**RECOMMENDATIONS:**

- All pregnant women must be screened for HBV during the first trimester (Grade A) [Page 10]

- All pregnant women with HBV DNA greater than 100,000 IU/mL in the late second trimester (between 24-28 weeks of gestation) should start antiviral prophylaxis with TDF, or TAF as an alternative (Grade D) [Page 10]

- Switch to TDF or TAF is recommended if the patient is receiving ETV, ADV, or interferon during pregnancy (Grade D) [Page 10]

- Breastfeeding is not contraindicated in HBsAg-positive untreated women or on TDF-based treatment or prophylaxis (Grade B) [Page 10]


### KEY DEFINITIONS [From Table 2, Page 3 and text]

**ALT (Alanine Aminotransferase):**
- Upper Limit of Normal (ULN) = ~40 IU/L [Page 6]
- 2×ULN = ~80 IU/L

**Necroinflammatory Activity Grades:**
- A1 = mild
- A2 = moderate  
- A3 = severe

**Liver Fibrosis Stages:**
- F0 = no fibrosis
- F1 = mild fibrosis, pericellular collagen deposits
- F2 = moderate fibrosis, beginning bridging fibrosis
- F3 = severe fibrosis, defined as presence of numerous bridges and septa
- F4 = cirrhosis

**HBV DNA Thresholds [From Table 2, Page 3]:**
- Phase 3 (Inactive carrier): <2,000 IU/mL
- Phase 4 (HBeAg-negative chronic hepatitis): >2,000 IU/mL (fluctuating levels)
- Phase 1 (Immune tolerant): >10^7 IU/mL (very high)
"""


# Explicit decision line. Tolerates markdown emphasis around the label
# ("*Decision:*", "**Decision:**", "**Decision**:") and emphasis or an opening
# bracket around the verdict ("**NOT ELIGIBLE**", "[ELIGIBLE]").
_DECISION_LINE_RE = re.compile(
    r"DECISION[\s*_]*:[\s*_\[]*(NOT\s+ELIGIBLE|ELIGIBLE)\b"
)

# Un-filled template placeholder ("[ELIGIBLE/NOT ELIGIBLE]") echoed back by the
# LLM; it carries no decision and must not be counted as one.
_DECISION_PLACEHOLDER_RE = re.compile(r"ELIGIBLE\s*/\s*NOT\s+ELIGIBLE")

# Fallback phrases used only when no explicit decision line is present.
# "ABSOLUTE INDICATION" is anchored with \b so the section header
# "Absolute Indications Check", which the prompt asks the LLM to emit in every
# answer, is not mistaken for a positive finding.
_ELIGIBLE_INDICATOR_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r"PATIENT\s+IS\s+ELIGIBLE",
        r"TREATMENT\s+IS\s+RECOMMENDED",
        r"ABSOLUTE\s+INDICATION\b",
        r"AUTOMATICALLY\s+ELIGIBLE",
        r"REQUIRES\s+TREATMENT",
        r"SHOULD\s+RECEIVE\s+TREATMENT",
        r"PROPHYLAXIS\s+IS\s+REQUIRED",
    )
)

_NOT_ELIGIBLE_INDICATOR_RES = tuple(
    re.compile(pattern)
    for pattern in (
        r"PATIENT\s+IS\s+NOT\s+ELIGIBLE",
        r"NOT\s+ELIGIBLE",
        r"DOES\s+NOT\s+MEET\s+CRITERIA",
        r"REQUIRES\s+MONITORING\s+ONLY",
    )
)


def extract_eligibility_from_text(recommendations: str) -> Optional[bool]:
    """
    Extract eligibility decision from recommendations text.

    Resolution order:
    1. An explicit "Decision: ELIGIBLE" / "Decision: NOT ELIGIBLE" statement
       (case-insensitive, with or without markdown emphasis or brackets).
    2. Otherwise, indicator phrases. If only eligible phrases or only
       not-eligible phrases are found, that side wins. If both or neither
       are found the text is treated as ambiguous.

    Args:
        recommendations: Recommendations text string (may contain escaped "\\n")

    Returns:
        True if text indicates ELIGIBLE, False if NOT ELIGIBLE, None if the
        text is empty or ambiguous
    """
    if not recommendations:
        return None

    # Normalize text for searching (case-insensitive, handle escaped newlines)
    normalized = recommendations.replace("\\n", "\n").upper()

    # Drop an echoed "ELIGIBLE/NOT ELIGIBLE" placeholder so it cannot be read
    # as a decision by either the decision regex or the fallback phrases.
    normalized = _DECISION_PLACEHOLDER_RE.sub(" ", normalized)

    decision_match = _DECISION_LINE_RE.search(normalized)
    if decision_match:
        return not decision_match.group(1).startswith("NOT")

    says_eligible = any(p.search(normalized) for p in _ELIGIBLE_INDICATOR_RES)
    says_not_eligible = any(
        p.search(normalized) for p in _NOT_ELIGIBLE_INDICATOR_RES
    )

    # Conflicting or absent signals are ambiguous: the caller then keeps the
    # JSON value instead of overwriting it with a guess.
    if says_eligible == says_not_eligible:
        return None
    return says_eligible


def validate_eligibility_consistency(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation Node: reconcile the JSON 'eligible' flag with the decision
    written in the recommendations text.

    The decision extracted from the text is treated as authoritative. When it
    disagrees with the JSON flag, the flag is overwritten in place and the
    correction is logged. When no decision can be extracted from the text, the
    JSON value is left as it is.

    Args:
        patient_data: Patient data with parsed result under "parsed_result"

    Returns:
        The same patient data dict, with "parsed_result"["eligible"] corrected
        if a mismatch was found
    """
    logger.info("🔍 [PHASE 2] Eligibility Consistency Validation Node")

    result = patient_data["parsed_result"]
    declared = result.get("eligible")
    stated = extract_eligibility_from_text(result.get("recommendations", ""))

    # Nothing usable in the text: keep whatever the JSON says.
    if stated is None:
        logger.warning(
            "⚠️ Could not extract eligibility from recommendations text - using JSON value"
        )
        return patient_data

    if declared == stated:
        logger.info(f"✓ Eligibility consistent: {declared}")
        return patient_data

    # Mismatch: report it, then let the text decision win.
    for message in (
        "⚠️ INCONSISTENCY DETECTED:",
        f"   JSON 'eligible': {declared}",
        f"   Text decision: {stated}",
        "   Correcting JSON to match text decision (text is authoritative)",
    ):
        logger.warning(message)

    result["eligible"] = stated
    patient_data["parsed_result"] = result
    logger.info(f"✓ Corrected eligibility: {stated}")

    return patient_data


def normalize_recommendations(text: str, max_len: int = 2500) -> str:
    """
    Normalize recommendations text - preserve intentional formatting.
    - Replace escaped newlines with actual newlines
    - Ensure a blank line before lines starting with a ** marker (section headers)
    - Collapse runs of 3+ newlines to a single blank line
    - Trim leading/trailing whitespace
    - Truncate to max_len characters

    Args:
        text: Raw recommendations string with escaped newlines
        max_len: Maximum length of the returned string (default 2500)

    Returns:
        Normalized recommendations string with proper formatting
    """
    if not text:
        return ""

    # Replace escaped newlines with actual newlines
    normalized = text.replace("\\n", "\n")

    # Ensure consistent spacing around section headers (** markers).
    # This must run BEFORE the blank-line collapse: a header that already has
    # a blank line above it would otherwise end up with two blank lines.
    normalized = re.sub(r"\n\*\*", "\n\n**", normalized)

    # Collapse excessive blank lines (3+ consecutive newlines -> one blank line)
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)

    # Trim leading/trailing whitespace
    normalized = normalized.strip()

    # Cap length to avoid overly long outputs (hard cut, then strip the tail)
    if len(normalized) > max_len:
        normalized = normalized[:max_len].rstrip()

    return normalized


def normalize_patient_summary(text: str) -> str:
    """
    Tidy up the patient summary produced by the LLM.

    Escaped "\\n" sequences become real newlines, runs of three or more
    newlines shrink to two, surrounding whitespace is stripped, and the
    result is cut to at most 800 characters. Empty input yields "".
    """
    if not text:
        return ""

    limit = 800
    summary = re.sub(r"\n{3,}", "\n\n", text.replace("\\n", "\n")).strip()

    if len(summary) <= limit:
        return summary
    return summary[:limit].rstrip()


# ============================================================================
# PHASE 1: DETERMINISTIC ELIGIBILITY & DATA PREPARATION
# ============================================================================


# "<mantissa> x 10^<exp>" (also with "×" or "*"), e.g. "2.5 x 10^5"
_LAB_SCI_NOTATION_RE = re.compile(
    r"((?:\d+(?:\.\d*)?|\.\d+))\s*[xX×*]\s*10\s*\^\s*([+-]?\d+)"
)
# Bare power of ten, e.g. "10^7"
_LAB_POWER_OF_TEN_RE = re.compile(r"10\s*\^\s*([+-]?\d+)")
# First plain or e-notation number, e.g. "2000", ".5", "1.5e6"
_LAB_PLAIN_NUMBER_RE = re.compile(r"(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?")


def _parse_lab_value(raw: Any) -> float:
    """Convert a lab value (number or free-form string) to float; 0.0 if nothing numeric is found."""
    if isinstance(raw, str):
        # Thousands separators ("2,000") would otherwise split the number.
        text = raw.replace(",", "").strip()

        match = _LAB_SCI_NOTATION_RE.search(text)
        if match:
            # Build the value through float() so huge exponents yield inf
            # instead of raising OverflowError.
            return float(f"{match.group(1)}e{match.group(2)}")

        match = _LAB_POWER_OF_TEN_RE.search(text)
        if match:
            return float(f"1e{match.group(1)}")

        match = _LAB_PLAIN_NUMBER_RE.search(text)
        return float(match.group(0)) if match else 0.0

    try:
        return float(raw)
    except (TypeError, ValueError):
        return 0.0


def validate_and_clean_input(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation & Cleaning Node:
    - Converts the HBV DNA level ("hbv_dna_level") to a float, accepting
      numbers as well as strings such as "2,000", "2000 IU/mL", "1.5e6",
      "2.5 x 10^5" or "10^7"
    - Falls back to 0.0 when the value is missing or not numeric
    - Precomputes how the value compares with the 2,000 IU/mL threshold

    Only HBV DNA is converted here; other fields (including ALT) are passed
    through unchanged.

    Args:
        patient_data: Raw patient data dictionary (modified in place)

    Returns:
        The same dictionary with "hbv_dna_level_numeric" (float) and
        "hbv_dna_2000_comparison" (">", "<" or "=") added
    """
    logger.info("🔍 [PHASE 1] Validation & Cleaning Node")

    # Convert HBV DNA to numeric
    hbv_dna_numeric = _parse_lab_value(patient_data.get("hbv_dna_level", 0))
    patient_data["hbv_dna_level_numeric"] = hbv_dna_numeric

    # Compute HBV DNA comparison
    if hbv_dna_numeric > 2000:
        hbv_dna_2000_comparison = ">"
    elif hbv_dna_numeric < 2000:
        hbv_dna_2000_comparison = "<"
    else:
        hbv_dna_2000_comparison = "="

    patient_data["hbv_dna_2000_comparison"] = hbv_dna_2000_comparison
    logger.info(
        f"✓ HBV DNA normalized: {hbv_dna_numeric} {hbv_dna_2000_comparison} 2000 IU/mL"
    )

    return patient_data


def _coerce_number(value: Any) -> float:
    """Best-effort float conversion for threshold checks; returns 0.0 when no number can be read."""
    try:
        return float(value)
    except (TypeError, ValueError):
        pass

    if isinstance(value, str):
        # Accept inputs such as "2,000" or "85 IU/L" by taking the first number.
        match = re.search(
            r"(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?", value.replace(",", "")
        )
        if match:
            return float(match.group(0))

    return 0.0


def assemble_llm_prompt(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prompt Assembly Node:
    - Constructs the final, complete prompt for LLM
    - LLM is solely responsible for eligibility determination
    - Uses comprehensive yet concise format with visual indicators

    The YES/NO threshold hints embedded in the prompt are computed from
    numeric values: "hbv_dna_level_numeric" when present (otherwise the raw
    "hbv_dna_level" is coerced), and a coerced "alt_level". The raw values
    are still what is displayed in the prompt text.

    Args:
        patient_data: Cleaned patient data

    Returns:
        Patient data with assembled prompt stored under "llm_prompt"
    """
    logger.info("🔍 [PHASE 1] Prompt Assembly Node")

    hbv_dna_2000_comparison = patient_data.get("hbv_dna_2000_comparison", "N/A")

    # Extract patient parameters
    sex = patient_data.get("sex", "Male")
    age = patient_data.get("age", "N/A")
    pregnancy_status = patient_data.get("pregnancy_status", "N/A")
    hbsag_status = patient_data.get("hbsag_status", "N/A")
    duration_hbsag = patient_data.get("duration_hbsag_months", "N/A")
    hbeag_status = patient_data.get("hbeag_status", "N/A")
    alt_level = patient_data.get("alt_level", 0)
    fibrosis_stage = patient_data.get("fibrosis_stage", "N/A")
    necroinflammatory = patient_data.get("necroinflammatory_activity", "N/A")
    extrahepatic = patient_data.get("extrahepatic_manifestations", False)
    immunosuppression = patient_data.get("immunosuppression_status", "None")
    coinfections = patient_data.get("coinfections", [])
    family_history = patient_data.get("family_history_cirrhosis_hcc", False)
    comorbidities = patient_data.get("other_comorbidities", [])
    hbv_dna = patient_data.get("hbv_dna_level", 0)

    # Numeric views for threshold checks. The raw values may be strings, and
    # comparing a str with an int raises TypeError.
    hbv_dna_value = patient_data.get("hbv_dna_level_numeric")
    if hbv_dna_value is None:
        hbv_dna_value = hbv_dna
    hbv_dna_value = _coerce_number(hbv_dna_value)
    alt_value = _coerce_number(alt_level)

    yes_mark, no_mark = "YES ✅", "NO ❌"
    dna_over_20000 = yes_mark if hbv_dna_value > 20000 else no_mark
    dna_over_2000 = yes_mark if hbv_dna_value > 2000 else no_mark
    alt_over_80 = yes_mark if alt_value > 80 else no_mark
    alt_over_40 = yes_mark if alt_value > 40 else no_mark

    coinfections_text = ', '.join(coinfections) if coinfections else 'None'
    comorbidities_text = ', '.join(comorbidities) if comorbidities else 'None'

    # Check for special absolute indications
    has_hiv = "HIV" in [c.upper() for c in coinfections] if coinfections else False

    # Define strings with backslashes for f-string compatibility
    hiv_absolute_indication = "- ✅ **HBV-HIV coinfection: ABSOLUTE INDICATION** [SASLT 2021, Page 9, Grade A]\\n"
    no_hiv_line = "- ❌ No HIV coinfection\\n"
    hiv_treatment_section = (
        "**HBV-HIV Coinfection Treatment (Grade A):**\\n"
        "- All HIV-positive patients with HBV coinfection should start ART immediately, irrespective of CD4 count [SASLT 2021, Page 9]\\n"
        "- Regimen MUST include TDF or TAF (preferably TAF for better renal/bone safety) [SASLT 2021, Page 9]\\n"
        "- Include Emtricitabine or Lamivudine as part of ART regimen\\n"
        "- Monitor for immune reconstitution syndrome (may cause HBV flare in first 3-6 months)\\n"
        "- HBV DNA and ALT at 3, 6, 12 months, then every 6-12 months\\n"
        "- HIV viral load every 3-6 months\\n"
        "- Annual HCC surveillance (ultrasound ± AFP)\\n\\n"
    )

    # NOTE(review): the decision tree below cites pages as 120-124 while the
    # embedded guideline text cites them as 6-10; the two schemes look like
    # journal vs. PDF page numbers. Prompt wording is left unchanged here -
    # confirm which numbering the output should use.

    # Build analysis prompt with mandatory eligibility decision tree
    analysis_prompt = f"""You are an expert hepatologist providing HBV treatment eligibility assessments based on SASLT 2021 guidelines.

PATIENT DATA:
- Sex: {sex}
- Age: {age} years
- Pregnancy Status: {pregnancy_status}
- HBsAg Status: {hbsag_status}
- HBsAg Duration: {duration_hbsag} months
- HBV DNA Level: {hbv_dna} IU/mL ({hbv_dna_2000_comparison} 2000 IU/mL)
- HBeAg Status: {hbeag_status}
- ALT Level: {alt_level} IU/L
- Fibrosis Stage: {fibrosis_stage}
- Necroinflammatory Activity: {necroinflammatory}
- Extrahepatic Manifestations: {extrahepatic}
- Immunosuppression: {immunosuppression}
- Coinfections: {coinfections_text}
- Family History (Cirrhosis/HCC): {family_history}
- Other Comorbidities: {comorbidities_text}

SASLT 2021 GUIDELINES REFERENCE:
{SASLT_GUIDELINES}

⚠️ MANDATORY ELIGIBILITY DECISION TREE - FOLLOW THIS EXACT SEQUENCE:

**STEP 1: Check ABSOLUTE INDICATIONS (these override ALL standard criteria):**

1a. **HBV-HIV Coinfection** [Page 123, Grade A]:
   - Does patient have HIV coinfection? Check: {coinfections_text}
   - If YES → **AUTOMATICALLY ELIGIBLE** (no other criteria needed)
   - Rationale: "Patients with HBV-HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver-related mortality"
   - Treatment: TDF- or TAF-based ART regimen irrespective of CD4 count

1b. **Cirrhosis (F4)** [Page 120, Grade A]:
   - Does patient have cirrhosis? Check: {fibrosis_stage}
   - Does patient have ANY detectable HBV DNA? Check: {hbv_dna} IU/mL
   - If BOTH YES → **AUTOMATICALLY ELIGIBLE**

1c. **Immunosuppression/Chemotherapy** [Page 123, Grade A]:
   - Is patient undergoing immunosuppression? Check: {immunosuppression}
   - Is HBsAg positive? Check: {hbsag_status}
   - If BOTH YES → **AUTOMATICALLY ELIGIBLE** (prophylaxis required)

1d. **Pregnancy with High Viral Load** [Page 124, Grade D]:
   - Is patient pregnant? Check: {pregnancy_status}
   - Is HBV DNA > 100,000 IU/mL? Check: {hbv_dna} vs 100,000
   - If BOTH YES → **AUTOMATICALLY ELIGIBLE**

→ If ANY absolute indication is met, STOP HERE and return ELIGIBLE = true


**STEP 2: If NO absolute indications, check STANDARD CRITERIA:**

2a. **High Viral Load + High ALT** [Page 120, Grade B]:
   - HBV DNA > 20,000 IU/mL? → {hbv_dna} vs 20,000 = {dna_over_20000}
   - ALT > 2×ULN (80 IU/L)? → {alt_level} vs 80 = {alt_over_80}
   - If BOTH YES → ELIGIBLE (fibrosis stage irrelevant)

2b. **Standard Triple Criteria** [Page 120, Grade A]:
   - HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
   - ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
   - F2+ OR A2+? → {fibrosis_stage} and {necroinflammatory} = [Check if F2+ OR A2+]
   - If ALL THREE YES → ELIGIBLE

2c. **Moderate Fibrosis Exception** [Page 120]:
   - HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
   - F2+ fibrosis? → {fibrosis_stage} = [Check if F2+]
   - If BOTH YES → ELIGIBLE (even if ALT normal)

2d. **HBeAg Positive >30 years** [Page 120, Grade D]:
   - HBeAg positive? → {hbeag_status}
   - Age > 30? → {age} vs 30
   - HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000
   - If ALL THREE YES → ELIGIBLE

2e. **Family History** [Page 120, Grade D]:
   - HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
   - ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
   - Family history HCC/cirrhosis? → {family_history}
   - If ALL THREE YES → ELIGIBLE


**STEP 3: If NONE of the above criteria met:**
→ **NOT ELIGIBLE**
→ Patient requires monitoring per Page 121 guidelines


**CRITICAL RULES YOU MUST FOLLOW:**

1. ⚠️ **HIV COINFECTION = AUTOMATIC ELIGIBILITY** - This overrides ALL other parameters including normal ALT, low HBV DNA, mild fibrosis

2. ⚠️ **HBV DNA > 2,000 IU/mL is MANDATORY** for all standard criteria EXCEPT:
   - Cirrhosis (needs only detectable HBV DNA)
   - HIV coinfection (no HBV DNA threshold)
   - Immunosuppression (no HBV DNA threshold)

3. **If HBV DNA ≤ 2,000 IU/mL:**
   - Check for cirrhosis, HIV, immunosuppression
   - If none present → AUTOMATICALLY NOT ELIGIBLE
   - Elevated ALT + moderate fibrosis is NOT sufficient without HBV DNA >2,000

4. **Direct quotes from guidelines must be cited with [Page X]**

5. **Never hallucinate criteria** - use ONLY what's explicitly stated in guidelines above

6. ⚠️ **CRITICAL: CONSISTENCY REQUIREMENT** - The JSON "eligible" field MUST match the "Decision:" statement in your recommendations text:
   - If you write "*Decision:* ELIGIBLE" in recommendations → JSON "eligible" MUST be true
   - If you write "*Decision:* NOT ELIGIBLE" in recommendations → JSON "eligible" MUST be false
   - These two fields MUST be perfectly consistent - any mismatch will be automatically corrected


RESPONSE FORMAT (JSON ONLY - NO MARKDOWN):
{{
  "eligible": true or false,
  "recommendations": "Start with clinical recommendations/decision, then discussion/rationale, and end with eligibility analysis. Use \\n for new lines. Do NOT include the patient summary here. Do NOT include editorial phrases like '(start with this section)', '(keep second)', or '(place last)'.",
  "patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis stage, immunosuppression, coinfections). Use \\n for new lines."
}}

STRUCTURE OF "recommendations" FIELD:
Use \\n for line breaks (NOT literal newlines). Format as follows:

**Clinical Recommendation**\\n
*Decision:* [ELIGIBLE/NOT ELIGIBLE]\\n
*Immediate Plan:*\\n
{hiv_treatment_section if has_hiv else ""}
- If eligible (standard criteria): Preferred agents ETV/TDF/TAF with brief dosing note [SASLT 2021, Page 8, Grade A]\\n
- If not eligible: State monitoring cadence (ALT q3-6mo, HBV DNA q6-12mo, fibrosis yearly) [SASLT 2021, Page 7]\\n
\\n
*Key Factors:*\\n
- 3-5 concise bullets of the main clinical considerations driving the decision\\n
\\n
**Discussion & Rationale**\\n
- Concise narrative explaining why the patient is (not) eligible with SASLT page citations\\n
- Highlight any caveats or follow-up steps\\n
- Explicitly remind that HIV coinfection is an absolute indication if present\\n
\\n
**Eligibility Analysis**\\n
\\n
*Absolute Indications Check (Priority):*\\n
{hiv_absolute_indication if has_hiv else ""}
{no_hiv_line if not has_hiv else ""}
- Cirrhosis (F4): [Check and mark ✅ or ❌]\\n
- Immunosuppression: [Check and mark ✅ or ❌]\\n
- Pregnancy with high viral load: [Check and mark ✅ or ❌]\\n
\\n
*Standard Criteria Assessment (if no absolute indications):*\\n
- HBV DNA >2000 IU/mL: [✅ or ❌]\\n
- ALT >ULN (40 IU/L): [✅ or ❌]\\n
- Moderate necroinflammation/fibrosis (F2+/A2+): [✅ or ❌]\\n
\\n
*Special Considerations:*\\n
- Note any additional factors: family history, age >30, extrahepatic manifestations\\n
- Cite specific SASLT guideline provisions\\n
\\n
Do NOT include patient summary text in "recommendations". Place it only in "patient_summary".

Return ONLY the JSON object, nothing else."""

    patient_data["llm_prompt"] = analysis_prompt
    logger.info("✓ LLM prompt assembled")
    if has_hiv:
        logger.info("⚠️ HIV coinfection detected - absolute treatment indication")

    return patient_data


# ============================================================================
# PHASE 2: LLM GENERATION AND POST-PROCESSING
# ============================================================================


def invoke_llm_for_assessment(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    LLM Generation Node (R-Node):
    - Invokes the LLM with the assembled prompt
    - Stores the response as stripped plain text under "llm_response_raw"

    The response's ``content`` attribute is used when present, otherwise the
    response object itself. List-shaped content is flattened by joining plain
    string parts and the "text" value of dict parts. Any other non-string
    content is converted with ``str()``. This guarantees that the parser node
    always receives a string.

    Args:
        patient_data: Patient data with assembled prompt under "llm_prompt"

    Returns:
        Patient data with raw LLM response text
    """
    logger.info("🤖 [PHASE 2] LLM Generation Node")

    llm = get_llm()
    prompt = patient_data["llm_prompt"]

    logger.info("Sending prompt to LLM...")
    response = llm.invoke(prompt)
    logger.info("LLM response received")

    content = getattr(response, "content", response)

    if isinstance(content, list):
        # Content delivered as a list of parts: keep only the textual ones.
        text_parts = []
        for part in content:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                text_parts.append(part["text"])
        content = "".join(text_parts)
    elif not isinstance(content, str):
        content = str(content)

    response_text = content.strip()
    patient_data["llm_response_raw"] = response_text

    logger.info(f"✓ LLM response (first 200 chars): {response_text[:200]}...")

    return patient_data


def parse_structured_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Structured Output Parser Node (P-Node):
    - Locates the JSON object in the raw LLM response (first "{" to last "}")
    - Repairs common formatting problems (missing comma before the patient
      summary field, invisible Unicode characters, raw control characters
      inside string values)
    - Parses the JSON and stores it under "parsed_result"

    The LLM's "eligible" value is stored as parsed; it is not overridden here.

    Args:
        patient_data: Patient data with raw LLM response under "llm_response_raw"

    Returns:
        Patient data with parsed JSON

    Raises:
        ValueError: If no JSON object is found or the JSON cannot be parsed
    """
    logger.info("🔍 [PHASE 2] Structured Output Parser Node")

    response_text = patient_data["llm_response_raw"]

    try:
        # Extract JSON from response. Slicing from the first "{" to the last
        # "}" already drops any surrounding markdown code fence, so no
        # separate fence stripping is needed.
        json_start = response_text.find("{")
        json_end = response_text.rfind("}") + 1

        if json_start == -1 or json_end == 0:
            raise ValueError("No JSON object found in response")

        json_str = response_text[json_start:json_end].strip()

        # Fix common LLM formatting issue: missing comma before patient summary field
        json_str = re.sub(
            r'("recommendations"\s*:\s*"(?:[^"\\]|\\.)*")\s*(?="patient[_ ]summary")',
            r'\1, ',
            json_str,
            flags=re.S,
        )

        # Remove zero-width characters and byte-order marks
        for ch in ("\u200b", "\u200c", "\u200d", "\ufeff"):
            json_str = json_str.replace(ch, "")

        # A non-breaking space separates words; turn it into a regular space
        # rather than deleting it, so text such as "40\xa0IU/L" is not fused.
        json_str = json_str.replace("\xa0", " ")

        # Clean JSON string
        cleaned_json_str = clean_json_string(json_str)

        # Parse JSON
        result = json.loads(cleaned_json_str)
        logger.info("✓ Successfully parsed JSON response")
        logger.info(f"✓ LLM determined eligibility: {result.get('eligible')}")

        patient_data["parsed_result"] = result

        return patient_data

    except (json.JSONDecodeError, ValueError) as e:
        logger.error(f"❌ Failed to parse LLM response as JSON: {e}")
        logger.error(f"Response text: {response_text}")
        # Chain the original error so the traceback shows the root cause.
        raise ValueError(f"Failed to parse LLM response: {str(e)}") from e


def normalize_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Final Normalization Node:
    - Runs normalize_recommendations on the parsed recommendations text
    - Runs normalize_patient_summary on the parsed patient summary text
    - Stores {eligible, recommendations, patient_summary} under
      patient_data["assessment_result"]
    - The "eligible" value is copied from the parsed result unchanged (no fallback)

    Missing, null, or otherwise falsy text fields are passed to the normalizers
    as an empty string.

    Args:
        patient_data: Patient data containing a "parsed_result" mapping

    Returns:
        The same patient_data dict, with "assessment_result" added

    Raises:
        KeyError: If "parsed_result" is not present in patient_data
    """
    logger.info("🔍 [PHASE 2] Final Normalization Node")

    parsed_result = patient_data["parsed_result"]

    # dict.get's default only applies when the key is absent; an explicit JSON
    # null would still yield None, so fall back with `or ""` exactly like the
    # patient summary lookup below.
    recommendations = parsed_result.get("recommendations") or ""

    # The summary may arrive under either key spelling ("patient_summary" or
    # "patient summary"); the first non-empty one wins.
    patient_summary_raw = (
        parsed_result.get("patient_summary")
        or parsed_result.get("patient summary")
        or ""
    )

    normalized_recs = normalize_recommendations(recommendations)
    normalized_summary = normalize_patient_summary(patient_summary_raw)

    assessment_result = {
        "eligible": parsed_result.get("eligible"),
        "recommendations": normalized_recs,
        "patient_summary": normalized_summary,
    }

    patient_data["assessment_result"] = assessment_result
    logger.info(
        "✓ Output normalized: recommendations=%d chars, patient_summary=%d chars",
        len(normalized_recs),
        len(normalized_summary),
    )

    return patient_data


# ============================================================================
# CHAIN ASSEMBLY
# ============================================================================


def build_prompt_from_raw_text(raw_text: str) -> str:
    """
    Construct the LLM prompt used when the user provides raw free-form text.

    The note is stripped of surrounding whitespace and embedded verbatim,
    followed by the module-level SASLT_GUIDELINES text and a set of output
    rules instructing the LLM to answer with a single JSON object holding the
    "eligible", "recommendations" and "patient_summary" keys.

    Args:
        raw_text: Free-form clinical note supplied by the user.

    Returns:
        The full prompt text to send to the LLM.
    """
    cleaned_text = raw_text.strip()

    # NOTE: the section order described in the schema's "recommendations"
    # field must stay identical to the order mandated by rule 4 below
    # (Clinical Recommendation -> Discussion/Rationale -> Eligibility Analysis);
    # conflicting orderings give the model contradictory instructions.
    prompt = f"""You are an expert hepatologist. Read the free-form clinical note
below, infer the relevant HBV parameters, and determine treatment eligibility
STRICTLY according to SASLT 2021 guidelines.

RAW CLINICAL NOTE (verbatim):
\"\"\"{cleaned_text}\"\"\"

SASLT 2021 KEY EXCERPTS:
{SASLT_GUIDELINES}

REASONING & OUTPUT RULES:
1. First, internally extract the patient's HBV DNA, ALT, fibrosis, HBeAg, age,
   pregnancy status, immunosuppression, coinfections, family history, and other
   factors mentioned.
2. Apply the EXACT eligibility decision tree from the guidelines: absolute
   indications first (HIV coinfection, cirrhosis, immunosuppression, pregnancy
   with high viral load), then standard criteria (HBV DNA/ALT/fibrosis combos).
3. Return ONLY valid JSON (no markdown) using this schema:
{{
  "eligible": true or false,
  "recommendations": "Start with the clinical recommendation/decision, then a short discussion/rationale, and end with the eligibility analysis. Use \\n for new lines. Do NOT include the patient summary here.",
  "patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis, immunosuppression, coinfections). Use \\n for new lines."
}}
4. Recommendations order (must follow this sequence and DO NOT include editorial hints like '(start with this section)', '(keep second)', or '(place last)' in the output):
   - Clinical Recommendation section FIRST with *Decision:* ELIGIBLE/NOT ELIGIBLE, immediate plan, and monitoring/treatment steps.
   - Discussion/Rationale section SECOND with concise explanation and any caveats.
   - Eligibility Analysis section LAST referencing specific SASLT criteria with citations.
5. The patient_summary field must only contain the patient summary text (no clinical recommendation content).
6. The JSON MUST be the only output. Do not include explanations outside the JSON.

Return the JSON object now."""

    return prompt


def create_hbv_assessment_chain():
    """
    Assemble the full HBV assessment pipeline as a single runnable.

    The pipeline starts with a passthrough and then pipes the payload through
    each node function, in order: input validation/cleaning, prompt assembly,
    LLM invocation, structured-output parsing, eligibility consistency
    validation, and final output normalization.

    Returns:
        Runnable chain that processes patient data end-to-end
    """
    logger.info("🔗 Building HBV Assessment Chain...")

    # Order is significant: each node consumes what the previous one produced.
    node_functions = (
        # Phase 1: input validation & preparation
        validate_and_clean_input,
        # Phase 2: LLM-based eligibility determination & assessment
        assemble_llm_prompt,
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )

    # Fold the nodes onto the passthrough head with the pipe operator.
    chain = RunnablePassthrough()
    for node in node_functions:
        chain = chain | RunnableLambda(node)

    logger.info("✓ Chain built successfully")

    return chain


def create_hbv_assessment_chain_from_prompt():
    """
    Assemble an HBV assessment pipeline that begins at the LLM invocation node.

    Intended for callers that have already placed the complete prompt text in
    the payload (under ``llm_prompt``), so no input validation or prompt
    assembly nodes are included. The remaining nodes are LLM invocation,
    structured-output parsing, eligibility consistency validation, and output
    normalization, so the final result has the same structure as the standard
    assessment chain.

    Returns:
        Runnable chain starting from the LLM invocation step
    """
    logger.info("🔗 Building HBV Assessment Chain (from prompt)...")

    # Order is significant: each node consumes what the previous one produced.
    node_functions = (
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )

    # Fold the nodes onto the passthrough head with the pipe operator.
    chain = RunnablePassthrough()
    for node in node_functions:
        chain = chain | RunnableLambda(node)

    logger.info("✓ Prompt-based chain built successfully")

    return chain


def run_assessment_chain(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build the standard HBV assessment chain and run it on one patient payload.

    Progress banners, the resulting eligibility value, and the length of the
    recommendations are written to the module logger. Any exception raised
    while building, invoking, or reporting is logged and then re-raised
    unchanged.

    Args:
        patient_data: Patient data dictionary

    Returns:
        The "assessment_result" entry produced by the chain
    """
    banner = "=" * 80

    for line in (banner, "🚀 STARTING HBV ASSESSMENT CHAIN", banner):
        logger.info(line)

    try:
        chain_output = create_hbv_assessment_chain().invoke(patient_data)
        assessment = chain_output["assessment_result"]

        for line in (banner, "✅ CHAIN EXECUTION COMPLETE", banner):
            logger.info(line)
        logger.info(f"Eligible: {assessment['eligible']}")
        logger.info(
            f"Recommendations length: {len(assessment['recommendations'])} characters"
        )
        logger.info(banner)

        return assessment

    except Exception as e:
        # Log for visibility, then let the caller decide how to handle it.
        logger.error(f"❌ Chain execution failed: {str(e)}")
        logger.error(banner)
        raise


def run_assessment_chain_from_prompt(prompt_text: str) -> Dict[str, Any]:
    """
    Build the prompt-based HBV assessment chain and run it on a ready-made prompt.

    The prompt is wrapped in a ``{"llm_prompt": ...}`` payload and handed to the
    chain created by ``create_hbv_assessment_chain_from_prompt``, which skips
    the deterministic preprocessing nodes. Progress banners, the resulting
    eligibility value, and the length of the recommendations are written to the
    module logger. Any exception raised along the way is logged and re-raised
    unchanged.

    Args:
        prompt_text: Full prompt text to send to the LLM.

    Returns:
        The "assessment_result" entry produced by the chain.
    """
    banner = "=" * 80

    for line in (banner, "🚀 STARTING HBV ASSESSMENT CHAIN (FROM PROMPT)", banner):
        logger.info(line)

    try:
        # Downstream nodes read the prompt from the ``llm_prompt`` key.
        payload: Dict[str, Any] = {"llm_prompt": prompt_text}
        chain_output = create_hbv_assessment_chain_from_prompt().invoke(payload)
        assessment = chain_output["assessment_result"]

        for line in (banner, "✅ PROMPT-BASED CHAIN EXECUTION COMPLETE", banner):
            logger.info(line)
        logger.info(f"Eligible: {assessment['eligible']}")
        logger.info(
            f"Recommendations length: {len(assessment['recommendations'])} characters"
        )
        logger.info(banner)

        return assessment

    except Exception as e:
        # Log for visibility, then let the caller decide how to handle it.
        logger.error(f"❌ Prompt-based chain execution failed: {str(e)}")
        logger.error(banner)
        raise