Spaces:
Sleeping
Sleeping
| """ | |
| LangChain Chain Implementation for HBV Assessment | |
| Implements hybrid approach: Deterministic Logic (Phase 1) + LLM Generation (Phase 2) | |
| """ | |
import json
import logging
import re
from typing import Any, Dict, Optional

from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

from .config import get_llm
| logger = logging.getLogger(__name__) | |
def clean_json_string(json_str: str) -> str:
    """
    Escape raw control characters that appear inside JSON string values.

    LLMs sometimes emit JSON whose string values contain literal newlines,
    tabs, etc. Those are invalid inside a JSON string, so they are rewritten
    as escape sequences. Whitespace between tokens (outside of strings) is
    passed through untouched.

    Args:
        json_str: Raw JSON text that may contain unescaped control characters

    Returns:
        JSON text with control characters inside string values escaped
    """
    # Control characters that have a short JSON escape form.
    short_escapes = {
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
        "\b": "\\b",
        "\f": "\\f",
    }

    pieces = []
    inside_string = False
    pending_escape = False

    for ch in json_str:
        if pending_escape:
            # Character following a backslash is copied verbatim.
            pending_escape = False
            pieces.append(ch)
        elif ch == "\\":
            pending_escape = True
            pieces.append(ch)
        elif ch == '"':
            # An unescaped quote toggles the "inside a string value" state.
            inside_string = not inside_string
            pieces.append(ch)
        elif not inside_string:
            pieces.append(ch)
        elif ch in short_escapes:
            pieces.append(short_escapes[ch])
        elif ord(ch) < 32:
            # Remaining control characters get the generic \uXXXX form.
            pieces.append(f"\\u{ord(ch):04x}")
        else:
            pieces.append(ch)

    return "".join(pieces)
# SASLT 2021 Guidelines - Extracted directly from official PDF.
# This text is interpolated verbatim into the LLM prompt, so it must contain
# real Unicode characters (≥, ×, –, ⚠️) rather than mis-decoded byte sequences.
SASLT_GUIDELINES = """
===== SASLT 2021 GUIDELINES: TREATMENT & MANAGEMENT =====
[Extracted from: SASLT practice guidelines for the management of Hepatitis B virus – An update,
Saudi J Gastroenterol 2021;27:115-26]
### 1. TREATMENT INDICATIONS [SASLT 2021, Page 6]
**RECOMMENDATIONS FOR INITIATION OF TREATMENT:**
- All patients with chronic hepatitis B (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and/or at least moderate liver necroinflammation or fibrosis (Grade A) [Page 6]
- Patients with cirrhosis (compensated or decompensated), with any detectable HBV DNA level and regardless of ALT levels (Grade A) [Page 6]
- Patients with HBV DNA > 20,000 IU/mL and ALT > 2xULN, regardless of the degree of fibrosis (Grade B) [Page 6]
- Patients with HBeAg-positive chronic HBV infection (persistently normal ALT and high HBV DNA levels) may be treated if they are > 30 years, regardless of the severity of liver histological lesions (Grade D) [Page 6]
- Patients with chronic HBV infection (HBV DNA > 2,000 IU/mL, ALT > ULN), regardless of HBeAg status, and a family history of HCC or cirrhosis and extrahepatic manifestations (Grade D) [Page 6]
**DETAILED TREATMENT CRITERIA [Page 6]:**
Non-cirrhotic patients should be considered for treatment if they have HBV DNA levels >2,000 IU/mL, serum ALT >~40 IU/L and severity of liver disease assessed by liver biopsy showing at least moderate necroinflammation and/or at least moderate fibrosis.
Patients with HBV DNA greater than 20,000 IU/mL and ALT greater than 2x ULN can begin treatment without a liver biopsy.
Patients with HBV DNA >2,000 IU/mL and at least moderate fibrosis may initiate treatment even if ALT levels are normal.
Treatment indications should also take into account patient's age, health status, risk of HBV transmission, family history of HCC or cirrhosis and extrahepatic manifestations.
**CRITICAL INTERPRETATION:**
- HBV DNA > 2,000 IU/mL is REQUIRED for all standard treatment criteria
- Exception: Cirrhosis (F4) requires only "any detectable HBV DNA level"
- Exception: Special populations (HIV coinfection, immunosuppression, pregnancy) have different thresholds
### 2. MONITORING OF UNTREATED PATIENTS [SASLT 2021, Page 6-7]
- Patients with HBeAg-positive chronic HBV infection who are younger than 30 years should be followed at least every 3-6 months (Grade B) [Page 7]
- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA <2,000 IU/ml should be followed every 6-12 months (Grade B) [Page 7]
- Patients with HBeAg-negative chronic HBV infection and serum HBV DNA ≥2,000 IU/ml should be followed every 3 months for the first year and thereafter every 6 months (Grade D) [Page 7]
### 3. TREATMENT OF CHRONIC HEPATITIS B [SASLT 2021, Page 8]
**RECOMMENDATIONS:**
- The treatment of choice is the long-term administration of a potent NA with a high barrier to resistance, regardless of the severity of liver disease (Grade A) [Page 8]
- Preferred regimens are ETV, TDF and TAF as monotherapies (Grade A) [Page 8]
- LAM, ADV and TBV are not recommended in the treatment of CHB (Grade A) [Page 8]
**ABOUT TAF vs TDF [Page 8]:**
TAF has demonstrated superior renal and bone density safety profiles compared with TDF in head-to-head trials. International guidelines recommend switching individuals at high risk for bone or renal disease from TDF to either TAF or ETV. TAF maintains a better safety profile unless the patient's creatinine clearance (CrCl) is less than 15 mL/minute.
### 4. HBV-HCV COINFECTION [SASLT 2021, Page 8-9]
**RECOMMENDATIONS:**
- Treatment of HCV through DAAs may lead to reactivation of HBV. Patients who meet the criteria for HBV treatment should be treated concurrently or before initiation of DAA (Grade A) [Page 9]
- HBV DNA and ALT should be monitored every four to eight weeks while on DAA and three months after completion of therapy (Grade D) [Page 9]
- ALT level should be monitored every four weeks while on DAA for patients who are HBsAg-negative but HBcAb-positive. If ALT starts to rise, HBsAg and HBV DNA must be obtained to determine the need to start HBV treatment (Grade D) [Page 9]
### 5. HBV-HIV COINFECTION [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION
**CRITICAL: This is an ABSOLUTE indication for treatment regardless of ALT, HBV DNA level, fibrosis stage, or necroinflammatory activity.**
"Patients with HBV-HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver-related mortality." [Page 9]
"The prevalence of HBV in patients with HIV coinfection in Saudi Arabia is 3%, which is much higher than the general population." [Page 9]
"All patients with HBV-HIV coinfection should receive antiretroviral therapy (ART)." [Page 9]
"Patients must be followed closely after initiation of ART, given the risk of immune reconstitution syndrome, which may lead to HBV flare." [Page 9]
"The regimen must include tenofovir with either formulation TDF or TAF. TAF has a better safety profile and is preferred over TDF unless the patient has CrCl < 15 mL/minute. Emtricitabine and LAM should be included in the ART regimen." [Page 9]
**RECOMMENDATIONS:**
- All HIV-positive patients with HBV co-infection should start ART irrespective of CD4 cell count (Grade A) [Page 9]
- HBV-HIV co-infected patients should be treated with TDF- or TAF-based ART regimen (Grade A) [Page 9]
### 6. IMMUNOCOMPROMISED PATIENTS [SASLT 2021, Page 9] ⚠️ ABSOLUTE TREATMENT INDICATION
"Hepatitis B flare during chemotherapy treatment or treatment with other immunosuppressive agents is potentially life threatening. The risk is very high, particularly with the use of CD20 depleting agents." [Page 9]
"Therefore, all patients undergoing immunosuppressive treatment or chemotherapy, even short-term courses, should be screened for HBsAg, anti-HBc, and anti-HBs (and HBV DNA, if HBsAg is already positive)." [Page 9]
**RECOMMENDATIONS:**
- Prophylaxis for all patients with positive HBsAg should be done before initiating chemotherapy or other immunosuppressive agents (Grade A) [Page 9]
- HBsAg-negative/anti-HBc-positive patients should undergo HBV prophylaxis if they are candidates for anti CD20 or are undergoing stem cell transplantation. HBV prophylaxis should continue for at least six months after completion of immunosuppressive treatment and for twelve months if taking anti CD20 (Grade D) [Page 9]
- We recommend starting HBV prophylaxis for HBsAg or anti-HBc positive patients undergoing treatment with tumor necrosis factor (TNF) inhibitors [Page 9]
- We recommend HBV prophylaxis for all patients who are HBsAg or anti-HBc positive before initiation of immunotherapy such as anti-programmed cell death (PD-1) and anti-programmed cell death-ligand 1 (PD-L1) therapy [Page 9]
### 7. HBV AND PREGNANCY [SASLT 2021, Page 9-10]
"The most effective way to prevent mother-to-child transmission is to detect HBV early in pregnancy. Therefore, all pregnant women must be screened for HBV during the first trimester." [Page 9]
"Pregnant women should be treated if they meet the standard indication of therapy. We recommend HBV treatment if HBV DNA is greater than 100,000 IU/mL in the late second trimester (between 24–28 weeks of gestation)." [Page 9]
"TDF is the drug of choice during pregnancy. However, more recently, a multi-center experience from China reported no mother-to-child transmission or developmental anomalies in 71 infants born to mothers who received TAF during the last trimester of pregnancy." [Page 9]
**RECOMMENDATIONS:**
- All pregnant women must be screened for HBV during the first trimester (Grade A) [Page 10]
- All pregnant women with HBV DNA greater than 100,000 IU/mL in the late second trimester (between 24-28 weeks of gestation) should start antiviral prophylaxis with TDF, or TAF as an alternative (Grade D) [Page 10]
- Switch to TDF or TAF is recommended if the patient is receiving ETV, ADV, or interferon during pregnancy (Grade D) [Page 10]
- Breastfeeding is not contraindicated in HBsAg-positive untreated women or on TDF-based treatment or prophylaxis (Grade B) [Page 10]
### KEY DEFINITIONS [From Table 2, Page 3 and text]
**ALT (Alanine Aminotransferase):**
- Upper Limit of Normal (ULN) = ~40 IU/L [Page 6]
- 2×ULN = ~80 IU/L
**Necroinflammatory Activity Grades:**
- A1 = mild
- A2 = moderate
- A3 = severe
**Liver Fibrosis Stages:**
- F0 = no fibrosis
- F1 = mild fibrosis, pericellular collagen deposits
- F2 = moderate fibrosis, beginning bridging fibrosis
- F3 = severe fibrosis, defined as presence of numerous bridges and septa
- F4 = cirrhosis
**HBV DNA Thresholds [From Table 2, Page 3]:**
- Phase 3 (Inactive carrier): <2,000 IU/mL
- Phase 4 (HBeAg-negative chronic hepatitis): >2,000 IU/mL (fluctuating levels)
- Phase 1 (Immune tolerant): >10^7 IU/mL (very high)
"""
def extract_eligibility_from_text(recommendations: str) -> Optional[bool]:
    """
    Extract the eligibility decision from recommendations text.

    An explicit "Decision: ELIGIBLE" / "Decision: NOT ELIGIBLE" statement
    (plain, *italic* or **bold** markdown) always wins. Without one, the text
    is scanned for strong indicator phrases; if phrases for both outcomes are
    present the result is treated as ambiguous.

    Args:
        recommendations: Recommendations text string (may contain escaped "\\n")

    Returns:
        True if the text indicates ELIGIBLE, False if NOT ELIGIBLE,
        None if the text is empty, has no indicator, or is ambiguous
    """
    if not recommendations:
        return None

    # Normalize text for searching (case-insensitive, handle escaped newlines)
    normalized = recommendations.replace("\\n", "\n").upper()

    # Explicit decision statement. Accepts "Decision: X", "*Decision:* X" and
    # "**Decision:** X". "NOT ELIGIBLE" is listed first so it is never
    # shadowed by the shorter alternative.
    decision_match = re.search(
        r"DECISION\s*:\s*\*{0,2}\s*(NOT\s+ELIGIBLE|ELIGIBLE)", normalized
    )
    if decision_match:
        return not decision_match.group(1).startswith("NOT")

    # Fallback: strong indicator phrases in the rationale.
    eligible_indicators = [
        r"PATIENT\s+IS\s+ELIGIBLE",
        r"TREATMENT\s+IS\s+RECOMMENDED",
        # The response template always contains an "Absolute Indications
        # Check" section header; that header alone must not count as a
        # positive finding.
        r"ABSOLUTE\s+INDICATION(?!S\s+CHECK)",
        r"AUTOMATICALLY\s+ELIGIBLE",
        r"REQUIRES\s+TREATMENT",
        r"SHOULD\s+RECEIVE\s+TREATMENT",
        r"PROPHYLAXIS\s+IS\s+REQUIRED",
    ]
    not_eligible_indicators = [
        r"PATIENT\s+IS\s+NOT\s+ELIGIBLE",
        r"NOT\s+ELIGIBLE",
        r"DOES\s+NOT\s+MEET\s+CRITERIA",
        r"REQUIRES\s+MONITORING\s+ONLY",
    ]

    says_eligible = any(re.search(p, normalized) for p in eligible_indicators)
    says_not_eligible = any(
        re.search(p, normalized) for p in not_eligible_indicators
    )

    # Conflicting signals are ambiguous; the caller then keeps the JSON value
    # instead of overriding it with a guess.
    if says_eligible and says_not_eligible:
        return None
    if says_eligible:
        return True
    if says_not_eligible:
        return False
    return None
def validate_eligibility_consistency(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation Node: reconcile the JSON 'eligible' flag with the narrative.

    The decision found in the recommendations text is treated as
    authoritative. When it can be extracted and disagrees with the JSON
    'eligible' field, the JSON field is overwritten in place and the
    correction is logged. When no decision can be extracted from the text,
    the JSON value is kept as-is.

    Args:
        patient_data: Pipeline state; must contain a "parsed_result" dict

    Returns:
        The same patient_data dict, with "parsed_result"["eligible"]
        corrected if a mismatch was found
    """
    logger.info("π [PHASE 2] Eligibility Consistency Validation Node")

    result = patient_data["parsed_result"]
    declared = result.get("eligible")
    from_text = extract_eligibility_from_text(result.get("recommendations", ""))

    # Nothing to compare against: keep whatever the JSON said.
    if from_text is None:
        logger.warning(
            "β οΈ Could not extract eligibility from recommendations text - using JSON value"
        )
        return patient_data

    # Both sources agree: nothing to fix.
    if declared == from_text:
        logger.info(f"β Eligibility consistent: {declared}")
        return patient_data

    # Mismatch: report both values, then let the text win.
    mismatch_report = (
        "β οΈ INCONSISTENCY DETECTED:",
        f" JSON 'eligible': {declared}",
        f" Text decision: {from_text}",
        " Correcting JSON to match text decision (text is authoritative)",
    )
    for line in mismatch_report:
        logger.warning(line)

    result["eligible"] = from_text
    patient_data["parsed_result"] = result
    logger.info(f"β Corrected eligibility: {from_text}")
    return patient_data
def normalize_recommendations(text: str, max_len: int = 2500) -> str:
    """
    Normalize recommendations text - preserve intentional formatting.

    - Replace escaped newlines with actual newlines
    - Ensure a blank line before section headers (lines starting with **)
    - Collapse runs of three or more newlines down to two
    - Trim leading/trailing whitespace
    - Truncate to at most max_len characters

    Args:
        text: Raw recommendations string with escaped newlines
        max_len: Soft cap on the length of the returned string
            (default 2500, sized for the comprehensive format)

    Returns:
        Normalized recommendations string with proper formatting
    """
    if not text:
        return ""

    # Replace escaped newlines with actual newlines
    normalized = text.replace("\\n", "\n")

    # Header spacing must run BEFORE the blank-line collapse: it turns an
    # already well-spaced "\n\n**" into "\n\n\n**", which the collapse below
    # then brings back to exactly one blank line.
    normalized = re.sub(r"\n\*\*", "\n\n**", normalized)

    # Remove excessive blank lines (three or more consecutive newlines)
    normalized = re.sub(r"\n{3,}", "\n\n", normalized)

    # Trim leading/trailing whitespace
    normalized = normalized.strip()

    # Soft cap length to avoid overly long outputs
    if len(normalized) > max_len:
        normalized = normalized[:max_len].rstrip()

    return normalized
def normalize_patient_summary(text: str) -> str:
    """
    Tidy up the patient summary produced by the LLM.

    Escaped "\\n" sequences become real newlines, runs of three or more
    newlines shrink to two, surrounding whitespace is removed, and the
    result is cut to at most 800 characters.

    Args:
        text: Raw patient summary string

    Returns:
        Normalized patient summary ("" for empty input)
    """
    if not text:
        return ""

    summary_limit = 800

    unescaped = text.replace("\\n", "\n")
    tidy = re.sub(r"\n{3,}", "\n\n", unescaped).strip()

    if len(tidy) <= summary_limit:
        return tidy
    # Truncation may leave trailing whitespace at the cut point; drop it.
    return tidy[:summary_limit].rstrip()
| # ============================================================================ | |
| # PHASE 1: DETERMINISTIC ELIGIBILITY & DATA PREPARATION | |
| # ============================================================================ | |
def _parse_viral_load(raw: Any) -> float:
    """Convert an HBV DNA value (number or free-text string) to a finite float; 0.0 if unusable."""
    if isinstance(raw, str):
        text = raw.strip().replace(",", "")
        # Power-of-ten notation such as "1.2 x 10^5" or "10^7".
        power = re.search(
            r"(?:(\d+(?:\.\d+)?)\s*[xX×*]\s*)?10\s*\^\s*(\d+)", text
        )
        # First plain or scientific-notation number, e.g. "2500", "<20", "1.2e5".
        plain = re.search(r"\d+(?:\.\d+)?(?:[eE][+-]?\d+)?|\.\d+", text)
        try:
            if power:
                value = float(f"{power.group(1) or '1'}e{power.group(2)}")
            elif plain:
                value = float(plain.group(0))
            else:
                value = 0.0
        except ValueError:
            value = 0.0
    else:
        try:
            value = float(raw)
        except (TypeError, ValueError):
            value = 0.0

    # NaN compares false against everything and would be reported as "= 2000";
    # infinities are not meaningful lab values either.
    if value != value or value in (float("inf"), float("-inf")):
        return 0.0
    return value


def validate_and_clean_input(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Validation & Cleaning Node:
    - Converts the HBV DNA level to a numeric value
    - Handles missing or unparseable HBV DNA data (falls back to 0.0)
    - Pre-computes how the HBV DNA level compares with 2000 IU/mL

    Only "hbv_dna_level" is read; other fields (including ALT) are passed
    through unchanged. The input dict is updated in place.

    Args:
        patient_data: Raw patient data dictionary

    Returns:
        The same dictionary with two keys added:
        "hbv_dna_level_numeric" (float) and
        "hbv_dna_2000_comparison" (one of ">", "<", "=")
    """
    logger.info("π [PHASE 1] Validation & Cleaning Node")

    # Convert HBV DNA to numeric
    hbv_dna_numeric = _parse_viral_load(patient_data.get("hbv_dna_level", 0))
    patient_data["hbv_dna_level_numeric"] = hbv_dna_numeric

    # Compute HBV DNA comparison
    if hbv_dna_numeric > 2000:
        hbv_dna_2000_comparison = ">"
    elif hbv_dna_numeric < 2000:
        hbv_dna_2000_comparison = "<"
    else:
        hbv_dna_2000_comparison = "="
    patient_data["hbv_dna_2000_comparison"] = hbv_dna_2000_comparison

    logger.info(
        f"β HBV DNA normalized: {hbv_dna_numeric} {hbv_dna_2000_comparison} 2000 IU/mL"
    )
    return patient_data
def _coerce_number(value: Any) -> float:
    """Best-effort conversion of a lab value (number or string such as "2,500") to float; 0.0 if unusable."""
    if isinstance(value, str):
        value = re.sub(r"[^\d.]", "", value)
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _yes_no(flag: bool) -> str:
    """Render a pre-computed threshold check for the prompt."""
    return "YES ✅" if flag else "NO ❌"


def assemble_llm_prompt(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Prompt Assembly Node:
    - Constructs the final, complete prompt for LLM
    - LLM is solely responsible for eligibility determination
    - Uses comprehensive yet concise format with visual indicators

    Threshold checks shown in the prompt (HBV DNA vs 2,000 / 20,000 and
    ALT vs 40 / 80) are pre-computed on numeric values, so string inputs
    such as "2,500" do not raise on comparison. Page citations use the same
    page numbering as SASLT_GUIDELINES.

    Args:
        patient_data: Cleaned patient data

    Returns:
        The same patient_data dict with the prompt stored under "llm_prompt"
    """
    logger.info("π [PHASE 1] Prompt Assembly Node")

    hbv_dna_2000_comparison = patient_data.get("hbv_dna_2000_comparison", "N/A")

    # Extract patient parameters
    sex = patient_data.get("sex", "Male")
    age = patient_data.get("age", "N/A")
    pregnancy_status = patient_data.get("pregnancy_status", "N/A")
    hbsag_status = patient_data.get("hbsag_status", "N/A")
    duration_hbsag = patient_data.get("duration_hbsag_months", "N/A")
    hbeag_status = patient_data.get("hbeag_status", "N/A")
    alt_level = patient_data.get("alt_level", 0)
    fibrosis_stage = patient_data.get("fibrosis_stage", "N/A")
    necroinflammatory = patient_data.get("necroinflammatory_activity", "N/A")
    extrahepatic = patient_data.get("extrahepatic_manifestations", False)
    immunosuppression = patient_data.get("immunosuppression_status", "None")
    coinfections = patient_data.get("coinfections", [])
    family_history = patient_data.get("family_history_cirrhosis_hcc", False)
    comorbidities = patient_data.get("other_comorbidities", [])
    hbv_dna = patient_data.get("hbv_dna_level", 0)

    # Numeric values for the threshold checks. Prefer the value prepared by
    # the validation node; fall back to coercing the raw field.
    hbv_dna_value = _coerce_number(patient_data.get("hbv_dna_level_numeric", hbv_dna))
    alt_value = _coerce_number(alt_level)

    dna_over_20000 = _yes_no(hbv_dna_value > 20000)
    dna_over_2000 = _yes_no(hbv_dna_value > 2000)
    alt_over_80 = _yes_no(alt_value > 80)
    alt_over_40 = _yes_no(alt_value > 40)

    coinfections_text = ", ".join(coinfections) if coinfections else "None"
    comorbidities_text = ", ".join(comorbidities) if comorbidities else "None"

    # Check for special absolute indications
    has_hiv = any(str(c).strip().upper() == "HIV" for c in (coinfections or []))

    # Strings containing backslashes are defined outside the f-string because
    # f-string expressions cannot contain backslashes on older Python versions.
    hiv_absolute_indication = "- ✅ **HBV-HIV coinfection: ABSOLUTE INDICATION** [SASLT 2021, Page 9, Grade A]\\n"
    no_hiv_line = "- ❌ No HIV coinfection\\n"
    hiv_treatment_section = (
        "**HBV-HIV Coinfection Treatment (Grade A):**\\n"
        "- All HIV-positive patients with HBV coinfection should start ART immediately, irrespective of CD4 count [SASLT 2021, Page 9]\\n"
        "- Regimen MUST include TDF or TAF (preferably TAF for better renal/bone safety) [SASLT 2021, Page 9]\\n"
        "- Include Emtricitabine or Lamivudine as part of ART regimen\\n"
        "- Monitor for immune reconstitution syndrome (may cause HBV flare in first 3-6 months)\\n"
        "- HBV DNA and ALT at 3, 6, 12 months, then every 6-12 months\\n"
        "- HIV viral load every 3-6 months\\n"
        "- Annual HCC surveillance (ultrasound ± AFP)\\n\\n"
    )

    # Build analysis prompt with mandatory eligibility decision tree
    analysis_prompt = f"""You are an expert hepatologist providing HBV treatment eligibility assessments based on SASLT 2021 guidelines.
PATIENT DATA:
- Sex: {sex}
- Age: {age} years
- Pregnancy Status: {pregnancy_status}
- HBsAg Status: {hbsag_status}
- HBsAg Duration: {duration_hbsag} months
- HBV DNA Level: {hbv_dna} IU/mL ({hbv_dna_2000_comparison} 2000 IU/mL)
- HBeAg Status: {hbeag_status}
- ALT Level: {alt_level} IU/L
- Fibrosis Stage: {fibrosis_stage}
- Necroinflammatory Activity: {necroinflammatory}
- Extrahepatic Manifestations: {extrahepatic}
- Immunosuppression: {immunosuppression}
- Coinfections: {coinfections_text}
- Family History (Cirrhosis/HCC): {family_history}
- Other Comorbidities: {comorbidities_text}
SASLT 2021 GUIDELINES REFERENCE:
{SASLT_GUIDELINES}
⚠️ MANDATORY ELIGIBILITY DECISION TREE - FOLLOW THIS EXACT SEQUENCE:
**STEP 1: Check ABSOLUTE INDICATIONS (these override ALL standard criteria):**
1a. **HBV-HIV Coinfection** [Page 9, Grade A]:
- Does patient have HIV coinfection? Check: {coinfections_text}
- If YES → **AUTOMATICALLY ELIGIBLE** (no other criteria needed)
- Rationale: "Patients with HBV-HIV coinfection are at increased risk of rapid fibrosis progression, development of HCC, and liver-related mortality"
- Treatment: TDF- or TAF-based ART regimen irrespective of CD4 count
1b. **Cirrhosis (F4)** [Page 6, Grade A]:
- Does patient have cirrhosis? Check: {fibrosis_stage}
- Does patient have ANY detectable HBV DNA? Check: {hbv_dna} IU/mL
- If BOTH YES → **AUTOMATICALLY ELIGIBLE**
1c. **Immunosuppression/Chemotherapy** [Page 9, Grade A]:
- Is patient undergoing immunosuppression? Check: {immunosuppression}
- Is HBsAg positive? Check: {hbsag_status}
- If BOTH YES → **AUTOMATICALLY ELIGIBLE** (prophylaxis required)
1d. **Pregnancy with High Viral Load** [Page 10, Grade D]:
- Is patient pregnant? Check: {pregnancy_status}
- Is HBV DNA > 100,000 IU/mL? Check: {hbv_dna} vs 100,000
- If BOTH YES → **AUTOMATICALLY ELIGIBLE**
→ If ANY absolute indication is met, STOP HERE and return ELIGIBLE = true
**STEP 2: If NO absolute indications, check STANDARD CRITERIA:**
2a. **High Viral Load + High ALT** [Page 6, Grade B]:
- HBV DNA > 20,000 IU/mL? → {hbv_dna} vs 20,000 = {dna_over_20000}
- ALT > 2×ULN (80 IU/L)? → {alt_level} vs 80 = {alt_over_80}
- If BOTH YES → ELIGIBLE (fibrosis stage irrelevant)
2b. **Standard Triple Criteria** [Page 6, Grade A]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
- F2+ OR A2+? → {fibrosis_stage} and {necroinflammatory} = [Check if F2+ OR A2+]
- If ALL THREE YES → ELIGIBLE
2c. **Moderate Fibrosis Exception** [Page 6]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- F2+ fibrosis? → {fibrosis_stage} = [Check if F2+]
- If BOTH YES → ELIGIBLE (even if ALT normal)
2d. **HBeAg Positive >30 years** [Page 6, Grade D]:
- HBeAg positive? → {hbeag_status}
- Age > 30? → {age} vs 30
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000
- If ALL THREE YES → ELIGIBLE
2e. **Family History** [Page 6, Grade D]:
- HBV DNA > 2,000 IU/mL? → {hbv_dna} vs 2,000 = {dna_over_2000}
- ALT > ULN (~40 IU/L)? → {alt_level} vs 40 = {alt_over_40}
- Family history HCC/cirrhosis? → {family_history}
- If ALL THREE YES → ELIGIBLE
**STEP 3: If NONE of the above criteria met:**
→ **NOT ELIGIBLE**
→ Patient requires monitoring per Page 7 guidelines
**CRITICAL RULES YOU MUST FOLLOW:**
1. ⚠️ **HIV COINFECTION = AUTOMATIC ELIGIBILITY** - This overrides ALL other parameters including normal ALT, low HBV DNA, mild fibrosis
2. ⚠️ **HBV DNA > 2,000 IU/mL is MANDATORY** for all standard criteria EXCEPT:
- Cirrhosis (needs only detectable HBV DNA)
- HIV coinfection (no HBV DNA threshold)
- Immunosuppression (no HBV DNA threshold)
3. **If HBV DNA ≤ 2,000 IU/mL:**
- Check for cirrhosis, HIV, immunosuppression
- If none present → AUTOMATICALLY NOT ELIGIBLE
- Elevated ALT + moderate fibrosis is NOT sufficient without HBV DNA >2,000
4. **Direct quotes from guidelines must be cited with [Page X]**
5. **Never hallucinate criteria** - use ONLY what's explicitly stated in guidelines above
6. ⚠️ **CRITICAL: CONSISTENCY REQUIREMENT** - The JSON "eligible" field MUST match the "Decision:" statement in your recommendations text:
- If you write "*Decision:* ELIGIBLE" in recommendations → JSON "eligible" MUST be true
- If you write "*Decision:* NOT ELIGIBLE" in recommendations → JSON "eligible" MUST be false
- These two fields MUST be perfectly consistent - any mismatch will be automatically corrected
RESPONSE FORMAT (JSON ONLY - NO MARKDOWN):
{{
"eligible": true or false,
"recommendations": "Start with clinical recommendations/decision, then discussion/rationale, and end with eligibility analysis. Use \\n for new lines. Do NOT include the patient summary here. Do NOT include editorial phrases like '(start with this section)', '(keep second)', or '(place last)'.",
"patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis stage, immunosuppression, coinfections). Use \\n for new lines."
}}
STRUCTURE OF "recommendations" FIELD:
Use \\n for line breaks (NOT literal newlines). Format as follows:
**Clinical Recommendation**\\n
*Decision:* [ELIGIBLE/NOT ELIGIBLE]\\n
*Immediate Plan:*\\n
{hiv_treatment_section if has_hiv else ""}
- If eligible (standard criteria): Preferred agents ETV/TDF/TAF with brief dosing note [SASLT 2021, Page 8, Grade A]\\n
- If not eligible: State monitoring cadence (ALT q3-6mo, HBV DNA q6-12mo, fibrosis yearly) [SASLT 2021, Page 7]\\n
\\n
*Key Factors:*\\n
- 3-5 concise bullets of the main clinical considerations driving the decision\\n
\\n
**Discussion & Rationale**\\n
- Concise narrative explaining why the patient is (not) eligible with SASLT page citations\\n
- Highlight any caveats or follow-up steps\\n
- Explicitly remind that HIV coinfection is an absolute indication if present\\n
\\n
**Eligibility Analysis**\\n
\\n
*Absolute Indications Check (Priority):*\\n
{hiv_absolute_indication if has_hiv else ""}
{no_hiv_line if not has_hiv else ""}
- Cirrhosis (F4): [Check and mark ✅ or ❌]\\n
- Immunosuppression: [Check and mark ✅ or ❌]\\n
- Pregnancy with high viral load: [Check and mark ✅ or ❌]\\n
\\n
*Standard Criteria Assessment (if no absolute indications):*\\n
- HBV DNA >2000 IU/mL: [✅ or ❌]\\n
- ALT >ULN (40 IU/L): [✅ or ❌]\\n
- Moderate necroinflammation/fibrosis (F2+/A2+): [✅ or ❌]\\n
\\n
*Special Considerations:*\\n
- Note any additional factors: family history, age >30, extrahepatic manifestations\\n
- Cite specific SASLT guideline provisions\\n
\\n
Do NOT include patient summary text in "recommendations". Place it only in "patient_summary".
Return ONLY the JSON object, nothing else."""

    patient_data["llm_prompt"] = analysis_prompt
    logger.info("β LLM prompt assembled")
    if has_hiv:
        logger.info("β οΈ HIV coinfection detected - absolute treatment indication")
    return patient_data
| # ============================================================================ | |
| # PHASE 2: LLM GENERATION AND POST-PROCESSING | |
| # ============================================================================ | |
def invoke_llm_for_assessment(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    LLM Generation Node (R-Node):
    - Invokes the LLM with the assembled prompt
    - Stores the raw LLM text response under "llm_response_raw"

    The stored response is always a stripped string. If the model returns
    its content as a list of parts (plain strings and/or dicts carrying a
    "text" entry), the text parts are concatenated; the downstream parser
    relies on string methods.

    Args:
        patient_data: Patient data with assembled prompt ("llm_prompt")

    Returns:
        The same patient_data dict with "llm_response_raw" added
    """
    logger.info("π€ [PHASE 2] LLM Generation Node")

    llm = get_llm()
    prompt = patient_data["llm_prompt"]

    logger.info("Sending prompt to LLM...")
    response = llm.invoke(prompt)
    logger.info("LLM response received")

    # Objects without a .content attribute are stringified as a whole.
    content = getattr(response, "content", response)
    if isinstance(content, str):
        response_text = content
    elif isinstance(content, list):
        text_parts = []
        for part in content:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                text_parts.append(part["text"])
        response_text = "".join(text_parts)
    else:
        response_text = str(content)
    response_text = response_text.strip()

    patient_data["llm_response_raw"] = response_text
    logger.info(f"β LLM response (first 200 chars): {response_text[:200]}...")
    return patient_data
def parse_structured_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Structured Output Parser Node (P-Node):
    - Locates the outermost {...} span in the raw LLM response (this also
      discards any surrounding markdown code fences or commentary)
    - Repairs common LLM formatting slips, then parses the span as JSON
    - Stores the parsed object under "parsed_result"

    The LLM's 'eligible' value is stored as returned; no override is applied
    in this node.

    Args:
        patient_data: Patient data with raw LLM response ("llm_response_raw")

    Returns:
        The same patient_data dict with "parsed_result" added

    Raises:
        ValueError: If no JSON object is found or the JSON cannot be parsed
    """
    logger.info("π [PHASE 2] Structured Output Parser Node")
    response_text = patient_data["llm_response_raw"]

    try:
        if not isinstance(response_text, str):
            raise ValueError("LLM response is not a text string")

        # Extract JSON from response (handle markdown code blocks)
        json_start = response_text.find("{")
        json_end = response_text.rfind("}") + 1
        if json_start == -1 or json_end <= json_start:
            raise ValueError("No JSON object found in response")
        json_str = response_text[json_start:json_end].strip()

        # Fix common LLM formatting issue: missing comma before patient summary field
        json_str = re.sub(
            r'("recommendations"\s*:\s*"(?:[^"\\]|\\.)*")\s*(?="patient[_ ]summary")',
            r'\1, ',
            json_str,
            flags=re.S,
        )

        # Remove zero-width characters and BOMs, which carry no content.
        for ch in ("\u200b", "\u200c", "\u200d", "\ufeff"):
            json_str = json_str.replace(ch, "")
        # A non-breaking space separates words, so it becomes a normal space:
        # valid JSON whitespace between tokens, and words inside string
        # values are not glued together.
        json_str = json_str.replace("\xa0", " ")

        # Escape raw control characters inside string values, then parse
        cleaned_json_str = clean_json_string(json_str)
        result = json.loads(cleaned_json_str)
    except (json.JSONDecodeError, ValueError) as e:
        logger.error(f"β Failed to parse LLM response as JSON: {e}")
        logger.error(f"Response text: {response_text}")
        raise ValueError(f"Failed to parse LLM response: {str(e)}") from e

    logger.info("β Successfully parsed JSON response")
    logger.info(f"β LLM determined eligibility: {result.get('eligible')}")
    patient_data["parsed_result"] = result
    return patient_data
def normalize_output(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Final Normalization Node:
    - Runs normalize_recommendations on the parsed recommendations value
    - Runs normalize_patient_summary on the parsed patient summary
    - Stores {eligible, recommendations, patient_summary} under
      ``assessment_result``
    - The parsed ``eligible`` value is copied through as-is (no fallback)

    Args:
        patient_data: Patient data with the parsed LLM output under
            ``parsed_result``

    Returns:
        The same ``patient_data`` dict, with ``assessment_result`` added
    """
    logger.info("π [PHASE 2] Final Normalization Node")
    llm_output = patient_data["parsed_result"]

    # NOTE(review): the default only covers a missing key; an explicit JSON
    # null is passed on as None — confirm normalize_recommendations accepts it.
    raw_recs = llm_output.get("recommendations", "")

    # The summary may arrive under either key spelling; the first non-empty
    # value wins, otherwise fall back to empty text.
    raw_summary = ""
    for summary_key in ("patient_summary", "patient summary"):
        candidate = llm_output.get(summary_key)
        if candidate:
            raw_summary = candidate
            break

    clean_recs = normalize_recommendations(raw_recs)
    clean_summary = normalize_patient_summary(raw_summary)

    patient_data["assessment_result"] = {
        "eligible": llm_output.get("eligible"),
        "recommendations": clean_recs,
        "patient_summary": clean_summary,
    }
    logger.info(
        "β Output normalized: recommendations=%d chars, patient_summary=%d chars",
        len(clean_recs),
        len(clean_summary),
    )
    return patient_data
| # ============================================================================ | |
| # CHAIN ASSEMBLY | |
| # ============================================================================ | |
def build_prompt_from_raw_text(raw_text: str) -> str:
    """
    Build the LLM prompt for an assessment driven by a free-form clinical note.

    The stripped note is embedded verbatim together with the
    ``SASLT_GUIDELINES`` text, followed by rules instructing the model to
    answer with a single JSON object holding ``eligible``,
    ``recommendations`` and ``patient_summary``.

    Args:
        raw_text: Free-form clinical note supplied by the user

    Returns:
        The complete prompt string
    """
    note_text = raw_text.strip()
    # NOTE(review): the schema hint in rule 3 orders the recommendations as
    # recommendation -> eligibility analysis -> discussion, while rule 4
    # demands recommendation -> discussion -> eligibility analysis. The two
    # conflict; confirm the intended order and align the wording.
    return f"""You are an expert hepatologist. Read the free-form clinical note
below, infer the relevant HBV parameters, and determine treatment eligibility
STRICTLY according to SASLT 2021 guidelines.
RAW CLINICAL NOTE (verbatim):
\"\"\"{note_text}\"\"\"
SASLT 2021 KEY EXCERPTS:
{SASLT_GUIDELINES}
REASONING & OUTPUT RULES:
1. First, internally extract the patient's HBV DNA, ALT, fibrosis, HBeAg, age,
pregnancy status, immunosuppression, coinfections, family history, and other
factors mentioned.
2. Apply the EXACT eligibility decision tree from the guidelines: absolute
indications first (HIV coinfection, cirrhosis, immunosuppression, pregnancy
with high viral load), then standard criteria (HBV DNA/ALT/fibrosis combos).
3. Return ONLY valid JSON (no markdown) using this schema:
{{
"eligible": true or false,
"recommendations": "Start with the clinical recommendation/decision, then the eligibility analysis, and end with a short discussion/rationale. Use \\n for new lines. Do NOT include the patient summary here.",
"patient_summary": "3-5 bullet patient summary (age, sex, HBV DNA, ALT, fibrosis, immunosuppression, coinfections). Use \\n for new lines."
}}
4. Recommendations order (must follow this sequence and DO NOT include editorial hints like '(start with this section)', '(keep second)', or '(place last)' in the output):
- Clinical Recommendation section FIRST with *Decision:* ELIGIBLE/NOT ELIGIBLE, immediate plan, and monitoring/treatment steps.
- Discussion/Rationale section SECOND with concise explanation and any caveats.
- Eligibility Analysis section LAST referencing specific SASLT criteria with citations.
5. The patient_summary field must only contain the patient summary text (no clinical recommendation content).
6. The JSON MUST be the only output. Do not include explanations outside the JSON.
Return the JSON object now."""
def create_hbv_assessment_chain():
    """
    Assemble the full HBV assessment chain.

    The chain starts with a passthrough and pipes the input through input
    validation, prompt assembly, the LLM call, JSON parsing, the eligibility
    consistency check and final output normalization, in that order.

    Returns:
        Runnable chain that processes patient data end-to-end
    """
    logger.info("π Building HBV Assessment Chain...")
    # Phase 1: Input Validation & Preparation
    # Phase 2: LLM-Based Eligibility Determination & Assessment
    node_functions = (
        validate_and_clean_input,
        assemble_llm_prompt,
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )
    pipeline = RunnablePassthrough()
    for node in node_functions:
        pipeline = pipeline | RunnableLambda(node)
    logger.info("β Chain built successfully")
    return pipeline
def create_hbv_assessment_chain_from_prompt():
    """
    Assemble an HBV assessment chain that begins at the LLM invocation step.

    This variant expects the caller to have placed the complete prompt text
    under ``llm_prompt`` already; it runs no validation or prompt-assembly
    step first. The steps from the LLM call onwards are the same ones the
    standard chain uses, so the final output structure matches.

    Returns:
        Runnable chain that takes a payload containing ``llm_prompt``
    """
    logger.info("π Building HBV Assessment Chain (from prompt)...")
    node_functions = (
        invoke_llm_for_assessment,
        parse_structured_output,
        validate_eligibility_consistency,
        normalize_output,
    )
    pipeline = RunnablePassthrough()
    for node in node_functions:
        pipeline = pipeline | RunnableLambda(node)
    logger.info("β Prompt-based chain built successfully")
    return pipeline
def run_assessment_chain(patient_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Build and run the standard HBV assessment chain for one patient.

    Args:
        patient_data: Patient data dictionary

    Returns:
        The ``assessment_result`` entry produced by the chain

    Raises:
        Exception: Any error raised while building or running the chain is
            logged and then re-raised unchanged
    """
    divider = "=" * 80
    logger.info(divider)
    logger.info("π STARTING HBV ASSESSMENT CHAIN")
    logger.info(divider)
    try:
        pipeline = create_hbv_assessment_chain()
        final_state = pipeline.invoke(patient_data)
        outcome = final_state["assessment_result"]
        logger.info(divider)
        logger.info("β CHAIN EXECUTION COMPLETE")
        logger.info(divider)
        logger.info(f"Eligible: {outcome['eligible']}")
        logger.info(
            f"Recommendations length: {len(outcome['recommendations'])} characters"
        )
        logger.info(divider)
        return outcome
    except Exception as exc:
        # Log for visibility, then let the caller decide how to handle it.
        logger.error(f"β Chain execution failed: {str(exc)}")
        logger.error(divider)
        raise
def run_assessment_chain_from_prompt(prompt_text: str) -> Dict[str, Any]:
    """
    Build and run the prompt-based HBV assessment chain.

    The given text is sent to the LLM as the prompt; the validation and
    prompt-assembly nodes of the standard chain are not part of this path.

    Args:
        prompt_text: Full prompt text to send to the LLM.

    Returns:
        The ``assessment_result`` entry produced by the chain.

    Raises:
        Exception: Any error raised while building or running the chain is
            logged and then re-raised unchanged.
    """
    divider = "=" * 80
    logger.info(divider)
    logger.info("π STARTING HBV ASSESSMENT CHAIN (FROM PROMPT)")
    logger.info(divider)
    try:
        pipeline = create_hbv_assessment_chain_from_prompt()
        # Downstream nodes read the prompt from the ``llm_prompt`` key.
        seed_state: Dict[str, Any] = {"llm_prompt": prompt_text}
        final_state = pipeline.invoke(seed_state)
        outcome = final_state["assessment_result"]
        logger.info(divider)
        logger.info("β PROMPT-BASED CHAIN EXECUTION COMPLETE")
        logger.info(divider)
        logger.info(f"Eligible: {outcome['eligible']}")
        logger.info(
            f"Recommendations length: {len(outcome['recommendations'])} characters"
        )
        logger.info(divider)
        return outcome
    except Exception as exc:
        # Log for visibility, then let the caller decide how to handle it.
        logger.error(f"β Prompt-based chain execution failed: {str(exc)}")
        logger.error(divider)
        raise