# Spaces: moazx / HBV_AI_Assistant (status: Sleeping)
# File size: 8,364 Bytes

import pandas as pd
import requests
import json
import re
from typing import Optional, List
import time


def parse_hbsag_duration(hbsag_value: str) -> int:
    """Extract the HBsAg duration, in months, from an HBsAg status string.

    Looks for a parenthesised "<number> <unit>" fragment such as
    "Positive (36 months)" or "Positive (10 years)". Year values are
    converted to months.

    Args:
        hbsag_value: Raw HBsAg cell value; may be NaN/None.

    Returns:
        The duration in whole months, or 6 when the value is missing or
        contains no recognisable duration.
    """
    if pd.isna(hbsag_value):
        return 6  # Default to 6 months if not specified

    # Match case-insensitively: the unit is lowercased below, so inputs such as
    # "(10 Years)" are clearly meant to be accepted rather than defaulted to 6.
    match = re.search(
        r'\(\s*(\d+)\s*(months?|years?)\s*\)',
        str(hbsag_value),
        re.IGNORECASE,
    )
    if match:
        value = int(match.group(1))
        unit = match.group(2).lower()
        return value * 12 if 'year' in unit else value

    # If just "Positive" with no duration, default to 6 months
    return 6


def parse_status(value: str) -> str:
    """Normalise a serology status cell to exactly 'Positive' or 'Negative'.

    Any value containing the word "positive" (case-insensitive) is treated as
    'Positive'; missing values and everything else become 'Negative'.
    """
    is_positive = (not pd.isna(value)) and 'positive' in str(value).lower()
    return "Positive" if is_positive else "Negative"


def parse_sex(value: str) -> str:
    """Parse sex to 'Male' or 'Female' (exact capitalization required).

    Accepts 'm'/'male' and 'f'/'female' in any letter case, ignoring
    surrounding whitespace.

    Args:
        value: Raw sex cell value; may be NaN/None.

    Returns:
        'Female' for a recognised female value; 'Male' for a recognised male
        value and also as the fallback for missing or unrecognised input.
    """
    if pd.isna(value):
        return "Male"

    # Strip first: spreadsheet cells often carry stray spaces, and without this
    # "Female " would silently fall through to the "Male" default.
    normalized = str(value).strip().lower()
    if normalized in {'f', 'female'}:
        return "Female"
    return "Male"


def parse_pregnancy_status(sex: str, value: str) -> str:
    """Parse pregnancy status to 'Not pregnant' or 'Pregnant' (exact capitalization required).

    Args:
        sex: Already-normalised sex; 'Male' always yields 'Not pregnant'.
        value: Raw pregnancy cell value; may be NaN/None.

    Returns:
        'Pregnant' when the value contains "yes" or an un-negated "pregnant";
        otherwise 'Not pregnant'.
    """
    if sex == "Male" or pd.isna(value):
        return "Not pregnant"

    val_lower = str(value).strip().lower()

    # "Not pregnant" / "Non-pregnant" contain the substring "pregnant", so an
    # explicit negation has to be ruled out before the positive substring test.
    if re.search(r'\b(?:not|non)[\s-]*(?:currently\s+)?pregnant', val_lower):
        return "Not pregnant"

    if 'yes' in val_lower or 'pregnant' in val_lower:
        return "Pregnant"
    return "Not pregnant"


def parse_boolean(value: str) -> bool:
    """Convert a Yes/No style cell to a boolean.

    Returns True only when the text contains "yes" (case-insensitive);
    missing values and anything else are False.
    """
    return False if pd.isna(value) else 'yes' in str(value).lower()


def parse_fibrosis_stage(value: str) -> str:
    """Map a free-text fibrosis/cirrhosis description to a discrete stage.

    The result is always one of 'F0', 'F1', 'F2', 'F3' or 'F4'. When the text
    names a range (e.g. 'F2-F3'), the higher stage of the range is returned,
    because stages are tested from most to least severe.

    Args:
        value: Raw fibrosis/cirrhosis cell value; may be NaN/None or "N/A".

    Returns:
        'F4' if the text mentions F4 or (un-negated) cirrhosis; otherwise the
        highest F-stage mentioned; 'F0' when nothing is recognised or the
        value is missing.
    """
    if pd.isna(value) or value == "N/A":
        return "F0"

    val_str = str(value).upper()

    # "No cirrhosis" / "without evidence of cirrhosis" must not be read as F4.
    negated_cirrhosis = re.search(
        r'\b(?:NO|NOT|NON|WITHOUT)\b[\s-]*(?:\w+\s+){0,3}CIRRHOSIS',
        val_str,
    )
    if 'F4' in val_str or ('CIRRHOSIS' in val_str and not negated_cirrhosis):
        return "F4"

    # Descending order makes ranges resolve to their higher stage
    # ('F0-F1' -> 'F1', 'F1-F2' -> 'F2', 'F2-F3' -> 'F3'), so no separate
    # range handling is needed.
    for stage in ("F3", "F2", "F1", "F0"):
        if stage in val_str:
            return stage

    return "F0"


def parse_necroinflammation(value: str) -> str:
    """Map a necroinflammation description to one of 'A0','A1','A2','A3','A4'.

    Descriptive words take precedence over literal A-codes; within each group
    the most severe match wins. Missing values and the literal "none" give
    'A0'; text that matches nothing falls back to 'A1'.
    """
    if pd.isna(value):
        return "A0"

    text = str(value).strip()
    if text.lower() == "none":
        return "A0"

    grade_text = text.upper()

    # Descriptive keywords, checked from most to least severe.
    keyword_grades = (
        ("MARKED", "A4"),
        ("SEVERE", "A3"),
        ("MODERATE", "A2"),
        ("MILD", "A1"),
        ("MINIMAL", "A0"),
    )
    for keyword, grade in keyword_grades:
        if keyword in grade_text:
            return grade

    # Literal A-codes already present in the text.
    for grade in ("A4", "A3", "A2", "A1", "A0"):
        if grade in grade_text:
            return grade

    # Unrecognised text is treated as mild.
    return "A1"


def parse_immunosuppression(value: str) -> str:
    """Classify immunosuppressive therapy as 'None', 'Chemotherapy', or 'Other'.

    Missing values count as 'None'. Text mentioning "chemo" is 'Chemotherapy';
    otherwise text mentioning "none" is 'None'; anything else is 'Other'.
    """
    if pd.isna(value):
        return "None"

    therapy = str(value).lower()
    if 'chemo' in therapy:
        return "Chemotherapy"
    return "None" if 'none' in therapy else "Other"


def parse_coinfections(value: str) -> List[str]:
    """Extract the recognised coinfections (HCV, HIV, HDV) from a cell.

    Matching is case-insensitive. The result lists the viruses found in the
    fixed order HCV, HIV, HDV; missing values and the literal "none" give an
    empty list.
    """
    if pd.isna(value):
        return []

    raw = str(value)
    if raw.lower() == "none":
        return []

    haystack = raw.upper()
    return [virus for virus in ("HCV", "HIV", "HDV") if virus in haystack]


def parse_comorbidities(value: str) -> Optional[List[str]]:
    """Wrap the comorbidities cell in a single-item list.

    Returns None when the value is missing or is the literal "none"
    (case-insensitive); otherwise a one-element list holding the cell text.
    """
    missing = pd.isna(value) or str(value).lower() == "none"
    return None if missing else [str(value)]


def create_api_payload(row: pd.Series) -> dict:
    """Build the assessment request body from one spreadsheet row.

    Each field is produced by the matching parse_* helper. Age is converted
    with int() and the HBV DNA / ALT columns with float(), so non-numeric
    cells in those columns raise.
    """
    patient_sex = parse_sex(row['Sex'])

    # Keys are inserted in the same order the request has always used.
    payload = {}
    payload["sex"] = patient_sex
    payload["age"] = int(row['Age'])
    payload["pregnancy_status"] = parse_pregnancy_status(patient_sex, row['Pregnancy Status'])
    payload["hbsag_status"] = parse_status(row['HBsAg'])
    payload["duration_hbsag_months"] = parse_hbsag_duration(row['HBsAg'])
    payload["hbv_dna_level"] = float(row['HBV DNA (IU/mL)'])
    payload["hbeag_status"] = parse_status(row['HBeAg'])
    payload["alt_level"] = float(row['ALT (U/L)'])
    payload["fibrosis_stage"] = parse_fibrosis_stage(row['Fibrosis/Cirrhosis Stage'])
    payload["necroinflammatory_activity"] = parse_necroinflammation(row['Necroinflammation'])
    payload["extrahepatic_manifestations"] = parse_boolean(row['Extrahepatic Manifestations'])
    payload["immunosuppression_status"] = parse_immunosuppression(row['Immunosuppressive Therapy'])
    payload["coinfections"] = parse_coinfections(row['Coinfections'])
    payload["family_history_cirrhosis_hcc"] = parse_boolean(row['Family History of HCC/Cirrhosis'])
    payload["other_comorbidities"] = parse_comorbidities(row['Comorbidities'])
    return payload


def assess_case(payload: dict, api_url: str) -> dict:
    """POST one case to the assessment API and return its JSON result.

    Args:
        payload: Request body, sent as JSON.
        api_url: Full URL of the assessment endpoint.

    Returns:
        The decoded JSON object on success. On any failure (HTTP error status,
        connection problem/timeout, or a body that is not a JSON object) a
        dict of the form {"eligible": None, "recommendations": "Error: ..."}
        is returned instead of raising, so a batch run can continue.
    """
    try:
        response = requests.post(
            api_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=30
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # Try to show the server's own error body; fall back to the exception.
        try:
            error_detail = e.response.json() if e.response is not None else None
            if error_detail is None:
                raise ValueError("no response body")
            print(f"API Error Details: {json.dumps(error_detail, indent=2)}")
        except ValueError:
            # Body was absent or not JSON (JSON decode errors subclass ValueError).
            print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
    except requests.exceptions.RequestException as e:
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}

    # A 2xx response may still carry a non-JSON body (e.g. an HTML error page).
    try:
        result = response.json()
    except ValueError as e:
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}

    # Callers use result.get(...), so anything but a JSON object is an error.
    if not isinstance(result, dict):
        message = f"unexpected response type {type(result).__name__}"
        print(f"API Error: {message}")
        return {"eligible": None, "recommendations": f"Error: {message}"}

    return result


def main():
    """Run every case in the input workbook through the API and save the results.

    Reads the Excel file, builds a payload per row, calls the assessment API,
    writes the verdict and recommendations into two new columns, saves the
    frame as CSV, and prints a summary. Rows whose numeric cells cannot be
    converted are recorded as errors instead of aborting the batch.
    """
    # Configuration
    input_file = r"D:\Work\HBV AI Assistant\HBV_Eligibility_TestCases- Model Answer VS Test.xlsx"
    output_file = "HBV_Eligibility_Results.csv"
    api_url = "https://moazx-hbv-ai-assistant.hf.space/assess"

    eligibility_col = 'Eligibility-20/11'
    recommendations_col = 'recommendations-20/11'

    # Read Excel
    print(f"Reading {input_file}...")
    df = pd.read_excel(input_file)

    # Add columns for results
    df[eligibility_col] = ''
    df[recommendations_col] = ''

    # Process each case
    print(f"\nProcessing {len(df)} cases...")
    for idx, row in df.iterrows():
        case_id = row['Case ID']
        print(f"\nProcessing {case_id}...")

        # Create payload. A blank Age or non-numeric lab value makes int()/float()
        # raise; record that on the row and keep going rather than losing the
        # results gathered so far.
        try:
            payload = create_api_payload(row)
        except (ValueError, TypeError) as e:
            print(f"Skipping {case_id}: invalid input data ({e})")
            df.at[idx, recommendations_col] = f"Error: {e}"
            continue
        print(f"Payload: {json.dumps(payload, indent=2)}")

        # Call API
        result = assess_case(payload, api_url)
        print(f"Result: {result}")

        # Update dataframe; anything other than a literal True/False verdict
        # (e.g. None after an API error) is left blank.
        verdict = result.get('eligible')
        if verdict is True:
            df.at[idx, eligibility_col] = 'eligible'
        elif verdict is False:
            df.at[idx, eligibility_col] = 'not eligible'
        else:
            df.at[idx, eligibility_col] = ''
        df.at[idx, recommendations_col] = result.get('recommendations', '')

        # Small delay between requests
        time.sleep(1.0)

    # Save results
    print(f"\nSaving results to {output_file}...")
    df.to_csv(output_file, index=False)
    print("Done!")

    # Print summary. Count "not eligible" explicitly: subtracting the eligible
    # count from the total would misreport errored cases as not eligible.
    eligible_count = int((df[eligibility_col] == 'eligible').sum())
    not_eligible_count = int((df[eligibility_col] == 'not eligible').sum())
    undetermined_count = len(df) - eligible_count - not_eligible_count
    print("\nSummary:")
    print(f"Total cases: {len(df)}")
    print(f"Eligible: {eligible_count}")
    print(f"Not eligible: {not_eligible_count}")
    print(f"Undetermined (errors): {undetermined_count}")


# Run the batch assessment only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()