"""Batch-run HBV treatment-eligibility test cases against the assessment API.

Reads test cases from an Excel workbook, normalises each row into the JSON
payload sent to the ``/assess`` endpoint, records the API verdict and
recommendations in two new columns, and writes the result to CSV.
"""

import json
import re
import time
from typing import List, Optional

import pandas as pd
import requests

# Matches an explicit negation such as "Not pregnant", "non-pregnant" or
# "nonpregnant"; needed because those strings also contain "pregnant".
_NOT_PREGNANT_RE = re.compile(r'\b(?:not|non)[\s-]*pregnant')

# Fallback HBsAg duration (months) used when the cell carries no duration.
_DEFAULT_HBSAG_MONTHS = 6


def parse_hbsag_duration(hbsag_value: str) -> int:
    """Extract the HBsAg duration in months from a status string.

    Recognises a parenthesised duration such as ``"Positive (36 months)"`` or
    ``"Positive (10 years)"`` (unit matched case-insensitively); years are
    converted to months.

    Returns:
        The duration in months, or 6 when the value is missing or carries no
        parenthesised duration.
    """
    if pd.isna(hbsag_value):
        return _DEFAULT_HBSAG_MONTHS

    # IGNORECASE so "(10 Years)" is not silently replaced by the default.
    match = re.search(
        r'\((\d+)\s*(months?|years?)\)', str(hbsag_value), re.IGNORECASE
    )
    if match is None:
        return _DEFAULT_HBSAG_MONTHS

    amount = int(match.group(1))
    unit = match.group(2).lower()
    return amount * 12 if 'year' in unit else amount


def parse_status(value: str) -> str:
    """Normalise a serology status to ``'Positive'`` or ``'Negative'``.

    Any value containing "positive" (case-insensitive) maps to ``'Positive'``;
    everything else, including missing values, maps to ``'Negative'``.
    """
    if pd.isna(value):
        return "Negative"
    return "Positive" if 'positive' in str(value).lower() else "Negative"


def parse_sex(value: str) -> str:
    """Normalise sex to ``'Male'`` or ``'Female'``.

    Accepts ``m``/``male``/``f``/``female`` in any case, ignoring surrounding
    whitespace. Missing or unrecognised values default to ``'Male'``.
    """
    if pd.isna(value):
        return "Male"

    # Strip first: a trailing space in the spreadsheet ("Female ") must not
    # fall through to the "Male" default.
    normalised = str(value).strip().lower()
    if normalised in ('f', 'female'):
        return "Female"
    return "Male"


def parse_pregnancy_status(sex: str, value: str) -> str:
    """Normalise pregnancy status to ``'Pregnant'`` or ``'Not pregnant'``.

    Args:
        sex: Already-normalised sex; ``"Male"`` always yields ``'Not pregnant'``.
        value: Raw cell value.

    Returns:
        ``'Pregnant'`` when the value contains "yes" or "pregnant" without an
        explicit negation; ``'Not pregnant'`` otherwise (including missing).
    """
    if sex == "Male" or pd.isna(value):
        return "Not pregnant"

    val_lower = str(value).lower()
    # Negations must be tested first: "Not pregnant" contains "pregnant".
    if _NOT_PREGNANT_RE.search(val_lower):
        return "Not pregnant"
    if 'yes' in val_lower or 'pregnant' in val_lower:
        return "Pregnant"
    return "Not pregnant"


def parse_boolean(value: str) -> bool:
    """Return True when the value contains "yes" (case-insensitive).

    Missing values and anything without "yes" yield False.
    """
    if pd.isna(value):
        return False
    return 'yes' in str(value).lower()


def parse_fibrosis_stage(value: str) -> str:
    """Map a fibrosis description to a single stage ``'F0'`` .. ``'F4'``.

    The highest stage mentioned wins, so a range such as ``"F2-F3"`` resolves
    to ``'F3'``; any mention of cirrhosis maps to ``'F4'``. Missing values,
    ``"N/A"`` and unrecognised text map to ``'F0'``.

    NOTE(review): an earlier docstring said the API expects the grouped values
    'F0-F1', 'F2-F3' or 'F4', while the code has always emitted single stages.
    Confirm against the API schema.
    """
    if pd.isna(value) or value == "N/A":
        return "F0"

    val_str = str(value).upper()
    if 'CIRRHOSIS' in val_str:
        return "F4"

    # Checking from most to least severe makes ranges resolve to the higher
    # stage without separate range handling.
    for stage in ("F4", "F3", "F2", "F1", "F0"):
        if stage in val_str:
            return stage
    return "F0"


def parse_necroinflammation(value: str) -> str:
    """Map necroinflammatory activity to one of ``'A0'`` .. ``'A4'``.

    Textual grades take precedence over literal A-codes. Missing values and
    ``"none"`` map to ``'A0'``; unrecognised text defaults to ``'A1'``.
    """
    if pd.isna(value):
        return "A0"

    val_str = str(value).strip().upper()
    if val_str == "NONE":
        return "A0"

    # Order matters: textual descriptions are checked before explicit codes.
    keyword_grades = (
        ('MARKED', "A4"),
        ('SEVERE', "A3"),
        ('MODERATE', "A2"),
        ('MILD', "A1"),
        ('MINIMAL', "A0"),
    )
    for keyword, grade in keyword_grades:
        if keyword in val_str:
            return grade

    for grade in ("A4", "A3", "A2", "A1", "A0"):
        if grade in val_str:
            return grade

    # Default to mild if unclear.
    return "A1"


def parse_immunosuppression(value: str) -> str:
    """Map immunosuppressive therapy to ``'None'``, ``'Chemotherapy'`` or ``'Other'``.

    Missing values map to ``'None'``; text containing "chemo" maps to
    ``'Chemotherapy'``; text containing "none" maps to ``'None'``; anything
    else maps to ``'Other'``.
    """
    if pd.isna(value):
        return "None"

    val_lower = str(value).lower()
    if 'chemo' in val_lower:
        return "Chemotherapy"
    if 'none' in val_lower:
        return "None"
    return "Other"


def parse_coinfections(value: str) -> List[str]:
    """Return the coinfections mentioned in the value.

    Only ``HCV``, ``HIV`` and ``HDV`` are recognised, and they are always
    returned in that order. Missing values or ``"none"`` give an empty list.
    """
    if pd.isna(value):
        return []

    val_upper = str(value).upper()
    return [name for name in ("HCV", "HIV", "HDV") if name in val_upper]


def parse_comorbidities(value: str) -> Optional[List[str]]:
    """Return the comorbidity text as a one-element list, or None if absent.

    Missing values and ``"none"`` (any case, surrounding whitespace ignored)
    yield None; otherwise the raw text is returned unchanged inside a list.
    """
    if pd.isna(value) or str(value).strip().lower() == "none":
        return None
    return [str(value)]


def create_api_payload(row: pd.Series) -> dict:
    """Build the API request payload from one spreadsheet row.

    Raises:
        ValueError: If ``Age``, ``HBV DNA (IU/mL)`` or ``ALT (U/L)`` cannot be
            converted to a number (for example a blank ``Age`` cell).
        KeyError: If an expected column is missing from the row.
    """
    sex = parse_sex(row['Sex'])

    return {
        "sex": sex,
        "age": int(row['Age']),
        "pregnancy_status": parse_pregnancy_status(sex, row['Pregnancy Status']),
        "hbsag_status": parse_status(row['HBsAg']),
        "duration_hbsag_months": parse_hbsag_duration(row['HBsAg']),
        "hbv_dna_level": float(row['HBV DNA (IU/mL)']),
        "hbeag_status": parse_status(row['HBeAg']),
        "alt_level": float(row['ALT (U/L)']),
        "fibrosis_stage": parse_fibrosis_stage(row['Fibrosis/Cirrhosis Stage']),
        "necroinflammatory_activity": parse_necroinflammation(row['Necroinflammation']),
        "extrahepatic_manifestations": parse_boolean(row['Extrahepatic Manifestations']),
        "immunosuppression_status": parse_immunosuppression(row['Immunosuppressive Therapy']),
        "coinfections": parse_coinfections(row['Coinfections']),
        "family_history_cirrhosis_hcc": parse_boolean(row['Family History of HCC/Cirrhosis']),
        "other_comorbidities": parse_comorbidities(row['Comorbidities']),
    }


def assess_case(payload: dict, api_url: str) -> dict:
    """POST one payload to the assessment API and return its JSON response.

    Failures never raise: they are printed and reported as a dict with
    ``"eligible": None`` and the error text under ``"recommendations"``.
    """
    try:
        response = requests.post(
            api_url,
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as e:
        # Prefer the structured error body from the server when there is one.
        error_detail = None
        if e.response is not None:
            try:
                error_detail = e.response.json()
            except ValueError:
                error_detail = None

        if error_detail is not None:
            print(f"API Error Details: {json.dumps(error_detail, indent=2)}")
        else:
            print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a success response whose body is not valid JSON.
        print(f"API Error: {e}")
        return {"eligible": None, "recommendations": f"Error: {str(e)}"}


def main():
    """Run every case in the workbook through the API and save the results."""
    # Configuration
    input_file = r"D:\Work\HBV AI Assistant\HBV_Eligibility_TestCases- Model Answer VS Test.xlsx"
    output_file = "HBV_Eligibility_Results.csv"
    api_url = "https://moazx-hbv-ai-assistant.hf.space/assess"

    # Read Excel
    print(f"Reading {input_file}...")
    df = pd.read_excel(input_file)

    # Add columns for results
    df['Eligibility-20/11'] = ''
    df['recommendations-20/11'] = ''

    # Process each case
    print(f"\nProcessing {len(df)} cases...")
    for idx, row in df.iterrows():
        case_id = row['Case ID']
        print(f"\nProcessing {case_id}...")

        # Create payload; one malformed row must not abort the whole run and
        # discard the results already collected.
        try:
            payload = create_api_payload(row)
        except (ValueError, TypeError) as e:
            print(f"Skipping {case_id}: invalid input data ({e})")
            df.at[idx, 'recommendations-20/11'] = f"Error: {e}"
            continue
        print(f"Payload: {json.dumps(payload, indent=2)}")

        # Call API
        result = assess_case(payload, api_url)
        print(f"Result: {result}")

        # Update dataframe; anything other than an explicit True/False
        # verdict (e.g. an API error) is left blank.
        verdict = result.get('eligible')
        if verdict is True:
            eligibility = 'eligible'
        elif verdict is False:
            eligibility = 'not eligible'
        else:
            eligibility = ''
        df.at[idx, 'Eligibility-20/11'] = eligibility
        df.at[idx, 'recommendations-20/11'] = result.get('recommendations', '')

        # Small delay between requests
        time.sleep(1.0)

    # Save results
    print(f"\nSaving results to {output_file}...")
    df.to_csv(output_file, index=False)
    print("Done!")

    # Print summary; failed/undetermined cases are reported separately rather
    # than being counted as "not eligible".
    eligible_count = (df['Eligibility-20/11'] == 'eligible').sum()
    not_eligible_count = (df['Eligibility-20/11'] == 'not eligible').sum()
    undetermined_count = len(df) - eligible_count - not_eligible_count
    print("\nSummary:")
    print(f"Total cases: {len(df)}")
    print(f"Eligible: {eligible_count}")
    print(f"Not eligible: {not_eligible_count}")
    print(f"Errors/undetermined: {undetermined_count}")


if __name__ == "__main__":
    main()