"""Heuristic risk scoring for e-mail bodies.

Combines a fixed list of suspicious-phrase regexes with three hosted
Hugging Face text-classification models (AI-text detector, sentiment,
spam) and returns human-readable findings plus a cumulative risk score.
"""

import logging
import os
import re
from operator import itemgetter

import requests

logger = logging.getLogger(__name__)

HF_API_KEY = os.getenv("HF_API_KEY")  # Hugging Face free account
# Only send an Authorization header when a key is configured; otherwise the
# header would literally read "Bearer None".
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

# Upper bound (seconds) for a single inference request so a stalled
# connection cannot block the analysis indefinitely.
HF_TIMEOUT_SECONDS = 30

# Only the first MAX_INPUT_CHARS characters of the body are sent to a model.
MAX_INPUT_CHARS = 1000

MODELS = {
    "ai_detector": "roberta-base-openai-detector",
    "sentiment": "finiteautomata/bertweet-base-sentiment-analysis",
    "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
}

# Suspicious patterns to look for (matched against the lower-cased body)
SUSPICIOUS_PATTERNS = [
    r"verify your account",
    r"urgent action",
    r"click here",
    r"reset (your )?password",
    r"confirm (your )?identity",
    r"bank account",
    r"invoice",
    r"payment (required|overdue|failed|method expired)",
    r"unauthorized login",
    r"compromised",
    r"final reminder",
    r"account (suspended|deactivated|locked)",
    r"update your (information|details|billing)",
    r"legal action",
]

# Score added for every suspicious phrase occurrence.
PHRASE_WEIGHT = 20

# (MODELS key, name shown in findings, lower-cased labels that add risk, weight)
_MODEL_CHECKS = (
    # AI detector: informational only, no score impact yet.
    ("ai_detector", "AI Detector", frozenset(), 0),
    # Negative/urgent tone adds risk.
    # NOTE(review): the configured sentiment model appears to emit
    # "NEG"/"NEU"/"POS" rather than "negative" -- confirm on the model card.
    ("sentiment", "Sentiment", frozenset({"negative", "neg"}), 10),
    # Spam classification increases risk.
    # NOTE(review): the configured spam model appears to emit
    # "LABEL_0"/"LABEL_1" with LABEL_1 presumably meaning spam -- confirm
    # on the model card before relying on this mapping.
    ("spam", "Spam Detector", frozenset({"spam", "label_1"}), 20),
)

_NO_FINDINGS_MESSAGE = "No suspicious content detected in body."


def query_hf(model, text, timeout=HF_TIMEOUT_SECONDS):
    """Send *text* to a hosted Hugging Face model and return the parsed JSON.

    Args:
        model: Model identifier appended to the inference API URL.
        text: Input text; only the first ``MAX_INPUT_CHARS`` characters
            are sent.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON payload (whatever the API returned, including
        error objects), or ``None`` if the request failed or the response
        body was not valid JSON.
    """
    url = f"https://api-inference.huggingface.co/models/{model}"
    try:
        res = requests.post(
            url,
            headers=HF_HEADERS,
            json={"inputs": text[:MAX_INPUT_CHARS]},
            timeout=timeout,
        )
        return res.json()
    except (requests.RequestException, ValueError) as exc:
        # Best-effort: a failed model call must not abort the analysis,
        # but it should leave a trace for whoever debugs missing findings.
        logger.warning("Hugging Face query for %s failed: %s", model, exc)
        return None


def _top_prediction(result):
    """Return ``(label, score)`` of the best prediction, or ``None``.

    Accepts both response shapes: a flat ``[{label, score}, ...]`` list and
    the nested ``[[{label, score}, ...]]`` list. Anything else (``None``,
    error dicts, malformed entries) yields ``None``.
    """
    if not isinstance(result, list) or not result:
        return None
    candidates = result[0] if isinstance(result[0], list) else result
    usable = [
        entry
        for entry in candidates
        if isinstance(entry, dict)
        and "label" in entry
        and isinstance(entry.get("score"), (int, float))
    ]
    if not usable:
        return None
    best = max(usable, key=itemgetter("score"))
    return str(best["label"]), float(best["score"])


def _find_suspicious_phrases(text):
    """Return every suspicious phrase occurrence found in *text*.

    Matching is done on the lower-cased text. The full matched phrase is
    returned (``group(0)``); ``re.findall`` would instead return only the
    capture-group contents for patterns such as ``reset (your )?password``.
    """
    body_lower = text.lower()
    return [
        match.group(0)
        for pattern in SUSPICIOUS_PATTERNS
        for match in re.finditer(pattern, body_lower)
    ]


def analyze_body(text):
    """Analyze an e-mail body and return ``(findings, score)``.

    Args:
        text: The message body. ``None`` or blank text is treated as having
            nothing to analyze.

    Returns:
        A tuple of a list of human-readable finding strings and an integer
        risk score. When nothing was found the list holds a single
        "nothing detected" message and the score is 0.
    """
    if not text or not text.strip():
        # Nothing to match and nothing worth sending to the models.
        return [_NO_FINDINGS_MESSAGE], 0

    findings = []
    score = 0

    # --- 1. Suspicious keyword detection ---
    for phrase in _find_suspicious_phrases(text):
        findings.append(f'Suspicious phrase detected: "{phrase}"')
        score += PHRASE_WEIGHT

    # --- 2-4. AI-generated text, sentiment, and spam-vs-ham models ---
    for model_key, display_name, risky_labels, weight in _MODEL_CHECKS:
        prediction = _top_prediction(query_hf(MODELS[model_key], text))
        if prediction is None:
            continue  # model unavailable or unexpected payload: skip it
        label, confidence = prediction
        findings.append(
            f"Body: {display_name} → {label} (confidence {confidence:.2f})"
        )
        if label.lower() in risky_labels:
            score += weight

    if not findings:
        return [_NO_FINDINGS_MESSAGE], 0
    return findings, score