File size: 2,773 Bytes
d9f5d53
 
bedf234
d9f5d53
 
 
 
 
 
 
 
 
 
bedf234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f5d53
 
bedf234
 
 
 
 
d9f5d53
 
 
bedf234
 
d9f5d53
bedf234
 
 
 
 
 
d9f5d53
bedf234
 
 
 
 
 
 
d9f5d53
bedf234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f5d53
bedf234
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import requests
import os
import re

# Hugging Face API token (a free account works), read from the environment.
HF_API_KEY = os.getenv("HF_API_KEY")
# Only attach an Authorization header when a token is actually configured;
# otherwise the header would carry the literal text "Bearer None".
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

# Maps each analysis task name to the Hugging Face model id queried for it.
MODELS = dict(
    ai_detector="roberta-base-openai-detector",
    sentiment="finiteautomata/bertweet-base-sentiment-analysis",
    spam="mrm8488/bert-tiny-finetuned-sms-spam-detection",
)

# Regular expressions for phrases commonly seen in phishing / scam emails.
# They are matched against lower-cased text, so they are written in lower case.
# Groups are non-capturing (``(?:...)``) on purpose: with capturing groups
# ``re.findall`` returns only the group text (e.g. "your " or "") instead of
# the whole matched phrase.
SUSPICIOUS_PATTERNS = [
    r"verify your account",
    r"urgent action",
    r"click here",
    r"reset (?:your )?password",
    r"confirm (?:your )?identity",
    r"bank account",
    r"invoice",
    r"payment (?:required|overdue|failed|method expired)",
    r"unauthorized login",
    r"compromised",
    r"final reminder",
    r"account (?:suspended|deactivated|locked)",
    r"update your (?:information|details|billing)",
    r"legal action",
]

def query_hf(model, text, timeout=30):
    """Query a Hugging Face Inference API model and return the decoded JSON.

    Args:
        model: Model id appended to the inference endpoint URL.
        text: Input text; only the first 1000 characters are sent.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        Whatever JSON the endpoint responds with (the HTTP status is not
        checked, so this may be an error payload), or ``None`` when the
        request fails, times out, the response is not valid JSON, or *text*
        cannot be sliced/serialized.
    """
    url = f"https://api-inference.huggingface.co/models/{model}"
    try:
        res = requests.post(
            url,
            headers=HF_HEADERS,
            json={"inputs": text[:1000]},
            # Without a timeout requests can block indefinitely on a stalled
            # connection, hanging the whole analysis.
            timeout=timeout,
        )
        return res.json()
    except (requests.RequestException, ValueError, TypeError):
        # Best effort: transport errors, undecodable bodies and unusable
        # input all mean "no result"; callers treat None as "skip this check".
        return None

def _top_prediction(result):
    """Return ``(label, score)`` of the highest-scoring prediction, or ``None``.

    Accepts both a flat list of ``{"label", "score"}`` dicts and the same list
    wrapped in one extra list (``[[{...}, ...]]``). Anything else (``None``,
    an error dict, malformed entries) yields ``None``.
    """
    if not isinstance(result, list) or not result:
        return None
    candidates = result[0] if isinstance(result[0], list) else result
    valid = [
        item
        for item in candidates
        if isinstance(item, dict)
        and isinstance(item.get("label"), str)
        and isinstance(item.get("score"), (int, float))
    ]
    if not valid:
        return None
    best = max(valid, key=lambda item: item["score"])
    return best["label"], best["score"]


def analyze_body(text):
    """Scan an email body for phishing indicators and compute a risk score.

    The body is checked against ``SUSPICIOUS_PATTERNS`` (20 points per match)
    and then sent to the three models in ``MODELS``: the AI-text detector is
    informational only, a negative sentiment adds 10 points and a spam
    classification adds 20 points. Model checks whose query yields no usable
    prediction are skipped.

    Args:
        text: The email body. ``None`` or blank text is treated as having
            nothing to analyze.

    Returns:
        A ``(findings, score)`` tuple: a list of human-readable finding
        strings and the accumulated integer score. When nothing was found the
        list holds a single "no suspicious content" message and the score is 0.
    """
    no_findings = ["No suspicious content detected in body."], 0

    # Nothing to scan and no point spending API calls on an empty body.
    if not text or not text.strip():
        return no_findings

    findings = []
    score = 0
    body_lower = text.lower()

    # --- 1. Suspicious keyword detection ---
    for pattern in SUSPICIOUS_PATTERNS:
        # finditer + group(0) reports the whole matched phrase even when the
        # pattern contains groups (findall would return only the group text).
        for match in re.finditer(pattern, body_lower):
            findings.append(f'Suspicious phrase detected: "{match.group(0)}"')
            score += 20  # weight for suspicious phrase

    # --- 2-4. Model-based checks ---
    # (MODELS key, display name, labels that add risk, points added)
    # NOTE(review): confirm the label names the spam model actually emits; if
    # it returns generic ids such as LABEL_0/LABEL_1, "spam" never matches.
    model_checks = (
        ("ai_detector", "AI Detector", frozenset(), 0),  # informational only
        ("sentiment", "Sentiment", frozenset({"negative", "neg"}), 10),
        ("spam", "Spam Detector", frozenset({"spam"}), 20),
    )
    for model_key, title, risky_labels, weight in model_checks:
        prediction = _top_prediction(query_hf(MODELS[model_key], text))
        if prediction is None:
            continue
        label, confidence = prediction
        findings.append(f"Body: {title} → {label} (confidence {confidence:.2f})")
        if label.lower() in risky_labels:
            score += weight

    if not findings:
        return no_findings

    return findings, score