File size: 4,007 Bytes
d9f5d53
 
bedf234
d9f5d53
4fb4f18
 
d9f5d53
 
 
 
 
 
 
bedf234
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90ca431
 
 
 
 
 
 
 
 
 
 
 
 
d9f5d53
4fb4f18
 
bedf234
4fb4f18
 
 
 
a235ecf
4fb4f18
bedf234
 
 
d9f5d53
a235ecf
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f5d53
 
bedf234
a235ecf
 
d9f5d53
a235ecf
bedf234
 
 
a235ecf
 
 
 
 
4fb4f18
a235ecf
 
 
 
4fb4f18
d9f5d53
a235ecf
90ca431
 
 
 
a235ecf
90ca431
a235ecf
 
 
bedf234
d9f5d53
a235ecf
 
 
bedf234
4fb4f18
 
bedf234
a235ecf
 
 
bedf234
 
90ca431
bedf234
a235ecf
90ca431
 
 
 
 
 
 
d9f5d53
90ca431
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import requests
import os
import re

# Hugging Face inference API credentials. When HF_API_KEY is absent from
# the environment, HF_HEADERS stays empty and all model calls are skipped.
HF_API_KEY = os.getenv("HF_API_KEY")
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}

# Hosted models queried by analyze_body: AI-generated-text detection,
# sentiment analysis, and SMS-spam classification.
MODELS = {
    "ai_detector": "roberta-base-openai-detector",
    "sentiment": "finiteautomata/bertweet-base-sentiment-analysis",
    "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
}

# Regex heuristics for common phishing/spam phrasing; each match adds to
# the risk score in analyze_body. Patterns are written in lowercase and
# matched case-insensitively against the body text.
SUSPICIOUS_PATTERNS = [
    r"verify your account",
    r"urgent action",
    r"click here",
    r"reset (your )?password",
    r"confirm (your )?identity",
    r"bank account",
    r"invoice",
    r"payment (required|overdue|failed|method expired)",
    r"unauthorized login",
    r"compromised",
    r"final reminder",
    r"account (suspended|deactivated|locked)",
    r"update your (information|details|billing)",
    r"legal action",
    r"free trial",
    r"limited time offer",
    r"click below",
    r"winner",
    r"congratulations",
    r"urgent response",
    r"claim your prize",
    r"act now",
    r"unsubscribe",
    r"lottery",
    r"risk-free",
]

def query_hf(model, text):
    """Query the Hugging Face inference API for *model* with *text*.

    Returns the decoded JSON response on success, or None when no API
    key is configured or the request fails for any reason (network
    error, timeout, non-2xx status, undecodable body). Callers treat
    None as "no model signal".
    """
    if not HF_API_KEY:
        return None
    try:
        res = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=HF_HEADERS,
            # Cap the payload; long bodies add latency without improving
            # the classification signal.
            json={"inputs": text[:1000]},
            timeout=15,
        )
        # Non-2xx responses (model still loading, rate limited) carry an
        # error JSON body rather than predictions -- surface them as
        # None instead of passing the error dict downstream.
        res.raise_for_status()
        return res.json()
    except Exception:
        # Best-effort enrichment: any failure degrades to "no result".
        return None

def parse_hf_result(result):
    """Extract the top (label, score) pair from an HF inference response.

    Handles the common response shapes:
      * [{"label": ..., "score": ...}, ...]        -- flat list
      * [[{"label": ..., "score": ...}, ...]]      -- nested list, as
        returned by text-classification pipelines
      * {"labels": [...], "scores": [...]}         -- zero-shot style

    Returns:
        (label, score) for the first prediction, or (None, None) when
        the result is empty, an error payload, or unrecognized.
    """
    if not result:
        return None, None
    # Text-classification endpoints wrap the prediction list in an extra
    # list (one entry per input); unwrap it before the flat-list check.
    if isinstance(result, list) and result and isinstance(result[0], list):
        result = result[0]
    if isinstance(result, list) and result and isinstance(result[0], dict):
        if "label" in result[0] and "score" in result[0]:
            return result[0]["label"], result[0]["score"]
    if isinstance(result, dict):
        labels = result.get("labels") or []
        scores = result.get("scores") or []
        if labels and scores:
            return labels[0], scores[0]
    return None, None

def analyze_body(text):
    """Score a message body for phishing/spam indicators.

    Combines regex heuristics (suspicious phrases, URLs) with optional
    Hugging Face model signals (AI-text detector, sentiment, spam).
    Model calls silently no-op when HF_API_KEY is not configured.

    Args:
        text: message body; None is treated as empty.

    Returns:
        (findings, score, highlighted_body, verdict) where findings is a
        list of human-readable strings, score an int risk score,
        highlighted_body the input with detected spans wrapped in
        <mark> tags, and verdict one of "Safe", "Suspicious",
        "Malicious / Spam".
    """
    findings = []
    score = 0
    body = text or ""
    highlighted_body = body
    marked = set()  # lowercased spans already wrapped, to avoid nested <mark>

    def _highlight(span):
        """Wrap every case-insensitive occurrence of span in <mark>, once."""
        nonlocal highlighted_body
        key = span.lower()
        if key in marked:
            return
        marked.add(key)
        # Callable replacement keeps each occurrence's original casing and
        # avoids backslash-escape interpretation in the replacement text.
        highlighted_body = re.sub(
            re.escape(span),
            lambda m: f"<mark>{m.group(0)}</mark>",
            highlighted_body,
            flags=re.IGNORECASE,
        )

    # 1) Suspicious phrases. Use finditer + group(0): re.findall returns
    # only the *group* content for patterns with groups (e.g.
    # r"reset (your )?password"), which dropped "reset password" matches
    # entirely and mis-highlighted every "your " in the body.
    for pattern in SUSPICIOUS_PATTERNS:
        for m in re.finditer(pattern, body, flags=re.IGNORECASE):
            display = m.group(0)
            findings.append(f'Suspicious phrase detected: "{display.lower()}"')
            score += 15  # tuned down to reduce instant Malicious
            _highlight(display)

    # 2) URLs -- matched against the original text so the reported URL
    # keeps its casing (URL paths are case-sensitive).
    for url in re.findall(r'https?://[^\s]+', body):
        findings.append(f"Suspicious URL detected: {url}")
        score += 10
        _highlight(url)

    # 3) AI text detector (informational only; does not affect score).
    # NOTE: "→" repaired from mojibake ("β†’") in the original findings text.
    label, confidence = parse_hf_result(query_hf(MODELS["ai_detector"], body))
    if label:
        findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")

    # 4) Sentiment: negative tone nudges the score up.
    label, confidence = parse_hf_result(query_hf(MODELS["sentiment"], body))
    if label:
        findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
        if label.lower() == "negative":
            score += 10

    # 5) Spam detector: a spam label is a strong signal.
    label, confidence = parse_hf_result(query_hf(MODELS["spam"], body))
    if label:
        findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
        if label.lower() == "spam":
            score += 25

    # 6) Verdict thresholds: >=50 malicious, >=20 suspicious, else safe.
    if score >= 50:
        verdict = "Malicious / Spam"
    elif score >= 20:
        verdict = "Suspicious"
    else:
        verdict = "Safe"
        findings.append("No suspicious content detected in body.")

    return findings, score, highlighted_body, verdict