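"""Heuristic text analyzer for a phishing / spam detection Space.

Scores a message body against suspicious keyword patterns and URLs, and
optionally queries Hugging Face Inference API models (AI-text detection,
sentiment, spam classification) when an HF_API_KEY is configured.
"""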
import requests
import os
import re
HF_API_KEY = os.getenv("HF_API_KEY")
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
MODELS = {
"ai_detector": "roberta-base-openai-detector",
"sentiment": "finiteautomata/bertweet-base-sentiment-analysis",
"spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
}
SUSPICIOUS_PATTERNS = [
r"verify your account",
r"urgent action",
r"click here",
r"reset (your )?password",
r"confirm (your )?identity",
r"bank account",
r"invoice",
r"payment (required|overdue|failed|method expired)",
r"unauthorized login",
r"compromised",
r"final reminder",
r"account (suspended|deactivated|locked)",
r"update your (information|details|billing)",
r"legal action",
r"free trial",
r"limited time offer",
r"click below",
r"winner",
r"congratulations",
r"urgent response",
r"claim your prize",
r"act now",
r"unsubscribe",
r"lottery",
r"risk-free",
]
def query_hf(model, text):
    """Call the Hugging Face Inference API for `model`; return parsed JSON or None."""
    if not HF_API_KEY:
        return None
    try:
        res = requests.post(
            f"https://api-inference.huggingface.co/models/{model}",
            headers=HF_HEADERS,
            json={"inputs": text[:1000]},  # truncate long bodies to keep requests small
            timeout=15,
        )
        return res.json()
    except Exception:
        # Network errors, timeouts, or non-JSON responses are treated as "no result".
        return None
def parse_hf_result(result):
    """Extract the top (label, score) pair from common Inference API response shapes."""
    # Common shapes: [{"label": ..., "score": ...}], [[{"label": ..., "score": ...}]],
    # or {"labels": [...], "scores": [...]} (zero-shot style).
    if not result:
        return None, None
    # Some text-classification models wrap results in a nested list; unwrap one level.
    if isinstance(result, list) and result and isinstance(result[0], list):
        result = result[0]
    if isinstance(result, list) and result and isinstance(result[0], dict):
        if "label" in result[0] and "score" in result[0]:
            return result[0]["label"], result[0]["score"]
    if isinstance(result, dict):
        labels = result.get("labels") or []
        scores = result.get("scores") or []
        if labels and scores:
            return labels[0], scores[0]
    return None, None
def analyze_body(text):
    findings = []
    score = 0
    body_lower = (text or "").lower()
    highlighted_body = text or ""

    # 1) Suspicious phrases
    for pattern in SUSPICIOUS_PATTERNS:
        # Use finditer + group(0) so optional groups (e.g. "(your )?") report the
        # full matched phrase rather than just the captured group.
        matches = [m.group(0) for m in re.finditer(pattern, body_lower)]
        for display in matches:
            findings.append(f'Suspicious phrase detected: "{display}"')
            score += 15  # tuned down to reduce instant Malicious
        # Highlight each distinct phrase once so repeated matches don't nest <mark> tags.
        for display in set(matches):
            highlighted_body = re.sub(
                re.escape(display),
                f"<mark>{display}</mark>",
                highlighted_body,
                flags=re.IGNORECASE,
            )

    # 2) URLs
    urls = re.findall(r'https?://[^\s]+', body_lower)
    for url in urls:
        findings.append(f"Suspicious URL detected: {url}")
        score += 10
    for url in set(urls):
        highlighted_body = re.sub(re.escape(url), f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)

    # 3) AI text detector
    label, confidence = parse_hf_result(query_hf(MODELS["ai_detector"], text or ""))
    if label:
        findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")

    # 4) Sentiment
    label, confidence = parse_hf_result(query_hf(MODELS["sentiment"], text or ""))
    if label:
        findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
        if label.lower() == "negative":
            score += 10

    # 5) Spam detector
    label, confidence = parse_hf_result(query_hf(MODELS["spam"], text or ""))
    if label:
        findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
        if label.lower() == "spam":
            score += 25

    # 6) Verdict
    if score >= 50:
        verdict = "Malicious / Spam"
    elif score >= 20:
        verdict = "Suspicious"
    else:
        verdict = "Safe"
    if not findings:
        findings.append("No suspicious content detected in body.")
    return findings, score, highlighted_body, verdict
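
# Minimal usage sketch (assumption: this module is imported by the Space's UI layer,
# e.g. a Gradio app; the sample text below is illustrative only).
if __name__ == "__main__":
    sample = "URGENT: verify your account at https://example.com/login or it will be locked."
    findings, score, highlighted, verdict = analyze_body(sample)
    print(f"Verdict: {verdict} (score {score})")
    for finding in findings:
        print(" -", finding)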