Spaces:
Running
Running
File size: 2,773 Bytes
d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 d9f5d53 bedf234 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
import requests
import os
import re
# Hugging Face access token, read from the environment (a free account works).
# Stays None when the HF_API_KEY variable is not set.
HF_API_KEY = os.getenv("HF_API_KEY")

# Request headers shared by every inference call. The Authorization header is
# only added when a token is configured; formatting a missing token would
# otherwise produce the literal value "Bearer None".
HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
# Hugging Face model ids, keyed by the analysis step that looks them up.
MODELS = dict(
    ai_detector="roberta-base-openai-detector",
    sentiment="finiteautomata/bertweet-base-sentiment-analysis",
    spam="mrm8488/bert-tiny-finetuned-sms-spam-detection",
)
# Regular expressions for phrases that are treated as suspicious in a message
# body. Order matters only for the order in which hits are reported.
#
# Groups are written as non-capturing "(?:...)" on purpose: with a capturing
# group, re.findall() returns the group's text (e.g. "your " or "overdue")
# instead of the whole matched phrase.
SUSPICIOUS_PATTERNS = [
    r"verify your account",
    r"urgent action",
    r"click here",
    r"reset (?:your )?password",
    r"confirm (?:your )?identity",
    r"bank account",
    r"invoice",
    r"payment (?:required|overdue|failed|method expired)",
    r"unauthorized login",
    r"compromised",
    r"final reminder",
    r"account (?:suspended|deactivated|locked)",
    r"update your (?:information|details|billing)",
    r"legal action",
]
def query_hf(model, text, *, timeout=30):
    """Send *text* to a hosted Hugging Face model and return the decoded reply.

    Args:
        model: Model id appended to the inference endpoint URL.
        text: Input string; only the first 1000 characters are sent.
        timeout: Seconds to wait for the HTTP request before giving up.
            Without a timeout the call could block indefinitely.

    Returns:
        Whatever JSON value the service responded with (the HTTP status is not
        inspected, so this may also be an error payload), or ``None`` when
        *text* is not a string, the request fails, or the response body is not
        valid JSON.
    """
    # A non-string input cannot be sliced/serialised as intended; keep the
    # "None on failure" contract instead of raising.
    if not isinstance(text, str):
        return None

    url = f"https://api-inference.huggingface.co/models/{model}"
    try:
        res = requests.post(
            url,
            headers=HF_HEADERS,
            json={"inputs": text[:1000]},
            timeout=timeout,
        )
        return res.json()
    except (requests.RequestException, ValueError):
        # RequestException: connection problems, timeouts, etc.
        # ValueError: the response body could not be decoded as JSON.
        return None
# Labels (lower-cased) that count as a negative tone.
# NOTE(review): "neg" is included because the configured sentiment model is
# believed to emit NEG/NEU/POS rather than full words — confirm on the model card.
_NEGATIVE_LABELS = frozenset({"negative", "neg"})

# Labels (lower-cased) that count as a spam verdict.
# NOTE(review): "label_1" is included on the assumption that the configured
# spam model emits generic LABEL_0/LABEL_1 with LABEL_1 meaning spam — confirm
# on the model card before relying on it.
_SPAM_LABELS = frozenset({"spam", "label_1"})


def _top_prediction(result):
    """Return ``(label, score)`` for the first prediction in *result*, else ``None``.

    Accepts a flat list of ``{"label": ..., "score": ...}`` dicts as well as
    the same list wrapped in one extra outer list. Any other value (``None``,
    an error dict, an empty list, entries without usable keys) yields ``None``.
    """
    if not isinstance(result, list) or not result:
        return None

    first = result[0]
    if isinstance(first, list):
        # Nested shape: one inner list of predictions per input.
        # Assumes the first entry is the one to report, as the original flat
        # handling did — TODO confirm the service orders by score.
        if not first:
            return None
        first = first[0]

    if not isinstance(first, dict):
        return None

    label = first.get("label")
    score = first.get("score")
    if not isinstance(label, str) or not isinstance(score, (int, float)):
        return None
    return label, score


def analyze_body(text):
    """Scan a message body for suspicious content and compute a risk score.

    Steps:
      1. Every match of a pattern in ``SUSPICIOUS_PATTERNS`` (against the
         lower-cased text) adds a finding and 20 points.
      2. The AI-text detector result is reported; it does not affect the score.
      3. The sentiment result is reported; a negative label adds 10 points.
      4. The spam classifier result is reported; a spam label adds 20 points.

    A model step is silently skipped when ``query_hf`` returns nothing usable.

    Args:
        text: The message body. ``None``, non-string, or blank input is treated
            as having no content and no model is queried.

    Returns:
        A ``(findings, score)`` tuple: a list of human-readable strings and an
        integer score. When nothing was found the list holds a single
        "No suspicious content" message and the score is 0.
    """
    if not isinstance(text, str) or not text.strip():
        return ["No suspicious content detected in body."], 0

    findings = []
    score = 0
    body_lower = text.lower()

    # --- 1. Suspicious keyword detection ---
    # finditer + group(0) reports the whole matched phrase even when a pattern
    # contains capturing groups (findall would return only the group text).
    for pattern in SUSPICIOUS_PATTERNS:
        for match in re.finditer(pattern, body_lower):
            findings.append(f'Suspicious phrase detected: "{match.group(0)}"')
            score += 20  # weight for suspicious phrase

    # --- 2. AI-generated text detection (informational only) ---
    prediction = _top_prediction(query_hf(MODELS["ai_detector"], text))
    if prediction is not None:
        label, confidence = prediction
        findings.append(f"Body: AI Detector β {label} (confidence {confidence:.2f})")

    # --- 3. Sentiment analysis ---
    prediction = _top_prediction(query_hf(MODELS["sentiment"], text))
    if prediction is not None:
        label, confidence = prediction
        findings.append(f"Body: Sentiment β {label} (confidence {confidence:.2f})")
        if label.lower() in _NEGATIVE_LABELS:
            score += 10  # negative/urgent tone adds risk

    # --- 4. Spam vs Ham detection ---
    prediction = _top_prediction(query_hf(MODELS["spam"], text))
    if prediction is not None:
        label, confidence = prediction
        findings.append(f"Body: Spam Detector β {label} (confidence {confidence:.2f})")
        if label.lower() in _SPAM_LABELS:
            score += 20  # spam classification increases risk

    if not findings:
        return ["No suspicious content detected in body."], 0
    return findings, score
|