princemaxp committed on
Commit
bedf234
·
verified ·
1 Parent(s): 0e4c3b7

Update body_analyzer.py

Browse files
Files changed (1) hide show
  1. body_analyzer.py +60 -24
body_analyzer.py CHANGED
@@ -1,5 +1,6 @@
1
  import requests
2
  import os
 
3
 
4
  HF_API_KEY = os.getenv("HF_API_KEY") # Hugging Face free account
5
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"}
@@ -10,36 +11,71 @@ MODELS = {
10
  "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
11
  }
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  def query_hf(model, text):
14
  url = f"https://api-inference.huggingface.co/models/{model}"
15
- res = requests.post(url, headers=HF_HEADERS, json={"inputs": text[:1000]})
16
- return res.json()
 
 
 
17
 
18
  def analyze_body(text):
19
  findings = []
 
 
20
 
21
- # 1. AI-generated detection
22
- try:
23
- result = query_hf(MODELS["ai_detector"], text)
24
- if isinstance(result, list):
25
- findings.append(f"Body: AI Detector → {result[0]['label']} (confidence {result[0]['score']:.2f})")
26
- except:
27
- findings.append("Body: AI detection failed")
28
 
29
- # 2. Sentiment / Tone
30
- try:
31
- result = query_hf(MODELS["sentiment"], text)
32
- if isinstance(result, list):
33
- findings.append(f"Body: Sentiment → {result[0]['label']} (confidence {result[0]['score']:.2f})")
34
- except:
35
- findings.append("Body: Sentiment analysis failed")
36
 
37
- # 3. Spam vs Ham
38
- try:
39
- result = query_hf(MODELS["spam"], text)
40
- if isinstance(result, list):
41
- findings.append(f"Body: Spam Detector → {result[0]['label']} (confidence {result[0]['score']:.2f})")
42
- except:
43
- findings.append("Body: Spam detection failed")
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- return findings
 
1
  import requests
2
  import os
3
+ import re
4
 
5
  HF_API_KEY = os.getenv("HF_API_KEY") # Hugging Face free account
6
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"}
 
11
  "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
12
  }
13
 
14
+ # Suspicious patterns to look for
15
+ SUSPICIOUS_PATTERNS = [
16
+ r"verify your account",
17
+ r"urgent action",
18
+ r"click here",
19
+ r"reset (your )?password",
20
+ r"confirm (your )?identity",
21
+ r"bank account",
22
+ r"invoice",
23
+ r"payment (required|overdue|failed|method expired)",
24
+ r"unauthorized login",
25
+ r"compromised",
26
+ r"final reminder",
27
+ r"account (suspended|deactivated|locked)",
28
+ r"update your (information|details|billing)",
29
+ r"legal action",
30
+ ]
31
+
32
  def query_hf(model, text):
33
  url = f"https://api-inference.huggingface.co/models/{model}"
34
+ try:
35
+ res = requests.post(url, headers=HF_HEADERS, json={"inputs": text[:1000]})
36
+ return res.json()
37
+ except Exception:
38
+ return None
39
 
40
  def analyze_body(text):
41
  findings = []
42
+ score = 0
43
+ body_lower = text.lower()
44
 
45
+ # --- 1. Suspicious keyword detection ---
46
+ for pattern in SUSPICIOUS_PATTERNS:
47
+ matches = re.findall(pattern, body_lower)
48
+ for match in matches:
49
+ findings.append(f"Suspicious phrase detected: \"{match}\"")
50
+ score += 20 # weight for suspicious phrase
 
51
 
52
+ # --- 2. AI-generated text detection ---
53
+ result = query_hf(MODELS["ai_detector"], text)
54
+ if result and isinstance(result, list) and len(result) > 0:
55
+ label = result[0]["label"]
56
+ confidence = result[0]["score"]
57
+ findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
58
+ # No score impact yet (just informational)
59
 
60
+ # --- 3. Sentiment analysis ---
61
+ result = query_hf(MODELS["sentiment"], text)
62
+ if result and isinstance(result, list) and len(result) > 0:
63
+ label = result[0]["label"]
64
+ confidence = result[0]["score"]
65
+ findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
66
+ if label.lower() in ["negative"]:
67
+ score += 10 # negative/urgent tone adds risk
68
+
69
+ # --- 4. Spam vs Ham detection ---
70
+ result = query_hf(MODELS["spam"], text)
71
+ if result and isinstance(result, list) and len(result) > 0:
72
+ label = result[0]["label"]
73
+ confidence = result[0]["score"]
74
+ findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
75
+ if label.lower() == "spam":
76
+ score += 20 # spam classification increases risk
77
+
78
+ if not findings:
79
+ return ["No suspicious content detected in body."], 0
80
 
81
+ return findings, score