princemaxp committed
Commit 90ca431 · verified · 1 Parent(s): 15cd72a

Update body_analyzer.py

Files changed (1)
  1. body_analyzer.py +37 -8
body_analyzer.py CHANGED
@@ -11,6 +11,7 @@ MODELS = {
     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
 
+# Base suspicious patterns
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
@@ -28,6 +29,21 @@ SUSPICIOUS_PATTERNS = [
     r"legal action",
 ]
 
+# Extended spammy / phishing keywords
+SUSPICIOUS_PATTERNS += [
+    r"free trial",
+    r"limited time offer",
+    r"click below",
+    r"winner",
+    r"congratulations",
+    r"urgent response",
+    r"claim your prize",
+    r"act now",
+    r"unsubscribe",
+    r"lottery",
+    r"risk-free",
+]
+
 def query_hf(model, text):
     if not HF_API_KEY:
         return None
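For reference, query_hf (only its first lines appear in this diff) is the wrapper all three model checks below go through. A minimal sketch of what such a wrapper typically looks like, assuming the public Hugging Face Inference API endpoint and the requests library; the file's actual implementation sits in the unchanged part of the diff and may differ:

import os
import requests

HF_API_KEY = os.getenv("HF_API_KEY")  # assumed source of the key checked above

def query_hf(model, text):
    # Return parsed JSON from the Inference API, or None when no key is set.
    if not HF_API_KEY:
        return None
    resp = requests.post(
        f"https://api-inference.huggingface.co/models/{model}",
        headers={"Authorization": f"Bearer {HF_API_KEY}"},
        json={"inputs": text},
        timeout=30,
    )
    resp.raise_for_status()
    return resp.json()

For text-classification models the API can nest results one level deeper than the result[0]["label"] indexing below expects, so a real wrapper may also flatten the response.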
@@ -52,19 +68,26 @@ def analyze_body(text):
         matches = re.findall(pattern, body_lower)
         for match in matches:
             findings.append(f"Suspicious phrase detected: \"{match}\"")
-            score += 20
+            score += 25  # stronger weighting
             highlighted_body = re.sub(
                 match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
             )
 
-    # --- 2. AI-generated text detection ---
+    # --- 2. URL detection ---
+    urls = re.findall(r'https?://[^\s]+', body_lower)
+    for url in urls:
+        findings.append(f"Suspicious URL detected: {url}")
+        score += 10
+        highlighted_body = re.sub(url, f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
+
+    # --- 3. AI-generated text detection ---
     result = query_hf(MODELS["ai_detector"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
 
-    # --- 3. Sentiment analysis ---
+    # --- 4. Sentiment analysis ---
     result = query_hf(MODELS["sentiment"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
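One caveat about the highlighting in the hunk above: both re.sub calls pass the raw matched text as the pattern, and URLs almost always contain regex metacharacters ('?', '.', '(' in tracking parameters), so the substitution can raise re.error or mark the wrong spans. A safer variant of the new URL block escapes the match first; a sketch, not part of this commit:

    for url in urls:
        findings.append(f"Suspicious URL detected: {url}")
        score += 10
        # re.escape neutralizes metacharacters so the URL is matched literally
        highlighted_body = re.sub(
            re.escape(url), f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE
        )

The same re.escape treatment would also harden the phrase loop, although the current patterns happen to contain no metacharacters.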
@@ -73,16 +96,22 @@ def analyze_body(text):
         if label.lower() == "negative":
             score += 10
 
-    # --- 4. Spam vs Ham detection ---
+    # --- 5. Spam vs Ham detection ---
     result = query_hf(MODELS["spam"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
-            score += 20
+            score += 25
 
-    if not findings:
-        return ["No suspicious content detected in body."], 0, text
+    # --- 6. Final verdict based on score ---
+    if score >= 50:
+        verdict = "Malicious / Spam"
+    elif score >= 20:
+        verdict = "Suspicious"
+    else:
+        verdict = "Safe"
+        findings.append("No suspicious content detected in body.")
 
-    return findings, score, highlighted_body
+    return findings, score, highlighted_body, verdict
 
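Under the new weights, two matched phrases alone (2 × 25 = 50) reach the "Malicious / Spam" threshold, a single phrase (25) lands in "Suspicious", and a lone URL (10) stays "Safe". Note also that the function now returns four values, so call sites that unpacked the old three-value tuple need updating. A hypothetical call, ignoring the model-based signals (which require an API key):

findings, score, highlighted, verdict = analyze_body(
    "Congratulations! Claim your prize now: http://example.com/win"
)
# "congratulations" (+25) and "claim your prize" (+25) reach 50 by themselves,
# and the URL adds 10, so score is at least 60 and verdict is "Malicious / Spam".
print(verdict, score)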