princemaxp committed on
Commit
a235ecf
·
verified ·
1 Parent(s): abe135d

Update body_analyzer.py

Browse files
Files changed (1) hide show
  1. body_analyzer.py +39 -28
body_analyzer.py CHANGED
@@ -2,7 +2,6 @@ import requests
2
  import os
3
  import re
4
 
5
- # --- HuggingFace API setup ---
6
  HF_API_KEY = os.getenv("HF_API_KEY")
7
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
8
 
@@ -12,7 +11,6 @@ MODELS = {
12
  "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
13
  }
14
 
15
- # --- Suspicious keyword patterns ---
16
  SUSPICIOUS_PATTERNS = [
17
  r"verify your account",
18
  r"urgent action",
@@ -41,7 +39,6 @@ SUSPICIOUS_PATTERNS = [
41
  r"risk-free",
42
  ]
43
 
44
- # --- Helper: query HuggingFace model ---
45
  def query_hf(model, text):
46
  if not HF_API_KEY:
47
  return None
@@ -50,61 +47,75 @@ def query_hf(model, text):
50
  f"https://api-inference.huggingface.co/models/{model}",
51
  headers=HF_HEADERS,
52
  json={"inputs": text[:1000]},
 
53
  )
54
  return res.json()
55
  except Exception:
56
  return None
57
 
58
- # --- Main body analyzer ---
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  def analyze_body(text):
60
  findings = []
61
  score = 0
62
- body_lower = text.lower()
63
- highlighted_body = text
64
 
65
- # --- 1. Suspicious keyword detection ---
66
  for pattern in SUSPICIOUS_PATTERNS:
67
  matches = re.findall(pattern, body_lower)
68
  for match in matches:
69
- findings.append(f"Suspicious phrase detected: \"{match}\"")
70
- score += 25 # stronger weighting
 
 
 
71
  highlighted_body = re.sub(
72
- match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
 
 
 
73
  )
74
 
75
- # --- 2. URL detection ---
76
  urls = re.findall(r'https?://[^\s]+', body_lower)
77
  for url in urls:
78
  findings.append(f"Suspicious URL detected: {url}")
79
  score += 10
80
- highlighted_body = re.sub(url, f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
81
 
82
- # --- 3. AI-generated text detection ---
83
- result = query_hf(MODELS["ai_detector"], text)
84
- if result and isinstance(result, list) and len(result) > 0:
85
- label = result[0]["label"]
86
- confidence = result[0]["score"]
87
  findings.append(f"Body: AI Detector β†’ {label} (confidence {confidence:.2f})")
88
 
89
- # --- 4. Sentiment analysis ---
90
- result = query_hf(MODELS["sentiment"], text)
91
- if result and isinstance(result, list) and len(result) > 0:
92
- label = result[0]["label"]
93
- confidence = result[0]["score"]
94
  findings.append(f"Body: Sentiment β†’ {label} (confidence {confidence:.2f})")
95
  if label.lower() == "negative":
96
  score += 10
97
 
98
- # --- 5. Spam vs Ham detection ---
99
- result = query_hf(MODELS["spam"], text)
100
- if result and isinstance(result, list) and len(result) > 0:
101
- label = result[0]["label"]
102
- confidence = result[0]["score"]
103
  findings.append(f"Body: Spam Detector β†’ {label} (confidence {confidence:.2f})")
104
  if label.lower() == "spam":
105
  score += 25
106
 
107
- # --- 6. Final verdict based on score ---
108
  if score >= 50:
109
  verdict = "Malicious / Spam"
110
  elif score >= 20:
 
2
  import os
3
  import re
4
 
 
5
  HF_API_KEY = os.getenv("HF_API_KEY")
6
  HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
7
 
 
11
  "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
12
  }
13
 
 
14
  SUSPICIOUS_PATTERNS = [
15
  r"verify your account",
16
  r"urgent action",
 
39
  r"risk-free",
40
  ]
41
 
 
42
  def query_hf(model, text):
43
  if not HF_API_KEY:
44
  return None
 
47
  f"https://api-inference.huggingface.co/models/{model}",
48
  headers=HF_HEADERS,
49
  json={"inputs": text[:1000]},
50
+ timeout=15,
51
  )
52
  return res.json()
53
  except Exception:
54
  return None
55
 
56
def parse_hf_result(result):
    """Extract the leading ``(label, score)`` pair from an inference response.

    Recognized response shapes:

    * ``[{"label": ..., "score": ...}, ...]`` -- a flat list of predictions.
    * ``[[{"label": ..., "score": ...}, ...]]`` -- the same list wrapped in
      one extra outer list.
    * ``{"labels": [...], "scores": [...]}`` -- parallel label/score lists.

    Args:
        result: Decoded JSON response, or a falsy value (``None``, ``[]``,
            ``{}``) when no usable response is available.

    Returns:
        A ``(label, score)`` tuple taken from the first prediction, or
        ``(None, None)`` when ``result`` is empty or matches none of the
        shapes above (for example an ``{"error": ...}`` payload).
    """
    if not result:
        return None, None

    if isinstance(result, list):
        first = result[0]
        # Unwrap one level of nesting so a list-of-lists response is parsed
        # instead of being silently discarded.
        if isinstance(first, list):
            first = first[0] if first else None
        if isinstance(first, dict) and "label" in first and "score" in first:
            return first["label"], first["score"]
        return None, None

    if isinstance(result, dict):
        # `or []` guards against keys that are present but set to None.
        labels = result.get("labels") or []
        scores = result.get("scores") or []
        if labels and scores:
            return labels[0], scores[0]

    return None, None
69
+
70
  def analyze_body(text):
71
  findings = []
72
  score = 0
73
+ body_lower = (text or "").lower()
74
+ highlighted_body = text or ""
75
 
76
+ # 1) Suspicious phrases
77
  for pattern in SUSPICIOUS_PATTERNS:
78
  matches = re.findall(pattern, body_lower)
79
  for match in matches:
80
+ display = match if isinstance(match, str) else (match[0] if match else "")
81
+ if not display:
82
+ continue
83
+ findings.append(f'Suspicious phrase detected: "{display}"')
84
+ score += 15 # tuned down to reduce instant Malicious
85
  highlighted_body = re.sub(
86
+ re.escape(display),
87
+ f"<mark>{display}</mark>",
88
+ highlighted_body,
89
+ flags=re.IGNORECASE,
90
  )
91
 
92
+ # 2) URLs
93
  urls = re.findall(r'https?://[^\s]+', body_lower)
94
  for url in urls:
95
  findings.append(f"Suspicious URL detected: {url}")
96
  score += 10
97
+ highlighted_body = re.sub(re.escape(url), f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
98
 
99
+ # 3) AI text detector
100
+ label, confidence = parse_hf_result(query_hf(MODELS["ai_detector"], text or ""))
101
+ if label:
 
 
102
  findings.append(f"Body: AI Detector β†’ {label} (confidence {confidence:.2f})")
103
 
104
+ # 4) Sentiment
105
+ label, confidence = parse_hf_result(query_hf(MODELS["sentiment"], text or ""))
106
+ if label:
 
 
107
  findings.append(f"Body: Sentiment β†’ {label} (confidence {confidence:.2f})")
108
  if label.lower() == "negative":
109
  score += 10
110
 
111
+ # 5) Spam detector
112
+ label, confidence = parse_hf_result(query_hf(MODELS["spam"], text or ""))
113
+ if label:
 
 
114
  findings.append(f"Body: Spam Detector β†’ {label} (confidence {confidence:.2f})")
115
  if label.lower() == "spam":
116
  score += 25
117
 
118
+ # 6) Verdict
119
  if score >= 50:
120
  verdict = "Malicious / Spam"
121
  elif score >= 20: