princemaxp committed on
Commit 4fb4f18 · verified · 1 Parent(s): 55b41f6

Update body_analyzer.py

Files changed (1)
  1. body_analyzer.py +19 -12
body_analyzer.py CHANGED
@@ -2,8 +2,8 @@ import requests
 import os
 import re
 
-HF_API_KEY = os.getenv("HF_API_KEY") # Hugging Face free account
-HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"}
+HF_API_KEY = os.getenv("HF_API_KEY")
+HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
 
 MODELS = {
     "ai_detector": "roberta-base-openai-detector",
@@ -11,7 +11,6 @@ MODELS = {
     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
 
-# Suspicious patterns to look for
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
@@ -30,9 +29,14 @@ SUSPICIOUS_PATTERNS = [
 ]
 
 def query_hf(model, text):
-    url = f"https://api-inference.huggingface.co/models/{model}"
+    if not HF_API_KEY:
+        return None
     try:
-        res = requests.post(url, headers=HF_HEADERS, json={"inputs": text[:1000]})
+        res = requests.post(
+            f"https://api-inference.huggingface.co/models/{model}",
+            headers=HF_HEADERS,
+            json={"inputs": text[:1000]},
+        )
         return res.json()
     except Exception:
         return None
@@ -41,13 +45,17 @@ def analyze_body(text):
     findings = []
     score = 0
     body_lower = text.lower()
+    highlighted_body = text
 
     # --- 1. Suspicious keyword detection ---
     for pattern in SUSPICIOUS_PATTERNS:
         matches = re.findall(pattern, body_lower)
         for match in matches:
             findings.append(f"Suspicious phrase detected: \"{match}\"")
-            score += 20 # weight for suspicious phrase
+            score += 20
+            highlighted_body = re.sub(
+                match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
+            )
 
     # --- 2. AI-generated text detection ---
     result = query_hf(MODELS["ai_detector"], text)
@@ -55,7 +63,6 @@ def analyze_body(text):
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
-        # No score impact yet (just informational)
 
     # --- 3. Sentiment analysis ---
     result = query_hf(MODELS["sentiment"], text)
@@ -63,8 +70,8 @@ def analyze_body(text):
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
-        if label.lower() in ["negative"]:
-            score += 10 # negative/urgent tone adds risk
+        if label.lower() == "negative":
+            score += 10
 
     # --- 4. Spam vs Ham detection ---
     result = query_hf(MODELS["spam"], text)
@@ -73,9 +80,9 @@ def analyze_body(text):
         confidence = result[0]["score"]
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
-            score += 20 # spam classification increases risk
+            score += 20
 
     if not findings:
-        return ["No suspicious content detected in body."], 0
+        return ["No suspicious content detected in body."], 0, text
 
-    return findings, score
+    return findings, score, highlighted_body
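Note: with this change analyze_body returns three values instead of two (findings, score, and a copy of the body with matched phrases wrapped in <mark> tags), so callers need updating. A minimal usage sketch under that assumption; email_text is a hypothetical example input and is not part of this commit:

# Minimal usage sketch (assumes body_analyzer.py is importable;
# email_text below is a made-up example, not taken from the repository).
from body_analyzer import analyze_body

email_text = "Urgent action required: please verify your account now."

# Without HF_API_KEY set, query_hf() returns None and only the
# keyword-based checks contribute to the score.
findings, score, highlighted_body = analyze_body(email_text)

print("Risk score:", score)      # cumulative weight from keyword/sentiment/spam checks
for finding in findings:
    print("-", finding)
print(highlighted_body)          # body text with suspicious phrases wrapped in <mark> tags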