Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Community

princemaxp committed on Sep 5

Commit

90ca431

verified ·

1 Parent(s): 15cd72a

Update body_analyzer.py

Browse files

Files changed (1) hide show

body_analyzer.py +37 -8

body_analyzer.py CHANGED Viewed

@@ -11,6 +11,7 @@ MODELS = {
     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
@@ -28,6 +29,21 @@ SUSPICIOUS_PATTERNS = [
     r"legal action",
 ]
 def query_hf(model, text):
     if not HF_API_KEY:
         return None
@@ -52,19 +68,26 @@ def analyze_body(text):
         matches = re.findall(pattern, body_lower)
         for match in matches:
             findings.append(f"Suspicious phrase detected: \"{match}\"")
-            score += 20
             highlighted_body = re.sub(
                 match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
             )
-    # --- 2. AI-generated text detection ---
     result = query_hf(MODELS["ai_detector"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
-    # --- 3. Sentiment analysis ---
     result = query_hf(MODELS["sentiment"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
@@ -73,16 +96,22 @@ def analyze_body(text):
         if label.lower() == "negative":
             score += 10
-    # --- 4. Spam vs Ham detection ---
     result = query_hf(MODELS["spam"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
-            score += 20
-    if not findings:
-        return ["No suspicious content detected in body."], 0, text
-    return findings, score, highlighted_body

     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
+# Base suspicious patterns
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
     r"legal action",
 ]
+# Extended spammy / phishing keywords
+SUSPICIOUS_PATTERNS += [
+    r"free trial",
+    r"limited time offer",
+    r"click below",
+    r"winner",
+    r"congratulations",
+    r"urgent response",
+    r"claim your prize",
+    r"act now",
+    r"unsubscribe",
+    r"lottery",
+    r"risk-free",
+]
 def query_hf(model, text):
     if not HF_API_KEY:
         return None
         matches = re.findall(pattern, body_lower)
         for match in matches:
             findings.append(f"Suspicious phrase detected: \"{match}\"")
+            score += 25  # stronger weighting
             highlighted_body = re.sub(
                 match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
             )
+    # --- 2. URL detection ---
+    urls = re.findall(r'https?://[^\s]+', body_lower)
+    for url in urls:
+        findings.append(f"Suspicious URL detected: {url}")
+        score += 10
+        highlighted_body = re.sub(url, f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
+    # --- 3. AI-generated text detection ---
     result = query_hf(MODELS["ai_detector"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
+    # --- 4. Sentiment analysis ---
     result = query_hf(MODELS["sentiment"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         if label.lower() == "negative":
             score += 10
+    # --- 5. Spam vs Ham detection ---
     result = query_hf(MODELS["spam"], text)
     if result and isinstance(result, list) and len(result) > 0:
         label = result[0]["label"]
         confidence = result[0]["score"]
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
+            score += 25
+    # --- 6. Final verdict based on score ---
+    if score >= 50:
+        verdict = "Malicious / Spam"
+    elif score >= 20:
+        verdict = "Suspicious"
+    else:
+        verdict = "Safe"
+        findings.append("No suspicious content detected in body.")
+    return findings, score, highlighted_body, verdict