Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Community

princemaxp committed on Sep 5

Commit

a235ecf

verified ·

1 Parent(s): abe135d

Update body_analyzer.py

Browse files

Files changed (1) hide show

body_analyzer.py +39 -28

body_analyzer.py CHANGED Viewed

@@ -2,7 +2,6 @@ import requests
 import os
 import re
-# --- HuggingFace API setup ---
 HF_API_KEY = os.getenv("HF_API_KEY")
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
@@ -12,7 +11,6 @@ MODELS = {
     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
-# --- Suspicious keyword patterns ---
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
@@ -41,7 +39,6 @@ SUSPICIOUS_PATTERNS = [
     r"risk-free",
 ]
-# --- Helper: query HuggingFace model ---
 def query_hf(model, text):
     if not HF_API_KEY:
         return None
@@ -50,61 +47,75 @@ def query_hf(model, text):
             f"https://api-inference.huggingface.co/models/{model}",
             headers=HF_HEADERS,
             json={"inputs": text[:1000]},
         )
         return res.json()
     except Exception:
         return None
-# --- Main body analyzer ---
 def analyze_body(text):
     findings = []
     score = 0
-    body_lower = text.lower()
-    highlighted_body = text
-    # --- 1. Suspicious keyword detection ---
     for pattern in SUSPICIOUS_PATTERNS:
         matches = re.findall(pattern, body_lower)
         for match in matches:
-            findings.append(f"Suspicious phrase detected: \"{match}\"")
-            score += 25  # stronger weighting
             highlighted_body = re.sub(
-                match, f"<mark>{match}</mark>", highlighted_body, flags=re.IGNORECASE
             )
-    # --- 2. URL detection ---
     urls = re.findall(r'https?://[^\s]+', body_lower)
     for url in urls:
         findings.append(f"Suspicious URL detected: {url}")
         score += 10
-        highlighted_body = re.sub(url, f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
-    # --- 3. AI-generated text detection ---
-    result = query_hf(MODELS["ai_detector"], text)
-    if result and isinstance(result, list) and len(result) > 0:
-        label = result[0]["label"]
-        confidence = result[0]["score"]
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
-    # --- 4. Sentiment analysis ---
-    result = query_hf(MODELS["sentiment"], text)
-    if result and isinstance(result, list) and len(result) > 0:
-        label = result[0]["label"]
-        confidence = result[0]["score"]
         findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
         if label.lower() == "negative":
             score += 10
-    # --- 5. Spam vs Ham detection ---
-    result = query_hf(MODELS["spam"], text)
-    if result and isinstance(result, list) and len(result) > 0:
-        label = result[0]["label"]
-        confidence = result[0]["score"]
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
             score += 25
-    # --- 6. Final verdict based on score ---
     if score >= 50:
         verdict = "Malicious / Spam"
     elif score >= 20:

 import os
 import re
 HF_API_KEY = os.getenv("HF_API_KEY")
 HF_HEADERS = {"Authorization": f"Bearer {HF_API_KEY}"} if HF_API_KEY else {}
     "spam": "mrm8488/bert-tiny-finetuned-sms-spam-detection",
 }
 SUSPICIOUS_PATTERNS = [
     r"verify your account",
     r"urgent action",
     r"risk-free",
 ]
 def query_hf(model, text):
     if not HF_API_KEY:
         return None
             f"https://api-inference.huggingface.co/models/{model}",
             headers=HF_HEADERS,
             json={"inputs": text[:1000]},
+            timeout=15,
         )
         return res.json()
     except Exception:
         return None
+def parse_hf_result(result):
+    # Common shapes: [{"label": "...", "score": ...}] or {"labels":[...], "scores":[...]}
+    if not result:
+        return None, None
+    if isinstance(result, list) and result and isinstance(result[0], dict):
+        if "label" in result[0] and "score" in result[0]:
+            return result[0]["label"], result[0]["score"]
+    if isinstance(result, dict):
+        labels = result.get("labels") or []
+        scores = result.get("scores") or []
+        if labels and scores:
+            return labels[0], scores[0]
+    return None, None
 def analyze_body(text):
     findings = []
     score = 0
+    body_lower = (text or "").lower()
+    highlighted_body = text or ""
+    # 1) Suspicious phrases
     for pattern in SUSPICIOUS_PATTERNS:
         matches = re.findall(pattern, body_lower)
         for match in matches:
+            display = match if isinstance(match, str) else (match[0] if match else "")
+            if not display:
+                continue
+            findings.append(f'Suspicious phrase detected: "{display}"')
+            score += 15  # tuned down to reduce instant Malicious
             highlighted_body = re.sub(
+                re.escape(display),
+                f"<mark>{display}</mark>",
+                highlighted_body,
+                flags=re.IGNORECASE,
             )
+    # 2) URLs
     urls = re.findall(r'https?://[^\s]+', body_lower)
     for url in urls:
         findings.append(f"Suspicious URL detected: {url}")
         score += 10
+        highlighted_body = re.sub(re.escape(url), f"<mark>{url}</mark>", highlighted_body, flags=re.IGNORECASE)
+    # 3) AI text detector
+    label, confidence = parse_hf_result(query_hf(MODELS["ai_detector"], text or ""))
+    if label:
         findings.append(f"Body: AI Detector → {label} (confidence {confidence:.2f})")
+    # 4) Sentiment
+    label, confidence = parse_hf_result(query_hf(MODELS["sentiment"], text or ""))
+    if label:
         findings.append(f"Body: Sentiment → {label} (confidence {confidence:.2f})")
         if label.lower() == "negative":
             score += 10
+    # 5) Spam detector
+    label, confidence = parse_hf_result(query_hf(MODELS["spam"], text or ""))
+    if label:
         findings.append(f"Body: Spam Detector → {label} (confidence {confidence:.2f})")
         if label.lower() == "spam":
             score += 25
+    # 6) Verdict
     if score >= 50:
         verdict = "Malicious / Spam"
     elif score >= 20: