Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Community

princemaxp committed on Sep 5, 2025

Commit

da36e3d

verified ·

1 Parent(s): f88bfb7

Update header_analyzer.py

Browse files

Files changed (1) hide show

header_analyzer.py +51 -22

header_analyzer.py CHANGED Viewed

@@ -1,23 +1,39 @@
 import re
 def analyze_headers(headers):
     findings = []
     score = 0
     headers = headers or {}
-    # 1) SPF / DKIM / DMARC
-    auth_results = (headers.get("Authentication-Results") or "").lower()
     if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
         findings.append("Header: DKIM check failed")
-        score += 25
     if "spf=fail" in auth_results:
         findings.append("Header: SPF check failed")
-        score += 25
     if "dmarc=fail" in auth_results:
         findings.append("Header: DMARC check failed")
-        score += 25
-    # softer signals
     if any(x in auth_results for x in ["spf=softfail", "spf=neutral", "spf=none"]):
         findings.append("Header: SPF not properly aligned")
         score += 10
@@ -25,33 +41,46 @@ def analyze_headers(headers):
         findings.append("Header: Temporary auth errors (DKIM/DMARC)")
         score += 5
-    # 2) From / Reply-To mismatch (domain-level)
     from_addr = headers.get("From", "") or ""
     reply_to = headers.get("Reply-To", "") or ""
-    from_domain = re.search(r'@([a-zA-Z0-9.-]+)', from_addr)
-    reply_domain = re.search(r'@([a-zA-Z0-9.-]+)', reply_to)
-    if from_domain and reply_domain:
-        if from_domain.group(1).lower() != reply_domain.group(1).lower():
-            findings.append(f"Header: Reply-To domain mismatch (From: {from_domain.group(1)}, Reply-To: {reply_domain.group(1)})")
             score += 20
-    # 3) Suspicious sender domain
-    sender = headers.get("From", "") or ""
     match = re.search(r'@([a-zA-Z0-9.-]+)', sender)
     if match:
         domain = match.group(1).lower()
-        parts = domain.split(".")
-        if any(free in domain for free in ["gmail.com", "yahoo.com", "outlook.com"]):
             findings.append(f"Header: Free email provider used ({domain})")
-            score += 10
-        if len(parts) > 4 or (parts and any(char.isdigit() for char in parts[0])):
-            findings.append(f"Header: Suspicious-looking domain ({domain})")
             score += 15
-    # 4) BCC usage (note: often stripped)
-    if headers.get("Bcc"):
         findings.append("Header: Email sent with BCC (common in mass phishing)")
-        score += 15
     if not findings:
         return ["No suspicious issues found in headers."], 0

+# header_analyzer.py
 import re
+# Simple brand list and their official domains for brand-squatting detection
+BRAND_OFFICIAL = {
+    "paypal": ["paypal.com"],
+    "amazon": ["amazon.com"],
+    "google": ["google.com", "gmail.com"],
+    "microsoft": ["microsoft.com", "outlook.com", "live.com"],
+    "apple": ["apple.com"],
+    "bank": [],  # generic bank keyword detection (no official domain)
+}
 def analyze_headers(headers):
+    """
+    Input: headers dict
+    Output: (findings: list[str], score: int)
+    """
     findings = []
     score = 0
     headers = headers or {}
+    auth_results = (headers.get("Authentication-Results") or headers.get("Authentication-results") or "").lower()
+    # Strict auth failures
     if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
         findings.append("Header: DKIM check failed")
+        score += 30
     if "spf=fail" in auth_results:
         findings.append("Header: SPF check failed")
+        score += 30
     if "dmarc=fail" in auth_results:
         findings.append("Header: DMARC check failed")
+        score += 30
+    # Softer auth problems
     if any(x in auth_results for x in ["spf=softfail", "spf=neutral", "spf=none"]):
         findings.append("Header: SPF not properly aligned")
         score += 10
         findings.append("Header: Temporary auth errors (DKIM/DMARC)")
         score += 5
+    # From and Reply-To domain compare (domain-level)
     from_addr = headers.get("From", "") or ""
     reply_to = headers.get("Reply-To", "") or ""
+    from_domain_m = re.search(r'@([a-zA-Z0-9.-]+)', from_addr)
+    reply_domain_m = re.search(r'@([a-zA-Z0-9.-]+)', reply_to)
+    if from_domain_m and reply_domain_m:
+        from_domain = from_domain_m.group(1).lower()
+        reply_domain = reply_domain_m.group(1).lower()
+        if from_domain != reply_domain:
+            findings.append(f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})")
             score += 20
+    # Suspicious looking sender domain & brand-squatting detection
+    sender = from_addr or ""
     match = re.search(r'@([a-zA-Z0-9.-]+)', sender)
     if match:
         domain = match.group(1).lower()
+        parts = domain.split('.')
+        # free provider detection
+        if any(free in domain for free in ["gmail.com", "yahoo.com", "outlook.com", "hotmail.com"]):
             findings.append(f"Header: Free email provider used ({domain})")
+            score += 8
+        # suspicious structural domain heuristics
+        if len(parts) > 4 or (parts and any(ch.isdigit() for ch in parts[0])):
+            findings.append(f"Header: Suspicious-looking domain structure ({domain})")
             score += 15
+        # brand-squatting: if domain contains a known brand but isn't exactly an official brand domain
+        for brand, official_list in BRAND_OFFICIAL.items():
+            if brand in domain:
+                # check if domain is exactly an official domain or subdomain of official
+                is_official = any(domain.endswith("." + off) or domain == off for off in official_list) if official_list else False
+                if not is_official:
+                    findings.append(f"Header: Domain contains brand '{brand}' but is not official ({domain})")
+                    score += 30
+    # Bcc usage detection
+    if headers.get("Bcc") or headers.get("bcc"):
         findings.append("Header: Email sent with BCC (common in mass phishing)")
+        score += 12
     if not findings:
         return ["No suspicious issues found in headers."], 0