CySecGuardians / header_analyzer.py
princemaxp's picture
Update header_analyzer.py
d76bad0 verified
raw
history blame
5.08 kB
import re
import difflib
import whois
from datetime import datetime
# Brand keyword -> domains that legitimately belong to that brand.
# Add further brands/domains here as required; the mapping's order is the
# order in which brands are evaluated.
BRAND_OFFICIAL = {
    "paypal": ["paypal.com"],
    "amazon": ["amazon.com"],
    "google": ["google.com", "gmail.com"],
    "microsoft": ["microsoft.com", "outlook.com", "live.com"],
    "apple": ["apple.com"],
    "flowtoscale": ["flowtoscale.com"],  # example entry
}

# Low-cost top-level domains that are frequently abused; compared against the
# last dot-separated label of the sender domain.
SUSPICIOUS_TLDS = {
    "info",
    "xyz",
    "top",
    "click",
    "work",
    "loan",
    "tk",
}
def get_domain_age_days(domain: str):
    """Return the age of *domain* in whole days, or None if it is unknown.

    The creation date is read from the ``creation_date`` attribute of the
    record returned by ``whois.whois(domain)``.

    Args:
        domain: Domain name to look up.

    Returns:
        Number of days between the creation date and now, or ``None`` when
        the lookup fails or no usable creation date is present.

    The lookup is best-effort: any exception raised while querying or
    reading the record is swallowed and reported as ``None`` so that a
    failing WHOIS query never aborts header analysis.
    """
    try:
        record = whois.whois(domain)
        creation_date = record.creation_date

        # Some records carry several creation dates; use the first usable one
        # instead of failing when the leading entry is empty.
        if isinstance(creation_date, (list, tuple)):
            creation_date = next((d for d in creation_date if d), None)

        # Strings or other unparsed values cannot be subtracted from a
        # datetime; treat them as "unknown" explicitly.
        if not isinstance(creation_date, datetime):
            return None

        # Subtracting an aware datetime from a naive one raises TypeError, so
        # take "now" in the same timezone as the creation date (tzinfo is
        # None for naive values, which yields a naive local "now").
        now = datetime.now(creation_date.tzinfo)
        return (now - creation_date).days
    except Exception:
        # Deliberate best-effort: WHOIS failures must not break the caller.
        return None
# Domain of an address enclosed in angle brackets, e.g. "Name <user@host>".
_ANGLE_ADDR_DOMAIN_RE = re.compile(r'<[^<>]*@([a-zA-Z0-9.-]+)\s*>')
# Fallback: domain following the first "@" anywhere in the value.
_ANY_ADDR_DOMAIN_RE = re.compile(r'@([a-zA-Z0-9.-]+)')
# Free mailbox providers that earn a small score when used as sender domain.
_FREE_MAIL_PROVIDERS = ("gmail.com", "yahoo.com", "outlook.com", "hotmail.com")


def _get_header(headers, name):
    """Return the value of header *name*, matching the name case-insensitively.

    The exact spelling is tried first; otherwise the first non-empty value
    whose key equals *name* ignoring case is used. Returns "" when absent.
    """
    value = headers.get(name)
    if value:
        return value
    wanted = name.lower()
    for key, candidate in headers.items():
        if isinstance(key, str) and key.lower() == wanted and candidate:
            return candidate
    return ""


def _extract_domain(address):
    """Return the lower-cased domain of an address header value, or ""."""
    if not address:
        return ""
    # Prefer the address in angle brackets: a display name may itself contain
    # an "@" (e.g. '"support@paypal.com" <x@evil.tk>') and must not be taken
    # for the real sender. The last bracketed address wins.
    bracketed = _ANGLE_ADDR_DOMAIN_RE.findall(address)
    if bracketed:
        domain = bracketed[-1]
    else:
        bare = _ANY_ADDR_DOMAIN_RE.search(address)
        if not bare:
            return ""
        domain = bare.group(1)
    # A trailing dot denotes the same domain; drop it before comparisons.
    return domain.lower().rstrip(".")


def _is_official_domain(domain):
    """Return True if *domain* is, or is a subdomain of, any official domain."""
    return any(
        domain == official or domain.endswith("." + official)
        for official_list in BRAND_OFFICIAL.values()
        for official in official_list
    )


def _auth_findings(auth_results):
    """Return (finding, points) pairs for the lower-cased auth results text."""
    hits = []
    # Strict auth failures
    if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
        hits.append(("Header: DKIM check failed", 30))
    if "spf=fail" in auth_results:
        hits.append(("Header: SPF check failed", 30))
    if "dmarc=fail" in auth_results:
        hits.append(("Header: DMARC check failed", 30))
    # Softer auth problems
    if any(x in auth_results for x in ("spf=softfail", "spf=neutral", "spf=none")):
        hits.append(("Header: SPF not properly aligned", 10))
    if any(x in auth_results for x in ("dmarc=temperror", "dkim=temperror")):
        hits.append(("Header: Temporary auth errors (DKIM/DMARC)", 5))
    return hits


def _sender_domain_findings(from_domain):
    """Return (finding, points) pairs for a non-empty sender domain."""
    hits = []
    parts = from_domain.split('.')
    tld = parts[-1]

    # free provider detection
    if from_domain in _FREE_MAIL_PROVIDERS:
        hits.append((f"Header: Free email provider used ({from_domain})", 8))

    # suspicious domain structure: many labels, or digits in the first label
    if len(parts) > 4 or any(ch.isdigit() for ch in parts[0]):
        hits.append((f"Header: Suspicious-looking domain structure ({from_domain})", 15))

    # suspicious TLD
    if tld in SUSPICIOUS_TLDS:
        hits.append((f"Header: Suspicious/abused TLD used ({tld})", 20))

    # Domain age check (None means the age could not be determined)
    age_days = get_domain_age_days(from_domain)
    if age_days is not None and age_days < 90:
        hits.append((f"Header: Domain {from_domain} is very new ({age_days} days old)", 35))

    # brand-squatting / look-alike check
    # A domain that is official for any brand is never a look-alike; without
    # this exemption e.g. "mail.paypal.com" (vs "paypal.com") or "apple.com"
    # (vs "paypal.com") exceed the similarity threshold and are flagged.
    officially_owned = _is_official_domain(from_domain)
    for brand, official_list in BRAND_OFFICIAL.items():
        if brand in from_domain:
            is_official = any(
                from_domain == off or from_domain.endswith("." + off)
                for off in official_list
            )
            if not is_official:
                hits.append((
                    f"Header: Domain contains brand '{brand}' but is not official ({from_domain})",
                    30,
                ))
        if officially_owned:
            continue
        # fuzzy look-alike
        for legit in official_list:
            ratio = difflib.SequenceMatcher(None, from_domain, legit).ratio()
            if ratio > 0.7 and from_domain != legit:
                hits.append((
                    f"Header: Possible look-alike spoofing ({from_domain} vs {legit})",
                    40,
                ))
    return hits


def analyze_headers(headers, body=""):
    """Score an email's headers (and optionally its body) for phishing signs.

    Args:
        headers: Mapping of header name to value; ``None`` is treated as an
            empty mapping. Header names are matched case-insensitively.
        body: Optional message body text, used only for the content-to-domain
            mismatch check.

    Returns:
        A ``(findings, score)`` tuple: a list of human-readable finding
        strings and the sum of the points of all findings. When nothing is
        found the result is ``(["No suspicious issues found in headers."], 0)``.

    Checks, in order: authentication results (DKIM/SPF/DMARC), Reply-To
    versus From domain, sender-domain heuristics (free provider, structure,
    TLD, age, brand squatting, look-alikes), body/domain mismatch, and BCC.
    """
    headers = headers or {}
    hits = []

    auth_results = _get_header(headers, "Authentication-Results").lower()
    hits.extend(_auth_findings(auth_results))

    # From and Reply-To domain compare (only when both carry an address)
    from_domain = _extract_domain(_get_header(headers, "From"))
    reply_domain = _extract_domain(_get_header(headers, "Reply-To"))
    if from_domain and reply_domain and from_domain != reply_domain:
        hits.append((
            f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})",
            20,
        ))

    # Sender domain analysis
    if from_domain:
        hits.extend(_sender_domain_findings(from_domain))

    # Content-to-domain mismatch (organization spoofing)
    if body and "ravenmail" in body.lower() and "ravenmail" not in from_domain:
        hits.append((
            "Header/Content: Possible spoofing — mentions RavenMail but sender domain is unrelated",
            40,
        ))

    # Bcc usage
    if _get_header(headers, "Bcc"):
        hits.append(("Header: Email sent with BCC (common in mass phishing)", 12))

    if not hits:
        return ["No suspicious issues found in headers."], 0
    findings = [message for message, _ in hits]
    score = sum(points for _, points in hits)
    return findings, score