Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Files Community

CySecGuardians / header_analyzer.py

princemaxp

Update header_analyzer.py

d76bad0 verified 3 months ago

raw

history blame

5.08 kB

	import re
	import difflib
	import whois
	from datetime import datetime

	# Brand keyword -> domains that legitimately send mail for that brand.
	# Add further brands/domains here as coverage grows.
	BRAND_OFFICIAL = {
	    "paypal": ["paypal.com"],
	    "amazon": ["amazon.com"],
	    "google": ["google.com", "gmail.com"],
	    "microsoft": ["microsoft.com", "outlook.com", "live.com"],
	    "apple": ["apple.com"],
	    "flowtoscale": ["flowtoscale.com"],  # sample entry taken from a reported case
	}

	# Low-cost top-level domains that are frequently abused; compared against
	# the last label of the sender domain.
	SUSPICIOUS_TLDS = {
	    "info",
	    "xyz",
	    "top",
	    "click",
	    "work",
	    "loan",
	    "tk",
	}

	def get_domain_age_days(domain: str):
	    """Return the age of *domain* in whole days, or None if it is unknown.

	    The creation date is read from the ``creation_date`` attribute of the
	    object returned by ``whois.whois(domain)``. None is returned when the
	    domain is empty, the lookup raises, or no usable creation date is found.
	    """
	    if not domain:
	        return None

	    # Best-effort lookup: any failure of the WHOIS query (network, parsing,
	    # unknown TLD, ...) simply means "age unknown" for the caller.
	    try:
	        record = whois.whois(domain)
	        created = record.creation_date
	    except Exception:
	        return None

	    # Some registries yield several dates; use the first real datetime
	    # instead of blindly taking element 0, which may be None.
	    if isinstance(created, (list, tuple)):
	        created = next((c for c in created if isinstance(c, datetime)), None)
	    if not isinstance(created, datetime):
	        return None

	    # Subtracting an aware datetime from a naive one raises TypeError, so
	    # take "now" in the creation date's own timezone (naive stays naive).
	    now = datetime.now(created.tzinfo)
	    return (now - created).days

	def _get_header(headers, name):
	    """Return the value of header *name*, ignoring the case of the field name.

	    The exact key is tried first; if that yields nothing, every key is
	    compared case-insensitively. Returns "" when the header is absent/empty.
	    """
	    value = headers.get(name)
	    if value:
	        return value
	    # Header field names are case-insensitive and parsers normalise them
	    # differently, so "from", "FROM" and "From" must all be accepted.
	    items = getattr(headers, "items", None)
	    if callable(items):
	        wanted = name.lower()
	        for key, candidate in items():
	            if candidate and isinstance(key, str) and key.lower() == wanted:
	                return candidate
	    return ""


	def _address_domain(value):
	    """Return the lower-cased domain of the address in a From/Reply-To value.

	    Returns "" when no domain can be extracted.
	    """
	    from email.utils import parseaddr  # local import keeps the block self-contained

	    text = str(value or "")
	    # Parse the real address instead of taking the first "@": a display name
	    # such as '"billing@paypal.com" <x@evil.example>' would otherwise make
	    # the message appear to come from paypal.com.
	    address = parseaddr(text)[1]
	    domain = ""
	    if "@" in address:
	        parsed = re.match(r'[a-zA-Z0-9.-]+', address.rpartition("@")[2])
	        if parsed:
	            domain = parsed.group(0)
	    if not domain:
	        # Value could not be parsed as an address: fall back to the first
	        # "@domain" occurrence in the raw text.
	        found = re.search(r'@([a-zA-Z0-9.-]+)', text)
	        if found:
	            domain = found.group(1)
	    # A trailing dot (fully-qualified form) must not defeat comparisons.
	    return domain.lower().strip(".")


	def _is_official_domain(domain, official_list):
	    """Return True if *domain* equals, or is a subdomain of, an official domain."""
	    return any(domain == off or domain.endswith("." + off) for off in official_list)


	def analyze_headers(headers, body=""):
	    """
	    Score an email's headers for common phishing indicators.

	    Input: headers dict (header name -> value, names matched
	    case-insensitively), optional body text
	    Output: (findings: list[str], score: int)

	    Each triggered check appends one human-readable finding and adds its
	    weight to the score. When nothing triggers, a single
	    "No suspicious issues" message is returned with score 0.
	    """
	    findings = []
	    score = 0
	    headers = headers or {}

	    auth_results = str(_get_header(headers, "Authentication-Results")).lower()

	    # Strict auth failures
	    if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
	        findings.append("Header: DKIM check failed")
	        score += 30
	    if "spf=fail" in auth_results:
	        findings.append("Header: SPF check failed")
	        score += 30
	    if "dmarc=fail" in auth_results:
	        findings.append("Header: DMARC check failed")
	        score += 30

	    # Softer auth problems
	    if any(x in auth_results for x in ("spf=softfail", "spf=neutral", "spf=none")):
	        findings.append("Header: SPF not properly aligned")
	        score += 10
	    if any(x in auth_results for x in ("dmarc=temperror", "dkim=temperror")):
	        findings.append("Header: Temporary auth errors (DKIM/DMARC)")
	        score += 5

	    # From and Reply-To domain compare (only when both are present)
	    from_domain = _address_domain(_get_header(headers, "From"))
	    reply_domain = _address_domain(_get_header(headers, "Reply-To"))
	    if from_domain and reply_domain and from_domain != reply_domain:
	        findings.append(f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})")
	        score += 20

	    # Sender domain analysis
	    if from_domain:
	        parts = from_domain.split('.')
	        tld = parts[-1]

	        # free provider detection
	        if from_domain in ("gmail.com", "yahoo.com", "outlook.com", "hotmail.com"):
	            findings.append(f"Header: Free email provider used ({from_domain})")
	            score += 8

	        # suspicious domain structure: many labels, or digits in the first label
	        if len(parts) > 4 or any(ch.isdigit() for ch in parts[0]):
	            findings.append(f"Header: Suspicious-looking domain structure ({from_domain})")
	            score += 15

	        # suspicious TLD
	        if tld in SUSPICIOUS_TLDS:
	            findings.append(f"Header: Suspicious/abused TLD used ({tld})")
	            score += 20

	        # Domain age check (skipped when the lookup gives no answer)
	        age_days = get_domain_age_days(from_domain)
	        if age_days is not None and age_days < 90:
	            findings.append(f"Header: Domain {from_domain} is very new ({age_days} days old)")
	            score += 35

	        # brand-squatting / look-alike check
	        for brand, official_list in BRAND_OFFICIAL.items():
	            is_official = _is_official_domain(from_domain, official_list)

	            if brand in from_domain and not is_official:
	                findings.append(f"Header: Domain contains brand '{brand}' but is not official ({from_domain})")
	                score += 30

	            # fuzzy look-alike; genuine (sub)domains of the brand are skipped,
	            # otherwise e.g. mail.google.com is reported as spoofing google.com.
	            # NOTE(review): runs for every brand, not only on a substring hit,
	            # so typo domains like "paypa1.com" are caught - confirm intent.
	            if is_official:
	                continue
	            for legit in official_list:
	                ratio = difflib.SequenceMatcher(None, from_domain, legit).ratio()
	                if ratio > 0.7 and from_domain != legit:
	                    findings.append(f"Header: Possible look-alike spoofing ({from_domain} vs {legit})")
	                    score += 40

	    # Content-to-domain mismatch (organization spoofing)
	    # NOTE(review): evaluated even when no sender domain was found - confirm intent.
	    if body and "ravenmail" in body.lower() and "ravenmail" not in from_domain:
	        findings.append("Header/Content: Possible spoofing — mentions RavenMail but sender domain is unrelated")
	        score += 40

	    # Bcc usage
	    if _get_header(headers, "Bcc"):
	        findings.append("Header: Email sent with BCC (common in mass phishing)")
	        score += 12

	    if not findings:
	        return ["No suspicious issues found in headers."], 0

	    return findings, score