Spaces:
Sleeping
Sleeping
File size: 5,084 Bytes
14fecff d76bad0 14fecff d76bad0 da36e3d d76bad0 da36e3d d76bad0 da36e3d d76bad0 da36e3d 49f1a98 14fecff 292c399 49f1a98 da36e3d 292c399 49f1a98 da36e3d 292c399 14fecff da36e3d 292c399 49f1a98 da36e3d 49f1a98 da36e3d 292c399 14fecff d76bad0 292c399 da36e3d 292c399 d76bad0 292c399 da36e3d d76bad0 da36e3d d76bad0 da36e3d d76bad0 14fecff d76bad0 da36e3d d76bad0 da36e3d d76bad0 da36e3d d76bad0 da36e3d 14fecff da36e3d 14fecff 49f1a98 14fecff |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import difflib
import re
from datetime import datetime
from email.utils import parseaddr

import whois
# Brand keyword -> domains that legitimately belong to that brand.
# The keyword is searched for inside sender domains; extend the table as needed.
BRAND_OFFICIAL = {
    "paypal": ["paypal.com"],
    "amazon": ["amazon.com"],
    "google": ["google.com", "gmail.com"],
    "microsoft": ["microsoft.com", "outlook.com", "live.com"],
    "apple": ["apple.com"],
    "flowtoscale": ["flowtoscale.com"],  # example entry for a custom brand
}

# Top-level domains treated as suspicious because they are cheap and often abused.
SUSPICIOUS_TLDS = {
    "info",
    "xyz",
    "top",
    "click",
    "work",
    "loan",
    "tk",
}
def get_domain_age_days(domain: str, *, lookup=None):
    """Return the age of *domain* in whole days, or None if it cannot be determined.

    Args:
        domain: Domain name to look up.
        lookup: Optional callable that takes the domain and returns an object
            with a ``creation_date`` attribute. Defaults to ``whois.whois``;
            supplying another callable lets callers avoid a network lookup.

    Returns:
        The number of days between the record's creation date and now, or
        None when the lookup fails or yields no usable creation date.
    """
    try:
        record = (lookup or whois.whois)(domain)
        created = record.creation_date
        # Some records carry several creation dates; use the first one.
        if isinstance(created, (list, tuple)):
            created = created[0] if created else None
        if not isinstance(created, datetime):
            return None
        # Compute "now" in the creation date's own timezone (or naive when it
        # has none): subtracting an aware datetime from a naive one raises
        # TypeError, which previously turned every such record into None.
        return (datetime.now(tz=created.tzinfo) - created).days
    except Exception:
        # Deliberately best effort: a failed lookup must never break the
        # caller, so any error is reported as "age unknown".
        return None
# Captures the domain that follows the first "@" in a string.
_DOMAIN_RE = re.compile(r"@([a-zA-Z0-9.-]+)")

# Sender domains reported as free email providers.
_FREE_EMAIL_PROVIDERS = frozenset({"gmail.com", "yahoo.com", "outlook.com", "hotmail.com"})

# (substrings searched in the lower-cased Authentication-Results value,
#  finding text, score), listed in the order findings are reported.
_AUTH_RULES = (
    (("dkim=fail", "dkim=permerror"), "Header: DKIM check failed", 30),
    (("spf=fail",), "Header: SPF check failed", 30),
    (("dmarc=fail",), "Header: DMARC check failed", 30),
    (("spf=softfail", "spf=neutral", "spf=none"), "Header: SPF not properly aligned", 10),
    (("dmarc=temperror", "dkim=temperror"), "Header: Temporary auth errors (DKIM/DMARC)", 5),
)


def _get_header(headers, name):
    """Return header *name* as a string, matching the name case-insensitively ("" if absent)."""
    value = headers.get(name)
    if not value:
        wanted = name.lower()
        for key, candidate in headers.items():
            if candidate and isinstance(key, str) and key.lower() == wanted:
                value = candidate
                break
    return str(value) if value else ""


def _extract_domain(address_header):
    """Return the lower-cased domain of the address in an address header, or ""."""
    # Parse the real mailbox first so an "@" inside the display name
    # (e.g. '"support@paypal.com" <evil@bad.xyz>') cannot pose as the domain.
    _, address = parseaddr(address_header)
    # Fall back to scanning the raw header when it cannot be parsed.
    match = _DOMAIN_RE.search(address) or _DOMAIN_RE.search(address_header)
    if not match:
        return ""
    # Drop leading/trailing dots so "example.com." compares equal to "example.com".
    return match.group(1).lower().strip(".")


def _is_official(domain, official_domains):
    """Return True if *domain* equals, or is a subdomain of, any listed domain."""
    return any(domain == off or domain.endswith("." + off) for off in official_domains)


def _check_authentication(auth_results):
    """Return (findings, score) for a lower-cased Authentication-Results value."""
    findings = []
    score = 0
    for needles, message, points in _AUTH_RULES:
        if any(needle in auth_results for needle in needles):
            findings.append(message)
            score += points
    return findings, score


def _check_sender_domain(from_domain):
    """Return (findings, score) for structural, age and brand checks on the sender domain."""
    findings = []
    score = 0
    labels = from_domain.split(".")
    tld = labels[-1]

    if from_domain in _FREE_EMAIL_PROVIDERS:
        findings.append(f"Header: Free email provider used ({from_domain})")
        score += 8

    # Many labels, or digits in the left-most label, are treated as suspicious.
    if len(labels) > 4 or any(ch.isdigit() for ch in labels[0]):
        findings.append(f"Header: Suspicious-looking domain structure ({from_domain})")
        score += 15

    if tld in SUSPICIOUS_TLDS:
        findings.append(f"Header: Suspicious/abused TLD used ({tld})")
        score += 20

    age_days = get_domain_age_days(from_domain)
    if age_days is not None and age_days < 90:
        findings.append(f"Header: Domain {from_domain} is very new ({age_days} days old)")
        score += 35

    # A sender on (or under) any official domain is never a look-alike;
    # without this guard "mail.paypal.com" scores 0.8 against "paypal.com"
    # and "live.com" scores above 0.7 against "apple.com".
    sender_is_official = any(
        _is_official(from_domain, domains) for domains in BRAND_OFFICIAL.values()
    )

    for brand, official_list in BRAND_OFFICIAL.items():
        # Brand keyword embedded in a domain that is not one of the brand's own.
        if brand in from_domain and not _is_official(from_domain, official_list):
            findings.append(
                f"Header: Domain contains brand '{brand}' but is not official ({from_domain})"
            )
            score += 30

        if sender_is_official:
            continue
        # Fuzzy look-alike: close to, but not the same as, an official domain.
        for legit in official_list:
            ratio = difflib.SequenceMatcher(None, from_domain, legit).ratio()
            if ratio > 0.7:
                findings.append(
                    f"Header: Possible look-alike spoofing ({from_domain} vs {legit})"
                )
                score += 40

    return findings, score


def analyze_headers(headers, body=""):
    """Score an email's headers (and optionally its body) for phishing indicators.

    Header names are matched case-insensitively. When a sender domain is
    found, its age is looked up through ``get_domain_age_days``.

    Args:
        headers: Mapping of header names to values; None or empty means no headers.
        body: Optional body text, used for the content-to-domain mismatch check.

    Returns:
        A ``(findings, score)`` tuple: ``findings`` is a list of human-readable
        strings and ``score`` the sum of the points of every check that fired.
        When nothing fires, a single "no issues" message and a score of 0.
    """
    headers = headers or {}

    # Authentication-Results: hard failures first, then softer problems.
    findings, score = _check_authentication(
        _get_header(headers, "Authentication-Results").lower()
    )

    # From / Reply-To comparison (only when both carry a domain).
    from_domain = _extract_domain(_get_header(headers, "From"))
    reply_domain = _extract_domain(_get_header(headers, "Reply-To"))
    if from_domain and reply_domain and from_domain != reply_domain:
        findings.append(
            f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})"
        )
        score += 20

    # Sender domain analysis.
    if from_domain:
        domain_findings, domain_score = _check_sender_domain(from_domain)
        findings.extend(domain_findings)
        score += domain_score

    # Content-to-domain mismatch (organization spoofing).
    if body and "ravenmail" in body.lower() and "ravenmail" not in from_domain:
        findings.append("Header/Content: Possible spoofing — mentions RavenMail but sender domain is unrelated")
        score += 40

    # Bcc usage.
    if _get_header(headers, "Bcc"):
        findings.append("Header: Email sent with BCC (common in mass phishing)")
        score += 12

    if not findings:
        return ["No suspicious issues found in headers."], 0
    return findings, score
|