Spaces:
Sleeping
Sleeping
| import re | |
| import difflib | |
| import whois | |
| from datetime import datetime | |
# Legitimate sending domains for each protected brand, keyed by the brand
# keyword that is searched for in sender domains. Extend as needed.
BRAND_OFFICIAL = {
    "paypal": [
        "paypal.com",
    ],
    "amazon": [
        "amazon.com",
    ],
    "google": [
        "google.com",
        "gmail.com",
    ],
    "microsoft": [
        "microsoft.com",
        "outlook.com",
        "live.com",
    ],
    "apple": [
        "apple.com",
    ],
    # Example entry for a custom (non-big-tech) brand.
    "flowtoscale": [
        "flowtoscale.com",
    ],
}

# Cheap top-level domains that are often abused; a sender using one of these
# is treated as suspicious.
SUSPICIOUS_TLDS = {
    "info",
    "xyz",
    "top",
    "click",
    "work",
    "loan",
    "tk",
}
def get_domain_age_days(domain: str):
    """Return the age of *domain* in whole days, or None if it is unknown.

    The creation date is taken from a WHOIS lookup. None is returned when
    *domain* is empty, when the lookup fails for any reason, or when the
    record carries no usable creation date.

    Creation dates are compared in UTC. A naive datetime from the WHOIS
    record is assumed to be UTC; an aware one is used as-is. This avoids the
    TypeError that mixing naive and aware datetimes would raise (and which
    would otherwise make the age silently unavailable).
    """
    # Local import: the file-level import only brings in `datetime`.
    from datetime import timezone

    if not domain:
        return None

    try:
        creation_date = whois.whois(domain).creation_date
    except Exception:
        # Deliberate best-effort boundary: network errors, parser errors and
        # unregistered domains all simply mean "age unknown".
        return None

    # The lookup sometimes yields a list of dates instead of a single value.
    if isinstance(creation_date, (list, tuple)):
        candidates = creation_date
    else:
        candidates = [creation_date]

    for value in candidates:
        # Skip missing or unparsed (e.g. string) entries.
        if not isinstance(value, datetime):
            continue
        if value.tzinfo is None or value.utcoffset() is None:
            value = value.replace(tzinfo=timezone.utc)
        return (datetime.now(timezone.utc) - value).days

    return None
# Captures the domain part of anything that looks like an e-mail address.
_ADDRESS_DOMAIN_RE = re.compile(r'@([a-zA-Z0-9.-]+)')

# Consumer mailbox providers; mail from these is scored slightly higher.
_FREE_MAIL_PROVIDERS = ("gmail.com", "yahoo.com", "outlook.com", "hotmail.com")


def _get_header(headers, name):
    """Return the first non-empty value of header *name* as text, else ""."""
    exact = headers.get(name)
    if exact:
        return str(exact)
    # Header names are case-insensitive, but a plain dict is not.
    wanted = name.lower()
    for key, value in headers.items():
        if isinstance(key, str) and key.lower() == wanted and value:
            return str(value)
    return ""


def _extract_domain(raw_value):
    """Return the lower-cased domain of the address in a From/Reply-To value."""
    # Local import keeps this block self-contained.
    from email.utils import parseaddr

    if not raw_value:
        return ""
    # parseaddr separates the display name from the real address, so an
    # address-like display name ('"support@paypal.com" <x@evil.xyz>') cannot
    # masquerade as the sender domain.
    _, address = parseaddr(raw_value)
    if "@" in address:
        domain = address.rpartition("@")[2]
    else:
        # Unparseable value: fall back to the last address-looking token,
        # which is where the real address sits in "Name <addr>" forms.
        matches = _ADDRESS_DOMAIN_RE.findall(raw_value)
        domain = matches[-1] if matches else ""
    # A trailing dot would otherwise produce an empty TLD.
    return domain.strip().strip(".").lower()


def _is_official_domain(domain, official_domains):
    """Return True if *domain* equals or is a subdomain of an official domain."""
    return any(
        domain == official or domain.endswith("." + official)
        for official in official_domains
    )


def analyze_headers(headers, body=""):
    """Score e-mail headers (and optionally the body) for phishing indicators.

    Args:
        headers: mapping of header name to value; None is treated as empty.
            Header names are matched case-insensitively.
        body: optional message text, used only for the content/sender
            mismatch check.

    Returns:
        A ``(findings, score)`` tuple: a list of human-readable finding
        strings and the summed integer risk score. When nothing is found the
        result is ``(["No suspicious issues found in headers."], 0)``.
    """
    findings = []
    score = 0
    headers = headers or {}

    auth_results = _get_header(headers, "Authentication-Results").lower()

    # Strict auth failures
    if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
        findings.append("Header: DKIM check failed")
        score += 30
    if "spf=fail" in auth_results:
        findings.append("Header: SPF check failed")
        score += 30
    if "dmarc=fail" in auth_results:
        findings.append("Header: DMARC check failed")
        score += 30

    # Softer auth problems
    if any(x in auth_results for x in ("spf=softfail", "spf=neutral", "spf=none")):
        findings.append("Header: SPF not properly aligned")
        score += 10
    if any(x in auth_results for x in ("dmarc=temperror", "dkim=temperror")):
        findings.append("Header: Temporary auth errors (DKIM/DMARC)")
        score += 5

    # From and Reply-To domain compare
    from_domain = _extract_domain(_get_header(headers, "From"))
    reply_domain = _extract_domain(_get_header(headers, "Reply-To"))
    if from_domain and reply_domain and from_domain != reply_domain:
        findings.append(f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})")
        score += 20

    # Sender domain analysis
    if from_domain:
        parts = from_domain.split('.')
        tld = parts[-1]

        # free provider detection
        if from_domain in _FREE_MAIL_PROVIDERS:
            findings.append(f"Header: Free email provider used ({from_domain})")
            score += 8

        # suspicious domain structure: very deep nesting or digits in the
        # left-most label
        if len(parts) > 4 or any(ch.isdigit() for ch in parts[0]):
            findings.append(f"Header: Suspicious-looking domain structure ({from_domain})")
            score += 15

        # suspicious TLD
        if tld in SUSPICIOUS_TLDS:
            findings.append(f"Header: Suspicious/abused TLD used ({tld})")
            score += 20

        # Domain age check
        age_days = get_domain_age_days(from_domain)
        if age_days is not None and age_days < 90:
            findings.append(f"Header: Domain {from_domain} is very new ({age_days} days old)")
            score += 35

        # brand-squatting / look-alike check
        for brand, official_list in BRAND_OFFICIAL.items():
            # An official domain or one of its subdomains (mail.paypal.com)
            # is by definition not a look-alike of that brand; without this
            # guard the fuzzy match below would flag it.
            if _is_official_domain(from_domain, official_list):
                continue

            if brand in from_domain:
                findings.append(f"Header: Domain contains brand '{brand}' but is not official ({from_domain})")
                score += 30

            # fuzzy look-alike
            for legit in official_list:
                ratio = difflib.SequenceMatcher(None, from_domain, legit).ratio()
                if ratio > 0.7:
                    findings.append(f"Header: Possible look-alike spoofing ({from_domain} vs {legit})")
                    score += 40

        # Content-to-domain mismatch (organization spoofing)
        if body and "ravenmail" in body.lower() and "ravenmail" not in from_domain:
            findings.append("Header/Content: Possible spoofing — mentions RavenMail but sender domain is unrelated")
            score += 40

    # Bcc usage
    if _get_header(headers, "Bcc"):
        findings.append("Header: Email sent with BCC (common in mass phishing)")
        score += 12

    if not findings:
        return ["No suspicious issues found in headers."], 0
    return findings, score