File size: 5,084 Bytes
14fecff
d76bad0
 
 
14fecff
d76bad0
da36e3d
 
 
 
 
 
d76bad0
da36e3d
 
d76bad0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
da36e3d
d76bad0
da36e3d
 
49f1a98
14fecff
292c399
49f1a98
da36e3d
 
 
292c399
49f1a98
da36e3d
292c399
14fecff
da36e3d
292c399
49f1a98
da36e3d
49f1a98
da36e3d
292c399
 
 
 
 
 
14fecff
d76bad0
292c399
 
da36e3d
 
 
 
 
 
 
292c399
d76bad0
 
 
 
 
 
 
292c399
da36e3d
d76bad0
 
da36e3d
d76bad0
 
da36e3d
d76bad0
14fecff
 
d76bad0
 
 
 
 
 
 
 
 
 
 
 
da36e3d
d76bad0
 
 
 
 
da36e3d
d76bad0
da36e3d
 
d76bad0
 
 
 
 
 
 
 
 
 
 
 
 
da36e3d
14fecff
da36e3d
14fecff
 
 
49f1a98
14fecff
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import re
import difflib
import whois
from datetime import datetime

# Brand keyword -> list of that brand's official domains (add entries as needed).
# Key order is preserved and determines the order brands are checked in.
BRAND_OFFICIAL = dict(
    paypal=["paypal.com"],
    amazon=["amazon.com"],
    google=["google.com", "gmail.com"],
    microsoft=["microsoft.com", "outlook.com", "live.com"],
    apple=["apple.com"],
    flowtoscale=["flowtoscale.com"],  # sample entry
)

# Cheap TLDs that are often abused; compared against the last label of a domain.
SUSPICIOUS_TLDS = {
    "info",
    "xyz",
    "top",
    "click",
    "work",
    "loan",
    "tk",
}

def get_domain_age_days(domain: str):
    """Return the age of *domain* in whole days, or None if it cannot be determined.

    The creation date is read from the WHOIS record returned by
    ``whois.whois(domain)``.  None is returned when the lookup raises, when the
    record carries no creation date, or when the creation date is not a
    ``datetime`` (for example an unparsed string).

    Args:
        domain: Domain name to look up.

    Returns:
        Number of days between the creation date and now, or None.
    """
    try:
        record = whois.whois(domain)
        creation_date = record.creation_date
    except Exception:
        # Deliberate best-effort: any lookup failure means "age unknown"
        # rather than aborting the caller's analysis.
        return None

    # Some records carry several creation dates; use the first usable one.
    if isinstance(creation_date, (list, tuple)):
        creation_date = next(
            (item for item in creation_date if isinstance(item, datetime)),
            None,
        )
    if not isinstance(creation_date, datetime):
        return None

    # Take "now" in the creation date's own timezone (None -> naive local time)
    # so that naive and aware datetimes are never mixed in the subtraction.
    now = datetime.now(creation_date.tzinfo)
    return (now - creation_date).days

def _lookup_header(headers, name):
    """Return header *name* as text, matching the header name case-insensitively.

    The exact spelling is tried first; otherwise the first non-empty entry whose
    key equals *name* ignoring case is used.  Missing/empty headers yield "".
    """
    value = headers.get(name)
    if not value:
        wanted = name.lower()
        items = getattr(headers, "items", None)
        for key, candidate in (items() if callable(items) else ()):
            if candidate and isinstance(key, str) and key.lower() == wanted:
                value = candidate
                break
    return str(value) if value else ""


def _address_domain(header_value):
    """Return the lower-cased domain of the mailbox in an address header, or ""."""
    # Function-scope import keeps the file's top-level import block untouched.
    from email.utils import parseaddr

    _, address = parseaddr(header_value)
    if "@" in address:
        # Use the text after the LAST "@": a quoted local part may contain "@".
        match = re.match(r'[a-zA-Z0-9.-]+', address.rpartition("@")[2])
        domain = match.group(0) if match else ""
    else:
        # parseaddr could not extract a mailbox; fall back to the last
        # "@domain" token in the raw text, since a spoofed display name
        # normally comes before the real address.
        candidates = re.findall(r'@([a-zA-Z0-9.-]+)', header_value)
        domain = candidates[-1] if candidates else ""
    # A trailing dot (fully-qualified form) names the same domain.
    return domain.lower().rstrip(".")


def analyze_headers(headers, body=""):
    """
    Score an email's headers (and optionally its body) for phishing indicators.

    Input: headers dict (header names are matched case-insensitively; None is
           treated as empty), optional body text
    Output: (findings: list[str], score: int)

    Each triggered check appends one human-readable finding and adds its weight
    to the score.  When nothing triggers, a single "no issues" message is
    returned with a score of 0.

    Note: when a From domain is present, a WHOIS lookup is made through
    get_domain_age_days().
    """
    findings = []
    score = 0
    headers = headers or {}

    auth_results = _lookup_header(headers, "Authentication-Results").lower()

    # Strict auth failures
    if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
        findings.append("Header: DKIM check failed")
        score += 30
    if "spf=fail" in auth_results:
        findings.append("Header: SPF check failed")
        score += 30
    if "dmarc=fail" in auth_results:
        findings.append("Header: DMARC check failed")
        score += 30

    # Softer auth problems
    if any(x in auth_results for x in ["spf=softfail", "spf=neutral", "spf=none"]):
        findings.append("Header: SPF not properly aligned")
        score += 10
    if any(x in auth_results for x in ["dmarc=temperror", "dkim=temperror"]):
        findings.append("Header: Temporary auth errors (DKIM/DMARC)")
        score += 5

    # From and Reply-To domain compare (only when both domains are known)
    from_domain = _address_domain(_lookup_header(headers, "From"))
    reply_domain = _address_domain(_lookup_header(headers, "Reply-To"))
    if from_domain and reply_domain and from_domain != reply_domain:
        findings.append(f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})")
        score += 20

    # Sender domain analysis
    if from_domain:
        parts = from_domain.split('.')
        tld = parts[-1]

        # free provider detection
        if from_domain in ["gmail.com", "yahoo.com", "outlook.com", "hotmail.com"]:
            findings.append(f"Header: Free email provider used ({from_domain})")
            score += 8

        # suspicious domain structure: many labels, or digits in the first label
        if len(parts) > 4 or any(ch.isdigit() for ch in parts[0]):
            findings.append(f"Header: Suspicious-looking domain structure ({from_domain})")
            score += 15

        # suspicious TLD
        if tld in SUSPICIOUS_TLDS:
            findings.append(f"Header: Suspicious/abused TLD used ({tld})")
            score += 20

        # Domain age check
        age_days = get_domain_age_days(from_domain)
        if age_days is not None and age_days < 90:
            findings.append(f"Header: Domain {from_domain} is very new ({age_days} days old)")
            score += 35

        # A domain that is (a subdomain of) ANY listed official domain must not
        # be reported as a look-alike of another one: e.g. "mail.paypal.com"
        # vs "paypal.com", or "live.com" vs "apple.com", both exceed the
        # similarity threshold.
        is_known_official = any(
            from_domain == off or from_domain.endswith("." + off)
            for offs in BRAND_OFFICIAL.values()
            for off in offs
        )

        # brand-squatting / look-alike check
        for brand, official_list in BRAND_OFFICIAL.items():
            if brand in from_domain:
                is_official = any(
                    from_domain.endswith("." + off) or from_domain == off
                    for off in official_list
                )
                if not is_official:
                    findings.append(f"Header: Domain contains brand '{brand}' but is not official ({from_domain})")
                    score += 30

            if is_known_official:
                continue

            # fuzzy look-alike
            for legit in official_list:
                ratio = difflib.SequenceMatcher(None, from_domain, legit).ratio()
                if ratio > 0.7 and from_domain != legit:
                    findings.append(f"Header: Possible look-alike spoofing ({from_domain} vs {legit})")
                    score += 40

        # Content-to-domain mismatch (organization spoofing)
        if body and "ravenmail" in body.lower() and "ravenmail" not in from_domain:
            findings.append("Header/Content: Possible spoofing — mentions RavenMail but sender domain is unrelated")
            score += 40

    # Bcc usage
    if _lookup_header(headers, "Bcc"):
        findings.append("Header: Email sent with BCC (common in mass phishing)")
        score += 12

    if not findings:
        return ["No suspicious issues found in headers."], 0

    return findings, score