princemaxp committed on
Commit
da36e3d
·
verified ·
1 Parent(s): f88bfb7

Update header_analyzer.py

Browse files
Files changed (1) hide show
  1. header_analyzer.py +51 -22
header_analyzer.py CHANGED
@@ -1,23 +1,39 @@
 
1
  import re
2
 
 
 
 
 
 
 
 
 
 
 
3
  def analyze_headers(headers):
 
 
 
 
4
  findings = []
5
  score = 0
6
  headers = headers or {}
7
 
8
- # 1) SPF / DKIM / DMARC
9
- auth_results = (headers.get("Authentication-Results") or "").lower()
 
10
  if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
11
  findings.append("Header: DKIM check failed")
12
- score += 25
13
  if "spf=fail" in auth_results:
14
  findings.append("Header: SPF check failed")
15
- score += 25
16
  if "dmarc=fail" in auth_results:
17
  findings.append("Header: DMARC check failed")
18
- score += 25
19
 
20
- # softer signals
21
  if any(x in auth_results for x in ["spf=softfail", "spf=neutral", "spf=none"]):
22
  findings.append("Header: SPF not properly aligned")
23
  score += 10
@@ -25,33 +41,46 @@ def analyze_headers(headers):
25
  findings.append("Header: Temporary auth errors (DKIM/DMARC)")
26
  score += 5
27
 
28
- # 2) From / Reply-To mismatch (domain-level)
29
  from_addr = headers.get("From", "") or ""
30
  reply_to = headers.get("Reply-To", "") or ""
31
- from_domain = re.search(r'@([a-zA-Z0-9.-]+)', from_addr)
32
- reply_domain = re.search(r'@([a-zA-Z0-9.-]+)', reply_to)
33
- if from_domain and reply_domain:
34
- if from_domain.group(1).lower() != reply_domain.group(1).lower():
35
- findings.append(f"Header: Reply-To domain mismatch (From: {from_domain.group(1)}, Reply-To: {reply_domain.group(1)})")
 
 
36
  score += 20
37
 
38
- # 3) Suspicious sender domain
39
- sender = headers.get("From", "") or ""
40
  match = re.search(r'@([a-zA-Z0-9.-]+)', sender)
41
  if match:
42
  domain = match.group(1).lower()
43
- parts = domain.split(".")
44
- if any(free in domain for free in ["gmail.com", "yahoo.com", "outlook.com"]):
 
45
  findings.append(f"Header: Free email provider used ({domain})")
46
- score += 10
47
- if len(parts) > 4 or (parts and any(char.isdigit() for char in parts[0])):
48
- findings.append(f"Header: Suspicious-looking domain ({domain})")
 
49
  score += 15
50
 
51
- # 4) BCC usage (note: often stripped)
52
- if headers.get("Bcc"):
 
 
 
 
 
 
 
 
 
53
  findings.append("Header: Email sent with BCC (common in mass phishing)")
54
- score += 15
55
 
56
  if not findings:
57
  return ["No suspicious issues found in headers."], 0
 
1
+ # header_analyzer.py
2
  import re
3
 
4
+ # Brand keywords mapped to their official domains, used for brand-squatting detection
5
+ BRAND_OFFICIAL = {
6
+ "paypal": ["paypal.com"],
7
+ "amazon": ["amazon.com"],
8
+ "google": ["google.com", "gmail.com"],
9
+ "microsoft": ["microsoft.com", "outlook.com", "live.com"],
10
+ "apple": ["apple.com"],
11
+ "bank": [], # generic bank keyword detection (no official domain)
12
+ }
13
+
14
  def analyze_headers(headers):
15
+ """
16
+ Input: headers dict
17
+ Output: (findings: list[str], score: int)
18
+ """
19
  findings = []
20
  score = 0
21
  headers = headers or {}
22
 
23
+ auth_results = (headers.get("Authentication-Results") or headers.get("Authentication-results") or "").lower()
24
+
25
+ # Strict auth failures
26
  if "dkim=fail" in auth_results or "dkim=permerror" in auth_results:
27
  findings.append("Header: DKIM check failed")
28
+ score += 30
29
  if "spf=fail" in auth_results:
30
  findings.append("Header: SPF check failed")
31
+ score += 30
32
  if "dmarc=fail" in auth_results:
33
  findings.append("Header: DMARC check failed")
34
+ score += 30
35
 
36
+ # Softer auth problems
37
  if any(x in auth_results for x in ["spf=softfail", "spf=neutral", "spf=none"]):
38
  findings.append("Header: SPF not properly aligned")
39
  score += 10
 
41
  findings.append("Header: Temporary auth errors (DKIM/DMARC)")
42
  score += 5
43
 
44
+ # From and Reply-To domain compare (domain-level)
45
  from_addr = headers.get("From", "") or ""
46
  reply_to = headers.get("Reply-To", "") or ""
47
+ from_domain_m = re.search(r'@([a-zA-Z0-9.-]+)', from_addr)
48
+ reply_domain_m = re.search(r'@([a-zA-Z0-9.-]+)', reply_to)
49
+ if from_domain_m and reply_domain_m:
50
+ from_domain = from_domain_m.group(1).lower()
51
+ reply_domain = reply_domain_m.group(1).lower()
52
+ if from_domain != reply_domain:
53
+ findings.append(f"Header: Reply-To domain mismatch (From: {from_domain}, Reply-To: {reply_domain})")
54
  score += 20
55
 
56
+ # Suspicious looking sender domain & brand-squatting detection
57
+ sender = from_addr or ""
58
  match = re.search(r'@([a-zA-Z0-9.-]+)', sender)
59
  if match:
60
  domain = match.group(1).lower()
61
+ parts = domain.split('.')
62
+ # free provider detection
63
+ if any(free in domain for free in ["gmail.com", "yahoo.com", "outlook.com", "hotmail.com"]):
64
  findings.append(f"Header: Free email provider used ({domain})")
65
+ score += 8
66
+ # suspicious structural domain heuristics
67
+ if len(parts) > 4 or (parts and any(ch.isdigit() for ch in parts[0])):
68
+ findings.append(f"Header: Suspicious-looking domain structure ({domain})")
69
  score += 15
70
 
71
+ # brand-squatting: flag a domain that contains a known brand keyword but is neither an official domain nor a subdomain of one
72
+ for brand, official_list in BRAND_OFFICIAL.items():
73
+ if brand in domain:
74
+ # check if domain is exactly an official domain or subdomain of official
75
+ is_official = any(domain.endswith("." + off) or domain == off for off in official_list) if official_list else False
76
+ if not is_official:
77
+ findings.append(f"Header: Domain contains brand '{brand}' but is not official ({domain})")
78
+ score += 30
79
+
80
+ # Bcc usage detection
81
+ if headers.get("Bcc") or headers.get("bcc"):
82
  findings.append("Header: Email sent with BCC (common in mass phishing)")
83
+ score += 12
84
 
85
  if not findings:
86
  return ["No suspicious issues found in headers."], 0