File size: 4,010 Bytes
cbf4608
75f5bd8
 
 
 
da51597
 
 
 
 
 
 
 
 
 
 
 
 
 
75f5bd8
0e4c3b7
cbf4608
 
75f5bd8
cbf4608
da51597
cbf4608
 
abe135d
882ef51
cbf4608
 
 
 
 
 
da51597
 
cbf4608
 
abe135d
 
 
 
 
 
882ef51
da51597
0e4c3b7
 
 
 
 
 
 
 
 
da51597
abe135d
cbf4608
 
e41451e
cbf4608
e41451e
cbf4608
e41451e
cbf4608
0e4c3b7
e41451e
 
0e4c3b7
cbf4608
0e4c3b7
abe135d
 
 
0e4c3b7
abe135d
67a74ee
abe135d
0e4c3b7
abe135d
0e4c3b7
abe135d
 
cbf4608
 
0e4c3b7
8a8ba08
 
 
abe135d
 
8a8ba08
0e4c3b7
8a8ba08
abe135d
 
 
 
da51597
8a8ba08
75f5bd8
8a8ba08
cbf4608
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# analyze_email_main.py
from parse_email import parse_email
from header_analyzer import analyze_headers
from body_analyzer import analyze_body
from url_analyzer import analyze_urls
import re

def parse_auth_results(auth_results: str):
    """Pull the SPF, DKIM and DMARC verdicts out of an Authentication-Results value.

    The header text is lower-cased before matching, so the returned verdicts
    are always lower-case. For each mechanism the first ``<name>=<token>``
    occurrence wins, where the token is made of word characters and hyphens.

    Args:
        auth_results: Raw header value; may be empty or ``None``.

    Returns:
        A dict with the keys ``"spf"``, ``"dkim"`` and ``"dmarc"``. A mechanism
        that is not present in the header keeps the value ``"unknown"``.
    """
    verdicts = dict.fromkeys(("spf", "dkim", "dmarc"), "unknown")
    if auth_results:
        lowered = auth_results.lower()
        for mechanism in verdicts:
            hit = re.search(rf"{mechanism}=([\w-]+)", lowered)
            if hit is not None:
                verdicts[mechanism] = hit.group(1)
    return verdicts


def _coerce_score(value):
    """Return *value* as a float, treating None or non-numeric input as 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def analyze(file_path):
    """Run the header, URL and body analyzers on one email and build a report.

    The message is parsed with ``parse_email``; its headers, URLs and body are
    passed to ``analyze_headers``, ``analyze_urls`` and ``analyze_body``. The
    three scores are summed (the URL score is weighted 1.2x), clamped to the
    range 0-100 and mapped to a verdict:

    * score >= 70 -> Malicious
    * score >= 50 -> Suspicious
    * score >= 30 -> Spam
    * otherwise   -> Safe

    Args:
        file_path: Forwarded unchanged to ``parse_email``.

    Returns:
        A ``(summary, details)`` tuple of dicts. ``summary`` has the keys
        "Final Verdict", "Attack Type", "Attack Score" (rounded) and
        "Main Tags" (comma-separated, sorted, de-duplicated). ``details`` has
        the keys "Header Findings", "Body Findings", "URL Findings",
        "Highlighted Body" and "Auth Results".
    """
    headers, subject, body, urls, images = parse_email(file_path)

    header_findings, header_score, auth_summary = analyze_headers(headers or {})
    # URL analysis keeps the previous checks such as Safe Browsing / URLHaus.
    url_findings, url_score = analyze_urls(urls or [])
    # The body analyzer's own verdict is not used here; only its score counts.
    body_findings, body_score, highlighted_body, _body_verdict = analyze_body(
        subject, body, urls or [], images or []
    )

    # Normalise once so every later use is None-safe, matching the guards
    # already applied when the details dict is built.
    header_findings = header_findings or []
    body_findings = body_findings or []
    url_findings = url_findings or []

    # Coerce each component separately so one bad value degrades to 0 instead
    # of raising during the addition. URLs are weighted slightly heavier.
    total_score = (
        _coerce_score(header_score)
        + _coerce_score(body_score)
        + _coerce_score(url_score) * 1.2
    )
    total_score = max(0.0, min(total_score, 100.0))
    total_score_rounded = round(total_score)

    # NOTE(review): the Spam and Safe labels below look like mis-decoded emoji.
    # They are kept byte-for-byte because callers may compare against them;
    # confirm before changing.
    is_safe = False
    if total_score >= 70:
        verdict = "🚨 Malicious"
    elif total_score >= 50:
        verdict = "⚠️ Suspicious"
    elif total_score >= 30:
        verdict = "πŸ“© Spam"
    else:
        verdict = "βœ… Safe"
        is_safe = True

    # Attack type heuristics. A Safe verdict is decided first: otherwise any
    # ordinary email that mentions e.g. "account" or "payment" would be
    # reported as Safe and as a phishing/BEC attack at the same time.
    combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
    if is_safe:
        attack_type = "Benign / Normal Email"
    elif any(k in combined_text_lower for k in ("invoice", "payment", "wire transfer", "bank details")):
        attack_type = "Invoice/Payment Fraud (BEC)"
    elif any(k in combined_text_lower for k in ("password", "verify", "account", "login", "credentials")):
        attack_type = "Credential Harvesting (Phishing)"
    elif any("reply-to domain mismatch" in f.lower() for f in header_findings):
        attack_type = "Business Email Compromise (BEC)"
    elif any("spam" in f.lower() for f in [*body_findings, *url_findings]):
        attack_type = "Spam / Marketing"
    else:
        attack_type = "General Phishing"

    # Each rule: if any keyword occurs in a finding, the tag is attached.
    tag_rules = (
        (("domain",), "Suspicious Sender Domain"),
        (("phishing", "malicious url", "urlhaus"), "Phishing / Malicious URL"),
        (("urgent", "suspicious phrase"), "Urgent Language"),
        (("spam", "marketing"), "Spam Tone"),
        (("spf", "dkim", "dmarc"), "Auth Failures (SPF/DKIM/DMARC)"),
        (("ocr", "extracted text"), "Image-based content detected"),
    )
    tags = set()
    for finding in [*header_findings, *body_findings, *url_findings]:
        finding_lower = finding.lower()
        for keywords, tag in tag_rules:
            if any(keyword in finding_lower for keyword in keywords):
                tags.add(tag)

    summary = {
        "Final Verdict": verdict,
        "Attack Type": attack_type,
        "Attack Score": total_score_rounded,
        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
    }

    details = {
        "Header Findings": header_findings,
        "Body Findings": body_findings,
        "URL Findings": url_findings,
        "Highlighted Body": highlighted_body or "",
        # SPF / DKIM / DMARC summary as returned by analyze_headers.
        "Auth Results": auth_summary or {},
    }

    return summary, details


if __name__ == "__main__":
    # Manual smoke run: analyze a sample message from the working directory
    # and dump both result dicts to stdout.
    report_summary, report_details = analyze("sample.eml")
    print("SUMMARY:", report_summary)
    print("DETAILS:", report_details)