Spaces:
Sleeping
Sleeping
File size: 4,010 Bytes
cbf4608 75f5bd8 da51597 75f5bd8 0e4c3b7 cbf4608 75f5bd8 cbf4608 da51597 cbf4608 abe135d 882ef51 cbf4608 da51597 cbf4608 abe135d 882ef51 da51597 0e4c3b7 da51597 abe135d cbf4608 e41451e cbf4608 e41451e cbf4608 e41451e cbf4608 0e4c3b7 e41451e 0e4c3b7 cbf4608 0e4c3b7 abe135d 0e4c3b7 abe135d 67a74ee abe135d 0e4c3b7 abe135d 0e4c3b7 abe135d cbf4608 0e4c3b7 8a8ba08 abe135d 8a8ba08 0e4c3b7 8a8ba08 abe135d da51597 8a8ba08 75f5bd8 8a8ba08 cbf4608 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# analyze_email_main.py
from parse_email import parse_email
from header_analyzer import analyze_headers
from body_analyzer import analyze_body
from url_analyzer import analyze_urls
import re
def parse_auth_results(auth_results: str) -> dict:
    """Extract SPF, DKIM, and DMARC values from Authentication-Results header.

    Args:
        auth_results: Raw value of the ``Authentication-Results`` header.
            Falsy values (``None``, ``""``) are accepted.

    Returns:
        A dict with the keys ``"spf"``, ``"dkim"`` and ``"dmarc"``. Each value
        is the lower-cased result token found for that method (for example
        ``"pass"`` or ``"fail"``), or ``"unknown"`` when the method does not
        appear in the header. If a method appears several times, the first
        occurrence wins.
    """
    results = {"spf": "unknown", "dkim": "unknown", "dmarc": "unknown"}
    if not auth_results:
        return results

    # str() lets a header object that is not a plain string be scanned
    # instead of failing on .lower().
    text = str(auth_results).lower()

    for method in results:
        # (?<![\w-]) keeps the method name from matching as the tail of a
        # longer token such as "x-dkim=fail"; \s* on both sides of "=" accepts
        # the optional whitespace the header grammar allows ("spf = pass").
        match = re.search(rf"(?<![\w-]){method}\s*=\s*([\w-]+)", text)
        if match:
            results[method] = match.group(1)
    return results
# Verdict labels. The emoji are written as escape sequences so the source stays
# pure ASCII and cannot be corrupted by a wrong-encoding round trip.
_VERDICT_MALICIOUS = "\U0001F6A8 Malicious"      # police car light
_VERDICT_SUSPICIOUS = "\u26A0\uFE0F Suspicious"  # warning sign
_VERDICT_SPAM = "\U0001F4E9 Spam"                # envelope with arrow
_VERDICT_SAFE = "\u2705 Safe"                    # check mark

# Relative weight of each analyzer's score in the combined score.
_BODY_WEIGHT = 1.0
_URL_WEIGHT = 1.2  # URLs count a bit heavier

_PAYMENT_KEYWORDS = ("invoice", "payment", "wire transfer", "bank details")
_CREDENTIAL_KEYWORDS = ("password", "verify", "account", "login", "credentials")

# (substrings to look for in a lower-cased finding, tag to emit)
_TAG_RULES = (
    (("domain",), "Suspicious Sender Domain"),
    (("phishing", "malicious url", "urlhaus"), "Phishing / Malicious URL"),
    (("urgent", "suspicious phrase"), "Urgent Language"),
    (("spam", "marketing"), "Spam Tone"),
    (("spf", "dkim", "dmarc"), "Auth Failures (SPF/DKIM/DMARC)"),
    (("ocr", "extracted text"), "Image-based content detected"),
)


def _verdict_for_score(score):
    """Map a clamped 0-100 score to its verdict label."""
    if score >= 70:
        return _VERDICT_MALICIOUS
    if score >= 50:
        return _VERDICT_SUSPICIOUS
    if score >= 30:
        return _VERDICT_SPAM
    return _VERDICT_SAFE


def _classify_attack(subject, body, header_findings, body_findings, url_findings, verdict):
    """Pick an attack-type label from keywords in the text and the findings."""
    text = ((subject or "") + "\n" + (body or "")).lower()

    # NOTE(review): the keyword checks run before the Safe check, so an email
    # with a Safe verdict that mentions e.g. "account" is still labelled as
    # credential harvesting. Order kept as-is; confirm this is intended.
    if any(word in text for word in _PAYMENT_KEYWORDS):
        return "Invoice/Payment Fraud (BEC)"
    if any(word in text for word in _CREDENTIAL_KEYWORDS):
        return "Credential Harvesting (Phishing)"
    if any("reply-to domain mismatch" in finding.lower() for finding in header_findings):
        return "Business Email Compromise (BEC)"
    if any(
        "spam" in finding.lower()
        for group in (body_findings, url_findings)
        for finding in group
    ):
        return "Spam / Marketing"
    if verdict == _VERDICT_SAFE:
        return "Benign / Normal Email"
    return "General Phishing"


def _collect_tags(*finding_groups):
    """Return the set of tags whose trigger substrings occur in any finding."""
    tags = set()
    for group in finding_groups:
        for finding in group:
            lowered = finding.lower()
            for needles, tag in _TAG_RULES:
                if any(needle in lowered for needle in needles):
                    tags.add(tag)
    return tags


def analyze(file_path):
    """Run the header, URL and body analyzers on one email and combine them.

    Args:
        file_path: Passed unchanged to ``parse_email``; presumably the path
            of the email file to analyze.

    Returns:
        A ``(summary, details)`` tuple of dicts.

        ``summary`` has the keys ``"Final Verdict"``, ``"Attack Type"``,
        ``"Attack Score"`` (the combined score clamped to 0-100 and rounded)
        and ``"Main Tags"`` (sorted, comma-separated tags, or
        ``"No special tags"``).

        ``details`` has the keys ``"Header Findings"``, ``"Body Findings"``,
        ``"URL Findings"``, ``"Highlighted Body"`` and ``"Auth Results"``.
    """
    # parse
    headers, subject, body, urls, images = parse_email(file_path)

    # header analysis
    header_findings, header_score, auth_summary = analyze_headers(headers or {})
    # url analysis (keeps previous checks like Safe Browsing / URLHaus)
    url_findings, url_score = analyze_urls(urls or [])
    # body analysis (subject, body, urls, images); the body's own verdict is
    # not used, the final verdict comes from the combined score below
    body_findings, body_score, highlighted_body, _body_verdict = analyze_body(
        subject, body, urls or [], images or []
    )

    # An analyzer may hand back None instead of a list; normalize once so the
    # loops below never iterate or concatenate None.
    header_findings = header_findings or []
    body_findings = body_findings or []
    url_findings = url_findings or []

    # combine scores
    total_score = (
        (header_score or 0)
        + (body_score or 0) * _BODY_WEIGHT
        + (url_score or 0) * _URL_WEIGHT
    )
    try:
        total_score = float(total_score)
    except (TypeError, ValueError):
        total_score = 0.0
    # clamp
    total_score = max(0.0, min(total_score, 100.0))

    # The verdict uses the unrounded score; only the reported value is rounded.
    verdict = _verdict_for_score(total_score)
    attack_type = _classify_attack(
        subject, body, header_findings, body_findings, url_findings, verdict
    )
    tags = _collect_tags(header_findings, body_findings, url_findings)

    summary = {
        "Final Verdict": verdict,
        "Attack Type": attack_type,
        "Attack Score": round(total_score),
        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
    }
    details = {
        "Header Findings": header_findings,
        "Body Findings": body_findings,
        "URL Findings": url_findings,
        "Highlighted Body": highlighted_body or "",
        "Auth Results": auth_summary or {},  # SPF, DKIM, DMARC results
    }
    return summary, details
if __name__ == "__main__":
    import sys

    # Analyze the file named on the command line; with no argument, fall back
    # to "sample.eml" in the current directory as before.
    eml_path = sys.argv[1] if len(sys.argv) > 1 else "sample.eml"
    summary, details = analyze(eml_path)
    print("SUMMARY:", summary)
    print("DETAILS:", details)
|