# analyze_email_main.py
"""Combine header, body and URL analysis of one email into a single verdict."""

import re
import sys

from parse_email import parse_email
from header_analyzer import analyze_headers
from body_analyzer import analyze_body
from url_analyzer import analyze_urls

# URL findings weigh slightly more than header/body findings in the total.
_URL_WEIGHT = 1.2

# (minimum score, verdict) pairs, checked from the highest threshold down.
_VERDICT_THRESHOLDS = (
    (70, "🚨 Malicious"),
    (50, "⚠️ Suspicious"),
    (30, "📩 Spam"),
)
_VERDICT_SAFE = "✅ Safe"

_PAYMENT_KEYWORDS = ("invoice", "payment", "wire transfer", "bank details")
_CREDENTIAL_KEYWORDS = ("password", "verify", "account", "login", "credentials")

# (substrings to look for in a lower-cased finding, tag to attach on a hit)
_TAG_RULES = (
    (("domain",), "Suspicious Sender Domain"),
    (("phishing", "malicious url", "urlhaus"), "Phishing / Malicious URL"),
    (("urgent", "suspicious phrase"), "Urgent Language"),
    (("spam", "marketing"), "Spam Tone"),
    (("spf", "dkim", "dmarc"), "Auth Failures (SPF/DKIM/DMARC)"),
    (("ocr", "extracted text"), "Image-based content detected"),
)


def parse_auth_results(auth_results: str) -> dict:
    """Extract SPF, DKIM and DMARC values from an Authentication-Results header.

    The header is lower-cased before matching, so returned values are
    lower-case.  Only the first ``<mechanism>=<value>`` occurrence of each
    mechanism is used.

    Args:
        auth_results: Raw header value; may be empty or ``None``.

    Returns:
        Dict with keys ``"spf"``, ``"dkim"`` and ``"dmarc"``; a mechanism
        that is not found keeps the value ``"unknown"``.
    """
    results = {"spf": "unknown", "dkim": "unknown", "dmarc": "unknown"}
    if not auth_results:
        return results

    lowered = auth_results.lower()
    for key in results:
        match = re.search(rf"{key}=([\w-]+)", lowered)
        if match:
            results[key] = match.group(1)
    return results


def _as_score(value) -> float:
    """Coerce one analyzer score to float; missing or invalid values count as 0."""
    try:
        return float(value or 0)
    except (TypeError, ValueError):
        return 0.0


def _verdict_for(score: float) -> str:
    """Map a clamped 0-100 score to its verdict label."""
    for threshold, label in _VERDICT_THRESHOLDS:
        if score >= threshold:
            return label
    return _VERDICT_SAFE


def _attack_type_for(verdict, text_lower, header_findings, body_findings, url_findings) -> str:
    """Pick an attack-type label from the verdict, the email text and the findings."""
    # Checked first so a safe email that merely mentions "account" or
    # "payment" is not labelled as an attack.
    if verdict == _VERDICT_SAFE:
        return "Benign / Normal Email"
    if any(k in text_lower for k in _PAYMENT_KEYWORDS):
        return "Invoice/Payment Fraud (BEC)"
    if any(k in text_lower for k in _CREDENTIAL_KEYWORDS):
        return "Credential Harvesting (Phishing)"
    if any("reply-to domain mismatch" in f.lower() for f in header_findings):
        return "Business Email Compromise (BEC)"
    if any("spam" in f.lower() for f in [*body_findings, *url_findings]):
        return "Spam / Marketing"
    return "General Phishing"


def _collect_tags(findings) -> set:
    """Return the set of summary tags triggered by the given finding strings."""
    tags = set()
    for finding in findings:
        lowered = finding.lower()
        for needles, tag in _TAG_RULES:
            if any(needle in lowered for needle in needles):
                tags.add(tag)
    return tags


def analyze(file_path):
    """Analyze one email file and return a summary plus detailed findings.

    Runs ``parse_email`` on ``file_path`` and feeds the parsed parts to
    ``analyze_headers``, ``analyze_urls`` and ``analyze_body`` (in that
    order).  The three scores are summed (URL score weighted by 1.2),
    clamped to 0-100 and mapped to a verdict.

    Args:
        file_path: Path passed unchanged to ``parse_email``.

    Returns:
        ``(summary, details)``: ``summary`` holds "Final Verdict",
        "Attack Type", "Attack Score" (rounded int) and "Main Tags";
        ``details`` holds the per-analyzer findings, the highlighted body
        and the auth summary returned by ``analyze_headers``.
    """
    headers, subject, body, urls, images = parse_email(file_path)

    header_findings, header_score, auth_summary = analyze_headers(headers or {})
    # URL analysis keeps the analyzer's own checks (Safe Browsing / URLHaus).
    url_findings, url_score = analyze_urls(urls or [])
    body_findings, body_score, highlighted_body, _body_verdict = analyze_body(
        subject, body, urls or [], images or []
    )

    # Normalise once so a None from any analyzer cannot break the
    # concatenations and iterations below.
    header_findings = header_findings or []
    body_findings = body_findings or []
    url_findings = url_findings or []

    total_score = (
        _as_score(header_score)
        + _as_score(body_score)
        + _as_score(url_score) * _URL_WEIGHT
    )
    total_score = max(0.0, min(total_score, 100.0))

    # The verdict uses the unrounded score; the rounded value is for display.
    verdict = _verdict_for(total_score)

    combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
    attack_type = _attack_type_for(
        verdict, combined_text_lower, header_findings, body_findings, url_findings
    )

    tags = _collect_tags([*header_findings, *body_findings, *url_findings])

    summary = {
        "Final Verdict": verdict,
        "Attack Type": attack_type,
        "Attack Score": round(total_score),
        "Main Tags": ", ".join(sorted(tags)) if tags else "No special tags",
    }

    details = {
        "Header Findings": header_findings,
        "Body Findings": body_findings,
        "URL Findings": url_findings,
        "Highlighted Body": highlighted_body or "",
        "Auth Results": auth_summary or {},  # SPF, DKIM, DMARC results
    }

    return summary, details


if __name__ == "__main__":
    # Optional first command-line argument selects the email file;
    # without it the bundled sample is analyzed.
    fp = sys.argv[1] if len(sys.argv) > 1 else "sample.eml"
    s, d = analyze(fp)
    print("SUMMARY:", s)
    print("DETAILS:", d)