Spaces:

princemaxp
/

CySecGuardians

Sleeping

App Files Files Community

princemaxp committed on Sep 5

Commit

cbf4608

verified ·

1 Parent(s): 9b5168a

Update analyze_email_main.py

Browse files

Files changed (1) hide show

analyze_email_main.py +36 -14

analyze_email_main.py CHANGED Viewed

@@ -1,19 +1,31 @@
 from parse_email import parse_email
 from header_analyzer import analyze_headers
 from body_analyzer import analyze_body
 from url_analyzer import analyze_urls
 def analyze(file_path):
-    # Parse
-    headers, body, urls = parse_email(file_path or "")
-    # Analyze
     header_findings, header_score = analyze_headers(headers or {})
-    body_findings, body_score, highlighted_body, body_verdict = analyze_body(body or "")
     url_findings, url_score = analyze_urls(urls or [])
-    # Score
-    total_score = (header_score or 0) + (body_score or 0) + (url_score or 0) * 1.5
     try:
         total_score = float(total_score)
     except Exception:
@@ -21,7 +33,7 @@ def analyze(file_path):
     total_score = max(0.0, min(total_score, 100.0))
     total_score_rounded = round(total_score)
-    # Verdict
     if total_score >= 70:
         verdict = "🚨 Malicious"
     elif 50 <= total_score < 70:
@@ -31,21 +43,21 @@ def analyze(file_path):
     else:
         verdict = "✅ Safe"
-    # Attack type
-    body_lower = (body or "").lower()
     attack_type = "General Phishing"
-    if any(word in body_lower for word in ["invoice", "payment", "wire transfer", "bank details"]):
         attack_type = "Invoice/Payment Fraud (BEC)"
-    elif any(word in body_lower for word in ["password", "verify", "account", "login", "credentials"]):
         attack_type = "Credential Harvesting (Phishing)"
-    elif any("reply-to mismatch" in f.lower() for f in header_findings):
         attack_type = "Business Email Compromise (BEC)"
-    elif any("spam" in f.lower() for f in body_findings):
         attack_type = "Spam / Marketing"
     elif verdict == "✅ Safe":
         attack_type = "Benign / Normal Email"
-    # Tags
     tags = []
     for finding in (header_findings + body_findings + url_findings):
         fl = finding.lower()
@@ -59,6 +71,8 @@ def analyze(file_path):
             tags.append("Spam Tone")
         if "spf" in fl or "dkim" in fl or "dmarc" in fl:
             tags.append("Auth Failures (SPF/DKIM/DMARC)")
     summary = {
         "Final Verdict": verdict,
@@ -75,3 +89,11 @@ def analyze(file_path):
     }
     return summary, details

+# analyze_email_main.py
 from parse_email import parse_email
 from header_analyzer import analyze_headers
 from body_analyzer import analyze_body
 from url_analyzer import analyze_urls
 def analyze(file_path):
+    # parse
+    headers, subject, body, urls, images = parse_email(file_path)
+    # header analysis
     header_findings, header_score = analyze_headers(headers or {})
+    # url analysis (keeps previous checks like Safe Browsing / URLHaus)
     url_findings, url_score = analyze_urls(urls or [])
+    # body analysis (subject, body, urls, images)
+    body_findings, body_score, highlighted_body, body_verdict = analyze_body(subject, body, urls or [], images or [])
+    # combine scores
+    total_score = 0
+    total_score += (header_score or 0)
+    # body score is added at face value (weight 1.0); URLs below are weighted higher (1.2)
+    total_score += (body_score or 0) * 1.0
+    # weight urls also (url analyzer already returns a score)
+    total_score += (url_score or 0) * 1.2
+    # clamp
     try:
         total_score = float(total_score)
     except Exception:
     total_score = max(0.0, min(total_score, 100.0))
     total_score_rounded = round(total_score)
+    # final verdict from total_score thresholds (body_verdict is not consulted in the branches shown here)
     if total_score >= 70:
         verdict = "🚨 Malicious"
     elif 50 <= total_score < 70:
     else:
         verdict = "✅ Safe"
+    # attack type heuristics (enriched by behavior inference when present in body_findings)
     attack_type = "General Phishing"
+    combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
+    if any(k in combined_text_lower for k in ["invoice", "payment", "wire transfer", "bank details"]):
         attack_type = "Invoice/Payment Fraud (BEC)"
+    elif any(k in combined_text_lower for k in ["password", "verify", "account", "login", "credentials"]):
         attack_type = "Credential Harvesting (Phishing)"
+    elif any("reply-to domain mismatch" in f.lower() for f in header_findings):
         attack_type = "Business Email Compromise (BEC)"
+    elif any("spam" in f.lower() for f in body_findings + url_findings):
         attack_type = "Spam / Marketing"
     elif verdict == "✅ Safe":
         attack_type = "Benign / Normal Email"
+    # tags
     tags = []
     for finding in (header_findings + body_findings + url_findings):
         fl = finding.lower()
             tags.append("Spam Tone")
         if "spf" in fl or "dkim" in fl or "dmarc" in fl:
             tags.append("Auth Failures (SPF/DKIM/DMARC)")
+        if "ocr" in fl or "extracted text" in fl:
+            tags.append("Image-based content detected")
     summary = {
         "Final Verdict": verdict,
     }
     return summary, details
+if __name__ == "__main__":
+    # quick local test (if you want)
+    fp = "sample.eml"
+    s, d = analyze(fp)
+    print("SUMMARY:", s)
+    print("DETAILS:", d)