princemaxp committed on
Commit
cbf4608
·
verified ·
1 Parent(s): 9b5168a

Update analyze_email_main.py

Browse files
Files changed (1) hide show
  1. analyze_email_main.py +36 -14
analyze_email_main.py CHANGED
@@ -1,19 +1,31 @@
 
1
  from parse_email import parse_email
2
  from header_analyzer import analyze_headers
3
  from body_analyzer import analyze_body
4
  from url_analyzer import analyze_urls
5
 
6
  def analyze(file_path):
7
- # Parse
8
- headers, body, urls = parse_email(file_path or "")
9
 
10
- # Analyze
11
  header_findings, header_score = analyze_headers(headers or {})
12
- body_findings, body_score, highlighted_body, body_verdict = analyze_body(body or "")
 
13
  url_findings, url_score = analyze_urls(urls or [])
14
 
15
- # Score
16
- total_score = (header_score or 0) + (body_score or 0) + (url_score or 0) * 1.5
 
 
 
 
 
 
 
 
 
 
17
  try:
18
  total_score = float(total_score)
19
  except Exception:
@@ -21,7 +33,7 @@ def analyze(file_path):
21
  total_score = max(0.0, min(total_score, 100.0))
22
  total_score_rounded = round(total_score)
23
 
24
- # Verdict
25
  if total_score >= 70:
26
  verdict = "🚨 Malicious"
27
  elif 50 <= total_score < 70:
@@ -31,21 +43,21 @@ def analyze(file_path):
31
  else:
32
  verdict = "✅ Safe"
33
 
34
- # Attack type
35
- body_lower = (body or "").lower()
36
  attack_type = "General Phishing"
37
- if any(word in body_lower for word in ["invoice", "payment", "wire transfer", "bank details"]):
 
38
  attack_type = "Invoice/Payment Fraud (BEC)"
39
- elif any(word in body_lower for word in ["password", "verify", "account", "login", "credentials"]):
40
  attack_type = "Credential Harvesting (Phishing)"
41
- elif any("reply-to mismatch" in f.lower() for f in header_findings):
42
  attack_type = "Business Email Compromise (BEC)"
43
- elif any("spam" in f.lower() for f in body_findings):
44
  attack_type = "Spam / Marketing"
45
  elif verdict == "✅ Safe":
46
  attack_type = "Benign / Normal Email"
47
 
48
- # Tags
49
  tags = []
50
  for finding in (header_findings + body_findings + url_findings):
51
  fl = finding.lower()
@@ -59,6 +71,8 @@ def analyze(file_path):
59
  tags.append("Spam Tone")
60
  if "spf" in fl or "dkim" in fl or "dmarc" in fl:
61
  tags.append("Auth Failures (SPF/DKIM/DMARC)")
 
 
62
 
63
  summary = {
64
  "Final Verdict": verdict,
@@ -75,3 +89,11 @@ def analyze(file_path):
75
  }
76
 
77
  return summary, details
 
 
 
 
 
 
 
 
 
1
+ # analyze_email_main.py
2
  from parse_email import parse_email
3
  from header_analyzer import analyze_headers
4
  from body_analyzer import analyze_body
5
  from url_analyzer import analyze_urls
6
 
7
  def analyze(file_path):
8
+ # parse
9
+ headers, subject, body, urls, images = parse_email(file_path)
10
 
11
+ # header analysis
12
  header_findings, header_score = analyze_headers(headers or {})
13
+
14
+ # url analysis (keeps previous checks like Safe Browsing / URLHaus)
15
  url_findings, url_score = analyze_urls(urls or [])
16
 
17
+ # body analysis (subject, body, urls, images)
18
+ body_findings, body_score, highlighted_body, body_verdict = analyze_body(subject, body, urls or [], images or [])
19
+
20
+ # combine scores
21
+ total_score = 0
22
+ total_score += (header_score or 0)
23
+ # weight body more
24
+ total_score += (body_score or 0) * 1.0
25
+ # weight urls also (url analyzer already returns a score)
26
+ total_score += (url_score or 0) * 1.2
27
+
28
+ # clamp
29
  try:
30
  total_score = float(total_score)
31
  except Exception:
 
33
  total_score = max(0.0, min(total_score, 100.0))
34
  total_score_rounded = round(total_score)
35
 
36
+ # final verdict (majority-driven — body_verdict also considered)
37
  if total_score >= 70:
38
  verdict = "🚨 Malicious"
39
  elif 50 <= total_score < 70:
 
43
  else:
44
  verdict = "✅ Safe"
45
 
46
+ # attack type heuristics (enriched by behavior inference when present in body_findings)
 
47
  attack_type = "General Phishing"
48
+ combined_text_lower = ((subject or "") + "\n" + (body or "")).lower()
49
+ if any(k in combined_text_lower for k in ["invoice", "payment", "wire transfer", "bank details"]):
50
  attack_type = "Invoice/Payment Fraud (BEC)"
51
+ elif any(k in combined_text_lower for k in ["password", "verify", "account", "login", "credentials"]):
52
  attack_type = "Credential Harvesting (Phishing)"
53
+ elif any("reply-to domain mismatch" in f.lower() for f in header_findings):
54
  attack_type = "Business Email Compromise (BEC)"
55
+ elif any("spam" in f.lower() for f in body_findings + url_findings):
56
  attack_type = "Spam / Marketing"
57
  elif verdict == "✅ Safe":
58
  attack_type = "Benign / Normal Email"
59
 
60
+ # tags
61
  tags = []
62
  for finding in (header_findings + body_findings + url_findings):
63
  fl = finding.lower()
 
71
  tags.append("Spam Tone")
72
  if "spf" in fl or "dkim" in fl or "dmarc" in fl:
73
  tags.append("Auth Failures (SPF/DKIM/DMARC)")
74
+ if "ocr" in fl or "extracted text" in fl:
75
+ tags.append("Image-based content detected")
76
 
77
  summary = {
78
  "Final Verdict": verdict,
 
89
  }
90
 
91
  return summary, details
92
+
93
+
94
+ if __name__ == "__main__":
95
+ # quick local test (if you want)
96
+ fp = "sample.eml"
97
+ s, d = analyze(fp)
98
+ print("SUMMARY:", s)
99
+ print("DETAILS:", d)