Spaces:

princemaxp
/

CySecGuardians

Sleeping

princemaxp commited on Sep 4

Commit

3bb3451

verified ·

1 Parent(s): 49f1a98

Create parse_email.py

Files changed (1) hide show

parse_email.py ADDED Viewed

+import email
+import re
+from bs4 import BeautifulSoup
+def parse_email(file_path):
+    with open(file_path, "rb") as f:
+        msg = email.message_from_binary_file(f)
+    headers = dict(msg.items())
+    # Extract body (handle plain + HTML)
+    body = ""
+    if msg.is_multipart():
+        for part in msg.walk():
+            if part.get_content_type() == "text/plain":
+                body += part.get_payload(decode=True).decode(errors="ignore")
+            elif part.get_content_type() == "text/html":
+                html = part.get_payload(decode=True).decode(errors="ignore")
+                soup = BeautifulSoup(html, "html.parser")
+                body += soup.get_text()
+    else:
+        body = msg.get_payload(decode=True).decode(errors="ignore")
+    # Extract URLs
+    urls = re.findall(r'(https?://\S+)', body)
+    return headers, body, urls