Spaces:

princemaxp
/

CySecGuardians

Sleeping

CySecGuardians / parse_email.py

Create parse_email.py

3bb3451 verified 3 months ago

861 Bytes

	import email
	import re
	from bs4 import BeautifulSoup

	def parse_email(file_path):
	with open(file_path, "rb") as f:
	msg = email.message_from_binary_file(f)

	headers = dict(msg.items())

	# Extract body (handle plain + HTML)
	body = ""
	if msg.is_multipart():
	for part in msg.walk():
	if part.get_content_type() == "text/plain":
	body += part.get_payload(decode=True).decode(errors="ignore")
	elif part.get_content_type() == "text/html":
	html = part.get_payload(decode=True).decode(errors="ignore")
	soup = BeautifulSoup(html, "html.parser")
	body += soup.get_text()
	else:
	body = msg.get_payload(decode=True).decode(errors="ignore")

	# Extract URLs
	urls = re.findall(r'(https?://\S+)', body)

	return headers, body, urls