import os
import re
from urllib.parse import quote

import requests

SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
OTX_API_KEY = os.getenv("OTX_API_KEY")

# Seconds to wait on each reputation service before giving up on that lookup.
_REQUEST_TIMEOUT = 15

# Points added to the overall score by each kind of positive signal.
_SCORE_SAFE_BROWSING = 40
_SCORE_OTX = 30
_SCORE_URLHAUS = 30
_SCORE_HEURISTIC = 15

# Captures the authority part of an http(s) URL.  The match stops at "/", "?"
# or "#" so that a URL without a path does not drag its query string or
# fragment into the "domain" and inflate its length.
_DOMAIN_RE = re.compile(r"https?://([^/?#]+)", re.IGNORECASE)


def _check_safe_browsing(url):
    """Look *url* up in Google Safe Browsing and return ``(finding, points)``."""
    payload = {
        "client": {"clientId": "email-analysis-tool", "clientVersion": "1.0"},
        "threatInfo": {
            "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE"],
            "platformTypes": ["ANY_PLATFORM"],
            "threatEntryTypes": ["URL"],
            "threatEntries": [{"url": url}],
        },
    }
    try:
        res = requests.post(
            "https://safebrowsing.googleapis.com/v4/threatMatches:find",
            params={"key": SAFE_BROWSING_API_KEY},
            json=payload,
            timeout=_REQUEST_TIMEOUT,
        )
        # An error response (bad key, quota, ...) carries no "matches" key
        # either; report it as a failed lookup rather than as a clean URL.
        if res.status_code != 200:
            return f"URL: {url} Safe Browsing lookup returned {res.status_code}", 0
        data = res.json()
        if isinstance(data, dict) and data.get("matches"):
            return f"URL: {url} flagged by Google Safe Browsing", _SCORE_SAFE_BROWSING
        return f"URL: {url} not flagged (Google Safe Browsing)", 0
    except Exception:
        # Deliberate best-effort boundary: a network or parsing problem with
        # one service is recorded as a finding and must not abort the analysis.
        return f"URL: {url} check failed (Google Safe Browsing)", 0


def _check_otx(url):
    """Look *url* up in AlienVault OTX and return ``(finding, points)``."""
    try:
        # The URL is embedded as a single path segment, so encode everything.
        encoded = quote(url, safe="")
        res = requests.get(
            f"https://otx.alienvault.com/api/v1/indicators/url/{encoded}/general",
            headers={"X-OTX-API-KEY": OTX_API_KEY},
            timeout=_REQUEST_TIMEOUT,
        )
        if res.status_code != 200:
            return f"URL: {url} OTX lookup returned {res.status_code}", 0
        data = res.json()
        if data.get("pulse_info", {}).get("count", 0) > 0:
            return f"URL: {url} flagged in AlienVault OTX", _SCORE_OTX
        return f"URL: {url} not found in AlienVault OTX", 0
    except Exception:
        # Best-effort: see _check_safe_browsing.
        return f"URL: {url} check failed (AlienVault OTX)", 0


def _check_urlhaus(url):
    """Look *url* up in URLHaus and return ``(finding, points)``."""
    try:
        res = requests.post(
            "https://urlhaus-api.abuse.ch/v1/url/",
            data={"url": url},
            timeout=_REQUEST_TIMEOUT,
        )
        # A rejected request must not be reported as "not found".
        if res.status_code != 200:
            return f"URL: {url} URLHaus lookup returned {res.status_code}", 0
        data = res.json()
        if data.get("query_status") == "ok":
            status = data.get("url_status", "malicious/suspicious")
            return f"URL: {url} flagged as {status} (URLHaus)", _SCORE_URLHAUS
        return f"URL: {url} not found in URLHaus", 0
    except Exception:
        # Best-effort: see _check_safe_browsing.
        return f"URL: {url} check failed (URLHaus)", 0


def _heuristic_findings(url):
    """Return a list of ``(finding, points)`` from offline checks on *url*."""
    results = []

    domain_match = _DOMAIN_RE.search(url)
    if domain_match:
        domain = domain_match.group(1)
        first_label = domain.split(".")[0]
        # Long hosts and digits in the first label are treated as suspicious.
        if len(domain) > 25 or any(char.isdigit() for char in first_label):
            results.append(
                (f"URL: {url} has suspicious-looking domain", _SCORE_HEURISTIC)
            )

    _, has_query, query = url.partition("?")
    if has_query and len(query) > 50:
        results.append((f"URL: {url} has obfuscated query string", _SCORE_HEURISTIC))

    return results


def analyze_urls(urls):
    """Check each URL against reputation services and simple heuristics.

    For every URL the function consults, in order: Google Safe Browsing (only
    when ``SAFE_BROWSING_API_KEY`` is set), AlienVault OTX (only when
    ``OTX_API_KEY`` is set), URLHaus, and two offline heuristics (suspicious
    domain, long query string).  Each check contributes a human-readable
    finding; positive signals also add to the score.

    Args:
        urls: Iterable of URL strings, a single URL string, or ``None``.
            Entries that are not non-empty strings are skipped.

    Returns:
        A ``(findings, score)`` tuple: ``findings`` is a list of strings and
        ``score`` is the integer sum of points from all positive signals.
        When nothing was analysed the result is
        ``(["No URLs found in email."], 0)``.
    """
    if isinstance(urls, str):
        # Iterating a bare string would analyse it one character at a time.
        urls = [urls]

    findings = []
    score = 0

    for url in urls or []:
        # Skip junk entries instead of crashing inside the heuristics.
        if not isinstance(url, str) or not url.strip():
            continue

        results = []
        if SAFE_BROWSING_API_KEY:
            results.append(_check_safe_browsing(url))
        if OTX_API_KEY:
            results.append(_check_otx(url))
        results.append(_check_urlhaus(url))
        results.extend(_heuristic_findings(url))

        for finding, points in results:
            findings.append(finding)
            score += points

    if not findings:
        return ["No URLs found in email."], 0
    return findings, score