CySecGuardians / url_analyzer.py
princemaxp's picture
Update url_analyzer.py
b1aff46 verified
import requests
import os
import re
from urllib.parse import quote
# API credentials are read from the process environment once, at import time.
# A variable that is not set yields None, in which case analyze_urls() skips
# the corresponding reputation lookup.
SAFE_BROWSING_API_KEY = os.environ.get("SAFE_BROWSING_API_KEY")
OTX_API_KEY = os.environ.get("OTX_API_KEY")
_REQUEST_TIMEOUT = 15  # seconds allowed for each reputation-service request

# Captures the authority part of the first http(s) URL found in a string.
# Stopping at "?" and "#" keeps a query string or fragment out of the host
# when the URL has no path component.
_AUTHORITY_RE = re.compile(r"https?://([^/?#]+)", re.IGNORECASE)


def _check_safe_browsing(url):
    """Look *url* up in Google Safe Browsing; return ``(finding, points)``."""
    try:
        payload = {
            "client": {"clientId": "email-analysis-tool", "clientVersion": "1.0"},
            "threatInfo": {
                "threatTypes": ["MALWARE", "SOCIAL_ENGINEERING", "UNWANTED_SOFTWARE"],
                "platformTypes": ["ANY_PLATFORM"],
                "threatEntryTypes": ["URL"],
                "threatEntries": [{"url": url}],
            },
        }
        res = requests.post(
            "https://safebrowsing.googleapis.com/v4/threatMatches:find",
            params={"key": SAFE_BROWSING_API_KEY},
            json=payload,
            timeout=_REQUEST_TIMEOUT,
        )
        # An error response (bad key, quota exceeded, ...) carries no
        # "matches" key either; without this check it would be misreported
        # as "not flagged".
        res.raise_for_status()
        data = res.json()
    except Exception:
        # Best-effort lookup: any failure is reported, never raised.
        return f"URL: {url} check failed (Google Safe Browsing)", 0

    if isinstance(data, dict) and data.get("matches"):
        return f"URL: {url} flagged by Google Safe Browsing", 40
    return f"URL: {url} not flagged (Google Safe Browsing)", 0


def _check_otx(url):
    """Look *url* up in AlienVault OTX; return ``(finding, points)``."""
    try:
        encoded = quote(url, safe="")
        res = requests.get(
            f"https://otx.alienvault.com/api/v1/indicators/url/{encoded}/general",
            headers={"X-OTX-API-KEY": OTX_API_KEY},
            timeout=_REQUEST_TIMEOUT,
        )
        if res.status_code != 200:
            return f"URL: {url} OTX lookup returned {res.status_code}", 0
        data = res.json()
        # Tolerate "pulse_info": null / "count": null in the response.
        pulse_count = (data.get("pulse_info") or {}).get("count") or 0
    except Exception:
        # Best-effort lookup: any failure is reported, never raised.
        return f"URL: {url} check failed (AlienVault OTX)", 0

    if pulse_count > 0:
        return f"URL: {url} flagged in AlienVault OTX", 30
    return f"URL: {url} not found in AlienVault OTX", 0


def _check_urlhaus(url):
    """Look *url* up in URLHaus; return ``(finding, points)``."""
    # NOTE(review): abuse.ch has announced that its APIs require an Auth-Key
    # header -- confirm whether this unauthenticated request is still served.
    try:
        res = requests.post(
            "https://urlhaus-api.abuse.ch/v1/url/",
            data={"url": url},
            timeout=_REQUEST_TIMEOUT,
        )
        data = res.json()
        query_status = data.get("query_status")
        url_status = data.get("url_status", "malicious/suspicious")
    except Exception:
        # Best-effort lookup: any failure is reported, never raised.
        return f"URL: {url} check failed (URLHaus)", 0

    if query_status == "ok":
        return f"URL: {url} flagged as {url_status} (URLHaus)", 30
    if query_status == "no_results":
        return f"URL: {url} not found in URLHaus", 0
    # Any other status (invalid_url, http_post_expected, auth errors, ...)
    # means the lookup did not actually happen, so do not claim "not found".
    return f"URL: {url} check failed (URLHaus)", 0


def _extract_host(url):
    """Return the host of the first http(s) URL in *url*, or ``None``.

    Userinfo (``user:pass@``) and a numeric ``:port`` suffix are removed so
    that only the host name itself is judged by the heuristics.
    """
    match = _AUTHORITY_RE.search(url)
    if not match:
        return None
    host = match.group(1).rpartition("@")[2]
    name, sep, port = host.rpartition(":")
    if sep and port.isdigit():
        host = name
    return host


def _heuristic_findings(url):
    """Return a list of ``(finding, points)`` from local, offline heuristics."""
    results = []

    host = _extract_host(url)
    if host and (
        len(host) > 25 or any(ch.isdigit() for ch in host.split(".")[0])
    ):
        results.append((f"URL: {url} has suspicious-looking domain", 15))

    # Everything after the first "?"; empty when the URL has no query string.
    query = url.partition("?")[2]
    if len(query) > 50:
        results.append((f"URL: {url} has obfuscated query string", 15))

    return results


def analyze_urls(urls):
    """Score a collection of URLs against reputation services and heuristics.

    For every URL the following checks run, in this order:

    1. Google Safe Browsing (only if ``SAFE_BROWSING_API_KEY`` is set), +40
    2. AlienVault OTX (only if ``OTX_API_KEY`` is set), +30
    3. URLHaus, +30
    4. Local heuristics: long host or digits in its first label (+15),
       query string longer than 50 characters (+15)

    Network or parsing failures never raise; they are recorded as
    ``"... check failed (...)"`` findings and contribute no points.

    Args:
        urls: Iterable of URL strings. ``None`` or an empty iterable is
            treated as "no URLs"; a single bare string is treated as one URL.

    Returns:
        A ``(findings, score)`` tuple: a list of human-readable finding
        strings and the summed integer score. When nothing was analysed the
        result is ``(["No URLs found in email."], 0)``.
    """
    urls = urls or []
    if isinstance(urls, str):
        # Iterating a bare string would look up every single character.
        urls = [urls]

    findings = []
    score = 0

    for url in urls:
        results = []
        if SAFE_BROWSING_API_KEY:
            results.append(_check_safe_browsing(url))
        if OTX_API_KEY:
            results.append(_check_otx(url))
        results.append(_check_urlhaus(url))
        results.extend(_heuristic_findings(url))

        for message, points in results:
            findings.append(message)
            score += points

    if not findings:
        return ["No URLs found in email."], 0
    return findings, score