princemaxp committed on
Commit
b1aff46
·
verified ·
1 Parent(s): 292c399

Update url_analyzer.py

Browse files
Files changed (1) hide show
  1. url_analyzer.py +23 -12
url_analyzer.py CHANGED
@@ -1,6 +1,7 @@
1
  import requests
2
  import os
3
  import re
 
4
 
5
  SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
6
  OTX_API_KEY = os.getenv("OTX_API_KEY")
@@ -8,9 +9,10 @@ OTX_API_KEY = os.getenv("OTX_API_KEY")
8
  def analyze_urls(urls):
9
  findings = []
10
  score = 0
 
11
 
12
  for url in urls:
13
- # --- 1. Google Safe Browsing ---
14
  if SAFE_BROWSING_API_KEY:
15
  try:
16
  payload = {
@@ -25,21 +27,27 @@ def analyze_urls(urls):
25
  res = requests.post(
26
  f"https://safebrowsing.googleapis.com/v4/threatMatches:find?key={SAFE_BROWSING_API_KEY}",
27
  json=payload,
 
28
  )
29
  data = res.json()
30
- if "matches" in data:
31
  findings.append(f"URL: {url} flagged by Google Safe Browsing")
32
  score += 40
33
  else:
34
  findings.append(f"URL: {url} not flagged (Google Safe Browsing)")
35
- except:
36
  findings.append(f"URL: {url} check failed (Google Safe Browsing)")
37
 
38
- # --- 2. AlienVault OTX ---
39
  if OTX_API_KEY:
40
  try:
41
  headers = {"X-OTX-API-KEY": OTX_API_KEY}
42
- res = requests.get(f"https://otx.alienvault.com/api/v1/indicators/url/{url}/general", headers=headers)
 
 
 
 
 
43
  if res.status_code == 200:
44
  data = res.json()
45
  if data.get("pulse_info", {}).get("count", 0) > 0:
@@ -47,29 +55,32 @@ def analyze_urls(urls):
47
  score += 30
48
  else:
49
  findings.append(f"URL: {url} not found in AlienVault OTX")
50
- except:
 
 
51
  findings.append(f"URL: {url} check failed (AlienVault OTX)")
52
 
53
- # --- 3. URLHaus ---
54
  try:
55
- res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url})
56
  data = res.json()
57
  if data.get("query_status") == "ok":
58
- findings.append(f"URL: {url} flagged as {data['url_status']} (URLHaus)")
 
59
  score += 30
60
  else:
61
  findings.append(f"URL: {url} not found in URLHaus")
62
- except:
63
  findings.append(f"URL: {url} check failed (URLHaus)")
64
 
65
- # --- 4. Heuristics ---
66
  domain_match = re.search(r"https?://([^/]+)/?", url)
67
  if domain_match:
68
  domain = domain_match.group(1)
69
  if len(domain) > 25 or any(char.isdigit() for char in domain.split(".")[0]):
70
  findings.append(f"URL: {url} has suspicious-looking domain")
71
  score += 15
72
- if "?" in url and len(url.split("?")[1]) > 50:
73
  findings.append(f"URL: {url} has obfuscated query string")
74
  score += 15
75
 
 
1
  import requests
2
  import os
3
  import re
4
+ from urllib.parse import quote
5
 
6
  SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
7
  OTX_API_KEY = os.getenv("OTX_API_KEY")
 
9
  def analyze_urls(urls):
10
  findings = []
11
  score = 0
12
+ urls = urls or []
13
 
14
  for url in urls:
15
+ # 1) Google Safe Browsing
16
  if SAFE_BROWSING_API_KEY:
17
  try:
18
  payload = {
 
27
  res = requests.post(
28
  f"https://safebrowsing.googleapis.com/v4/threatMatches:find?key={SAFE_BROWSING_API_KEY}",
29
  json=payload,
30
+ timeout=15,
31
  )
32
  data = res.json()
33
+ if isinstance(data, dict) and "matches" in data:
34
  findings.append(f"URL: {url} flagged by Google Safe Browsing")
35
  score += 40
36
  else:
37
  findings.append(f"URL: {url} not flagged (Google Safe Browsing)")
38
+ except Exception:
39
  findings.append(f"URL: {url} check failed (Google Safe Browsing)")
40
 
41
+ # 2) AlienVault OTX
42
  if OTX_API_KEY:
43
  try:
44
  headers = {"X-OTX-API-KEY": OTX_API_KEY}
45
+ encoded = quote(url, safe="")
46
+ res = requests.get(
47
+ f"https://otx.alienvault.com/api/v1/indicators/url/{encoded}/general",
48
+ headers=headers,
49
+ timeout=15,
50
+ )
51
  if res.status_code == 200:
52
  data = res.json()
53
  if data.get("pulse_info", {}).get("count", 0) > 0:
 
55
  score += 30
56
  else:
57
  findings.append(f"URL: {url} not found in AlienVault OTX")
58
+ else:
59
+ findings.append(f"URL: {url} OTX lookup returned {res.status_code}")
60
+ except Exception:
61
  findings.append(f"URL: {url} check failed (AlienVault OTX)")
62
 
63
+ # 3) URLHaus
64
  try:
65
+ res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url}, timeout=15)
66
  data = res.json()
67
  if data.get("query_status") == "ok":
68
+ status = data.get("url_status", "malicious/suspicious")
69
+ findings.append(f"URL: {url} flagged as {status} (URLHaus)")
70
  score += 30
71
  else:
72
  findings.append(f"URL: {url} not found in URLHaus")
73
+ except Exception:
74
  findings.append(f"URL: {url} check failed (URLHaus)")
75
 
76
+ # 4) Heuristics
77
  domain_match = re.search(r"https?://([^/]+)/?", url)
78
  if domain_match:
79
  domain = domain_match.group(1)
80
  if len(domain) > 25 or any(char.isdigit() for char in domain.split(".")[0]):
81
  findings.append(f"URL: {url} has suspicious-looking domain")
82
  score += 15
83
+ if "?" in url and len(url.split("?", 1)[1]) > 50:
84
  findings.append(f"URL: {url} has obfuscated query string")
85
  score += 15
86