Spaces:
Sleeping
Sleeping
Update url_analyzer.py
Browse files
- url_analyzer.py +23 -12
url_analyzer.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
import re
|
|
|
|
| 4 |
|
| 5 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
| 6 |
OTX_API_KEY = os.getenv("OTX_API_KEY")
|
|
@@ -8,9 +9,10 @@ OTX_API_KEY = os.getenv("OTX_API_KEY")
|
|
| 8 |
def analyze_urls(urls):
|
| 9 |
findings = []
|
| 10 |
score = 0
|
|
|
|
| 11 |
|
| 12 |
for url in urls:
|
| 13 |
-
#
|
| 14 |
if SAFE_BROWSING_API_KEY:
|
| 15 |
try:
|
| 16 |
payload = {
|
|
@@ -25,21 +27,27 @@ def analyze_urls(urls):
|
|
| 25 |
res = requests.post(
|
| 26 |
f"https://safebrowsing.googleapis.com/v4/threatMatches:find?key={SAFE_BROWSING_API_KEY}",
|
| 27 |
json=payload,
|
|
|
|
| 28 |
)
|
| 29 |
data = res.json()
|
| 30 |
-
if "matches" in data:
|
| 31 |
findings.append(f"URL: {url} flagged by Google Safe Browsing")
|
| 32 |
score += 40
|
| 33 |
else:
|
| 34 |
findings.append(f"URL: {url} not flagged (Google Safe Browsing)")
|
| 35 |
-
except:
|
| 36 |
findings.append(f"URL: {url} check failed (Google Safe Browsing)")
|
| 37 |
|
| 38 |
-
#
|
| 39 |
if OTX_API_KEY:
|
| 40 |
try:
|
| 41 |
headers = {"X-OTX-API-KEY": OTX_API_KEY}
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
if res.status_code == 200:
|
| 44 |
data = res.json()
|
| 45 |
if data.get("pulse_info", {}).get("count", 0) > 0:
|
|
@@ -47,29 +55,32 @@ def analyze_urls(urls):
|
|
| 47 |
score += 30
|
| 48 |
else:
|
| 49 |
findings.append(f"URL: {url} not found in AlienVault OTX")
|
| 50 |
-
|
|
|
|
|
|
|
| 51 |
findings.append(f"URL: {url} check failed (AlienVault OTX)")
|
| 52 |
|
| 53 |
-
#
|
| 54 |
try:
|
| 55 |
-
res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url})
|
| 56 |
data = res.json()
|
| 57 |
if data.get("query_status") == "ok":
|
| 58 |
-
|
|
|
|
| 59 |
score += 30
|
| 60 |
else:
|
| 61 |
findings.append(f"URL: {url} not found in URLHaus")
|
| 62 |
-
except:
|
| 63 |
findings.append(f"URL: {url} check failed (URLHaus)")
|
| 64 |
|
| 65 |
-
#
|
| 66 |
domain_match = re.search(r"https?://([^/]+)/?", url)
|
| 67 |
if domain_match:
|
| 68 |
domain = domain_match.group(1)
|
| 69 |
if len(domain) > 25 or any(char.isdigit() for char in domain.split(".")[0]):
|
| 70 |
findings.append(f"URL: {url} has suspicious-looking domain")
|
| 71 |
score += 15
|
| 72 |
-
if "?" in url and len(url.split("?")[1]) > 50:
|
| 73 |
findings.append(f"URL: {url} has obfuscated query string")
|
| 74 |
score += 15
|
| 75 |
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
import re
|
| 4 |
+
from urllib.parse import quote
|
| 5 |
|
| 6 |
SAFE_BROWSING_API_KEY = os.getenv("SAFE_BROWSING_API_KEY")
|
| 7 |
OTX_API_KEY = os.getenv("OTX_API_KEY")
|
|
|
|
| 9 |
def analyze_urls(urls):
|
| 10 |
findings = []
|
| 11 |
score = 0
|
| 12 |
+
urls = urls or []
|
| 13 |
|
| 14 |
for url in urls:
|
| 15 |
+
# 1) Google Safe Browsing
|
| 16 |
if SAFE_BROWSING_API_KEY:
|
| 17 |
try:
|
| 18 |
payload = {
|
|
|
|
| 27 |
res = requests.post(
|
| 28 |
f"https://safebrowsing.googleapis.com/v4/threatMatches:find?key={SAFE_BROWSING_API_KEY}",
|
| 29 |
json=payload,
|
| 30 |
+
timeout=15,
|
| 31 |
)
|
| 32 |
data = res.json()
|
| 33 |
+
if isinstance(data, dict) and "matches" in data:
|
| 34 |
findings.append(f"URL: {url} flagged by Google Safe Browsing")
|
| 35 |
score += 40
|
| 36 |
else:
|
| 37 |
findings.append(f"URL: {url} not flagged (Google Safe Browsing)")
|
| 38 |
+
except Exception:
|
| 39 |
findings.append(f"URL: {url} check failed (Google Safe Browsing)")
|
| 40 |
|
| 41 |
+
# 2) AlienVault OTX
|
| 42 |
if OTX_API_KEY:
|
| 43 |
try:
|
| 44 |
headers = {"X-OTX-API-KEY": OTX_API_KEY}
|
| 45 |
+
encoded = quote(url, safe="")
|
| 46 |
+
res = requests.get(
|
| 47 |
+
f"https://otx.alienvault.com/api/v1/indicators/url/{encoded}/general",
|
| 48 |
+
headers=headers,
|
| 49 |
+
timeout=15,
|
| 50 |
+
)
|
| 51 |
if res.status_code == 200:
|
| 52 |
data = res.json()
|
| 53 |
if data.get("pulse_info", {}).get("count", 0) > 0:
|
|
|
|
| 55 |
score += 30
|
| 56 |
else:
|
| 57 |
findings.append(f"URL: {url} not found in AlienVault OTX")
|
| 58 |
+
else:
|
| 59 |
+
findings.append(f"URL: {url} OTX lookup returned {res.status_code}")
|
| 60 |
+
except Exception:
|
| 61 |
findings.append(f"URL: {url} check failed (AlienVault OTX)")
|
| 62 |
|
| 63 |
+
# 3) URLHaus
|
| 64 |
try:
|
| 65 |
+
res = requests.post("https://urlhaus-api.abuse.ch/v1/url/", data={"url": url}, timeout=15)
|
| 66 |
data = res.json()
|
| 67 |
if data.get("query_status") == "ok":
|
| 68 |
+
status = data.get("url_status", "malicious/suspicious")
|
| 69 |
+
findings.append(f"URL: {url} flagged as {status} (URLHaus)")
|
| 70 |
score += 30
|
| 71 |
else:
|
| 72 |
findings.append(f"URL: {url} not found in URLHaus")
|
| 73 |
+
except Exception:
|
| 74 |
findings.append(f"URL: {url} check failed (URLHaus)")
|
| 75 |
|
| 76 |
+
# 4) Heuristics
|
| 77 |
domain_match = re.search(r"https?://([^/]+)/?", url)
|
| 78 |
if domain_match:
|
| 79 |
domain = domain_match.group(1)
|
| 80 |
if len(domain) > 25 or any(char.isdigit() for char in domain.split(".")[0]):
|
| 81 |
findings.append(f"URL: {url} has suspicious-looking domain")
|
| 82 |
score += 15
|
| 83 |
+
if "?" in url and len(url.split("?", 1)[1]) > 50:
|
| 84 |
findings.append(f"URL: {url} has obfuscated query string")
|
| 85 |
score += 15
|
| 86 |
|