Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
from seleniumwire import webdriver
|
| 2 |
from selenium.webdriver.chrome.options import Options
|
| 3 |
-
from selenium.common.exceptions import WebDriverException
|
| 4 |
from fastapi import FastAPI, Request
|
| 5 |
import uvicorn
|
| 6 |
import time
|
|
@@ -25,29 +24,14 @@ def get_root_domain(url):
|
|
| 25 |
return '.'.join(parts[-2:])
|
| 26 |
else:
|
| 27 |
return domain
|
| 28 |
-
|
| 29 |
-
# 网络抓包内容过滤
|
| 30 |
-
def filter_type(_type: str) -> bool:
    """Decide whether a captured network response should be kept.

    Responses whose MIME type is in the exclusion list below are dropped;
    everything else is kept.

    Args:
        _type: MIME type string reported for the response.

    Returns:
        True when the response should be kept, False when its MIME type is
        in the exclusion list.
    """
    # 'webp' alone is not a MIME type; 'image/webp' is the registered media
    # type, so it is added here. The bare 'webp' entry is retained so any
    # input that was excluded before is still excluded.
    excluded_types = (
        'application/javascript', 'application/x-javascript', 'text/css',
        'webp', 'image/webp', 'image/png', 'image/gif',
        'image/jpeg', 'image/x-icon', 'application/octet-stream',
    )
    return _type not in excluded_types
|
| 38 |
-
|
| 39 |
@app.get("/")
def main():
    """Root endpoint: return a fixed success payload."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
| 42 |
|
| 43 |
@app.get("/chrome")
|
| 44 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 45 |
-
|
| 46 |
-
caps = {
|
| 47 |
-
"browserName": "chrome",
|
| 48 |
-
'goog:loggingPrefs': {'performance': 'ALL'}
|
| 49 |
-
}
|
| 50 |
-
|
| 51 |
# 必须有目标url
|
| 52 |
if type(url) == str:
|
| 53 |
target_url = unquote(url)
|
|
@@ -80,20 +64,17 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 80 |
# 设置为无头模式
|
| 81 |
options.add_argument('--headless')
|
| 82 |
|
| 83 |
-
# 开启日志性能监听
|
| 84 |
-
for key, value in caps.items():
|
| 85 |
-
options.set_capability(key, value)
|
| 86 |
-
|
| 87 |
# 实例化
|
| 88 |
driver = webdriver.Chrome(options=options)
|
| 89 |
|
| 90 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
| 91 |
driver.get(target_url)
|
| 92 |
|
| 93 |
-
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage
|
| 94 |
driver.delete_all_cookies()
|
| 95 |
driver.execute_script("window.sessionStorage.clear();")
|
| 96 |
driver.execute_script("window.localStorage.clear();")
|
|
|
|
| 97 |
|
| 98 |
# 对浏览器追加我们传递进来的cookie
|
| 99 |
if 'cookie' in header_array:
|
|
@@ -126,34 +107,19 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 126 |
|
| 127 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 128 |
is_jump = (target_url != current_url)
|
| 129 |
-
|
| 130 |
-
network = []
|
| 131 |
-
performance_log = driver.get_log('performance') # 获取名称为 performance 的日志
|
| 132 |
-
for packet in performance_log:
|
| 133 |
-
message = json.loads(packet.get('message')).get('message') # 获取message的数据
|
| 134 |
-
if message.get('method') != 'Network.responseReceived': # 如果method 不是 responseReceived 类型就不往下执行
|
| 135 |
-
continue
|
| 136 |
-
packet_type = message.get('params').get('response').get('mimeType') # 获取该请求返回的type
|
| 137 |
-
if not filter_type(_type=packet_type): # 过滤type
|
| 138 |
-
continue
|
| 139 |
-
requestId = message.get('params').get('requestId') # 唯一的请求标识符。相当于该请求的身份证
|
| 140 |
-
url = message.get('params').get('response').get('url') # 获取 该请求 url
|
| 141 |
-
headers = message.get('params').get('response').get('headers') # response headers of this request
|
| 142 |
-
status = message.get('params').get('response').get('status') # response status of this request
|
| 143 |
-
try:
|
| 144 |
-
network.append({"status":status ,"url":url, "type":packet_type, "headers":headers})
|
| 145 |
-
# resp = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': requestId})
|
| 146 |
-
# print(f'response: {resp}')
|
| 147 |
-
except WebDriverException: # 忽略异常
|
| 148 |
-
pass
|
| 149 |
-
print(driver.requests)
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
data = {
|
| 152 |
"url": current_url,
|
| 153 |
"page_source": page_source,
|
| 154 |
"cookies": cookies,
|
| 155 |
"is_jump": is_jump,
|
| 156 |
-
"network": network
|
| 157 |
}
|
| 158 |
|
| 159 |
driver.quit()
|
|
|
|
| 1 |
from seleniumwire import webdriver
|
| 2 |
from selenium.webdriver.chrome.options import Options
|
|
|
|
| 3 |
from fastapi import FastAPI, Request
|
| 4 |
import uvicorn
|
| 5 |
import time
|
|
|
|
| 24 |
return '.'.join(parts[-2:])
|
| 25 |
else:
|
| 26 |
return domain
|
| 27 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
@app.get("/")
def main():
    """Root endpoint: return a fixed success payload."""
    payload = {"code": 200, "msg": "Success"}
    return payload
|
| 31 |
|
| 32 |
@app.get("/chrome")
|
| 33 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 34 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
# 必须有目标url
|
| 36 |
if type(url) == str:
|
| 37 |
target_url = unquote(url)
|
|
|
|
| 64 |
# 设置为无头模式
|
| 65 |
options.add_argument('--headless')
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
# 实例化
|
| 68 |
driver = webdriver.Chrome(options=options)
|
| 69 |
|
| 70 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
| 71 |
driver.get(target_url)
|
| 72 |
|
| 73 |
+
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage,并删除因此次访问所产生的 har
|
| 74 |
driver.delete_all_cookies()
|
| 75 |
driver.execute_script("window.sessionStorage.clear();")
|
| 76 |
driver.execute_script("window.localStorage.clear();")
|
| 77 |
+
del driver.requests
|
| 78 |
|
| 79 |
# 对浏览器追加我们传递进来的cookie
|
| 80 |
if 'cookie' in header_array:
|
|
|
|
| 107 |
|
| 108 |
# 完全加载完成时,页面是否有发生过 301 302 跳转过
|
| 109 |
is_jump = (target_url != current_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
+
network = []
# Walk the requests selenium-wire recorded on the driver and keep the ones
# that actually received a response.
for request in driver.requests:
    response = request.response
    if response:
        network.append({
            # The HTTP verb is an attribute of the request; selenium-wire
            # Response objects expose no `method`, so reading it from the
            # response raised AttributeError.
            "method": request.method,
            "status": response.status_code,
            "url": request.url,
            "headers": response.headers,
        })
|
| 116 |
+
|
| 117 |
data = {
|
| 118 |
"url": current_url,
|
| 119 |
"page_source": page_source,
|
| 120 |
"cookies": cookies,
|
| 121 |
"is_jump": is_jump,
|
| 122 |
+
"network": network
|
| 123 |
}
|
| 124 |
|
| 125 |
driver.quit()
|