Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -38,7 +38,11 @@ def main():
|
|
| 38 |
|
| 39 |
@app.get("/chrome")
|
| 40 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
# 必须有目标url
|
| 43 |
if type(url) == str:
|
| 44 |
target_url = unquote(url)
|
|
@@ -72,12 +76,12 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 72 |
options.add_argument('--headless')
|
| 73 |
|
| 74 |
# 实例化
|
| 75 |
-
driver = webdriver.Chrome(options=options)
|
| 76 |
|
| 77 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
| 78 |
driver.get(target_url)
|
| 79 |
|
| 80 |
-
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage,并删除因此次访问所产生的 har
|
| 81 |
driver.delete_all_cookies()
|
| 82 |
driver.execute_script("window.sessionStorage.clear();")
|
| 83 |
driver.execute_script("window.localStorage.clear();")
|
|
@@ -116,7 +120,7 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 116 |
is_jump = (target_url != current_url)
|
| 117 |
|
| 118 |
network = []
|
| 119 |
-
# 遍历输出过程中的
|
| 120 |
for request in driver.requests:
|
| 121 |
if request.response:
|
| 122 |
network.append({
|
|
@@ -126,7 +130,8 @@ def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
|
| 126 |
"responseheaders":{k: try_json_decode(v) for k, v in request.response.headers.items()},
|
| 127 |
"requestheaders":{k: try_json_decode(v) for k, v in request.headers.items()},
|
| 128 |
})
|
| 129 |
-
|
|
|
|
| 130 |
data = {
|
| 131 |
"url": current_url,
|
| 132 |
"page_source": page_source,
|
|
|
|
| 38 |
|
| 39 |
@app.get("/chrome")
|
| 40 |
def chrome(url:str=None,wait:int=5,header:str=None,cookie:str=None):
|
| 41 |
+
# 开启捕获HAR数据功能,允许使用 driver.har 进行检索
|
| 42 |
+
seleniumwire_options = {
|
| 43 |
+
'enable_har': True
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
# 必须有目标url
|
| 47 |
if type(url) == str:
|
| 48 |
target_url = unquote(url)
|
|
|
|
| 76 |
options.add_argument('--headless')
|
| 77 |
|
| 78 |
# 实例化
|
| 79 |
+
driver = webdriver.Chrome(options=options,seleniumwire_options=seleniumwire_options)
|
| 80 |
|
| 81 |
# 需要打开网址页面,才能用 driver.add_cookie 进行cookie追加
|
| 82 |
driver.get(target_url)
|
| 83 |
|
| 84 |
+
# 清除本次打开网址页面,可能存储在本地的cookie、sessionStorage、localStorage,并删除因此次访问所产生的 network 和 har 记录
|
| 85 |
driver.delete_all_cookies()
|
| 86 |
driver.execute_script("window.sessionStorage.clear();")
|
| 87 |
driver.execute_script("window.localStorage.clear();")
|
|
|
|
| 120 |
is_jump = (target_url != current_url)
|
| 121 |
|
| 122 |
network = []
|
| 123 |
+
# 遍历输出过程中的 network
|
| 124 |
for request in driver.requests:
|
| 125 |
if request.response:
|
| 126 |
network.append({
|
|
|
|
| 130 |
"responseheaders":{k: try_json_decode(v) for k, v in request.response.headers.items()},
|
| 131 |
"requestheaders":{k: try_json_decode(v) for k, v in request.headers.items()},
|
| 132 |
})
|
| 133 |
+
print(driver.har)
|
| 134 |
+
|
| 135 |
data = {
|
| 136 |
"url": current_url,
|
| 137 |
"page_source": page_source,
|