Spaces:
Build error
Build error
"""Collect ``.m4a`` links from a fixed list of player.fm series pages.

Each series page is opened in Chrome and scrolled a fixed number of times
(presumably so that additional episodes get loaded into the page -- confirm
against the site's behavior).  The resulting page source is parsed and every
other anchor whose ``href`` ends in ``.m4a`` is appended to ``urls.txt``,
one URL per line.
"""

import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

# Browser setup: webdriver_manager supplies the chromedriver executable path.
service = Service(executable_path=ChromeDriverManager().install())
op = webdriver.ChromeOptions()
# op.add_argument('headless')  # enable to pass Chrome the "headless" flag
op.add_argument("--log-level=3")
driver = webdriver.Chrome(options=op, service=service)

# Series pages to scan.
websites = [
    "https://zh.player.fm/series/fm-59854",
    "https://zh.player.fm/series/series-1288180",
    "https://zh.player.fm/series/series-1952287",
    "https://zh.player.fm/series/re-men-shu-ji-jie-du",
    "https://zh.player.fm/series/xue-qiu-cai-jing-you-shen-du",
    "https://zh.player.fm/series/series-1540278",
    "https://zh.player.fm/series/2435157",
    "https://zh.player.fm/series/gu-shi-fm-1496859",
]

# Output file; URLs are appended, so earlier runs' results are kept.
urls_file = "urls.txt"

# Number of END / PAGE_UP scroll cycles performed on each page.
scrolls = 50

try:
    for website in websites:
        driver.get(website)

        for _ in range(scrolls):
            body = driver.find_element(By.TAG_NAME, "html")
            body.send_keys(Keys.END)
            time.sleep(2)
            # Simulate pressing "Page Up" to nudge the page slightly upward.
            body.send_keys(Keys.PAGE_UP)
            # Wait a moment to make sure the page has finished loading.
            time.sleep(1)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        target_tags = soup.select('a[href$=".m4a"]')

        # Keep the 1st, 3rd, 5th, ... match and skip the ones in between.
        # NOTE(review): presumably each episode link appears twice in a row
        # in the markup and this removes the duplicate -- confirm on the site.
        audio_urls = [tag["href"] for tag in target_tags[::2]]

        # Write once per site so finished sites are on disk even if a later
        # site fails; skip the open entirely when nothing was found.
        if audio_urls:
            with open(urls_file, "a", encoding="utf-8") as file:
                file.writelines(url + "\n" for url in audio_urls)
finally:
    # Always shut the browser down, even when a page load or parse fails.
    driver.quit()