"""Collect ``.m4a`` episode links from a list of player.fm series pages.

For every page in ``websites`` the script opens the page in Chrome, scrolls
repeatedly so that lazily loaded content is rendered, parses the resulting
HTML and appends the matching audio links to ``urls_file`` (one per line).
"""
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from webdriver_manager.chrome import ChromeDriverManager

# Series pages to scrape.
websites = [
    "https://zh.player.fm/series/fm-59854",
    "https://zh.player.fm/series/series-1288180",
    "https://zh.player.fm/series/series-1952287",
    "https://zh.player.fm/series/re-men-shu-ji-jie-du",
    "https://zh.player.fm/series/xue-qiu-cai-jing-you-shen-du",
    "https://zh.player.fm/series/series-1540278",
    "https://zh.player.fm/series/2435157",
    "https://zh.player.fm/series/gu-shi-fm-1496859",
]

# Output file; links are appended, so earlier runs are preserved.
urls_file = "urls.txt"


def _build_driver():
    """Create a Chrome WebDriver using a driver binary from webdriver_manager."""
    service = Service(executable_path=ChromeDriverManager().install())
    op = webdriver.ChromeOptions()
    # op.add_argument('headless')  # uncomment to run without a browser window
    op.add_argument("--log-level=3")
    return webdriver.Chrome(options=op, service=service)


def _scroll_to_load(driver, scrolls=50, settle_seconds=2, nudge_seconds=1):
    """Scroll the current page to the bottom ``scrolls`` times.

    Args:
        driver: WebDriver whose current page should be scrolled.
        scrolls: Number of END / PAGE_UP rounds to perform.
        settle_seconds: Pause after jumping to the end of the page.
        nudge_seconds: Pause after scrolling back up slightly, to give the
            page time to finish loading.
    """
    for _ in range(scrolls):
        # The element is looked up on every round, as the original script did.
        body = driver.find_element(By.TAG_NAME, 'html')
        body.send_keys(Keys.END)
        time.sleep(settle_seconds)
        # Simulate pressing "Page Up" to scroll the page up slightly.
        body.send_keys(Keys.PAGE_UP)
        # Wait a while to make sure the page has finished loading.
        time.sleep(nudge_seconds)


def _extract_audio_urls(html_content):
    """Return the ``href`` of every other ``<a>`` whose link ends in ``.m4a``.

    Args:
        html_content: HTML source of a page.

    Returns:
        A list of href strings taken from the 1st, 3rd, 5th, ... matching
        anchors, in document order.
    """
    soup = BeautifulSoup(html_content, "html.parser")
    target_tags = soup.select('a[href$=".m4a"]')
    # Only every other match is kept, starting with the first. Presumably the
    # page renders two anchors per episode -- TODO confirm against the site.
    return [tag['href'] for tag in target_tags[::2]]


def _append_urls(path, audio_urls):
    """Append ``audio_urls`` to ``path``, one per line.

    Nothing is written (and the file is not created) when the list is empty.
    """
    if not audio_urls:
        return
    with open(path, "a", encoding="utf-8") as file:
        file.writelines(f"{url}\n" for url in audio_urls)


def main():
    """Scrape every page in ``websites`` and append its links to ``urls_file``."""
    driver = _build_driver()
    try:
        for website in websites:
            driver.get(website)
            _scroll_to_load(driver)
            _append_urls(urls_file, _extract_audio_urls(driver.page_source))
    finally:
        # Always shut the browser down, even if a page fails to load or parse.
        driver.quit()


if __name__ == "__main__":
    main()