Spaces:
Build error
Build error
File size: 1,745 Bytes
4ee33aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import time
from webdriver_manager.chrome import ChromeDriverManager
# Browser options are assembled first; "--log-level=3" is handed to Chrome
# as a command-line switch (presumably to quiet its console logging).
op = webdriver.ChromeOptions()
# To run without a visible window, enable: op.add_argument('headless')
op.add_argument("--log-level=3")

# webdriver_manager's install() result is used as the chromedriver
# executable path for the Selenium service.
_chromedriver_path = ChromeDriverManager().install()
service = Service(executable_path=_chromedriver_path)

# Single shared browser session used by the rest of the script.
driver = webdriver.Chrome(service=service, options=op)
# All series pages live under the same base URL; only the trailing slug
# differs, so the full list is derived from the slugs below.
_SERIES_BASE_URL = "https://zh.player.fm/series/"
_SERIES_SLUGS = (
    "fm-59854",
    "series-1288180",
    "series-1952287",
    "re-men-shu-ji-jie-du",
    "xue-qiu-cai-jing-you-shen-du",
    "series-1540278",
    "2435157",
    "gu-shi-fm-1496859",
)

# Series pages to visit, in order.
websites = [_SERIES_BASE_URL + slug for slug in _SERIES_SLUGS]

# Text file that collected audio URLs are appended to, one per line.
urls_file = "urls.txt"
def _scroll_to_load_all(browser, rounds):
    """Repeatedly scroll the page currently open in ``browser``.

    Each round sends END to jump to the bottom, pauses, then sends PAGE_UP
    and pauses again. Presumably this triggers the page's lazy loading of
    further episodes -- confirm against the site's behaviour.

    Args:
        browser: The Selenium WebDriver whose current page is scrolled.
        rounds: Number of END / PAGE_UP rounds to perform.
    """
    for _ in range(rounds):
        # The root element is looked up again on every round, exactly as
        # the original loop did.
        page_root = browser.find_element(By.TAG_NAME, 'html')
        page_root.send_keys(Keys.END)
        time.sleep(2)
        # Simulate pressing "Page Up" to move the page slightly upwards.
        page_root.send_keys(Keys.PAGE_UP)
        # Wait a moment so the page can finish loading.
        time.sleep(1)


def _extract_audio_urls(html):
    """Return the ``.m4a`` link targets found in ``html``.

    Only every other matching anchor is kept, starting with the first
    (1st, 3rd, 5th, ...). NOTE(review): presumably each episode link is
    matched twice in the markup, so this drops the duplicate -- confirm.

    Args:
        html: Page source as a string.

    Returns:
        A list of ``href`` values, in document order.
    """
    soup = BeautifulSoup(html, "html.parser")
    target_tags = soup.select('a[href$=".m4a"]')
    return [tag['href'] for tag in target_tags[::2]]


# Number of scroll rounds performed on each series page.
scrolls = 50

try:
    for website in websites:
        driver.get(website)
        _scroll_to_load_all(driver, scrolls)
        audio_urls = _extract_audio_urls(driver.page_source)
        # Skip the write when nothing matched, so no empty file is created.
        if audio_urls:
            # Open once per page instead of once per URL; append mode keeps
            # results already collected from earlier pages or runs.
            with open(urls_file, "a", encoding="utf-8") as file:
                file.writelines(
                    audio_url + "\n" for audio_url in audio_urls
                )
finally:
    # Always shut the browser down, even if a page fails part-way through;
    # otherwise the Chrome/chromedriver processes are left running.
    driver.quit()
|