File size: 1,745 Bytes
4ee33aa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup
import time
from webdriver_manager.chrome import ChromeDriverManager
# Let webdriver_manager provide a chromedriver binary and hand its path to
# Selenium's Service wrapper.
chromedriver_path = ChromeDriverManager().install()
service = Service(executable_path=chromedriver_path)

# Chrome launch options. The headless switch is left disabled, so a browser
# window is shown while the script runs; "--log-level=3" is passed straight
# through to Chrome (presumably to quieten its console logging).
op = webdriver.ChromeOptions()
# op.add_argument('headless')
op.add_argument("--log-level=3")

# Single shared browser session used by the rest of the script.
driver = webdriver.Chrome(service=service, options=op)

# Series pages on zh.player.fm to visit, in the order they are listed here.
websites = [
    "https://zh.player.fm/series/fm-59854",
    "https://zh.player.fm/series/series-1288180",
    "https://zh.player.fm/series/series-1952287",
    "https://zh.player.fm/series/re-men-shu-ji-jie-du",
    "https://zh.player.fm/series/xue-qiu-cai-jing-you-shen-du",
    "https://zh.player.fm/series/series-1540278",
    "https://zh.player.fm/series/2435157",
    "https://zh.player.fm/series/gu-shi-fm-1496859",
]


# Text file that receives one audio URL per line. It is opened in append mode,
# so successive pages (and repeated runs) add to it instead of overwriting it.
urls_file = "urls.txt"

# Number of scroll rounds performed on every series page before its HTML is
# captured.
scrolls = 50

try:
    for website in websites:
        driver.get(website)

        for _ in range(scrolls):
            body = driver.find_element(By.TAG_NAME, 'html')
            # Jump to the bottom of the page, then give it time to react.
            body.send_keys(Keys.END)
            time.sleep(2)
            # Simulate pressing "Page Up" to nudge the page back up a little.
            body.send_keys(Keys.PAGE_UP)
            # Wait a moment so the page can finish loading.
            time.sleep(1)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Every anchor whose href ends in ".m4a".
        target_tags = soup.select('a[href$=".m4a"]')

        # Keep only the 1st, 3rd, 5th, ... match and drop the ones in between.
        # NOTE(review): presumably each episode link appears twice in the page
        # markup and this removes the duplicate -- confirm against the site.
        audio_urls = [tag['href'] for tag in target_tags[::2]]

        # Open the output file once per page rather than once per URL, and only
        # when there is something to write, so no empty file is created.
        if audio_urls:
            with open(urls_file, "a", encoding="utf-8") as file:
                file.writelines(f"{audio_url}\n" for audio_url in audio_urls)
finally:
    # Always shut the browser down, even if a page load or parse step raised;
    # otherwise the Chrome/chromedriver processes are left running.
    driver.quit()