File size: 2,069 Bytes
79c7b05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import argparse
import os
import sys
from urllib.parse import urlsplit

import requests
from bs4 import BeautifulSoup

# Folder that receives downloaded screenplays; it is created (if missing)
# as soon as this module is loaded.
save_dir = "scripts"
os.makedirs(name=save_dir, exist_ok=True)

# Screenplay page fetched when no other URL is supplied
DEFAULT_SCRIPT_URL = "https://www.dailyscript.com/scripts/twelve_monkeys.html"

def _strip_line_number(line):
    """Remove a leading ``<digits>|`` gutter from *line*, if it has one.

    Only a prefix that is empty, whitespace, or purely numeric is treated as
    a line-number gutter; a ``|`` that appears inside ordinary text is left
    alone so screenplay content is never truncated.
    """
    prefix, sep, rest = line.partition("|")
    gutter = prefix.strip()
    if sep and (not gutter or gutter.isdigit()):
        return rest
    return line


def _filename_from_url(script_url):
    """Return the ``.txt`` filename derived from the last path segment of *script_url*.

    The query string and fragment are ignored, and a trailing slash is
    dropped before taking the basename. Falls back to ``script.txt`` when the
    URL path yields no usable name.
    """
    path = urlsplit(script_url).path.rstrip("/")
    stem = os.path.basename(path).split(".")[0]
    return (stem or "script") + ".txt"


# Function to download a script
def download_script(script_url, save_dir, timeout=30):
    """Download a screenplay page and save its text as a ``.txt`` file.

    The text is taken from the first ``<pre>`` element (with any leading
    ``<number>|`` line-number gutter removed from each line); if the page has
    no ``<pre>``, the text of ``<body>`` is used instead.

    Args:
        script_url: URL of the HTML page containing the screenplay.
        save_dir: Directory to write the text file into; created if missing.
        timeout: Seconds to wait for the server before giving up, passed to
            ``requests.get``.

    Returns:
        The path of the file that was written.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server returns an error status.
        ValueError: If the page has neither a ``<pre>`` nor a ``<body>``.
        OSError: If the directory or file cannot be written.
    """
    # Without a timeout a stalled server would block this call indefinitely.
    response = requests.get(script_url, timeout=timeout)
    response.raise_for_status()

    # Parse the HTML content
    soup = BeautifulSoup(response.text, "html.parser")

    # Try to find the <pre> tag
    pre_tag = soup.find("pre")

    if pre_tag is not None:
        # Extract the text from the <pre> tag and remove the line numbers
        cleaned_text = "\n".join(
            _strip_line_number(line) for line in pre_tag.get_text().split("\n")
        )
    else:
        # If no <pre> tag, get the text from the body
        body = soup.find("body")
        if body is None:
            raise ValueError("Could not find script content in the HTML")
        cleaned_text = body.get_text()

    # The caller may pass any directory, so make sure it exists before writing.
    os.makedirs(save_dir, exist_ok=True)
    save_path = os.path.join(save_dir, _filename_from_url(script_url))

    with open(save_path, "w", encoding="utf-8") as file:
        file.write(cleaned_text)

    print(f"Downloaded: {save_path}")
    return save_path

def main(argv=None):
    """Parse command-line arguments and download the requested screenplay.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 1 if the download failed.
    """
    parser = argparse.ArgumentParser(description="Download a screenplay from a given URL.")
    parser.add_argument("--url", type=str, default=DEFAULT_SCRIPT_URL,
                        help="URL of the screenplay to download (default: Twelve Monkeys)")
    args = parser.parse_args(argv)

    # download_script derives the output filename itself and returns the
    # saved path, so nothing needs to be precomputed here.
    try:
        download_script(args.url, save_dir)
    except (requests.RequestException, OSError, ValueError) as err:
        # Report expected failures (network, HTTP status, missing content,
        # unwritable file) as a short message instead of a traceback.
        print(f"Error: could not download {args.url}: {err}", file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())