File size: 2,393 Bytes
6881af9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import os
import subprocess
import sys
from pathlib import Path

import requests

from parltopic.utils.helper import get_main_config

# Project configuration, loaded once at import time; the script entry point
# reads its "paths" section (including the "resources" entry).
config = get_main_config()


def download_model(url, save_path, timeout=30, chunk_size=64 * 1024):
    """Download ``url`` and write the response body to ``save_path``.

    The body is streamed to disk in chunks, so the whole file is never held
    in memory. A non-200 status is reported on stdout and nothing is written;
    no exception is raised for it.

    Args:
        url: Address of the file to fetch.
        save_path: Destination file path (anything ``open`` accepts).
        timeout: Seconds to wait for the server to connect / send data before
            ``requests`` raises a timeout error. Without it a stalled server
            would block the script forever.
        chunk_size: Number of bytes requested per streamed chunk.

    Returns:
        None.
    """
    # The context manager releases the streamed connection on every path,
    # including the non-200 branch and errors raised while writing.
    with requests.get(url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Failed to download model. Status code: {response.status_code}")
            return

        with open(save_path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                # Skip empty keep-alive chunks.
                if chunk:
                    f.write(chunk)

    print("Model downloaded successfully!")


def set_tokenizers_parallelism(value):
    """Store "true" or "false" in TOKENIZERS_PARALLELISM based on the truthiness of ``value``."""
    if value:
        flag = "true"
    else:
        flag = "false"
    os.environ["TOKENIZERS_PARALLELISM"] = flag

    # Echo the value as it now stands in the process environment.
    print(f"TOKENIZERS_PARALLELISM set to {os.environ['TOKENIZERS_PARALLELISM']}")


def install_requirements():
    """Run ``pip install -r requirements.txt`` with the current interpreter.

    Prints a success message when pip exits cleanly. If pip returns a
    non-zero status, prints the error and terminates with exit code 1.
    """
    pip_command = [sys.executable, "-m", "pip", "install", "-r", "requirements.txt"]
    try:
        subprocess.check_call(pip_command)
    except subprocess.CalledProcessError as err:
        print(f"Failed to install packages from requirements.txt: {err}")
        sys.exit(1)
    else:
        print("All packages from requirements.txt installed successfully.")


def install_spacy_model(model_name):
    """Download the spaCy model ``model_name`` via ``python -m spacy download``.

    Prints a success message when the command exits cleanly. If it returns a
    non-zero status, prints the error and terminates with exit code 1.
    """
    spacy_command = [sys.executable, "-m", "spacy", "download", model_name]
    try:
        subprocess.check_call(spacy_command)
    except subprocess.CalledProcessError as err:
        print(f"Failed to install spaCy model '{model_name}': {err}")
        sys.exit(1)
    else:
        print(f"spaCy model '{model_name}' installed successfully.")


if __name__ == "__main__":
    # Create every configured directory before anything is written into it.
    # parents=True lets a nested path be created when its parent directories
    # do not exist yet; without it mkdir raises FileNotFoundError.
    for path in config["paths"].values():
        Path(path).mkdir(parents=True, exist_ok=True)

    # Fetch the fastText lid.176 model file into the resources directory.
    download_model(
        "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.bin",
        Path(config["paths"]["resources"]) / "lid.176.bin",
    )

    # Install the large German, French and Italian spaCy pipelines; each call
    # exits the script with status 1 if its download fails.
    for model_name in ("de_core_news_lg", "fr_core_news_lg", "it_core_news_lg"):
        install_spacy_model(model_name)

    # NOTE(review): this only changes os.environ for the current process, which
    # ends right after this call — confirm whether a persistent setting was intended.
    set_tokenizers_parallelism(True)