Ensure the NLTK data download directory exists
Browse files
src/synthetic_dataset_generator/apps/rag.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import random
|
2 |
import uuid
|
3 |
from tqdm import tqdm
|
@@ -51,8 +52,11 @@ from synthetic_dataset_generator.utils import (
|
|
51 |
get_random_repo_name,
|
52 |
swap_visibility,
|
53 |
)
|
54 |
-
|
55 |
-
|
|
|
|
|
|
|
56 |
|
57 |
def _get_valid_columns(dataframe: pd.DataFrame):
|
58 |
doc_valid_columns = []
|
|
|
1 |
+
import os
|
2 |
import random
|
3 |
import uuid
|
4 |
from tqdm import tqdm
|
|
|
52 |
get_random_repo_name,
|
53 |
swap_visibility,
|
54 |
)
|
55 |
+
|
56 |
+
os.makedirs("./nltk_data", exist_ok=True)
|
57 |
+
nltk.data.path.append("./nltk_data")
|
58 |
+
nltk.download("punkt", download_dir="./nltk_data")
|
59 |
+
nltk.download("averaged_perceptron_tagger", download_dir="./nltk_data")
|
60 |
|
61 |
def _get_valid_columns(dataframe: pd.DataFrame):
|
62 |
doc_valid_columns = []
|