sdiazlor committed on
Commit
f4fb90b
·
1 Parent(s): ca59253

Ensure the NLTK data download directory exists

Browse files
src/synthetic_dataset_generator/apps/rag.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import random
2
  import uuid
3
  from tqdm import tqdm
@@ -51,8 +52,11 @@ from synthetic_dataset_generator.utils import (
51
  get_random_repo_name,
52
  swap_visibility,
53
  )
54
- nltk.download("punkt_tab")
55
- nltk.download("averaged_perceptron_tagger_eng",download_dir="./nltk_data/")
 
 
 
56
 
57
  def _get_valid_columns(dataframe: pd.DataFrame):
58
  doc_valid_columns = []
 
1
+ import os
2
  import random
3
  import uuid
4
  from tqdm import tqdm
 
52
  get_random_repo_name,
53
  swap_visibility,
54
  )
55
+
56
+ os.makedirs("./nltk_data", exist_ok=True)
57
+ nltk.data.path.append("./nltk_data")
58
+ nltk.download("punkt", download_dir="./nltk_data")
59
+ nltk.download("averaged_perceptron_tagger", download_dir="./nltk_data")
60
 
61
  def _get_valid_columns(dataframe: pd.DataFrame):
62
  doc_valid_columns = []