Valeriy Sinyukov committed on
Commit
283e838
·
1 Parent(s): 43a63e6

Script for downloading Russian dataset

Browse files
category_classification/datasets/ru/download_train_test.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env python
"""Download the Russian arXiv dataset and link its CSV files locally.

The dataset is fetched with ``kagglehub.dataset_download``. For each of the
test and train CSV files, a symbolic link with the same file name is then
created in the current working directory, pointing at the downloaded copy.
Links that are already present are left untouched, so the script can be
re-run safely.
"""

import os
from pathlib import Path

from kagglehub import dataset_download

dataset = "hibiscus4000/Arxiv-papers-ru"

test_dataset = "arxiv_test.csv"
train_dataset = "arxiv_train.csv"

# The value returned by dataset_download is used as the directory that
# contains the dataset's files.
dataset_path = Path(dataset_download(dataset))
test_file_path = dataset_path / test_dataset
train_file_path = dataset_path / train_dataset

for source_path, link_name in (
    (test_file_path, test_dataset),
    (train_file_path, train_dataset),
):
    # Guard on the link location, not on the downloaded file: the download
    # is expected to exist, and it is the link in the working directory that
    # may or may not have been created by an earlier run. lexists() is used
    # instead of exists() so that an existing (even broken) link is detected
    # and os.symlink does not fail with FileExistsError.
    if not os.path.lexists(link_name):
        os.symlink(source_path, link_name)