Valeriy Sinyukov
committed on
Commit
·
283e838
1
Parent(s):
43a63e6
Script for downloading Russian dataset
Browse files
category_classification/datasets/ru/download_train_test.py
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
|
3 |
+
import os
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
from kagglehub import dataset_download
|
7 |
+
|
8 |
+
# Kaggle dataset handle passed to kagglehub.
dataset = "hibiscus4000/Arxiv-papers-ru"

# CSV file names looked up inside the downloaded dataset directory; the same
# names are used for the symlinks created in the current working directory.
test_dataset = "arxiv_test.csv"
train_dataset = "arxiv_train.csv"

# dataset_download() returns the local directory holding the dataset files.
dataset_path = Path(dataset_download(dataset))
test_file_path = dataset_path / test_dataset
train_file_path = dataset_path / train_dataset

# Expose each downloaded CSV in the current working directory via a symlink.
# The existence check must be made on the *link* location: checking the
# downloaded file instead would skip linking after every successful download
# and would create a dangling link whenever the file is missing.
for target_path, link_name in (
    (test_file_path, test_dataset),
    (train_file_path, train_dataset),
):
    if not target_path.exists():
        raise FileNotFoundError(
            f"{target_path} not found in downloaded dataset {dataset!r}"
        )

    link_path = Path(link_name)
    # A symlink whose target no longer exists is stale (exists() follows the
    # link, is_symlink() does not); drop it so it can be recreated below.
    if link_path.is_symlink() and not link_path.exists():
        link_path.unlink()

    # lexists() is true for any existing entry, including symlinks, so an
    # existing file or valid link is left untouched and os.symlink() cannot
    # fail with FileExistsError.
    if not os.path.lexists(link_path):
        os.symlink(target_path, link_path)
|