File size: 1,199 Bytes
8b414b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
from pathlib import Path
from typing import Tuple

import pandas as pd
from dotenv import load_dotenv

# Runs at import time. Presumably this lets settings such as DATA_PATH (read
# from os.environ further down) come from a local .env file -- confirm against
# the python-dotenv documentation and the project's deployment setup.
load_dotenv()


def set_env_if_kaggle_environ() -> None:
    """Point ``DATA_PATH`` at the Kaggle competition input directory.

    Does nothing unless the ``KAGGLE_DATA_PROXY_TOKEN`` environment variable
    is present. When it is present, ``DATA_PATH`` is set (overwriting any
    existing value) in ``os.environ``.
    """
    has_kaggle_token = os.environ.get('KAGGLE_DATA_PROXY_TOKEN') is not None
    if not has_kaggle_token:
        return

    kaggle_input_dir = '/kaggle/input/feedback-prize-english-language-learning/'
    os.environ['DATA_PATH'] = kaggle_input_dir


def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test dataframes from CSV files.

    :param is_testing: If true, read ``train_sample.csv`` / ``test_sample.csv``
        from ``tests/data`` (resolved relative to the current working
        directory) instead of ``train.csv`` / ``test.csv`` from the directory
        named by the ``DATA_PATH`` environment variable.
    :return: Train and test dataframes, in that order.
    :raises KeyError: If ``is_testing`` is false and ``DATA_PATH`` is not set.
    :raises FileNotFoundError: If either CSV file does not exist. This is a
        subclass of ``OSError``, so ``except OSError`` handlers still match.

    """
    # May set DATA_PATH in os.environ, so it has to run before the lookup below.
    set_env_if_kaggle_environ()

    if is_testing:
        data_dir = Path("tests/data")
        train_df_path = data_dir / "train_sample.csv"
        test_df_path = data_dir / "test_sample.csv"
    else:
        try:
            data_dir = Path(os.environ['DATA_PATH'])
        except KeyError:
            # Same exception type as a plain lookup, but with an actionable message.
            raise KeyError(
                "DATA_PATH environment variable is not set; "
                "set it in the environment or in a .env file"
            ) from None
        train_df_path = data_dir / 'train.csv'
        test_df_path = data_dir / 'test.csv'

    # Validate both files before reading either one; the test file is checked
    # first so that it is the one reported when both are missing.
    for csv_path in (test_df_path, train_df_path):
        if not csv_path.is_file():
            raise FileNotFoundError(f"File not found: {csv_path.absolute()}")

    train_df = pd.read_csv(train_df_path)
    test_df = pd.read_csv(test_df_path)

    return train_df, test_df