File size: 1,199 Bytes
8b414b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
import os
from pathlib import Path
from typing import Tuple
import pandas as pd
from dotenv import load_dotenv
# Runs at import time. Presumably this populates os.environ (e.g. DATA_PATH,
# which load_train_test_df reads) from a local .env file — TODO confirm.
load_dotenv()
def set_env_if_kaggle_environ() -> None:
    """Point ``DATA_PATH`` at the Kaggle competition input directory.

    The override is applied only when the ``KAGGLE_DATA_PROXY_TOKEN``
    environment variable is present; otherwise the environment is left
    untouched.
    """
    if 'KAGGLE_DATA_PROXY_TOKEN' not in os.environ:
        return

    kaggle_input_dir = '/kaggle/input/feedback-prize-english-language-learning/'
    os.environ['DATA_PATH'] = kaggle_input_dir
def load_train_test_df(is_testing: bool = False) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Load the train and test dataframes from CSV files.

    :param is_testing: If true, read the subsample files
        ``tests/data/train_sample.csv`` and ``tests/data/test_sample.csv``
        instead of ``train.csv`` and ``test.csv`` under ``DATA_PATH``.
    :return: Tuple ``(train_df, test_df)``.
    :raises KeyError: If ``is_testing`` is false and the ``DATA_PATH``
        environment variable is not set.
    :raises FileNotFoundError: If either CSV file does not exist. This is a
        subclass of ``OSError``, so callers catching ``OSError`` still work.
    """
    set_env_if_kaggle_environ()

    if is_testing:
        train_df_path = Path("tests/data/train_sample.csv")
        test_df_path = Path("tests/data/test_sample.csv")
    else:
        data_path = os.environ.get('DATA_PATH')
        if data_path is None:
            # Same exception type as a raw os.environ lookup, but with a
            # message that tells the user what to configure.
            raise KeyError(
                "DATA_PATH environment variable is not set; "
                "it must point to the directory containing train.csv and test.csv"
            )
        data_dir = Path(data_path)
        train_df_path = data_dir / 'train.csv'
        test_df_path = data_dir / 'test.csv'

    # Check the test file first, then the train file, so that when both are
    # missing the error names the test file (the historical behavior).
    for csv_path in (test_df_path, train_df_path):
        if not csv_path.is_file():
            raise FileNotFoundError(f"File not found: {csv_path.absolute()}")

    train_df = pd.read_csv(train_df_path)
    test_df = pd.read_csv(test_df_path)
    return train_df, test_df
|