|
import os |
|
import logging |
|
from datasets import load_dataset |
|
import pickle |
|
|
|
def load_data(data_set_name, local_path="local_datasets"):
    """Return the ``test`` split of a RAGBench subset, cached on local disk.

    The dataset is looked up in ``<local_path>/<data_set_name>_test.pkl``.
    When that file exists and can be unpickled it is returned directly;
    otherwise the subset is fetched with ``load_dataset`` from the
    ``rungalileo/ragbench`` repository and pickled to that path for next time.

    Args:
        data_set_name: Configuration name passed to ``load_dataset``; also
            used as the prefix of the cache file name.
        local_path: Directory holding the pickle cache. Created if missing.

    Returns:
        The loaded dataset object (whatever ``load_dataset`` returned, or the
        object previously pickled to the cache file).
    """
    os.makedirs(local_path, exist_ok=True)
    dataset_file = os.path.join(local_path, f"{data_set_name}_test.pkl")

    dataset = None
    cache_hit = False

    if os.path.exists(dataset_file):
        logging.info("Loading dataset from local storage")
        try:
            # NOTE(security): unpickling can execute arbitrary code. Only
            # point ``local_path`` at a directory whose contents you trust.
            with open(dataset_file, "rb") as f:
                dataset = pickle.load(f)
            cache_hit = True
        except (pickle.UnpicklingError, EOFError):
            # An empty or truncated cache (e.g. from an interrupted earlier
            # run) would otherwise break every later call; treat it as a
            # cache miss and rebuild it below.
            logging.warning(
                "Cached dataset %s is unreadable; downloading again",
                dataset_file,
            )

    if not cache_hit:
        logging.info("Loading dataset from Hugging Face")
        dataset = load_dataset(
            "rungalileo/ragbench", data_set_name, split="test"
        )

        logging.info("Saving %s dataset locally", data_set_name)
        # Write to a sibling temp file and rename it into place so that an
        # interrupted dump never leaves a partial pickle at the cache path.
        tmp_file = f"{dataset_file}.{os.getpid()}.tmp"
        try:
            with open(tmp_file, "wb") as f:
                pickle.dump(dataset, f)
            os.replace(tmp_file, dataset_file)
        except BaseException:
            # Do not leave the half-written temp file behind.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)
            raise

    logging.info("Dataset loaded successfully")
    logging.info("Number of documents found: %s", dataset.num_rows)
    return dataset
|
|