File size: 899 Bytes
bcc15bd
bd69eee
026aeba
bcc15bd
026aeba
bcc15bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd69eee
f7c2fa3
bcc15bd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import os
import logging
from datasets import load_dataset
import pickle  # For saving the dataset locally

def load_data(data_set_name, local_path="local_datasets"):
    """Return the test split of a ``rungalileo/ragbench`` subset, cached on disk.

    The dataset is looked up at ``<local_path>/<data_set_name>_test.pkl``. On
    a cache hit it is unpickled from that file; otherwise it is fetched with
    ``load_dataset`` and then pickled to that path for subsequent calls.

    Args:
        data_set_name: Configuration name passed to ``load_dataset``; it is
            also used verbatim as the prefix of the cache file name.
        local_path: Directory holding the cache files. It is created if it
            does not exist.

    Returns:
        The object produced by ``load_dataset`` (or its unpickled copy). It
        must expose a ``num_rows`` attribute, which is logged before return.
    """
    os.makedirs(local_path, exist_ok=True)
    dataset_file = os.path.join(local_path, f"{data_set_name}_test.pkl")

    dataset = None
    if os.path.exists(dataset_file):
        logging.info("Loading dataset from local storage")
        try:
            # NOTE(security): unpickling can execute arbitrary code. Only point
            # local_path at a directory whose contents you trust.
            with open(dataset_file, "rb") as f:
                dataset = pickle.load(f)
        except (EOFError, pickle.UnpicklingError):
            # A truncated or corrupt cache (e.g. from an interrupted earlier
            # run) is treated as a cache miss instead of failing forever.
            logging.warning(
                "Cached dataset at %s is unreadable; downloading it again",
                dataset_file,
            )
            dataset = None

    if dataset is None:
        logging.info("Loading dataset from Hugging Face")
        dataset = load_dataset("rungalileo/ragbench", data_set_name, split="test")
        logging.info("Saving %s dataset locally", data_set_name)
        # Write to a process-unique temporary file and rename it into place so
        # that a crash mid-write never leaves a partial file at dataset_file.
        tmp_file = f"{dataset_file}.{os.getpid()}.tmp"
        try:
            with open(tmp_file, "wb") as f:
                pickle.dump(dataset, f)
            os.replace(tmp_file, dataset_file)
        finally:
            # After a successful replace the temp file is gone; this only
            # removes leftovers when dumping or renaming failed.
            if os.path.exists(tmp_file):
                os.remove(tmp_file)

    logging.info("Dataset loaded successfully")
    logging.info("Number of documents found: %s", dataset.num_rows)
    return dataset