File size: 675 Bytes
55406ba 5fa433f 55406ba 7c06aef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
from datasets import get_dataset_config_names, load_dataset
from joblib.memory import Memory
# Memoization decorator shared by the helpers below: joblib persists each
# call's result under the ".cache" directory, keyed on the call arguments.
# verbose=0 turns off joblib's per-call log messages.
cache = Memory(location=".cache", verbose=0).cache
@cache
def _get_dataset_config_names(dataset, **kwargs):
    """Return the config names of ``dataset``, memoized through ``cache``.

    Every keyword argument is forwarded unchanged to
    ``get_dataset_config_names``; the arguments also form the cache key,
    so a repeated call with the same arguments is answered from the cache.
    """
    config_names = get_dataset_config_names(dataset, **kwargs)
    return config_names
@cache
def _load_dataset(dataset, subset, **kwargs):
    """Load ``subset`` of ``dataset`` via ``load_dataset``, memoized through ``cache``.

    ``subset`` is passed as the second positional argument of
    ``load_dataset``; any extra keyword arguments are forwarded unchanged
    and take part in the cache key.
    """
    loaded = load_dataset(dataset, subset, **kwargs)
    return loaded
# Cache individual dataset items so that repeated requests for the same item
# are answered from the cache instead of calling load_dataset again.
# NOTE: on a cache miss the requested split is still loaded in full.
@cache
def _get_dataset_item(dataset, subset, split, index, **kwargs):
    """Load a single item from a dataset, or ``None`` if it does not exist.

    Args:
        dataset: Dataset identifier, passed to ``load_dataset``.
        subset: Config/subset name, passed to ``load_dataset``.
        split: Split to load, passed to ``load_dataset`` as ``split=``.
        index: Integer position of the item. Negative indices that are
            within range are passed through to ``ds[index]``.
        **kwargs: Forwarded unchanged to ``load_dataset``.

    Returns:
        ``ds[index]`` when ``-len(ds) <= index < len(ds)``, otherwise ``None``.
    """
    ds = load_dataset(dataset, subset, split=split, **kwargs)
    size = len(ds)
    # Check both bounds: the upper-bound-only test let an index below -size
    # reach ds[index] instead of yielding None like any other out-of-range
    # index.
    if -size <= index < size:
        return ds[index]
    return None
|