from datasets import get_dataset_config_names, load_dataset
from joblib.memory import Memory

# Persist results on disk so repeated calls skip the network round-trip.
cache = Memory(location=".cache", verbose=0).cache


@cache
def _get_dataset_config_names(dataset, **kwargs):
    return get_dataset_config_names(dataset, **kwargs)


@cache
def _load_dataset(dataset, subset, **kwargs):
    return load_dataset(dataset, subset, **kwargs)


# Cache individual dataset items to avoid reloading entire datasets.
@cache
def _get_dataset_item(dataset, subset, split, index, **kwargs):
    """Load a single item from a dataset, or None if the index is out of range."""
    ds = load_dataset(dataset, subset, split=split, **kwargs)
    return ds[index] if index < len(ds) else None
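

# A minimal usage sketch of the three cached helpers. The dataset id and
# subset name below are illustrative examples, not part of this module;
# any dataset on the Hugging Face Hub works the same way.
if __name__ == "__main__":
    subsets = _get_dataset_config_names("wikitext")
    print(subsets)  # e.g. ["wikitext-103-raw-v1", "wikitext-2-raw-v1", ...]

    ds = _load_dataset("wikitext", "wikitext-2-raw-v1", split="test")
    print(len(ds))

    item = _get_dataset_item("wikitext", "wikitext-2-raw-v1", "test", 0)
    print(item)  # a dict of column values, served from .cache on repeat runs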