# ASR_Model_Comparison / dataset.py
# Author: j-tobias
# Commit: "add front end" (1dc0a7f)
# Size: 813 Bytes
# (File-viewer page residue: "raw", "history blame")
from huggingface_hub import login
from datasets import load_dataset
from datasets import Audio
import json
def get_credentials(path: str = "credentials.json") -> str:
    """Read the access token stored in a JSON credentials file.

    The commented-out setup code in this module passes the result to
    ``huggingface_hub.login(token=...)``.

    Args:
        path: Location of the JSON file. Defaults to ``"credentials.json"``,
            resolved against the current working directory, so existing
            zero-argument calls behave as before.

    Returns:
        The value stored under the top-level ``"token"`` key. It is expected
        to be a string, but is returned as-is without validation.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the parsed JSON object has no ``"token"`` key.
    """
    # JSON text is UTF-8 by specification; an explicit encoding keeps the
    # result independent of the platform's default locale encoding.
    with open(path, "r", encoding="utf-8") as f:
        credentials = json.load(f)
    return credentials["token"]
class Dataset:
    """Container for a sample count and the list of selectable dataset names.

    Attributes:
        n: The integer passed to the constructor, stored unchanged. It is not
            used anywhere else in this class (presumably the number of samples
            to evaluate; confirm against callers).
        options: Names of the dataset choices offered to callers.
    """

    def __init__(self, n: int = 100) -> None:
        """Store ``n`` and set up the fixed list of option names.

        Args:
            n: Value kept on the instance as ``self.n``. Defaults to 100.
        """
        # NOTE: Hub login and streaming loading of LibriSpeech are currently
        # disabled; the original statements are kept here for reference.
        #
        # login(token=get_credentials())
        # self.librispeech_clean = load_dataset(
        #     "librispeech_asr", "all", split="test.clean", streaming=True)
        # self.librispeech_other = load_dataset(
        #     "librispeech_asr", "all", split="test.other", streaming=True)

        self.n = n

        # NOTE(review): the names are spelled "librisspeech" (double "s").
        # They are kept verbatim because other code may match on these exact
        # strings; verify before renaming.
        option_names = [
            'librisspeech_clean',
            'librisspeech_other',
        ]
        self.options = option_names

    def get_option(self) -> list:
        """Return the option names.

        Returns:
            The very list object held in ``self.options`` (not a copy), so
            mutations by the caller are visible on the instance.
        """
        return self.options
# Module-level instance created at import time with the default n (100).
dataset = Dataset()