from dataset import Dataset
from model import Models


def data(dataset):
    """Yield pipeline-ready samples from ``dataset``.

    Each yielded dict contains the entries of the item's ``"audio"`` mapping
    plus a ``"reference"`` key holding the item's ``"norm_text"`` value, so
    the reference text travels alongside the audio.
    """
    for item in dataset:
        yield {**item["audio"], "reference": item["norm_text"]}


def streamed_infernce(dataset, pipeline, batch_size=16, normalizer=None):
    """Run ``pipeline`` over ``dataset`` and collect predictions/references.

    Args:
        dataset: Iterable of items exposing ``"audio"`` (a mapping) and
            ``"norm_text"`` keys.
        pipeline: Callable invoked as ``pipeline(samples, batch_size=...)``
            that yields dicts with ``"text"`` and ``"reference"`` keys.
        batch_size: Batch size forwarded to ``pipeline``. Defaults to 16.
        normalizer: Optional callable applied to each predicted text before
            it is stored. When ``None`` the raw text is stored.

    Returns:
        A ``(predictions, references)`` tuple of two lists in output order.
    """
    # placeholders for predictions and references
    predictions = []
    references = []

    # run streamed inference
    for out in pipeline(data(dataset), batch_size=batch_size):
        text = out["text"]
        # Store the transcription itself; it must not be fed back into the
        # pipeline, which would start another inference on a text string.
        predictions.append(normalizer(text) if normalizer is not None else text)
        # NOTE(review): assumes the pipeline returns the forwarded reference
        # wrapped in a sequence, hence the [0] -- confirm against the pipeline.
        references.append(out["reference"][0])

    return predictions, references