from dataset import Dataset | |
from model import Models | |
def data(dataset):
    """Yield one flat input dict per dataset item.

    Each yielded dict contains every key/value of ``item["audio"]`` plus a
    ``"reference"`` key holding ``item["norm_text"]``. If the audio mapping
    already has a ``"reference"`` key, it is overwritten by the text.

    Args:
        dataset: Iterable of mappings, each providing an ``"audio"`` mapping
            and a ``"norm_text"`` value.

    Yields:
        A new dict per item; the source items are not modified.
    """
    for item in dataset:
        yield {**item["audio"], "reference": item["norm_text"]}
def streamed_infernce(dataset, pipeline, batch_size=16):
    """Run ``pipeline`` over ``dataset`` and collect predictions and references.

    The dataset is fed to the pipeline lazily through ``data(dataset)``, so
    items are consumed as the pipeline iterates rather than loaded up front.

    Args:
        dataset: Iterable of items accepted by ``data`` (each needs an
            ``"audio"`` mapping and a ``"norm_text"`` value).
        pipeline: Callable invoked as ``pipeline(inputs, batch_size=...)``
            that yields one mapping per input, with a ``"text"`` entry and an
            indexable ``"reference"`` entry.
        batch_size: Batch size forwarded to ``pipeline``. Defaults to 16.

    Returns:
        A ``(predictions, references)`` tuple of two lists, in the order the
        pipeline produced its outputs.
    """
    predictions = []
    references = []

    # run streamed inference
    for out in pipeline(data(dataset), batch_size=batch_size):
        # The output already carries the predicted text. Passing it back into
        # the pipeline would treat the transcription as a fresh input.
        predictions.append(out["text"])
        # NOTE(review): presumably the pipeline passes the extra "reference"
        # key through wrapped in a sequence, hence the [0] - confirm.
        references.append(out["reference"][0])

    return predictions, references