Spaces:

jdalfonso
/

SISE-ULTIMATE-CHALLENGE

Sleeping

SISE-ULTIMATE-CHALLENGE / src /utils /preprocessing.py

Updated app

233adb5 8 months ago

970 Bytes

	import soundfile as sf
	import torch
	import torchaudio
	import numpy as np
	from src.model.feature_extractor import processor # type: ignore
	from src.config import DEVICE

	# Shared resampler, built once at import time and reused by every call.
	# It is configured for one conversion only: 48 kHz input -> 16 kHz output.
	resampler = torchaudio.transforms.Resample(
	    orig_freq=48_000,
	    new_freq=16_000,
	)

	def preprocess_audio(batch):
	    """Load the audio file at ``batch["path"]`` as 16 kHz mono samples.

	    The file is read with ``soundfile`` as float32. Multi-channel audio is
	    averaged down to a single channel, and audio whose sample rate is not
	    16 kHz is resampled using the file's actual rate (previously every
	    non-16 kHz file was treated as 48 kHz, which produced audio at the
	    wrong rate for e.g. 44.1 kHz input).

	    Args:
	        batch: Mapping with a ``"path"`` entry naming the audio file.

	    Returns:
	        The same ``batch`` object, with ``"speech"`` set to a flat list of
	        float samples and ``"sampling_rate"`` set to 16000.
	    """
	    target_rate = 16000
	    speech, sample_rate = sf.read(batch["path"], dtype="float32")

	    # soundfile returns (frames, channels) for multi-channel files; average
	    # the channels so that the time axis is the one that gets resampled and
	    # "speech" is always a flat list.
	    if speech.ndim > 1:
	        speech = speech.mean(axis=1)

	    if sample_rate != target_rate:
	        waveform = torch.from_numpy(speech).unsqueeze(0)
	        if sample_rate == resampler.orig_freq:
	            # Reuse the module-level resampler for the rate it was built for.
	            waveform = resampler(waveform)
	        else:
	            # Any other rate: resample from the rate the file really has.
	            waveform = torchaudio.functional.resample(
	                waveform,
	                orig_freq=sample_rate,
	                new_freq=target_rate,
	            )
	        speech = waveform.squeeze(0).numpy()

	    batch["speech"] = speech.tolist()
	    batch["sampling_rate"] = target_rate
	    return batch

	def prepare_features(batch, max_length):
	    """Run the shared ``processor`` on ``batch["speech"]`` and tensorise the label.

	    Args:
	        batch: Mapping with ``"speech"`` (audio samples) and ``"label"``.
	        max_length: Forwarded to ``processor`` as ``max_length``; truncation
	            is enabled, so presumably this caps the number of samples kept
	            (confirm against the processor's documentation).

	    Returns:
	        The same ``batch`` object, with ``"input_values"`` set to the
	        processor output (leading dimension squeezed if it has size 1) and
	        ``"label"`` replaced by a ``torch.long`` tensor.
	    """
	    processor_options = {
	        "sampling_rate": 16000,
	        "padding": True,
	        "truncation": True,
	        "max_length": max_length,
	        "return_tensors": "pt",
	    }
	    encoded = processor(batch["speech"], **processor_options)

	    # squeeze(0) only drops the first dimension when it has size 1.
	    batch["input_values"] = encoded.input_values.squeeze(0)

	    label_tensor = torch.tensor(batch["label"], dtype=torch.long)
	    batch["label"] = label_tensor
	    return batch