Spaces:
Sleeping
Sleeping
| import soundfile as sf | |
| import torch | |
| import torchaudio | |
| import numpy as np | |
| from src.model.feature_extractor import processor # type: ignore | |
| from src.config import DEVICE | |
# Shared resampling transform, built once at import time.
# It is configured for a fixed 48 kHz -> 16 kHz conversion, so it is only
# correct for audio whose source sampling rate is exactly 48 kHz.
resampler = torchaudio.transforms.Resample(
    orig_freq=48_000,
    new_freq=16_000,
)
def preprocess_audio(batch):
    """Load the audio file at ``batch["path"]`` and store it as 16 kHz mono.

    The file is read with ``soundfile`` as float32. Multi-channel audio is
    averaged down to one channel, and audio whose sampling rate is not
    16 kHz is resampled to 16 kHz using the file's actual rate.

    Args:
        batch: Mapping with a ``"path"`` entry naming the audio file.
            (Presumably a single example from a ``datasets`` map call --
            confirm against the caller.)

    Returns:
        The same ``batch`` object, with ``"speech"`` set to the samples as a
        plain Python list and ``"sampling_rate"`` set to ``16000``.
    """
    target_rate = 16000
    speech, sample_rate = sf.read(batch["path"], dtype="float32")

    # soundfile returns (frames, channels) for multi-channel files. Resampling
    # works along the last axis, so collapse to mono first; otherwise the
    # channel axis would be treated as time.
    if speech.ndim > 1:
        speech = speech.mean(axis=1)

    if sample_rate != target_rate:
        waveform = torch.tensor(speech).unsqueeze(0)
        if sample_rate == 48_000:
            # Reuse the prebuilt module-level 48 kHz -> 16 kHz transform.
            waveform = resampler(waveform)
        else:
            # The module-level transform is hard-wired to 48 kHz input;
            # any other rate must be resampled from its true source rate.
            waveform = torchaudio.functional.resample(
                waveform,
                orig_freq=sample_rate,
                new_freq=target_rate,
            )
        speech = waveform.squeeze(0).numpy()

    batch["speech"] = speech.tolist()
    batch["sampling_rate"] = target_rate
    return batch
def prepare_features(batch, max_length):
    """Run the shared ``processor`` on ``batch["speech"]`` and tensorize the label.

    Args:
        batch: Mapping holding ``"speech"`` (the audio samples) and
            ``"label"``.
        max_length: Forwarded to the processor as ``max_length``; truncation
            is enabled, so this is presumably the maximum number of samples
            kept -- confirm against the processor's documentation.

    Returns:
        The same ``batch`` object, with ``"input_values"`` set to the
        processor output (squeezed on dimension 0) and ``"label"`` replaced
        by a ``torch.long`` tensor.
    """
    processor_options = {
        "sampling_rate": 16000,
        "padding": True,
        "truncation": True,
        "max_length": max_length,
        "return_tensors": "pt",
    }
    encoded = processor(batch["speech"], **processor_options)

    # squeeze(0) drops the leading dimension when it has size 1
    # (presumably the batch axis added by the processor -- confirm).
    input_values = encoded.input_values.squeeze(0)
    batch["input_values"] = input_values

    label_tensor = torch.tensor(batch["label"], dtype=torch.long)
    batch["label"] = label_tensor
    return batch