import librosa
import numpy as np
import soundfile
import torch

def random_amplify(mix, targets, shapes, min_gain, max_gain):
    '''
    Data augmentation by randomly amplifying sources before adding them to form a new mixture
    :param mix: Original mixture
    :param targets: Source targets
    :param shapes: Shape dict from model
    :param min_gain: Minimum possible amplification
    :param max_gain: Maximum possible amplification
    :return: New data point as tuple (mix, targets)
    '''
    residual = mix.copy()  # copy so the in-place subtractions below do not mutate the caller's mix
    for key in targets.keys():
        if key != "mix":
            residual -= targets[key]  # subtract all instruments (residual is zero if all instruments add up to mix)
    mix = residual * np.random.uniform(min_gain, max_gain)  # also apply gain data augmentation to residual
    for key in targets.keys():
        if key != "mix":
            targets[key] = targets[key] * np.random.uniform(min_gain, max_gain)
            mix += targets[key]  # add instrument with gain data augmentation to mix
    mix = np.clip(mix, -1.0, 1.0)
    return crop_targets(mix, targets, shapes)

def crop_targets(mix, targets, shapes):
    '''
    Crops target audio to the output shape required by the model given in "shapes"
    '''
    for key in targets.keys():
        if key != "mix":
            targets[key] = targets[key][:, shapes["output_start_frame"]:shapes["output_end_frame"]]
    return mix, targets

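# Usage sketch for the two helpers above (a minimal illustration, not part of the
# original pipeline; the array sizes and shape values below are made-up examples):
#
#   mix = np.random.uniform(-0.5, 0.5, (2, 1000)).astype(np.float32)
#   targets = {"vocals": mix * 0.5, "accompaniment": mix * 0.5}
#   shapes = {"output_start_frame": 100, "output_end_frame": 900}
#   aug_mix, aug_targets = random_amplify(mix, targets, shapes, 0.7, 1.0)
#   # aug_mix keeps the full 1000 frames; each target is cropped to frames 100:900
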
def load(path, sr=22050, mono=True, mode="numpy", offset=0.0, duration=None):
    '''
    Loads an audio file as float32 with shape (channels, frames)
    :param mode: "numpy" returns an ndarray, "pytorch" returns a torch.Tensor
    :return: Tuple (audio, sample rate)
    '''
    y, curr_sr = librosa.load(path, sr=sr, mono=mono, res_type='kaiser_fast', offset=offset, duration=duration)
    if y.ndim == 1:
        # Expand channel dimension so the output is always (channels, frames)
        y = y[np.newaxis, :]
    if mode == "pytorch":
        y = torch.tensor(y)
    return y, curr_sr

def write_wav(path, audio, sr):
    # soundfile expects (frames, channels), so transpose from (channels, frames)
    soundfile.write(path, audio.T, sr, subtype="PCM_16")

def resample(audio, orig_sr, new_sr, mode="numpy"):
    '''
    Resamples audio from orig_sr to new_sr (no-op if the rates match)
    :param mode: "numpy" returns an ndarray, "pytorch" returns a torch.Tensor
    '''
    if orig_sr == new_sr:
        return audio
    if isinstance(audio, torch.Tensor):
        audio = audio.detach().cpu().numpy()
    # librosa >= 0.10 requires the sample rates as keyword arguments
    out = librosa.resample(audio, orig_sr=orig_sr, target_sr=new_sr, res_type='kaiser_fast')
    if mode == "pytorch":
        out = torch.tensor(out)
    return out
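
if __name__ == "__main__":
    # Minimal smoke-test sketch of the I/O helpers above. "example.wav" and the
    # output path are placeholder names, not files shipped with this code.
    audio, sr = load("example.wav", sr=22050, mono=False)
    audio_16k = resample(audio, sr, 16000)
    write_wav("example_16k.wav", audio_16k, 16000)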