import librosa
import numpy as np
import soundfile
import torch

def random_amplify(mix, targets, shapes, min_gain, max_gain):
    '''
    Data augmentation by randomly amplifying sources before adding them to form a new mixture
    :param mix: Original mixture
    :param targets: Source targets
    :param shapes: Shape dict from model
    :param min_gain: Minimum possible amplification
    :param max_gain: Maximum possible amplification
    :return: New data point as tuple (mix, targets)
    '''
    residual = mix.copy()  # copy so the in-place subtractions below do not mutate the caller's mix
    for key in targets.keys():
        if key != "mix":
            residual -= targets[key]  # subtract all instruments (residual is zero if all instruments add up to mix)
    mix = residual * np.random.uniform(min_gain, max_gain)  # also apply gain data augmentation to residual
    for key in targets.keys():
        if key != "mix":
            targets[key] = targets[key] * np.random.uniform(min_gain, max_gain)
            mix += targets[key]  # add instrument with gain data augmentation to mix
    mix = np.clip(mix, -1.0, 1.0)
    return crop_targets(mix, targets, shapes)

def crop_targets(mix, targets, shapes):
    '''
    Crops target audio to the output shape required by the model given in "shapes"
    '''
    for key in targets.keys():
        if key != "mix":
            targets[key] = targets[key][:, shapes["output_start_frame"]:shapes["output_end_frame"]]
    return mix, targets

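# Usage sketch for the two helpers above (a minimal illustration, not part of the
# original pipeline; the array sizes and shape values below are made-up examples):
#
#   mix = np.random.uniform(-0.5, 0.5, (2, 1000)).astype(np.float32)
#   targets = {"vocals": mix * 0.5, "accompaniment": mix * 0.5}
#   shapes = {"output_start_frame": 100, "output_end_frame": 900}
#   aug_mix, aug_targets = random_amplify(mix, targets, shapes, 0.7, 1.0)
#   # aug_mix keeps the full 1000 frames; each target is cropped to frames 100:900
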
def load(path, sr=22050, mono=True, mode="numpy", offset=0.0, duration=None):
    '''
    Loads an audio file as float32 with shape (channels, frames)
    :param mode: "numpy" returns an ndarray, "pytorch" returns a torch.Tensor
    :return: Tuple (audio, sample rate)
    '''
    y, curr_sr = librosa.load(path, sr=sr, mono=mono, res_type='kaiser_fast', offset=offset, duration=duration)
    if y.ndim == 1:
        # Expand channel dimension so the output is always (channels, frames)
        y = y[np.newaxis, :]
    if mode == "pytorch":
        y = torch.tensor(y)
    return y, curr_sr

def write_wav(path, audio, sr):
    # soundfile expects (frames, channels), so transpose from (channels, frames)
    soundfile.write(path, audio.T, sr, subtype="PCM_16")

def resample(audio, orig_sr, new_sr, mode="numpy"):
    '''
    Resamples audio from orig_sr to new_sr (no-op if the rates match)
    :param mode: "numpy" returns an ndarray, "pytorch" returns a torch.Tensor
    '''
    if orig_sr == new_sr:
        return audio
    if isinstance(audio, torch.Tensor):
        audio = audio.detach().cpu().numpy()
    # librosa >= 0.10 requires the sample rates as keyword arguments
    out = librosa.resample(audio, orig_sr=orig_sr, target_sr=new_sr, res_type='kaiser_fast')
    if mode == "pytorch":
        out = torch.tensor(out)
    return out
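
if __name__ == "__main__":
    # Minimal smoke-test sketch of the I/O helpers above. "example.wav" and the
    # output path are placeholder names, not files shipped with this code.
    audio, sr = load("example.wav", sr=22050, mono=False)
    audio_16k = resample(audio, sr, 16000)
    write_wav("example_16k.wav", audio_16k, 16000)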