audio-electroma

Sleeping

audio-electroma / tasks /utils /preprocess.py

xgboost api (#5)

6dacae5 verified about 2 months ago

1.35 kB

	import librosa
	import numpy as np


	# Function to resample the audio array
	def resample_audio(array, orig_sr, target_sr):
	    """Return the audio samples as a NumPy array at ``target_sr``.

	    Args:
	        array: Audio samples in any form ``np.array`` accepts
	            (list, tuple, ndarray, ...).
	        orig_sr: Sampling rate the samples were recorded at.
	        target_sr: Sampling rate the result should have.

	    Returns:
	        np.ndarray: A new array. When both rates are equal the samples are
	        returned as-is (copied, dtype preserved); otherwise the output of
	        ``librosa.resample``.
	    """
	    # np.array always copies, so the caller's buffer is never handed back.
	    samples = np.array(array)
	    if orig_sr == target_sr:
	        return samples

	    # librosa.resample only accepts floating-point audio; integer PCM
	    # samples would otherwise raise, so promote them first.
	    if np.issubdtype(samples.dtype, np.integer):
	        samples = samples.astype(np.float32)
	    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)


	def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
	    """
	    Compute a decibel-scaled Mel spectrogram for an audio waveform.

	    Args:
	        waveform (np.ndarray): 1D NumPy array of the audio waveform.
	        sr (int): Sampling rate of the waveform.
	        n_mels (int): Number of Mel bands to generate.
	        n_fft (int): Length of the FFT window.
	        hop_length (int): Number of samples between successive frames.

	    Returns:
	        np.ndarray: 2D NumPy array of the Mel spectrogram in dB
	        (shape: [n_mels, time]).

	    Note:
	        The time axis is not padded or truncated here; its length follows
	        the length of the input waveform.
	    """
	    frame_settings = {
	        "n_fft": n_fft,
	        "hop_length": hop_length,
	        "n_mels": n_mels,
	    }

	    # Power (amplitude squared) Mel spectrogram.
	    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, **frame_settings)

	    # Log scale, measured in dB relative to the spectrogram's own maximum.
	    return librosa.power_to_db(power_spec, ref=np.max)