Spaces:
Sleeping
Sleeping
import librosa | |
import numpy as np | |
def resample_audio(array, orig_sr, target_sr):
    """
    Resample an audio signal from one sampling rate to another.

    Args:
        array (array-like): Audio samples. They are copied into a new NumPy
            array, so the caller's object is never modified.
        orig_sr (int): Sampling rate of ``array``.
        target_sr (int): Desired sampling rate.

    Returns:
        np.ndarray: The samples at ``target_sr``. When both rates are equal,
        the converted samples are returned without resampling.
    """
    array = np.array(array)  # Ensure it's a numpy array (always a copy)
    # librosa only accepts floating-point audio, so integer/bool samples
    # (e.g. a plain list of ints or int16 PCM) are cast to float32 first.
    # Values are not rescaled; floating-point input keeps its dtype.
    if array.dtype.kind in "iub":
        array = array.astype(np.float32)
    if orig_sr != target_sr:
        array = librosa.resample(array, orig_sr=orig_sr, target_sr=target_sr)
    return array
def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
    """
    Compute a decibel-scaled Mel spectrogram of an audio waveform.

    Args:
        waveform (np.ndarray): 1D NumPy array of audio samples.
        sr (int): Sampling rate of the waveform.
        n_mels (int): Number of Mel bands to generate.
        n_fft (int): Length of the FFT window.
        hop_length (int): Number of samples between successive frames.

    Returns:
        np.ndarray: 2D array (shape: [n_mels, time]) holding the Mel power
        spectrogram converted to dB relative to its own maximum value.
    """
    analysis_params = {
        "n_fft": n_fft,
        "hop_length": hop_length,
        "n_mels": n_mels,
    }
    # Mel-scaled power spectrogram of the input signal
    mel_power = librosa.feature.melspectrogram(y=waveform, sr=sr, **analysis_params)

    # NOTE: no padding/truncation to a fixed number of frames is applied
    # here; callers that need a consistent length must handle it themselves.

    # Power (amplitude squared) -> decibels, using the peak as the reference
    return librosa.power_to_db(mel_power, ref=np.max)