Spaces:
Sleeping
Sleeping
File size: 1,347 Bytes
6dacae5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import librosa
import numpy as np
# Function to resample the audio array
def resample_audio(array, orig_sr, target_sr):
    """
    Return the audio samples as a NumPy array at the requested sampling rate.

    Args:
        array: Audio samples; anything ``np.array`` accepts (list, ndarray, ...).
        orig_sr (int): Sampling rate the samples currently have.
        target_sr (int): Sampling rate the result should have.

    Returns:
        np.ndarray: A NumPy copy of the input when both rates are equal,
        otherwise the output of ``librosa.resample``.
    """
    samples = np.array(array)  # always work on a NumPy copy of the input

    # Nothing to do when the rates already match.
    if orig_sr == target_sr:
        return samples

    return librosa.resample(samples, orig_sr=orig_sr, target_sr=target_sr)
def create_mel_spectrogram(waveform, sr, n_mels=128, n_fft=2048, hop_length=512):
    """
    Compute a decibel-scaled Mel spectrogram for an audio waveform.

    Args:
        waveform (np.ndarray): 1D NumPy array of audio samples.
        sr (int): Sampling rate of the waveform.
        n_mels (int): Number of Mel bands to generate.
        n_fft (int): Length of the FFT window.
        hop_length (int): Number of samples between successive frames.

    Returns:
        np.ndarray: 2D NumPy array holding the Mel spectrogram in dB
        (shape: [n_mels, time]).
    """
    mel_options = {
        "n_fft": n_fft,
        "hop_length": hop_length,
        "n_mels": n_mels,
    }

    # Power (amplitude squared) Mel spectrogram.
    power_spec = librosa.feature.melspectrogram(y=waveform, sr=sr, **mel_options)

    # Log-scale the power values, using the spectrogram's own maximum as the
    # reference level.
    # NOTE: the time axis is returned as produced; no padding or truncation
    # to a fixed number of frames is applied here.
    return librosa.power_to_db(power_spec, ref=np.max)