committed on
Browse files- app/ +39 -36
@@ -27,16 +27,16 @@ default_sample_rate=22050
27 |
def load(file_name, skip_seconds=0):
    """Load an audio file at its native sampling rate.

    Args:
        file_name: Path of the audio file; passed straight to ``librosa.load``.
        skip_seconds: Number of seconds to skip from the start of the file
            before reading. Defaults to 0 (read from the beginning).

    Returns:
        The ``(samples, sample_rate)`` tuple produced by ``librosa.load``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    # skip_seconds used to be accepted but ignored; it is now forwarded as
    # librosa's ``offset`` so the caller's request is actually honored.
    return librosa.load(
        file_name,
        sr=None,
        offset=skip_seconds,
        res_type='kaiser_fast',
    )
29 |
30 |
def preprocess_audio(audio_data, rate):
31 |
32 |
33 |
34 |
35 |
36 |
# audio_data = fix_length(audio_data)
37 |
38 |
39 |
40 |
41 |
def extract_features(X, sample_rate):
42 |
# Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
@@ -187,23 +187,26 @@ pca = joblib.load('app/pca.pkl')
187 |
# Load the persisted scaler and label encoder from disk.
# NOTE(review): these look like module-level statements, which would run at
# import time — confirm against the full file.
# The paths are relative, so they resolve against the process working
# directory rather than this file's location.
# joblib.load unpickles the file contents; only load artifacts from a trusted
# source.
scaler = joblib.load('app/1713638595.3178492_scaler.joblib')
188 |
label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
# y_denoised = librosa.effects.preemphasis(y_trimmed)
208 |
209 |
# # Apply dynamic range compression
@@ -221,25 +224,25 @@ label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
221 |
# # Define target sample rate
222 |
# target_sr = sr
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
# smile = opensmile.Smile(
245 |
# feature_set=opensmile.FeatureSet.ComParE_2016,
27 |
def load(file_name, skip_seconds=0):
    """Load an audio file at its native sampling rate.

    Args:
        file_name: Path of the audio file; passed straight to ``librosa.load``.
        skip_seconds: Number of seconds to skip from the start of the file
            before reading. Defaults to 0 (read from the beginning).

    Returns:
        The ``(samples, sample_rate)`` tuple produced by ``librosa.load``.
    """
    # sr=None asks librosa to keep the file's own sampling rate.
    # skip_seconds used to be accepted but ignored; it is now forwarded as
    # librosa's ``offset`` so the caller's request is actually honored.
    return librosa.load(
        file_name,
        sr=None,
        offset=skip_seconds,
        res_type='kaiser_fast',
    )
29 |
30 |
# def preprocess_audio(audio_data, rate):
31 |
# # Apply preprocessing steps
32 |
# audio_data = nr.reduce_noise(y=audio_data, sr=rate)
33 |
# audio_data = librosa.util.normalize(audio_data)
34 |
# audio_data, _ = librosa.effects.trim(audio_data)
35 |
# audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
36 |
# # audio_data = fix_length(audio_data)
37 |
# rate = default_sample_rate
38 |
39 |
# return audio_data, rate
40 |
41 |
def extract_features(X, sample_rate):
42 |
# Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
187 |
# Load the persisted scaler and label encoder from disk.
# NOTE(review): these look like module-level statements, which would run at
# import time — confirm against the full file.
# The paths are relative, so they resolve against the process working
# directory rather than this file's location.
# joblib.load unpickles the file contents; only load artifacts from a trusted
# source.
scaler = joblib.load('app/1713638595.3178492_scaler.joblib')
188 |
label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
189 |
190 |
def preprocess_audio(audio_data, rate):
    """Denoise, de-silence, normalize and resample an audio signal.

    The steps run in this order:

    1. noise reduction with ``nr.reduce_noise``;
    2. removal of silent stretches: only the intervals returned by
       ``librosa.effects.split(..., top_db=20)`` are kept and joined;
    3. amplitude normalization with ``librosa.util.normalize``;
    4. trimming of leading/trailing silence with ``librosa.effects.trim``;
    5. resampling from ``rate`` to ``default_sample_rate``.

    Args:
        audio_data: Audio samples as a NumPy array. The interval slicing
            below indexes the first axis, so a 1-D (mono) signal is assumed
            -- TODO confirm against callers.
        rate: Sampling rate of ``audio_data``.

    Returns:
        A ``(audio_data, rate)`` tuple where ``rate`` is
        ``default_sample_rate``.
    """
    audio_data = nr.reduce_noise(y=audio_data, sr=rate)

    # Remove silence: keep only the stretches librosa reports as non-silent.
    intervals = librosa.effects.split(audio_data, top_db=20)
    # np.concatenate raises ValueError on an empty sequence, so when no
    # non-silent interval is found the signal is passed on unchanged
    # instead of crashing.
    if len(intervals) > 0:
        audio_data = np.concatenate(
            [audio_data[start:end] for start, end in intervals]
        )

    audio_data = librosa.util.normalize(audio_data)
    audio_data, _ = librosa.effects.trim(audio_data)
    audio_data = librosa.resample(
        audio_data,
        orig_sr=rate,
        target_sr=default_sample_rate,
    )

    # After resampling, the signal is at the module-wide default rate.
    return audio_data, default_sample_rate
246 |
247 |
# smile = opensmile.Smile(
248 |
# feature_set=opensmile.FeatureSet.ComParE_2016,