deveix committed · Commit 754923b · 1 Parent(s): 9dae67d
resample
Browse files · app/main.py +39 -36

app/main.py
CHANGED
@@ -27,16 +27,16 @@ default_sample_rate=22050
 def load(file_name, skip_seconds=0):
     return librosa.load(file_name, sr=None, res_type='kaiser_fast')

-def preprocess_audio(audio_data, rate):
-    # Apply preprocessing steps
-    audio_data = nr.reduce_noise(y=audio_data, sr=rate)
-    audio_data = librosa.util.normalize(audio_data)
-    audio_data, _ = librosa.effects.trim(audio_data)
-    audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
-    # audio_data = fix_length(audio_data)
-    rate = default_sample_rate
+# def preprocess_audio(audio_data, rate):
+#     # Apply preprocessing steps
+#     audio_data = nr.reduce_noise(y=audio_data, sr=rate)
+#     audio_data = librosa.util.normalize(audio_data)
+#     audio_data, _ = librosa.effects.trim(audio_data)
+#     audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
+#     # audio_data = fix_length(audio_data)
+#     rate = default_sample_rate

-    return audio_data, rate
+#     return audio_data, rate

 def extract_features(X, sample_rate):
     # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series
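
Unchanged context: load() reads files at their native sampling rate (sr=None), which is why this commit adds an explicit resample step further down. A minimal check of that behaviour, using librosa's bundled example clip as a stand-in for the app's uploads (the clip downloads on first use):

import librosa

# sr=None preserves the file's native rate; res_type only matters when librosa resamples at load time.
path = librosa.example('trumpet')
y, sr = librosa.load(path, sr=None, res_type='kaiser_fast')
print(sr)           # native rate of the file, not forced to 22050
print(len(y) / sr)  # duration in seconds
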
@@ -187,23 +187,26 @@ pca = joblib.load('app/pca.pkl')
 scaler = joblib.load('app/1713638595.3178492_scaler.joblib')
 label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')

- [15 lines removed here; their content is not rendered in this view]
+def preprocess_audio(audio_data, rate):
+    audio_data = nr.reduce_noise(y=audio_data, sr=rate)
+    # remove silence
+    intervals = librosa.effects.split(audio_data, top_db=20)
+    # Concatenate non-silent intervals
+    audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
+
+    audio_data = librosa.util.normalize(audio_data)
+    audio_data, _ = librosa.effects.trim(audio_data)
+    audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
+    # audio_data = fix_length(audio_data)
+    rate = default_sample_rate
+
+    # y_trimmed, _ = librosa.effects.trim(y_no_gaps, top_db = 20)
+    # D = librosa.stft(y)
+    # S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
+    # S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128*2,)
+    # S_db_mel = librosa.amplitude_to_db(np.abs(S), ref=np.max)

-#
+    # Apply noise reduction (example using spectral subtraction)
 # y_denoised = librosa.effects.preemphasis(y_trimmed)

 # # Apply dynamic range compression
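
The relocated preprocess_audio drops silent spans (librosa.effects.split with top_db=20, concatenating the kept intervals) before normalizing, trimming, and resampling to default_sample_rate. A standalone sketch of that sequence on a synthetic padded tone; the noise-reduction step is omitted here and the 44.1 kHz input rate is only an example:

import numpy as np
import librosa

default_sample_rate = 22050

# Synthetic input: 1 s of a 440 Hz tone with 0.5 s of silence on each side, at 44.1 kHz.
rate = 44100
tone = 0.5 * np.sin(2 * np.pi * 440 * np.arange(rate) / rate)
audio_data = np.concatenate([np.zeros(rate // 2), tone, np.zeros(rate // 2)]).astype(np.float32)

# Split into non-silent intervals and stitch them back together (drops the padding).
intervals = librosa.effects.split(audio_data, top_db=20)
audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])

# Normalize, trim residual quiet edges, and resample to the app's default rate.
audio_data = librosa.util.normalize(audio_data)
audio_data, _ = librosa.effects.trim(audio_data)
audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)

print(len(audio_data) / default_sample_rate)  # roughly 1.0 s of audio left
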
@@ -221,25 +224,25 @@ label_encoder = joblib.load('app/1713638744.044928_label_encoder.joblib')
 # # Define target sample rate
 # target_sr = sr

-#
-#
+    # # Data Augmentation (example: pitch shifting)
+    # y_pitch_shifted = librosa.effects.pitch_shift(y_normalized, sr=target_sr, n_steps=2)


-#
+    # Split audio into non-silent intervals


-#
-#
+    # Normalize the audio signal
+    # y_normalized = librosa.util.normalize(y_equalized)

-#
-#
+    # Feature Extraction (example: MFCCs)
+    # mfccs = librosa.feature.mfcc(y=y_normalized, sr=target_sr, n_mfcc=20)

-#
+    # output_file_path = os.path.join(save_dir, f"{file_name_without_extension}.{extension}")

-#
-#
+    # Write the audio data to the output file in .wav format
+    # sf.write(path, y_normalized, target_sr)

-
+    return audio_data, rate

 # smile = opensmile.Smile(
 # feature_set=opensmile.FeatureSet.ComParE_2016,
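
Putting the diff together, a hedged end-to-end sketch: it re-declares the two helpers as they appear above so it runs standalone, feeds librosa's bundled clip through them, and then takes MFCCs as the extract_features comment describes. The clip, the noisereduce dependency, and n_mfcc=20 are illustrative assumptions, not values confirmed by the rest of app/main.py:

import numpy as np
import librosa
import noisereduce as nr

default_sample_rate = 22050

def load(file_name, skip_seconds=0):
    # Keep the file's native rate; preprocess_audio resamples explicitly below.
    return librosa.load(file_name, sr=None, res_type='kaiser_fast')

def preprocess_audio(audio_data, rate):
    audio_data = nr.reduce_noise(y=audio_data, sr=rate)
    # Remove silence, then normalize, trim, and resample, mirroring the new function above.
    intervals = librosa.effects.split(audio_data, top_db=20)
    audio_data = np.concatenate([audio_data[start:end] for start, end in intervals])
    audio_data = librosa.util.normalize(audio_data)
    audio_data, _ = librosa.effects.trim(audio_data)
    audio_data = librosa.resample(audio_data, orig_sr=rate, target_sr=default_sample_rate)
    return audio_data, default_sample_rate

# librosa's bundled example clip stands in for the app's uploaded audio.
y, sr = load(librosa.example('trumpet'))
y, sr = preprocess_audio(y, sr)
mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=20)  # n_mfcc is an illustrative choice
print(sr, mfccs.shape)
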