deveix committed
Commit 1e2c630 · 1 Parent(s): 08af9a0

add segments
app/main.py CHANGED (+29 -3)
@@ -27,6 +27,22 @@ default_sample_rate=22050
 def load(file_name, skip_seconds=0):
     return librosa.load(file_name, sr=None, res_type='kaiser_fast')
 
+def split_audio(audio_data, sample_rate, segment_length_sec=20):
+    # Calculate the number of samples in each segment
+    num_samples_per_segment = segment_length_sec * sample_rate
+
+    # Calculate total number of segments
+    total_segments = int(np.ceil(len(audio_data) / num_samples_per_segment))
+
+    # Split the audio data into segments
+    segments = []
+    for i in range(total_segments):
+        start = i * num_samples_per_segment
+        end = start + num_samples_per_segment
+        segment = audio_data[start:end]
+        segments.append(segment)
+    return segments
+
 def preprocess_audio(audio_data, rate):
     # Apply preprocessing steps
     audio_data = nr.reduce_noise(y=audio_data, sr=rate)
@@ -343,7 +359,16 @@ async def handle_audio(file: UploadFile = File(...)):
     audio_data, sr = preprocess_audio(audio_data, sr)
     print("finished processing ", temp_filename)
     # Extract features
+    features_list = []
     features = extract_features(audio_data, sr)
+    features_list.append(features)
+
+    segments = split_audio(audio_data, sr)
+    for i, segment in enumerate(segments):
+        # Extract features from the processed audio segment (you need to define this function)
+        features = extract_features(segment, sr)
+        print(f"Features extracted for segment {i+1}")
+        features_list.append(features)
 
     # preprocess_audio(temp_filename, 'app')
     # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
@@ -352,17 +377,18 @@ async def handle_audio(file: UploadFile = File(...)):
     # print("Extracted Features:", features)
     # features = pca.transform(features)
     # features = np.array(features).reshape(1, -1)
-    features = features.reshape(1, -1)
+    # features = features.reshape(1, -1)
 
-
+    features_list = scaler.transform(features_list)
 
     # proceed with an inference
     results = model.predict(features)
     # decoded_predictions = [label_encoder.classes_[i] for i in results]
+    print('decoded', results)
 
     # # Decode the predictions using the label encoder
     decoded_predictions = label_encoder.inverse_transform(results)
-    print('decoded', decoded_predictions
+    print('decoded', decoded_predictions)
     # .tolist()
     # Clean up the temporary file
     os.remove(temp_filename)
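For reference, here is a minimal sketch of how the new segmentation path fits together, assuming NumPy is imported as np (as the np.ceil call in the diff suggests) and using a toy stand-in for extract_features, whose real definition is not shown in this diff. It splits a fake 50-second clip at the file's default_sample_rate of 22050 Hz into two full 20-second segments plus a 10-second remainder, builds the per-segment feature list, and stacks it into the 2-D (n_segments, n_features) shape that scikit-learn style scaler.transform and model.predict calls expect.

import numpy as np

def split_audio(audio_data, sample_rate, segment_length_sec=20):
    # Same logic as the committed function: fixed-length windows, with a
    # shorter final segment when the clip does not divide evenly.
    num_samples_per_segment = segment_length_sec * sample_rate
    total_segments = int(np.ceil(len(audio_data) / num_samples_per_segment))
    return [audio_data[i * num_samples_per_segment:(i + 1) * num_samples_per_segment]
            for i in range(total_segments)]

def extract_features(segment, sr):
    # Hypothetical stand-in: the app's real extract_features lives elsewhere in app/main.py.
    return np.array([segment.mean(), segment.std(), len(segment) / sr])

sr = 22050                          # default_sample_rate from the file
audio = np.random.randn(sr * 50)    # 50 s of noise -> segments of 20 s, 20 s, 10 s
segments = split_audio(audio, sr)

features_list = [extract_features(seg, sr) for seg in segments]
X = np.vstack(features_list)        # one row per segment; (3, 3) with this toy extractor
print(X.shape)

Because every feature vector has the same length, passing the plain Python list straight to a fitted scaler's transform, as the commit does, also works; scikit-learn converts it to the same 2-D array internally.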