deveix committed on
Commit
1e2c630
·
1 Parent(s): 08af9a0

add segments

Browse files
Files changed (1) hide show
  1. app/main.py +29 -3
app/main.py CHANGED
@@ -27,6 +27,22 @@ default_sample_rate=22050
27
  def load(file_name, skip_seconds=0):
28
  return librosa.load(file_name, sr=None, res_type='kaiser_fast')
29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  def preprocess_audio(audio_data, rate):
31
  # Apply preprocessing steps
32
  audio_data = nr.reduce_noise(y=audio_data, sr=rate)
@@ -343,7 +359,16 @@ async def handle_audio(file: UploadFile = File(...)):
343
  audio_data, sr = preprocess_audio(audio_data, sr)
344
  print("finished processing ", temp_filename)
345
  # Extract features
 
346
  features = extract_features(audio_data, sr)
 
 
 
 
 
 
 
 
347
 
348
  # preprocess_audio(temp_filename, 'app')
349
  # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
@@ -352,17 +377,18 @@ async def handle_audio(file: UploadFile = File(...)):
352
  # print("Extracted Features:", features)
353
  # features = pca.transform(features)
354
  # features = np.array(features).reshape(1, -1)
355
- features = features.reshape(1, -1)
356
 
357
- features = scaler.transform(features)
358
 
359
  # proceed with an inference
360
  results = model.predict(features)
361
  # decoded_predictions = [label_encoder.classes_[i] for i in results]
 
362
 
363
  # # Decode the predictions using the label encoder
364
  decoded_predictions = label_encoder.inverse_transform(results)
365
- print('decoded', decoded_predictions[0])
366
  # .tolist()
367
  # Clean up the temporary file
368
  os.remove(temp_filename)
 
27
  def load(file_name, skip_seconds=0):
28
  return librosa.load(file_name, sr=None, res_type='kaiser_fast')
29
 
30
def split_audio(audio_data, sample_rate, segment_length_sec=20):
    """Split audio samples into consecutive fixed-length segments.

    Args:
        audio_data: Sliceable sequence of samples (anything supporting
            ``len()`` and ``[start:end]`` slicing, e.g. a list or 1-D array).
        sample_rate: Number of samples per second of audio.
        segment_length_sec: Length of each segment in seconds. May be
            fractional. Defaults to 20.

    Returns:
        A list of slices of ``audio_data`` in their original order. Every
        segment holds ``segment_length_sec * sample_rate`` samples except
        possibly the last one, which holds the remainder. Empty input yields
        an empty list.

    Raises:
        ValueError: If the segment would contain less than one sample
            (zero or negative length or sample rate).
    """
    # Slice bounds must be integers; a fractional segment length or a float
    # sample rate would otherwise produce float indices and a TypeError.
    samples_per_segment = int(round(segment_length_sec * sample_rate))
    if samples_per_segment < 1:
        raise ValueError(
            "segment_length_sec * sample_rate must be at least one sample"
        )

    # Stepping the start index by the segment size visits exactly
    # ceil(len / size) segments; the final slice is truncated by Python's
    # slicing rules, so no padding or explicit bounds check is needed.
    return [
        audio_data[start:start + samples_per_segment]
        for start in range(0, len(audio_data), samples_per_segment)
    ]
45
+
46
  def preprocess_audio(audio_data, rate):
47
  # Apply preprocessing steps
48
  audio_data = nr.reduce_noise(y=audio_data, sr=rate)
 
359
  audio_data, sr = preprocess_audio(audio_data, sr)
360
  print("finished processing ", temp_filename)
361
  # Extract features
362
+ features_list = []
363
  features = extract_features(audio_data, sr)
364
+ features_list.append(features)
365
+
366
+ segments = split_audio(audio_data, sr)
367
+ for i, segment in enumerate(segments):
368
+ # Extract features from the processed audio segment (you need to define this function)
369
+ features = extract_features(segment, sr)
370
+ print(f"Features extracted for segment {i+1}")
371
+ features_list.append(features)
372
 
373
  # preprocess_audio(temp_filename, 'app')
374
  # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
 
377
  # print("Extracted Features:", features)
378
  # features = pca.transform(features)
379
  # features = np.array(features).reshape(1, -1)
380
+ # features = features.reshape(1, -1)
381
 
382
+ features_list = scaler.transform(features_list)
383
 
384
  # proceed with an inference
385
  results = model.predict(features)
386
  # decoded_predictions = [label_encoder.classes_[i] for i in results]
387
+ print('decoded', results)
388
 
389
  # # Decode the predictions using the label encoder
390
  decoded_predictions = label_encoder.inverse_transform(results)
391
+ print('decoded', decoded_predictions)
392
  # .tolist()
393
  # Clean up the temporary file
394
  os.remove(temp_filename)