Spaces:

ahmedkasem
/

quran-nlp

Sleeping

App Files Files Community

deveix committed on Apr 21, 2024

Commit

1e2c630

1 Parent(s): 08af9a0

add segments

Browse files

Files changed (1) hide show

app/main.py +29 -3

app/main.py CHANGED Viewed

@@ -27,6 +27,22 @@ default_sample_rate=22050
 def load(file_name, skip_seconds=0):
     """Load an audio file at its native sampling rate.

     Args:
         file_name: Path of the audio file to read.
         skip_seconds: Seconds to skip from the start of the file before
             reading. The default of 0 reads from the beginning.

     Returns:
         Whatever ``librosa.load`` returns for these arguments.
     """
     # sr=None asks librosa not to resample; skip_seconds used to be ignored
     # and is now forwarded as the read offset.
     return librosa.load(file_name, sr=None, res_type='kaiser_fast', offset=skip_seconds)
 def preprocess_audio(audio_data, rate):
     # Apply preprocessing steps
     audio_data = nr.reduce_noise(y=audio_data, sr=rate)
@@ -343,7 +359,16 @@ async def handle_audio(file: UploadFile = File(...)):
         audio_data, sr = preprocess_audio(audio_data, sr)
         print("finished processing ", temp_filename)
         # Extract features
         features = extract_features(audio_data, sr)
         # preprocess_audio(temp_filename, 'app')
         # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
@@ -352,17 +377,18 @@ async def handle_audio(file: UploadFile = File(...)):
         # print("Extracted Features:", features)
         # features = pca.transform(features)
         # features = np.array(features).reshape(1, -1)
-        features = features.reshape(1, -1)
-        features = scaler.transform(features)
         # proceed with an inference
         results = model.predict(features)
         # decoded_predictions = [label_encoder.classes_[i] for i in results]
         # # Decode the predictions using the label encoder
         decoded_predictions = label_encoder.inverse_transform(results)
-        print('decoded', decoded_predictions[0])
         # .tolist()
         # Clean up the temporary file
         os.remove(temp_filename)

 def load(file_name, skip_seconds=0):
     """Load an audio file at its native sampling rate.

     Args:
         file_name: Path of the audio file to read.
         skip_seconds: Seconds to skip from the start of the file before
             reading. The default of 0 reads from the beginning.

     Returns:
         Whatever ``librosa.load`` returns for these arguments.
     """
     # sr=None asks librosa not to resample; skip_seconds used to be ignored
     # and is now forwarded as the read offset.
     return librosa.load(file_name, sr=None, res_type='kaiser_fast', offset=skip_seconds)
+def split_audio(audio_data, sample_rate, segment_length_sec=20):
+    """Split audio samples into consecutive fixed-length segments.
+
+    Args:
+        audio_data: Sliceable sequence of samples that supports ``len()``.
+        sample_rate: Number of samples per second in ``audio_data``.
+        segment_length_sec: Duration of each segment in seconds.
+
+    Returns:
+        A list of slices of ``audio_data``, in order. Every segment holds
+        ``segment_length_sec * sample_rate`` samples except possibly the
+        last one, which holds whatever remains. Empty input yields ``[]``.
+
+    Raises:
+        ValueError: If the segment length is less than one sample.
+    """
+    # Slice bounds must be integers; a float sample rate or a fractional
+    # segment length would otherwise make the slicing below raise TypeError.
+    num_samples_per_segment = int(round(segment_length_sec * sample_rate))
+    if num_samples_per_segment <= 0:
+        raise ValueError(
+            "segment_length_sec * sample_rate must be at least one sample"
+        )
+    # Stepping by the segment size yields ceil(len / size) slices; the final
+    # slice is simply shorter when the length is not an exact multiple.
+    return [
+        audio_data[start:start + num_samples_per_segment]
+        for start in range(0, len(audio_data), num_samples_per_segment)
+    ]
 def preprocess_audio(audio_data, rate):
     # Apply preprocessing steps
     audio_data = nr.reduce_noise(y=audio_data, sr=rate)
         audio_data, sr = preprocess_audio(audio_data, sr)
         print("finished processing ", temp_filename)
         # Extract features
+        features_list = []
         features = extract_features(audio_data, sr)
+        features_list.append(features)
+        segments = split_audio(audio_data, sr)
+        for i, segment in enumerate(segments):
+            # Extract features from the processed audio segment (you need to define this function)
+            features = extract_features(segment, sr)
+            print(f"Features extracted for segment {i+1}")
+            features_list.append(features)
         # preprocess_audio(temp_filename, 'app')
         # repair_mp3_with_ffmpeg_python(temp_filename, temp_filename)
         # print("Extracted Features:", features)
         # features = pca.transform(features)
         # features = np.array(features).reshape(1, -1)
+        # features = features.reshape(1, -1)
+        features_list = scaler.transform(features_list)
         # proceed with an inference
         results = model.predict(features)
         # decoded_predictions = [label_encoder.classes_[i] for i in results]
+        print('decoded', results)
         # # Decode the predictions using the label encoder
         decoded_predictions = label_encoder.inverse_transform(results)
+        print('decoded', decoded_predictions)
         # .tolist()
         # Clean up the temporary file
         os.remove(temp_filename)