all-in-one

Sleeping

helloWorld199 committed on Jul 7, 2024

Commit

db38fbd

verified ·

1 Parent(s): f412e2b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -167,6 +167,10 @@ def aggregate_vocal_times(vocal_time):
     return compressed_vocal_times
 def add_voice_label(json_file, audio_path):
     # Load the JSON file
     with open(json_file, 'r') as f:
         data = json.load(f)
@@ -190,7 +194,7 @@ def add_voice_label(json_file, audio_path):
         speech_probs.append(speech_prob)
     vad_iterator.reset_states() # reset model states after each audio
-    voice_idxs = np.where(np.array(speech_probs) >= 0.7)[0]
     print(len(voice_idxs))
     if len(voice_idxs) == 0:

     return compressed_vocal_times
 def add_voice_label(json_file, audio_path):
+    # This is a hyperparameter of the model which determines whether to consider
+    # the segment voice or non-voice
+    THRESHOLD_PROBABILITY = 0.75
     # Load the JSON file
     with open(json_file, 'r') as f:
         data = json.load(f)
         speech_probs.append(speech_prob)
     vad_iterator.reset_states() # reset model states after each audio
+    voice_idxs = np.where(np.array(speech_probs) >= THRESHOLD_PROBABILITY)[0]
     print(len(voice_idxs))
     if len(voice_idxs) == 0: