deepsync committed on
Commit
9e90036
·
verified ·
1 Parent(s): df05b61

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -3,10 +3,12 @@ import os
3
  from uuid import uuid4
4
  from pydub.silence import detect_nonsilent
5
  from pydub import AudioSegment
 
6
 
7
 
8
  def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
9
- audio = AudioSegment.from_file(audio_fp)
 
10
  speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
11
  speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
12
  speech_timestamps = [{"start": s[0]/1000, "end": s[1]/1000} for s in speech_timestamps]
@@ -61,8 +63,8 @@ interface = gr.Interface(
61
  gr.Number(label="min_speech_duration_ms", value=250, info="default (250)"),
62
  gr.Number(label="min_silence_duration_ms", value=40, info="default (100)"),
63
  gr.Checkbox(label="Auto merge", value=True),
64
- gr.Textbox(label="Gap max threshold value (seconds)", value=0.3),
65
- gr.Number(label="Approx Max Segment Length", value=5)
66
  ],
67
  [
68
  gr.File(label="VAD Labels"),
 
3
  from uuid import uuid4
4
  from pydub.silence import detect_nonsilent
5
  from pydub import AudioSegment
6
+ from pydub.effects import normalize
7
 
8
 
9
  def get_labels(audio_fp, threshold, min_speech_duration_ms, min_silence_duration_ms, auto_merge, uppper_merge_threshold, max_segment_length):
10
+ audio = AudioSegment.from_file(audio_fp).set_channels(1)
11
+ audio = normalize(audio)
12
  speech_timestamps = detect_nonsilent(audio, min_silence_len=min_silence_duration_ms, silence_thresh=-40)
13
  speech_timestamps = list(filter(lambda x: x[1]-x[0] > min_speech_duration_ms, speech_timestamps))
14
  speech_timestamps = [{"start": s[0]/1000, "end": s[1]/1000} for s in speech_timestamps]
 
63
  gr.Number(label="min_speech_duration_ms", value=250, info="default (250)"),
64
  gr.Number(label="min_silence_duration_ms", value=40, info="default (100)"),
65
  gr.Checkbox(label="Auto merge", value=True),
66
+ gr.Textbox(label="Gap max threshold value (seconds)", value=0.350),
67
+ gr.Number(label="Approx Max Segment Length", value=10)
68
  ],
69
  [
70
  gr.File(label="VAD Labels"),