qqwjq1981 committed on
Commit
447f785
·
verified ·
1 Parent(s): f256463

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -9
app.py CHANGED
@@ -120,7 +120,14 @@ def handle_feedback(feedback):
120
  conn.commit()
121
  return "Thank you for your feedback!", None
122
 
123
- def segment_background_audio(audio_path, background_audio_path="background_segments.wav"):
 
 
 
 
 
 
 
124
  pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection", use_auth_token=hf_api_key)
125
  vad_result = pipeline(audio_path)
126
 
@@ -131,16 +138,19 @@ def segment_background_audio(audio_path, background_audio_path="background_segme
131
  result_audio = AudioSegment.empty()
132
 
133
  for segment in vad_result.itersegments():
134
- # Background segment before the speech
135
- if current_time < segment.start:
136
- bg = full_audio[int(current_time * 1000):int(segment.start * 1000)]
 
 
 
137
  result_audio += bg
138
- # Add silence for the speech duration
139
- silence_duration = segment.end - segment.start
140
- result_audio += AudioSegment.silent(duration=int(silence_duration * 1000))
141
- current_time = segment.end
142
 
143
- # Handle any remaining background after the last speech
 
 
 
 
144
  if current_time < full_duration_sec:
145
  result_audio += full_audio[int(current_time * 1000):]
146
 
 
120
  conn.commit()
121
  return "Thank you for your feedback!", None
122
 
123
+ def segment_background_audio(audio_path, background_audio_path="background_segments.wav", speech_padding=0.15):
124
+ """
125
+ Segments and removes speech from audio, returning only background.
126
+ Padding is applied around speech segments to reduce overlap/bleed.
127
+ """
128
+ from pyannote.audio import Pipeline
129
+ from pydub import AudioSegment
130
+
131
  pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection", use_auth_token=hf_api_key)
132
  vad_result = pipeline(audio_path)
133
 
 
138
  result_audio = AudioSegment.empty()
139
 
140
  for segment in vad_result.itersegments():
141
+ start = max(0.0, segment.start - speech_padding)
142
+ end = min(full_duration_sec, segment.end + speech_padding)
143
+
144
+ # Extract non-speech segment before speech starts
145
+ if current_time < start:
146
+ bg = full_audio[int(current_time * 1000):int(start * 1000)]
147
  result_audio += bg
 
 
 
 
148
 
149
+ # Replace speech (plus margin) with silence
150
+ result_audio += AudioSegment.silent(duration=int((end - start) * 1000))
151
+ current_time = end
152
+
153
+ # Add trailing background after the last segment
154
  if current_time < full_duration_sec:
155
  result_audio += full_audio[int(current_time * 1000):]
156