Update app.py
Browse files
app.py
CHANGED
@@ -120,7 +120,14 @@ def handle_feedback(feedback):
|
|
120 |
conn.commit()
|
121 |
return "Thank you for your feedback!", None
|
122 |
|
123 |
-
def segment_background_audio(audio_path, background_audio_path="background_segments.wav"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection", use_auth_token=hf_api_key)
|
125 |
vad_result = pipeline(audio_path)
|
126 |
|
@@ -131,16 +138,19 @@ def segment_background_audio(audio_path, background_audio_path="background_segme
|
|
131 |
result_audio = AudioSegment.empty()
|
132 |
|
133 |
for segment in vad_result.itersegments():
|
134 |
-
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
137 |
result_audio += bg
|
138 |
-
# Add silence for the speech duration
|
139 |
-
silence_duration = segment.end - segment.start
|
140 |
-
result_audio += AudioSegment.silent(duration=int(silence_duration * 1000))
|
141 |
-
current_time = segment.end
|
142 |
|
143 |
-
|
|
|
|
|
|
|
|
|
144 |
if current_time < full_duration_sec:
|
145 |
result_audio += full_audio[int(current_time * 1000):]
|
146 |
|
|
|
120 |
conn.commit()
|
121 |
return "Thank you for your feedback!", None
|
122 |
|
123 |
+
def segment_background_audio(audio_path, background_audio_path="background_segments.wav", speech_padding=0.15):
|
124 |
+
"""
|
125 |
+
Segments and removes speech from audio, returning only background.
|
126 |
+
Padding is applied around speech segments to reduce overlap/bleed.
|
127 |
+
"""
|
128 |
+
from pyannote.audio import Pipeline
|
129 |
+
from pydub import AudioSegment
|
130 |
+
|
131 |
pipeline = Pipeline.from_pretrained("pyannote/voice-activity-detection", use_auth_token=hf_api_key)
|
132 |
vad_result = pipeline(audio_path)
|
133 |
|
|
|
138 |
result_audio = AudioSegment.empty()
|
139 |
|
140 |
for segment in vad_result.itersegments():
|
141 |
+
start = max(0.0, segment.start - speech_padding)
|
142 |
+
end = min(full_duration_sec, segment.end + speech_padding)
|
143 |
+
|
144 |
+
# Extract non-speech segment before speech starts
|
145 |
+
if current_time < start:
|
146 |
+
bg = full_audio[int(current_time * 1000):int(start * 1000)]
|
147 |
result_audio += bg
|
|
|
|
|
|
|
|
|
148 |
|
149 |
+
# Replace speech (plus margin) with silence
|
150 |
+
result_audio += AudioSegment.silent(duration=int((end - start) * 1000))
|
151 |
+
current_time = end
|
152 |
+
|
153 |
+
# Add trailing background after the last segment
|
154 |
if current_time < full_duration_sec:
|
155 |
result_audio += full_audio[int(current_time * 1000):]
|
156 |
|