Update modules/vad/silero_vad.py
Browse files
- modules/vad/silero_vad.py +2 -17
modules/vad/silero_vad.py
CHANGED
@@ -252,22 +252,7 @@ class SileroVAD:
|
|
252 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
253 |
|
254 |
for segment in segments:
|
255 |
-
|
256 |
-
|
257 |
-
for word in segment["words"]:
|
258 |
-
# Ensure the word start and end times are resolved to the same chunk.
|
259 |
-
middle = (word["start"] + word["end"]) / 2
|
260 |
-
chunk_index = ts_map.get_chunk_index(middle)
|
261 |
-
word["start"] = ts_map.get_original_time(word["start"], chunk_index)
|
262 |
-
word["end"] = ts_map.get_original_time(word["end"], chunk_index)
|
263 |
-
words.append(word)
|
264 |
-
|
265 |
-
segment["start"] = words[0].start
|
266 |
-
segment["end"] = words[-1].end
|
267 |
-
segment["words"] = words
|
268 |
-
|
269 |
-
else:
|
270 |
-
segment["start"] = ts_map.get_original_time(segment["start"])
|
271 |
-
segment["end"] = ts_map.get_original_time(segment["end"])
|
272 |
|
273 |
return segments
|
|
|
252 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
253 |
|
254 |
for segment in segments:
|
255 |
+
segment["start"] = ts_map.get_original_time(segment["start"])
|
256 |
+
segment["end"] = ts_map.get_original_time(segment["end"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
|
258 |
return segments
|