Update modules/vad/silero_vad.py
Browse files
modules/vad/silero_vad.py
CHANGED
@@ -256,9 +256,9 @@ class SileroVAD:
|
|
256 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
257 |
|
258 |
for segment in segments:
|
259 |
-
if segment["words"]:
|
260 |
words = []
|
261 |
-
for word in segment["words"]:
|
262 |
# Ensure the word start and end times are resolved to the same chunk.
|
263 |
middle = (word.start + word.end) / 2
|
264 |
chunk_index = ts_map.get_chunk_index(middle)
|
@@ -268,7 +268,7 @@ class SileroVAD:
|
|
268 |
|
269 |
segment["start"] = words[0].start
|
270 |
segment["end"] = words[-1].end
|
271 |
-
segment["words"] = words
|
272 |
|
273 |
else:
|
274 |
segment["start"] = ts_map.get_original_time(segment["start"])
|
|
|
256 |
ts_map = SpeechTimestampsMap(speech_chunks, sampling_rate)
|
257 |
|
258 |
for segment in segments:
|
259 |
+
if segment["text"]:
|
260 |
words = []
|
261 |
+
for word in segment["text"]:
|
262 |
# Ensure the word start and end times are resolved to the same chunk.
|
263 |
middle = (word.start + word.end) / 2
|
264 |
chunk_index = ts_map.get_chunk_index(middle)
|
|
|
268 |
|
269 |
segment["start"] = words[0].start
|
270 |
segment["end"] = words[-1].end
|
271 |
+
segment["text"] = words
|
272 |
|
273 |
else:
|
274 |
segment["start"] = ts_map.get_original_time(segment["start"])
|