LAP-DEV committed on
Commit
b551526
·
verified ·
1 Parent(s): 3824a61

Update modules/vad/silero_vad.py

Browse files
Files changed (1) hide show
  1. modules/vad/silero_vad.py +19 -7
modules/vad/silero_vad.py CHANGED
@@ -215,13 +215,6 @@ class SileroVAD:
215
 
216
  return np.concatenate([audio[chunk["start"]: chunk["end"]] for chunk in chunks])
217
 
218
- def get_chunk_index(self, time: float) -> int:
219
- sample = int(time * self.sampling_rate)
220
- return min(
221
- bisect.bisect(self.chunk_end_sample, sample),
222
- len(self.chunk_end_sample) - 1,
223
- )
224
-
225
  @staticmethod
226
  def format_timestamp(
227
  seconds: float,
@@ -260,5 +253,24 @@ class SileroVAD:
260
  segment["start"] = ts_map.get_original_time(segment["start"])
261
  segment["end"] = ts_map.get_original_time(segment["end"])
262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  return segments
264
 
 
215
 
216
  return np.concatenate([audio[chunk["start"]: chunk["end"]] for chunk in chunks])
217
 
 
 
 
 
 
 
 
218
  @staticmethod
219
  def format_timestamp(
220
  seconds: float,
 
253
  segment["start"] = ts_map.get_original_time(segment["start"])
254
  segment["end"] = ts_map.get_original_time(segment["end"])
255
 
256
+ for segment in segments:
257
+ if segment.words:
258
+ words = []
259
+ for word in segment.words:
260
+ # Ensure the word start and end times are resolved to the same chunk.
261
+ middle = (word.start + word.end) / 2
262
+ chunk_index = ts_map.get_chunk_index(middle)
263
+ word.start = ts_map.get_original_time(word.start, chunk_index)
264
+ word.end = ts_map.get_original_time(word.end, chunk_index)
265
+ words.append(word)
266
+
267
+ segment["start"] = words[0].start
268
+ segment["end"] = words[-1].end
269
+ segment["words"] = words
270
+
271
+ else:
272
+ segment["start"] = ts_map.get_original_time(segment["start"])
273
+ segment["end"] = ts_map.get_original_time(segment["end"])
274
+
275
  return segments
276