LAP-DEV committed on
Commit
473303e
·
verified ·
1 Parent(s): e4fb0ac

Update modules/vad/silero_vad.py

Browse files
Files changed (1) hide show
  1. modules/vad/silero_vad.py +18 -16
modules/vad/silero_vad.py CHANGED
@@ -9,24 +9,26 @@ import faster_whisper
9
  from faster_whisper.transcribe import SpeechTimestampsMap
10
  import gradio as gr
11
 
12
- class Segment(self):
13
- id: Optional[int] = Field(default=None, description="Incremental id for the segment")
14
- seek: Optional[int] = Field(default=None, description="Seek of the segment from chunked audio")
15
- text: Optional[str] = Field(default=None, description="Transcription text of the segment")
16
- start: Optional[float] = Field(default=None, description="Start time of the segment")
17
- end: Optional[float] = Field(default=None, description="End time of the segment")
18
- tokens: Optional[List[int]] = Field(default=None, description="List of token IDs")
19
- temperature: Optional[float] = Field(default=None, description="Temperature used during the decoding process")
20
- avg_logprob: Optional[float] = Field(default=None, description="Average log probability of the tokens")
21
- compression_ratio: Optional[float] = Field(default=None, description="Compression ratio of the segment")
22
- no_speech_prob: Optional[float] = Field(default=None, description="Probability that it's not speech")
23
- words: Optional[List['Word']] = Field(default=None, description="List of words contained in the segment")
 
24
 
25
  class Word(self):
26
- start: Optional[float] = Field(default=None, description="Start time of the word")
27
- end: Optional[float] = Field(default=None, description="Start time of the word")
28
- word: Optional[str] = Field(default=None, description="Word text")
29
- probability: Optional[float] = Field(default=None, description="Probability of the word")
 
30
 
31
  class SileroVAD:
32
  def __init__(self):
 
9
  from faster_whisper.transcribe import SpeechTimestampsMap
10
  import gradio as gr
11
 
class Segment:
    """One transcribed segment of audio, optionally with word-level detail.

    Plain attribute container. Every attribute defaults to ``None`` so a
    bare ``Segment()`` can be created and filled in afterwards.

    The attributes were previously initialised with ``Field(default=None,
    ...)`` inside ``__init__``. Outside a model class body that call only
    yields a field-descriptor object (presumably pydantic's -- the import is
    not visible here), so each attribute held that object instead of
    ``None``. The defaults are now real ``None`` values.

    Args:
        id: Incremental id for the segment.
        seek: Seek of the segment from chunked audio.
        text: Transcription text of the segment.
        start: Start time of the segment.
        end: End time of the segment.
        tokens: List of token IDs.
        temperature: Temperature used during the decoding process.
        avg_logprob: Average log probability of the tokens.
        compression_ratio: Compression ratio of the segment.
        no_speech_prob: Probability that it's not speech.
        words: List of words contained in the segment.
    """

    def __init__(
        self,
        *,
        id=None,  # shadows the builtin on purpose: it is the attribute name
        seek=None,
        text=None,
        start=None,
        end=None,
        tokens=None,
        temperature=None,
        avg_logprob=None,
        compression_ratio=None,
        no_speech_prob=None,
        words=None,
    ):
        # Annotations live in the body (where Python never evaluates them)
        # so the class does not depend on `typing` names at definition time.
        self.id: Optional[int] = id
        self.seek: Optional[int] = seek
        self.text: Optional[str] = text
        self.start: Optional[float] = start
        self.end: Optional[float] = end
        self.tokens: Optional[List[int]] = tokens
        self.temperature: Optional[float] = temperature
        self.avg_logprob: Optional[float] = avg_logprob
        self.compression_ratio: Optional[float] = compression_ratio
        self.no_speech_prob: Optional[float] = no_speech_prob
        self.words: Optional[List['Word']] = words
25
 
class Word:
    """A single word of a transcription with its timing and confidence.

    Plain attribute container. Every attribute defaults to ``None`` so a
    bare ``Word()`` can be created and filled in afterwards.

    Fixes over the previous definition:
      * ``class Word(self):`` named an undefined base (``self`` does not
        exist at module scope) and raised ``NameError`` on import.
      * Attributes were set to ``Field(default=None, ...)`` objects rather
        than ``None``.
      * The description of ``end`` was a copy of the one for ``start``.

    Args:
        start: Start time of the word.
        end: End time of the word.
        word: Word text.
        probability: Probability of the word.
    """

    def __init__(self, *, start=None, end=None, word=None, probability=None):
        # Annotations live in the body (where Python never evaluates them)
        # so the class does not depend on `typing` names at definition time.
        self.start: Optional[float] = start
        self.end: Optional[float] = end
        self.word: Optional[str] = word
        self.probability: Optional[float] = probability
32
 
33
  class SileroVAD:
34
  def __init__(self):