ammansik committed on
Commit
82b41e0
·
1 Parent(s): 2458b22

integrate accelerated whisper

Browse files
Files changed (2) hide show
  1. audio_to_text.py +18 -7
  2. requirements.txt +3 -1
audio_to_text.py CHANGED
@@ -1,32 +1,43 @@
1
- import whisper
 
2
 
3
  EOS_TOKENS = [".", "!", "?"]
4
 
5
 
6
  def transcribe_audio(audio_fpath, max_snt_len=100):
7
- model = whisper.load_model("small")
8
- result = model.transcribe(audio_fpath)
 
 
 
 
 
 
 
 
 
9
 
10
  sentences = []
11
  snt_start = None
12
  snt = ""
13
- for segment in result["segments"]:
14
  snt += f'{segment["text"]} '
 
15
  if not snt_start:
16
- snt_start = segment["start"]
17
  if (
18
  segment["text"].strip().split()[-1][-1] in EOS_TOKENS
19
  or len(snt) > max_snt_len
20
  ):
21
  sentences.append(
22
- {"text": snt.strip(), "start": snt_start, "end": segment["end"]}
23
  )
24
  snt_start = None
25
  snt = ""
26
 
27
  if len(snt) > 0:
28
  sentences.append(
29
- {"text": snt.strip(), "start": snt_start, "end": segment["end"]}
30
  )
31
  snt_start = None
32
  snt = ""
 
1
+ import torch
2
+ from transformers import pipeline
3
 
4
  EOS_TOKENS = [".", "!", "?"]
5
 
6
 
7
  def transcribe_audio(audio_fpath, max_snt_len=100):
8
+ pipe = pipeline("automatic-speech-recognition",
9
+ "openai/whisper-small",
10
+ torch_dtype=torch.float16,
11
+ device="cuda:0")
12
+
13
+ pipe.model = pipe.model.to_bettertransformer()
14
+
15
+ outputs = pipe(audio_fpath,
16
+ chunk_length_s=30,
17
+ batch_size=8,
18
+ return_timestamps=True)
19
 
20
  sentences = []
21
  snt_start = None
22
  snt = ""
23
+ for segment in result["chunks"]:
24
  snt += f'{segment["text"]} '
25
+ start_time, end_time = segment["timestamp"]
26
  if not snt_start:
27
+ snt_start = start_time
28
  if (
29
  segment["text"].strip().split()[-1][-1] in EOS_TOKENS
30
  or len(snt) > max_snt_len
31
  ):
32
  sentences.append(
33
+ {"text": snt.strip(), "start": snt_start, "end": end_time}
34
  )
35
  snt_start = None
36
  snt = ""
37
 
38
  if len(snt) > 0:
39
  sentences.append(
40
+ {"text": snt.strip(), "start": snt_start, "end": end_time}
41
  )
42
  snt_start = None
43
  snt = ""
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
- git+https://github.com/openai/whisper.git
 
 
2
  openai
3
  yt-dlp
4
  streamlit
 
1
+ transformers
2
+ optimum
3
+ accelerate
4
  openai
5
  yt-dlp
6
  streamlit