Spaces:
Sleeping
Sleeping
integrate accelerated whisper
Browse files- audio_to_text.py +18 -7
- requirements.txt +3 -1
audio_to_text.py
CHANGED
@@ -1,32 +1,43 @@
|
|
1 |
-
import
|
|
|
2 |
|
3 |
EOS_TOKENS = [".", "!", "?"]
|
4 |
|
5 |
|
6 |
def transcribe_audio(audio_fpath, max_snt_len=100):
|
7 |
-
|
8 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
sentences = []
|
11 |
snt_start = None
|
12 |
snt = ""
|
13 |
-
for segment in result["
|
14 |
snt += f'{segment["text"]} '
|
|
|
15 |
if not snt_start:
|
16 |
-
snt_start =
|
17 |
if (
|
18 |
segment["text"].strip().split()[-1][-1] in EOS_TOKENS
|
19 |
or len(snt) > max_snt_len
|
20 |
):
|
21 |
sentences.append(
|
22 |
-
{"text": snt.strip(), "start": snt_start, "end":
|
23 |
)
|
24 |
snt_start = None
|
25 |
snt = ""
|
26 |
|
27 |
if len(snt) > 0:
|
28 |
sentences.append(
|
29 |
-
{"text": snt.strip(), "start": snt_start, "end":
|
30 |
)
|
31 |
snt_start = None
|
32 |
snt = ""
|
|
|
1 |
+
import torch
|
2 |
+
from transformers import pipeline
|
3 |
|
4 |
# Sentence-ending punctuation: in transcribe_audio, a chunk whose text ends
# with one of these characters closes the sentence being accumulated.
EOS_TOKENS = [".", "!", "?"]
|
5 |
|
6 |
|
7 |
def transcribe_audio(audio_fpath, max_snt_len=100):
    """Transcribe an audio file with Whisper and group chunks into sentences.

    Runs the Hugging Face ``automatic-speech-recognition`` pipeline
    (``openai/whisper-small``, float16, on ``cuda:0``, converted with
    ``to_bettertransformer()``) over ``audio_fpath`` with timestamps enabled,
    then merges consecutive timestamped chunks into sentence records of the
    form ``{"text": str, "start": ..., "end": ...}``.

    A sentence is closed when the current chunk's text ends with one of
    ``EOS_TOKENS`` or when the accumulated text is longer than
    ``max_snt_len`` characters.

    Args:
        audio_fpath: Audio input passed unchanged to the pipeline
            (presumably a file path -- confirm against callers).
        max_snt_len: Accumulated-text length (in characters) above which a
            sentence is closed even without end-of-sentence punctuation.

    NOTE(review): the visible portion of this function builds ``sentences``
    but contains no ``return``; the return statement presumably lies past
    the end of the visible hunk -- confirm before relying on the result.
    """
    # NOTE(review): the model is loaded on every call and a CUDA device is
    # required; consider caching the pipeline if this is called repeatedly.
    pipe = pipeline(
        "automatic-speech-recognition",
        "openai/whisper-small",
        torch_dtype=torch.float16,
        device="cuda:0",
    )
    pipe.model = pipe.model.to_bettertransformer()

    outputs = pipe(
        audio_fpath,
        chunk_length_s=30,
        batch_size=8,
        return_timestamps=True,
    )

    # Tuple form lets str.endswith test all terminators in one call.
    eos_suffixes = tuple(EOS_TOKENS)

    sentences = []
    snt_start = None
    snt = ""
    # The pipeline result is bound to `outputs`; the previous name `result`
    # is not defined in this function and raised NameError.
    for segment in outputs["chunks"]:
        snt += f'{segment["text"]} '
        start_time, end_time = segment["timestamp"]
        # Compare against None explicitly: a start of 0.0 is a valid
        # timestamp and must not be overwritten by the next chunk's start.
        if snt_start is None:
            snt_start = start_time
        # rstrip().endswith() checks the last non-whitespace character and,
        # unlike split()[-1][-1], does not raise on empty/blank chunk text.
        if (
            segment["text"].rstrip().endswith(eos_suffixes)
            or len(snt) > max_snt_len
        ):
            sentences.append(
                {"text": snt.strip(), "start": snt_start, "end": end_time}
            )
            snt_start = None
            snt = ""

    # Flush whatever text is left after the last chunk as a final sentence.
    # `end_time` is bound here because `snt` is only non-empty after at least
    # one loop iteration.
    if snt:
        sentences.append(
            {"text": snt.strip(), "start": snt_start, "end": end_time}
        )
        # Resets kept from the original in case later code reads these names.
        snt_start = None
        snt = ""
|
requirements.txt
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
-
|
|
|
|
|
2 |
openai
|
3 |
yt-dlp
|
4 |
streamlit
|
|
|
1 |
+
transformers
|
2 |
+
optimum
|
3 |
+
accelerate
|
4 |
openai
|
5 |
yt-dlp
|
6 |
streamlit
|