Spaces:
Sleeping
Sleeping
File size: 1,059 Bytes
e3d3533 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import whisper
# Punctuation characters treated as end-of-sentence markers: a transcribed
# segment whose last non-whitespace character is one of these closes the
# sentence currently being accumulated.
EOS_TOKENS = [".", "!", "?"]
def _group_segments(segments, max_snt_len):
    """Merge consecutive transcription segments into sentence records.

    Args:
        segments: Iterable of mappings with "text", "start" and "end" keys.
        max_snt_len: Length threshold (in characters, counting the separator
            space added after every segment) above which the accumulated
            text is emitted even without closing punctuation.

    Returns:
        A list of ``{"text", "start", "end"}`` dicts, one per sentence.
    """
    sentences = []
    snt = ""
    snt_start = None
    snt_end = None
    for segment in segments:
        stripped = segment["text"].strip()
        if not stripped:
            # Blank segments carry no words; indexing their last character
            # would raise IndexError, so skip them entirely.
            continue

        snt += f'{segment["text"]} '
        # Compare against None explicitly: a start time of 0.0 is valid
        # but falsy, and must not be overwritten by a later segment.
        if snt_start is None:
            snt_start = segment["start"]
        snt_end = segment["end"]

        if stripped[-1] in EOS_TOKENS or len(snt) > max_snt_len:
            sentences.append(
                {"text": snt.strip(), "start": snt_start, "end": snt_end}
            )
            snt = ""
            snt_start = None

    # Flush text that never reached closing punctuation or the length limit.
    if snt:
        sentences.append(
            {"text": snt.strip(), "start": snt_start, "end": snt_end}
        )
    return sentences


def transcribe_audio(audio_fpath, max_snt_len=100):
    """Transcribe an audio file and return sentence-level timestamped text.

    The Whisper "small" model is loaded on every call, the file is
    transcribed, and the resulting segments are merged into sentences. A
    sentence is closed when a segment ends with one of ``EOS_TOKENS`` or
    when the accumulated text grows longer than ``max_snt_len`` characters.

    Args:
        audio_fpath: Path of the audio file, passed to ``model.transcribe``.
        max_snt_len: Maximum accumulated length before a sentence is emitted
            regardless of punctuation.

    Returns:
        A string with one line per sentence, formatted as
        ``"<start> <end> <text>\\n"``, where start/end are the segment
        timestamps reported by Whisper. Empty string if there are no
        non-blank segments.
    """
    model = whisper.load_model("small")
    result = model.transcribe(audio_fpath)

    sentences = _group_segments(result["segments"], max_snt_len)
    return "".join(
        f'{sentence["start"]} {sentence["end"]} {sentence["text"]}\n'
        for sentence in sentences
    )
|