import os

import gradio as gr
import torch
from transformers import pipeline
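
# Gradio demo that transcribes Singlish speech with a fine-tuned
# Whisper-small checkpoint (jensenlwt/whisper-small-singlish-122k).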
MODEL_NAME = "jensenlwt/whisper-small-singlish-122k"
FILE_LIMIT_MB = 1000  # upper bound on upload size, enforced in transcribe()

# Use the first GPU if one is available; otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
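
# chunk_length_s=30 splits long recordings into 30-second windows, matching
# Whisper's fixed input context, and stitches the results back together.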
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


def transcribe(inputs):
    if inputs is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file "
            "before submitting your request."
        )
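    # Enforce the upload size cap; this assumes FILE_LIMIT_MB (otherwise
    # unused) was intended for a check like this one.
    file_size_mb = os.stat(inputs).st_size / (1024 * 1024)
    if file_size_mb > FILE_LIMIT_MB:
        raise gr.Error(
            f"File size exceeds the {FILE_LIMIT_MB}MB limit "
            f"({file_size_mb:.1f}MB submitted)."
        )
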
    # return_timestamps=True makes the pipeline return timestamped chunks,
    # e.g. [{"timestamp": (0.0, 5.0), "text": "..."}], instead of one string.
    chunks = pipe(
        inputs,
        generate_kwargs={"language": "english"},
        return_timestamps=True,
    )["chunks"]
    return chunks

demo = gr.Blocks()

mf_transcribe = gr.Interface(
    fn=transcribe,
    # gr.Audio with sources=[...] assumes Gradio 4.x; older releases used the
    # since-removed gr.inputs.Audio(source=..., optional=True) form.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
    title="Whisper Small: Singlish Edition 🇸🇬",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Audio file"),
    outputs="text",
    title="Whisper Small: Singlish Edition 🇸🇬",
    description=(
        "NOTE: This Space currently seems to cut off the last few seconds of "
        "a recording. For exploration, I recommend sticking to audio clips "
        "shorter than 10 seconds."
    ),
    allow_flagging="never",
)
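
# Expose the two interfaces as tabs inside the Blocks container.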
with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe],
        ["Microphone", "Audio file"],
    )

# enable_queue is gone from launch() in recent Gradio; queue() is the
# equivalent, letting long transcriptions run without blocking other requests.
demo.queue().launch()