import gradio as gr
import torch
from transformers import pipeline

# Fine-tuned Whisper-small checkpoint for Singlish automatic speech recognition.
MODEL_NAME = "jensenlwt/whisper-small-singlish-122k"

device = "cuda:0" if torch.cuda.is_available() else "cpu"

# ASR pipeline for the fine-tuned Whisper model; chunk_length_s=30 enables chunked
# long-form transcription, so audio longer than 30 seconds is processed in windows.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


def transcribe(inputs):
    # `inputs` is the audio filepath provided by the gr.Audio component.
    if inputs is None:
        raise gr.Error(
            "No audio file submitted! Please upload or record an audio file before submitting your request."
        )

    # Force English decoding and return timestamped chunks rather than a single string.
    chunks = pipe(
        inputs,
        generate_kwargs={"language": "english"},
        return_timestamps=True,
    )["chunks"]
    return chunks


demo = gr.Blocks()
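# The two Interface views below (microphone capture and file upload) are rendered
# as tabs inside this Blocks app via gr.TabbedInterface.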

mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        # Gradio 4.x audio input: components live under gr.* and take a `sources` list.
        gr.Audio(sources=["microphone"], type="filepath"),
    ],
    outputs="text",
    title="Whisper Small: Singlish Edition 🇸🇬",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["upload"], type="filepath", label="Audio file"),
    ],
    outputs="text",
    title="Whisper Small: Singlish Edition 🇸🇬",
    description=(
        "NOTE: This Space currently seems to cut off the last few seconds of the recording. "
        "For exploration, audio clips shorter than 10 seconds are recommended."
    ),
    allow_flagging="never",
)


with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe],
        ["Microphone", "Audio file"],
    )

# Enable the request queue before launching (enable_queue is no longer a launch() kwarg).
demo.queue().launch()
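
# A minimal local sanity check, kept commented out so it never runs on the Space.
# It calls the pipeline directly, bypassing the UI; "sample.wav" is a hypothetical
# placeholder path, not a file that ships with this repo.
#
# chunks = pipe(
#     "sample.wav",
#     generate_kwargs={"language": "english"},
#     return_timestamps=True,
# )["chunks"]
# print(chunks)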