File size: 443 Bytes
c5f8e1d
c7e3088
9bebeaf
c5f8e1d
9bebeaf
3e0dbc5
9bebeaf
 
 
 
588a062
c7e3088
3e0dbc5
588a062
 
 
 
 
c5f8e1d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import gradio as gr
import numpy as np
import torch
from transformers import pipeline

# Build the speech-recognition pipeline once at import time; convert_speech
# reuses this single module-level instance on every call.
# NOTE(review): confirm "nvidia/canary-1b" is loadable through the
# transformers pipeline API — TODO verify against the model card.
canary_pipe = pipeline(
    task="automatic-speech-recognition",
    model="nvidia/canary-1b",
)

def convert_speech(audio):
    """Transcribe a recorded audio clip to text with the module's ASR pipeline.

    Args:
        audio: A ``(sampling_rate, samples)`` pair, where ``samples`` is a
            NumPy array of audio samples, or ``None`` when nothing was
            recorded before submitting.

    Returns:
        The transcription text, or an empty string when there is no audio
        to transcribe (``None`` input or zero samples).
    """
    # The UI hands over None when the user submits without recording.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    # np.max raises on an empty array, and there is nothing to transcribe.
    if y.size == 0:
        return ""

    # Down-mix (samples, channels) recordings to mono; the ASR pipeline
    # only accepts single-channel input.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # astype returns a copy, so the in-place scaling below never mutates
    # the caller's array.
    y = y.astype(np.float32)

    # Peak-normalise to [-1, 1]. Pure silence has a peak of 0; skip the
    # division there so the model is not fed NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return canary_pipe({"sampling_rate": sr, "raw": y})["text"]


# Audio input restricted to the microphone source.
mic_input = gr.Audio(sources="microphone")

# Single-function UI: recorded speech goes to convert_speech and the
# returned transcription is shown as plain text.
iface = gr.Interface(fn=convert_speech, inputs=mic_input, outputs="text")

# Start serving the interface.
iface.launch()