Chatty_Ashe / app.py
gdnartea's picture
Update app.py
588a062 verified
raw
history blame
443 Bytes
import gradio as gr
import numpy as np
import torch
from transformers import pipeline
# Speech-recognition pipeline, built once at import time and shared by every
# request handled by this module.
# NOTE(review): "nvidia/canary-1b" appears to be published as a NeMo
# checkpoint; confirm that transformers.pipeline can actually load it.
canary_pipe = pipeline(
    task="automatic-speech-recognition",
    model="nvidia/canary-1b",
)
def convert_speech(audio):
    """Transcribe one recording with the module-level ``canary_pipe``.

    Args:
        audio: ``(sampling_rate, samples)`` pair where ``samples`` is a NumPy
            array, or ``None`` when the user submitted without recording.

    Returns:
        The ``"text"`` field of the pipeline result, or an empty string when
        there is no audio to transcribe.
    """
    # Submitting the form with no recording hands the callback ``None``.
    if audio is None:
        return ""

    sr, y = audio
    # ``astype`` copies, so the in-place scaling below never touches the
    # caller's buffer.
    y = y.astype(np.float32)

    # Multi-channel recordings arrive as (samples, channels); the ASR
    # pipeline expects a single channel, so down-mix by averaging.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # ``np.max`` raises on an empty array; there is nothing to transcribe.
    if y.size == 0:
        return ""

    # Peak-normalise to [-1, 1]. Skip this for pure silence, where dividing
    # by a zero peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    return canary_pipe({"sampling_rate": sr, "raw": y})["text"]
# Web UI: record from the microphone, run the recording through
# convert_speech, and show the returned transcription as text.
iface = gr.Interface(
    fn=convert_speech,
    inputs=gr.Audio(
        sources=["microphone"],
        # Default value, spelled out because convert_speech unpacks a
        # (sampling_rate, ndarray) tuple.
        type="numpy",
    ),
    outputs="text",
)

# Start the Gradio server as soon as the script is executed.
iface.launch()