Chatty_Ashe / app.py
gdnartea's picture
Update app.py
7fcc45d verified
raw
history blame
797 Bytes
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel
from nemo.collections.asr.models import EncDecMultiTaskModel
# Load the speech-to-text model from the pretrained 'nvidia/canary-1b'
# checkpoint. This runs at import time, so a single model instance is created
# once and reused by the transcription callback defined further down.
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
# Switch to evaluation mode; the model is only used for inference here.
canary_model.eval()
# Keep the model on the CPU.
canary_model.to('cpu')
# Update the decoding parameters.
# NOTE(review): calling change_decoding_strategy(None) first presumably makes
# the model fall back to its own stored decoding config -- confirm against the
# NeMo documentation; this call may be redundant.
canary_model.change_decoding_strategy(None)
decode_cfg = canary_model.cfg.decoding
# Beam size 1 -- presumably equivalent to greedy decoding; TODO confirm.
decode_cfg.beam.beam_size = 1
# Re-apply the strategy so the modified config is the one in effect.
canary_model.change_decoding_strategy(decode_cfg)
def convert_speech(speech):
    """Transcribe one recorded audio clip to text with the Canary model.

    Args:
        speech: Path to the audio file to transcribe, or ``None`` when no
            recording was submitted.

    Returns:
        The transcription as a string. An empty string is returned when
        there is no audio or the model produced no result.
    """
    # Nothing was recorded/submitted: skip the (expensive) model call.
    if speech is None:
        return ""

    # The transcribe API works on a batch of audio paths, so wrap the single
    # clip in a list. NOTE(review): some NeMo releases treat a bare string as
    # a manifest path rather than an audio file -- the list form avoids that.
    results = canary_model.transcribe(
        [speech],
        logprobs=False,
    )
    if not results:
        return ""

    # One input clip -> one result. Depending on the NeMo release the entry
    # is either a plain string or a hypothesis object exposing ``.text``;
    # a plain string has no ``text`` attribute and is returned unchanged.
    first = results[0]
    return getattr(first, "text", first)
# Microphone-driven UI that feeds each recording to convert_speech and shows
# the returned text. type="filepath" makes Gradio hand the callback a path to
# the recorded clip on disk instead of the default (sample_rate, samples)
# array pair; the speech-to-text model is driven with audio file paths.
# NOTE(review): gr.inputs is the legacy Gradio namespace; newer releases expose
# this component as gr.Audio -- migrate once the Gradio version is pinned.
iface = gr.Interface(
    fn=convert_speech,
    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
    outputs="text",
)
iface.launch()