Spaces:
Runtime error
Runtime error
File size: 797 Bytes
c5f8e1d c7e3088 0003cc7 d1e03b7 c5f8e1d 3e0dbc5 c7e3088 d1e03b7 c7e3088 cadfe1a c7e3088 3e0dbc5 c5f8e1d c7e3088 7fcc45d c5f8e1d d1e03b7 c5f8e1d 7fcc45d c5f8e1d 7fcc45d c5f8e1d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, VitsModel
from nemo.collections.asr.models import EncDecMultiTaskModel
# Load the pretrained 'nvidia/canary-1b' speech-to-text checkpoint once at
# import time so every request reuses the same model instance.
canary_model = EncDecMultiTaskModel.from_pretrained('nvidia/canary-1b')
# Put the model in evaluation mode and keep it on the CPU.
canary_model.eval()
canary_model.to('cpu')
# Update the decoding parameters: read the model's decoding config, set the
# beam width to 1, and apply the modified config back to the model.
# NOTE(review): the initial call with None presumably resets decoding to the
# library default before the config is read back -- confirm against NeMo docs.
canary_model.change_decoding_strategy(None)
decode_cfg = canary_model.cfg.decoding
decode_cfg.beam.beam_size = 1
canary_model.change_decoding_strategy(decode_cfg)
def convert_speech(speech):
    """Transcribe speech to text with the module-level Canary model.

    Args:
        speech: The audio to transcribe. A single file path (``str``) is
            wrapped in a one-element list before being handed to
            ``canary_model.transcribe``; any other non-``None`` value is
            passed through unchanged. ``None`` or ``""`` means no audio was
            supplied.

    Returns:
        ``""`` when no audio was supplied. For a single file path, the
        transcription of that file as one value (the first element of the
        result list). For any other input, whatever
        ``canary_model.transcribe`` returns, unchanged.
    """
    # A UI callback receives None when the user submits without recording;
    # there is nothing to transcribe, so skip the model call entirely.
    if speech is None:
        return ""

    single_file = isinstance(speech, str)
    if single_file:
        if not speech:
            return ""
        # transcribe() is fed a list of audio paths; wrap the lone path so it
        # is treated as one audio file.
        speech = [speech]

    # Convert the speech to text
    transcription = canary_model.transcribe(
        speech,
        logprobs=False,
    )

    if single_file and isinstance(transcription, (list, tuple)):
        # One file in, one transcription out: unwrap it so a text output
        # shows the sentence rather than the repr of a one-element list.
        first = transcription[0] if transcription else ""
        # Some library versions may yield result objects carrying a `.text`
        # attribute instead of plain strings; plain strings pass through.
        return getattr(first, "text", first)
    return transcription
# Build the web UI: record from the microphone, run convert_speech on the
# recording, and show the result as text.
# The audio input is constructed with gr.Audio(sources=[...]) because the
# legacy gr.inputs namespace is not available in Gradio 4+. type="filepath"
# makes Gradio hand the callback a path to the recorded file instead of a
# (sample_rate, samples) tuple.
iface = gr.Interface(
    fn=convert_speech,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)
iface.launch()