speech_to_text / app.py
Aashiue's picture
Create app.py
0faa7b5
raw
history blame
702 Bytes
import gradio as gr
from transformers import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
# Speech-recognition pipeline, built once at import time and reused for every
# request. No checkpoint is named here, so transformers falls back to its
# default model for the task.
transcribe = pipeline(task="automatic-speech-recognition")
# Checkpoint used for the translation step.
# NOTE(review): the original file referenced `model` and `tokenizer` without
# ever creating them. This multilingual mBART-50 checkpoint matches the
# imported classes and the "hi_IN" target code -- confirm it is the intended one.
_TRANSLATION_CHECKPOINT = "facebook/mbart-large-50-many-to-many-mmt"

# Filled on first use so the large translation model is loaded only once.
_translator_cache = {}


def _get_translator():
    """Return the ``(tokenizer, model)`` pair, loading both on first use."""
    if not _translator_cache:
        # Load into locals first so a failure on either download never leaves
        # a half-populated cache behind.
        tokenizer = MBart50TokenizerFast.from_pretrained(
            _TRANSLATION_CHECKPOINT,
            # assumes the transcript is English -- TODO confirm against the
            # speech-recognition model actually in use
            src_lang="en_XX",
        )
        model = MBartForConditionalGeneration.from_pretrained(
            _TRANSLATION_CHECKPOINT
        )
        _translator_cache.update(tokenizer=tokenizer, model=model)
    return _translator_cache["tokenizer"], _translator_cache["model"]


def speech_to_text(audio):
    """Transcribe an audio recording and translate the transcript to Hindi.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when the
            UI submitted nothing.

    Returns:
        The translated sentence as a single string. An empty string is
        returned when there is no audio or the transcript is blank.
    """
    # Nothing was recorded: skip both models instead of failing inside them.
    if not audio:
        return ""

    text = transcribe(audio)["text"]
    if not text.strip():
        return ""

    tokenizer, model = _get_translator()
    model_inputs = tokenizer(
        text, return_tensors="pt", padding=True, truncation=True
    )
    generated_tokens = model.generate(
        **model_inputs,
        # Forcing the first generated token to the Hindi language code selects
        # the output language of the multilingual model.
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
    )
    translation = tokenizer.batch_decode(
        generated_tokens, skip_special_tokens=True
    )
    # batch_decode returns one string per input sequence; exactly one sentence
    # was submitted, so hand the text output a plain string, not a list.
    return translation[0]
# Build the web UI and start serving it: a microphone recording goes in
# (handed to the callback as a file path) and plain text comes out.
microphone_input = gr.Audio(source="microphone", type="filepath")
demo = gr.Interface(fn=speech_to_text, inputs=microphone_input, outputs="text")
demo.launch()