File size: 911 Bytes
0faa7b5
 
 
 
99e2cd1
 
 
 
0faa7b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import gradio as gr
from transformers import pipeline
from transformers import MBartForConditionalGeneration, MBart50TokenizerFast
# Speech-recognition stage. The checkpoint is named explicitly instead of
# relying on the pipeline's implicit task default, so a library upgrade
# cannot silently swap the model underneath the app.
# NOTE(review): "facebook/wav2vec2-base-960h" is the checkpoint transformers
# falls back to for this task when no model is given -- confirm against the
# installed transformers version.
ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"
transcribe = pipeline("automatic-speech-recognition", model=ASR_CHECKPOINT)

# Translation stage. The model and tokenizer must come from the same
# checkpoint, so the name is defined once and shared by both.
MT_CHECKPOINT = "facebook/mbart-large-50-one-to-many-mmt"
model = MBartForConditionalGeneration.from_pretrained(MT_CHECKPOINT)

# src_lang tells the tokenizer that the text being encoded is English.
tokenizer = MBart50TokenizerFast.from_pretrained(MT_CHECKPOINT, src_lang="en_XX")
def speech_to_text(audio):
    """Transcribe a recorded clip and translate the transcript into Hindi.

    Args:
        audio: Path to the recorded audio file, as supplied by the Gradio
            ``Audio`` input configured with ``type="filepath"``. May be
            ``None`` (or empty) when the user submits without recording.

    Returns:
        The Hindi translation as a single string. An empty string is
        returned when there is no audio or the transcript is blank.
    """
    # Nothing was recorded: there is no file to hand to the ASR pipeline,
    # so return an empty result instead of failing inside it.
    if not audio:
        return ""

    text = transcribe(audio)["text"]

    # A silent clip yields a blank transcript; skip the translation model.
    if not text.strip():
        return ""

    model_inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated_tokens = model.generate(
        **model_inputs,
        # Force the first generated token to the Hindi language code so the
        # decoder emits the target language.
        forced_bos_token_id=tokenizer.lang_code_to_id["hi_IN"],
    )

    translation = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    # batch_decode returns one string per sequence. Exactly one sentence
    # went in, so return that string: the "text" output component would
    # otherwise render the list's repr (e.g. "['...']").
    return translation[0]
    
# Wire the microphone input to speech_to_text and serve the resulting UI.
demo = gr.Interface(
    fn=speech_to_text,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs="text",
)
demo.launch()