File size: 3,735 Bytes
feba911
 
 
 
f794f86
36e373b
40817ec
f794f86
feba911
 
 
 
 
 
 
 
e83f42b
 
 
feba911
 
 
 
 
 
33b8d42
 
 
d3cc82a
db5c8d0
 
a95d76f
 
d3cc82a
 
33b8d42
241ba79
d3cc82a
 
8ce4e5c
d3cc82a
 
8ce4e5c
33b8d42
 
a95d76f
d3cc82a
 
33b8d42
241ba79
d3cc82a
33b8d42
feba911
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import gradio as gr

# --- Pre-trained models, loaded by name through gr.Interface.load ---

# Speech recognition (ASR): audio recorded from the microphone is handed to
# the HuBERT model as a file path.
_mic_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record Audio",
)
speech2text = gr.Interface.load(
    "huggingface/facebook/hubert-large-ls960-ft",
    inputs=_mic_input,
)

# Text translation: English text in, Spanish text out.
_translated_text = gr.outputs.Textbox(label="English to Spanish Translated Text")
translator = gr.Interface.load(
    "huggingface/Helsinki-NLP/opus-mt-en-es",
    outputs=_translated_text,
)

# Text to speech (TTS): Spanish text in, Spanish audio out. Flagging is
# switched off for this interface.
_translated_audio = gr.outputs.Audio(label="English to Spanish Translated Audio")
text2speech = gr.Interface.load(
    "huggingface/facebook/tts_transformer-es-css10",
    outputs=_translated_audio,
    allow_flagging="never",
)

# --- Pipelines built from the models above ---

# speech -> English text -> Spanish text
translate = gr.Series(speech2text, translator)

# speech -> English text -> Spanish text -> Spanish audio
en2es = gr.Series(translate, text2speech)

# Combined app: exposes the Spanish text (translate) alongside the Spanish
# audio (en2es) for the same recording.
# NOTE(review): both branches start from `translate`, so ASR and translation
# presumably run twice per request -- confirm against Gradio's mix behaviour.
ui = gr.Parallel(translate, en2es)

# Gradio interface metadata.
# These are set as attributes on the already-built `ui` object (rather than
# passed to its constructor) and are in place before `ui.launch()` runs.
ui.title = "English to Spanish Speech Translator"
ui.description = """<center>A useful tool in translating English to Spanish audio. All pre-trained models are found in huggingface.</center>"""
# Example recordings offered in the UI, one single-input row per file.
# NOTE(review): relative paths -- presumably the .wav files sit next to this
# script; confirm they are shipped with the app.
ui.examples = [["ljspeech.wav"], ["ljspeech2.wav"]]
ui.theme = "peach"
# HTML article describing the three models used by the app.
# Every href (and its link text) is kept on a single source line: a URL split
# across lines picks up the indentation spaces, which browsers percent-encode,
# producing a dead link.
ui.article = """<h2>Pre-trained model Information</h2>
                <h3>Automatic Speech Recognition</h3>
                <p style='text-align: justify'>The model used for the ASR part of this space is from
                <a href=\"https://huggingface.co/facebook/hubert-large-ls960-ft">hubert-large-ls960-ft</a> which is pretrained and fine-tuned on <b>960 hours of 
                Librispeech</b> on 16kHz sampled speech audio. This model has a self-reported <b>word error rate (WER)</b> of <b>1.9 
                percent</b> and ranks first in <i>paperswithcode</i> for ASR on Librispeech. More information can be 
                found on its website at <a href=\"https://ai.facebook.com/blog/hubert-self-supervised-representation-learning-for-speech-recognition-generation-and-compression">hubert-self</a> and 
                original model is under <a href=\"https://github.com/pytorch/fairseq/tree/main/examples/hubert">pytorch/fairseq</a>.</p>
                <h3>Text Translator</h3>
                <p style='text-align: justify'>The English to Spanish text translator pre-trained model is from 
                <a href=\"https://huggingface.co/Helsinki-NLP/opus-mt-en-es">Helsinki-NLP/opus-mt-en-es</a> which is part of the <b>The 
                Tatoeba Translation Challenge 
                (v2021-08-07)</b> as seen from its github repo at 
                <a href=\"https://github.com/Helsinki-NLP/Tatoeba-Challenge">Helsinki-NLP/Tatoeba-Challenge</a>. This project aims to develop 
                machine 
                translation in real-world 
                cases for many languages. </p>
                <h3>Text to Speech</h3>
                <p style='text-align: justify'> The TTS model used is from <a href=\"https://huggingface.co/facebook/tts_transformer-es-css10">facebook/tts_transformer-es-css10</a>. 
                This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case, specifically TTS 
                for Spanish. More information can be seen on their git at 
                <a href=\"https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis">speech_synthesis</a>. </p>
            """


# Start the app; inbrowser=True asks Gradio to open it in a browser tab.
ui.launch(inbrowser=True)