Spaces:
Runtime error
Runtime error
File size: 3,762 Bytes
feba911 f794f86 36e373b 40817ec f794f86 feba911 e83f42b feba911 933b458 feba911 33b8d42 d3cc82a db5c8d0 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 8ce4e5c d3cc82a 8ce4e5c 33b8d42 a95d76f d3cc82a 33b8d42 241ba79 d3cc82a 33b8d42 933b458 feba911 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
import gradio as gr
# Load the three hosted models that make up the speech-translation pipeline.

# 1) Speech recognition: audio recorded from the microphone is handed to the
#    HuBERT ASR model as a file path.
_mic_input = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Record Audio",
)
speech2text = gr.Interface.load(
    "huggingface/facebook/hubert-large-ls960-ft",
    inputs=_mic_input,
)

# 2) Text translation: English text in, Spanish text out.
_spanish_text = gr.outputs.Textbox(label="English to Spanish Translated Text")
translator = gr.Interface.load(
    "huggingface/Helsinki-NLP/opus-mt-en-es",
    outputs=_spanish_text,
)

# 3) Speech synthesis: Spanish text in, Spanish audio out. Flagging is
#    switched off for this interface.
_spanish_audio = gr.outputs.Audio(label="English to Spanish Translated Audio")
text2speech = gr.Interface.load(
    "huggingface/facebook/tts_transformer-es-css10",
    outputs=_spanish_audio,
    allow_flagging="never",
)

# Compose the models. `translate` chains ASR into the translator (speech ->
# Spanish text); `en2es` extends that chain with TTS (speech -> Spanish audio).
translate = gr.Series(speech2text, translator)
en2es = gr.Series(translate, text2speech)

# Combine both chains into one interface so that one recording yields the
# Spanish text output alongside the Spanish audio output.
ui = gr.Parallel(translate, en2es)
# Presentation settings for the combined interface: heading, short HTML
# blurb, example inputs, flagging behaviour and colour theme.
ui.title = "English to Spanish Speech Translator"
ui.description = (
    "<center>A useful tool in translating English to Spanish audio. "
    "All pre-trained models are found in huggingface.</center>"
)
# One example row per audio file; the paths are relative, so the files are
# presumably shipped next to this script.
ui.examples = [
    ["ljspeech.wav"],
    ["ljspeech2.wav"],
]
ui.allow_flagging = "never"
ui.theme = "peach"
# Long-form HTML attached to the interface, describing the three pre-trained
# models (speech recognition, text translation, speech synthesis).
# Every href value and every link label is kept on a single physical line:
# a line break inside an href puts a raw newline in the URL, and a line break
# inside a link label is rendered as a space in the middle of the name.
# No blank lines are used inside the string so the HTML stays one contiguous
# block.
ui.article = """<h2>Pre-trained model Information</h2>
<h3>Automatic Speech Recognition</h3>
<p style='text-align: justify'>The model used for the ASR part of this space is from
<a href="https://huggingface.co/facebook/hubert-large-ls960-ft">hubert-large-ls960-ft</a> which is pretrained and fine-tuned on <b>960 hours of
Librispeech</b> on 16kHz sampled speech audio. This model has a self-reported <b>word error rate (WER)</b> of <b>1.9
percent</b> and ranks first in <i>paperswithcode</i> for ASR on Librispeech. More information can be
found on its website at
<a href="https://ai.facebook.com/blog/hubert-self-supervised-representation-learning-for-speech-recognition-generation-and-compression">hubert-self</a>
and original model is under
<a href="https://github.com/pytorch/fairseq/tree/main/examples/hubert">pytorch/fairseq</a>.</p>
<h3>Text Translator</h3>
<p style='text-align: justify'>The English to Spanish text translator pre-trained model is from
<a href="https://huggingface.co/Helsinki-NLP/opus-mt-en-es">Helsinki-NLP/opus-mt-en-es</a> which is part of the <b>The
Tatoeba Translation Challenge
(v2021-08-07)</b> as seen from its github repo at
<a href="https://github.com/Helsinki-NLP/Tatoeba-Challenge">Helsinki-NLP/Tatoeba-Challenge</a>. This project aims to develop
machine
translation in real-world
cases for many languages. </p>
<h3>Text to Speech</h3>
<p style='text-align: justify'> The TTS model used is from
<a href="https://huggingface.co/facebook/tts_transformer-es-css10">facebook/tts_transformer-es-css10</a>.
This model uses the <b>Fairseq(-py)</b> sequence modeling toolkit for speech synthesis, in this case, specifically TTS
for Spanish. More information can be seen on their git at
<a href="https://github.com/pytorch/fairseq/tree/main/examples/speech_synthesis">speech_synthesis</a>. </p>
"""
# Start serving the combined interface.
# NOTE(review): inbrowser=True is understood to ask Gradio to open the UI in
# a browser tab on launch -- confirm against the installed Gradio version.
ui.launch(inbrowser=True)
|