import os
import torch
import gradio as gr
from transformers import pipeline
title = "Transcribe speech in several languages"
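# Run the Whisper pipeline on GPU when one is available, otherwise fall back to CPU.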
device = "cuda:0" if torch.cuda.is_available() else "cpu"
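# Two ASR pipelines: a German-only wav2vec2 model and the multilingual Whisper model.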
asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german")
asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-large", device=device)
def transcribeFile(audio_path: str) -> str:
    transcription = asr_pipe_audio2Text_Ge(audio_path)
    return transcription["text"]
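# Whisper's "translate" task always translates the spoken audio into English text.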
def translateAudio(audio_path):
    translationOutput = asr_pipe_whisper(audio_path, max_new_tokens=256, generate_kwargs={"task": "translate"})
    return translationOutput["text"]
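# Transcribe in the selected source language, then append an English translation.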
def transcribeFileMulti(inputlang, audio_path: str) -> str:
    if inputlang == "English":
        transcription = asr_pipe_whisper(audio_path)
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path)
    translation = translateAudio(audio_path)
    output = transcription["text"] + "\n" + translation
    return output
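# Tab 1: transcribe an uploaded audio file with the German wav2vec2 model.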
app1 = gr.Interface(
    fn=transcribeFile,
    inputs=gr.Audio(label="Upload audio file", type="filepath"),
    outputs="text",
    title=title
)
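# Tab 2: transcribe microphone input in English or German and append an English translation.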
app2 = gr.Interface(
    fn=transcribeFileMulti,
    inputs=[gr.Radio(["English", "German"], value="German", label="Source Language", info="Select the language of the speech you want to transcribe"),
            gr.Audio(source="microphone", type="filepath")],
    outputs="text",
    title=title
)
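# Combine both interfaces into a single tabbed demo.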
demo = gr.TabbedInterface([app1, app2], ["Audio File", "Microphone"])

if __name__ == "__main__":
    demo.launch()