Speech2Text_Multi

Running

File size: 1,738 Bytes

6b9c021
 
c3ed104
6b9c021
 
a4340f7
c27a48e
6b9c021
c27a48e
cd94084
6b9c021
a543e48
 
 
c27a48e
 
 
 
a4340f7
 
 
c27a48e
a4340f7
c27a48e
 
dc74cbe
 
5721619
 
6b9c021
 
a4340f7
6b9c021
 
 
 
 
 
 
 
 
a4340f7
ec5ebbb
a4340f7
6b9c021

import os
import gradio as gr
import torch
from transformers import pipeline

# Title passed to each Gradio interface below.
title = "Transcribe speech several languages"
# Use the first CUDA GPU when torch reports one, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# German speech-recognition pipeline (checkpoint name indicates a German
# fine-tune of wav2vec2 XLSR-53). Placed on the same device as the Whisper
# pipeline so both models benefit from a GPU when one is present.
asr_pipe_audio2Text_Ge = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-german",
    device=device,
)
# Whisper pipeline; the functions in this file use it both for plain
# transcription and with the "translate" generation task.
asr_pipe_whisper = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
    device=device,
)

def transcribeFile(audio_path: str) -> str:
    """Transcribe an audio file with the German wav2vec2 pipeline.

    Args:
        audio_path: Path of the audio file handed to the pipeline.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    return asr_pipe_audio2Text_Ge(audio_path)["text"]

def translateAudio(audio_path):
    """Run the Whisper pipeline on an audio file with the ``translate`` task.

    Args:
        audio_path: Path of the audio file handed to the pipeline.

    Returns:
        The value stored under the ``"text"`` key of the pipeline result.
    """
    whisper_result = asr_pipe_whisper(
        audio_path,
        max_new_tokens=256,  # cap on the number of generated tokens
        generate_kwargs={"task": "translate"},
    )
    return whisper_result["text"]

def transcribeFileMulti(inputlang, audio_path: str) -> str:
    """Transcribe audio according to the selected source language.

    Args:
        inputlang: Source language label; ``"English"`` or ``"German"``.
        audio_path: Path of the audio file handed to the pipelines.

    Returns:
        For ``"English"``, the text produced by the Whisper pipeline.
        For ``"German"``, the text produced by the German pipeline followed,
        on a new line, by the text returned from ``translateAudio``.

    Raises:
        ValueError: If ``inputlang`` is not one of the supported labels.
    """
    if inputlang == "English":
        return asr_pipe_whisper(audio_path)["text"]

    if inputlang == "German":
        german_text = asr_pipe_audio2Text_Ge(audio_path)["text"]
        # translateAudio already returns the text string (not the pipeline
        # dict), so it must not be indexed with ["text"] again.
        translated_text = translateAudio(audio_path)
        # Separate the two texts so they do not run together in the output.
        return f"{german_text}\n{translated_text}"

    # Fail with a clear message instead of an UnboundLocalError.
    raise ValueError(f"Unsupported source language: {inputlang!r}")


    
# File-upload interface: passes the uploaded file's path to transcribeFile.
app1 = gr.Interface(
    fn=transcribeFile,
    # Use the top-level gr.Audio component (as the microphone interface in
    # this file does) instead of the deprecated gr.inputs namespace.
    inputs=gr.Audio(source="upload", type="filepath", label="Upload audio file"),
    outputs="text",
    title=title,
)


# Microphone interface: a language selector plus a recorded clip, both
# forwarded (in this order) to transcribeFileMulti.
app2 = gr.Interface(
    fn=transcribeFileMulti,
    inputs=[
        gr.Radio(
            choices=["English", "German"],
            value="German",
            label="Source Language",
            info="Select the language of the speech you want to transcribe",
        ),
        gr.Audio(type="filepath", source="microphone"),
    ],
    outputs="text",
    title=title,
)


# Expose both interfaces as tabs of a single app.
demo = gr.TabbedInterface(
    interface_list=[app1, app2],
    tab_names=["Audio File", "Microphone"],
)

if __name__ == "__main__":
    # Launch the app only when this file is executed as a script.
    demo.launch()