Tlanextli committed on
Commit
ea2f95f
·
1 Parent(s): 6778a6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -3,15 +3,20 @@ import gradio as gr
3
  import torch
4
  from transformers import pipeline
5
 
 
6
  title = "Transcribe speech several languages"
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
 
9
- asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german", chunk_length_s=25)
10
  asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-base", device=device)
11
 
12
- def transcribeFile(audio_path : str) -> str:
13
- #transcription = asr_pipe_audio2Text_Ge(audio_path)
14
- transcription = asr_pipe_whisper(audio_path, max_new_tokens=256, generate_kwargs={"task":"transcribe"})
 
 
 
 
15
  return transcription["text"]
16
 
17
  def translateAudio(audio_path):
@@ -33,7 +38,9 @@ def transcribeFileMulti(inputlang, audio_path : str) -> str:
33
 
34
  app1 = gr.Interface(
35
  fn=transcribeFile,
36
- inputs=gr.inputs.Audio(label="Upload audio file", type="filepath"),
 
 
37
  outputs="text",
38
  title=title
39
  )
@@ -41,7 +48,7 @@ app1 = gr.Interface(
41
 
42
  app2 = gr.Interface(
43
  fn=transcribeFileMulti,
44
- inputs=[gr.Radio(["English", "German"], value="German", label="Source Language", info="Select the language of the speech you want to transcribe"),
45
  gr.Audio(source="microphone", type="filepath")],
46
  outputs="text",
47
  title=title
 
3
  import torch
4
  from transformers import pipeline
5
 
6
+
7
  title = "Transcribe speech several languages"
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
9
 
10
+ asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german")
11
  asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-base", device=device)
12
 
13
def transcribeFile(inputlang, audio_path: str) -> str:
    """Transcribe a speech recording from an uploaded audio file.

    Parameters
    ----------
    inputlang : str
        Source language of the recording; "English" routes to the Whisper
        pipeline, "German" to the German wav2vec2 pipeline.
    audio_path : str
        Filesystem path of the audio file to transcribe.

    Returns
    -------
    str
        The transcribed text.

    Raises
    ------
    ValueError
        If ``inputlang`` is not a supported language. (Previously an
        unsupported value fell through both branches and crashed with
        UnboundLocalError on ``transcription``.)
    """
    # chunk_length_s / stride_length_s enable chunked long-form inference
    # with overlapping strides so chunk boundaries don't cut words.
    if inputlang == "English":
        transcription = asr_pipe_whisper(
            audio_path, chunk_length_s=10, stride_length_s=(4, 2)
        )
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(
            audio_path, chunk_length_s=10, stride_length_s=(4, 2)
        )
    else:
        raise ValueError(f"Unsupported source language: {inputlang!r}")
    return transcription["text"]
21
 
22
  def translateAudio(audio_path):
 
38
 
39
# Tab 1: transcribe an uploaded audio file in the selected source language.
app1 = gr.Interface(
    fn=transcribeFile,
    inputs=[
        gr.Radio(
            ["English", "German"],
            value="German",
            label="Source Language",
            info="Select the language of the speech you want to transcribe",
        ),
        gr.Audio(source="upload", type="filepath", label="Upload audio file"),
    ],
    outputs="text",
    title=title,
)
 
48
 
49
  app2 = gr.Interface(
50
  fn=transcribeFileMulti,
51
+ inputs=[gr.Radio(["English", "German"], value="German", label="Source Language", info="Select the language of the speech you want to transcribe", value="German"),
52
  gr.Audio(source="microphone", type="filepath")],
53
  outputs="text",
54
  title=title