Tlanextli committed on
Commit
ea2f95f
·
1 Parent(s): 6778a6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -6
app.py CHANGED
@@ -3,15 +3,20 @@ import gradio as gr
3
  import torch
4
  from transformers import pipeline
5
 
 
6
  title = "Transcribe speech several languages"
7
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
8
 
9
- asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german", chunk_length_s=25)
10
  asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-base", device=device)
11
 
12
- def transcribeFile(audio_path : str) -> str:
13
- #transcription = asr_pipe_audio2Text_Ge(audio_path)
14
- transcription = asr_pipe_whisper(audio_path, max_new_tokens=256, generate_kwargs={"task":"transcribe"})
 
 
 
 
15
  return transcription["text"]
16
 
17
  def translateAudio(audio_path):
@@ -33,7 +38,9 @@ def transcribeFileMulti(inputlang, audio_path : str) -> str:
33
 
34
  app1 = gr.Interface(
35
  fn=transcribeFile,
36
- inputs=gr.inputs.Audio(label="Upload audio file", type="filepath"),
 
 
37
  outputs="text",
38
  title=title
39
  )
@@ -41,7 +48,7 @@ app1 = gr.Interface(
41
 
42
  app2 = gr.Interface(
43
  fn=transcribeFileMulti,
44
- inputs=[gr.Radio(["English", "German"], value="German", label="Source Language", info="Select the language of the speech you want to transcribe"),
45
  gr.Audio(source="microphone", type="filepath")],
46
  outputs="text",
47
  title=title
 
3
  import torch
4
  from transformers import pipeline
5
 
6
+
7
  title = "Transcribe speech several languages"
8
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
9
 
10
+ asr_pipe_audio2Text_Ge = pipeline(task="automatic-speech-recognition", model="jonatasgrosman/wav2vec2-large-xlsr-53-german")
11
  asr_pipe_whisper = pipeline(task="automatic-speech-recognition", model="openai/whisper-base", device=device)
12
 
13
def transcribeFile(inputlang, audio_path: str) -> str:
    """Transcribe a speech recording from an uploaded audio file.

    Parameters
    ----------
    inputlang : str
        Source language of the recording; "English" routes to the Whisper
        pipeline, "German" to the German wav2vec2 pipeline.
    audio_path : str
        Filesystem path of the audio file to transcribe.

    Returns
    -------
    str
        The transcribed text.

    Raises
    ------
    ValueError
        If ``inputlang`` is not a supported language. (Previously an
        unsupported value fell through both branches and crashed with
        UnboundLocalError on ``transcription``.)
    """
    # chunk_length_s / stride_length_s enable chunked long-form inference
    # with overlapping strides so chunk boundaries don't cut words.
    if inputlang == "English":
        transcription = asr_pipe_whisper(
            audio_path, chunk_length_s=10, stride_length_s=(4, 2)
        )
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(
            audio_path, chunk_length_s=10, stride_length_s=(4, 2)
        )
    else:
        raise ValueError(f"Unsupported source language: {inputlang!r}")
    return transcription["text"]
21
 
22
  def translateAudio(audio_path):
 
38
 
39
# Tab 1: transcribe an uploaded audio file in the selected source language.
app1 = gr.Interface(
    fn=transcribeFile,
    inputs=[
        gr.Radio(
            ["English", "German"],
            value="German",
            label="Source Language",
            info="Select the language of the speech you want to transcribe",
        ),
        gr.Audio(source="upload", type="filepath", label="Upload audio file"),
    ],
    outputs="text",
    title=title,
)
 
48
 
49
  app2 = gr.Interface(
50
  fn=transcribeFileMulti,
51
+ inputs=[gr.Radio(["English", "German"], value="German", label="Source Language", info="Select the language of the speech you want to transcribe", value="German"),
52
  gr.Audio(source="microphone", type="filepath")],
53
  outputs="text",
54
  title=title