denis-kazakov committed
Commit fee55e0 · verified · 1 Parent(s): 6297784

Upload app.py

Files changed (1)
  app.py: +4 -4
app.py CHANGED
@@ -29,11 +29,11 @@ def speech_to_speech_translation(audio):
     translated_text = translate(audio)
     synthesised_speech = synthesise(translated_text)
     synthesised_speech = (synthesised_speech.numpy() * max_range).astype(np.int16)
-    return 16000, synthesised_speech
+    return [translated_text, (16000, synthesised_speech)]

 title = "Cascaded STST"
 description = """
-Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in English. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
+Demo for cascaded speech-to-speech translation (STST), mapping from source speech in any language to target speech in Russian. Demo uses OpenAI's [Whisper Base](https://huggingface.co/openai/whisper-base) model for speech translation, and Microsoft's
 [SpeechT5 TTS](https://huggingface.co/microsoft/speecht5_tts) model for text-to-speech:

 ![Cascaded STST](https://huggingface.co/datasets/huggingface-course/audio-course-images/resolve/main/s2st_cascaded.png "Diagram of cascaded speech to speech translation")
@@ -44,7 +44,7 @@ demo = gr.Blocks()
 mic_translate = gr.Interface(
     fn=speech_to_speech_translation,
     inputs=gr.Audio(source="microphone", type="filepath"),
-    outputs=gr.Audio(label="Generated Speech", type="numpy"),
+    outputs=['text', gr.Audio(label="Generated Speech", type="numpy")],
     title=title,
     description=description,
 )
@@ -52,7 +52,7 @@ mic_translate = gr.Interface(
 file_translate = gr.Interface(
     fn=speech_to_speech_translation,
     inputs=gr.Audio(source="upload", type="filepath"),
-    outputs=gr.Audio(label="Generated Speech", type="numpy"),
+    outputs=['text', gr.Audio(label="Generated Speech", type="numpy")],
     examples=[["./example.wav"]],
     title=title,
     description=description,
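
For context, a minimal, self-contained sketch of the two-output wiring this commit introduces (not the Space's full app.py): the handler now returns the translated text alongside a (sample_rate, waveform) tuple, and the Interface lists a matching text component before the audio component. The translate() and synthesise() bodies below are placeholders standing in for the Whisper and SpeechT5 calls, and max_range = 32767 (int16 full scale) is assumed from the surrounding code.

import numpy as np
import gradio as gr

max_range = 32767  # assumed int16 full scale, used to convert float audio in [-1, 1]


def translate(audio):
    # Placeholder: the real helper runs Whisper speech translation on `audio`.
    return "translated text"


def synthesise(text):
    # Placeholder: the real helper runs SpeechT5 TTS; here we return a silent float array.
    return np.zeros(16000, dtype=np.float32)


def speech_to_speech_translation(audio):
    translated_text = translate(audio)
    synthesised_speech = synthesise(translated_text)
    synthesised_speech = (synthesised_speech * max_range).astype(np.int16)
    # Two return values, matching the two entries in `outputs` below:
    # the translated text and a (sample_rate, waveform) tuple for gr.Audio.
    return [translated_text, (16000, synthesised_speech)]


demo = gr.Interface(
    fn=speech_to_speech_translation,
    # `source=` is the Gradio 3.x-style argument used in the diff above.
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=['text', gr.Audio(label="Generated Speech", type="numpy")],
)

if __name__ == "__main__":
    demo.launch()

Gradio maps the returned list positionally onto the output components, so the first element fills the textbox and the second is rendered as playable audio; the practical effect of the change is that the intermediate translation is now visible in the UI alongside the synthesised speech.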