Jaward committed on
Commit
738c0d0
·
verified ·
1 Parent(s): 99623cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +81 -34
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
  import subprocess
3
  import gradio as gr
 
4
 
5
  # Supported languages
6
  LANGUAGE_CODES = {
@@ -12,6 +13,18 @@ LANGUAGE_CODES = {
12
  "Chinese": "cmn"
13
  }
14
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def translate_speech(audio_file, target_language):
16
  """
17
  Translate input speech (audio file) to the specified target language.
@@ -21,49 +34,83 @@ def translate_speech(audio_file, target_language):
21
  target_language (str): The target language for translation.
22
 
23
  Returns:
24
- str: Path to the translated audio file.
25
  """
 
 
 
26
  language_code = LANGUAGE_CODES[target_language]
27
  output_file = "translated_audio.wav"
28
 
29
- command = [
30
- "expressivity_predict",
31
- audio_file,
32
- "--tgt_lang", language_code,
33
- "--model_name", "seamless_expressivity",
34
- "--vocoder_name", "vocoder_pretssel",
35
- "--gated-model-dir", "seamlessmodel",
36
- "--output_path", output_file
37
- ]
38
-
39
- subprocess.run(command, check=True)
 
40
 
41
- if os.path.exists(output_file):
42
- print(f"File created successfully: {output_file}")
43
- else:
44
- print(f"File not found: {output_file}")
 
 
45
 
46
- return output_file
 
 
47
 
48
  def create_interface():
49
  """Create and configure the Gradio interface."""
50
 
51
- inputs = [
52
- gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
53
- gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
54
- ]
55
-
56
- return gr.Interface(
57
- fn=translate_speech,
58
- inputs=inputs,
59
- outputs=gr.Audio(label="Translated Audio",
60
- interactive=False,
61
- autoplay=True,
62
- elem_classes="audio"),
63
- title="Seamless Expressive Speech-To-Speech Translator",
64
- description="Hear how you sound in another language.",
65
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  if __name__ == "__main__":
68
- iface = create_interface()
69
- iface.launch()
 
1
  import os
2
  import subprocess
3
  import gradio as gr
4
+ from streaming_stt_nemo import NemoSTT
5
 
6
  # Supported languages
7
  LANGUAGE_CODES = {
 
13
  "Chinese": "cmn"
14
  }
15
 
# Build the speech-to-text model once at module import so that every call to
# transcribe() reuses the same instance.
model = NemoSTT()
18
+
def transcribe(audio):
    """
    Transcribe an audio file to text using the module-level NemoSTT model.

    Args:
        audio (str | None): Path to the audio file to transcribe, or None
            when no audio was provided.

    Returns:
        str: The first element of ``model.stt_file(audio)``, or a
            user-facing message when there is no audio or transcription fails.
    """
    if audio is None:
        return "No audio detected. Please record or upload an audio file."

    try:
        return model.stt_file(audio)[0]
    except Exception as e:
        # This function is called from a UI callback that also produces the
        # translated audio. Catching only AttributeError let any other failure
        # (e.g. IndexError on an empty result) abort the whole callback, so
        # every failure is logged and reported as the same message instead.
        print(f"Error during transcription: {e}")
        return "Error processing audio. Please try again."
27
+
def translate_speech(audio_file, target_language):
    """
    Translate input speech (audio file) to the specified target language.

    Runs the ``expressivity_predict`` command-line tool and asks it to write
    its result to ``translated_audio.wav`` (a relative path).

    Args:
        audio_file (str | None): Path to the input audio file, or None when
            no audio was provided.
        target_language (str): The target language for translation; must be
            a key of LANGUAGE_CODES.

    Returns:
        str: Path to the translated audio file or error message.
    """
    if audio_file is None:
        return "No audio detected. Please record or upload an audio file."

    language_code = LANGUAGE_CODES.get(target_language)
    if language_code is None:
        # Report an unknown (or None) language like every other failure
        # instead of letting a KeyError escape.
        return f"Error: Unsupported target language: {target_language}"

    output_file = "translated_audio.wav"
    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    try:
        # The output path is fixed, so drop any file left by an earlier call;
        # otherwise a stale result could be returned as this run's output.
        if os.path.exists(output_file):
            os.remove(output_file)

        subprocess.run(command, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable `expressivity_predict`
        # binary, which subprocess reports as FileNotFoundError/PermissionError
        # rather than CalledProcessError.
        print(f"Error during translation: {e}")
        return f"Error during translation: {e}"

    if os.path.exists(output_file):
        print(f"File created successfully: {output_file}")
        return output_file

    print(f"File not found: {output_file}")
    return "Error: Translated audio file not created."
68
 
def create_interface():
    """
    Create and configure the Gradio interface.

    Returns:
        gr.Blocks: The assembled (not yet launched) Blocks app.
    """

    def run_pipeline(audio, lang):
        """Transcribe and translate *audio*; return (text, audio path or None)."""
        transcription = transcribe(audio)
        translated = translate_speech(audio, lang)

        # translate_speech returns an error message string on failure. The
        # audio output would treat that string as a file path, so show the
        # message as a warning and leave the audio output empty instead.
        if not os.path.exists(translated):
            gr.Warning(translated)
            return transcription, None

        return transcription, translated

    with gr.Blocks() as demo:
        gr.Markdown("# Seamless Expressive Speech-To-Speech Translator")
        gr.Markdown("Hear how you sound in another language.")

        with gr.Row():
            audio_input = gr.Audio(
                label="User Input",
                sources="microphone",
                type="filepath"
            )
            language_dropdown = gr.Dropdown(
                list(LANGUAGE_CODES.keys()),
                label="Target Language",
                value="Spanish"  # Default value
            )

        with gr.Row():
            transcription_output = gr.Textbox(label="Transcription")
            translated_audio_output = gr.Audio(
                label="Translated Audio",
                interactive=False,
                autoplay=True
            )

        translate_btn = gr.Button("Translate")

        # Transcribe and translate when the button is clicked
        translate_btn.click(
            fn=run_pipeline,
            inputs=[audio_input, language_dropdown],
            outputs=[transcription_output, translated_audio_output]
        )

        # Clear outputs when audio input changes
        audio_input.change(
            fn=lambda: (None, None),
            inputs=[],
            outputs=[transcription_output, translated_audio_output]
        )

    return demo
113
 
if __name__ == "__main__":
    # Script entry point: build the Blocks app, then start serving it.
    demo = create_interface()
    demo.launch()