Jaward committed on
Commit
f466968
·
verified ·
1 Parent(s): 738c0d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -82
app.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import subprocess
3
  import gradio as gr
4
- from streaming_stt_nemo import NemoSTT
5
 
6
  # Supported languages
7
  LANGUAGE_CODES = {
@@ -13,18 +12,6 @@ LANGUAGE_CODES = {
13
  "Chinese": "cmn"
14
  }
15
 
16
- # Initialize the NemoSTT model
17
- model = NemoSTT()
18
-
19
- def transcribe(audio):
20
- if audio is None:
21
- return "No audio detected. Please record or upload an audio file."
22
- try:
23
- text = model.stt_file(audio)[0]
24
- return text
25
- except AttributeError:
26
- return "Error processing audio. Please try again."
27
-
28
  def translate_speech(audio_file, target_language):
29
  """
30
  Translate input speech (audio file) to the specified target language.
@@ -34,83 +21,49 @@ def translate_speech(audio_file, target_language):
34
  target_language (str): The target language for translation.
35
 
36
  Returns:
37
- str: Path to the translated audio file or error message.
38
  """
39
- if audio_file is None:
40
- return "No audio detected. Please record or upload an audio file."
41
-
42
  language_code = LANGUAGE_CODES[target_language]
43
  output_file = "translated_audio.wav"
44
 
45
- try:
46
- command = [
47
- "expressivity_predict",
48
- audio_file,
49
- "--tgt_lang", language_code,
50
- "--model_name", "seamless_expressivity",
51
- "--vocoder_name", "vocoder_pretssel",
52
- "--gated-model-dir", "seamlessmodel",
53
- "--output_path", output_file
54
- ]
55
-
56
- subprocess.run(command, check=True)
57
 
58
- if os.path.exists(output_file):
59
- print(f"File created successfully: {output_file}")
60
- return output_file
61
- else:
62
- print(f"File not found: {output_file}")
63
- return "Error: Translated audio file not created."
64
 
65
- except subprocess.CalledProcessError as e:
66
- print(f"Error during translation: {e}")
67
- return f"Error during translation: {e}"
68
 
69
  def create_interface():
70
  """Create and configure the Gradio interface."""
71
 
72
- with gr.Blocks() as demo:
73
- gr.Markdown("# Seamless Expressive Speech-To-Speech Translator")
74
- gr.Markdown("Hear how you sound in another language.")
75
-
76
- with gr.Row():
77
- audio_input = gr.Audio(
78
- label="User Input",
79
- sources="microphone",
80
- type="filepath"
81
- )
82
- language_dropdown = gr.Dropdown(
83
- list(LANGUAGE_CODES.keys()),
84
- label="Target Language",
85
- value="Spanish" # Default value
86
- )
87
-
88
- with gr.Row():
89
- transcription_output = gr.Textbox(label="Transcription")
90
- translated_audio_output = gr.Audio(
91
- label="Translated Audio",
92
- interactive=False,
93
- autoplay=True
94
- )
95
-
96
- translate_btn = gr.Button("Translate")
97
-
98
- # Transcribe and translate when the button is clicked
99
- translate_btn.click(
100
- fn=lambda audio, lang: (transcribe(audio), translate_speech(audio, lang)),
101
- inputs=[audio_input, language_dropdown],
102
- outputs=[transcription_output, translated_audio_output]
103
- )
104
-
105
- # Clear outputs when audio input changes
106
- audio_input.change(
107
- fn=lambda: (None, None),
108
- inputs=[],
109
- outputs=[transcription_output, translated_audio_output]
110
- )
111
-
112
- return demo
113
 
114
- if __name__ == "__main__":
115
- demo = create_interface()
116
- demo.launch()
 
1
  import os
2
  import subprocess
3
  import gradio as gr
 
4
 
5
  # Supported languages
6
  LANGUAGE_CODES = {
 
12
  "Chinese": "cmn"
13
  }
14
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  def translate_speech(audio_file, target_language):
16
  """
17
  Translate input speech (audio file) to the specified target language.
 
21
  target_language (str): The target language for translation.
22
 
23
  Returns:
24
+ str: Path to the translated audio file.
25
  """
 
 
 
26
  language_code = LANGUAGE_CODES[target_language]
27
  output_file = "translated_audio.wav"
28
 
29
+ command = [
30
+ "expressivity_predict",
31
+ audio_file,
32
+ "--tgt_lang", language_code,
33
+ "--model_name", "seamless_expressivity",
34
+ "--vocoder_name", "vocoder_pretssel",
35
+ "--gated-model-dir", "seamlessmodel",
36
+ "--output_path", output_file
37
+ ]
38
+
39
+ subprocess.run(command, check=True)
 
40
 
41
+ if os.path.exists(output_file):
42
+ print(f"File created successfully: {output_file}")
43
+ else:
44
+ print(f"File not found: {output_file}")
 
 
45
 
46
+ return output_file
 
 
47
 
48
  def create_interface():
49
  """Create and configure the Gradio interface."""
50
 
51
+ inputs = [
52
+ gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
53
+ gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
54
+ ]
55
+
56
+ return gr.Interface(
57
+ fn=translate_speech,
58
+ inputs=inputs,
59
+ outputs=gr.Audio(label="Translated Audio",
60
+ interactive=False,
61
+ autoplay=True,
62
+ elem_classes="audio"),
63
+ title="Seamless Expressive Speech-To-Speech Translator",
64
+ description="Hear how you sound in another language.",
65
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ if __name__ == "__main__":
68
+ iface = create_interface()
69
+ iface.launch()