Spaces:

Jaward
/

seamless-speech-translator

Running

App Files Community

Jaward committed on Aug 16, 2024

Commit

738c0d0

verified ·

1 Parent(s): 99623cf

Update app.py

Browse files

Files changed (1) hide show

app.py +81 -34

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import subprocess
 import gradio as gr
 # Supported languages
 LANGUAGE_CODES = {
@@ -12,6 +13,18 @@ LANGUAGE_CODES = {
     "Chinese": "cmn"
 }
 def translate_speech(audio_file, target_language):
     """
     Translate input speech (audio file) to the specified target language.
@@ -21,49 +34,83 @@ def translate_speech(audio_file, target_language):
     target_language (str): The target language for translation.
     Returns:
-    str: Path to the translated audio file.
     """
     language_code = LANGUAGE_CODES[target_language]
     output_file = "translated_audio.wav"
-    command = [
-        "expressivity_predict",
-        audio_file,
-        "--tgt_lang", language_code,
-        "--model_name", "seamless_expressivity",
-        "--vocoder_name", "vocoder_pretssel",
-        "--gated-model-dir", "seamlessmodel",
-        "--output_path", output_file
-    ]
-    subprocess.run(command, check=True)
-    if os.path.exists(output_file):
-        print(f"File created successfully: {output_file}")
-    else:
-        print(f"File not found: {output_file}")
-    return output_file
 def create_interface():
     """Create and configure the Gradio interface."""
-    inputs = [
-        gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
-        gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
-    ]
-    return gr.Interface(
-        fn=translate_speech,
-        inputs=inputs,
-        outputs=gr.Audio(label="Translated Audio",
-                        interactive=False,
-                        autoplay=True,
-                        elem_classes="audio"),
-        title="Seamless Expressive Speech-To-Speech Translator",
-        description="Hear how you sound in another language.",
-    )
 if __name__ == "__main__":
-    iface = create_interface()
-    iface.launch()

 import os
 import subprocess
 import gradio as gr
+from streaming_stt_nemo import NemoSTT
 # Supported languages
 LANGUAGE_CODES = {
     "Chinese": "cmn"
 }
+# Create the NemoSTT model once at module level; transcribe() below reuses
+# this single instance for every request.
+model = NemoSTT()
+def transcribe(audio):
+    """Run speech-to-text on a recorded clip.
+
+    Args:
+        audio: Value supplied by the audio input component, or None when
+            nothing has been recorded. Presumably a file path, since it is
+            handed straight to ``model.stt_file`` -- confirm against caller.
+
+    Returns:
+        The first item of whatever ``model.stt_file(audio)`` returns, or a
+        user-facing message when there is no audio or the call raises
+        ``AttributeError``.
+    """
+    if audio is None:
+        return "No audio detected. Please record or upload an audio file."
+    try:
+        # The index lookup stays inside the try so that the call and the
+        # lookup share the same handled-failure surface.
+        transcripts = model.stt_file(audio)
+        return transcripts[0]
+    except AttributeError:
+        return "Error processing audio. Please try again."
 def translate_speech(audio_file, target_language):
     """
     Translate input speech (audio file) to the specified target language.
+
+    Runs the ``expressivity_predict`` command-line tool on ``audio_file`` and
+    returns the path of the audio file it writes. Failures are raised as
+    ``gr.Error`` instead of being returned as strings: the return value is
+    wired into an audio output component, which would otherwise receive the
+    error text as though it were a file path.
+
+    Args:
+    audio_file (str): Path to the input audio file, or None if nothing was
+        recorded.
     target_language (str): The target language for translation.
+        Must be a key of ``LANGUAGE_CODES``.
     Returns:
+    str: Path to the translated audio file.
+    Raises:
+    gr.Error: If no audio was supplied, the language is not in
+        ``LANGUAGE_CODES``, the tool cannot be started or exits with a
+        non-zero status, or no output file is produced.
     """
+    if audio_file is None:
+        raise gr.Error("No audio detected. Please record or upload an audio file.")
+    try:
+        language_code = LANGUAGE_CODES[target_language]
+    except KeyError:
+        # Also reached when the dropdown has no selection (None).
+        raise gr.Error(f"Unsupported target language: {target_language}") from None
     output_file = "translated_audio.wav"
+    # The output name is fixed, so a file left behind by an earlier run would
+    # make the existence check below pass even if this run wrote nothing.
+    try:
+        os.remove(output_file)
+    except FileNotFoundError:
+        pass
+    command = [
+        "expressivity_predict",
+        audio_file,
+        "--tgt_lang", language_code,
+        "--model_name", "seamless_expressivity",
+        "--vocoder_name", "vocoder_pretssel",
+        "--gated-model-dir", "seamlessmodel",
+        "--output_path", output_file
+    ]
+    try:
+        subprocess.run(command, check=True)
+    except (subprocess.CalledProcessError, OSError) as e:
+        # OSError covers the executable being missing or not runnable;
+        # CalledProcessError covers a non-zero exit status.
+        print(f"Error during translation: {e}")
+        raise gr.Error(f"Error during translation: {e}") from e
+    if not os.path.exists(output_file):
+        print(f"File not found: {output_file}")
+        raise gr.Error("Error: Translated audio file not created.")
+    print(f"File created successfully: {output_file}")
+    return output_file
 def create_interface():
+    """Build the Gradio Blocks app and return it without launching it.
+
+    Layout, top to bottom: a title and tagline, a row holding the microphone
+    input and the target-language dropdown, a row holding the transcription
+    textbox and the translated-audio player, and a "Translate" button.
+
+    Returns:
+        The ``gr.Blocks`` instance with its event listeners attached.
+    """
+    def run_pipeline(audio, lang):
+        # Transcription is evaluated first, then translation; the tuple order
+        # matches the order of the outputs list on the click listener.
+        return (transcribe(audio), translate_speech(audio, lang))
+
+    def reset_outputs():
+        return (None, None)
+
+    with gr.Blocks() as demo:
+        gr.Markdown("# Seamless Expressive Speech-To-Speech Translator")
+        gr.Markdown("Hear how you sound in another language.")
+
+        with gr.Row():
+            mic = gr.Audio(label="User Input", sources="microphone", type="filepath")
+            # NOTE(review): assumes "Spanish" is a key of LANGUAGE_CODES --
+            # the full mapping is not visible here; confirm.
+            language = gr.Dropdown(
+                list(LANGUAGE_CODES.keys()),
+                label="Target Language",
+                value="Spanish",
+            )
+
+        with gr.Row():
+            transcript_box = gr.Textbox(label="Transcription")
+            audio_player = gr.Audio(label="Translated Audio", interactive=False, autoplay=True)
+
+        translate_btn = gr.Button("Translate")
+
+        # Button press: transcribe and translate the current recording.
+        translate_btn.click(
+            fn=run_pipeline,
+            inputs=[mic, language],
+            outputs=[transcript_box, audio_player],
+        )
+
+        # A new or cleared recording invalidates whatever is being shown.
+        mic.change(
+            fn=reset_outputs,
+            inputs=[],
+            outputs=[transcript_box, audio_player],
+        )
+
+    return demo
 if __name__ == "__main__":
+    # Build and serve the app only when this file is executed as a script.
+    demo = create_interface()
+    demo.launch()