Spaces:

Jaward
/

seamless-speech-translator

Sleeping

App Files Files Community

Jaward committed on Aug 17, 2024

Commit

f466968

verified ·

1 Parent(s): 738c0d0

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -82

app.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import os
 import subprocess
 import gradio as gr
-from streaming_stt_nemo import NemoSTT
 # Supported languages
 LANGUAGE_CODES = {
@@ -13,18 +12,6 @@ LANGUAGE_CODES = {
     "Chinese": "cmn"
 }
-# Initialize the NemoSTT model
-model = NemoSTT()
-def transcribe(audio):
-    if audio is None:
-        return "No audio detected. Please record or upload an audio file."
-    try:
-        text = model.stt_file(audio)[0]
-        return text
-    except AttributeError:
-        return "Error processing audio. Please try again."
 def translate_speech(audio_file, target_language):
     """
     Translate input speech (audio file) to the specified target language.
@@ -34,83 +21,49 @@ def translate_speech(audio_file, target_language):
     target_language (str): The target language for translation.
     Returns:
-    str: Path to the translated audio file or error message.
     """
-    if audio_file is None:
-        return "No audio detected. Please record or upload an audio file."
     language_code = LANGUAGE_CODES[target_language]
     output_file = "translated_audio.wav"
-    try:
-        command = [
-            "expressivity_predict",
-            audio_file,
-            "--tgt_lang", language_code,
-            "--model_name", "seamless_expressivity",
-            "--vocoder_name", "vocoder_pretssel",
-            "--gated-model-dir", "seamlessmodel",
-            "--output_path", output_file
-        ]
-        subprocess.run(command, check=True)
-        if os.path.exists(output_file):
-            print(f"File created successfully: {output_file}")
-            return output_file
-        else:
-            print(f"File not found: {output_file}")
-            return "Error: Translated audio file not created."
-    except subprocess.CalledProcessError as e:
-        print(f"Error during translation: {e}")
-        return f"Error during translation: {e}"
 def create_interface():
     """Create and configure the Gradio interface."""
-    with gr.Blocks() as demo:
-        gr.Markdown("# Seamless Expressive Speech-To-Speech Translator")
-        gr.Markdown("Hear how you sound in another language.")
-        with gr.Row():
-            audio_input = gr.Audio(
-                label="User Input",
-                sources="microphone",
-                type="filepath"
-            )
-            language_dropdown = gr.Dropdown(
-                list(LANGUAGE_CODES.keys()),
-                label="Target Language",
-                value="Spanish"  # Default value
-            )
-        with gr.Row():
-            transcription_output = gr.Textbox(label="Transcription")
-            translated_audio_output = gr.Audio(
-                label="Translated Audio",
-                interactive=False,
-                autoplay=True
-            )
-        translate_btn = gr.Button("Translate")
-        # Transcribe and translate when the button is clicked
-        translate_btn.click(
-            fn=lambda audio, lang: (transcribe(audio), translate_speech(audio, lang)),
-            inputs=[audio_input, language_dropdown],
-            outputs=[transcription_output, translated_audio_output]
-        )
-        # Clear outputs when audio input changes
-        audio_input.change(
-            fn=lambda: (None, None),
-            inputs=[],
-            outputs=[transcription_output, translated_audio_output]
-        )
-    return demo
-if __name__ == "__main__":
-    demo = create_interface()
-    demo.launch()

 import os
 import subprocess
 import gradio as gr
 # Supported languages
 LANGUAGE_CODES = {
     "Chinese": "cmn"
 }
 def translate_speech(audio_file, target_language):
     """
     Translate input speech (audio file) to the specified target language.
     target_language (str): The target language for translation.
     Returns:
+    str: Path to the translated audio file.
     """
     language_code = LANGUAGE_CODES[target_language]
     output_file = "translated_audio.wav"
+    command = [
+        "expressivity_predict",
+        audio_file,
+        "--tgt_lang", language_code,
+        "--model_name", "seamless_expressivity",
+        "--vocoder_name", "vocoder_pretssel",
+        "--gated-model-dir", "seamlessmodel",
+        "--output_path", output_file
+    ]
+    subprocess.run(command, check=True)
+    if os.path.exists(output_file):
+        print(f"File created successfully: {output_file}")
+    else:
+        print(f"File not found: {output_file}")
+    return output_file
 def create_interface():
     """Create and configure the Gradio interface."""
+    inputs = [
+        gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
+        gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
+    ]
+    return gr.Interface(
+        fn=translate_speech,
+        inputs=inputs,
+        outputs=gr.Audio(label="Translated Audio",
+                        interactive=False,
+                        autoplay=True,
+                        elem_classes="audio"),
+        title="Seamless Expressive Speech-To-Speech Translator",
+        description="Hear how you sound in another language.",
+    )
+if __name__ == "__main__":
+    iface = create_interface()
+    iface.launch()