Jaward's picture
Update app.py
ee60cd3 verified
raw
history blame
2.07 kB
import os
import subprocess
import gradio as gr
# Target languages offered to the user.
# Keys are the human-readable names listed in the UI dropdown; values are the
# three-letter codes handed to the translation CLI through ``--tgt_lang``.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}
def transcribe(audio):
    """Return the transcription of *audio*, or a message describing the failure.

    Args:
        audio: Value forwarded unchanged to ``model.stt_file``; ``None`` means
            nothing was recorded or uploaded.

    Returns:
        The first element of the result of ``model.stt_file(audio)``, or a
        human-readable error string when there is no input or the call fails.
    """
    # NOTE(review): ``model`` is not defined anywhere in the visible file, so
    # unless it is provided elsewhere every call with real audio ends up in
    # the error branch below (NameError) — confirm where it should come from.
    if audio is None:
        return "No audio input detected. Please record or upload an audio file."

    try:
        results = model.stt_file(audio)
        return results[0]
    except Exception as err:
        # Failures are reported as text rather than raised.
        return f"Error transcribing audio: {err}"
def translate_speech(audio_file, target_language):
    """Translate recorded speech into *target_language* via ``expressivity_predict``.

    Args:
        audio_file: Path of the input audio file, or ``None`` when nothing was
            recorded.
        target_language: A key of ``LANGUAGE_CODES`` (e.g. ``"French"``).

    Returns:
        The path of the generated audio file (``"translated_audio.wav"``) on
        success; otherwise a human-readable error message string.
    """
    # NOTE(review): create_interface() wires this return value into an audio
    # output component, so the error strings below are not shown as plain
    # text there — consider raising ``gr.Error`` instead.
    if audio_file is None:
        return "No audio input detected. Please record or upload an audio file."

    # The dropdown has no default selection, so the language may be missing;
    # report that explicitly instead of leaking a bare KeyError repr.
    language_code = LANGUAGE_CODES.get(target_language)
    if language_code is None:
        return f"Error translating speech: unsupported target language {target_language!r}"

    output_file = "translated_audio.wav"
    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    try:
        # The output path is fixed, so a file left over from a previous
        # request would make the existence check below pass even if this run
        # wrote nothing. Remove it first so only fresh output is returned.
        if os.path.exists(output_file):
            os.remove(output_file)
        # check=True turns a non-zero exit status into CalledProcessError.
        subprocess.run(command, check=True)
    except Exception as e:
        # UI boundary: report the failure as text instead of propagating it.
        return f"Error translating speech: {str(e)}"

    if not os.path.exists(output_file):
        return "Error: Translated audio file not found."

    print(f"File created successfully: {output_file}")
    return output_file
def create_interface():
    """Build the Gradio interface for the speech-to-speech translator.

    Returns:
        A ``gr.Interface`` that calls ``translate_speech`` with a microphone
        recording and the chosen target language, and sends the result to an
        audio output component.
    """
    # type="filepath" makes the component hand the recording to the callback
    # as a path on disk.
    microphone = gr.Audio(
        label="User",
        sources="microphone",
        type="filepath",
        waveform_options=False,
    )
    # Choices are the display names, in the order LANGUAGE_CODES declares them.
    language_picker = gr.Dropdown(list(LANGUAGE_CODES), label="Target Language")
    translated = gr.Audio(
        label="Translated Audio",
        interactive=False,
        autoplay=True,
        elem_classes="audio",
    )

    interface = gr.Interface(
        fn=translate_speech,
        inputs=[microphone, language_picker],
        outputs=translated,
        title="Seamless Expressive Speech-To-Speech Translator",
        description="Hear how you sound in another language.",
    )
    return interface
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    create_interface().launch()