|
import os |
|
import subprocess |
|
import gradio as gr |
|
|
|
|
|
# Display name shown in the UI -> language code passed to the translation
# command via ``--tgt_lang``. Insertion order is the order of the dropdown
# choices built from this mapping.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}
|
|
|
def transcribe(audio):
    """Return the transcript for ``audio``, or a message explaining the failure.

    Args:
        audio: Value handed unchanged to ``model.stt_file``; ``None`` means no
            input was supplied. (Presumably an audio file path -- confirm.)

    Returns:
        The first element of ``model.stt_file(audio)`` on success. Otherwise a
        human-readable message string: any ``Exception`` raised along the way
        is converted into an error message instead of being propagated.
    """
    if audio is None:
        return "No audio input detected. Please record or upload an audio file."

    # NOTE(review): ``model`` is not defined in the part of the file visible
    # here; unless it is provided elsewhere, the lookup raises NameError, which
    # the handler below turns into an error message.
    try:
        results = model.stt_file(audio)
        transcript = results[0]
    except Exception as err:
        return f"Error transcribing audio: {err}"
    else:
        return transcript
|
|
|
def translate_speech(audio_file, target_language):
    """Translate a recording into ``target_language`` via ``expressivity_predict``.

    Runs the external ``expressivity_predict`` command on ``audio_file``,
    asking it to write its result to ``translated_audio.wav`` in the current
    working directory.

    Args:
        audio_file: Path of the input recording, or ``None`` when no audio was
            supplied.
        target_language: A key of ``LANGUAGE_CODES`` (for example
            ``"French"``). A missing or unknown value yields an error message.

    Returns:
        The output file path on success; otherwise a human-readable message
        string describing what went wrong. Failures of the command are
        reported through the return value, not raised.
    """
    if audio_file is None:
        return "No audio input detected. Please record or upload an audio file."

    # Report a missing selection directly. Letting the lookup below fail would
    # only surface as the opaque text "Error translating speech: None".
    # (Presumably the value is None when the dropdown is left untouched.)
    if not target_language:
        return "No target language selected. Please choose a target language."

    try:
        language_code = LANGUAGE_CODES[target_language]
    except (KeyError, TypeError):
        # KeyError: unknown name; TypeError: unhashable value.
        return (
            "Error translating speech: "
            f"unsupported target language {target_language!r}."
        )

    # NOTE(review): fixed relative path -- every call writes to the same file,
    # and a leftover from an earlier run would satisfy the existence check
    # below. Consider a per-call temporary path; confirm no caller relies on
    # this exact filename first.
    output_file = "translated_audio.wav"

    # Argument list (no shell) so the paths are passed through verbatim.
    command = [
        "expressivity_predict",
        audio_file,
        "--tgt_lang", language_code,
        "--model_name", "seamless_expressivity",
        "--vocoder_name", "vocoder_pretssel",
        "--gated-model-dir", "seamlessmodel",
        "--output_path", output_file,
    ]

    # Broad catch is deliberate: this is the UI-facing boundary, and a missing
    # executable or non-zero exit status (check=True) must become a message.
    # NOTE(review): the interface wires this return value to an audio output,
    # which presumably expects a file path; raising ``gr.Error`` may be the
    # better way to surface failures -- confirm before changing the contract.
    try:
        subprocess.run(command, check=True)

        if not os.path.exists(output_file):
            return "Error: Translated audio file not found."

        print(f"File created successfully: {output_file}")
        return output_file
    except Exception as e:
        return f"Error translating speech: {e}"
|
|
|
def create_interface():
    """Build the Gradio interface for the speech-to-speech translator.

    The interface takes a microphone recording (delivered as a file path) and
    a target-language dropdown populated from ``LANGUAGE_CODES``, calls
    ``translate_speech`` with both, and shows the result in a non-interactive,
    auto-playing audio component.

    Returns:
        The configured ``gr.Interface``; the caller is responsible for
        launching it.
    """
    # Components are created in the same order as before: inputs first, then
    # the output.
    microphone = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
    language_picker = gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
    translated_player = gr.Audio(label="Translated Audio", interactive=False, autoplay=True, elem_classes="audio")

    interface = gr.Interface(
        fn=translate_speech,
        inputs=[microphone, language_picker],
        outputs=translated_player,
        title="Seamless Expressive Speech-To-Speech Translator",
        description="Hear how you sound in another language.",
    )
    return interface
|
|
|
# Script entry point: build the interface and start serving it. Nothing is
# launched when this module is merely imported.
if __name__ == "__main__":
    iface = create_interface()
    iface.launch()