File size: 2,066 Bytes
535db75 ee60cd3 f466968 ee60cd3 535db75 ee60cd3 535db75 ee60cd3 535db75 f466968 ee60cd3 f466968 535db75 ee60cd3 f466968 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import os
import shutil
import subprocess
import tempfile

import gradio as gr
# Supported target languages: display name -> language code handed to the
# translation command's ``--tgt_lang`` option. The keys are also used, in this
# order, as the choices of the target-language dropdown.
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn",
}
def transcribe(audio):
    """Transcribe an audio input to text, reporting problems as a message.

    Args:
        audio: The value forwarded to ``model.stt_file``; ``None`` means no
            audio was supplied.

    Returns:
        The first element of ``model.stt_file(audio)`` on success, otherwise
        a string describing why no transcription could be produced. This
        function never raises for ordinary failures.
    """
    # NOTE(review): ``model`` is not defined in the visible part of this file;
    # confirm where it is created. Until it exists, every call with audio ends
    # up in the error branch below.
    if audio is None:
        return "No audio input detected. Please record or upload an audio file."

    try:
        transcript = model.stt_file(audio)[0]
    except Exception as err:
        # Any failure is turned into a message string instead of propagating.
        return f"Error transcribing audio: {err!s}"
    else:
        return transcript
def translate_speech(audio_file, target_language):
    """Translate recorded speech into another language via ``expressivity_predict``.

    The external ``expressivity_predict`` command is run on ``audio_file`` and
    told to write its result into a freshly created temporary directory. Using
    a unique location per call means a leftover file from an earlier request
    can never satisfy the "output exists" check, and overlapping requests
    cannot overwrite each other's output.

    Args:
        audio_file: Path of the input audio file, or ``None`` when no audio
            was provided.
        target_language: Display name of the target language; must be a key
            of ``LANGUAGE_CODES``.

    Returns:
        The path of the translated ``.wav`` file on success, otherwise a
        human-readable error message string. The function does not raise for
        ordinary failures.
    """
    # NOTE(review): failures are reported by returning a message string. In
    # this file the function feeds a ``gr.Audio`` output, which presumably
    # expects a file path -- consider raising ``gr.Error`` instead; confirm
    # against the Gradio version in use.
    if audio_file is None:
        return "No audio input detected. Please record or upload an audio file."

    try:
        language_code = LANGUAGE_CODES[target_language]
    except (KeyError, TypeError):
        # Covers an unselected dropdown (None), an unknown name, and
        # unhashable values, with a message that says what went wrong.
        return (
            "Error translating speech: unsupported or missing target "
            f"language ({target_language!r})."
        )

    output_dir = None
    try:
        output_dir = tempfile.mkdtemp(prefix="translated_audio_")
        output_file = os.path.join(output_dir, "translated_audio.wav")
        command = [
            "expressivity_predict",
            audio_file,
            "--tgt_lang", language_code,
            "--model_name", "seamless_expressivity",
            "--vocoder_name", "vocoder_pretssel",
            "--gated-model-dir", "seamlessmodel",
            "--output_path", output_file,
        ]
        # List form (no shell) keeps the user-supplied path from being
        # interpreted by a shell; check=True turns a non-zero exit into
        # CalledProcessError.
        subprocess.run(command, check=True)
        if os.path.exists(output_file):
            print(f"File created successfully: {output_file}")
            # The directory is intentionally kept: the caller still needs
            # the file after this function returns.
            return output_file
        failure = "Error: Translated audio file not found."
    except Exception as e:
        # UI boundary: report every failure (missing executable, non-zero
        # exit status, filesystem errors) as a message rather than raising.
        failure = f"Error translating speech: {e}"

    # Only failed runs reach this point; discard their temporary directory.
    if output_dir is not None:
        shutil.rmtree(output_dir, ignore_errors=True)
    return failure
def create_interface():
    """Build the Gradio interface for the speech-to-speech translator.

    The interface wires two inputs (a microphone recording delivered as a
    file path, and a target-language dropdown populated from
    ``LANGUAGE_CODES``) to ``translate_speech`` and shows the result in a
    non-interactive, auto-playing audio component.

    Returns:
        The configured ``gr.Interface`` object (not yet launched).
    """
    # NOTE(review): ``waveform_options`` is passed as ``False``; confirm the
    # installed Gradio version accepts a bool for this argument.
    recorder = gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False)
    language_picker = gr.Dropdown(list(LANGUAGE_CODES), label="Target Language")
    translated_player = gr.Audio(
        label="Translated Audio",
        interactive=False,
        autoplay=True,
        elem_classes="audio",
    )

    return gr.Interface(
        fn=translate_speech,
        inputs=[recorder, language_picker],
        outputs=translated_player,
        title="Seamless Expressive Speech-To-Speech Translator",
        description="Hear how you sound in another language.",
    )
if __name__ == "__main__":
    # Build the interface and launch it only when run as a script.
    create_interface().launch()