File size: 2,066 Bytes
535db75
 
 
 
 
 
 
 
 
 
 
 
 
 
ee60cd3
 
 
f466968
ee60cd3
 
 
 
 
535db75
ee60cd3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535db75
ee60cd3
 
 
 
 
 
 
535db75
 
f466968
 
 
 
 
 
 
 
ee60cd3
f466968
 
 
535db75
ee60cd3
f466968
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import shutil
import subprocess
import tempfile

import gradio as gr

# Supported languages
LANGUAGE_CODES = {
    "English": "eng",
    "Spanish": "spa",
    "French": "fra",
    "German": "deu",
    "Italian": "ita",
    "Chinese": "cmn"
}

def transcribe(audio):
    if audio is None:
        return "No audio input detected. Please record or upload an audio file."
    
    try:
        text = model.stt_file(audio)[0]
        return text
    except Exception as e:
        return f"Error transcribing audio: {str(e)}"

def translate_speech(audio_file, target_language):
    if audio_file is None:
        return "No audio input detected. Please record or upload an audio file."
    
    try:
        language_code = LANGUAGE_CODES[target_language]
        output_file = "translated_audio.wav"
        
        command = [
            "expressivity_predict",
            audio_file,
            "--tgt_lang", language_code,
            "--model_name", "seamless_expressivity",
            "--vocoder_name", "vocoder_pretssel",
            "--gated-model-dir", "seamlessmodel",
            "--output_path", output_file
        ]
        
        subprocess.run(command, check=True)

        if os.path.exists(output_file):
            print(f"File created successfully: {output_file}")
            return output_file
        else:
            return "Error: Translated audio file not found."
    except Exception as e:
        return f"Error translating speech: {str(e)}"

def create_interface():
    inputs = [
        gr.Audio(label="User", sources="microphone", type="filepath", waveform_options=False),
        gr.Dropdown(list(LANGUAGE_CODES.keys()), label="Target Language")
    ]

    return gr.Interface(
        fn=translate_speech,
        inputs=inputs,
        outputs=gr.Audio(label="Translated Audio", interactive=False, autoplay=True, elem_classes="audio"),
        title="Seamless Expressive Speech-To-Speech Translator",
        description="Hear how you sound in another language.",
    )

if __name__ == "__main__":
    iface = create_interface()
    iface.launch()