File size: 6,161 Bytes
620ebff
9934dbc
620ebff
 
 
 
 
63aca15
9934dbc
63aca15
6d9e4e4
9934dbc
2eb0f72
 
 
 
 
 
 
 
6d9e4e4
474d5c4
620ebff
 
63aca15
620ebff
 
63aca15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
620ebff
 
 
2e0b1fa
6ad599b
de8d4a5
63aca15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ad599b
 
63aca15
620ebff
 
 
 
9934dbc
 
62bfbfb
9934dbc
 
 
 
 
 
02b5b4a
9934dbc
 
 
 
 
 
 
 
 
 
 
 
 
 
62bfbfb
63aca15
 
620ebff
 
 
 
 
63aca15
 
620ebff
 
 
63aca15
9934dbc
474d5c4
63aca15
620ebff
474d5c4
620ebff
 
474d5c4
620ebff
2f0bafd
620ebff
2f0bafd
 
 
 
9934dbc
 
2f0bafd
 
 
 
 
 
 
 
474d5c4
2f0bafd
 
 
 
620ebff
2f0bafd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import streamlit as st
import openai
from kokoro import KPipeline
import soundfile as sf
import io

# Streamlit App UI Setup
st.title("Text-to-Speech with Kokoro")

# Collapsible panel with sample sentences the user can paste into the text
# box. Each entry is tagged with the language it is written in.
with st.expander("Sample Prompt!"):
    st.markdown("""
    - My name is Shukdev. (In English)
    - Mi nombre es Shukdev. (In Spanish)
    - Je m'appelle Choukdev. (In French)
    - मेरा नाम शुकदेव है. (In Hindi)
    - Il mio nome è Shukdev. (In Italian)
    - Meu nome é Sukhdev. (In Portuguese, Brazil)
    - 我叫苏赫德夫。(In Chinese)
    - 私の名前はスクデフです。(In Japanese)
    """)

# Sidebar: a heading followed by the step-by-step usage guide.
sidebar = st.sidebar
sidebar.header("Configuration & Instructions")

# Markdown source of the usage guide rendered in the sidebar.
usage_guide = """
### How to Use the Text-to-Speech App:
1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
   
2. **Select Language**: 
   - Choose the language of the text you are entering. Available options include:
     - 🇺🇸 American English (`a`)
     - 🇬🇧 British English (`b`)
     - 🇪🇸 Spanish (`e`)
     - 🇫🇷 French (`f`)
     - 🇮🇳 Hindi (`h`)
     - 🇮🇹 Italian (`i`)
     - 🇧🇷 Brazilian Portuguese (`p`)
     - 🇨🇳 Mandarin Chinese (`z`)
     - 🇯🇵 Japanese (`j`)
   
3. **Select Voice**:
   - Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.
   
4. **Adjust Speed**:
   - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
5. **Generate Speech**:
   - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
   
6. **Download**:
   - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
"""
sidebar.markdown(usage_guide)

# --- User inputs: text, language, voice and playback speed ---

# Single-letter language codes offered in the language picker; the sidebar
# guide lists which language each letter stands for.
LANGUAGE_CODES = ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j']

# Voice identifiers offered in the voice picker, one prefix group per line.
# Change voice options as per model.
VOICE_NAMES = [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
]

input_text = st.text_area(
    label="Enter your text here",
    placeholder="The sky above the port was the color of television...",
)
lang_code = st.selectbox(label="Select Language", options=LANGUAGE_CODES)
voice = st.selectbox(label="Select Voice", options=VOICE_NAMES)
speed = st.slider(label="Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

# Initialize the TTS pipeline with user-selected language.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is obtained through st.cache_resource: the pipeline for a given
# language is constructed once and reused on later reruns instead of being
# rebuilt each time.
@st.cache_resource
def _load_pipeline(code):
    """Return the cached KPipeline for the language code *code*."""
    return KPipeline(lang_code=code)


pipeline = _load_pipeline(lang_code)

# Password-style input (type="password") where the user supplies their
# OpenAI API key; the value is passed to translate_to_english further down.
openai_api_key = st.text_input(label="Enter your OpenAI API Key:", type="password")

# Single-letter UI language codes mapped to the language names used in the
# translation prompt (a bare code such as "h" tells the model nothing).
_LANGUAGE_NAMES = {
    'a': 'English',
    'b': 'English',
    'e': 'Spanish',
    'f': 'French',
    'h': 'Hindi',
    'i': 'Italian',
    'p': 'Brazilian Portuguese',
    'z': 'Mandarin Chinese',
    'j': 'Japanese',
}


# Function to translate text to English using OpenAI's Chat API
def translate_to_english(api_key, text, lang_code):
    """Translate *text* into English with OpenAI's chat completion API.

    Args:
        api_key: OpenAI API key used for the request.
        text: Text to translate.
        lang_code: Single-letter language code of *text* as selected in the
            UI. Codes missing from ``_LANGUAGE_NAMES`` are put into the
            prompt unchanged.

    Returns:
        The translated text with surrounding whitespace stripped. *text* is
        returned unchanged when it is blank, when its language is already
        English, when the API returns no content, or when the request fails
        (in which case the error is reported with ``st.error``).
    """
    source_language = _LANGUAGE_NAMES.get(lang_code, lang_code)

    # Nothing to translate: skip the API call entirely.
    if not text or not text.strip() or source_language == 'English':
        return text

    # Construct the prompt for translation
    prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that translates text."},
        {"role": "user", "content": prompt},
    ]

    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0 removed openai.ChatCompletion; use the client API.
            # The key stays local to this client instead of being stored on
            # the shared openai module.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model="gpt-4o", messages=messages)
            content = response.choices[0].message.content
        else:
            # Legacy openai<1.0 interface.
            openai.api_key = api_key
            response = openai.ChatCompletion.create(model="gpt-4o", messages=messages)
            content = response['choices'][0]['message']['content']
    except Exception as e:
        # Best-effort: report the failure in the UI and keep the app usable.
        st.error(f"Error occurred during translation: {e}")
        return text  # Fallback to original text in case of an error

    translated_text = (content or "").strip()
    return translated_text or text

@st.cache_resource
def _pipeline_for_language(lang_code):
    """Return a KPipeline for *lang_code*, built once and then reused."""
    return KPipeline(lang_code=lang_code)


# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesize *text* to speech and return it as an in-memory WAV file.

    Args:
        text: Text to speak. The pipeline is asked to split it on runs of
            newlines and yields one audio segment per chunk.
        lang_code: Language code selecting the pipeline to synthesize with.
        voice: Voice identifier passed to the pipeline.
        speed: Speed factor passed to the pipeline.

    Returns:
        An ``io.BytesIO`` positioned at its start, holding a WAV file
        written at a 24000 Hz sample rate with every generated segment
        concatenated in order, or ``None`` when the pipeline produced no
        audio (for example for empty text).
    """
    import numpy as np  # function-scope: the file has no top-level numpy import

    # The module-level pipeline is bound to the language picked in the UI.
    # Reuse it when it matches; otherwise (e.g. English audio for translated
    # text) fetch a cached pipeline for the requested language.
    if getattr(pipeline, "lang_code", None) == lang_code:
        tts = pipeline
    else:
        tts = _pipeline_for_language(lang_code)

    # Collect every segment; returning after the first one would drop the
    # rest of a multi-line or long text.
    segments = []
    for _graphemes, _phonemes, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+'):
        if audio is None:
            continue
        if hasattr(audio, "detach"):
            # Tensor-like output: move to host memory before converting.
            audio = audio.detach().cpu()
        segments.append(np.asarray(audio))

    if not segments:
        return None

    # Save audio to in-memory buffer; the format must be given explicitly
    # because a BytesIO has no file extension to infer it from.
    buffer = io.BytesIO()
    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
    buffer.seek(0)
    return buffer

def _present_audio(audio, download_label, file_name):
    """Show a player and a download button for *audio*, or an error if it is None."""
    if audio is None:
        st.error("No audio could be generated for this text.")
        return
    st.audio(audio, format='audio/wav')
    st.download_button(
        label=download_label,
        data=audio,
        file_name=file_name,
        mime="audio/wav"
    )


# Generate and display the audio file
if st.button('Generate Audio'):
    if not input_text or not input_text.strip():
        # Nothing to synthesize; tell the user instead of failing silently.
        st.warning("Please enter some text before generating audio.")
    else:
        # Speech for the text exactly as entered, in the selected language.
        st.write("Generating speech for the original text...")
        _present_audio(
            generate_audio(input_text, lang_code, voice, speed),
            "Download Audio (Original Text)",
            "generated_speech_original.wav",
        )

        if openai_api_key:
            # Translate the input text to English using OpenAI
            translated_text = translate_to_english(openai_api_key, input_text, lang_code)
            st.write(f"Translated Text: {translated_text}")

            # Generate audio for the translated English text
            _present_audio(
                generate_audio(translated_text, 'a', voice, speed),
                "Download Audio (Translated to English)",
                "generated_speech_translated.wav",
            )
        else:
            # The key is only needed for translation, so its absence must not
            # block synthesis of the original text.
            st.info("Enter your OpenAI API key to also get an English translation of the text.")