File size: 3,040 Bytes
620ebff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
import streamlit as st
from kokoro import KPipeline
import soundfile as sf
import io

# Streamlit App UI Setup: page title plus a sidebar header for the usage guide.
st.title("Text-to-Speech with Kokoro")
st.sidebar.header("Configuration & Instructions")

# Sidebar Instructions, rendered as Markdown.
# The country flags are literal Unicode regional-indicator emoji, so this file
# must be saved and served as UTF-8 or they degrade into unreadable characters.
# The single-letter codes listed here mirror the options offered by the
# "Select Language" widget further down.
st.sidebar.markdown("""
### How to Use the Text-to-Speech App:

1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
   
2. **Select Language**: 
   - Choose the language of the text you are entering. Available options include:
     - 🇺🇸 American English (`a`)
     - 🇬🇧 British English (`b`)
     - 🇪🇸 Spanish (`e`)
     - 🇫🇷 French (`f`)
     - 🇮🇳 Hindi (`h`)
     - 🇮🇹 Italian (`i`)
     - 🇧🇷 Brazilian Portuguese (`p`)
     - 🇯🇵 Japanese (`j`)
     - 🇨🇳 Mandarin Chinese (`z`)
   
3. **Select Voice**:
   - Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.
   
4. **Adjust Speed**:
   - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.

5. **Generate Speech**:
   - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
   
6. **Download**:
   - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.

Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
""")

# User input for text, language, and voice settings.
# Each widget returns its current value on every script run.
input_text = st.text_area("Enter your text here", "The sky above the port was the color of television...")
lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'j', 'z'])
# NOTE(review): confirm these voice ids exist in the installed Kokoro model,
# and that each one is valid for every selectable language.
voice = st.selectbox("Select Voice", ['af_heart', 'af_joy', 'af_female', 'af_male'])  # Change voice options as per model
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


@st.cache_resource
def _load_pipeline(code):
    """Return the Kokoro pipeline for language ``code``, building it only once.

    Streamlit re-executes the whole script on every widget interaction, so
    constructing ``KPipeline`` at module level would rebuild it each time a
    slider moves or a button is clicked. ``st.cache_resource`` keeps one
    instance per distinct ``code`` and hands it back on later reruns.

    NOTE(review): the cached instance is shared by all sessions of the app;
    confirm KPipeline tolerates concurrent use if the app is multi-user.
    """
    return KPipeline(lang_code=code)


# Initialize the TTS pipeline with user-selected language
pipeline = _load_pipeline(lang_code)

# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesize ``text`` and return the whole utterance as an in-memory WAV.

    The module-level ``pipeline`` is called with ``split_pattern=r'\\n+'`` and
    yields ``(graphemes, phonemes, audio)`` tuples, one per segment. Every
    segment is written, in order, into a single WAV stream, so multi-paragraph
    input is not truncated to its first paragraph.

    Args:
        text: The text to speak.
        lang_code: Not used inside this function; the language is fixed by the
            module-level ``pipeline``. Kept so existing callers keep working.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speed multiplier forwarded to the pipeline.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 holding WAV data, or ``None``
        when the pipeline produced no audio (for example, empty text).
    """
    # Sample rate the file is tagged with; presumably Kokoro's native output
    # rate -- TODO confirm against the model documentation.
    sample_rate = 24000

    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    # Drain the generator first so we know whether there is anything to write
    # before creating the WAV header.
    segments = [audio for _graphemes, _phonemes, audio in generator if audio is not None]
    if not segments:
        return None

    # Same channel inference sf.write() performs: 1-D data is mono, otherwise
    # the second dimension is the channel count.
    first = segments[0]
    channels = 1 if getattr(first, "ndim", 1) == 1 else first.shape[1]

    buffer = io.BytesIO()
    # format must be given explicitly: a BytesIO has no file name, so soundfile
    # cannot infer the container from an extension and raises TypeError.
    with sf.SoundFile(buffer, mode='w', samplerate=sample_rate,
                      channels=channels, format='WAV') as wav_file:
        for segment in segments:
            wav_file.write(segment)

    # Rewind so consumers read from the start of the WAV data.
    buffer.seek(0)
    return buffer

# Generate and display the audio file.
# st.button returns True only on the script run triggered by the click.
if st.button('Generate Audio'):
    st.write("Generating speech...")
    audio_buffer = generate_audio(input_text, lang_code, voice, speed)

    if audio_buffer is None:
        # generate_audio returns None when the pipeline yields no segments
        # (e.g. the text box is empty); passing None on to the audio widgets
        # would fail, so tell the user instead.
        st.warning("No audio was generated. Please enter some text and try again.")
    else:
        # Take an immutable snapshot so the player and the download button do
        # not depend on the read position of a shared stream.
        audio_bytes = audio_buffer.getvalue()

        # Display Audio player in the app
        st.audio(audio_bytes, format='audio/wav')

        # Optional: Save the generated audio file for download
        st.download_button(
            label="Download Audio",
            data=audio_bytes,
            file_name="generated_speech.wav",
            mime="audio/wav"
        )