Spaces:
Running
Running
import streamlit as st | |
from kokoro import KPipeline | |
import soundfile as sf | |
import io | |
import os | |
import time | |
# Install espeak-ng if it is not already available on PATH.
# os.system returns the command's exit status; 0 means `which` found the binary.
if os.system("which espeak-ng") == 0:
    st.text("espeak-ng already installed.")
else:
    # Announce the install before starting it: the apt-get call blocks until
    # it finishes, so the message must be emitted first to be useful.
    st.text("Installing espeak-ng...")
    if os.system("apt-get -qq -y install espeak-ng") != 0:
        # Surface the failure instead of silently continuing without the binary.
        st.error("Failed to install espeak-ng.")
# Streamlit App UI Setup
st.title("Interactive Text-to-Speech with Kokoro")

# Expander section for language samples: one example sentence per language,
# each labelled with the language it is written in.
with st.expander("Sample Prompt!"):
    st.markdown("""
- My name is Shukdev. (In English)
- Mi nombre es Shukdev. (In Spanish)
- Je m'appelle Choukdev. (In French)
- मेरा नाम शुकदेव है. (In Hindi)
- Il mio nome è Shukdev. (In Italian)
- Meu nome é Sukhdev. (In Portuguese, Brazil)
- 我叫苏赫德夫。(In Chinese)
- 私の名前はスクデフです。(In Japanese)
""")
# Sidebar: a heading followed by the numbered usage instructions.
# Elements created inside the `with st.sidebar` context are placed in the sidebar.
with st.sidebar:
    st.header("Configuration & Instructions")
    st.markdown("""
### How to Use the Text-to-Speech App:
1. **Enter Text**: Input any text that you want to convert to speech.
2. **Select Language**: Choose the language of the text.
3. **Select Voice**: Choose the voice style.
4. **Select Speed**: Use the slider to change the speech speed.
5. **Add Background Music**: Optional background music for the speech (choose genre).
6. **Generate Speech**: Click on **Generate Audio** to create speech.
7. **Download**: Download the generated audio file.
""")
# User input for text, language, and voice settings
input_text = st.text_area(
    "Enter your text here",
    placeholder="The sky above the port was the color of television...",
)

# Kokoro identifies languages by single-letter codes. The dropdown shows a
# readable name next to each code, but the selected value is still the raw
# code, which is what KPipeline(lang_code=...) receives below.
LANGUAGE_LABELS = {
    'a': "American English",
    'b': "British English",
    'e': "Spanish",
    'f': "French",
    'h': "Hindi",
    'i': "Italian",
    'p': "Brazilian Portuguese",
    'z': "Mandarin Chinese",
    'j': "Japanese",
}
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_LABELS),
    format_func=lambda code: f"{LANGUAGE_LABELS[code]} ({code})",
)

# Voice ids offered to the user.
# NOTE(review): the first letter of each id appears to mirror the language
# code above and the second letter looks like f/m (female/male) -- confirm
# against Kokoro's voice list. The choice is not restricted by language here.
voice = st.selectbox("Select Voice", [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
])

# Playback speed multiplier passed to the pipeline (1.0 is the default).
speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

# Background-music choice; 'None' disables it.
background_music = st.selectbox("Add Background Music", ['None', 'Calm', 'Energetic', 'Focus', 'Nature'])
# Initialize the TTS pipeline with user-selected language.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is cached per language code instead of being rebuilt on each rerun.
@st.cache_resource
def _load_pipeline(code):
    """Return a KPipeline for language *code*, reusing a cached one if present."""
    return KPipeline(lang_code=code)


pipeline = _load_pipeline(lang_code)
# Function to generate audio with background music
def generate_audio_with_music(text, lang_code, voice, speed, music_type):
    """Synthesize *text* and return the speech as an in-memory WAV file.

    Args:
        text: Text to speak. The pipeline splits it into segments on runs of
            newlines (``split_pattern=r'\\n+'``).
        lang_code: Not used by this function; synthesis goes through the
            module-level ``pipeline``. Kept so existing callers keep working.
        voice: Voice id forwarded to the pipeline.
        speed: Speed value forwarded to the pipeline.
        music_type: Background-music choice. Any value other than ``'None'``
            only produces a status message; no music is actually mixed in.

    Returns:
        io.BytesIO: WAV data (written at a 24000 Hz sample rate) containing
        every generated segment in order, positioned at offset 0.

    Raises:
        ValueError: If the pipeline yields no audio, e.g. for empty text.
    """
    # Local import keeps this block self-contained; soundfile already
    # converts its input to a NumPy array, so NumPy is present at runtime.
    import numpy as np

    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    # Keep every segment. Previously each iteration overwrote the last one,
    # so multi-paragraph input lost all but a single segment.
    segments = [np.asarray(audio) for _gs, _ps, audio in generator if audio is not None]
    if not segments:
        # Without this check an empty result surfaced as an UnboundLocalError.
        raise ValueError("No audio was generated; the input text may be empty.")
    audio_data = np.concatenate(segments)

    if music_type != 'None':
        # Placeholder: real music loading/blending is not implemented yet.
        st.write(f"Adding {music_type} background music...")

    # Save audio to in-memory buffer and rewind it for the reader.
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, 24000, format='WAV')
    buffer.seek(0)
    return buffer
# Generate Audio Button
# A button is only True during the rerun caused by its own click, so the
# generated audio is kept in st.session_state. Otherwise clicking
# 'Download Audio' or 'Replay Audio' would rerun the script with
# 'Generate Audio' False and the audio would be gone.
if st.button('Generate Audio'):
    if not input_text.strip():
        # Nothing to synthesize; avoid calling the pipeline with blank text.
        st.warning("Please enter some text before generating audio.")
    else:
        st.write("Generating speech with background music...")
        audio_buffer = generate_audio_with_music(input_text, lang_code, voice, speed, background_music)
        # Store plain bytes so the same data can feed several widgets.
        st.session_state['generated_audio'] = audio_buffer.getvalue()

if 'generated_audio' in st.session_state:
    generated_audio = st.session_state['generated_audio']

    # Display audio player in the app
    st.audio(generated_audio, format='audio/wav')

    # Option to download the audio file
    st.download_button(
        label="Download Audio",
        data=generated_audio,
        file_name="generated_speech_with_music.wav",
        mime="audio/wav"
    )

    # Offer to show the player again for the most recently generated audio.
    st.write("Would you like to hear the audio again?")
    if st.button('Replay Audio'):
        st.audio(generated_audio, format='audio/wav')