Spaces:
Running
Running
File size: 4,934 Bytes
620ebff 666cd48 bf626b9 666cd48 620ebff bf626b9 6d9e4e4 2eb0f72 6d9e4e4 bf626b9 620ebff bf626b9 620ebff 2e0b1fa 9e6c87a de8d4a5 bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 620ebff bf626b9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
import streamlit as st
from kokoro import KPipeline
import soundfile as sf
import io
import os
import time
# Install espeak-ng if not installed.
# `os.system` returns the command's exit status; `which` exits with 0 when the
# binary is found on PATH.
if os.system("which espeak-ng") == 0:
    st.text("espeak-ng already installed.")
else:
    # Announce the install *before* running it so the message is not emitted
    # only after the (potentially slow) package installation has finished.
    st.text("Installing espeak-ng...")
    install_status = os.system("apt-get -qq -y install espeak-ng")
    if install_status != 0:
        # Best effort: keep the app running, but do not hide the failure.
        st.warning(
            "Could not install espeak-ng automatically "
            f"(exit status {install_status}). Speech generation may fail."
        )
# Streamlit App UI Setup
st.title("Interactive Text-to-Speech with Kokoro")

# Expander section for language samples
with st.expander("Sample Prompt!"):
    st.markdown("""
- My name is Shukdev. (In English)
- Mi nombre es Shukdev. (In Spanish)
- Je m'appelle Choukdev. (In French)
- मेरा नाम शुकदेव है. (In Hindi)
- Il mio nome è Shukdev. (In Italian)
- Meu nome é Sukhdev. (In Portuguese, Brazil)
- 我叫苏赫德夫。(In Chinese)
- 私の名前はスクデフです。(In Japanese)
""")

# Sidebar Instructions and Configuration
st.sidebar.header("Configuration & Instructions")
st.sidebar.markdown("""
### How to Use the Text-to-Speech App:
1. **Enter Text**: Input any text that you want to convert to speech.
2. **Select Language**: Choose the language of the text.
3. **Select Voice**: Choose the voice style.
4. **Select Speed**: Use the slider to change the speech speed.
5. **Add Background Music**: Optional background music for the speech (choose genre).
6. **Generate Speech**: Click on **Generate Audio** to create speech.
7. **Download**: Download the generated audio file.
""")

# Human-readable labels for the one-letter language codes. Only the label shown
# in the selectbox changes; the selected value is still the raw code.
# NOTE(review): labels follow Kokoro's published language codes -- confirm
# against the installed kokoro version.
_LANGUAGE_LABELS = {
    'a': 'American English',
    'b': 'British English',
    'e': 'Spanish',
    'f': 'French',
    'h': 'Hindi',
    'i': 'Italian',
    'p': 'Brazilian Portuguese',
    'z': 'Mandarin Chinese',
    'j': 'Japanese',
}

# Voice identifiers; the first letter of each name is the language code the
# voice belongs to (every code in the language list has at least one voice).
_VOICES = [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
]

# User input for text, language, and voice settings
input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
lang_code = st.selectbox(
    "Select Language",
    ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j'],
    format_func=lambda code: f"{_LANGUAGE_LABELS.get(code, code)} ({code})",
)

# Pre-select the first voice that matches the chosen language instead of always
# defaulting to an American English voice. Every voice stays selectable.
_default_voice_index = next(
    (position for position, name in enumerate(_VOICES) if name.startswith(lang_code)),
    0,
)
voice = st.selectbox("Select Voice", _VOICES, index=_default_voice_index)
speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
background_music = st.selectbox("Add Background Music", ['None', 'Calm', 'Energetic', 'Focus', 'Nature'])


@st.cache_resource(max_entries=3)
def _load_pipeline(code):
    """Return a KPipeline for language ``code``, reused across script reruns.

    Streamlit re-executes the whole script on every widget interaction; without
    caching, a new pipeline would be constructed each time. ``max_entries``
    bounds how many pipelines are kept alive at once.
    NOTE(review): presumably each pipeline holds model weights -- tune the
    bound to the memory available on the host.
    """
    return KPipeline(lang_code=code)


# Initialize the TTS pipeline with user-selected language
pipeline = _load_pipeline(lang_code)
# Function to generate audio with background music
def generate_audio_with_music(text, lang_code, voice, speed, music_type):
    """Synthesize ``text`` to speech and return it as an in-memory WAV file.

    The module-level ``pipeline`` splits the text on runs of newlines and
    yields one audio segment per piece; all segments are joined in order so
    multi-line input is returned in full rather than only its last segment.

    Args:
        text: Text to convert to speech.
        lang_code: Unused here; the module-level ``pipeline`` is already bound
            to a language. Kept so existing callers keep working.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speech speed multiplier forwarded to the pipeline.
        music_type: Selected background-music option. Mixing is not implemented
            yet; any value other than ``'None'`` only shows a status message.

    Returns:
        io.BytesIO: WAV data written at 24000 Hz, positioned at offset 0.

    Raises:
        ValueError: If the pipeline produced no audio for ``text``.
    """
    # Local import: numpy is not among this file's top-level imports.
    import numpy as np

    sample_rate = 24000

    segments = []
    for _graphemes, _phonemes, audio in pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+'):
        if audio is None:
            continue
        if hasattr(audio, "detach"):
            # presumably a torch tensor -- convert so segments can be joined
            # with numpy; TODO confirm against the installed kokoro version.
            audio = audio.detach().cpu().numpy()
        segments.append(np.asarray(audio))

    if not segments:
        raise ValueError(
            "No audio was generated; the input text is empty or could not be synthesized."
        )
    audio_data = segments[0] if len(segments) == 1 else np.concatenate(segments)

    if music_type != 'None':
        st.write(f"Adding {music_type} background music...")
        # TODO: blend a real music track into ``audio_data``. The speech is
        # currently returned unchanged.

    # Save audio to in-memory buffer
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format='WAV')
    buffer.seek(0)
    return buffer
# Generate Audio Button
# Streamlit reruns the whole script on every interaction, and a button is True
# only during the rerun triggered by its own click. The generated audio is
# therefore kept in session state so that the player, the download button and
# the replay button survive later reruns instead of vanishing with the
# "Generate Audio" click.
if st.button('Generate Audio'):
    if not input_text.strip():
        # Nothing to synthesize; avoid calling the generator with blank text.
        st.warning("Please enter some text before generating audio.")
    else:
        st.write("Generating speech with background music...")
        audio_buffer = generate_audio_with_music(input_text, lang_code, voice, speed, background_music)
        # Store plain bytes: they can be handed to several elements and reused
        # across reruns without worrying about a shared stream position.
        st.session_state["generated_audio"] = audio_buffer.getvalue()

generated_audio = st.session_state.get("generated_audio")
if generated_audio is not None:
    # Display audio player in the app
    st.audio(generated_audio, format='audio/wav')
    # Option to download the audio file
    st.download_button(
        label="Download Audio",
        data=generated_audio,
        file_name="generated_speech_with_music.wav",
        mime="audio/wav"
    )
    # Speech Progress Feedback: offer to render the player once more
    st.write("Would you like to hear the audio again?")
    if st.button('Replay Audio'):
        st.audio(generated_audio, format='audio/wav')
|