import io
import os
import shutil
import subprocess
import time

import numpy as np
import soundfile as sf
import streamlit as st
from kokoro import KPipeline

# Install espeak-ng if not installed.
# shutil.which probes PATH directly, so no shell is spawned and nothing is
# echoed to the server's stdout.
if shutil.which("espeak-ng"):
    st.text("espeak-ng already installed.")
else:
    # Announce first: the install below blocks, and the user should see why
    # the app is busy before it starts rather than after it finishes.
    st.text("Installing espeak-ng...")
    try:
        install_status = subprocess.run(
            ["apt-get", "-qq", "-y", "install", "espeak-ng"],
            check=False,
        ).returncode
    except OSError:
        # apt-get itself is missing (non-Debian host); stay best-effort
        # instead of crashing the whole app.
        install_status = None
    if install_status != 0:
        st.warning(
            "Could not install espeak-ng automatically; "
            "some languages may not work until it is installed manually."
        )

# Streamlit App UI Setup
st.title("Interactive Text-to-Speech with Kokoro")

# Expander section for language samples: one example sentence per language,
# each labelled with the language it is written in.
with st.expander("Sample Prompt!"):
    st.markdown("""
    - My name is Shukdev. (In English)
    - Mi nombre es Shukdev. (In Spanish)
    - Je m'appelle Choukdev. (In French)
    - मेरा नाम शुकदेव है. (In Hindi)
    - Il mio nome è Shukdev. (In Italian)
    - Meu nome é Sukhdev. (In Portuguese, Brazil)
    - 我叫苏赫德夫。(In Chinese)
    - 私の名前はスクデフです。(In Japanese)
    """)
    
# Sidebar: a heading followed by the numbered usage steps, rendered as markdown.
_sidebar = st.sidebar
_usage_steps = """
### How to Use the Text-to-Speech App:
1. **Enter Text**: Input any text that you want to convert to speech.
2. **Select Language**: Choose the language of the text.
3. **Select Voice**: Choose the voice style.
4. **Select Speed**: Use the slider to change the speech speed.
5. **Add Background Music**: Optional background music for the speech (choose genre).
6. **Generate Speech**: Click on **Generate Audio** to create speech.
7. **Download**: Download the generated audio file.
"""

_sidebar.header("Configuration & Instructions")
_sidebar.markdown(_usage_steps)

# User input for text, language, and voice settings
input_text = st.text_area(
    "Enter your text here",
    placeholder="The sky above the port was the color of television...",
)

# Display names for Kokoro's single-letter language codes. The selectbox still
# returns the bare code (the value handed to KPipeline); only the label shown
# to the user changes. Insertion order is the order shown in the dropdown.
LANGUAGE_LABELS = {
    'a': 'American English',
    'b': 'British English',
    'e': 'Spanish',
    'f': 'French',
    'h': 'Hindi',
    'i': 'Italian',
    'p': 'Brazilian Portuguese',
    'z': 'Mandarin Chinese',
    'j': 'Japanese',
}
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_LABELS),
    format_func=lambda code: f"{LANGUAGE_LABELS[code]} ({code})",
)

# Voice identifiers, grouped by their two-letter prefix. The first letter
# matches one of the language codes above; the second looks like f/m for
# female/male — NOTE(review): confirm against the Kokoro voice list.
voice = st.selectbox("Select Voice", [
    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
    'am_onyx', 'am_puck', 'am_santa',
    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
    'ef_dora',
    'em_alex', 'em_santa',
    'ff_siwis',
    'hf_alpha', 'hf_beta',
    'hm_omega', 'hm_psi',
    'if_sara',
    'im_nicola',
    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
    'jm_kumo',
    'pf_dora',
    'pm_alex', 'pm_santa',
    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
])
speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

background_music = st.selectbox("Add Background Music", ['None', 'Calm', 'Energetic', 'Focus', 'Nature'])

# Initialize the TTS pipeline with user-selected language.
# Streamlit re-executes this script on every widget interaction, so the
# pipeline is built through a cached factory instead of being reconstructed
# on each rerun.
@st.cache_resource(max_entries=3)
def _load_pipeline(selected_lang_code):
    """Return the Kokoro pipeline for *selected_lang_code*, cached per code.

    max_entries bounds how many pipelines stay resident at once; each one
    presumably holds its own model weights — TODO confirm and tune the cap.
    """
    return KPipeline(lang_code=selected_lang_code)


pipeline = _load_pipeline(lang_code)

# Function to generate audio with background music
def generate_audio_with_music(text, lang_code, voice, speed, music_type):
    """Synthesize *text* to speech and return it as an in-memory WAV file.

    The text is split on runs of newlines and every resulting segment is
    synthesized; the segments are joined in order into a single recording.

    Args:
        text: Text to speak.
        lang_code: Not used here; the module-level ``pipeline`` is already
            built for the selected language. Kept so existing callers keep
            working.
        voice: Voice identifier passed to the pipeline.
        speed: Speech speed passed to the pipeline.
        music_type: Selected background-music option; ``'None'`` disables it.
            Mixing is not implemented yet, so this only affects the status
            message.

    Returns:
        An ``io.BytesIO`` positioned at the start of the WAV data (written at
        a sample rate of 24000), or ``None`` when the pipeline produced no
        audio (for example for empty text).
    """
    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    # Collect every segment; returning from inside the loop would drop all
    # text after the first newline-separated segment.
    segments = [
        np.asarray(audio) for _, _, audio in generator if audio is not None
    ]
    if not segments:
        return None

    if music_type != 'None':
        st.write(f"Adding {music_type} background music...")
        # TODO: load the selected track and blend it with the speech signal;
        # until then the speech is returned without any music.

    # Save audio to in-memory buffer
    buffer = io.BytesIO()
    sf.write(buffer, np.concatenate(segments), 24000, format='WAV')
    buffer.seek(0)
    return buffer

# Generate Audio Button
# st.button is True only during the rerun triggered by its own click, so the
# generated audio is kept in st.session_state and rendered outside the button
# branch. That keeps the player, the download button and the replay button
# alive across later reruns (including the one caused by clicking
# "Download Audio" or "Replay Audio").
if st.button('Generate Audio'):
    if not input_text.strip():
        st.warning("Please enter some text to convert to speech.")
    else:
        st.write("Generating speech with background music...")
        audio_buffer = generate_audio_with_music(input_text, lang_code, voice, speed, background_music)
        if audio_buffer is None:
            # Nothing was synthesized; drop any earlier result so stale audio
            # is not presented as the output for the new text.
            st.session_state.pop("generated_audio", None)
            st.error("No audio was generated for the given text.")
        else:
            # Store plain bytes so every widget below reads the same data
            # without sharing a stream position.
            st.session_state["generated_audio"] = audio_buffer.getvalue()

generated_audio = st.session_state.get("generated_audio")
if generated_audio is not None:
    # Display audio player in the app
    st.audio(generated_audio, format='audio/wav')

    # Option to download the audio file
    st.download_button(
        label="Download Audio",
        data=generated_audio,
        file_name="generated_speech_with_music.wav",
        mime="audio/wav"
    )

    # Speech Progress Feedback: offer to show the player once more
    st.write("Would you like to hear the audio again?")
    if st.button('Replay Audio'):
        st.audio(generated_audio, format='audio/wav')