Spaces:

shukdevdatta123
/

Kokoro-TTS

Paused

App Files Files Community

Kokoro-TTS / app.py

shukdevdatta123

Update app.py

bf626b9 verified 10 months ago

raw

history blame

4.93 kB

	import io
	import os
	import shutil
	import subprocess
	import time

	import numpy as np
	import soundfile as sf
	import streamlit as st
	from kokoro import KPipeline

	# Install espeak-ng if not installed.
	# shutil.which performs the PATH lookup in-process, so no shell is spawned
	# just to check for the binary.
	if shutil.which("espeak-ng") is not None:
	    st.text("espeak-ng already installed.")
	else:
	    # Show the status line *before* the blocking install so it is visible
	    # while apt-get is running rather than only after it has finished.
	    st.text("Installing espeak-ng...")
	    try:
	        install_result = subprocess.run(
	            ["apt-get", "-qq", "-y", "install", "espeak-ng"],
	            check=False,
	        )
	    except OSError as exc:
	        # apt-get itself is missing or not executable; report it instead
	        # of crashing the whole app on a best-effort bootstrap step.
	        st.error(f"Could not run apt-get to install espeak-ng: {exc}")
	    else:
	        if install_result.returncode != 0:
	            st.error(
	                "Installing espeak-ng failed "
	                f"(apt-get exit code {install_result.returncode})."
	            )

	# Streamlit App UI Setup
	st.title("Interactive Text-to-Speech with Kokoro")

	# Collapsible section listing one sample sentence per language; each entry
	# is labelled with the language it is written in.
	with st.expander("Sample Prompt!"):
	    st.markdown("""
	- My name is Shukdev. (In English)
	- Mi nombre es Shukdev. (In Spanish)
	- Je m'appelle Choukdev. (In French)
	- मेरा नाम शुकदेव है. (In Hindi)
	- Il mio nome è Shukdev. (In Italian)
	- Meu nome é Sukhdev. (In Portuguese, Brazil)
	- 我叫苏赫德夫。(In Chinese)
	- 私の名前はスクデフです。(In Japanese)
	""")

	# Sidebar: a header followed by the numbered usage guide.
	# The guide text is kept in one place so the rendering calls stay short.
	_USAGE_GUIDE = """
	### How to Use the Text-to-Speech App:
	1. Enter Text: Input any text that you want to convert to speech.
	2. Select Language: Choose the language of the text.
	3. Select Voice: Choose the voice style.
	4. Select Speed: Use the slider to change the speech speed.
	5. Add Background Music: Optional background music for the speech (choose genre).
	6. Generate Speech: Click on Generate Audio to create speech.
	7. Download: Download the generated audio file.
	"""

	_sidebar = st.sidebar
	_sidebar.header("Configuration & Instructions")
	_sidebar.markdown(_USAGE_GUIDE)

	# User input for text, language, and voice settings
	input_text = st.text_area(
	    "Enter your text here",
	    placeholder="The sky above the port was the color of television...",
	)

	# One-letter Kokoro language codes mapped to readable labels, in the same
	# order the codes were originally offered. The labels are display-only;
	# the selectbox still returns the bare code.
	_LANGUAGE_NAMES = {
	    'a': 'American English',
	    'b': 'British English',
	    'e': 'Spanish',
	    'f': 'French',
	    'h': 'Hindi',
	    'i': 'Italian',
	    'p': 'Brazilian Portuguese',
	    'z': 'Mandarin Chinese',
	    'j': 'Japanese',
	}
	lang_code = st.selectbox(
	    "Select Language",
	    list(_LANGUAGE_NAMES),
	    format_func=lambda code: f"{_LANGUAGE_NAMES[code]} ({code})",
	)

	_ALL_VOICES = [
	    'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
	    'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
	    'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
	    'am_onyx', 'am_puck', 'am_santa',
	    'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
	    'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
	    'ef_dora',
	    'em_alex', 'em_santa',
	    'ff_siwis',
	    'hf_alpha', 'hf_beta',
	    'hm_omega', 'hm_psi',
	    'if_sara',
	    'im_nicola',
	    'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
	    'jm_kumo',
	    'pf_dora',
	    'pm_alex', 'pm_santa',
	    'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
	    'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
	]
	# Every voice name starts with the code of the language it belongs to, so
	# only voices for the selected language are offered. This prevents
	# mismatched pairs such as a Japanese pipeline with an English voice.
	_matching_voices = [name for name in _ALL_VOICES if name.startswith(lang_code)]
	voice = st.selectbox("Select Voice", _matching_voices)
	speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)

	background_music = st.selectbox("Add Background Music", ['None', 'Calm', 'Energetic', 'Focus', 'Nature'])


	@st.cache_resource
	def _load_pipeline(code):
	    """Return the Kokoro pipeline for language *code*.

	    Streamlit re-executes this script on every widget interaction; caching
	    keeps one pipeline per language code instead of rebuilding it each time.
	    NOTE(review): the cached object is shared between sessions - confirm
	    KPipeline is safe to share if the app serves concurrent users.
	    """
	    return KPipeline(lang_code=code)


	# Initialize the TTS pipeline with user-selected language
	pipeline = _load_pipeline(lang_code)

	# Function to generate audio with background music
	def generate_audio_with_music(text, lang_code, voice, speed, music_type, sample_rate=24000):
	    """Synthesize *text* with the module-level ``pipeline`` and return it as WAV.

	    Args:
	        text: Text to speak. The pipeline splits it on runs of newlines.
	        lang_code: Accepted for interface compatibility; not used here
	            because the language is fixed when ``pipeline`` is created.
	        voice: Voice name passed through to the pipeline.
	        speed: Speech speed passed through to the pipeline.
	        music_type: Selected background-music option. Mixing is not
	            implemented; any value other than ``'None'`` only produces a
	            notice in the UI.
	        sample_rate: Sample rate written into the WAV header.

	    Returns:
	        An ``io.BytesIO`` positioned at offset 0 holding the WAV data.

	    Raises:
	        ValueError: If the pipeline produced no audio (e.g. empty text).
	    """
	    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

	    # The pipeline yields one (gs, ps, audio) tuple per text segment. Keep
	    # every segment: assigning in a loop would retain only the last one and
	    # silently drop the rest of a multi-paragraph input.
	    # NOTE(review): np.asarray assumes each audio chunk is array-like
	    # (sf.write needed the same of the single chunk it was given) - confirm.
	    segments = [np.asarray(audio) for _, _, audio in generator if audio is not None]
	    if not segments:
	        raise ValueError("No audio was generated; the input text may be empty.")
	    audio_data = np.concatenate(segments)

	    if music_type != 'None':
	        # No music is actually blended into the output, so say so instead
	        # of claiming it was added (and do not allocate a throwaway buffer).
	        st.warning(
	            f"{music_type} background music is not implemented yet; "
	            "generating speech only."
	        )

	    # Save audio to in-memory buffer
	    buffer = io.BytesIO()
	    sf.write(buffer, audio_data, sample_rate, format='WAV')
	    buffer.seek(0)
	    return buffer

	# Generate Audio Button
	if st.button('Generate Audio'):
	    if not input_text.strip():
	        # Nothing to synthesize; tell the user instead of failing later.
	        st.warning("Please enter some text to convert to speech.")
	    else:
	        st.write("Generating speech with background music...")
	        audio_buffer = generate_audio_with_music(input_text, lang_code, voice, speed, background_music)
	        # A button is only True on the rerun caused by its own click, so the
	        # result is kept in session state; otherwise the player would vanish
	        # as soon as any other widget (including "Replay Audio") is used.
	        st.session_state["generated_audio"] = audio_buffer.getvalue()

	# Render the result outside the click branch so it survives later reruns.
	_generated_audio = st.session_state.get("generated_audio")
	if _generated_audio is not None:
	    # Display audio player in the app
	    st.audio(_generated_audio, format='audio/wav')

	    # Option to download the audio file
	    st.download_button(
	        label="Download Audio",
	        data=_generated_audio,
	        file_name="generated_speech_with_music.wav",
	        mime="audio/wav",
	    )

	    # Offer a second player on request. This button is not nested inside
	    # the "Generate Audio" branch, so its click is actually observed.
	    st.write("Would you like to hear the audio again?")
	    if st.button('Replay Audio'):
	        st.audio(_generated_audio, format='audio/wav')