Spaces:
Running
Running
File size: 6,588 Bytes
620ebff 9934dbc 620ebff f15a385 9934dbc 63aca15 6d9e4e4 dded7af 6d9e4e4 474d5c4 620ebff f15a385 63aca15 f15a385 63aca15 f15a385 63aca15 f15a385 63aca15 f15a385 620ebff 2e0b1fa 6ad599b de8d4a5 63aca15 6ad599b 63aca15 620ebff dded7af 62bfbfb 9934dbc 02b5b4a 9934dbc a8004f6 dded7af 9934dbc dded7af 9934dbc 62bfbfb 63aca15 620ebff 63aca15 620ebff 63aca15 dded7af 474d5c4 63aca15 620ebff 474d5c4 620ebff 474d5c4 620ebff 2f0bafd 620ebff 2f0bafd dded7af 2f0bafd dded7af 2f0bafd dded7af 2f0bafd dded7af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
import streamlit as st
import openai
from kokoro import KPipeline
import soundfile as sf
import io
# Streamlit App UI Setup
st.title("Text-to-Speech Translator with Kokoro")

# Expander with one sample sentence per supported language.
# The Markdown bodies below are intentionally kept flush-left inside the
# triple-quoted strings so the text sent to Streamlit is exactly what is
# written here.
with st.expander("Sample Prompt!"):
    st.markdown("""
- My name is Shukdev. (In English)
- Mi nombre es Shukdev. (In Spanish)
- Je m'appelle Choukdev. (In French)
- मेरा नाम शुकदेव है. (In Hindi)
- Il mio nome è Shukdev. (In Italian)
- Meu nome é Sukhdev. (In Portuguese, Brazil)
- 我叫苏赫德夫。(In Chinese)
- 私の名前はスクデフです。(In Japanese)
""")

# Sidebar: step-by-step usage instructions. The voice examples must name
# voices that actually exist in the "Select Voice" selector.
st.sidebar.header("Configuration & Instructions")
st.sidebar.markdown("""
### How to Use the Text-to-Speech App:
1. **Enter Text**:
- Type or paste the text you want to convert to speech in the main text area.
2. **Select Language**:
- Choose the language of the input text. The available language options include:
- 🇺🇸 **English** (American English)
- 🇬🇧 **British English**
- 🇪🇸 **Spanish**
- 🇫🇷 **French**
- 🇮🇳 **Hindi**
- 🇮🇹 **Italian**
- 🇧🇷 **Portuguese** (Brazilian)
- 🇨🇳 **Chinese** (Mandarin)
- 🇯🇵 **Japanese**
3. **Select Voice**:
- Choose the voice you want for the speech. There are multiple voice styles based on tone and gender (e.g., `af_heart`, `af_bella`, etc.).
4. **Adjust Speech Speed**:
- Use the slider to adjust how fast the speech will be generated. The speed can be set from `0.5x` to `2.0x`, with `1.0x` being the default normal speed.
5. **Generate Speech**:
- Once you've selected the text, language, voice, and speed, click the **"Generate Audio"** button. The app will process the text and generate the speech.
6. **Download Audio**:
- After the audio is generated, you can play it directly within the app or download it as a `.wav` file by clicking the **"Download Audio"** button.
### Additional Features:
- **Text Translation**:
- The app can automatically translate the text to English before generating audio. After the translation, you will hear the audio in English with your chosen voice.
Enjoy exploring different languages, voices, and speeds with the text-to-speech conversion!
""")
# User input for text, language, and voice settings
input_text = st.text_area(
    "Enter your text here",
    placeholder="The sky above the port was the color of television...",
)

# Kokoro's single-letter language codes mapped to the names shown to the user.
# The selectbox still returns the code, so downstream code is unaffected.
LANGUAGE_NAMES = {
    'a': 'American English',
    'b': 'British English',
    'e': 'Spanish',
    'f': 'French',
    'h': 'Hindi',
    'i': 'Italian',
    'p': 'Brazilian Portuguese',
    'z': 'Mandarin Chinese',
    'j': 'Japanese',
}
lang_code = st.selectbox(
    "Select Language",
    list(LANGUAGE_NAMES),
    format_func=lambda code: f"{LANGUAGE_NAMES[code]} ({code})",
)

# Voice names are prefixed with the language letter and f/m for gender.
# Change voice options as per model.
voice = st.selectbox(
    "Select Voice",
    [
        'af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore',
        'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
        'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael',
        'am_onyx', 'am_puck', 'am_santa',
        'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
        'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
        'ef_dora',
        'em_alex', 'em_santa',
        'ff_siwis',
        'hf_alpha', 'hf_beta',
        'hm_omega', 'hm_psi',
        'if_sara',
        'im_nicola',
        'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
        'jm_kumo',
        'pf_dora',
        'pm_alex', 'pm_santa',
        'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
        'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang',
    ],
)
speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)


@st.cache_resource
def _load_pipeline(code):
    """Build the Kokoro pipeline for *code* once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; without
    caching, the pipeline would be rebuilt each time.
    """
    return KPipeline(lang_code=code)


# Initialize the TTS pipeline with user-selected language
pipeline = _load_pipeline(lang_code)

# OpenAI API key from the user (optional; only needed for translation)
openai_api_key = st.text_input(
    "Enter your OpenAI API Key (Optional for Translation)",
    type="password",
)
# Function to translate text to English using OpenAI's Chat API
def translate_to_english(api_key, text, lang_code):
    """Translate *text* into English with the OpenAI chat API.

    Args:
        api_key: OpenAI API key used for the request.
        text: Text to translate.
        lang_code: Single-letter source-language code as used by the language
            selector (e.g. ``'j'``). Unknown codes are passed to the model
            unchanged.

    Returns:
        The translated text, or the original *text* if the request fails
        (the error is shown to the user via ``st.error``).
    """
    # The model cannot interpret codes like "j"; give it the language name.
    language_names = {
        'a': 'English',
        'b': 'English',
        'e': 'Spanish',
        'f': 'French',
        'h': 'Hindi',
        'i': 'Italian',
        'p': 'Brazilian Portuguese',
        'z': 'Mandarin Chinese',
        'j': 'Japanese',
    }
    source_language = language_names.get(lang_code, lang_code)

    # Construct the prompt for translation
    prompt = f"Translate the following text from {source_language} to English: \n\n{text}"
    messages = [
        {"role": "system", "content": "You are a helpful assistant that translates text."},
        {"role": "user", "content": prompt},
    ]
    try:
        if hasattr(openai, "OpenAI"):
            # openai>=1.0: the module-level ChatCompletion API was removed;
            # use a client object instead.
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model="gpt-4", messages=messages)
            content = response.choices[0].message.content
        else:
            # Legacy openai<1.0 interface.
            openai.api_key = api_key
            response = openai.ChatCompletion.create(model="gpt-4", messages=messages)
            content = response['choices'][0]['message']['content']
        # content may be None if the model returned no text.
        return (content or "").strip()
    except Exception as e:
        st.error(f"Error occurred during translation: {e}")
        return text  # Fallback to original text in case of an error
# Generate Audio function
def generate_audio(text, lang_code, voice, speed):
    """Synthesise *text* to speech and return it as an in-memory WAV file.

    Args:
        text: Text to speak. It is split into segments on blank/new lines.
        lang_code: Kokoro language code the text is written in.
        voice: Kokoro voice name.
        speed: Speech-rate multiplier.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 containing WAV data at
        24000 Hz with all generated segments joined in order.

    Raises:
        ValueError: If the pipeline produced no audio (e.g. empty text).
    """
    import numpy as np  # local import: numpy is not imported at file level

    # Use the shared pipeline when it already matches the requested language;
    # otherwise build one for lang_code so, e.g., English translations are not
    # run through another language's pipeline.
    # NOTE(review): assumes KPipeline exposes its language as `.lang_code`;
    # if it does not, this falls back to the shared pipeline as before.
    tts = pipeline
    if getattr(pipeline, "lang_code", lang_code) != lang_code:
        tts = KPipeline(lang_code=lang_code)

    # Collect every segment; keeping only the last one would truncate
    # multi-line input.
    segments = []
    for _graphemes, _phonemes, audio in tts(text, voice=voice, speed=speed, split_pattern=r'\n+'):
        if audio is None:
            continue
        if hasattr(audio, "detach"):
            # Tensor-like output: move to host memory before converting.
            audio = audio.detach().cpu().numpy()
        segments.append(np.asarray(audio))

    if not segments:
        raise ValueError("No audio was generated; the input text may be empty.")
    audio_data = np.concatenate(segments)

    # Save audio to in-memory buffer; the format must be given explicitly
    # because a BytesIO has no file extension to infer it from.
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, 24000, format='WAV')
    buffer.seek(0)
    return buffer
# Generate and display the audio file
if st.button('Generate Audio'):
    if not input_text.strip():
        # Nothing to synthesise: audio generation fails on empty input.
        st.warning("Please enter some text before generating audio.")
    else:
        # Generate audio for the original text
        st.write("Generating speech for the original text...")
        audio_buffer = generate_audio(input_text, lang_code, voice, speed)

        # Display Audio player for the original language
        st.audio(audio_buffer, format='audio/wav')

        # Optional: Save the generated audio file for download (Original Text)
        st.download_button(
            label="Download Audio (Original Text)",
            data=audio_buffer,
            file_name="generated_speech_original.wav",
            mime="audio/wav",
        )

        # Check if OpenAI API Key is provided for translation and English audio generation
        if openai_api_key:
            # Translate the input text to English using OpenAI
            translated_text = translate_to_english(openai_api_key, input_text, lang_code)

            # Generate audio for the translated English text ('a' = American English)
            translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)

            # Display Audio for the translated text
            st.write(f"Translated Text: {translated_text}")
            st.audio(translated_audio_buffer, format='audio/wav')

            # Optional: Save the generated audio file for download (Translated Text)
            st.download_button(
                label="Download Audio (Translated to English)",
                data=translated_audio_buffer,
                file_name="generated_speech_translated.wav",
                mime="audio/wav",
            )
|