shukdevdatta123 committed on
Commit
63aca15
·
verified ·
1 Parent(s): bf626b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -52
app.py CHANGED
@@ -3,7 +3,7 @@ from kokoro import KPipeline
3
  import soundfile as sf
4
  import io
5
  import os
6
- import time
7
 
8
  # Install espeak-ng if not installed
9
  if not os.system("which espeak-ng"):
@@ -13,9 +13,9 @@ else:
13
  st.text("Installing espeak-ng...")
14
 
15
  # Streamlit App UI Setup
16
- st.title("Interactive Text-to-Speech with Kokoro")
17
 
18
- # Expander section for language samples
19
  with st.expander("Sample Prompt!"):
20
  st.markdown("""
21
  - My name is Shukdev. (In English)
@@ -27,84 +27,125 @@ with st.expander("Sample Prompt!"):
27
  - 我叫苏赫德夫。(In Chinese)
28
  - 私の名前はスクデフです。(In Japanese)
29
  """)
30
-
31
- # Sidebar Instructions and Configuration
32
  st.sidebar.header("Configuration & Instructions")
33
 
 
34
  st.sidebar.markdown("""
35
  ### How to Use the Text-to-Speech App:
36
- 1. **Enter Text**: Input any text that you want to convert to speech.
37
- 2. **Select Language**: Choose the language of the text.
38
- 3. **Select Voice**: Choose the voice style.
39
- 4. **Select Speed**: Use the slider to change the speech speed.
40
- 5. **Add Background Music**: Optional background music for the speech (choose genre).
41
- 6. **Generate Speech**: Click on **Generate Audio** to create speech.
42
- 7. **Download**: Download the generated audio file.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  """)
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # User input for text, language, and voice settings
46
  input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
47
- lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  voice = st.selectbox("Select Voice", ['af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
49
- 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa',
50
- 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
51
- 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
52
- 'ef_dora',
53
- 'em_alex', 'em_santa',
54
- 'ff_siwis',
55
- 'hf_alpha', 'hf_beta',
56
- 'hm_omega', 'hm_psi',
57
- 'if_sara',
58
- 'im_nicola',
59
- 'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
60
- 'jm_kumo',
61
- 'pf_dora',
62
- 'pm_alex', 'pm_santa',
63
- 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
64
- 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'])
65
- speed = st.slider("Speech Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
66
-
67
- background_music = st.selectbox("Add Background Music", ['None', 'Calm', 'Energetic', 'Focus', 'Nature'])
68
 
69
  # Initialize the TTS pipeline with user-selected language
70
  pipeline = KPipeline(lang_code=lang_code)
71
 
72
- # Function to generate audio with background music
73
def generate_audio_with_music(text, lang_code, voice, speed, music_type):
    """Synthesize ``text`` to speech and return it as an in-memory WAV file.

    Background-music mixing is still a placeholder: when ``music_type`` is
    not ``'None'`` a status message is shown, but no music is blended in.

    Args:
        text: Text to speak; the pipeline is asked to split it on newlines.
        lang_code: Not used inside this function (the module-level
            ``pipeline`` is built from the selected language); kept so
            existing callers keep working.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speech speed forwarded to the pipeline.
        music_type: Selected background-music genre, or ``'None'``.

    Returns:
        io.BytesIO: Buffer rewound to position 0 holding a WAV file (written
        at 24000 Hz) with every generated segment, in order.

    Raises:
        ValueError: If the pipeline produced no audio at all.
    """
    # Local import: the file's top-level import block is only partly visible.
    import numpy as np

    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    # Keep every yielded segment; previously each iteration overwrote the
    # last one, so multi-line input lost everything but its final segment.
    segments = [
        np.asarray(audio)
        for _, _, audio in generator
        if audio is not None
    ]
    if not segments:
        # Previously this path failed with an unbound-variable error.
        raise ValueError("No audio was generated; please enter some text.")

    if music_type != 'None':
        # Announce once instead of once per segment, and do not allocate the
        # throwaway 500 kB placeholder buffer that was never used.
        st.write(f"Adding {music_type} background music...")

    audio_data = np.concatenate(segments)

    # Save audio to in-memory buffer
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, 24000, format='WAV')
    buffer.seek(0)
    return buffer
90
 
91
- # Generate Audio Button
92
  if st.button('Generate Audio'):
93
- st.write("Generating speech with background music...")
94
- audio_buffer = generate_audio_with_music(input_text, lang_code, voice, speed, background_music)
95
 
96
- # Display audio player in the app
97
  st.audio(audio_buffer, format='audio/wav')
98
 
99
- # Option to download the audio file
100
  st.download_button(
101
  label="Download Audio",
102
  data=audio_buffer,
103
- file_name="generated_speech_with_music.wav",
104
  mime="audio/wav"
105
  )
106
 
107
- # Speech Progress Feedback: Add voice feedback or something fun
108
- st.write("Would you like to hear the audio again?")
109
- if st.button('Replay Audio'):
 
110
  st.audio(audio_buffer, format='audio/wav')
 
3
  import soundfile as sf
4
  import io
5
  import os
6
+ from langdetect import detect # Language detection library
7
 
8
  # Install espeak-ng if not installed
9
  if not os.system("which espeak-ng"):
 
13
  st.text("Installing espeak-ng...")
14
 
15
  # Streamlit App UI Setup
16
+ st.title("Text-to-Speech with Kokoro")
17
 
18
+ # Expander section to display information in multiple languages
19
  with st.expander("Sample Prompt!"):
20
  st.markdown("""
21
  - My name is Shukdev. (In English)
 
27
  - 我叫苏赫德夫。(In Chinese)
28
  - 私の名前はスクデフです。(In Japanese)
29
  """)
30
+
 
31
  st.sidebar.header("Configuration & Instructions")
32
 
33
+ # Sidebar Instructions
34
  st.sidebar.markdown("""
35
  ### How to Use the Text-to-Speech App:
36
+ 1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
37
+
38
+ 2. **Select Language**:
39
+ - Choose the language of the text you are entering. Available options include:
40
+ - 🇺🇸 American English (`a`)
41
+ - 🇬🇧 British English (`b`)
42
+ - 🇪🇸 Spanish (`e`)
43
+ - 🇫🇷 French (`f`)
44
+ - 🇮🇳 Hindi (`h`)
45
+ - 🇮🇹 Italian (`i`)
46
+ - 🇧🇷 Brazilian Portuguese (`p`)
47
+ - 🇨🇳 Mandarin Chinese (`z`)
48
+ - 🇯🇵 Japanese (`j`)
49
+
50
+ 3. **Select Voice**:
51
+ - Choose the voice style for the speech. You can pick different voices based on tone and gender, such as `af_heart`, `af_joy`, etc.
52
+
53
+ 4. **Adjust Speed**:
54
+ - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
55
+ 5. **Generate Speech**:
56
+ - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
57
+
58
+ 6. **Download**:
59
+ - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
60
+ Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
61
  """)
62
 
63
+ st.sidebar.markdown("""
64
+ ### Courtesy: [Kokoro](https://huggingface.co/hexgrad/Kokoro-82M?fbclid=IwY2xjawIKqzxleHRuA2FlbQIxMAABHaf9GldgYOzXktNuoRtNKqd-aL7r-S7zPGyC8ttYOiG2zYfQqLyV4Qm75A_aem_0wKLC2C87ZZ2F04WjPJbtA)
65
+ """)
66
+
67
+ # Language Detection Function
68
def detect_language(text):
    """Detect the language of ``text`` and return its base language code.

    Args:
        text: User-supplied text to analyse. May be empty or ``None``.

    Returns:
        The detected code with any region suffix removed (for example
        ``'zh-cn'`` becomes ``'zh'``), or ``None`` when ``text`` is blank or
        detection fails.
    """
    # A blank text area is the normal state before the user types anything,
    # so skip detection instead of showing an error banner for it.
    if not text or not text.strip():
        return None

    try:
        lang = detect(text)
    except Exception as e:  # UI boundary: report the failure, do not crash
        st.error("Error detecting language: " + str(e))
        return None

    # langdetect reports Chinese as 'zh-cn' / 'zh-tw', while the language map
    # in this file is keyed on the bare 'zh'; drop the region part.
    return lang.split('-')[0]
75
+
76
  # User input for text, language, and voice settings
77
  input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
78
auto_detect_lang = detect_language(input_text)

# Language codes offered in the selector, in display order.
lang_options = ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j']

# Map detected language codes onto the single-letter codes used above.
# langdetect reports Chinese as 'zh-cn' / 'zh-tw', so those are listed too.
lang_map = {
    'en': 'a',     # American English
    'es': 'e',     # Spanish
    'fr': 'f',     # French
    'hi': 'h',     # Hindi
    'it': 'i',     # Italian
    'pt': 'p',     # Portuguese
    'zh': 'z',     # Chinese
    'zh-cn': 'z',  # Chinese (simplified)
    'zh-tw': 'z',  # Chinese (traditional)
    'ja': 'j',     # Japanese
}

# Always show the selector and merely preselect the detected language, so the
# user can override a wrong guess (and can still pick British English, which
# detection can never produce). Falls back to 'a' when nothing was detected.
default_lang = lang_map.get(auto_detect_lang, 'a')
lang_code = st.selectbox(
    "Select Language",
    lang_options,
    index=lang_options.index(default_lang),
)
95
+
96
  voice = st.selectbox("Select Voice", ['af_alloy', 'af_aoede', 'af_bella', 'af_heart', 'af_jessica', 'af_kore', 'af_nicole', 'af_nova', 'af_river', 'af_sarah', 'af_sky',
97
+ 'am_adam', 'am_echo', 'am_eric', 'am_fenrir', 'am_liam', 'am_michael', 'am_onyx', 'am_puck', 'am_santa',
98
+ 'bf_alice', 'bf_emma', 'bf_isabella', 'bf_lily',
99
+ 'bm_daniel', 'bm_fable', 'bm_george', 'bm_lewis',
100
+ 'ef_dora',
101
+ 'em_alex', 'em_santa',
102
+ 'ff_siwis',
103
+ 'hf_alpha', 'hf_beta',
104
+ 'hm_omega', 'hm_psi',
105
+ 'if_sara',
106
+ 'im_nicola',
107
+ 'jf_alpha', 'jf_gongitsune', 'jf_nezumi', 'jf_tebukuro',
108
+ 'jm_kumo',
109
+ 'pf_dora',
110
+ 'pm_alex', 'pm_santa',
111
+ 'zf_xiaobei', 'zf_xiaoni', 'zf_xiaoxiao', 'zf_xiaoyi',
112
+ 'zm_yunjian', 'zm_yunxi', 'zm_yunxia', 'zm_yunyang'])
113
+
114
+ speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
 
115
 
116
  # Initialize the TTS pipeline with user-selected language
117
  pipeline = KPipeline(lang_code=lang_code)
118
 
119
+ # Generate Audio function
120
def generate_audio(text, lang_code, voice, speed):
    """Synthesize ``text`` to speech and return it as an in-memory WAV file.

    Args:
        text: Text to speak; the pipeline is asked to split it on newlines.
        lang_code: Not used inside this function (the module-level
            ``pipeline`` is built from the selected language); kept so
            existing callers keep working.
        voice: Voice identifier forwarded to the pipeline.
        speed: Speech speed forwarded to the pipeline.

    Returns:
        io.BytesIO: Buffer rewound to position 0 holding a WAV file (written
        at 24000 Hz) with every generated segment, in order.

    Raises:
        ValueError: If the pipeline produced no audio at all.
    """
    # Local import: the file's top-level import block is only partly visible.
    import numpy as np

    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')

    # Keep every yielded segment; previously each iteration overwrote the
    # last one, so multi-line input lost everything but its final segment.
    segments = [
        np.asarray(audio)
        for _, _, audio in generator
        if audio is not None
    ]
    if not segments:
        # Previously this path failed with an unbound-variable error.
        raise ValueError("No audio was generated; please enter some text.")

    audio_data = np.concatenate(segments)

    # Save audio to in-memory buffer, explicitly as WAV because a BytesIO
    # target has no file extension to infer the format from.
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, 24000, format='WAV')
    buffer.seek(0)
    return buffer
130
 
131
+ # Generate and display the audio file
132
  if st.button('Generate Audio'):
133
+ st.write("Generating speech...")
134
+ audio_buffer = generate_audio(input_text, lang_code, voice, speed)
135
 
136
+ # Display Audio player in the app
137
  st.audio(audio_buffer, format='audio/wav')
138
 
139
+ # Optional: Save the generated audio file for download
140
  st.download_button(
141
  label="Download Audio",
142
  data=audio_buffer,
143
+ file_name="generated_speech.wav",
144
  mime="audio/wav"
145
  )
146
 
147
+ # Interactive Voice Feedback
148
+ feedback = st.radio("Do you want to hear it again?", ('No', 'Yes'))
149
+ if feedback == 'Yes':
150
+ st.write("Replaying the generated speech...")
151
  st.audio(audio_buffer, format='audio/wav')