Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,7 +3,6 @@ import openai
|
|
3 |
from kokoro import KPipeline
|
4 |
import soundfile as sf
|
5 |
import io
|
6 |
-
import time
|
7 |
|
8 |
# Streamlit App UI Setup
|
9 |
st.title("Text-to-Speech Translator with Kokoro")
|
@@ -59,6 +58,71 @@ st.sidebar.markdown("""
|
|
59 |
- Enjoy exploring different languages, voices, and speeds with the text-to-speech conversion!
|
60 |
""")
|
61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
# User input for text, language, and voice settings
|
63 |
input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
|
64 |
lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j'])
|
@@ -116,57 +180,51 @@ def translate_to_english(api_key, text, lang_code):
|
|
116 |
# Generate Audio function
|
117 |
def generate_audio(text, lang_code, voice, speed):
|
118 |
generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
|
119 |
-
audio_data = None
|
120 |
for i, (gs, ps, audio) in enumerate(generator):
|
121 |
audio_data = audio
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
return buffer
|
129 |
|
130 |
# Generate and display the audio file
|
131 |
if st.button('Generate Audio'):
|
132 |
|
133 |
-
#
|
134 |
-
with st.spinner(
|
135 |
-
|
136 |
-
time.sleep(0.5) # Simulate work being done, you can adjust timing here
|
137 |
-
st.spinner(f"Generating audio... {i}%")
|
138 |
-
|
139 |
-
# Generate audio for the original text after the spinner
|
140 |
st.write("Generating speech for the original text...")
|
141 |
audio_buffer = generate_audio(input_text, lang_code, voice, speed)
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
st.download_button(
|
148 |
-
label="Download Audio (Original Text)",
|
149 |
-
data=audio_buffer,
|
150 |
-
file_name="generated_speech_original.wav",
|
151 |
-
mime="audio/wav"
|
152 |
-
)
|
153 |
-
|
154 |
-
# Check if OpenAI API Key is provided for translation and English audio generation
|
155 |
-
if openai_api_key:
|
156 |
-
# Translate the input text to English using OpenAI
|
157 |
-
translated_text = translate_to_english(openai_api_key, input_text, lang_code)
|
158 |
-
|
159 |
-
# Generate audio for the translated English text
|
160 |
-
translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)
|
161 |
-
|
162 |
-
# Display Audio for the translated text
|
163 |
-
st.write(f"Translated Text: {translated_text}")
|
164 |
-
st.audio(translated_audio_buffer, format='audio/wav')
|
165 |
-
|
166 |
-
# Optional: Save the generated audio file for download (Translated Text)
|
167 |
st.download_button(
|
168 |
-
label="Download Audio (
|
169 |
-
data=
|
170 |
-
file_name="
|
171 |
mime="audio/wav"
|
172 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
from kokoro import KPipeline
|
4 |
import soundfile as sf
|
5 |
import io
|
|
|
6 |
|
7 |
# Streamlit App UI Setup
|
8 |
st.title("Text-to-Speech Translator with Kokoro")
|
|
|
58 |
- Enjoy exploring different languages, voices, and speeds with the text-to-speech conversion!
|
59 |
""")
|
60 |
|
61 |
+
# Custom HTML and CSS for Spinner
|
62 |
+
st.markdown("""
|
63 |
+
<style>
|
64 |
+
.lds-ellipsis,
|
65 |
+
.lds-ellipsis div {
|
66 |
+
box-sizing: border-box;
|
67 |
+
}
|
68 |
+
.lds-ellipsis {
|
69 |
+
display: inline-block;
|
70 |
+
position: relative;
|
71 |
+
width: 80px;
|
72 |
+
height: 80px;
|
73 |
+
}
|
74 |
+
.lds-ellipsis div {
|
75 |
+
position: absolute;
|
76 |
+
top: 33.33333px;
|
77 |
+
width: 13.33333px;
|
78 |
+
height: 13.33333px;
|
79 |
+
border-radius: 50%;
|
80 |
+
background: currentColor;
|
81 |
+
animation-timing-function: cubic-bezier(0, 1, 1, 0);
|
82 |
+
}
|
83 |
+
.lds-ellipsis div:nth-child(1) {
|
84 |
+
left: 8px;
|
85 |
+
animation: lds-ellipsis1 0.6s infinite;
|
86 |
+
}
|
87 |
+
.lds-ellipsis div:nth-child(2) {
|
88 |
+
left: 8px;
|
89 |
+
animation: lds-ellipsis2 0.6s infinite;
|
90 |
+
}
|
91 |
+
.lds-ellipsis div:nth-child(3) {
|
92 |
+
left: 32px;
|
93 |
+
animation: lds-ellipsis2 0.6s infinite;
|
94 |
+
}
|
95 |
+
.lds-ellipsis div:nth-child(4) {
|
96 |
+
left: 56px;
|
97 |
+
animation: lds-ellipsis3 0.6s infinite;
|
98 |
+
}
|
99 |
+
@keyframes lds-ellipsis1 {
|
100 |
+
0% {
|
101 |
+
transform: scale(0);
|
102 |
+
}
|
103 |
+
100% {
|
104 |
+
transform: scale(1);
|
105 |
+
}
|
106 |
+
}
|
107 |
+
@keyframes lds-ellipsis3 {
|
108 |
+
0% {
|
109 |
+
transform: scale(1);
|
110 |
+
}
|
111 |
+
100% {
|
112 |
+
transform: scale(0);
|
113 |
+
}
|
114 |
+
}
|
115 |
+
@keyframes lds-ellipsis2 {
|
116 |
+
0% {
|
117 |
+
transform: translate(0, 0);
|
118 |
+
}
|
119 |
+
100% {
|
120 |
+
transform: translate(24px, 0);
|
121 |
+
}
|
122 |
+
}
|
123 |
+
</style>
|
124 |
+
""", unsafe_allow_html=True)
|
125 |
+
|
126 |
# User input for text, language, and voice settings
|
127 |
input_text = st.text_area("Enter your text here", placeholder="The sky above the port was the color of television...")
|
128 |
lang_code = st.selectbox("Select Language", ['a', 'b', 'e', 'f', 'h', 'i', 'p', 'z', 'j'])
|
|
|
180 |
# Generate Audio function
|
181 |
def generate_audio(text, lang_code, voice, speed):
|
182 |
generator = pipeline(text, voice=voice, speed=speed, split_pattern=r'\n+')
|
|
|
183 |
for i, (gs, ps, audio) in enumerate(generator):
|
184 |
audio_data = audio
|
185 |
+
# Save audio to in-memory buffer
|
186 |
+
buffer = io.BytesIO()
|
187 |
+
# Explicitly specify format as WAV
|
188 |
+
sf.write(buffer, audio_data, 24000, format='WAV') # Add 'format="WAV"'
|
189 |
+
buffer.seek(0)
|
190 |
+
return buffer
|
|
|
191 |
|
192 |
# Generate and display the audio file
|
193 |
if st.button('Generate Audio'):
|
194 |
|
195 |
+
# Display spinner during audio generation
|
196 |
+
with st.spinner('Generating audio...'):
|
197 |
+
# Generate audio for the original text
|
|
|
|
|
|
|
|
|
198 |
st.write("Generating speech for the original text...")
|
199 |
audio_buffer = generate_audio(input_text, lang_code, voice, speed)
|
200 |
+
|
201 |
+
# Display Audio player for the original language
|
202 |
+
st.audio(audio_buffer, format='audio/wav')
|
203 |
+
|
204 |
+
# Optional: Save the generated audio file for download (Original Text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
st.download_button(
|
206 |
+
label="Download Audio (Original Text)",
|
207 |
+
data=audio_buffer,
|
208 |
+
file_name="generated_speech_original.wav",
|
209 |
mime="audio/wav"
|
210 |
)
|
211 |
+
|
212 |
+
# Check if OpenAI API Key is provided for translation and English audio generation
|
213 |
+
if openai_api_key:
|
214 |
+
# Translate the input text to English using OpenAI
|
215 |
+
translated_text = translate_to_english(openai_api_key, input_text, lang_code)
|
216 |
+
|
217 |
+
# Generate audio for the translated English text
|
218 |
+
translated_audio_buffer = generate_audio(translated_text, 'a', voice, speed)
|
219 |
+
|
220 |
+
# Display Audio for the translated text
|
221 |
+
st.write(f"Translated Text: {translated_text}")
|
222 |
+
st.audio(translated_audio_buffer, format='audio/wav')
|
223 |
+
|
224 |
+
# Optional: Save the generated audio file for download (Translated Text)
|
225 |
+
st.download_button(
|
226 |
+
label="Download Audio (Translated to English)",
|
227 |
+
data=translated_audio_buffer,
|
228 |
+
file_name="generated_speech_translated.wav",
|
229 |
+
mime="audio/wav"
|
230 |
+
)
|