Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,8 @@ from kokoro import KPipeline
|
|
3 |
import soundfile as sf
|
4 |
import io
|
5 |
import os
|
|
|
|
|
6 |
|
7 |
# Install espeak-ng if not installed
|
8 |
if not os.system("which espeak-ng"):
|
@@ -18,7 +20,6 @@ st.sidebar.header("Configuration & Instructions")
|
|
18 |
# Sidebar Instructions
|
19 |
st.sidebar.markdown("""
|
20 |
### How to Use the Text-to-Speech App:
|
21 |
-
|
22 |
1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
|
23 |
|
24 |
2. **Select Language**:
|
@@ -38,13 +39,11 @@ st.sidebar.markdown("""
|
|
38 |
|
39 |
4. **Adjust Speed**:
|
40 |
- Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
|
41 |
-
|
42 |
5. **Generate Speech**:
|
43 |
- After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
|
44 |
|
45 |
6. **Download**:
|
46 |
- Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
|
47 |
-
|
48 |
Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
|
49 |
""")
|
50 |
|
@@ -86,6 +85,26 @@ def generate_audio(text, lang_code, voice, speed):
|
|
86 |
buffer.seek(0)
|
87 |
return buffer
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
# Generate and display the audio file
|
90 |
if st.button('Generate Audio'):
|
91 |
st.write("Generating speech...")
|
@@ -94,6 +113,16 @@ if st.button('Generate Audio'):
|
|
94 |
# Display Audio player in the app
|
95 |
st.audio(audio_buffer, format='audio/wav')
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
# Optional: Save the generated audio file for download
|
98 |
st.download_button(
|
99 |
label="Download Audio",
|
|
|
3 |
import soundfile as sf
|
4 |
import io
|
5 |
import os
|
6 |
+
import speech_recognition as sr
|
7 |
+
import romkan # For Japanese Romanization
|
8 |
|
9 |
# Install espeak-ng if not installed
|
10 |
if not os.system("which espeak-ng"):
|
|
|
20 |
# Sidebar Instructions
|
21 |
st.sidebar.markdown("""
|
22 |
### How to Use the Text-to-Speech App:
|
|
|
23 |
1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
|
24 |
|
25 |
2. **Select Language**:
|
|
|
39 |
|
40 |
4. **Adjust Speed**:
|
41 |
- Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
|
|
|
42 |
5. **Generate Speech**:
|
43 |
- After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
|
44 |
|
45 |
6. **Download**:
|
46 |
- Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
|
|
|
47 |
Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
|
48 |
""")
|
49 |
|
|
|
85 |
buffer.seek(0)
|
86 |
return buffer
|
87 |
|
88 |
+
# Transcribe the generated audio using speech recognition
|
89 |
+
def transcribe_audio(audio_buffer, language="en-US"):
    """Transcribe WAV audio to text with the Google Web Speech API.

    Args:
        audio_buffer: Path or file-like object accepted by ``sr.AudioFile``.
            A seekable file-like object is rewound first, so the whole
            recording is read regardless of where a previous consumer left
            the read position.
        language: Language tag forwarded to ``recognize_google``. The
            default ``"en-US"`` matches the recognizer's own default; pass
            e.g. ``"ja-JP"`` for Japanese audio.

    Returns:
        The recognized text, or a human-readable error message when the
        audio could not be understood or the request to the service failed.
    """
    # Rewind so decoding starts at the WAV header, not mid-stream.
    seekable = getattr(audio_buffer, "seekable", None)
    if callable(seekable) and seekable():
        audio_buffer.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_buffer) as source:
        audio = recognizer.record(source)

    try:
        # Requires internet access: the audio is sent to Google's service.
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        return "Sorry, could not understand the audio"
    except sr.RequestError as e:
        return f"Request error from Google Speech Recognition service; {e}"
|
101 |
+
|
102 |
+
# Romanize (convert to Romaji) if the language is Japanese
|
103 |
+
def romanize_text(text, lang_code):
    """Return *text* converted to romaji when *lang_code* is ``'j'``.

    Text for any other language code is returned unchanged.
    """
    is_japanese = lang_code == 'j'  # 'j' is the Japanese language code
    if not is_japanese:
        return text
    return romkan.to_romaji(text)
|
107 |
+
|
108 |
# Generate and display the audio file
|
109 |
if st.button('Generate Audio'):
|
110 |
st.write("Generating speech...")
|
|
|
113 |
# Display Audio player in the app
|
114 |
st.audio(audio_buffer, format='audio/wav')
|
115 |
|
116 |
+
# Transcribe the generated speech to text
|
117 |
+
transcription = transcribe_audio(audio_buffer)
|
118 |
+
|
119 |
+
# Romanize the transcription if it's Japanese
|
120 |
+
romanized_text = romanize_text(transcription, lang_code)
|
121 |
+
|
122 |
+
# Display the transcribed and Romanized text
|
123 |
+
st.write("Transcribed Text: ", transcription)
|
124 |
+
st.write("Romanized Pronunciation: ", romanized_text)
|
125 |
+
|
126 |
# Optional: Save the generated audio file for download
|
127 |
st.download_button(
|
128 |
label="Download Audio",
|