Spaces:

shukdevdatta123
/

Kokoro-TTS

Running

App Files Files Community

shukdevdatta123 committed on Feb 1

Commit

ee1b822

verified ·

1 Parent(s): 385c233

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -3

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from kokoro import KPipeline
 import soundfile as sf
 import io
 import os
 # Install espeak-ng if not installed
 if not os.system("which espeak-ng"):
@@ -18,7 +20,6 @@ st.sidebar.header("Configuration & Instructions")
 # Sidebar Instructions
 st.sidebar.markdown("""
 ### How to Use the Text-to-Speech App:
 1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
 2. **Select Language**:
@@ -38,13 +39,11 @@ st.sidebar.markdown("""
 4. **Adjust Speed**:
    - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
 5. **Generate Speech**:
    - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
 6. **Download**:
    - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
 Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
 """)
@@ -86,6 +85,26 @@ def generate_audio(text, lang_code, voice, speed):
         buffer.seek(0)
         return buffer
 # Generate and display the audio file
 if st.button('Generate Audio'):
     st.write("Generating speech...")
@@ -94,6 +113,16 @@ if st.button('Generate Audio'):
     # Display Audio player in the app
     st.audio(audio_buffer, format='audio/wav')
     # Optional: Save the generated audio file for download
     st.download_button(
         label="Download Audio",

 import soundfile as sf
 import io
 import os
+import speech_recognition as sr
+import romkan  # For Japanese Romanization
 # Install espeak-ng if not installed
 if not os.system("which espeak-ng"):
 # Sidebar Instructions
 st.sidebar.markdown("""
 ### How to Use the Text-to-Speech App:
 1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
 2. **Select Language**:
 4. **Adjust Speed**:
    - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
 5. **Generate Speech**:
    - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
 6. **Download**:
    - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
 Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
 """)
         buffer.seek(0)
         return buffer
+# Transcribe the generated audio using speech recognition
+def transcribe_audio(audio_buffer):
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_buffer) as source:
+        audio = recognizer.record(source)
+    try:
+        # Transcribe using Google Web Speech API (requires internet)
+        text = recognizer.recognize_google(audio)
+        return text
+    except sr.UnknownValueError:
+        return "Sorry, could not understand the audio"
+    except sr.RequestError as e:
+        return f"Request error from Google Speech Recognition service; {e}"
+# Romanize (convert to Romaji) if the language is Japanese
+def romanize_text(text, lang_code):
+    if lang_code == 'j':  # Japanese language code
+        return romkan.to_romaji(text)
+    return text  # No need for Romanization for other languages
 # Generate and display the audio file
 if st.button('Generate Audio'):
     st.write("Generating speech...")
     # Display Audio player in the app
     st.audio(audio_buffer, format='audio/wav')
+    # Transcribe the generated speech to text
+    transcription = transcribe_audio(audio_buffer)
+    # Romanize the transcription if it's Japanese
+    romanized_text = romanize_text(transcription, lang_code)
+    # Display the transcribed and Romanized text
+    st.write("Transcribed Text: ", transcription)
+    st.write("Romanized Pronunciation: ", romanized_text)
     # Optional: Save the generated audio file for download
     st.download_button(
         label="Download Audio",