shukdevdatta123 committed on
Commit
ee1b822
·
verified ·
1 Parent(s): 385c233

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -3
app.py CHANGED
@@ -3,6 +3,8 @@ from kokoro import KPipeline
3
  import soundfile as sf
4
  import io
5
  import os
 
 
6
 
7
  # Install espeak-ng if not installed
8
  if not os.system("which espeak-ng"):
@@ -18,7 +20,6 @@ st.sidebar.header("Configuration & Instructions")
18
  # Sidebar Instructions
19
  st.sidebar.markdown("""
20
  ### How to Use the Text-to-Speech App:
21
-
22
  1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
23
 
24
  2. **Select Language**:
@@ -38,13 +39,11 @@ st.sidebar.markdown("""
38
 
39
  4. **Adjust Speed**:
40
  - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
41
-
42
  5. **Generate Speech**:
43
  - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
44
 
45
  6. **Download**:
46
  - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
47
-
48
  Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
49
  """)
50
 
@@ -86,6 +85,26 @@ def generate_audio(text, lang_code, voice, speed):
86
  buffer.seek(0)
87
  return buffer
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  # Generate and display the audio file
90
  if st.button('Generate Audio'):
91
  st.write("Generating speech...")
@@ -94,6 +113,16 @@ if st.button('Generate Audio'):
94
  # Display Audio player in the app
95
  st.audio(audio_buffer, format='audio/wav')
96
 
 
 
 
 
 
 
 
 
 
 
97
  # Optional: Save the generated audio file for download
98
  st.download_button(
99
  label="Download Audio",
 
3
  import soundfile as sf
4
  import io
5
  import os
6
+ import speech_recognition as sr
7
+ import romkan # For Japanese Romanization
8
 
9
  # Install espeak-ng if not installed
10
  if not os.system("which espeak-ng"):
 
20
  # Sidebar Instructions
21
  st.sidebar.markdown("""
22
  ### How to Use the Text-to-Speech App:
 
23
  1. **Enter Text**: In the main text area, input any text that you want the model to convert to speech.
24
 
25
  2. **Select Language**:
 
39
 
40
  4. **Adjust Speed**:
41
  - Use the speed slider to change how fast the speech is generated. You can set it between `0.5x` to `2.0x`, where `1.0x` is the normal speed.
 
42
  5. **Generate Speech**:
43
  - After configuring the settings, click on the **"Generate Audio"** button. The app will process your text and produce speech audio accordingly.
44
 
45
  6. **Download**:
46
  - Once the audio is generated, you can play it directly in the app or download it as a `.wav` file by clicking on the **"Download Audio"** button.
 
47
  Enjoy experimenting with the text-to-speech conversion, and feel free to try different voices, speeds, and languages!
48
  """)
49
 
 
85
  buffer.seek(0)
86
  return buffer
87
 
88
+ # Transcribe the generated audio using speech recognition
89
def transcribe_audio(audio_buffer, language="en-US"):
    """Transcribe audio held in a file-like buffer to text.

    The audio is sent to the Google Web Speech API through the
    ``speech_recognition`` package, so an internet connection is required.

    Args:
        audio_buffer: File-like object (or path) accepted by
            ``sr.AudioFile`` that contains the audio to transcribe.
        language: Language tag forwarded to ``recognize_google``.
            Defaults to "en-US", which matches the library default, so
            callers that pass only ``audio_buffer`` behave as before.
            Pass e.g. "ja-JP" to transcribe Japanese speech.

    Returns:
        The recognized text, or a human-readable error message when the
        audio cannot be understood or the service request fails.
    """
    # An earlier consumer may have moved the read position; rewind so the
    # audio is parsed from its header.
    if hasattr(audio_buffer, "seek"):
        audio_buffer.seek(0)

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_buffer) as source:
        audio = recognizer.record(source)

    try:
        # Transcribe using Google Web Speech API (requires internet)
        return recognizer.recognize_google(audio, language=language)
    except sr.UnknownValueError:
        return "Sorry, could not understand the audio"
    except sr.RequestError as e:
        return f"Request error from Google Speech Recognition service; {e}"
101
+
102
+ # Romanize (convert to Romaji) if the language is Japanese
103
def romanize_text(text, lang_code):
    """Return ``text`` romanized when ``lang_code`` selects Japanese.

    Args:
        text: The string to convert.
        lang_code: Language code chosen in the app; 'j' means Japanese.

    Returns:
        ``romkan.to_romaji(text)`` for Japanese, otherwise ``text``
        unchanged.
    """
    is_japanese = lang_code == 'j'
    # Other languages need no romanization, so they pass through as-is.
    return romkan.to_romaji(text) if is_japanese else text
107
+
108
  # Generate and display the audio file
109
  if st.button('Generate Audio'):
110
  st.write("Generating speech...")
 
113
  # Display Audio player in the app
114
  st.audio(audio_buffer, format='audio/wav')
115
 
116
+ # Transcribe the generated speech to text
117
+ transcription = transcribe_audio(audio_buffer)
118
+
119
+ # Romanize the transcription if it's Japanese
120
+ romanized_text = romanize_text(transcription, lang_code)
121
+
122
+ # Display the transcribed and Romanized text
123
+ st.write("Transcribed Text: ", transcription)
124
+ st.write("Romanized Pronunciation: ", romanized_text)
125
+
126
  # Optional: Save the generated audio file for download
127
  st.download_button(
128
  label="Download Audio",