Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import io
|
2 |
import base64
|
3 |
-
import soundfile as sf
|
4 |
from gtts import gTTS
|
5 |
import streamlit as st
|
6 |
import speech_recognition as sr
|
@@ -15,13 +14,11 @@ if "pre_prompt_sent" not in st.session_state:
|
|
15 |
|
16 |
pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
|
17 |
|
18 |
-
def recognize_speech(audio_data,
|
19 |
recognizer = sr.Recognizer()
|
20 |
|
21 |
try:
|
22 |
-
|
23 |
-
adjusted_audio_data = sf.resample(audio_data, sample_rate, 16000, subtype='PCM_16')
|
24 |
-
audio_text = recognizer.recognize_google(adjusted_audio_data, language="es-ES")
|
25 |
if show_messages:
|
26 |
st.subheader("Texto Reconocido:")
|
27 |
st.write(audio_text)
|
@@ -98,14 +95,13 @@ def main():
|
|
98 |
if not st.session_state.pre_prompt_sent:
|
99 |
st.session_state.pre_prompt_sent = True
|
100 |
|
101 |
-
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
|
102 |
|
103 |
if audio:
|
104 |
st.audio(audio['bytes'], format="audio/wav")
|
105 |
audio_bytes = audio["bytes"]
|
106 |
-
sample_rate = audio["sample_rate"]
|
107 |
|
108 |
-
audio_text = recognize_speech(audio_bytes
|
109 |
|
110 |
if audio_text:
|
111 |
output, audio_file = generate(audio_text, history=st.session_state.history)
|
|
|
1 |
import io
|
2 |
import base64
|
|
|
3 |
from gtts import gTTS
|
4 |
import streamlit as st
|
5 |
import speech_recognition as sr
|
|
|
14 |
|
15 |
pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
|
16 |
|
17 |
+
def recognize_speech(audio_data, show_messages=True):
|
18 |
recognizer = sr.Recognizer()
|
19 |
|
20 |
try:
|
21 |
+
audio_text = recognizer.recognize_google(audio_data, language="es-ES")
|
|
|
|
|
22 |
if show_messages:
|
23 |
st.subheader("Texto Reconocido:")
|
24 |
st.write(audio_text)
|
|
|
95 |
if not st.session_state.pre_prompt_sent:
|
96 |
st.session_state.pre_prompt_sent = True
|
97 |
|
98 |
+
audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder', sample_rate=16000)
|
99 |
|
100 |
if audio:
|
101 |
st.audio(audio['bytes'], format="audio/wav")
|
102 |
audio_bytes = audio["bytes"]
|
|
|
103 |
|
104 |
+
audio_text = recognize_speech(audio_bytes)
|
105 |
|
106 |
if audio_text:
|
107 |
output, audio_file = generate(audio_text, history=st.session_state.history)
|