xaman4

Running

App Files Community

salomonsky committed on Jan 20, 2024

Commit

686ef78

verified ·

1 Parent(s): f2c3ba6

Update app.py

Browse files

Files changed (1) hide show

app.py +18 -20

app.py CHANGED Viewed

@@ -15,27 +15,26 @@ microphone = sr.Microphone()
 # reconociendo voz
 def recognize_speech_with_vad(audio_data, show_messages=True):
-    with microphone as source:
-        try:
-            st.info("Escuchando...")
-            audio_data.record(source, vad_enabled=True)
             st.success("Fin de la grabación. Procesando audio...")
-            audio_text = recognizer.recognize_google(audio_data, language="es-ES")
-            if show_messages:
-                st.subheader("Texto Reconocido:")
-                st.write(audio_text)
-        except sr.UnknownValueError:
-            st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
-            audio_text = ""
-        except sr.RequestError:
-            st.error("Hablame para comenzar!")
-            audio_text = ""
-    return audio_text
 # preparando entrada para el modelo de lenguaje
 def format_prompt(message, history):
     prompt = "<s>"
@@ -98,17 +97,16 @@ def audio_player_markup(audio_file):
 # interfaz de usuario
 def main():
     st.title("Chatbot de Voz a Voz")
-    st.info("Habla para grabar...")
-    audio_data = audiorecorder("Escuchando...", "Deteniendo la grabación...")
     if not audio_data.empty():
         st.audio(audio_data.export().read(), format="audio/wav")
         audio_data.export("audio.wav", format="wav")
-        audio_text = recognize_speech("audio.wav")
         if audio_text:
             st.success("Frase detectada. Procesando audio...")
-            output, audio_file = generate(audio_text, history=st.session_state.history)
             if audio_file is not None:
                 st.markdown(audio_player_markup(audio_file), unsafe_allow_html=True)

 # reconociendo voz
 def recognize_speech_with_vad(audio_data, show_messages=True):
+    try:
+        with sr.AudioFile(audio_data) as source:
+            audio_data = recognizer.record(source, vad_enabled=True)
             st.success("Fin de la grabación. Procesando audio...")
+        audio_text = recognizer.recognize_google(audio_data, language="es-ES")
+        if show_messages:
+            st.subheader("Texto Reconocido:")
+            st.write(audio_text)
+    except sr.UnknownValueError:
+        st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
+        audio_text = ""
+    except sr.RequestError:
+        st.error("Hablame para comenzar!")
+        audio_text = ""
+    return audio_text
 # preparando entrada para el modelo de lenguaje
 def format_prompt(message, history):
     prompt = "<s>"
 # interfaz de usuario
 def main():
     st.title("Chatbot de Voz a Voz")
+    audio_data = audiorecorder("Deteniendo la grabación...", vad_enabled=True)
     if not audio_data.empty():
         st.audio(audio_data.export().read(), format="audio/wav")
         audio_data.export("audio.wav", format="wav")
+        audio_text = recognize_speech_with_vad("audio.wav")
         if audio_text:
             st.success("Frase detectada. Procesando audio...")
+            output, audio_file = generate(audio_text, history=st.session_state.history)
             if audio_file is not None:
                 st.markdown(audio_player_markup(audio_file), unsafe_allow_html=True)