xaman4

Running

App Files Community

salomonsky committed on Apr 7, 2024

Commit

d43da8d

verified ·

1 Parent(s): 9188a0a

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -40

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
 import base64
 import io
-import numpy as np
-import pydub
-import speech_recognition as sr
-import streamlit as st
 from huggingface_hub import InferenceClient
 from gtts import gTTS
-from streamlit_mic_recorder import mic_recorder
-pre_prompt_text = ""
 if "history" not in st.session_state:
     st.session_state.history = []
@@ -26,14 +24,14 @@ def recognize_speech(audio_data, show_messages=True):
     try:
         audio_text = recognizer.recognize_google(audio, language="es-ES")
         if show_messages:
-            st.subheader("Texto reconocido:")
             st.write(audio_text)
-            st.success("Voz reconocida.")
     except sr.UnknownValueError:
-        st.warning("El audio no pudo ser reconocido. ¿Intentaste grabar algo?")
         audio_text = ""
     except sr.RequestError:
-        st.error("Error en la solicitud al servicio de reconocimiento de voz.")
         audio_text = ""
     return audio_text
@@ -85,39 +83,21 @@ def text_to_speech(text):
     audio_fp.seek(0)
     return audio_fp
-def detect_voice_activity(audio_data):
-    audio = pydub.AudioSegment.from_file(io.BytesIO(audio_data))
-    audio = audio.set_channels(1)
-    signal_energy = np.sum(np.abs(audio.get_array_of_samples()))
-    threshold_energy = 5000
-    is_voice_active = signal_energy > threshold_energy
-    return is_voice_active
-def main():
-    st.write("Di la palabra XAMAN para empezar o DETENTE para procesar")
-    recording = mic_recorder(recording_container=st.empty(), auto_recording=True)
-    audio_data = None
-    while audio_data is None:
-        if recording:
-            st.write("Escuchando...")
-            audio_data = base64.b64decode(recording.split(",")[1])
-            audio_file = io.BytesIO(audio_data)
-            st.audio(audio_file, format="audio/wav")
-            is_voice_active = detect_voice_activity(audio_data)
-            if is_voice_active:
-                audio_text = recognize_speech(audio_file)
-                if audio_text:
-                    output, audio_file = generate(audio_text, history=st.session_state.history)
-                    if audio_file is not None:
-                        st.markdown(
-                            f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
-                            unsafe_allow_html=True)
-        else:
-            st.write("Esperando entrada de voz...")
 if __name__ == "__main__":
     main()

+import streamlit as st
 import base64
 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
+from audiorecorder import audiorecorder
+import speech_recognition as sr
+pre_prompt_text = "You are a behavioral AI, your answers should be brief, stoic and humanistic."
 if "history" not in st.session_state:
     st.session_state.history = []
     try:
         audio_text = recognizer.recognize_google(audio, language="es-ES")
         if show_messages:
+            st.subheader("Recognized text:")
             st.write(audio_text)
+            st.success("Voice Recognized.")
     except sr.UnknownValueError:
+        st.warning("The audio could not be recognized. Did you try to record something?")
         audio_text = ""
     except sr.RequestError:
+        st.error("Push/Talk to start!")
         audio_text = ""
     return audio_text
     audio_fp.seek(0)
     return audio_fp
+def main():
+    audio_data = audiorecorder("Push to Talk", "Stop Recording...")
+    if not audio_data.empty():
+        st.audio(audio_data.export().read(), format="audio/wav")
+        audio_data.export("audio.wav", format="wav")
+        audio_text = recognize_speech("audio.wav")
+        if audio_text:
+            output, audio_file = generate(audio_text, history=st.session_state.history)
+            if audio_file is not None:
+                st.markdown(
+                    f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
+                    unsafe_allow_html=True)
 if __name__ == "__main__":
     main()