salomonsky committed on
Commit
d43da8d
·
verified ·
1 Parent(s): 9188a0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -40
app.py CHANGED
@@ -1,14 +1,12 @@
 
1
  import base64
2
  import io
3
- import numpy as np
4
- import pydub
5
- import speech_recognition as sr
6
- import streamlit as st
7
  from huggingface_hub import InferenceClient
8
  from gtts import gTTS
9
- from streamlit_mic_recorder import mic_recorder
 
10
 
11
- pre_prompt_text = ""
12
 
13
  if "history" not in st.session_state:
14
  st.session_state.history = []
@@ -26,14 +24,14 @@ def recognize_speech(audio_data, show_messages=True):
26
  try:
27
  audio_text = recognizer.recognize_google(audio, language="es-ES")
28
  if show_messages:
29
- st.subheader("Texto reconocido:")
30
  st.write(audio_text)
31
- st.success("Voz reconocida.")
32
  except sr.UnknownValueError:
33
- st.warning("El audio no pudo ser reconocido. ¿Intentaste grabar algo?")
34
  audio_text = ""
35
  except sr.RequestError:
36
- st.error("Error en la solicitud al servicio de reconocimiento de voz.")
37
  audio_text = ""
38
 
39
  return audio_text
@@ -85,39 +83,21 @@ def text_to_speech(text):
85
  audio_fp.seek(0)
86
  return audio_fp
87
 
88
- def detect_voice_activity(audio_data):
89
- audio = pydub.AudioSegment.from_file(io.BytesIO(audio_data))
90
- audio = audio.set_channels(1)
91
- signal_energy = np.sum(np.abs(audio.get_array_of_samples()))
92
- threshold_energy = 5000
93
- is_voice_active = signal_energy > threshold_energy
94
 
95
- return is_voice_active
 
 
 
96
 
97
- def main():
98
- st.write("Di la palabra XAMAN para empezar o DETENTE para procesar")
99
 
100
- recording = mic_recorder(recording_container=st.empty(), auto_recording=True)
101
- audio_data = None
102
-
103
- while audio_data is None:
104
- if recording:
105
- st.write("Escuchando...")
106
- audio_data = base64.b64decode(recording.split(",")[1])
107
- audio_file = io.BytesIO(audio_data)
108
- st.audio(audio_file, format="audio/wav")
109
- is_voice_active = detect_voice_activity(audio_data)
110
-
111
- if is_voice_active:
112
- audio_text = recognize_speech(audio_file)
113
- if audio_text:
114
- output, audio_file = generate(audio_text, history=st.session_state.history)
115
- if audio_file is not None:
116
- st.markdown(
117
- f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
118
- unsafe_allow_html=True)
119
- else:
120
- st.write("Esperando entrada de voz...")
121
 
122
  if __name__ == "__main__":
123
  main()
 
1
+ import streamlit as st
2
  import base64
3
  import io
 
 
 
 
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ from audiorecorder import audiorecorder
7
+ import speech_recognition as sr
8
 
9
+ pre_prompt_text = "You are a behavioral AI, your answers should be brief, stoic and humanistic."
10
 
11
  if "history" not in st.session_state:
12
  st.session_state.history = []
 
24
  try:
25
  audio_text = recognizer.recognize_google(audio, language="es-ES")
26
  if show_messages:
27
+ st.subheader("Recognized text:")
28
  st.write(audio_text)
29
+ st.success("Voice Recognized.")
30
  except sr.UnknownValueError:
31
+ st.warning("The audio could not be recognized. Did you try to record something?")
32
  audio_text = ""
33
  except sr.RequestError:
34
+ st.error("Push/Talk to start!")
35
  audio_text = ""
36
 
37
  return audio_text
 
83
  audio_fp.seek(0)
84
  return audio_fp
85
 
86
+ def main():
87
+ audio_data = audiorecorder("Push to Talk", "Stop Recording...")
 
 
 
 
88
 
89
+ if not audio_data.empty():
90
+ st.audio(audio_data.export().read(), format="audio/wav")
91
+ audio_data.export("audio.wav", format="wav")
92
+ audio_text = recognize_speech("audio.wav")
93
 
94
+ if audio_text:
95
+ output, audio_file = generate(audio_text, history=st.session_state.history)
96
 
97
+ if audio_file is not None:
98
+ st.markdown(
99
+ f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
100
+ unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
  if __name__ == "__main__":
103
  main()