salomonsky committed on
Commit
295dea8
verified
1 Parent(s): 724cd71

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -30
app.py CHANGED
@@ -6,18 +6,16 @@ from huggingface_hub import InferenceClient
6
  from gtts import gTTS
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
 
9
 
10
- if "history" not in st.session_state:
11
- st.session_state.history = []
12
-
13
- def list_available_microphones():
14
- p = pyaudio.PyAudio()
15
- info = p.get_host_api_info_by_index(0)
16
- numdevices = info.get('deviceCount')
17
- available_microphones = [i for i in range(numdevices) if p.get_device_info_by_index(i).get('maxInputChannels') > 0]
18
- p.terminate()
19
-
20
- return available_microphones
21
 
22
  def recognize_speech(device_index, show_messages=True):
23
  recognizer = sr.Recognizer()
@@ -93,12 +91,12 @@ def text_to_speech(text, speed=1.3):
93
 
94
  def main():
95
  st.title("Chatbot de Voz a Voz")
96
- available_microphones = list_available_microphones()
97
 
98
- if available_microphones:
99
- st.info("Micrófonos disponibles: {}".format(available_microphones))
100
- selected_microphone = st.selectbox("Selecciona un micrófono", available_microphones)
101
- st.info("Micrófono seleccionado: {}".format(selected_microphone))
102
  recognizer = sr.Recognizer()
103
  audio_recording = sr.Microphone(device_index=selected_microphone, sample_rate=16000, chunk_size=1024)
104
 
@@ -111,22 +109,21 @@ def main():
111
 
112
  if audio_data:
113
  st.audio(audio_data.frame_data, format="audio/wav")
114
- audio_text = recognize_speech(device_index=selected_microphone)
115
- if audio_text:
116
- output, audio_file = generate(audio_text, history=st.session_state.history)
117
-
118
  if audio_text:
119
- st.session_state.history.append((audio_text, output))
120
-
121
- if audio_file is not None:
122
- st.markdown(
123
- f"""
124
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
125
- """,
126
- unsafe_allow_html=True
127
- )
128
  else:
129
  st.warning("No se detectaron micrófonos disponibles. Asegúrate de que tengas un micrófono conectado.")
130
 
131
  if __name__ == "__main__":
132
- main()
 
6
  from gtts import gTTS
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
9
+ import webrtcvad
10
 
11
def perform_vad(audio_data, frame_ms=30):
    """Return True if any frame of the recording contains speech.

    webrtcvad only accepts 16-bit mono PCM frames of exactly 10, 20 or
    30 ms at 8/16/32/48 kHz, so the recording is split into fixed-size
    frames instead of being passed whole (which raises ValueError for
    arbitrary-length buffers).

    Parameters
    ----------
    audio_data : speech_recognition.AudioData
        Raw recording; its ``frame_data`` and ``sample_rate`` attributes
        are read. Assumed 16-bit mono PCM — TODO confirm at the caller.
    frame_ms : int, optional
        Frame size in milliseconds; must be 10, 20 or 30. Default 30.

    Returns
    -------
    bool
        True if the VAD marks at least one full frame as speech.
    """
    vad = webrtcvad.Vad(1)  # mode 1: mildly aggressive filtering
    sample_rate = audio_data.sample_rate
    pcm = audio_data.frame_data
    # Bytes per frame: samples-per-frame * 2 bytes (16-bit samples).
    frame_bytes = int(sample_rate * frame_ms / 1000) * 2
    # Any voiced frame counts as speech; a trailing partial frame is
    # dropped because the VAD rejects short buffers.
    return any(
        vad.is_speech(pcm[i:i + frame_bytes], sample_rate)
        for i in range(0, len(pcm) - frame_bytes + 1, frame_bytes)
    )
 
 
 
19
 
20
  def recognize_speech(device_index, show_messages=True):
21
  recognizer = sr.Recognizer()
 
91
 
92
  def main():
93
  st.title("Chatbot de Voz a Voz")
94
+ available_microphones = pyaudio.PyAudio().get_device_count()
95
 
96
+ if available_microphones > 0:
97
+ st.info(f"Número de micrófonos disponibles: {available_microphones}")
98
+ selected_microphone = st.selectbox("Selecciona un micrófono", list(range(available_microphones)))
99
+ st.info(f"Número de micrófono seleccionado: {selected_microphone}")
100
  recognizer = sr.Recognizer()
101
  audio_recording = sr.Microphone(device_index=selected_microphone, sample_rate=16000, chunk_size=1024)
102
 
 
109
 
110
  if audio_data:
111
  st.audio(audio_data.frame_data, format="audio/wav")
112
+
113
+ if perform_vad(audio_data):
114
+ audio_text = recognize_speech(device_index=selected_microphone)
 
115
  if audio_text:
116
+ output, audio_file = generate(audio_text, history=st.session_state.history)
117
+
118
+ if audio_file is not None:
119
+ st.markdown(
120
+ f"""
121
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
122
+ """,
123
+ unsafe_allow_html=True
124
+ )
125
  else:
126
  st.warning("No se detectaron micrófonos disponibles. Asegúrate de que tengas un micrófono conectado.")
127
 
128
  if __name__ == "__main__":
129
+ main()