salomonsky committed on
Commit
a75c652
verified
1 Parent(s): 295dea8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -50
app.py CHANGED
@@ -1,29 +1,21 @@
1
- import pyaudio
2
  import streamlit as st
3
  import base64
4
  import io
5
  from huggingface_hub import InferenceClient
6
  from gtts import gTTS
 
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
9
- import webrtcvad
10
 
11
- def perform_vad(audio_data):
12
- vad = webrtcvad.Vad()
13
- vad.set_mode(1)
14
-
15
- samples = audio_data.frame_data
16
- is_speech = vad.is_speech(samples, sample_rate=audio_data.sample_rate)
17
-
18
- return is_speech
19
 
20
- def recognize_speech(device_index, show_messages=True):
21
  recognizer = sr.Recognizer()
22
- audio_recording = sr.Microphone(device_index=device_index, sample_rate=16000, chunk_size=1024)
23
 
24
  with audio_recording as source:
25
- recognizer.adjust_for_ambient_noise(source, gain=None)
26
- audio = recognizer.listen(source, timeout=5, gain=None)
27
 
28
  try:
29
  audio_text = recognizer.recognize_google(audio, language="es-ES")
@@ -44,9 +36,10 @@ def format_prompt(message, history):
44
  prompt = "<s>"
45
 
46
  for user_prompt, bot_response in history:
47
- prompt += f"[/s] {user_prompt} [/s] {bot_response}</s> "
 
48
 
49
- prompt += f"[/s] {message} [/s]"
50
  return prompt
51
 
52
  def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
@@ -91,39 +84,26 @@ def text_to_speech(text, speed=1.3):
91
 
92
  def main():
93
  st.title("Chatbot de Voz a Voz")
94
- available_microphones = pyaudio.PyAudio().get_device_count()
95
-
96
- if available_microphones > 0:
97
- st.info(f"Número de micrófonos disponibles: {available_microphones}")
98
- selected_microphone = st.selectbox("Selecciona un micr贸fono", list(range(available_microphones)))
99
- st.info(f"Número de micrófono seleccionado: {selected_microphone}")
100
- recognizer = sr.Recognizer()
101
- audio_recording = sr.Microphone(device_index=selected_microphone, sample_rate=16000, chunk_size=1024)
102
-
103
- st.info("Habla para grabar")
104
- with audio_recording as source:
105
- recognizer.adjust_for_ambient_noise(source, duration=1)
106
- audio_data = recognizer.listen(source, timeout=5)
107
-
108
- st.success("Deteniendo la grabación...")
109
-
110
- if audio_data:
111
- st.audio(audio_data.frame_data, format="audio/wav")
112
-
113
- if perform_vad(audio_data):
114
- audio_text = recognize_speech(device_index=selected_microphone)
115
- if audio_text:
116
- output, audio_file = generate(audio_text, history=st.session_state.history)
117
-
118
- if audio_file is not None:
119
- st.markdown(
120
- f"""
121
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
122
- """,
123
- unsafe_allow_html=True
124
- )
125
- else:
126
- st.warning("No se detectaron micrófonos disponibles. Asegúrate de que tengas un micrófono conectado.")
127
 
128
  if __name__ == "__main__":
129
- main()
 
 
1
  import streamlit as st
2
  import base64
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ from audiorecorder import audiorecorder
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
 
9
 
10
+ if "history" not in st.session_state:
11
+ st.session_state.history = []
 
 
 
 
 
 
12
 
13
+ def recognize_speech(audio_data, show_messages=True):
14
  recognizer = sr.Recognizer()
15
+ audio_recording = sr.AudioFile(audio_data)
16
 
17
  with audio_recording as source:
18
+ audio = recognizer.record(source)
 
19
 
20
  try:
21
  audio_text = recognizer.recognize_google(audio, language="es-ES")
 
36
  prompt = "<s>"
37
 
38
  for user_prompt, bot_response in history:
39
+ prompt += f"[INST] {user_prompt} [/INST]"
40
+ prompt += f" {bot_response}</s> "
41
 
42
+ prompt += f"[INST] {message} [/INST]"
43
  return prompt
44
 
45
  def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
 
84
 
85
  def main():
86
  st.title("Chatbot de Voz a Voz")
87
+ audio_data = sr.Microphone(device_index=-1)
88
+
89
+ if not audio_data.empty():
90
+ st.audio(audio_data.export().read(), format="audio/wav")
91
+ audio_data.export("audio.wav", format="wav")
92
+ audio_text = recognize_speech("audio.wav")
93
+
94
+ if audio_text:
95
+ output, audio_file = generate(audio_text, history=st.session_state.history)
96
+
97
+ if audio_text:
98
+ st.session_state.history.append((audio_text, output))
99
+
100
+ if audio_file is not None:
101
+ st.markdown(
102
+ f"""
103
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
104
+ """,
105
+ unsafe_allow_html=True
106
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  if __name__ == "__main__":
109
+ main()