Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -3,9 +3,12 @@ import base64
|
|
3 |
import io
|
4 |
from huggingface_hub import InferenceClient
|
5 |
from gtts import gTTS
|
6 |
-
import sounddevice as sd
|
7 |
-
import speech_recognition as sr
|
8 |
from pydub import AudioSegment
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
if "history" not in st.session_state:
|
11 |
st.session_state.history = []
|
@@ -29,7 +32,35 @@ def recognize_speech_with_vad(audio_data, show_messages=True):
|
|
29 |
audio_text = ""
|
30 |
|
31 |
return audio_text
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# Preparando entrada para el modelo de lenguaje
|
34 |
def format_prompt(message, history):
|
35 |
prompt = "<s>"
|
@@ -89,28 +120,16 @@ def audio_player_markup(audio_file):
|
|
89 |
<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
|
90 |
"""
|
91 |
|
92 |
-
# Interfaz de usuario
|
93 |
def main():
|
94 |
st.title("Chatbot de Voz a Voz")
|
95 |
-
|
96 |
-
# Configuración de dispositivos de entrada
|
97 |
-
input_devices = sd.query_devices(kind='input')
|
98 |
-
selected_device = st.selectbox("Selecciona tu micrófono:", [device['name'] for device in input_devices])
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
audio_text = recognize_speech_with_vad(audio_data)
|
107 |
-
|
108 |
-
if audio_text:
|
109 |
-
st.success("Frase detectada. Procesando audio...")
|
110 |
-
output, audio_file = generate(audio_text, history=st.session_state.history)
|
111 |
-
|
112 |
-
if audio_file is not None:
|
113 |
-
st.markdown(audio_player_markup(audio_file), unsafe_allow_html=True)
|
114 |
|
115 |
if __name__ == "__main__":
|
116 |
main()
|
|
|
3 |
import io
|
4 |
from huggingface_hub import InferenceClient
|
5 |
from gtts import gTTS
|
|
|
|
|
6 |
from pydub import AudioSegment
|
7 |
+
from pydub.playback import play
|
8 |
+
from streamlit_webrtc import webrtc_streamer, VideoProcessorBase
|
9 |
+
import cv2
|
10 |
+
import numpy as np
|
11 |
+
import speech_recognition as sr
|
12 |
|
13 |
if "history" not in st.session_state:
|
14 |
st.session_state.history = []
|
|
|
32 |
audio_text = ""
|
33 |
|
34 |
return audio_text
|
35 |
+
|
36 |
+
# Procesador de audio para VAD con streamlit_webrtc
|
37 |
+
class VADProcessor(AudioProcessorBase):
|
38 |
+
def __init__(self):
|
39 |
+
self.buffer = np.zeros((0,))
|
40 |
+
self.vad_active = True
|
41 |
+
|
42 |
+
def recv(self, audio_data):
|
43 |
+
if self.vad_active:
|
44 |
+
audio_array = np.frombuffer(audio_data, dtype=np.int16)
|
45 |
+
self.buffer = np.concatenate((self.buffer, audio_array), axis=None)
|
46 |
+
|
47 |
+
if len(self.buffer) >= 44100 * 5: # 5 seconds of audio
|
48 |
+
st.audio(self.buffer, format="audio/wav")
|
49 |
+
audio_text = recognize_speech_with_vad(self.buffer)
|
50 |
+
|
51 |
+
if audio_text:
|
52 |
+
st.success("Frase detectada. Procesando audio...")
|
53 |
+
output, audio_file = generate(audio_text, history=st.session_state.history)
|
54 |
+
|
55 |
+
if audio_file is not None:
|
56 |
+
play(audio_file)
|
57 |
+
|
58 |
+
# Desactiva el VAD después de detectar una frase
|
59 |
+
self.vad_active = False
|
60 |
+
|
61 |
+
self.buffer = np.zeros((0,))
|
62 |
+
|
63 |
+
|
64 |
# Preparando entrada para el modelo de lenguaje
|
65 |
def format_prompt(message, history):
|
66 |
prompt = "<s>"
|
|
|
120 |
<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
|
121 |
"""
|
122 |
|
123 |
+
# Interfaz de usuario con streamlit_webrtc
|
124 |
def main():
|
125 |
st.title("Chatbot de Voz a Voz")
|
|
|
|
|
|
|
|
|
126 |
|
127 |
+
webrtc_ctx = webrtc_streamer(
|
128 |
+
key="vad",
|
129 |
+
audio_processor_factory=VADProcessor,
|
130 |
+
async_processing=True,
|
131 |
+
media_stream_constraints={"video": False, "audio": True},
|
132 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
133 |
|
134 |
if __name__ == "__main__":
|
135 |
main()
|