Update app.py
app.py
CHANGED
@@ -1,17 +1,19 @@
 import tempfile
 import webrtcvad
 import speech_recognition as sr
-import
+import numpy as np
 import streamlit as st
+import sounddevice as sd
 from traceback import format_exc
 
 def update_vad_status(status):
     vad_status.text(status)
 
-def process_audio_file(audio_file_path):
+def process_microphone_input():
     # Configuramos la tasa de muestreo y el tamaño del frame
     sample_rate = 16000
     frame_size = 30
+    chunk_size = 1024 # Adjust as needed for responsiveness
 
     # Creamos un objeto VAD y un reconocedor de voz
     vad = webrtcvad.Vad()
@@ -23,67 +25,66 @@ def process_audio_file(audio_file_path):
     phrase = ""
 
     try:
-        # …
-        with …
-        … (removed lines not legible in this view)
+        # Configuramos la captura de audio desde el micrófono
+        with sd.InputStream(callback=callback, channels=1, dtype=np.int16):
+            st.warning("Habla y observa los cambios en tiempo real...")
+
+            # Mantenemos la aplicación en ejecución
+            st.experimental_rerun()
+
+    except Exception as e:
+        st.error(f"Error during microphone input: {e}")
         st.error("Traceback:")
         st.error(format_exc())
+
+def callback(indata, frames, time, status):
+    if status:
+        print(f"Error in callback: {status}")
         return
 
-    # …
-    … (removed lines not legible in this view)
-                except sr.UnknownValueError:
-                    st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
-
-                # Reiniciamos el indicador de frase
-                speech_detected = False
+    # Procesamos el audio en chunks
+    for i in range(0, len(indata), chunk_size):
+        chunk = indata[i:i + chunk_size]
+
+        # Procesamos cada chunk en frames
+        for j in range(0, len(chunk), frame_size):
+            # Obtenemos el frame actual
+            frame = chunk[j:j + frame_size]
+
+            # Detectamos si hay voz en el frame
+            is_speech = vad.is_speech(frame, sample_rate)
+
+            # Actualizamos los indicadores de estado
+            if is_speech and not vad_active:
+                vad_active = True
+                speech_detected = True
+                update_vad_status("️ Detección de voz iniciada")
+            elif not is_speech and vad_active:
+                vad_active = False
+                update_vad_status("⏹️ Detección de voz finalizada")
+
+            # Si se ha detectado voz y hay un silencio, transcribimos la frase
+            if speech_detected and not is_speech:
+                # Transcribimos la frase
+                with sr.AudioData(frame.tobytes(), sample_rate) as source:
+                    audio = recognizer.record(source)
+                try:
+                    text = recognizer.recognize_google(audio)
+                    phrase += f" {text}"
+                    st.text(f"️ {text}")
+                except sr.RequestError:
+                    st.error("⚠️ Error al transcribir la frase - RequestError")
+                except sr.UnknownValueError:
+                    st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
+
+                # Reiniciamos el indicador de frase
+                speech_detected = False
 
     # Imprimimos la frase completa
     st.success(f"Transcripción completa: {phrase}")
 
 # Streamlit UI
-st.title("VAD and Speech Recognition App")
-
-uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])
+st.title("VAD and Speech Recognition App (Microphone Input)")
 
-… (removed lines not legible in this view)
+vad_status = st.empty()
+process_microphone_input()
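Note on the VAD frame size: `frame_size = 30` reads as a frame duration in milliseconds, but the callback slices the NumPy buffer 30 samples at a time. `webrtcvad.Vad.is_speech()` only accepts raw 16-bit mono PCM, passed as bytes, in frames of exactly 10, 20 or 30 ms at 8, 16, 32 or 48 kHz. A minimal sketch of the frame-length arithmetic under the commit's 16 kHz / int16 settings (the `frames_from_buffer` helper is illustrative, not part of the app):

import numpy as np
import webrtcvad

sample_rate = 16000                                  # must be 8000/16000/32000/48000 Hz
frame_ms = 30                                        # webrtcvad accepts 10, 20 or 30 ms frames
samples_per_frame = sample_rate * frame_ms // 1000   # 480 samples per frame
bytes_per_frame = samples_per_frame * 2              # int16 mono -> 960 bytes per frame

vad = webrtcvad.Vad(2)                               # aggressiveness 0 (lenient) to 3 (strict)

def frames_from_buffer(indata: np.ndarray):
    """Yield byte frames of exactly one VAD frame each from an int16 mono buffer."""
    pcm = indata.reshape(-1).astype(np.int16).tobytes()
    for start in range(0, len(pcm) - bytes_per_frame + 1, bytes_per_frame):
        yield pcm[start:start + bytes_per_frame]

# Example: one second of silence, classified frame by frame
for frame in frames_from_buffer(np.zeros(sample_rate, dtype=np.int16)):
    print(vad.is_speech(frame, sample_rate))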
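Note on the transcription call: in the `speech_recognition` package, `sr.AudioData` is a plain container rather than a context manager, and `Recognizer.record()` takes an `AudioSource` such as `sr.AudioFile` or `sr.Microphone`, so `with sr.AudioData(...) as source:` will fail at runtime. Raw PCM bytes are normally wrapped directly and handed to the recognizer. A hedged sketch follows; the `transcribe_pcm` helper and the `language="es-ES"` choice are assumptions, not part of the commit, and it assumes the caller accumulates several frames of speech rather than a single 30 ms frame:

import speech_recognition as sr

recognizer = sr.Recognizer()

def transcribe_pcm(speech_bytes, sample_rate=16000):
    """Transcribe raw 16-bit mono PCM bytes collected while the VAD reported speech."""
    # AudioData(frame_data, sample_rate, sample_width); sample_width=2 for int16 samples
    audio = sr.AudioData(speech_bytes, sample_rate, 2)
    try:
        return recognizer.recognize_google(audio, language="es-ES")
    except sr.UnknownValueError:
        return None                       # nothing intelligible in this segment
    except sr.RequestError as exc:
        print(f"Speech API request failed: {exc}")
        return None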
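Finally, a scoping and lifetime note: as committed, `callback` refers to `chunk_size`, `vad`, `recognizer`, `vad_active` and `speech_detected`, which are defined inside `process_microphone_input` rather than at module level, and `st.experimental_rerun()` restarts the script instead of keeping the input stream open. One way this could be wired together, sketched under the assumption that shared state lives in a small class and the main thread polls a queue (the `MicVAD` name, the queue hand-off and the `sd.sleep()` loop are illustrative choices, not the app's actual design); completed segments would then go to something like the `transcribe_pcm` helper above:

import queue

import sounddevice as sd
import webrtcvad

SAMPLE_RATE = 16000
FRAME_MS = 30
FRAME_BYTES = SAMPLE_RATE * FRAME_MS // 1000 * 2       # 960 bytes per 30 ms int16 mono frame

class MicVAD:
    """Accumulates microphone audio and emits complete speech segments through a queue."""

    def __init__(self, aggressiveness=2):
        self.vad = webrtcvad.Vad(aggressiveness)
        self.pending = b""                              # PCM bytes not yet cut into VAD frames
        self.segment = b""                              # bytes of the speech segment being built
        self.in_speech = False
        self.segments = queue.Queue()                   # finished segments, ready to transcribe

    def callback(self, indata, frames, time, status):
        if status:
            print(f"Stream status: {status}")
        self.pending += indata.tobytes()                # indata is an int16 mono NumPy block
        while len(self.pending) >= FRAME_BYTES:
            frame = self.pending[:FRAME_BYTES]
            self.pending = self.pending[FRAME_BYTES:]
            if self.vad.is_speech(frame, SAMPLE_RATE):
                self.in_speech = True
                self.segment += frame
            elif self.in_speech:
                # Silence after speech: the segment is complete, hand it to the main thread
                self.segments.put(self.segment)
                self.segment = b""
                self.in_speech = False

if __name__ == "__main__":
    mic = MicVAD()
    # channels=1 and dtype="int16" match what webrtcvad expects
    with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, dtype="int16",
                        callback=mic.callback):
        print("Habla; Ctrl+C para salir...")
        while True:
            sd.sleep(250)                               # keep the stream open
            try:
                segment = mic.segments.get_nowait()
            except queue.Empty:
                continue
            print(f"Speech segment ready: {len(segment)} bytes")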
|