Spaces:
Running
Running
| import tempfile | |
| import webrtcvad | |
| import speech_recognition as sr | |
| import numpy as np | |
| import streamlit as st | |
| import sounddevice as sd | |
| from traceback import format_exc | |
def update_vad_status(status):
    """Render *status* in the shared Streamlit VAD-status placeholder."""
    # ``vad_status`` is the module-level st.empty() placeholder created
    # at the bottom of this script.
    placeholder = vad_status
    placeholder.text(status)
def process_microphone_input():
    """Capture microphone audio, run VAD on it, and transcribe spoken phrases.

    Opens a sounddevice input stream (16 kHz, mono, int16) and feeds the
    audio through webrtcvad frame by frame.  While speech is detected the
    raw frames are accumulated; when the speech ends they are sent to
    Google's recognizer via SpeechRecognition and the transcription is
    shown in the Streamlit UI.

    Raises: nothing — all errors are reported through ``st.error``.
    """
    # Sampling configuration.  webrtcvad only accepts 8/16/32/48 kHz audio
    # in frames of exactly 10, 20 or 30 ms of 16-bit mono PCM.
    sample_rate = 16000
    frame_duration_ms = 30
    # 30 ms at 16 kHz -> 480 samples (960 bytes) per VAD frame.
    # (The original sliced 30 *samples*, which webrtcvad rejects.)
    frame_size = int(sample_rate * frame_duration_ms / 1000)
    chunk_size = 1024  # samples per processing chunk; adjust for responsiveness

    # VAD object and speech recognizer, shared with the callback below.
    vad = webrtcvad.Vad()
    recognizer = sr.Recognizer()

    # State shared across callback invocations.
    vad_active = False       # True while the VAD currently reports speech
    speech_detected = False  # True once speech was seen in the current phrase
    phrase = ""              # accumulated transcription for the session
    speech_frames = []       # raw PCM frames of the phrase being spoken

    def callback(indata, frames, time, status):
        # Invoked by sounddevice from its audio thread for every buffer.
        # The callback must be nested here (with ``nonlocal``) so it can
        # read and update the state above; at module level these names
        # would not exist.
        nonlocal vad_active, speech_detected, phrase, speech_frames
        if status:
            print(f"Error in callback: {status}")
            return
        # Flatten the (frames, 1) int16 buffer to a 1-D sample array.
        samples = indata.reshape(-1)
        # Process the audio in chunks, and each chunk in VAD-sized frames;
        # a trailing partial frame is skipped (webrtcvad rejects it).
        for i in range(0, len(samples), chunk_size):
            chunk = samples[i:i + chunk_size]
            for j in range(0, len(chunk) - frame_size + 1, frame_size):
                frame_bytes = chunk[j:j + frame_size].tobytes()
                # Detect whether this frame contains speech.
                is_speech = vad.is_speech(frame_bytes, sample_rate)
                if is_speech:
                    speech_frames.append(frame_bytes)
                # Update the status indicators on speech boundaries.
                if is_speech and not vad_active:
                    vad_active = True
                    speech_detected = True
                    update_vad_status("️ Detección de voz iniciada")
                elif not is_speech and vad_active:
                    vad_active = False
                    update_vad_status("⏹️ Detección de voz finalizada")
                # Speech just ended: transcribe the accumulated phrase.
                if speech_detected and not is_speech:
                    # AudioData takes raw PCM, the sample rate, and the
                    # sample width in bytes (int16 -> 2).  It is not a
                    # context manager and needs no recognizer.record().
                    audio = sr.AudioData(b"".join(speech_frames),
                                         sample_rate, 2)
                    speech_frames = []
                    try:
                        text = recognizer.recognize_google(audio)
                        phrase += f" {text}"
                        st.text(f"️ {text}")
                    except sr.RequestError:
                        st.error("⚠️ Error al transcribir la frase - RequestError")
                    except sr.UnknownValueError:
                        st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
                    # Reset the phrase indicator for the next utterance.
                    speech_detected = False
                    # Show the full transcription so far.
                    st.success(f"Transcripción completa: {phrase}")

    try:
        # Capture audio from the microphone.  blocksize=chunk_size keeps
        # callback buffers aligned with our processing chunks, and the
        # explicit samplerate matches what the VAD expects.
        with sd.InputStream(callback=callback, channels=1, dtype=np.int16,
                            samplerate=sample_rate, blocksize=chunk_size):
            st.warning("Habla y observa los cambios en tiempo real...")
            # Keep the stream open while the Streamlit script runs.
            # (st.experimental_rerun() here would exit the ``with`` block
            # and close the stream immediately.)
            while True:
                sd.sleep(1000)
    except Exception as e:
        st.error(f"Error during microphone input: {e}")
        st.error("Traceback:")
        st.error(format_exc())
# Streamlit UI
st.title("VAD and Speech Recognition App (Microphone Input)")
# Placeholder that update_vad_status() overwrites with the current VAD state;
# it must exist before process_microphone_input() starts the audio stream.
vad_status = st.empty()
# Start capturing and processing microphone audio (blocks for the session).
process_microphone_input()