# xaman4
#
# Sleeping
#
# File size: 3,098 Bytes

import queue
import tempfile
from traceback import format_exc

import numpy as np
import sounddevice as sd
import speech_recognition as sr
import streamlit as st
import webrtcvad

def update_vad_status(status):
    """Show *status* as the text of the voice-activity placeholder.

    Writes to the module-level ``vad_status`` element, so that name must be
    bound before this function is called.
    """
    vad_status.text(status)

# Audio format shared by the capture stream, the VAD and the recognizer.
SAMPLE_RATE = 16000  # Hz
FRAME_DURATION_MS = 30  # webrtcvad only accepts 10, 20 or 30 ms frames
SAMPLE_WIDTH = 2  # bytes per int16 sample
FRAME_BYTES = SAMPLE_RATE * FRAME_DURATION_MS // 1000 * SAMPLE_WIDTH
# Consecutive non-speech frames (~300 ms) that close the current phrase, so a
# single quiet frame in the middle of a word does not split it.
END_OF_PHRASE_FRAMES = 10

# Raw PCM blocks handed over from the audio callback to the script.
_audio_queue = queue.Queue()


def _transcribe(recognizer, pcm_bytes):
    """Return the recognized text for one phrase of 16-bit mono PCM.

    Recognition failures are reported on the page and yield ``None``.
    """
    audio = sr.AudioData(pcm_bytes, SAMPLE_RATE, SAMPLE_WIDTH)
    try:
        return recognizer.recognize_google(audio)
    except sr.RequestError:
        st.error("⚠️ Error al transcribir la frase - RequestError")
    except sr.UnknownValueError:
        st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
    return None


def process_microphone_input():
    """Capture microphone audio, track voice activity and transcribe phrases.

    Opens a 16 kHz mono int16 input stream whose ``callback`` only enqueues
    raw audio. This function drains that queue, cuts the audio into
    fixed-size VAD frames, updates the status placeholder when speech starts
    or ends, and sends each finished phrase to the recognizer. It keeps
    running for as long as the stream is active.

    Any exception raised while capturing is shown on the page together with
    its traceback instead of propagating.
    """
    # Create the VAD and the speech recognizer.
    vad = webrtcvad.Vad()
    recognizer = sr.Recognizer()

    pending = bytearray()  # bytes that do not yet form a complete frame
    speech_frames = []  # frames of the phrase currently being spoken
    silent_run = 0  # consecutive non-speech frames inside a phrase
    vad_active = False
    phrase = ""

    try:
        # The stream is only open inside this with-block, so the processing
        # loop has to live here; leaving the block stops the capture.
        with sd.InputStream(
            samplerate=SAMPLE_RATE,
            channels=1,
            dtype=np.int16,
            callback=callback,
        ) as stream:
            st.warning("Habla y observa los cambios en tiempo real...")
            transcript_box = st.empty()

            while stream.active:
                try:
                    # Time out periodically so a stopped stream is noticed.
                    pending.extend(_audio_queue.get(timeout=1.0))
                except queue.Empty:
                    continue

                while len(pending) >= FRAME_BYTES:
                    frame = bytes(pending[:FRAME_BYTES])
                    del pending[:FRAME_BYTES]

                    if vad.is_speech(frame, SAMPLE_RATE):
                        if not vad_active:
                            vad_active = True
                            update_vad_status("️ Detección de voz iniciada")
                        speech_frames.append(frame)
                        silent_run = 0
                        continue

                    if not vad_active:
                        # Silence outside a phrase: nothing to keep.
                        continue

                    # Silence inside a phrase: keep it until the pause is
                    # long enough to count as the end of the phrase.
                    speech_frames.append(frame)
                    silent_run += 1
                    if silent_run < END_OF_PHRASE_FRAMES:
                        continue

                    vad_active = False
                    silent_run = 0
                    update_vad_status("⏹️ Detección de voz finalizada")

                    text = _transcribe(recognizer, b"".join(speech_frames))
                    speech_frames.clear()
                    if text is not None:
                        phrase += f" {text}"
                        st.text(f"️ {text}")
                        # Show the full transcription so far.
                        transcript_box.success(
                            f"Transcripción completa: {phrase}"
                        )

    except Exception as e:
        st.error(f"Error during microphone input: {e}")
        st.error("Traceback:")
        st.error(format_exc())


def callback(indata, frames, time, status):
    """Queue one block of captured audio for ``process_microphone_input``.

    This is the stream callback passed to ``sd.InputStream``. It does no
    processing and makes no Streamlit calls; it only copies the block to
    bytes and enqueues it.

    Args:
        indata: Captured samples for this block (int16, one channel).
        frames: Number of sample frames in ``indata`` (unused).
        time: Stream timing information (unused).
        status: Stream status flags; when set, the block is reported and
            dropped.
    """
    if status:
        print(f"Error in callback: {status}")
        return

    # Copy to bytes before handing the data off, rather than holding a
    # reference to the stream's own buffer.
    _audio_queue.put(indata.tobytes())

# Streamlit UI
st.title("VAD and Speech Recognition App (Microphone Input)")

# Placeholder that update_vad_status() writes into; it must be bound at module
# level before process_microphone_input() runs.
vad_status = st.empty()
# Start microphone capture and processing.
process_microphone_input()