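"""Streamlit demo: voice activity detection (webrtcvad) plus Google speech
recognition (SpeechRecognition) on live microphone audio captured with
sounddevice."""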
from traceback import format_exc

import numpy as np
import sounddevice as sd
import speech_recognition as sr
import streamlit as st
import webrtcvad
def update_vad_status(status):
    # Update the VAD status placeholder in the UI
    vad_status.text(status)
def process_microphone_input():
    # Sample rate and frame length (webrtcvad only accepts 10, 20 or 30 ms frames)
    sample_rate = 16000
    frame_duration_ms = 30
    frame_size = int(sample_rate * frame_duration_ms / 1000)  # 480 samples per frame

    # Create a VAD object and a speech recognizer
    vad = webrtcvad.Vad(2)  # aggressiveness 0-3; 2 is a reasonable middle ground
    recognizer = sr.Recognizer()

    # State shared with the audio callback
    vad_active = False
    speech_detected = False
    speech_frames = []
    phrase = ""

    def callback(indata, frames, time, status):
        nonlocal vad_active, speech_detected, phrase
        if status:
            print(f"Error in callback: {status}")
            return
        # sounddevice delivers a (frames, channels) array; keep the mono channel
        mono = indata[:, 0]
        # Process the audio in VAD-sized frames
        for i in range(0, len(mono), frame_size):
            frame = mono[i:i + frame_size]
            # webrtcvad rejects frames that are not exactly 10/20/30 ms long
            if len(frame) < frame_size:
                break
            # Detect whether the current frame contains speech
            is_speech = vad.is_speech(frame.tobytes(), sample_rate)
            # Update the state flags
            if is_speech and not vad_active:
                vad_active = True
                speech_detected = True
                update_vad_status("🎙️ Voice detection started")
            elif not is_speech and vad_active:
                vad_active = False
                update_vad_status("⏹️ Voice detection ended")
            if is_speech:
                # Buffer speech frames until a silence ends the phrase
                speech_frames.append(frame.tobytes())
            elif speech_detected:
                # Speech was detected and a silence followed: transcribe the phrase
                audio = sr.AudioData(b"".join(speech_frames), sample_rate, 2)  # 2 bytes per int16 sample
                speech_frames.clear()
                try:
                    text = recognizer.recognize_google(audio)
                    phrase += f" {text}"
                    st.text(f"🗣️ {text}")
                except sr.RequestError:
                    st.error("⚠️ Error transcribing the phrase - RequestError")
                except sr.UnknownValueError:
                    st.error("⚠️ Phrase not recognized - UnknownValueError")
                # Reset the phrase flag
                speech_detected = False

    try:
        # Capture audio from the microphone; the callback runs on a separate thread
        with sd.InputStream(callback=callback, channels=1,
                            samplerate=sample_rate, dtype=np.int16):
            st.warning("Speak and watch the changes in real time...")
            # Keep the stream open while the callback processes audio
            # (st.experimental_rerun() here would restart the script and close the stream)
            sd.sleep(30_000)  # listen for 30 seconds
    except Exception as e:
        st.error(f"Error during microphone input: {e}")
        st.error("Traceback:")
        st.error(format_exc())

    # Print the complete phrase
    st.success(f"Full transcription: {phrase}")
# Streamlit UI
st.title("VAD and Speech Recognition App (Microphone Input)")
vad_status = st.empty()
process_microphone_input()
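# To try the app locally (assuming this file is saved as app.py):
#   pip install numpy sounddevice SpeechRecognition streamlit webrtcvad
#   streamlit run app.py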