xaman4

Sleeping

App Files Files Community

salomonsky committed on Feb 22, 2024

Commit

444f76a

verified ·

1 Parent(s): 057278f

Update app.py

Browse files

Files changed (1) hide show

app.py +57 -56

app.py CHANGED Viewed

@@ -1,17 +1,19 @@
 import tempfile
 import webrtcvad
 import speech_recognition as sr
-import os
 import streamlit as st
 from traceback import format_exc
 def update_vad_status(status):
     vad_status.text(status)
-def process_audio_file(audio_file_path):
     # Configuramos la tasa de muestreo y el tamaño del frame
     sample_rate = 16000
     frame_size = 30
     # Creamos un objeto VAD y un reconocedor de voz
     vad = webrtcvad.Vad()
@@ -23,67 +25,66 @@ def process_audio_file(audio_file_path):
     phrase = ""
     try:
-        # 1. Load the audio data from the original file:
-        with open(audio_file_path, "rb") as f:
-            audio_data = f.read()
-    except FileNotFoundError as e:
-        st.error(f"Error: File not found - {audio_file_path}")
-        st.error(f"Error Details: {e}")
         st.error("Traceback:")
         st.error(format_exc())
         return
-    # 2. Use a temporary file to process the audio data:
-    with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as temp_file:
-        temp_file.write(audio_data)
-        temp_file.flush()
-        # Procesamos el archivo temporal
-        with open(temp_file.name, "rb") as f:
-            audio_data = f.read()
-    # Procesamos el audio en frames
-    for i in range(0, len(audio_data), frame_size):
-        # Obtenemos el frame actual
-        frame = audio_data[i:i + frame_size]
-        # Detectamos si hay voz en el frame
-        is_speech = vad.is_speech(frame, sample_rate)
-        # Actualizamos los indicadores de estado
-        if is_speech and not vad_active:
-            vad_active = True
-            speech_detected = True
-            update_vad_status("️ Detección de voz iniciada")
-        elif not is_speech and vad_active:
-            vad_active = False
-            update_vad_status("⏹️ Detección de voz finalizada")
-        # Si se ha detectado voz y hay un silencio, transcribimos la frase
-        if speech_detected and not is_speech:
-            # Transcribimos la frase
-            with sr.AudioData(frame, sample_rate) as source:
-                audio = recognizer.record(source)
-                try:
-                    text = recognizer.recognize_google(audio)
-                    phrase += f" {text}"
-                    st.text(f"️ {text}")
-                except sr.RequestError:
-                    st.error("⚠️ Error al transcribir la frase - RequestError")
-                except sr.UnknownValueError:
-                    st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
-            # Reiniciamos el indicador de frase
-            speech_detected = False
     # Imprimimos la frase completa
     st.success(f"Transcripción completa: {phrase}")
 # Streamlit UI
-st.title("VAD and Speech Recognition App")
-uploaded_file = st.file_uploader("Choose an audio file", type=["wav"])
-if uploaded_file:
-    process_audio_file(uploaded_file.name)

 import tempfile
 import webrtcvad
 import speech_recognition as sr
+import numpy as np
 import streamlit as st
+import sounddevice as sd
 from traceback import format_exc
 def update_vad_status(status):
     vad_status.text(status)
+def process_microphone_input():
     # Configuramos la tasa de muestreo y el tamaño del frame
     sample_rate = 16000
     frame_size = 30
+    chunk_size = 1024  # Adjust as needed for responsiveness
     # Creamos un objeto VAD y un reconocedor de voz
     vad = webrtcvad.Vad()
     phrase = ""
     try:
+        # Configuramos la captura de audio desde el micrófono
+        with sd.InputStream(callback=callback, channels=1, dtype=np.int16):
+            st.warning("Habla y observa los cambios en tiempo real...")
+            # Mantenemos la aplicación en ejecución
+            st.experimental_rerun()
+    except Exception as e:
+        st.error(f"Error during microphone input: {e}")
         st.error("Traceback:")
         st.error(format_exc())
+def callback(indata, frames, time, status):
+    if status:
+        print(f"Error in callback: {status}")
         return
+    # Procesamos el audio en chunks
+    for i in range(0, len(indata), chunk_size):
+        chunk = indata[i:i + chunk_size]
+        # Procesamos cada chunk en frames
+        for j in range(0, len(chunk), frame_size):
+            # Obtenemos el frame actual
+            frame = chunk[j:j + frame_size]
+            # Detectamos si hay voz en el frame
+            is_speech = vad.is_speech(frame, sample_rate)
+            # Actualizamos los indicadores de estado
+            if is_speech and not vad_active:
+                vad_active = True
+                speech_detected = True
+                update_vad_status("️ Detección de voz iniciada")
+            elif not is_speech and vad_active:
+                vad_active = False
+                update_vad_status("⏹️ Detección de voz finalizada")
+            # Si se ha detectado voz y hay un silencio, transcribimos la frase
+            if speech_detected and not is_speech:
+                # Transcribimos la frase
+                with sr.AudioData(frame.tobytes(), sample_rate) as source:
+                    audio = recognizer.record(source)
+                    try:
+                        text = recognizer.recognize_google(audio)
+                        phrase += f" {text}"
+                        st.text(f"️ {text}")
+                    except sr.RequestError:
+                        st.error("⚠️ Error al transcribir la frase - RequestError")
+                    except sr.UnknownValueError:
+                        st.error("⚠️ No se ha reconocido la frase - UnknownValueError")
+                # Reiniciamos el indicador de frase
+                speech_detected = False
     # Imprimimos la frase completa
     st.success(f"Transcripción completa: {phrase}")
 # Streamlit UI
+st.title("VAD and Speech Recognition App (Microphone Input)")
+vad_status = st.empty()
+process_microphone_input()