xaman4

Sleeping

App Files Files Community

salomonsky committed on Feb 22, 2024

Commit

2a43b85

verified ·

1 Parent(s): 47759f3

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -53

app.py CHANGED Viewed

@@ -1,66 +1,77 @@
 import tempfile
 import webrtcvad
 import speech_recognition as sr
-# Flat script: reads "audio.wav", runs WebRTC VAD over fixed-size slices of the
-# raw bytes and sends slices to the Google recognizer, printing the results.
-# Configure the sample rate and the frame size
-# NOTE: frame_size is used below as the byte step when slicing audio_data.
-sample_rate = 16000
-frame_size = 30
-# Create a VAD object and a speech recognizer
-vad = webrtcvad.Vad()
-recognizer = sr.Recognizer()
-# 1. Load the audio data from the original file:
-with open("audio.wav", "rb") as f:
-    audio_data = f.read()
-# 2. Use a temporary file to process the audio data:
-with tempfile.NamedTemporaryFile(suffix=".wav", delete=True) as f:
-    f.write(audio_data)
-    f.flush()
-    # Process the temporary file (re-reads the same bytes that were just written)
-    with open(f.name, "rb") as f:
-        audio_data = f.read()
-# State flags
-vad_active = False
-speech_detected = False
-phrase = ""
-# Process the audio in frames
-for i in range(0, len(audio_data), frame_size):
-    # Get the current frame
-    frame = audio_data[i:i+frame_size]
-    # Detect whether the frame contains speech
-    is_speech = vad.is_speech(frame, sample_rate)
-    # Update the state flags
-    if is_speech and not vad_active:
-        vad_active = True
-        speech_detected = True
-        print("️ Detección de voz iniciada")
-    elif not is_speech and vad_active:
-        vad_active = False
-        print("⏹️ Detección de voz finalizada")
-    # If speech was detected and silence follows, transcribe the phrase
-    if speech_detected and not is_speech:
-        # Transcribe the phrase (only the current frame is passed to the recognizer)
-        with sr.AudioData(frame, sample_rate) as source:
-            audio = recognizer.record(source)
-            try:
-                text = recognizer.recognize_google(audio)
-                phrase += f" {text}"
-                print(f"️ {text}")
-            except sr.RequestError:
-                print("⚠️ Error al transcribir la frase")
-            except sr.UnknownValueError:
-                print("⚠️ No se ha reconocido la frase")
-        # Reset the phrase flag
-        speech_detected = False
-# Print the complete phrase
-print(f"Transcripción completa: {phrase}")

 import tempfile
 import webrtcvad
 import speech_recognition as sr
+import os
+def process_audio_file(audio_file_path):
+    """Transcribe the spoken phrases found in a WAV file.
+
+    The PCM payload is cut into 30 ms frames and each frame is classified by
+    WebRTC VAD. Consecutive voiced frames are buffered; when silence follows
+    (or the file ends), the buffered segment is sent to the Google Web Speech
+    recognizer. Each recognized phrase is printed as it arrives, followed by
+    the complete transcription.
+
+    Args:
+        audio_file_path: Path to a 16-bit mono PCM WAV file sampled at 8000,
+            16000, 32000 or 48000 Hz (the only input webrtcvad accepts).
+
+    Returns:
+        None. Results and errors are reported on standard output.
+    """
+    import wave  # local import so this block does not depend on new file-level imports
+
+    # webrtcvad only accepts frames lasting exactly 10, 20 or 30 ms.
+    frame_duration_ms = 30
+    supported_rates = (8000, 16000, 32000, 48000)
+    vad = webrtcvad.Vad()
+    recognizer = sr.Recognizer()
+    try:
+        # Parse the WAV container so the header is not fed to the VAD as audio.
+        with wave.open(audio_file_path, "rb") as wav_file:
+            channels = wav_file.getnchannels()
+            sample_width = wav_file.getsampwidth()
+            sample_rate = wav_file.getframerate()
+            audio_data = wav_file.readframes(wav_file.getnframes())
+    except FileNotFoundError:
+        print(f"Error: File not found - {audio_file_path}")
+        return
+    except (wave.Error, EOFError) as exc:
+        print(f"Error: Not a readable WAV file - {audio_file_path} ({exc})")
+        return
+    if channels != 1 or sample_width != 2 or sample_rate not in supported_rates:
+        print(
+            f"Error: Unsupported WAV format - {audio_file_path} "
+            "(need 16-bit mono PCM at 8/16/32/48 kHz)"
+        )
+        return
+    # Bytes per VAD frame: samples in 30 ms times bytes per sample (960 at 16 kHz).
+    frame_bytes = sample_rate * frame_duration_ms // 1000 * sample_width
+    vad_active = False
+    voiced_frames = []
+    phrase = ""
+    # Stop before a trailing partial frame: the VAD rejects frames of any other length.
+    for start in range(0, len(audio_data) - frame_bytes + 1, frame_bytes):
+        frame = audio_data[start:start + frame_bytes]
+        if vad.is_speech(frame, sample_rate):
+            if not vad_active:
+                vad_active = True
+                print("️ Detección de voz iniciada")
+            voiced_frames.append(frame)
+        elif vad_active:
+            # Speech just ended: transcribe everything buffered since it started.
+            vad_active = False
+            print("⏹️ Detección de voz finalizada")
+            phrase += _transcribe_segment(
+                recognizer, b"".join(voiced_frames), sample_rate, sample_width
+            )
+            voiced_frames = []
+    if voiced_frames:
+        # The file ended while speech was still active; do not drop that segment.
+        phrase += _transcribe_segment(
+            recognizer, b"".join(voiced_frames), sample_rate, sample_width
+        )
+    print(f"Transcripción completa: {phrase}")
+
+
+def _transcribe_segment(recognizer, pcm_bytes, sample_rate, sample_width):
+    """Recognize one voiced PCM segment; return " <text>" or "" on failure."""
+    # AudioData takes raw PCM plus its sample rate and sample width in bytes.
+    audio = sr.AudioData(pcm_bytes, sample_rate, sample_width)
+    try:
+        # NOTE(review): uses the recognizer's default language; the messages here
+        # are Spanish, so a language argument may be wanted - confirm.
+        text = recognizer.recognize_google(audio)
+    except sr.RequestError:
+        print("⚠️ Error al transcribir la frase")
+        return ""
+    except sr.UnknownValueError:
+        print("⚠️ No se ha reconocido la frase")
+        return ""
+    print(f"️ {text}")
+    return f" {text}"
+# Example usage: executed whenever this module is run or imported. Builds the
+# path to "audio.wav" inside the current working directory and processes it.
+audio_file_path = os.path.join(os.getcwd(), "audio.wav")  # Replace "audio.wav" with your actual file name
+process_audio_file(audio_file_path)