salomonsky committed on
Commit
8c8c2b9
verified
1 Parent(s): b072c5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -37
app.py CHANGED
@@ -1,11 +1,13 @@
1
  from tempfile import NamedTemporaryFile
2
  import streamlit as st
3
- import base64
4
  import io
5
  from huggingface_hub import InferenceClient
6
  from gtts import gTTS
 
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
 
 
9
 
10
  def recognize_speech(audio_data, show_messages=True):
11
  recognizer = sr.Recognizer()
@@ -53,27 +55,32 @@ def generate(audio_text, history, generation_params):
53
  audio_file = text_to_speech(response, speed=1.3)
54
  return response, audio_file
55
 
56
- def process_audio(frames, recognizer):
57
- audio_data = AudioSegment(
58
- data=b"".join(frames),
59
- sample_width=2,
60
- frame_rate=44100,
61
- channels=1
62
- )
63
-
64
- with NamedTemporaryFile(suffix=".wav") as temp_file:
65
- audio_data.export(temp_file.name, format="wav")
66
- audio_file = sr.AudioFile(temp_file.name)
67
-
68
- with audio_file as source:
69
- audio = recognizer.record(source)
70
- try:
71
- text = recognizer.recognize_google(audio, language="es-ES")
72
- st.text_input("Input", value=text)
73
- except sr.UnknownValueError:
74
- pass
75
- except sr.RequestError:
76
- pass
 
 
 
 
 
77
 
78
  def text_to_speech(text, speed=1.3):
79
  tts = gTTS(text=text, lang='es')
@@ -88,24 +95,26 @@ def text_to_speech(text, speed=1.3):
88
 
89
  def main():
90
  r = sr.Recognizer()
 
91
 
92
  mic_list = sr.Microphone.list_microphone_names()
93
  print("Dispositivos de micrófono encontrados:")
94
  for i, microphone_name in enumerate(mic_list):
95
  print(f"Dispositivo {i}: {microphone_name}")
96
 
97
- device_index = int(input("Seleccione el número del dispositivo de micrófono: "))
98
- with sr.Microphone(device_index=device_index) as source:
99
- print("Habla ahora...")
100
- while True:
101
- audio = r.listen(source)
102
- try:
103
- text = r.recognize_google(audio, language="es-ES")
104
- print("Texto reconocido:", text)
105
- except sr.UnknownValueError:
106
- print("No se pudo reconocer el audio.")
107
- except sr.RequestError:
108
- print("No se pudo conectar con el servicio de reconocimiento de voz.")
109
-
110
- if __name__ == "__main__":
111
- main()
 
 
1
  from tempfile import NamedTemporaryFile
2
  import streamlit as st
 
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ import numpy as np
7
  import speech_recognition as sr
8
  from pydub import AudioSegment
9
+ import webrtcvad
10
+ import soundfile as sf
11
 
12
  def recognize_speech(audio_data, show_messages=True):
13
  recognizer = sr.Recognizer()
 
55
  audio_file = text_to_speech(response, speed=1.3)
56
  return response, audio_file
57
 
58
+ def process_audio(frames, recognizer, vad):
59
+ audio_array = np.frombuffer(frames, dtype=np.int16)
60
+ is_speech = vad.is_speech(audio_array, sample_rate=44100)
61
+ speech_frames = audio_array[is_speech]
62
+
63
+ if len(speech_frames) > 0:
64
+ audio_data = AudioSegment(
65
+ data=speech_frames.tobytes(),
66
+ sample_width=2,
67
+ frame_rate=44100,
68
+ channels=1
69
+ )
70
+
71
+ with NamedTemporaryFile(suffix=".wav") as temp_file:
72
+ audio_data.export(temp_file.name, format="wav")
73
+ audio_file = sr.AudioFile(temp_file.name)
74
+
75
+ with audio_file as source:
76
+ audio = recognizer.record(source)
77
+ try:
78
+ text = recognizer.recognize_google(audio, language="es-ES")
79
+ st.text_input("Input", value=text)
80
+ except sr.UnknownValueError:
81
+ pass
82
+ except sr.RequestError:
83
+ pass
84
 
85
  def text_to_speech(text, speed=1.3):
86
  tts = gTTS(text=text, lang='es')
 
95
 
96
  def main():
97
  r = sr.Recognizer()
98
+ vad = webrtcvad.Vad()
99
 
100
  mic_list = sr.Microphone.list_microphone_names()
101
  print("Dispositivos de micrófono encontrados:")
102
  for i, microphone_name in enumerate(mic_list):
103
  print(f"Dispositivo {i}: {microphone_name}")
104
 
105
+ selected_device_index = None
106
+
107
+ for i in range(len(mic_list)):
108
+ try:
109
+ with sr.Microphone(device_index=i) as source:
110
+ vad.set_mode(3) # Establecer el modo de VAD
111
+ print(f"Probando con el dispositivo {i}...")
112
+ r.adjust_for_ambient_noise(source, duration=1)
113
+ selected_device_index = i
114
+ break
115
+ except sr.RequestError as e:
116
+ print(f"No se pudo conectar con el dispositivo {i}: {e}")
117
+ except sr.UnknownValueError:
118
+ pass
119
+
120
+ if selected_device_index is not