salomonsky committed on
Commit d163c7a (verified)
1 Parent(s): 5b6fd29

Update app.py

Files changed (1)
  1. app.py +29 -42
app.py CHANGED
@@ -4,29 +4,18 @@ import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
 from audiorecorder import audiorecorder
-import streamlit_webrtc as webrtc
 
-def recognize_speech(audio_data, show_messages=True):
+def record_audio(filename="audio.wav", duration=5):
     recognizer = sr.Recognizer()
-    audio_recording = sr.AudioFile(audio_data)
-
-    with audio_recording as source:
-        audio = recognizer.record(source)
-
-    try:
-        audio_text = recognizer.recognize_google(audio, language="es-ES")
-        if show_messages:
-            st.subheader("Texto Reconocido:")
-            st.write(audio_text)
-            st.success("Reconocimiento de voz completado.")
-    except sr.UnknownValueError:
-        st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
-        audio_text = ""
-    except sr.RequestError:
-        st.error("No he recibido ningun audio. Por favor, inténtalo de nuevo.")
-        audio_text = ""
-
-    return audio_text
+
+    with sr.Microphone() as source:
+        st.subheader("Habla para grabar...")
+        audio_data = recognizer.listen(source, timeout=duration)
+
+    st.subheader("Deteniendo la grabación...")
+
+    with open(filename, "wb") as f:
+        f.write(audio_data.get_wav_data())
 
 def format_prompt(message, history):
     prompt = "<s>"
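For context, a minimal standalone sketch of the SpeechRecognition capture-and-save pattern the new `record_audio` helper relies on; `sr.Microphone()` requires the PyAudio package, and the function name and filename below are only illustrative:

```python
# Standalone sketch (outside Streamlit) of the capture pattern used by record_audio.
# Assumes the SpeechRecognition and PyAudio packages are installed.
import speech_recognition as sr

def capture_to_wav(filename="audio.wav", timeout=5):
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # listen() waits up to `timeout` seconds for speech to start,
        # then records until the speaker goes silent.
        audio_data = recognizer.listen(source, timeout=timeout)
    # AudioData.get_wav_data() returns the capture as WAV-encoded bytes.
    with open(filename, "wb") as f:
        f.write(audio_data.get_wav_data())
    return filename

if __name__ == "__main__":
    print("Saved recording to", capture_to_wav())
```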
@@ -81,14 +70,16 @@ def detect_vocal_activity(audio_data):
 
     return actividad_vocal
 
-
 def main():
     if "history" not in st.session_state:
         st.session_state.history = []
 
-    if not audio_data.empty():
-        st.audio(audio_data.export().read(), format="audio/wav")
-        audio_data.export("audio.wav", format="wav")
+    start_stop_button = st.button("Iniciar/Detener Detección")
+
+    if start_stop_button:
+        record_audio("audio.wav")
+
+        st.audio("audio.wav", format="audio/wav")
     audio_text = recognize_speech("audio.wav")
 
     if not st.session_state.history:
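A runnable sketch of the button-driven rerun flow in `main()`; the microphone capture is swapped for a hypothetical `fake_record_audio` stand-in that writes a silent WAV, so the Streamlit logic can be tried without audio hardware:

```python
# Sketch of the st.button-triggered flow; fake_record_audio is a stand-in
# for the app's record_audio so this runs without a microphone.
import wave
import streamlit as st

def fake_record_audio(filename="audio.wav", seconds=1, rate=16000):
    # Write `seconds` of 16-bit mono silence as a placeholder recording.
    with wave.open(filename, "wb") as wav:
        wav.setnchannels(1)
        wav.setsampwidth(2)
        wav.setframerate(rate)
        wav.writeframes(b"\x00\x00" * rate * seconds)

if "history" not in st.session_state:
    st.session_state.history = []

# st.button returns True only on the rerun caused by the click,
# so the recording happens once per press.
if st.button("Iniciar/Detener Detección"):
    fake_record_audio("audio.wav")
    st.audio("audio.wav", format="audio/wav")
```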
@@ -97,23 +88,19 @@ def main():
         st.session_state.history.append((pre_prompt, output))
 
     if audio_text:
-        actividad_vocal = detect_vocal_activity("audio.wav")
-
-        if actividad_vocal.any():
-            output, audio_file = generate(audio_text, history=st.session_state.history)
-
-            if audio_text:
-                st.session_state.history.append((audio_text, output))
-
-            if audio_file is not None:
-                st.markdown(
-                    f"""
-                    <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
-                    """,
-                    unsafe_allow_html=True
-                )
-        else:
-            st.warning("No se detectó actividad vocal.")
+        detect_vocal_activity("audio.wav")
+        output, audio_file = generate(audio_text, history=st.session_state.history)
+
+        if audio_text:
+            st.session_state.history.append((audio_text, output))
+
+        if audio_file is not None:
+            st.markdown(
+                f"""
+                <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
+                """,
+                unsafe_allow_html=True
+            )
 
 if __name__ == "__main__":
     main()
 
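The autoplay block kept at the end of `main()` embeds the generated MP3 as a base64 `data:` URI inside raw HTML. A self-contained sketch of that embed, using gTTS (already imported by the app) only to produce some MP3 bytes to play:

```python
# Sketch of the base64 <audio autoplay> embed used in main(); gTTS is only
# used here to generate MP3 bytes for the demo.
import base64
import io

import streamlit as st
from gtts import gTTS

def autoplay_mp3(mp3_bytes: bytes) -> None:
    b64 = base64.b64encode(mp3_bytes).decode()
    st.markdown(
        f'<audio autoplay controls src="data:audio/mp3;base64,{b64}"></audio>',
        unsafe_allow_html=True,
    )

buf = io.BytesIO()
gTTS("Hola, esto es una prueba.", lang="es").write_to_fp(buf)
autoplay_mp3(buf.getvalue())
```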