salomonsky committed on
Commit
08bb6b5
·
verified ·
1 Parent(s): e4bb5ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -8
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import io
2
  import base64
3
- import soundfile as sf
4
  from gtts import gTTS
5
  import streamlit as st
6
  import speech_recognition as sr
@@ -15,13 +14,11 @@ if "pre_prompt_sent" not in st.session_state:
15
 
16
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
17
 
18
- def recognize_speech(audio_data, sample_rate, show_messages=True):
19
  recognizer = sr.Recognizer()
20
 
21
  try:
22
- # Ajustamos la tasa de muestreo del audio
23
- adjusted_audio_data = sf.resample(audio_data, sample_rate, 16000, subtype='PCM_16')
24
- audio_text = recognizer.recognize_google(adjusted_audio_data, language="es-ES")
25
  if show_messages:
26
  st.subheader("Texto Reconocido:")
27
  st.write(audio_text)
@@ -98,14 +95,13 @@ def main():
98
  if not st.session_state.pre_prompt_sent:
99
  st.session_state.pre_prompt_sent = True
100
 
101
- audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
102
 
103
  if audio:
104
  st.audio(audio['bytes'], format="audio/wav")
105
  audio_bytes = audio["bytes"]
106
- sample_rate = audio["sample_rate"]
107
 
108
- audio_text = recognize_speech(audio_bytes, sample_rate)
109
 
110
  if audio_text:
111
  output, audio_file = generate(audio_text, history=st.session_state.history)
 
1
  import io
2
  import base64
 
3
  from gtts import gTTS
4
  import streamlit as st
5
  import speech_recognition as sr
 
14
 
15
  pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
16
 
17
+ def recognize_speech(audio_data, show_messages=True):
18
  recognizer = sr.Recognizer()
19
 
20
  try:
21
+ audio_text = recognizer.recognize_google(audio_data, language="es-ES")
 
 
22
  if show_messages:
23
  st.subheader("Texto Reconocido:")
24
  st.write(audio_text)
 
95
  if not st.session_state.pre_prompt_sent:
96
  st.session_state.pre_prompt_sent = True
97
 
98
+ audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder', sample_rate=16000)
99
 
100
  if audio:
101
  st.audio(audio['bytes'], format="audio/wav")
102
  audio_bytes = audio["bytes"]
 
103
 
104
+ audio_text = recognize_speech(audio_bytes)
105
 
106
  if audio_text:
107
  output, audio_file = generate(audio_text, history=st.session_state.history)