salomonsky committed on
Commit
3db068f
·
verified ·
1 Parent(s): 73bfb55

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -47
app.py CHANGED
@@ -3,7 +3,7 @@ import base64
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
- from audiorecorder import audiorecorder
7
  import speech_recognition as sr
8
 
9
  pre_prompt_text = ""
@@ -83,62 +83,55 @@ def text_to_speech(text):
83
  audio_fp.seek(0)
84
  return audio_fp
85
 
86
- def main():
87
- audio_data = audiorecorder("Push to Talk", "Stop Recording...")
88
-
89
- if not audio_data.empty():
90
- st.audio(audio_data.export().read(), format="audio/wav")
91
- audio_data.export("audio.wav", format="wav")
92
- audio_text = recognize_speech("audio.wav")
93
 
94
- if audio_text:
95
- output, audio_file = generate(audio_text, history=st.session_state.history)
96
 
97
- if audio_file is not None:
98
- st.markdown(
99
- f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
100
- unsafe_allow_html=True)
 
101
 
102
- def main():
103
- audio_data = audiorecorder("Push to Talk", "Stop Recording...")
 
 
 
 
 
104
 
105
- if not audio_data.empty():
106
- st.audio(audio_data.export().read(), format="audio/wav")
107
- audio_data.export("audio.wav", format="wav")
108
- audio_text = recognize_speech("audio.wav")
109
 
110
- if audio_text:
111
- output, audio_file = generate(audio_text, history=st.session_state.history)
112
 
113
- if audio_file is not None:
114
- st.markdown(
115
- f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
116
- unsafe_allow_html=True)
117
 
118
- def main():
119
- st.write("Dí la palabra XAMAN para empezar o DETENTE para procesar")
120
 
121
- st.components.v1.html(
122
- """
123
- <script>
124
- var recognizer = new webkitSpeechRecognition();
125
- recognizer.continuous = true;
126
- recognizer.lang = 'es-ES';
127
 
128
- recognizer.onresult = function(event) {
129
- var command = event.results[event.results.length-1][0].transcript;
130
- console.log('Command:', command);
131
- Streamlit.setComponentValue(command.toLowerCase());
132
- };
133
 
134
- recognizer.start();
 
 
135
 
136
- window.onbeforeunload = function() {
137
- recognizer.stop();
138
- };
139
- </script>
140
- """
141
- )
142
 
143
  if __name__ == "__main__":
144
- main()
 
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ from pydub import AudioSegment
7
  import speech_recognition as sr
8
 
9
  pre_prompt_text = ""
 
83
  audio_fp.seek(0)
84
  return audio_fp
85
 
86
+ def def_main():
87
+ st.write(" la palabra XAMAN para empezar o DETENTE para procesar")
 
 
 
 
 
88
 
89
+ recognizer = sr.Recognizer()
 
90
 
91
+ with sr.Microphone() as source:
92
+ while True:
93
+ st.write("Listening...")
94
+ recognizer.adjust_for_ambient_noise(source)
95
+ audio = recognizer.listen(source)
96
 
97
+ try:
98
+ command = recognizer.recognize_google(audio, language="es-ES")
99
+ st.write(f"Command: {command}")
100
+
101
+ if command.lower() == "xaman":
102
+ st.write("Voice capture activated. Say 'Detente', 'Alto', or 'Basta' to stop.")
103
+ audio_data = audiorecorder("Push to Talk", "Stop Recording...")
104
 
105
+ if not audio_data.empty():
106
+ st.audio(audio_data.export().read(), format="audio/wav")
107
+ audio_data.export("audio.wav", format="wav")
 
108
 
109
+ audio_segment = AudioSegment.from_wav("audio.wav")
110
+ non_silent_segments = pydub.effects.split_on_silence(audio_segment, min_silence_len=500, silence_thresh=-40)
111
 
112
+ combined_audio = AudioSegment.empty()
113
+ for segment in non_silent_segments:
114
+ combined_audio += segment
 
115
 
116
+ combined_audio.export("combined_audio.wav", format="wav")
117
+ audio_text = recognize_speech("combined_audio.wav")
118
 
119
+ if audio_text:
120
+ output, audio_file = generate(audio_text, history=st.session_state.history)
 
 
 
 
121
 
122
+ if audio_file is not None:
123
+ st.markdown(
124
+ f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
125
+ unsafe_allow_html=True)
 
126
 
127
+ elif command.lower() in ["detente", "alto", "basta"]:
128
+ st.write("Voice capture stopped.")
129
+ break
130
 
131
+ except sr.UnknownValueError:
132
+ st.write("Could not understand audio")
133
+ except sr.RequestError as e:
134
+ st.write(f"Error: {e}")
 
 
135
 
136
  if __name__ == "__main__":
137
+ def_main()