xaman4

Sleeping

App Files Community

salomonsky committed on Apr 2, 2024

Commit

3db068f

verified ·

1 Parent(s): 73bfb55

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -47

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import base64
 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
-from audiorecorder import audiorecorder
 import speech_recognition as sr
 pre_prompt_text = ""
@@ -83,62 +83,55 @@ def text_to_speech(text):
     audio_fp.seek(0)
     return audio_fp
-def main():
-    audio_data = audiorecorder("Push to Talk", "Stop Recording...")
-    if not audio_data.empty():
-        st.audio(audio_data.export().read(), format="audio/wav")
-        audio_data.export("audio.wav", format="wav")
-        audio_text = recognize_speech("audio.wav")
-        if audio_text:
-            output, audio_file = generate(audio_text, history=st.session_state.history)
-            if audio_file is not None:
-                st.markdown(
-                    f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
-                    unsafe_allow_html=True)
-def main():
-    audio_data = audiorecorder("Push to Talk", "Stop Recording...")
-    if not audio_data.empty():
-        st.audio(audio_data.export().read(), format="audio/wav")
-        audio_data.export("audio.wav", format="wav")
-        audio_text = recognize_speech("audio.wav")
-        if audio_text:
-            output, audio_file = generate(audio_text, history=st.session_state.history)
-            if audio_file is not None:
-                st.markdown(
-                    f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
-                    unsafe_allow_html=True)
-def main():
-    st.write("Dí la palabra XAMAN para empezar o DETENTE para procesar")
-    st.components.v1.html(
-        """
-        <script>
-        var recognizer = new webkitSpeechRecognition();
-        recognizer.continuous = true;
-        recognizer.lang = 'es-ES';
-        recognizer.onresult = function(event) {
-            var command = event.results[event.results.length-1][0].transcript;
-            console.log('Command:', command);
-            Streamlit.setComponentValue(command.toLowerCase());
-        };
-        recognizer.start();
-        window.onbeforeunload = function() {
-            recognizer.stop();
-        };
-        </script>
-        """
-    )
 if __name__ == "__main__":
-    main()

 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
+from pydub import AudioSegment
 import speech_recognition as sr
 pre_prompt_text = ""
     audio_fp.seek(0)
     return audio_fp
+def def_main():
+    st.write("Dí la palabra XAMAN para empezar o DETENTE para procesar")
+    recognizer = sr.Recognizer()
+    with sr.Microphone() as source:
+        while True:
+            st.write("Listening...")
+            recognizer.adjust_for_ambient_noise(source)
+            audio = recognizer.listen(source)
+            try:
+                command = recognizer.recognize_google(audio, language="es-ES")
+                st.write(f"Command: {command}")
+                if command.lower() == "xaman":
+                    st.write("Voice capture activated. Say 'Detente', 'Alto', or 'Basta' to stop.")
+                    audio_data = audiorecorder("Push to Talk", "Stop Recording...")
+                    if not audio_data.empty():
+                        st.audio(audio_data.export().read(), format="audio/wav")
+                        audio_data.export("audio.wav", format="wav")
+                        audio_segment = AudioSegment.from_wav("audio.wav")
+                        non_silent_segments = pydub.effects.split_on_silence(audio_segment, min_silence_len=500, silence_thresh=-40)
+                        combined_audio = AudioSegment.empty()
+                        for segment in non_silent_segments:
+                            combined_audio += segment
+                        combined_audio.export("combined_audio.wav", format="wav")
+                        audio_text = recognize_speech("combined_audio.wav")
+                        if audio_text:
+                            output, audio_file = generate(audio_text, history=st.session_state.history)
+                            if audio_file is not None:
+                                st.markdown(
+                                    f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
+                                    unsafe_allow_html=True)
+                elif command.lower() in ["detente", "alto", "basta"]:
+                    st.write("Voice capture stopped.")
+                    break
+            except sr.UnknownValueError:
+                st.write("Could not understand audio")
+            except sr.RequestError as e:
+                st.write(f"Error: {e}")
 if __name__ == "__main__":
+    def_main()