xaman4

Sleeping

salomonsky committed on Apr 3, 2024

Commit

6cad3ff

verified ·

1 Parent(s): 27ba7ab

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -84,9 +84,17 @@ def text_to_speech(text):
     audio_fp.seek(0)
     return audio_fp
 def def_main():
     st.write("Di la palabra XAMAN para empezar o DETENTE para procesar")
     recording = st_mic_recorder(recording_container=st.empty(), auto_recording=True)
     if recording:
@@ -95,13 +103,16 @@ def def_main():
         audio_file = io.BytesIO(audio_data)
         st.audio(audio_file, format="audio/wav")
-        audio_text = recognize_speech(audio_file)
-        if audio_text:
-            output, audio_file = generate(audio_text, history=st.session_state.history)
-            if audio_file is not None:
-                st.markdown(
-                    f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
-                    unsafe_allow_html=True)
 if __name__ == "__main__":
-    def_main()

     audio_fp.seek(0)
     return audio_fp
+def detect_voice_activity(audio_data, threshold_energy=5000):
+    """Return True when the clip's summed absolute amplitude exceeds a threshold.
+
+    Args:
+        audio_data: Encoded audio bytes in a format pydub can decode.
+        threshold_energy: Minimum summed absolute sample value that counts as
+            voice activity. The sum is not normalised by clip length, so it
+            grows with duration; adjust as needed.
+
+    Returns:
+        bool: True if the summed energy is strictly greater than the threshold.
+    """
+    audio = pydub.AudioSegment.from_file(io.BytesIO(audio_data))
+    audio = audio.set_channels(1)  # Convert to mono
+    # Widen before abs(): in a narrow integer dtype the most negative sample
+    # (e.g. int16 -32768) wraps and stays negative, lowering the total.
+    samples = np.asarray(audio.get_array_of_samples(), dtype=np.int64)
+    signal_energy = np.sum(np.abs(samples))
+    return bool(signal_energy > threshold_energy)
 def def_main():
     st.write("Di la palabra XAMAN para empezar o DETENTE para procesar")
     recording = st_mic_recorder(recording_container=st.empty(), auto_recording=True)
     if recording:
         audio_file = io.BytesIO(audio_data)
         st.audio(audio_file, format="audio/wav")
+        is_voice_active = detect_voice_activity(audio_data)
+        if is_voice_active:
+            audio_text = recognize_speech(audio_file)
+            if audio_text:
+                output, audio_file = generate(audio_text, history=st.session_state.history)
+                if audio_file is not None:
+                    st.markdown(
+                        f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
+                        unsafe_allow_html=True)
 if __name__ == "__main__":
+    def_main()