salomonsky committed on
Commit
8fb4803
verified
1 Parent(s): 0384131

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -19
app.py CHANGED
@@ -73,13 +73,20 @@ def text_to_speech(text, speed=1.3):
73
  audio_fp.seek(0)
74
  return audio_fp
75
 
 
 
 
 
 
 
 
 
 
76
  def main():
77
  if "history" not in st.session_state:
78
  st.session_state.history = []
79
 
80
- audio_data = webrtc_streamer(key="audio", audio_recorder=True)
81
-
82
- if audio_data is not None and not audio_data.empty():
83
  st.audio(audio_data.export().read(), format="audio/wav")
84
  audio_data.export("audio.wav", format="wav")
85
  audio_text = recognize_speech("audio.wav")
@@ -88,20 +95,22 @@ def main():
88
  pre_prompt = "Te Llamarás Chaman 4.0 y tus respuestas serán sumamente breves."
89
  output, _ = generate(pre_prompt, history=st.session_state.history)
90
  st.session_state.history.append((pre_prompt, output))
91
-
92
- if audio_text:
93
- output, audio_file = generate(audio_text, history=st.session_state.history)
94
-
95
- if audio_text:
96
- st.session_state.history.append((audio_text, output))
97
 
98
- if audio_file is not None:
99
- st.markdown(
100
- f"""
101
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
102
- """,
103
- unsafe_allow_html=True
104
- )
105
-
106
- if __name__ == "__main__":
107
- main()
 
 
 
 
 
 
 
 
 
73
  audio_fp.seek(0)
74
  return audio_fp
75
 
76
+ def detect_vocal_activity(audio_data):
77
+ y, sr = librosa.load(audio_data, sr=None)
78
+ umbral_actividad_vocal = 0.01
79
+ amplitud_media = librosa.feature.rms(y=y)
80
+ actividad_vocal = amplitud_media > umbral_actividad_vocal
81
+
82
+ return actividad_vocal
83
+
84
+
85
  def main():
86
  if "history" not in st.session_state:
87
  st.session_state.history = []
88
 
89
+ if not audio_data.empty():
 
 
90
  st.audio(audio_data.export().read(), format="audio/wav")
91
  audio_data.export("audio.wav", format="wav")
92
  audio_text = recognize_speech("audio.wav")
 
95
  pre_prompt = "Te Llamarás Chaman 4.0 y tus respuestas serán sumamente breves."
96
  output, _ = generate(pre_prompt, history=st.session_state.history)
97
  st.session_state.history.append((pre_prompt, output))
 
 
 
 
 
 
98
 
99
+ if audio_text:
100
+ actividad_vocal = detect_vocal_activity("audio.wav")
101
+
102
+ if actividad_vocal.any():
103
+ output, audio_file = generate(audio_text, history=st.session_state.history)
104
+
105
+ if audio_text:
106
+ st.session_state.history.append((audio_text, output))
107
+
108
+ if audio_file is not None:
109
+ st.markdown(
110
+ f"""
111
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
112
+ """,
113
+ unsafe_allow_html=True
114
+ )
115
+ else:
116
+ st.warning("No se detectó actividad vocal.")