salomonsky committed on
Commit 3625f99 (verified)
1 Parent(s): 2f24d83

Update app.py

Files changed (1)
  1. app.py +24 -25
app.py CHANGED
@@ -3,16 +3,12 @@ import base64
 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
-from pydub import AudioSegment
-from pydub.playback import play
 from audiorecorder import audiorecorder
 import speech_recognition as sr

-pre_prompt = "Te Llamaras Chaman 4.0, tus respuestas serán lo más breves posibles."
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-pre_prompt_sent = False
-
 def recognize_speech(audio_data):
+    st.info("Reconociendo audio...")
+
     recognizer = sr.Recognizer()
     audio_recording = sr.AudioFile(audio_data)
@@ -20,7 +16,7 @@ def recognize_speech(audio_data):
         audio = recognizer.record(source)

     audio_text = recognizer.recognize_google(audio, language="es-ES")
-
+
     return audio_text

 def format_prompt(message, history):
@@ -34,6 +30,8 @@ def format_prompt(message, history):
     return prompt

 def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
+    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
     temperature = float(temperature) if temperature is not None else 0.9
     if temperature < 1e-2:
         temperature = 1e-2
@@ -64,33 +62,34 @@ def text_to_speech(text, speed=1.3):
     audio_fp = io.BytesIO()
     tts.write_to_fp(audio_fp)
     audio_fp.seek(0)
-
-    return audio.export(format="mp3", codec="mp3")
+    return audio_fp

 def main():
-    audio_data = audiorecorder("Habla para iniciar", "Procesando...")
-
+    st.description("voz a texto + chatbot + tts audio")
+
     if "history" not in st.session_state:
         st.session_state.history = []

+    audio_data = audiorecorder("Habla para grabar", "Deteniendo la grabación...")
+
     if not audio_data.empty():
         st.audio(audio_data.export().read(), format="audio/wav")
         audio_data.export("audio.wav", format="wav")
+        st.write(f"Frame rate: {audio_data.frame_rate}, Frame width: {audio_data.frame_width}, Duration: {audio_data.duration_seconds} seconds")
+
         audio_text = recognize_speech("audio.wav")
-
-        if audio_text:
-            output, audio_file = generate(audio_text, history=st.session_state.history)
-
-            if audio_file is not None:
-                st.markdown(
-                    f"""
-                    <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
-                    """,
-                    unsafe_allow_html=True
-                )
-
-        if audio_text:
-            st.session_state.history.append((audio_text, output))
+        output, audio_file = generate(audio_text, history=st.session_state.history)
+
+        if audio_text:
+            st.session_state.history.append((audio_text, output))
+
+        if audio_file is not None:
+            st.markdown(
+                f"""
+                <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
+                """,
+                unsafe_allow_html=True
+            )

 if __name__ == "__main__":
     main()
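The substantive change to generate() is that the Mixtral InferenceClient is now created inside the function instead of at module level, and the old pre_prompt / pre_prompt_sent globals are dropped. The part of the body that actually queries the model falls between the hunks and is not shown in this diff, so the following is only a sketch of how a generate() with this signature might use the per-call client and hand the reply to text_to_speech(); the text_generation call, do_sample=True, and the prompt assembly via format_prompt() are assumptions, while the (output, audio_file) return shape is taken from how main() unpacks the result.

def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
    # As committed: one client per call rather than a module-level singleton.
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

    temperature = float(temperature) if temperature is not None else 0.9
    if temperature < 1e-2:
        temperature = 1e-2

    # Assumed: build the conversation prompt and query the hosted Mixtral endpoint.
    prompt = format_prompt(audio_text, history)
    output = client.text_generation(
        prompt,
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
    )

    # main() expects a (text, audio) pair; text_to_speech() returns an in-memory MP3.
    audio_file = text_to_speech(output)
    return output, audio_file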
 
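The other fix is in text_to_speech(): the old return audio.export(format="mp3", codec="mp3") referenced an audio object that is never defined once the pydub imports are gone, so the function now returns the gTTS buffer itself, which main() then base64-encodes into an autoplaying <audio> tag. Below is a minimal standalone sketch of that round trip, assuming the start of the function builds a gTTS object with Spanish output (not visible in the diff); the speed parameter was presumably consumed by the removed pydub code and appears unused in what remains.

import base64
import io

from gtts import gTTS


def text_to_speech(text, speed=1.3):
    # Assumed head of the function: synthesize speech with gTTS, keeping the MP3 in memory.
    tts = gTTS(text=text, lang="es")
    audio_fp = io.BytesIO()
    tts.write_to_fp(audio_fp)
    audio_fp.seek(0)
    return audio_fp  # as committed: hand back the BytesIO instead of the undefined audio.export(...)


# As in main(): embed the in-memory MP3 as a base64 data URI so the browser autoplays it.
audio_fp = text_to_speech("Hola, soy Chaman 4.0")
data_uri = "data:audio/mp3;base64," + base64.b64encode(audio_fp.read()).decode()
html_tag = f'<audio autoplay="autoplay" controls="controls" src="{data_uri}" type="audio/mp3"></audio>'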