salomonsky commited on
Commit
c91e886
verified
1 Parent(s): d6b9b98

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -61
app.py CHANGED
@@ -1,48 +1,41 @@
1
- import io
2
  import base64
 
 
3
  from gtts import gTTS
4
- import streamlit as st
5
  import speech_recognition as sr
6
- from huggingface_hub import InferenceClient
7
- from streamlit_mic_recorder import mic_recorder
8
- import wave
9
- import numpy as np
10
  import os
11
 
12
- pre_prompt_text = "eres una IA conductual, tus respuestas serán breves."
13
- temp_audio_file_path = "./output.wav"
14
-
15
  if "history" not in st.session_state:
16
  st.session_state.history = []
17
 
18
- if "pre_prompt_sent" not in st.session_state:
19
- st.session_state.pre_prompt_sent = False
20
-
21
  def recognize_speech(audio_data, show_messages=True):
22
  recognizer = sr.Recognizer()
23
-
24
- with io.BytesIO(audio_data) as audio_file:
25
- try:
26
- audio_text = recognizer.recognize_google(audio_file, language="es-ES")
27
- if show_messages:
28
- st.subheader("Texto Reconocido:")
29
- st.write(audio_text)
30
- st.success("Reconocimiento de voz completado.")
31
- except sr.UnknownValueError:
32
- st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
33
- audio_text = ""
34
- except sr.RequestError:
35
- st.error("Hablame para comenzar!")
36
- audio_text = ""
 
 
 
37
 
38
  return audio_text
39
 
40
  def format_prompt(message, history):
41
  prompt = "<s>"
42
 
43
- if not st.session_state.pre_prompt_sent:
44
- prompt += f"[INST]{pre_prompt_text}[/INST]"
45
-
46
  for user_prompt, bot_response in history:
47
  prompt += f"[INST] {user_prompt} [/INST]"
48
  prompt += f" {bot_response}</s> "
@@ -64,7 +57,8 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
64
  top_p=top_p,
65
  repetition_penalty=repetition_penalty,
66
  do_sample=True,
67
- seed=42,)
 
68
 
69
  formatted_prompt = format_prompt(audio_text, history)
70
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
@@ -82,39 +76,36 @@ def text_to_speech(text, speed=1.3):
82
  audio_fp = io.BytesIO()
83
  tts.write_to_fp(audio_fp)
84
  audio_fp.seek(0)
85
- return audio_fp
86
-
87
- def audio_play(audio_fp):
88
- st.audio(audio_fp.read(), format="audio/mp3", start_time=0)
89
-
90
- def display_recognition_result(audio_text, output, audio_file):
91
- if audio_text:
92
- st.session_state.history.append((audio_text, output))
93
-
94
- if audio_file is not None:
95
- st.markdown(
96
- f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
97
- unsafe_allow_html=True)
98
 
99
  def main():
100
- if not st.session_state.pre_prompt_sent:
101
- st.session_state.pre_prompt_sent = True
102
-
103
- audio = mic_recorder(start_prompt="▶️", stop_prompt="🛑", key='recorder')
104
-
105
- if audio:
106
- st.audio(audio['bytes'])
107
-
108
- audio_bytes = audio["bytes"]
109
- sample_width = audio["sample_width"] # 2 bytes per sample for 16-bit PCM
110
- sample_rate = audio["sample_rate"] # 44.1 kHz sample rate
111
- num_channels = 1 # 1 channel for mono, 2 for stereo
112
-
113
- with wave.open(temp_audio_file_path, 'w') as wave_file:
114
- wave_file.setnchannels(num_channels)
115
- wave_file.setsampwidth(sample_width)
116
- wave_file.setframerate(sample_rate)
117
- wave_file.writeframes(audio_bytes)
 
 
 
 
118
 
119
  if __name__ == "__main__":
120
  main()
 
1
+ import streamlit as st
2
  import base64
3
+ import io
4
+ from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ from audiorecorder import audiorecorder
7
  import speech_recognition as sr
8
+ from pydub import AudioSegment
 
 
 
9
  import os
10
 
 
 
 
11
  if "history" not in st.session_state:
12
  st.session_state.history = []
13
 
 
 
 
14
  def recognize_speech(audio_data, show_messages=True):
15
  recognizer = sr.Recognizer()
16
+ audio_recording = sr.AudioFile(audio_data)
17
+
18
+ with audio_recording as source:
19
+ audio = recognizer.record(source)
20
+
21
+ try:
22
+ audio_text = recognizer.recognize_google(audio, language="es-ES")
23
+ if show_messages:
24
+ st.subheader("Texto Reconocido:")
25
+ st.write(audio_text)
26
+ st.success("Reconocimiento de voz completado.")
27
+ except sr.UnknownValueError:
28
+ st.warning("No se pudo reconocer el audio. 驴Intentaste grabar algo?")
29
+ audio_text = ""
30
+ except sr.RequestError:
31
+ st.error("Hablame para comenzar!")
32
+ audio_text = ""
33
 
34
  return audio_text
35
 
36
  def format_prompt(message, history):
37
  prompt = "<s>"
38
 
 
 
 
39
  for user_prompt, bot_response in history:
40
  prompt += f"[INST] {user_prompt} [/INST]"
41
  prompt += f" {bot_response}</s> "
 
57
  top_p=top_p,
58
  repetition_penalty=repetition_penalty,
59
  do_sample=True,
60
+ seed=42,
61
+ )
62
 
63
  formatted_prompt = format_prompt(audio_text, history)
64
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
 
76
  audio_fp = io.BytesIO()
77
  tts.write_to_fp(audio_fp)
78
  audio_fp.seek(0)
79
+ audio = AudioSegment.from_file(audio_fp, format="mp3")
80
+ modified_speed_audio = audio.speedup(playback_speed=speed)
81
+ modified_audio_fp = io.BytesIO()
82
+ modified_speed_audio.export(modified_audio_fp, format="mp3")
83
+ modified_audio_fp.seek(0)
84
+ return modified_audio_fp
 
 
 
 
 
 
 
85
 
86
  def main():
87
+ st.title("Chatbot de Voz a Voz")
88
+ audio_text = ""
89
+ audio_data = audiorecorder("Habla para grabar", "Deteniendo la grabaci贸n...")
90
+
91
+ if not audio_data.empty():
92
+ st.audio(audio_data.export().read(), format="audio/wav")
93
+ audio_data.export("audio.wav", format="wav")
94
+ audio_text = recognize_speech("audio.wav")
95
+
96
+ if audio_text:
97
+ output, audio_file = generate(audio_text, history=st.session_state.history)
98
+
99
+ if audio_text:
100
+ st.session_state.history.append((audio_text, output))
101
+
102
+ if audio_file is not None:
103
+ st.markdown(
104
+ f"""
105
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
106
+ """,
107
+ unsafe_allow_html=True
108
+ )
109
 
110
  if __name__ == "__main__":
111
  main()