salomonsky committed
Commit 9ab5c4d
verified
1 Parent(s): 7ce7fe0

Update app.py

Files changed (1)
  1. app.py +18 -0
app.py CHANGED
@@ -6,6 +6,7 @@ import base64
 import io
 from huggingface_hub import InferenceClient
 from gtts import gTTS
+import speech_recognition as sr

 st.title("Chatbot de Voz a Voz")

@@ -22,6 +23,7 @@ channels = 1
 seconds_per_frame = frames_per_buffer / audio_rate
 vad_threshold = 0.5

+#abrir microfono
 def callback(data):
     try:
         audio_array = np.frombuffer(data, dtype=np.int16)
@@ -35,6 +37,14 @@ def callback(data):
     except Exception as e:
         st.error(f"Error durante la captura de audio: {e}")

+# voz a texto
+def transcribe_audio(audio_data):
+    recognizer = sr.Recognizer()
+    audio_chunk = sr.AudioData(audio_data, sample_rate=audio_rate, sample_width=2) # 16-bit PCM audio
+    text = recognizer.recognize_google(audio_chunk, language="es-ES")
+    return text
+
+# entrada al modelo de lenguaje
 def format_prompt(message, history):
     prompt = "<s>"

@@ -45,6 +55,7 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt

+#generación de respuesta
 def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
     client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

@@ -73,6 +84,7 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
     audio_file = text_to_speech(response, speed=1.3)
     return response, audio_file

+#respuesta texto a voz
 def text_to_speech(text, speed=1.3):
     tts = gTTS(text=text, lang='es')
     audio_fp = io.BytesIO()
@@ -85,6 +97,7 @@ def text_to_speech(text, speed=1.3):
     modified_audio_fp.seek(0)
     return modified_audio_fp

+#captura de audio
 def save_audio_buffer():
     if buffer:
         audio_array = np.concatenate(buffer)
@@ -96,9 +109,14 @@ def save_audio_buffer():
         )

         st.audio(audio_array, format="audio/wav", channels=channels)
+        transcribed_text = transcribe_audio(audio_array.tobytes())
+        st.subheader("Texto Transcrito:")
+        st.write(transcribed_text)
+        output, audio_file = generate(transcribed_text, history=st.session_state.history)

         buffer.clear()

+#interfaz de usuario
 def main():
     st.title("Chatbot de Voz a Voz")
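For reference, the snippet below is a minimal standalone sketch of the voz-a-texto step this commit adds, runnable outside Streamlit. It reuses the same speech_recognition calls as the new transcribe_audio function; the 16000 Hz sample rate and the silent test buffer are assumptions for illustration, since app.py's real audio_rate and capture buffer are defined elsewhere in the file, and recognize_google needs network access.

import numpy as np
import speech_recognition as sr

audio_rate = 16000  # assumed; app.py defines its own audio_rate near the top of the file

def transcribe_audio(audio_data):
    # As in the commit: wrap raw 16-bit PCM bytes (sample_width=2 matches the
    # np.int16 samples in the capture buffer) and query Google's web recognizer in Spanish.
    recognizer = sr.Recognizer()
    audio_chunk = sr.AudioData(audio_data, sample_rate=audio_rate, sample_width=2)
    return recognizer.recognize_google(audio_chunk, language="es-ES")

if __name__ == "__main__":
    # Stand-in for np.concatenate(buffer) in save_audio_buffer(): one second of silence.
    audio_array = np.zeros(audio_rate, dtype=np.int16)
    try:
        print(transcribe_audio(audio_array.tobytes()))
    except sr.UnknownValueError:
        print("No speech recognized (expected for silence)")
    except sr.RequestError as exc:
        print(f"Speech API request failed: {exc}")

Because save_audio_buffer() passes audio_array.tobytes(), sample_width must stay at 2 bytes for as long as the capture callback stores np.int16 samples.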
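The hunks above only show the edges of the reply path (the prompt template in format_prompt, the Mixtral client in generate, and the gTTS call in text_to_speech), so the sketch below is an assumption-laden illustration of how the transcribed text could flow through it: the generate_reply name, the text_generation call, and its sampling values are not from this commit.

import io

from gtts import gTTS
from huggingface_hub import InferenceClient

def generate_reply(transcribed_text):
    # Model id and prompt template as they appear in app.py's generate()/format_prompt().
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
    prompt = f"<s>[INST] {transcribed_text} [/INST]"  # empty-history case of format_prompt
    # Assumed call; generate()'s actual body lies outside the visible hunks.
    response = client.text_generation(
        prompt,
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.95,
        repetition_penalty=1.0,
    )
    # Same gTTS round trip as text_to_speech(), minus the speed adjustment.
    audio_fp = io.BytesIO()
    gTTS(text=response, lang="es").write_to_fp(audio_fp)
    audio_fp.seek(0)
    return response, audio_fp

if __name__ == "__main__":
    text, speech = generate_reply("Hola, ¿qué hora es?")
    print(text)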