salomonsky committed on
Commit b0b5cd6 (verified)
1 Parent(s): 5abd901

Update app.py

Files changed (1)
  1. app.py +46 -28
app.py CHANGED
@@ -2,18 +2,17 @@ import streamlit as st
 from huggingface_hub import InferenceClient
 from gtts import gTTS
 import base64
-import sounddevice as sd
 import speech_recognition as sr
+from pydub import AudioSegment
+from pydub.playback import play
+import pyaudio
+from io import BytesIO
+from time import sleep
 
-
-# Initialize the inference client
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
-# Define the system prompt
 system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
 system_prompt_sent = False
 
-# Function to format the prompt
 def format_prompt(message, history):
     global system_prompt_sent
     prompt = "<s>"
@@ -30,15 +29,13 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt
 
-# Function to convert text to audio
 def text_to_speech(text, speed=2.0):
     tts = gTTS(text=text, lang='es')
-    audio_file_path = 'output.mp3'
-    tts.save(audio_file_path)
+    audio_file_path = BytesIO()
+    tts.write_to_fp(audio_file_path)
     return audio_file_path
 
-# Function to generate a response
-def generate(
+def generate_with_progress(
     user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
 ):
     global system_prompt_sent
@@ -60,19 +57,16 @@ def generate(
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
 
     response = ""
+    total_tokens = 0
     for response_token in stream:
         response += response_token.token.text
-
-    response = ' '.join(response.split()).replace('</s>', '')
+        total_tokens += 1
 
-    # Convert the response to audio and play it in Streamlit with HTML autoplay
-    audio_file_path = text_to_speech(response)
-    audio_file = open(audio_file_path, 'rb')
-    audio_bytes = audio_file.read()
-    st.markdown(
-        f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3"></audio>',
-        unsafe_allow_html=True
-    )
+        # Update the progress bar
+        st.subheader("Generando respuesta...")
+        st.progress(total_tokens / max_new_tokens)
+
+    response = ' '.join(response.split()).replace('</s>', '')
 
     return response
 
@@ -89,18 +83,42 @@ if "history" not in st.session_state:
 if start_recording_button:
     st.info("Habla ahora...")
 
-    # Record audio
-    with sd.InputStream(callback=st.audio_recorder(callback=True), channels=1):
-        audio_data = st.audio_recorder()
+    audio_data = BytesIO()
+    p = pyaudio.PyAudio()
+    stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
+
+    with st.spinner("Grabando..."):
+        frames = []
+        for i in range(int(44100 / 1024 * 5)):  # record for 5 seconds
+            data = stream.read(1024)
+            frames.append(data)
+
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
 
-    # Convert audio to text
     recognizer = sr.Recognizer()
     try:
-        text = recognizer.recognize_google(audio_data, language="es-ES")
+        audio_data.write(b''.join(frames))
+        audio_data.seek(0)
+        audio = AudioSegment.from_file(audio_data, format="wav")
+        text = recognizer.recognize_google(audio, language="es-ES")
         st.success(f"Texto reconocido: {text}")
-        # Generate a response and update the history
-        output = generate(text, history=st.session_state.history)
+
+        generate_progress = st.empty()
+        generate_progress.progress(0.0)
+        output = generate_with_progress(text, history=st.session_state.history)
         st.session_state.history.append((text, output))
+        st.success("Respuesta generada con éxito.")
+
+        st.subheader("Reproduciendo respuesta...")
+        audio_file_path = text_to_speech(output)
+        play(audio_file_path)
+
+        for progress_value in range(0, 101, 10):
+            st.progress(progress_value / 100)
+            sleep(0.5)
+
     except sr.UnknownValueError:
        st.warning("No se pudo reconocer el habla.")
    except sr.RequestError as e:
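
Note on the new audio hand-offs: recognizer.recognize_google() expects a speech_recognition.AudioData object (raw PCM plus sample rate and width), not a pydub AudioSegment, and pydub.playback.play() expects a decoded AudioSegment, not the raw MP3 BytesIO returned by text_to_speech(). Below is a minimal standalone sketch of how those two conversions could be wired, assuming the same 44.1 kHz, mono, 16-bit recording parameters used in the commit and that ffmpeg is available for pydub's MP3 decoding; the helper names are illustrative and not part of app.py.

import pyaudio
import speech_recognition as sr
from gtts import gTTS
from io import BytesIO
from pydub import AudioSegment
from pydub.playback import play

RATE = 44100      # sample rate used in the commit
CHUNK = 1024      # frames per buffer used in the commit
SECONDS = 5       # recording length used in the commit

def record_pcm(seconds=SECONDS):
    # Capture raw 16-bit mono PCM from the default input device.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)
    frames = [stream.read(CHUNK) for _ in range(int(RATE / CHUNK * seconds))]
    stream.stop_stream()
    stream.close()
    sample_width = p.get_sample_size(pyaudio.paInt16)
    p.terminate()
    return b''.join(frames), sample_width

def recognize_spanish(pcm_bytes, sample_width):
    # Wrap the raw PCM in speech_recognition's AudioData before recognition.
    recognizer = sr.Recognizer()
    audio = sr.AudioData(pcm_bytes, RATE, sample_width)
    return recognizer.recognize_google(audio, language="es-ES")

def speak_spanish(text):
    # gTTS writes MP3 bytes; decode them with pydub before calling play().
    mp3_buffer = BytesIO()
    gTTS(text=text, lang='es').write_to_fp(mp3_buffer)
    mp3_buffer.seek(0)
    play(AudioSegment.from_file(mp3_buffer, format="mp3"))

if __name__ == "__main__":
    pcm, width = record_pcm()
    recognized = recognize_spanish(pcm, width)
    print(f"Texto reconocido: {recognized}")
    speak_spanish(recognized)

Because sr.AudioData carries the raw PCM together with its sample rate and sample width, no WAV header needs to be written before recognition; the bytes produced by the PyAudio stream are passed straight through.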