salomonsky committed on
Commit
09559ae
verified
1 Parent(s): 35a3d64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -45
app.py CHANGED
@@ -2,16 +2,18 @@ import streamlit as st
2
  from huggingface_hub import InferenceClient
3
  from gtts import gTTS
4
  import base64
5
- import speech_recognition as sr
6
- from pydub import AudioSegment
7
- from pydub.playback import play
8
- from io import BytesIO
9
- from time import sleep
10
 
 
 
 
 
11
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
12
  system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
13
  system_prompt_sent = False
14
 
 
15
  def format_prompt(message, history):
16
  global system_prompt_sent
17
  prompt = "<s>"
@@ -28,13 +30,15 @@ def format_prompt(message, history):
28
  prompt += f"[INST] {message} [/INST]"
29
  return prompt
30
 
 
31
  def text_to_speech(text, speed=2.0):
32
  tts = gTTS(text=text, lang='es')
33
- audio_file_path = BytesIO()
34
- tts.write_to_fp(audio_file_path)
35
  return audio_file_path
36
 
37
- def generate_with_progress(
 
38
  user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
39
  ):
40
  global system_prompt_sent
@@ -56,49 +60,26 @@ def generate_with_progress(
56
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
57
 
58
  response = ""
59
- total_tokens = 0
60
  for response_token in stream:
61
  response += response_token.token.text
62
- total_tokens += 1
63
-
64
- st.subheader("Generando respuesta...")
65
- st.progress(total_tokens / max_new_tokens)
66
-
67
  response = ' '.join(response.split()).replace('</s>', '')
68
 
 
 
 
 
 
 
 
 
 
69
  return response
70
 
 
71
  if "history" not in st.session_state:
72
  st.session_state.history = []
73
 
74
- recognizer = sr.Recognizer()
75
-
76
- while True:
77
- with st.spinner("Escuchando..."):
78
- try:
79
- with sr.Microphone() as source:
80
- audio_data = recognizer.listen(source, timeout=5)
81
- st.success("Audio capturado con éxito.")
82
-
83
- text = recognizer.recognize_google(audio_data, language="es-ES")
84
- st.success(f"Texto reconocido: {text}")
85
-
86
- st.subheader("Generando respuesta...")
87
- st.progress(0.0)
88
- output = generate_with_progress(text, history=st.session_state.history)
89
- st.session_state.history.append((text, output))
90
- st.success("Respuesta generada con éxito.")
91
-
92
- st.subheader("Reproduciendo respuesta...")
93
- audio_file_path = text_to_speech(output)
94
- play(audio_file_path)
95
-
96
- for progress_value in range(0, 101, 10):
97
- st.progress(progress_value / 100)
98
- sleep(0.5)
99
-
100
- except sr.UnknownValueError:
101
- st.warning("No se pudo reconocer el habla.")
102
- except sr.RequestError as e:
103
- st.error(f"Error en la solicitud al servicio de reconocimiento de voz: {e}")
104
- break
 
2
  from huggingface_hub import InferenceClient
3
  from gtts import gTTS
4
  import base64
 
 
 
 
 
5
 
6
# Streamlit UI: a single text area for the user's message.
# NOTE(review): `value` is default *content*, not a placeholder — if the user
# does not edit it, the literal text "Escribe aquí tu mensaje" is sent to the
# model verbatim; confirm whether a guard is intended.
user_input = st.text_area(label="Usuario", value="Escribe aquí tu mensaje", height=30)

# Hugging Face Inference API client bound to the Mixtral instruct model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# System prompt for the assistant persona; `system_prompt_sent` is a module-level
# flag mutated elsewhere (via `global` in format_prompt) so the system prompt is
# only injected once.
system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
system_prompt_sent = False
15
 
16
+ # Funci贸n para formatear el prompt
17
  def format_prompt(message, history):
18
  global system_prompt_sent
19
  prompt = "<s>"
 
30
  prompt += f"[INST] {message} [/INST]"
31
  return prompt
32
 
33
# Convert text to Spanish speech with gTTS, save it as an MP3, and return the path.
# NOTE(review): `speed` is accepted but never used — gTTS only exposes a boolean
# `slow` flag; confirm whether the parameter can be dropped or wired up.
# NOTE(review): the fixed 'output.mp3' path is overwritten on every call, so
# concurrent Streamlit sessions would clobber each other's audio file.
def text_to_speech(text, speed=2.0):
    tts = gTTS(text=text, lang='es')
    audio_file_path = 'output.mp3'
    tts.save(audio_file_path)
    return audio_file_path
39
 
40
+ # Funci贸n para generar respuesta
41
+ def generate(
42
  user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
43
  ):
44
  global system_prompt_sent
 
60
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
61
 
62
  response = ""
 
63
  for response_token in stream:
64
  response += response_token.token.text
65
+
 
 
 
 
66
  response = ' '.join(response.split()).replace('</s>', '')
67
 
68
+ # Convertir respuesta a audio y reproducirlo en Streamlit con autoplay HTML
69
+ audio_file_path = text_to_speech(response)
70
+ audio_file = open(audio_file_path, 'rb')
71
+ audio_bytes = audio_file.read()
72
+ st.markdown(
73
+ f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3"></audio>',
74
+ unsafe_allow_html=True
75
+ )
76
+
77
  return response
78
 
79
# Initialize the per-session conversation history if it does not exist yet.
if "history" not in st.session_state:
    st.session_state.history = []

# Generate a reply for the current input and record the (input, output) pair.
# NOTE(review): this runs on every Streamlit rerun, so the model is queried even
# when the text area still holds its default text — confirm whether a submit
# button or empty-input guard is intended.
output = generate(user_input, history=st.session_state.history)
st.session_state.history.append((user_input, output))