salomonsky committed on
Commit
09559ae
verified
1 Parent(s): 35a3d64

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -45
app.py CHANGED
@@ -2,16 +2,18 @@ import streamlit as st
2
  from huggingface_hub import InferenceClient
3
  from gtts import gTTS
4
  import base64
5
- import speech_recognition as sr
6
- from pydub import AudioSegment
7
- from pydub.playback import play
8
- from io import BytesIO
9
- from time import sleep
10
 
 
 
 
 
11
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 
 
12
  system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
13
  system_prompt_sent = False
14
 
 
15
  def format_prompt(message, history):
16
  global system_prompt_sent
17
  prompt = "<s>"
@@ -28,13 +30,15 @@ def format_prompt(message, history):
28
  prompt += f"[INST] {message} [/INST]"
29
  return prompt
30
 
 
31
  def text_to_speech(text, speed=2.0):
32
  tts = gTTS(text=text, lang='es')
33
- audio_file_path = BytesIO()
34
- tts.write_to_fp(audio_file_path)
35
  return audio_file_path
36
 
37
- def generate_with_progress(
 
38
  user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
39
  ):
40
  global system_prompt_sent
@@ -56,49 +60,26 @@ def generate_with_progress(
56
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
57
 
58
  response = ""
59
- total_tokens = 0
60
  for response_token in stream:
61
  response += response_token.token.text
62
- total_tokens += 1
63
-
64
- st.subheader("Generando respuesta...")
65
- st.progress(total_tokens / max_new_tokens)
66
-
67
  response = ' '.join(response.split()).replace('</s>', '')
68
 
 
 
 
 
 
 
 
 
 
69
  return response
70
 
 
71
  if "history" not in st.session_state:
72
  st.session_state.history = []
73
 
74
- recognizer = sr.Recognizer()
75
-
76
- while True:
77
- with st.spinner("Escuchando..."):
78
- try:
79
- with sr.Microphone() as source:
80
- audio_data = recognizer.listen(source, timeout=5)
81
- st.success("Audio capturado con éxito.")
82
-
83
- text = recognizer.recognize_google(audio_data, language="es-ES")
84
- st.success(f"Texto reconocido: {text}")
85
-
86
- st.subheader("Generando respuesta...")
87
- st.progress(0.0)
88
- output = generate_with_progress(text, history=st.session_state.history)
89
- st.session_state.history.append((text, output))
90
- st.success("Respuesta generada con éxito.")
91
-
92
- st.subheader("Reproduciendo respuesta...")
93
- audio_file_path = text_to_speech(output)
94
- play(audio_file_path)
95
-
96
- for progress_value in range(0, 101, 10):
97
- st.progress(progress_value / 100)
98
- sleep(0.5)
99
-
100
- except sr.UnknownValueError:
101
- st.warning("No se pudo reconocer el habla.")
102
- except sr.RequestError as e:
103
- st.error(f"Error en la solicitud al servicio de reconocimiento de voz: {e}")
104
- break
 
2
  from huggingface_hub import InferenceClient
3
  from gtts import gTTS
4
  import base64
 
 
 
 
 
5
 
6
# Streamlit UI: a single text area for the user's message.
# NOTE(review): `value` is default *content*, not a placeholder — if the user
# does not edit it, the literal text "Escribe aquí tu mensaje" is sent to the
# model verbatim; confirm whether a guard is intended.
user_input = st.text_area(label="Usuario", value="Escribe aquí tu mensaje", height=30)

# Hugging Face Inference API client bound to the Mixtral instruct model.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

# System prompt for the assistant persona; `system_prompt_sent` is a module-level
# flag mutated elsewhere (via `global` in format_prompt) so the system prompt is
# only injected once.
system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
system_prompt_sent = False
15
 
16
+ # Funci贸n para formatear el prompt
17
  def format_prompt(message, history):
18
  global system_prompt_sent
19
  prompt = "<s>"
 
30
  prompt += f"[INST] {message} [/INST]"
31
  return prompt
32
 
33
# Convert text to Spanish speech with gTTS, save it as an MP3, and return the path.
# NOTE(review): `speed` is accepted but never used — gTTS only exposes a boolean
# `slow` flag; confirm whether the parameter can be dropped or wired up.
# NOTE(review): the fixed 'output.mp3' path is overwritten on every call, so
# concurrent Streamlit sessions would clobber each other's audio file.
def text_to_speech(text, speed=2.0):
    tts = gTTS(text=text, lang='es')
    audio_file_path = 'output.mp3'
    tts.save(audio_file_path)
    return audio_file_path
39
 
40
+ # Funci贸n para generar respuesta
41
+ def generate(
42
  user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
43
  ):
44
  global system_prompt_sent
 
60
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
61
 
62
  response = ""
 
63
  for response_token in stream:
64
  response += response_token.token.text
65
+
 
 
 
 
66
  response = ' '.join(response.split()).replace('</s>', '')
67
 
68
+ # Convertir respuesta a audio y reproducirlo en Streamlit con autoplay HTML
69
+ audio_file_path = text_to_speech(response)
70
+ audio_file = open(audio_file_path, 'rb')
71
+ audio_bytes = audio_file.read()
72
+ st.markdown(
73
+ f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3"></audio>',
74
+ unsafe_allow_html=True
75
+ )
76
+
77
  return response
78
 
79
# Initialize the per-session conversation history if it does not exist yet.
if "history" not in st.session_state:
    st.session_state.history = []

# Generate a reply for the current input and record the (input, output) pair.
# NOTE(review): this runs on every Streamlit rerun, so the model is queried even
# when the text area still holds its default text — confirm whether a submit
# button or empty-input guard is intended.
output = generate(user_input, history=st.session_state.history)
st.session_state.history.append((user_input, output))