xaman4

Sleeping

App Files Files Community

salomonsky committed on Jan 13, 2024

Commit

b0b5cd6

verified ·

1 Parent(s): 5abd901

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -28

app.py CHANGED Viewed

@@ -2,18 +2,17 @@ import streamlit as st
 from huggingface_hub import InferenceClient
 from gtts import gTTS
 import base64
-import sounddevice as sd
 import speech_recognition as sr
-# Inicializar el cliente de inferencia
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-# Definir el prompt del sistema
 system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
 system_prompt_sent = False
-# Función para formatear el prompt
 def format_prompt(message, history):
     global system_prompt_sent
     prompt = "<s>"
@@ -30,15 +29,13 @@ def format_prompt(message, history):
     prompt += f"[INST] {message} [/INST]"
     return prompt
-# Función para convertir texto a audio
 def text_to_speech(text, speed=2.0):
     tts = gTTS(text=text, lang='es')
-    audio_file_path = 'output.mp3'
-    tts.save(audio_file_path)
     return audio_file_path
-# Función para generar respuesta
-def generate(
     user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
 ):
     global system_prompt_sent
@@ -60,19 +57,16 @@ def generate(
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     response = ""
     for response_token in stream:
         response += response_token.token.text
-    response = ' '.join(response.split()).replace('</s>', '')
-    # Convertir respuesta a audio y reproducirlo en Streamlit con autoplay HTML
-    audio_file_path = text_to_speech(response)
-    audio_file = open(audio_file_path, 'rb')
-    audio_bytes = audio_file.read()
-    st.markdown(
-        f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3"></audio>',
-        unsafe_allow_html=True
-    )
     return response
@@ -89,18 +83,42 @@ if "history" not in st.session_state:
 if start_recording_button:
     st.info("Habla ahora...")
-    # Grabar audio
-    with sd.InputStream(callback=st.audio_recorder(callback=True), channels=1):
-        audio_data = st.audio_recorder()
-    # Convertir audio a texto
     recognizer = sr.Recognizer()
     try:
-        text = recognizer.recognize_google(audio_data, language="es-ES")
         st.success(f"Texto reconocido: {text}")
-        # Generar respuesta y actualizar historial
-        output = generate(text, history=st.session_state.history)
         st.session_state.history.append((text, output))
     except sr.UnknownValueError:
         st.warning("No se pudo reconocer el habla.")
     except sr.RequestError as e:

 from huggingface_hub import InferenceClient
 from gtts import gTTS
 import base64
 import speech_recognition as sr
+from pydub import AudioSegment
+from pydub.playback import play
+import pyaudio
+from io import BytesIO
+from time import sleep
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 system_prompt = "Tu nombre es Chaman 3.0 una IA conductual"
 system_prompt_sent = False
 def format_prompt(message, history):
     global system_prompt_sent
     prompt = "<s>"
     prompt += f"[INST] {message} [/INST]"
     return prompt
 def text_to_speech(text, speed=2.0):
     tts = gTTS(text=text, lang='es')
+    audio_file_path = BytesIO()
+    tts.write_to_fp(audio_file_path)
     return audio_file_path
+def generate_with_progress(
     user_input, history, temperature=None, max_new_tokens=2048, top_p=0.95, repetition_penalty=1.0,
 ):
     global system_prompt_sent
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     response = ""
+    total_tokens = 0
     for response_token in stream:
         response += response_token.token.text
+        total_tokens += 1
+        # Actualizar la barra de progreso
+        st.subheader("Generando respuesta...")
+        st.progress(total_tokens / max_new_tokens)
+    response = ' '.join(response.split()).replace('</s>', '')
     return response
 if start_recording_button:
     st.info("Habla ahora...")
+    audio_data = BytesIO()
+    p = pyaudio.PyAudio()
+    stream = p.open(format=pyaudio.paInt16, channels=1, rate=44100, input=True, frames_per_buffer=1024)
+    with st.spinner("Grabando..."):
+        frames = []
+        for i in range(int(44100 / 1024 * 5)):  # grabar durante 5 segundos
+            data = stream.read(1024)
+            frames.append(data)
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
     recognizer = sr.Recognizer()
     try:
+        audio_data.write(b''.join(frames))
+        audio_data.seek(0)
+        audio = AudioSegment.from_file(audio_data, format="wav")
+        text = recognizer.recognize_google(audio, language="es-ES")
         st.success(f"Texto reconocido: {text}")
+        generate_progress = st.empty()
+        generate_progress.progress(0.0)
+        output = generate_with_progress(text, history=st.session_state.history)
         st.session_state.history.append((text, output))
+        st.success("Respuesta generada con éxito.")
+        st.subheader("Reproduciendo respuesta...")
+        audio_file_path = text_to_speech(output)
+        play(audio_file_path)
+        for progress_value in range(0, 101, 10):
+            st.progress(progress_value / 100)
+            sleep(0.5)
     except sr.UnknownValueError:
         st.warning("No se pudo reconocer el habla.")
     except sr.RequestError as e: