salomonsky committed on
Commit
0f213dd
verified
1 Parent(s): ad653b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -28
app.py CHANGED
@@ -1,13 +1,10 @@
1
- from huggingface_hub import InferenceClient
2
  from audiorecorder import audiorecorder
3
- import speech_recognition as sr
4
- from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
5
- from datasets import load_dataset
6
  import streamlit as st
7
  import base64
8
- import io
9
  import torch
10
- import os
 
11
 
12
  model = Wav2Vec2ForCTC.from_pretrained("facebook/mms-tts-spa")
13
  pre_prompt_text = "You are a behavioral AI, your answers should be brief, stoic and humanistic."
@@ -17,6 +14,7 @@ if "history" not in st.session_state:
17
 
18
  if "pre_prompt_sent" not in st.session_state:
19
  st.session_state.pre_prompt_sent = False
 
20
  def recognize_speech(audio_data, show_messages=True):
21
  recognizer = sr.Recognizer()
22
  audio_recording = sr.AudioFile(audio_data)
@@ -39,20 +37,6 @@ def recognize_speech(audio_data, show_messages=True):
39
 
40
  return audio_text
41
 
42
- def format_prompt(message, history):
43
- prompt = "<s>"
44
-
45
- if not st.session_state.pre_prompt_sent:
46
- prompt += f"[INST] {pre_prompt_text} [/INST]"
47
- st.session_state.pre_prompt_sent = True
48
-
49
- for user_prompt, bot_response in history:
50
- prompt += f"[INST] {user_prompt} [/INST]"
51
- prompt += f" {bot_response}</s> "
52
-
53
- prompt += f"[INST] {message} [/INST]"
54
- return prompt
55
-
56
  def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
57
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
58
 
@@ -74,10 +58,22 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
74
 
75
  for response_token in stream:
76
  response += response_token.token.text
77
-
78
- response = ' '.join(response.split()).replace('</s>', '')
79
- audio_file = text_to_speech(response)
80
- return response, audio_file
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  def text_to_speech(text):
83
  with torch.no_grad():
@@ -91,7 +87,7 @@ def text_to_speech(text):
91
  return encoded_audio
92
 
93
  def main():
94
- audio_data = audiorecorder("Push to Play", "Stop Recording...")
95
 
96
  if not audio_data.empty():
97
  st.audio(audio_data.export().read(), format="audio/wav")
@@ -99,11 +95,13 @@ def main():
99
  audio_text = recognize_speech("audio.wav")
100
 
101
  if audio_text:
102
- output, audio_file = generate(audio_text, history=st.session_state.history)
 
 
103
 
104
- if audio_file is not None:
105
  st.markdown(
106
- f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>""",
107
  unsafe_allow_html=True)
108
 
109
  if __name__ == "__main__":
 
 
1
  from audiorecorder import audiorecorder
 
 
 
2
  import streamlit as st
3
  import base64
4
+ import soundfile as sf
5
  import torch
6
+ import speech_recognition as sr
7
+ from transformers import Wav2Vec2ForCTC
8
 
9
  model = Wav2Vec2ForCTC.from_pretrained("facebook/mms-tts-spa")
10
  pre_prompt_text = "You are a behavioral AI, your answers should be brief, stoic and humanistic."
 
14
 
15
  if "pre_prompt_sent" not in st.session_state:
16
  st.session_state.pre_prompt_sent = False
17
+
18
  def recognize_speech(audio_data, show_messages=True):
19
  recognizer = sr.Recognizer()
20
  audio_recording = sr.AudioFile(audio_data)
 
37
 
38
  return audio_text
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
41
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
42
 
 
58
 
59
  for response_token in stream:
60
  response += response_token.token.text
61
+
62
+ return response
63
+
64
+ def format_prompt(message, history):
65
+ prompt = "<s>"
66
+
67
+ if not st.session_state.pre_prompt_sent:
68
+ prompt += f"[INST] {pre_prompt_text} [/INST]"
69
+ st.session_state.pre_prompt_sent = True
70
+
71
+ for user_prompt, bot_response in history:
72
+ prompt += f"[INST] {user_prompt} [/INST]"
73
+ prompt += f" {bot_response}</s> "
74
+
75
+ prompt += f"[INST] {message} [/INST]"
76
+ return prompt
77
 
78
  def text_to_speech(text):
79
  with torch.no_grad():
 
87
  return encoded_audio
88
 
89
  def main():
90
+ audio_data = st.audio_recorder("Push to Play", "Stop Recording...")
91
 
92
  if not audio_data.empty():
93
  st.audio(audio_data.export().read(), format="audio/wav")
 
95
  audio_text = recognize_speech("audio.wav")
96
 
97
  if audio_text:
98
+ # Llama a la función generate para obtener la respuesta generada
99
+ generated_response = generate(audio_text, history=st.session_state.history)
100
+ output = text_to_speech(generated_response)
101
 
102
+ if output is not None:
103
  st.markdown(
104
+ f"""<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{output}" type="audio/mp3" id="audio_player"></audio>""",
105
  unsafe_allow_html=True)
106
 
107
  if __name__ == "__main__":