salomonsky committed on
Commit
4e91bb7
·
verified ·
1 Parent(s): 1a51ae7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -23
app.py CHANGED
@@ -2,8 +2,10 @@ import streamlit as st
2
  from huggingface_hub import InferenceClient
3
  import base64
4
  from pydub import AudioSegment
5
- from pydub.playback import play
6
- import pyttsx3
 
 
7
 
8
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
9
  pre_prompt = ""
@@ -25,11 +27,36 @@ def format_prompt(message, history):
25
  return prompt
26
 
27
  def text_to_speech(text):
28
- engine = pyttsx3.init()
29
- engine.save_to_file(text, "output_pyttsx3.mp3")
30
- engine.runAndWait()
31
- return "output_pyttsx3.mp3"
 
 
 
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
34
  global pre_prompt_sent
35
  temperature = float(temperature) if temperature is not None else 0.9
@@ -47,30 +74,38 @@ def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.
47
  )
48
 
49
  formatted_prompt = format_prompt(user_input, history)
50
- stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
51
- response = ""
52
-
53
- for response_token in stream:
54
- response += response_token.token.text
55
-
56
- response = ' '.join(response.split()).replace('</s>', '')
 
 
 
57
 
58
- audio_file_path = text_to_speech(response)
 
59
  audio_file = open(audio_file_path, 'rb')
60
  audio_bytes = audio_file.read()
61
-
62
- return response, audio_bytes
63
 
64
  if "history" not in st.session_state:
65
  st.session_state.history = []
66
 
67
- user_input = st.text_input(label="", value="")
68
  output, audio_bytes = generate(user_input, history=st.session_state.history)
 
 
 
 
69
  st.text_area("Respuesta", value=output, height=400, key="output_text", disabled=True)
70
 
71
- st.markdown(
72
- f"""
73
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3" id="audio_player"></audio>
74
- """,
75
- unsafe_allow_html=True
76
- )
 
 
2
  from huggingface_hub import InferenceClient
3
  import base64
4
  from pydub import AudioSegment
5
+ from io import BytesIO
6
+ from gtts import gTTS
7
+ import speech_recognition as sr
8
+ from datetime import datetime
9
 
10
  client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
11
  pre_prompt = ""
 
27
  return prompt
28
 
29
  def text_to_speech(text):
30
+ tts = gTTS(text=text, lang='es')
31
+ audio_stream = BytesIO()
32
+ tts.save(audio_stream)
33
+ audio_stream.seek(0)
34
+ return audio_stream.read()
35
+
36
def take_user_input():
    """Record one phrase from the microphone and return it as Spanish text.

    Returns:
        The recognized phrase, or the string ``'None'`` (the sentinel the
        calling script compares against) when recognition fails.

    Raises:
        SystemExit: If the phrase contains 'salir' or 'detener', after the
            goodbye message has been spoken.
    """
    r = sr.Recognizer()
    with sr.Microphone() as source:
        print('Escuchando....')
        r.pause_threshold = 1
        audio = r.listen(source)

    print('Reconociendo...')
    # Keep the try body down to the one call that is expected to fail, and
    # return early on failure so a successful result is never overwritten.
    try:
        query = r.recognize_google(audio, language='es-ES')
    except (sr.UnknownValueError, sr.RequestError):
        # Unintelligible speech or a failed recognition request: ask the user
        # to repeat and report "nothing recognized" to the caller.
        speak('de nuevo...')
        return 'None'

    if 'salir' in query or 'detener' in query:
        speak("Hasta luego.")
        # Same effect as the original exit() call, without relying on the
        # interactive helper installed by the site module.
        raise SystemExit
    return query
59
+
60
  def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
61
  global pre_prompt_sent
62
  temperature = float(temperature) if temperature is not None else 0.9
 
74
  )
75
 
76
  formatted_prompt = format_prompt(user_input, history)
77
+ try:
78
+ stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
79
+ response = ""
80
+ for response_token in stream:
81
+ response += response_token.token.text
82
+ response = ' '.join(response.split()).replace('</s>', '')
83
+ audio_bytes = text_to_speech(response)
84
+ return response, audio_bytes
85
+ except Exception as e:
86
+ return str(e), None
87
 
88
def speak(text):
    """Synthesize ``text`` and render it in the Streamlit page as an audio player.

    Args:
        text: The text to be spoken.
    """
    # text_to_speech() returns the audio bytes themselves, not a file path,
    # so there is nothing to open() here.
    audio_bytes = text_to_speech(text)
    # st.audio() has no `key` parameter; passing one raises TypeError.
    st.audio(audio_bytes, format="audio/mp3", start_time=0)
 
93
 
94
  if "history" not in st.session_state:
95
  st.session_state.history = []
96
 
97
+ user_input = take_user_input()
98
  output, audio_bytes = generate(user_input, history=st.session_state.history)
99
+
100
+ if user_input != 'None':
101
+ st.session_state.history.append((user_input, output))
102
+
103
  st.text_area("Respuesta", value=output, height=400, key="output_text", disabled=True)
104
 
105
+ if audio_bytes is not None:
106
+ st.markdown(
107
+ f"""
108
+ <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_bytes).decode()}" type="audio/mp3" id="audio_player"></audio>
109
+ """,
110
+ unsafe_allow_html=True
111
+ )