salomonsky committed on
Commit
882add6
verified
1 Parent(s): 94a3b7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -27
app.py CHANGED
@@ -3,19 +3,37 @@ import base64
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
 
 
6
 
7
- client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
8
- pre_prompt = "Hola, te llamar谩s Chaman 3.0, una IA conductual. Tus principios son el transhumanismo ecol贸gico."
9
- pre_prompt_sent = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def format_prompt(message, history):
12
- global pre_prompt_sent
13
  prompt = "<s>"
14
 
15
- if not pre_prompt_sent and all(f"[INST] {pre_prompt} [/INST]" not in user_prompt for user_prompt, _ in history):
16
- prompt += f"[INST] {pre_prompt} [/INST]"
17
- pre_prompt_sent = True
18
-
19
  for user_prompt, bot_response in history:
20
  prompt += f"[INST] {user_prompt} [/INST]"
21
  prompt += f" {bot_response}</s> "
@@ -30,8 +48,9 @@ def text_to_speech(text, speed=1.3):
30
  audio_fp.seek(0)
31
  return audio_fp
32
 
33
- def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
34
- global pre_prompt_sent
 
35
  temperature = float(temperature) if temperature is not None else 0.9
36
  if temperature < 1e-2:
37
  temperature = 1e-2
@@ -46,7 +65,7 @@ def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.
46
  seed=42,
47
  )
48
 
49
- formatted_prompt = format_prompt(user_input, history)
50
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
51
  response = ""
52
 
@@ -57,23 +76,34 @@ def generate(user_input, history, temperature=None, max_new_tokens=512, top_p=0.
57
  audio_file = text_to_speech(response, speed=1.3)
58
  return response, audio_file
59
 
60
- if "history" not in st.session_state:
61
- st.session_state.history = []
62
 
63
- with st.container():
64
- user_input = st.text_input(label="Usuario", value="Saludos")
65
- output, audio_file = generate(user_input, history=st.session_state.history)
66
- st.text_area("Respuesta", height=400, value=output, key="output_text", disabled=True)
67
 
68
- if user_input:
69
- st.session_state.history.append((user_input, output))
70
 
71
- st.write("Presiona el bot贸n y comienza a hablar...")
 
 
 
72
 
73
- if audio_file is not None:
74
- st.markdown(
75
- f"""
76
- <audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{base64.b64encode(audio_file.read()).decode()}" type="audio/mp3" id="audio_player"></audio>
77
- """,
78
- unsafe_allow_html=True
79
- )
 
 
 
 
 
 
 
 
 
 
 
 
3
  import io
4
  from huggingface_hub import InferenceClient
5
  from gtts import gTTS
6
+ from audiorecorder import audiorecorder
7
+ import speech_recognition as sr
8
 
9
def recognize_speech(audio_data):
    """Transcribe a recorded clip to Spanish text, reporting progress in the UI.

    Args:
        audio_data: Value handed straight to ``sr.AudioFile`` (the caller in
            this file passes the path of a WAV file).

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed.
    """
    st.info("Reconociendo audio...")

    recognizer = sr.Recognizer()

    try:
        with sr.AudioFile(audio_data) as source:
            audio = recognizer.record(source)
        audio_text = recognizer.recognize_google(audio, language="es-ES")
    except sr.UnknownValueError:
        st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
        return ""
    except sr.RequestError as e:
        st.error(f"Error en el reconocimiento de voz: {e}")
        return ""

    st.subheader("Texto Reconocido:")
    st.write(audio_text)
    # Announce success only on the path that actually produced a
    # transcription; the failure branches above have already returned, so the
    # user never sees a warning/error followed by a "completed" banner.
    st.success("Reconocimiento de voz completado.")
    return audio_text
33
 
34
  def format_prompt(message, history):
 
35
  prompt = "<s>"
36
 
 
 
 
 
37
  for user_prompt, bot_response in history:
38
  prompt += f"[INST] {user_prompt} [/INST]"
39
  prompt += f" {bot_response}</s> "
 
48
  audio_fp.seek(0)
49
  return audio_fp
50
 
51
+ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
52
+ client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
53
+
54
  temperature = float(temperature) if temperature is not None else 0.9
55
  if temperature < 1e-2:
56
  temperature = 1e-2
 
65
  seed=42,
66
  )
67
 
68
+ formatted_prompt = format_prompt(audio_text, history)
69
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
70
  response = ""
71
 
 
76
  audio_file = text_to_speech(response, speed=1.3)
77
  return response, audio_file
78
 
79
def main():
    """Render the page: record audio, transcribe it, query the model, play the reply.

    Reads and updates ``st.session_state.history`` with one
    ``(audio_text, output)`` tuple per successfully transcribed recording.
    """
    st.title("Grabación de Audio y Reconocimiento de Voz")

    if "history" not in st.session_state:
        st.session_state.history = []

    audio_data = audiorecorder("Habla para grabar", "Deteniendo la grabación...")

    # NOTE(review): assumes audiorecorder returns a pydub AudioSegment. There,
    # `empty()` is a factory that builds a new zero-length segment, so
    # `not audio_data.empty()` is always true; test the recording's own length.
    if len(audio_data) == 0:
        return

    # Export once, as WAV, into memory. This keeps the bytes consistent with
    # the "audio/wav" label given to the player, and avoids a fixed
    # "audio.wav" path that concurrent sessions would overwrite.
    wav_buffer = io.BytesIO()
    audio_data.export(wav_buffer, format="wav")
    wav_bytes = wav_buffer.getvalue()

    st.audio(wav_bytes, format="audio/wav")
    st.write(f"Frame rate: {audio_data.frame_rate}, Frame width: {audio_data.frame_width}, Duration: {audio_data.duration_seconds} seconds")

    audio_text = recognize_speech(io.BytesIO(wav_bytes))
    if not audio_text:
        # Nothing was transcribed; recognize_speech has already told the user.
        # Skip the model and text-to-speech calls instead of sending an empty prompt.
        return

    output, audio_file = generate(audio_text, history=st.session_state.history)
    st.text_area("Respuesta", height=100, value=output, key="output_text", disabled=True)
    st.session_state.history.append((audio_text, output))

    if audio_file is not None:
        # Embed the spoken reply as a data URI so the browser autoplays it.
        audio_b64 = base64.b64encode(audio_file.read()).decode()
        st.markdown(
            f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{audio_b64}" type="audio/mp3" id="audio_player"></audio>',
            unsafe_allow_html=True,
        )


if __name__ == "__main__":
    main()