salomonsky committed on
Commit 56f4168 (verified)
1 Parent(s): 5a44809

Update app.py

Files changed (1)
  1. app.py +35 -40
app.py CHANGED
@@ -1,25 +1,41 @@
 import streamlit as st
 import base64
 import io
-from huggingface_hub import InferenceClient
+import pyaudio
+import wave
+import numpy as np
 from gtts import gTTS
-from audiorecorder import audiorecorder
-import speechrecognition as sr
-import librosa
+from huggingface_hub import InferenceClient
 
 def record_audio(filename="audio.wav", duration=5):
-    recognizer = sr.Recognizer()
+    st.subheader("Habla para grabar...")
+
+    p = pyaudio.PyAudio()
+
+    stream = p.open(format=pyaudio.paInt16,
+                    channels=1,
+                    rate=44100,
+                    input=True,
+                    frames_per_buffer=1024)
+
+    frames = []
 
-    with sr.Microphone() as source:
-        st.subheader("Habla para grabar...")
-        audio_data = recognizer.listen(source, timeout=duration)
+    for i in range(0, int(44100 / 1024 * duration)):
+        data = stream.read(1024)
+        frames.append(data)
 
-    st.subheader("Deteniendo la grabación...")
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
 
-    with open(filename, "wb") as f:
-        f.write(audio_data.get_wav_data())
+    wf = wave.open(filename, 'wb')
+    wf.setnchannels(1)
+    wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
+    wf.setframerate(44100)
+    wf.writeframes(b''.join(frames))
+    wf.close()
 
-def recognize_speech(audio_data, show_messages=True):
+def recognize_speech(audio_data):
     recognizer = sr.Recognizer()
     audio_recording = sr.AudioFile(audio_data)
 
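Note on this hunk: the new import block no longer brings in a speech-recognition module, but `recognize_speech()` still calls `sr.Recognizer()` and `sr.AudioFile()`. A minimal sketch of the import the file presumably still needs, assuming the PyPI SpeechRecognition package remains a dependency (it is conventionally imported as `speech_recognition`, not `speechrecognition`):

```python
# Assumed dependency: SpeechRecognition (pip install SpeechRecognition).
# Without an import along these lines (or an equivalent definition elsewhere
# in app.py), `sr` is undefined after this commit and recognize_speech()
# raises NameError at runtime.
import speech_recognition as sr
```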
@@ -28,28 +44,16 @@ def recognize_speech(audio_data, show_messages=True):
 
     try:
         audio_text = recognizer.recognize_google(audio, language="es-ES")
-        if show_messages:
-            st.subheader("Texto Reconocido:")
-            st.write(audio_text)
-            st.success("Reconocimiento de voz completado.")
+        st.subheader("Texto Reconocido:")
+        st.write(audio_text)
+        st.success("Reconocimiento de voz completado.")
+        return audio_text
     except sr.UnknownValueError:
         st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
-        audio_text = ""
+        return ""
     except sr.RequestError:
         st.error("No he recibido ningun audio. Por favor, inténtalo de nuevo.")
-        audio_text = ""
-
-    return audio_text
-
-def format_prompt(message, history):
-    prompt = "<s>"
-
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
+        return ""
 
 def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
     client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
@@ -68,7 +72,7 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
         seed=42,
     )
 
-    formatted_prompt = format_prompt(audio_text, history)
+    formatted_prompt = f"<s>[INST] {audio_text} [/INST]</s>"
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     response = ""
 
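Note on this hunk: the removed `format_prompt()` wove the conversation `history` into the Mixtral instruct template, while the new f-string builds a single-turn prompt, so `history` no longer feeds the prompt; the new string also appends `</s>` right after `[/INST]`, which in the Mixtral-Instruct format normally marks the end of an assistant answer rather than the start of one. A sketch of a history-aware prompt, reconstructed from the helper this commit removes:

```python
# Sketch based on the removed format_prompt() helper; only the prompt string
# is built here, the streaming call in generate() is unchanged.
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        # each past turn: user text inside [INST]...[/INST], answer closed with </s>
        prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"  # current turn, left open for the model to answer
    return prompt
```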
 
@@ -86,14 +90,6 @@ def text_to_speech(text, speed=1.3):
     audio_fp.seek(0)
     return audio_fp
 
-def detect_vocal_activity(audio_data):
-    y, sr = librosa.load(audio_data, sr=None)
-    umbral_actividad_vocal = 0.01
-    amplitud_media = librosa.feature.rms(y=y)
-    actividad_vocal = amplitud_media > umbral_actividad_vocal
-
-    return actividad_vocal
-
 def main():
     if "history" not in st.session_state:
         st.session_state.history = []
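Note on this hunk: the removed librosa gate compared the whole RMS array against the threshold, so `actividad_vocal` was a boolean array rather than a single flag, and the next hunk shows its result was never used by the caller anyway. If voice-activity detection were ever reinstated, a reduced form might look like the sketch below (assumes librosa stays a dependency; the function name and the 0.01 threshold are taken from the removed code):

```python
import librosa
import numpy as np

def detect_vocal_activity(audio_path, threshold=0.01):
    # librosa.feature.rms() returns an array of shape (1, n_frames), so the
    # comparison has to be reduced to a single boolean to act as a gate.
    y, sample_rate = librosa.load(audio_path, sr=None)
    rms = librosa.feature.rms(y=y)
    return bool(np.any(rms > threshold))
```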
@@ -112,7 +108,6 @@ def main():
     st.session_state.history.append((pre_prompt, output))
 
     if audio_text:
-        detect_vocal_activity("audio.wav")
         output, audio_file = generate(audio_text, history=st.session_state.history)
 
     if audio_text:
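Taken together, the new flow records with PyAudio, transcribes with the Google recognizer, and sends the transcript to Mixtral. A hypothetical end-to-end call sequence, using only names that appear in this diff (the surrounding Streamlit layout in `main()` is not shown here):

```python
record_audio("audio.wav", duration=5)       # 5 s of 16-bit mono audio at 44.1 kHz
audio_text = recognize_speech("audio.wav")  # "" when recognition fails
if audio_text:
    # per main(), generate() returns the model reply and an audio rendering of it
    output, audio_file = generate(audio_text, history=st.session_state.history)
```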