xaman4

Running

App Files Community

salomonsky committed on Jan 16, 2024

Commit

56f4168

verified ·

1 Parent(s): 5a44809

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -40

app.py CHANGED Viewed

@@ -1,25 +1,41 @@
 import streamlit as st
 import base64
 import io
-from huggingface_hub import InferenceClient
 from gtts import gTTS
-from audiorecorder import audiorecorder
-import speechrecognition as sr
-import librosa
 def record_audio(filename="audio.wav", duration=5):
-    recognizer = sr.Recognizer()
-    with sr.Microphone() as source:
-        st.subheader("Habla para grabar...")
-        audio_data = recognizer.listen(source, timeout=duration)
-    st.subheader("Deteniendo la grabación...")
-    with open(filename, "wb") as f:
-        f.write(audio_data.get_wav_data())
-def recognize_speech(audio_data, show_messages=True):
     recognizer = sr.Recognizer()
     audio_recording = sr.AudioFile(audio_data)
@@ -28,28 +44,16 @@ def recognize_speech(audio_data, show_messages=True):
     try:
         audio_text = recognizer.recognize_google(audio, language="es-ES")
-        if show_messages:
-            st.subheader("Texto Reconocido:")
-            st.write(audio_text)
-            st.success("Reconocimiento de voz completado.")
     except sr.UnknownValueError:
         st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
-        audio_text = ""
     except sr.RequestError:
         st.error("No he recibido ningun audio. Por favor, inténtalo de nuevo.")
-        audio_text = ""
-    return audio_text
-def format_prompt(message, history):
-    prompt = "<s>"
-    for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [/INST]"
-        prompt += f" {bot_response}</s> "
-    prompt += f"[INST] {message} [/INST]"
-    return prompt
 def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
     client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
@@ -68,7 +72,7 @@ def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.
         seed=42,
     )
-    formatted_prompt = format_prompt(audio_text, history)
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     response = ""
@@ -86,14 +90,6 @@ def text_to_speech(text, speed=1.3):
     audio_fp.seek(0)
     return audio_fp
-def detect_vocal_activity(audio_data):
-    y, sr = librosa.load(audio_data, sr=None)
-    umbral_actividad_vocal = 0.01
-    amplitud_media = librosa.feature.rms(y=y)
-    actividad_vocal = amplitud_media > umbral_actividad_vocal
-    return actividad_vocal
 def main():
     if "history" not in st.session_state:
         st.session_state.history = []
@@ -112,7 +108,6 @@ def main():
             st.session_state.history.append((pre_prompt, output))
         if audio_text:
-            detect_vocal_activity("audio.wav")
             output, audio_file = generate(audio_text, history=st.session_state.history)
             if audio_text:

 import streamlit as st
 import base64
 import io
+import pyaudio
+import wave
+import numpy as np
 from gtts import gTTS
+from huggingface_hub import InferenceClient
 def record_audio(filename="audio.wav", duration=5):
+    st.subheader("Habla para grabar...")
+    p = pyaudio.PyAudio()
+    stream = p.open(format=pyaudio.paInt16,
+                    channels=1,
+                    rate=44100,
+                    input=True,
+                    frames_per_buffer=1024)
+    frames = []
+    for i in range(0, int(44100 / 1024 * duration)):
+        data = stream.read(1024)
+        frames.append(data)
+    stream.stop_stream()
+    stream.close()
+    p.terminate()
+    wf = wave.open(filename, 'wb')
+    wf.setnchannels(1)
+    wf.setsampwidth(pyaudio.PyAudio().get_sample_size(pyaudio.paInt16))
+    wf.setframerate(44100)
+    wf.writeframes(b''.join(frames))
+    wf.close()
+def recognize_speech(audio_data):
     recognizer = sr.Recognizer()
     audio_recording = sr.AudioFile(audio_data)
     try:
         audio_text = recognizer.recognize_google(audio, language="es-ES")
+        st.subheader("Texto Reconocido:")
+        st.write(audio_text)
+        st.success("Reconocimiento de voz completado.")
+        return audio_text
     except sr.UnknownValueError:
         st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
+        return ""
     except sr.RequestError:
         st.error("No he recibido ningun audio. Por favor, inténtalo de nuevo.")
+        return ""
 def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
     client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
         seed=42,
     )
+    formatted_prompt = f"<s>[INST] {audio_text} [/INST]</s>"
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     response = ""
     audio_fp.seek(0)
     return audio_fp
 def main():
     if "history" not in st.session_state:
         st.session_state.history = []
             st.session_state.history.append((pre_prompt, output))
         if audio_text:
             output, audio_file = generate(audio_text, history=st.session_state.history)
             if audio_text: