Spaces:
Running
Running
File size: 3,927 Bytes
7d2c473 23076a4 3784695 56f4168 719c3e8 640fac3 3784695 8f22654 abc037b 23076a4 e9ba3a2 093b41a 5a44809 7c1fe58 5a44809 8f22654 093b41a 5a44809 093b41a 5a44809 7118b55 093b41a 24dca16 093b41a 24dca16 093b41a 24dca16 8307fd0 23076a4 3625f99 23076a4 093b41a 23076a4 da45dce 8307fd0 da45dce 8f22654 da45dce 89ff019 8f22654 23076a4 3784695 e9ba3a2 7256c21 a64dfc8 7256c21 640fac3 c8a52a4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import streamlit as st
import base64
import io
from huggingface_hub import InferenceClient
from gtts import gTTS
from audiorecorder import audiorecorder
import speech_recognition as sr
from pydub import AudioSegment
from speech_recognition import Microphone, Recognizer
import pyttsx3
# Seed the conversation history only when the key is missing, so an existing
# list of (user text, bot reply) pairs is never overwritten.
if "history" not in st.session_state:
    st.session_state["history"] = []
def recognize_speech(audio_data, show_messages=True):
    """Transcribe recorded Spanish speech using Google's web speech API.

    Args:
        audio_data: Either an ``sr.AudioData`` instance, or anything
            ``sr.AudioFile`` can open (a path to an audio file such as
            ``"audio.wav"``, or a file-like object).
        show_messages: When true, render the recognized text and a success
            notice in the Streamlit page.

    Returns:
        The recognized text, or an empty string when the audio could not be
        understood or the recognition request failed.
    """
    recognizer = sr.Recognizer()

    # Microphone report is purely diagnostic: the audio to transcribe comes
    # from ``audio_data``, so recognition below runs whatever this finds.
    # NOTE(review): this queries audio hardware on the machine running the
    # script; confirm it is still wanted when deployed on a server.
    mic_list = sr.Microphone.list_microphone_names()
    if not mic_list:
        engine = pyttsx3.init()
        engine.say("No se encontraron micrófonos. Por favor, asegúrate de que estén conectados.")
        engine.runAndWait()
    else:
        print("Micrófonos encontrados:")
        for i, mic in enumerate(mic_list, start=1):
            print(f"{i}. {mic}")

    # recognize_google() needs an AudioData object; a path or file-like
    # object has to be read through AudioFile + record() first.
    if isinstance(audio_data, sr.AudioData):
        audio = audio_data
    else:
        with sr.AudioFile(audio_data) as source:
            audio = recognizer.record(source)

    try:
        audio_text = recognizer.recognize_google(audio, language="es-ES")
    except sr.UnknownValueError:
        # The service could not make out any speech in the recording.
        st.warning("No se pudo reconocer el audio. ¿Intentaste grabar algo?")
        return ""
    except sr.RequestError:
        # The recognition request itself failed (e.g. service unreachable).
        st.error("Hablame para comenzar!")
        return ""

    if show_messages:
        st.subheader("Texto Reconocido:")
        st.write(audio_text)
        st.success("Reconocimiento de voz completado.")
    return audio_text
def format_prompt(message, history):
    """Build the instruction-tagged prompt string sent to the model.

    Args:
        message: The new user message to be answered.
        history: Iterable of ``(user_prompt, bot_response)`` pairs from
            earlier turns, oldest first.

    Returns:
        A single string starting with ``<s>``, followed by every past turn
        as ``[INST] user [/INST] reply</s> `` and ending with the new
        message wrapped in ``[INST] ... [/INST]``.
    """
    past_turns = "".join(
        f"[INST] {question} [/INST] {answer}</s> "
        for question, answer in history
    )
    return f"<s>{past_turns}[INST] {message} [/INST]"
def generate(audio_text, history, temperature=None, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
    """Ask the hosted Mixtral model for a reply and synthesize it as speech.

    Args:
        audio_text: The user's (transcribed) message.
        history: Previous ``(user_prompt, bot_response)`` pairs, passed to
            ``format_prompt`` to build the conversation context.
        temperature: Sampling temperature; ``None`` means 0.9. Values below
            0.01 are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens.
        top_p: Nucleus-sampling probability mass.
        repetition_penalty: Penalty forwarded to the generation endpoint.

    Returns:
        A ``(response, audio_file)`` tuple. ``response`` is the reply with
        ``</s>`` markers removed and whitespace collapsed. ``audio_file`` is
        the buffer returned by ``text_to_speech``, or ``None`` when the
        model produced no text to speak.
    """
    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

    temperature = 0.9 if temperature is None else float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(audio_text, history)
    stream = client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=True,
    )

    # Collect the streamed tokens in one pass instead of repeated `+=`.
    raw_text = "".join(chunk.token.text for chunk in stream)

    # Drop the end-of-sequence marker first so that collapsing whitespace
    # afterwards cannot leave a stray space where the marker used to be.
    response = " ".join(raw_text.replace("</s>", "").split())

    # An empty reply cannot be synthesized; signal "no audio" with None,
    # which the caller already checks for.
    if not response:
        return response, None

    audio_file = text_to_speech(response, speed=1.3)
    return response, audio_file
def text_to_speech(text, speed=1.3):
    """Synthesize ``text`` as Spanish speech and return it as MP3 data.

    Args:
        text: The text to speak.
        speed: Playback-speed factor handed to ``AudioSegment.speedup``.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that holds the sped-up
        audio exported as MP3.
    """
    # Have gTTS write its output into an in-memory buffer.
    raw_mp3 = io.BytesIO()
    gTTS(text=text, lang='es').write_to_fp(raw_mp3)
    raw_mp3.seek(0)

    # Decode the buffer and apply the requested playback speed.
    spoken = AudioSegment.from_file(raw_mp3, format="mp3")
    faster = spoken.speedup(playback_speed=speed)

    # Re-encode into a fresh buffer and rewind it for the caller.
    result = io.BytesIO()
    faster.export(result, format="mp3")
    result.seek(0)
    return result
def main():
    """Render the voice-to-voice chatbot page.

    Shows a recorder widget; once a recording exists it is played back,
    saved to ``audio.wav``, transcribed with ``recognize_speech``, answered
    with ``generate``, appended to ``st.session_state.history`` and the
    spoken reply is embedded as an auto-playing ``<audio>`` element.
    """
    st.title("Chatbot de Voz a Voz")

    # The recorder widget supplies the audio; it yields a pydub AudioSegment
    # whose length is 0 until the user has recorded something.
    audio_data = audiorecorder("Haz clic para grabar", "Haz clic para detener")
    if len(audio_data) == 0:
        return

    # Export as WAV so the bytes match the declared "audio/wav" type.
    st.audio(audio_data.export(format="wav").read(), format="audio/wav")
    audio_data.export("audio.wav", format="wav")

    audio_text = recognize_speech("audio.wav")
    if not audio_text:
        # Nothing was recognized, so there is nothing to answer or store.
        return

    output, audio_file = generate(audio_text, history=st.session_state.history)
    st.session_state.history.append((audio_text, output))

    if audio_file is None:
        return

    encoded_audio = base64.b64encode(audio_file.read()).decode()
    # Kept on a single unindented line: Markdown would treat an indented
    # HTML line as a code block instead of rendering the player.
    st.markdown(
        f'<audio autoplay="autoplay" controls="controls" src="data:audio/mp3;base64,{encoded_audio}" type="audio/mp3" id="audio_player"></audio>',
        unsafe_allow_html=True,
    )
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()