llm-voice-chat

Running

Codeblockz committed on Mar 10

Commit

3222963

verified ·

1 Parent(s): 0eb6af0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -5,12 +5,13 @@ import gradio as gr
 import numpy as np
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs
 from fastapi import FastAPI
 from fastrtc import (
     AdditionalOutputs,
     ReplyOnPause,
     Stream,
-    get_stt_model,
     get_twilio_turn_credentials,
 )
 from gradio.utils import get_space
@@ -19,7 +20,7 @@ from numpy.typing import NDArray
 load_dotenv()
 groq_client = Groq()
-tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 stt_model = get_stt_model()
@@ -50,14 +51,10 @@ def response(
     chatbot.append({"role": "assistant", "content": response_text})
-    for chunk in tts_client.text_to_speech.convert_as_stream(
-        text=response_text,  # type: ignore
-        voice_id="JBFqnCBsd6RMkjVDRZzb",
-        model_id="eleven_multilingual_v2",
-        output_format="pcm_24000",
-    ):
-        audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
-        yield (24000, audio_array)
     yield AdditionalOutputs(chatbot)

 import numpy as np
 from dotenv import load_dotenv
 from elevenlabs import ElevenLabs
+from distil_whisper_fastrtc import get_stt_model
 from fastapi import FastAPI
 from fastrtc import (
     AdditionalOutputs,
     ReplyOnPause,
     Stream,
+    get_tts_model,
     get_twilio_turn_credentials,
 )
 from gradio.utils import get_space
 load_dotenv()
 groq_client = Groq()
+tts_client = get_tts_model()
 stt_model = get_stt_model()
     chatbot.append({"role": "assistant", "content": response_text})
+    # Convert response to audio using TTS model
+    for audio_chunk in tts_model.stream_tts_sync(response_text or ""):
+        # Yield the audio chunk
+        yield audio_chunk
     yield AdditionalOutputs(chatbot)