Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ import time
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
|
|
6 |
from dotenv import load_dotenv
|
7 |
from elevenlabs import ElevenLabs
|
8 |
from fastapi import FastAPI
|
@@ -11,9 +12,8 @@ from fastrtc import (
|
|
11 |
ReplyOnPause,
|
12 |
Stream,
|
13 |
WebRTCError,
|
14 |
-
aggregate_bytes_to_16bit,
|
15 |
get_twilio_turn_credentials,
|
16 |
-
stt,
|
17 |
)
|
18 |
from gradio.utils import get_space
|
19 |
from groq import Groq
|
@@ -21,19 +21,20 @@ from groq import Groq
|
|
21 |
load_dotenv()
|
22 |
groq_client = Groq()
|
23 |
tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
|
|
|
24 |
|
25 |
|
26 |
# See "Talk to Claude" in Cookbook for an example of how to keep
|
27 |
# track of the chat history.
|
28 |
def response(
|
29 |
-
audio: tuple[int, np.ndarray],
|
30 |
chatbot: list[dict] | None = None,
|
31 |
):
|
32 |
try:
|
33 |
chatbot = chatbot or []
|
34 |
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
|
35 |
start = time.time()
|
36 |
-
text = stt(audio)
|
37 |
print("transcription", time.time() - start)
|
38 |
print("prompt", text)
|
39 |
chatbot.append({"role": "user", "content": text})
|
@@ -51,13 +52,12 @@ def response(
|
|
51 |
|
52 |
chatbot.append({"role": "assistant", "content": response_text})
|
53 |
|
54 |
-
iterator = tts_client.text_to_speech.convert_as_stream(
|
55 |
text=response_text, # type: ignore
|
56 |
voice_id="JBFqnCBsd6RMkjVDRZzb",
|
57 |
model_id="eleven_multilingual_v2",
|
58 |
output_format="pcm_24000",
|
59 |
-
)
|
60 |
-
for chunk in aggregate_bytes_to_16bit(iterator):
|
61 |
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
|
62 |
yield (24000, audio_array)
|
63 |
yield AdditionalOutputs(chatbot)
|
@@ -78,16 +78,8 @@ stream = Stream(
|
|
78 |
additional_outputs=[chatbot],
|
79 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
80 |
concurrency_limit=20 if get_space() else None,
|
|
|
81 |
)
|
82 |
-
for id, block in stream.ui.blocks.items():
|
83 |
-
if isinstance(block, gr.HTML):
|
84 |
-
stream.ui.blocks[id] = gr.HTML(
|
85 |
-
"""
|
86 |
-
<h1 style='text-align: center'>
|
87 |
-
LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)
|
88 |
-
</h1>
|
89 |
-
"""
|
90 |
-
)
|
91 |
|
92 |
# Mount the STREAM UI to the FastAPI app
|
93 |
# Because I don't want to build the UI manually
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import numpy as np
|
6 |
+
from numpy.typing import NDArray
|
7 |
from dotenv import load_dotenv
|
8 |
from elevenlabs import ElevenLabs
|
9 |
from fastapi import FastAPI
|
|
|
12 |
ReplyOnPause,
|
13 |
Stream,
|
14 |
WebRTCError,
|
15 |
+
get_stt_model,
|
16 |
get_twilio_turn_credentials,
|
|
|
17 |
)
|
18 |
from gradio.utils import get_space
|
19 |
from groq import Groq
|
|
|
21 |
load_dotenv()
|
22 |
groq_client = Groq()
|
23 |
tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
|
24 |
+
stt_model = get_stt_model()
|
25 |
|
26 |
|
27 |
# See "Talk to Claude" in Cookbook for an example of how to keep
|
28 |
# track of the chat history.
|
29 |
def response(
|
30 |
+
audio: tuple[int, NDArray[np.int16 | np.float32]],
|
31 |
chatbot: list[dict] | None = None,
|
32 |
):
|
33 |
try:
|
34 |
chatbot = chatbot or []
|
35 |
messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
|
36 |
start = time.time()
|
37 |
+
text = stt_model.stt(audio)
|
38 |
print("transcription", time.time() - start)
|
39 |
print("prompt", text)
|
40 |
chatbot.append({"role": "user", "content": text})
|
|
|
52 |
|
53 |
chatbot.append({"role": "assistant", "content": response_text})
|
54 |
|
55 |
+
for chunk in tts_client.text_to_speech.convert_as_stream(
|
56 |
text=response_text, # type: ignore
|
57 |
voice_id="JBFqnCBsd6RMkjVDRZzb",
|
58 |
model_id="eleven_multilingual_v2",
|
59 |
output_format="pcm_24000",
|
60 |
+
):
|
|
|
61 |
audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
|
62 |
yield (24000, audio_array)
|
63 |
yield AdditionalOutputs(chatbot)
|
|
|
78 |
additional_outputs=[chatbot],
|
79 |
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
80 |
concurrency_limit=20 if get_space() else None,
|
81 |
+
ui_args={"title": "LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)"},
|
82 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# Mount the STREAM UI to the FastAPI app
|
85 |
# Because I don't want to build the UI manually
|