freddyaboulton HF staff committed on
Commit
bc0c6f3
·
verified ·
1 Parent(s): a2adb11

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +8 -16
app.py CHANGED
@@ -3,6 +3,7 @@ import time
3
 
4
  import gradio as gr
5
  import numpy as np
 
6
  from dotenv import load_dotenv
7
  from elevenlabs import ElevenLabs
8
  from fastapi import FastAPI
@@ -11,9 +12,8 @@ from fastrtc import (
11
  ReplyOnPause,
12
  Stream,
13
  WebRTCError,
14
- aggregate_bytes_to_16bit,
15
  get_twilio_turn_credentials,
16
- stt,
17
  )
18
  from gradio.utils import get_space
19
  from groq import Groq
@@ -21,19 +21,20 @@ from groq import Groq
21
  load_dotenv()
22
  groq_client = Groq()
23
  tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
 
24
 
25
 
26
  # See "Talk to Claude" in Cookbook for an example of how to keep
27
  # track of the chat history.
28
  def response(
29
- audio: tuple[int, np.ndarray],
30
  chatbot: list[dict] | None = None,
31
  ):
32
  try:
33
  chatbot = chatbot or []
34
  messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
35
  start = time.time()
36
- text = stt(audio)
37
  print("transcription", time.time() - start)
38
  print("prompt", text)
39
  chatbot.append({"role": "user", "content": text})
@@ -51,13 +52,12 @@ def response(
51
 
52
  chatbot.append({"role": "assistant", "content": response_text})
53
 
54
- iterator = tts_client.text_to_speech.convert_as_stream(
55
  text=response_text, # type: ignore
56
  voice_id="JBFqnCBsd6RMkjVDRZzb",
57
  model_id="eleven_multilingual_v2",
58
  output_format="pcm_24000",
59
- )
60
- for chunk in aggregate_bytes_to_16bit(iterator):
61
  audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
62
  yield (24000, audio_array)
63
  yield AdditionalOutputs(chatbot)
@@ -78,16 +78,8 @@ stream = Stream(
78
  additional_outputs=[chatbot],
79
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
80
  concurrency_limit=20 if get_space() else None,
 
81
  )
82
- for id, block in stream.ui.blocks.items():
83
- if isinstance(block, gr.HTML):
84
- stream.ui.blocks[id] = gr.HTML(
85
- """
86
- <h1 style='text-align: center'>
87
- LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)
88
- </h1>
89
- """
90
- )
91
 
92
  # Mount the STREAM UI to the FastAPI app
93
  # Because I don't want to build the UI manually
 
3
 
4
  import gradio as gr
5
  import numpy as np
6
+ from numpy.typing import NDArray
7
  from dotenv import load_dotenv
8
  from elevenlabs import ElevenLabs
9
  from fastapi import FastAPI
 
12
  ReplyOnPause,
13
  Stream,
14
  WebRTCError,
15
+ get_stt_model,
16
  get_twilio_turn_credentials,
 
17
  )
18
  from gradio.utils import get_space
19
  from groq import Groq
 
21
  load_dotenv()
22
  groq_client = Groq()
23
  tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
24
+ stt_model = get_stt_model()
25
 
26
 
27
  # See "Talk to Claude" in Cookbook for an example of how to keep
28
  # track of the chat history.
29
  def response(
30
+ audio: tuple[int, NDArray[np.int16 | np.float32]],
31
  chatbot: list[dict] | None = None,
32
  ):
33
  try:
34
  chatbot = chatbot or []
35
  messages = [{"role": d["role"], "content": d["content"]} for d in chatbot]
36
  start = time.time()
37
+ text = stt_model.stt(audio)
38
  print("transcription", time.time() - start)
39
  print("prompt", text)
40
  chatbot.append({"role": "user", "content": text})
 
52
 
53
  chatbot.append({"role": "assistant", "content": response_text})
54
 
55
+ for chunk in tts_client.text_to_speech.convert_as_stream(
56
  text=response_text, # type: ignore
57
  voice_id="JBFqnCBsd6RMkjVDRZzb",
58
  model_id="eleven_multilingual_v2",
59
  output_format="pcm_24000",
60
+ ):
 
61
  audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
62
  yield (24000, audio_array)
63
  yield AdditionalOutputs(chatbot)
 
78
  additional_outputs=[chatbot],
79
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
80
  concurrency_limit=20 if get_space() else None,
81
+ ui_args={"title": "LLM Voice Chat (Powered by Groq, ElevenLabs, and WebRTC ⚡️)"},
82
  )
 
 
 
 
 
 
 
 
 
83
 
84
  # Mount the STREAM UI to the FastAPI app
85
  # Because I don't want to build the UI manually