Codeblockz committed on
Commit
3222963
·
verified ·
1 Parent(s): 0eb6af0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -10
app.py CHANGED
@@ -5,12 +5,13 @@ import gradio as gr
5
  import numpy as np
6
  from dotenv import load_dotenv
7
  from elevenlabs import ElevenLabs
 
8
  from fastapi import FastAPI
9
  from fastrtc import (
10
  AdditionalOutputs,
11
  ReplyOnPause,
12
  Stream,
13
- get_stt_model,
14
  get_twilio_turn_credentials,
15
  )
16
  from gradio.utils import get_space
@@ -19,7 +20,7 @@ from numpy.typing import NDArray
19
 
20
  load_dotenv()
21
  groq_client = Groq()
22
- tts_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
23
  stt_model = get_stt_model()
24
 
25
 
@@ -50,14 +51,10 @@ def response(
50
 
51
  chatbot.append({"role": "assistant", "content": response_text})
52
 
53
- for chunk in tts_client.text_to_speech.convert_as_stream(
54
- text=response_text, # type: ignore
55
- voice_id="JBFqnCBsd6RMkjVDRZzb",
56
- model_id="eleven_multilingual_v2",
57
- output_format="pcm_24000",
58
- ):
59
- audio_array = np.frombuffer(chunk, dtype=np.int16).reshape(1, -1)
60
- yield (24000, audio_array)
61
  yield AdditionalOutputs(chatbot)
62
 
63
 
 
5
  import numpy as np
6
  from dotenv import load_dotenv
7
  from elevenlabs import ElevenLabs
8
+ from distil_whisper_fastrtc import get_stt_model
9
  from fastapi import FastAPI
10
  from fastrtc import (
11
  AdditionalOutputs,
12
  ReplyOnPause,
13
  Stream,
14
+ get_tts_model,
15
  get_twilio_turn_credentials,
16
  )
17
  from gradio.utils import get_space
 
20
 
21
  load_dotenv()
22
  groq_client = Groq()
23
+ tts_client = get_tts_model()
24
  stt_model = get_stt_model()
25
 
26
 
 
51
 
52
  chatbot.append({"role": "assistant", "content": response_text})
53
 
54
+ # Convert response to audio using TTS model
55
+ for audio_chunk in tts_model.stream_tts_sync(response_text or ""):
56
+ # Yield the audio chunk
57
+ yield audio_chunk
 
 
 
 
58
  yield AdditionalOutputs(chatbot)
59
 
60