aar2dee2 committed on
Commit
5a1ed1a
·
1 Parent(s): bf4c978

custom send_audio function

Browse files
Files changed (1) hide show
  1. app.py +21 -4
app.py CHANGED
@@ -7,7 +7,7 @@ from vocode import getenv
7
  import gradio as gr
8
  import os
9
  import logging
10
-
11
  from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
12
  from vocode.turn_based.synthesizer import CoquiSynthesizer
13
  from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
@@ -56,6 +56,9 @@ Answer the question accurately in less than 150 words. Remember you are Darth Va
56
 
57
  # # 1. Setup Vocode
58
  # import env vars
 
 
 
59
  vocode.setenv(
60
  OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
61
  COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
@@ -67,6 +70,8 @@ logging.basicConfig()
67
  logger = logging.getLogger(__name__)
68
  logger.setLevel(logging.DEBUG)
69
 
 
 
70
 
71
  def convert_to_audio_segment(input_audio):
72
  sample_rate, audio_data = input_audio
@@ -80,6 +85,20 @@ def convert_to_audio_segment(input_audio):
80
  return audio_segment
81
 
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  def main(input_audio):
84
  logger.info(f"Type of input_audio: {type(input_audio)}")
85
  logger.info(f"input_audio: {input_audio}")
@@ -98,8 +117,6 @@ def main(input_audio):
98
  api_key=getenv("COQUI_API_KEY"),
99
  )
100
 
101
- speaker_output = SpeakerOutput.from_default_device()
102
-
103
  print("Starting conversation. Press Ctrl+C to exit.")
104
  while True:
105
  try:
@@ -113,7 +130,7 @@ def main(input_audio):
113
  response = agent.respond(transcript)
114
  logger.info(f"Agent response: {response}")
115
  output_audio = synthesizer.synthesize(response)
116
- return speaker_output.send_audio(output_audio)
117
 
118
  except Exception as e:
119
  logger.error("Failed to synthesize response: %s", e)
 
7
  import gradio as gr
8
  import os
9
  import logging
10
+ import sounddevice as sd
11
  from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
12
  from vocode.turn_based.synthesizer import CoquiSynthesizer
13
  from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
 
56
 
57
  # # 1. Setup Vocode
58
  # import env vars
59
+ if not os.getenv("OPENAI_API_KEY") or not os.getenv("COQUI_API_KEY"):
60
+ raise EnvironmentError("Required environment variables not set")
61
+
62
  vocode.setenv(
63
  OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
64
  COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
 
70
  logger = logging.getLogger(__name__)
71
  logger.setLevel(logging.DEBUG)
72
 
73
+ DEFAULT_SAMPLING_RATE = 44100
74
+
75
 
76
  def convert_to_audio_segment(input_audio):
77
  sample_rate, audio_data = input_audio
 
85
  return audio_segment
86
 
87
 
88
def send_audio(audio_segment: AudioSegment):
    """Play ``audio_segment`` through a sounddevice output stream.

    The segment is resampled to ``DEFAULT_SAMPLING_RATE`` when its frame rate
    differs, and its raw bytes are then written to the stream as 16-bit
    samples. ``device=None`` leaves the choice of output device to
    sounddevice.

    Args:
        audio_segment: Audio to play. NOTE(review): the stream is opened with
            a single channel and ``np.int16`` samples, so this presumably
            expects mono 16-bit audio -- confirm against the synthesizer
            output.

    Returns:
        None.
    """
    sampling_rate = DEFAULT_SAMPLING_RATE

    raw_data = audio_segment.raw_data
    if audio_segment.frame_rate != sampling_rate:
        # Resample so the data matches the rate the stream is opened with.
        raw_data = audio_segment.set_frame_rate(sampling_rate).raw_data
    samples = np.frombuffer(raw_data, dtype=np.int16)

    # The stream must be started before it can be written to, and must be
    # closed afterwards so the audio device handle is not leaked on every
    # call. Using the stream as a context manager starts it on entry and
    # stops and closes it on exit, even if the write raises.
    with sd.OutputStream(
        channels=1,
        samplerate=sampling_rate,
        dtype=np.int16,
        device=None,
    ) as stream:
        stream.write(samples)
100
+
101
+
102
  def main(input_audio):
103
  logger.info(f"Type of input_audio: {type(input_audio)}")
104
  logger.info(f"input_audio: {input_audio}")
 
117
  api_key=getenv("COQUI_API_KEY"),
118
  )
119
 
 
 
120
  print("Starting conversation. Press Ctrl+C to exit.")
121
  while True:
122
  try:
 
130
  response = agent.respond(transcript)
131
  logger.info(f"Agent response: {response}")
132
  output_audio = synthesizer.synthesize(response)
133
+ return send_audio(output_audio)
134
 
135
  except Exception as e:
136
  logger.error("Failed to synthesize response: %s", e)