Spaces:
Runtime error
Runtime error
aar2dee2
committed on
Commit
·
5a1ed1a
1
Parent(s):
bf4c978
custom send_audio function
Browse files
app.py
CHANGED
@@ -7,7 +7,7 @@ from vocode import getenv
|
|
7 |
import gradio as gr
|
8 |
import os
|
9 |
import logging
|
10 |
-
|
11 |
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
|
12 |
from vocode.turn_based.synthesizer import CoquiSynthesizer
|
13 |
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
|
@@ -56,6 +56,9 @@ Answer the question accurately in less than 150 words. Remember you are Darth Va
|
|
56 |
|
57 |
# # 1. Setup Vocode
|
58 |
# import env vars
|
|
|
|
|
|
|
59 |
vocode.setenv(
|
60 |
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
|
61 |
COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
|
@@ -67,6 +70,8 @@ logging.basicConfig()
|
|
67 |
logger = logging.getLogger(__name__)
|
68 |
logger.setLevel(logging.DEBUG)
|
69 |
|
|
|
|
|
70 |
|
71 |
def convert_to_audio_segment(input_audio):
|
72 |
sample_rate, audio_data = input_audio
|
@@ -80,6 +85,20 @@ def convert_to_audio_segment(input_audio):
|
|
80 |
return audio_segment
|
81 |
|
82 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
def main(input_audio):
|
84 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
85 |
logger.info(f"input_audio: {input_audio}")
|
@@ -98,8 +117,6 @@ def main(input_audio):
|
|
98 |
api_key=getenv("COQUI_API_KEY"),
|
99 |
)
|
100 |
|
101 |
-
speaker_output = SpeakerOutput.from_default_device()
|
102 |
-
|
103 |
print("Starting conversation. Press Ctrl+C to exit.")
|
104 |
while True:
|
105 |
try:
|
@@ -113,7 +130,7 @@ def main(input_audio):
|
|
113 |
response = agent.respond(transcript)
|
114 |
logger.info(f"Agent response: {response}")
|
115 |
output_audio = synthesizer.synthesize(response)
|
116 |
-
return
|
117 |
|
118 |
except Exception as e:
|
119 |
logger.error("Failed to synthesize response: %s", e)
|
|
|
7 |
import gradio as gr
|
8 |
import os
|
9 |
import logging
|
10 |
+
import sounddevice as sd
|
11 |
from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
|
12 |
from vocode.turn_based.synthesizer import CoquiSynthesizer
|
13 |
from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
|
|
|
56 |
|
57 |
# # 1. Setup Vocode
|
58 |
# import env vars
|
59 |
+
if not os.getenv("OPENAI_API_KEY") or not os.getenv("COQUI_API_KEY"):
|
60 |
+
raise EnvironmentError("Required environment variables not set")
|
61 |
+
|
62 |
vocode.setenv(
|
63 |
OPENAI_API_KEY=os.getenv("OPENAI_API_KEY"),
|
64 |
COQUI_API_KEY=os.getenv("COQUI_API_KEY"),
|
|
|
70 |
logger = logging.getLogger(__name__)
|
71 |
logger.setLevel(logging.DEBUG)
|
72 |
|
73 |
+
DEFAULT_SAMPLING_RATE = 44100
|
74 |
+
|
75 |
|
76 |
def convert_to_audio_segment(input_audio):
|
77 |
sample_rate, audio_data = input_audio
|
|
|
85 |
return audio_segment
|
86 |
|
87 |
|
88 |
+
def send_audio(audio_segment: AudioSegment):
    """Play ``audio_segment`` on the default output device as mono 16-bit PCM.

    The segment is converted to ``DEFAULT_SAMPLING_RATE``, a single channel
    and 2-byte samples before playback, so the bytes handed to the stream
    always agree with the stream's ``samplerate``/``channels``/``dtype``.

    Args:
        audio_segment: Audio to play. Presumably a pydub ``AudioSegment``
            (TODO confirm); it must provide ``frame_rate``, ``channels``,
            ``sample_width``, the matching ``set_*`` methods and ``raw_data``.

    Returns:
        None.
    """
    sampling_rate = DEFAULT_SAMPLING_RATE

    # Bring the audio into the exact format the output stream is opened with;
    # otherwise stereo or non-16-bit data would be reinterpreted as mono int16.
    if audio_segment.frame_rate != sampling_rate:
        audio_segment = audio_segment.set_frame_rate(sampling_rate)
    if audio_segment.channels != 1:
        audio_segment = audio_segment.set_channels(1)
    if audio_segment.sample_width != 2:
        audio_segment = audio_segment.set_sample_width(2)

    samples = np.frombuffer(audio_segment.raw_data, dtype=np.int16)

    # The context manager starts the stream on entry and stops/closes it on
    # exit. The previous code never started the stream before writing and
    # never released the device afterwards.
    with sd.OutputStream(
        channels=1,
        samplerate=sampling_rate,
        dtype=np.int16,
        device=None,
    ) as stream:
        stream.write(samples)
|
100 |
+
|
101 |
+
|
102 |
def main(input_audio):
|
103 |
logger.info(f"Type of input_audio: {type(input_audio)}")
|
104 |
logger.info(f"input_audio: {input_audio}")
|
|
|
117 |
api_key=getenv("COQUI_API_KEY"),
|
118 |
)
|
119 |
|
|
|
|
|
120 |
print("Starting conversation. Press Ctrl+C to exit.")
|
121 |
while True:
|
122 |
try:
|
|
|
130 |
response = agent.respond(transcript)
|
131 |
logger.info(f"Agent response: {response}")
|
132 |
output_audio = synthesizer.synthesize(response)
|
133 |
+
return send_audio(output_audio)
|
134 |
|
135 |
except Exception as e:
|
136 |
logger.error("Failed to synthesize response: %s", e)
|