aar2dee2 committed on
Commit
531e3ab
·
1 Parent(s): 5cb623a

switch to turn-based conversation

Browse files
Files changed (2) hide show
  1. app.py +39 -48
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,25 +1,25 @@
1
  # # Import required libraries
2
 
 
 
 
 
 
 
 
 
3
  import gradio as gr
4
- import requests
5
- import json
6
  import os
7
- from pydub import AudioSegment
8
- import asyncio
9
  import logging
10
- import signal
11
- from vocode.streaming.streaming_conversation import StreamingConversation
12
- from vocode.helpers import create_streaming_microphone_input_and_speaker_output
13
- from vocode.streaming.transcriber import *
14
- from vocode.streaming.agent import *
15
- from vocode.streaming.synthesizer import *
16
- from vocode.streaming.models.transcriber import *
17
- from vocode.streaming.models.agent import *
18
- from vocode.streaming.models.synthesizer import *
19
- from vocode.streaming.models.message import BaseMessage
20
- import vocode
21
 
22
 
 
 
 
 
 
 
 
23
  system_prompt = """
24
  You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
25
  You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
@@ -76,51 +76,42 @@ logger = logging.getLogger(__name__)
76
  logger.setLevel(logging.DEBUG)
77
 
78
 
79
- async def main():
80
  (
81
  microphone_input,
82
  speaker_output,
83
- ) = create_streaming_microphone_input_and_speaker_output(
84
- use_default_devices=True,
85
- logger=logger,
86
- use_blocking_speaker_output=True,
87
- )
88
 
89
- conversation = StreamingConversation(
 
90
  output_device=speaker_output,
91
- transcriber=WhisperCPPTranscriber(
92
- WhisperCPPTranscriberConfig.from_input_device(
93
- microphone_input,
94
- libname="/whisper.cpp/libwhisper.so",
95
- fname_model="/whisper.cpp/models/ggml-tiny.bin",
96
- )
97
- ),
98
  agent=ChatGPTAgent(
99
- ChatGPTAgentConfig(
100
- initial_message=BaseMessage(text="What up"),
101
- prompt_preamble=system_prompt,
102
- )
103
  ),
104
- synthesizer=CoquiTTSSynthesizer(
105
- CoquiTTSSynthesizerConfig.from_output_device(
106
  speaker_output,
107
  tts_kwargs={
108
- "model_name": "tts_models/en/ljspeech/tacotron2-DDC_ph",
109
  "voice_id": os.getenv("COQUI_VOICE_ID"),
110
  }
111
- )
 
112
  ),
113
  logger=logger,
114
  )
115
- await conversation.start()
116
- print("Conversation started, press Ctrl+C to end")
117
- signal.signal(
118
- signal.SIGINT, lambda _0, _1: asyncio.create_task(
119
- conversation.terminate())
120
- )
121
- while conversation.is_active():
122
- chunk = await microphone_input.get_audio()
123
- conversation.receive_audio(chunk)
 
124
 
125
- if __name__ == "__main__":
126
- asyncio.run(main())
 
1
  # # Import required libraries
2
 
3
+ import vocode
4
+ from dotenv import load_dotenv
5
+ from vocode import getenv
6
+ from vocode.streaming.models.message import BaseMessage
7
+ from vocode.streaming.models.synthesizer import *
8
+ from vocode.streaming.models.agent import *
9
+ from vocode.streaming.models.transcriber import *
10
+ from vocode.streaming.synthesizer import *
11
  import gradio as gr
 
 
12
  import os
 
 
13
  import logging
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
+ from vocode.helpers import create_turn_based_microphone_input_and_speaker_output
17
+
18
+ from vocode.turn_based.agent.chat_gpt_agent import ChatGPTAgent
19
+ from vocode.turn_based.synthesizer import CoquiSynthesizer
20
+ from vocode.turn_based.transcriber.whisper_transcriber import WhisperTranscriber
21
+ from vocode.turn_based.turn_based_conversation import TurnBasedConversation
22
+
23
  system_prompt = """
24
  You are the evil lord Darth Vader. You answer all questions your subjects ask you, but do so with immense contempt and loathing.
25
  You may use any of the following lines or take inspiration from this conversational style where appropriate in your response.
 
76
  logger.setLevel(logging.DEBUG)
77
 
78
 
79
+ def main():
80
  (
81
  microphone_input,
82
  speaker_output,
83
+ ) = create_turn_based_microphone_input_and_speaker_output(use_default_devices=True)
 
 
 
 
84
 
85
+ conversation = TurnBasedConversation(
86
+ input_device=microphone_input,
87
  output_device=speaker_output,
88
+ transcriber=WhisperTranscriber(api_key=getenv("OPENAI_API_KEY")),
 
 
 
 
 
 
89
  agent=ChatGPTAgent(
90
+ system_prompt=system_prompt,
91
+ initial_message="What up",
92
+ api_key=getenv("OPENAI_API_KEY"),
 
93
  ),
94
+ synthesizer=CoquiSynthesizer(
95
+ CoquiSynthesizerConfig.from_output_device(
96
  speaker_output,
97
  tts_kwargs={
 
98
  "voice_id": os.getenv("COQUI_VOICE_ID"),
99
  }
100
+ ),
101
+ api_key=getenv("COQUI_API_KEY"),
102
  ),
103
  logger=logger,
104
  )
105
+ print("Starting conversation. Press Ctrl+C to exit.")
106
+ while True:
107
+ try:
108
+ input("Press enter to start recording...")
109
+ conversation.start_speech()
110
+ input("Press enter to end recording...")
111
+ conversation.end_speech_and_respond()
112
+ except KeyboardInterrupt:
113
+ break
114
+
115
 
116
+ demo = gr.Interface(fn=main, inputs="audio", outputs="audio")
117
+ demo.launch()
requirements.txt CHANGED
@@ -1 +1,2 @@
1
  vocode[io]
 
 
1
  vocode[io]
2
+ dotenv