Spaces:
Runtime error
Runtime error
File size: 5,722 Bytes
730fe87 bd435b3 12f4772 bd435b3 12f4772 1fe79e2 730fe87 1fe79e2 730fe87 1fe79e2 730fe87 1fe79e2 bd435b3 245e7f9 bd435b3 1fe79e2 245e7f9 bd435b3 1fe79e2 245e7f9 bd435b3 730fe87 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import asyncio
import itertools
import json
import os
import torch
import openai
from audio_stream_processor import AudioStreamProcessor
from speech_service import SpeechService
class StreamingChatService:
    """Stream chat completions and forward them to speech sentence by sentence.

    The conversation is kept in ``self._messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts, which is passed unchanged as the
    ``messages`` argument of ``openai.ChatCompletion``.
    """

    # Characters that end a sentence, and closing punctuation that may directly
    # follow them and still belongs to the same sentence (e.g. `"Hi."` or `(ok.)`).
    _SENTENCE_TERMINATORS = (".", "?", "!")
    _CLOSING_BRACKETS = ('"', ")", "]")

    def __init__(
        self,
        audio_processor: "AudioStreamProcessor | None" = None,
        api="openai",
        model_id="gpt-3.5-turbo",
        voice_id="Bella",
    ):
        """Create the service and start with an empty conversation.

        Args:
            audio_processor: Object whose ``add_audio_stream`` receives the
                speech produced by ``respond_to`` and
                ``enqueue_speech_bytes_to_play``. Those methods fail with
                ``AttributeError`` when this is left as ``None``; the async
                generators do not use it.
            api: Stored as ``self._api``; not read by any method of this class.
            model_id: Model name passed to the chat completion calls.
            voice_id: Passed to ``SpeechService(voice_id=...)``.
        """
        # NOTE: the annotation is a string on purpose. Annotations are evaluated
        # when the ``def`` executes, so the previous ``AudioStreamProcessor()``
        # annotation constructed a processor as a side effect of defining the class.
        self._audio_processor = audio_processor
        self._speech_service = SpeechService(voice_id=voice_id)
        self._api = api
        # Stored for reference only; no method of this class reads it.
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = None
        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear the conversation, re-adding the system prompt if one is set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the leading part of ``sentence`` that is ready to be spoken.

        Text is released only up to the last sentence terminator (plus any
        closing brackets/quotes directly after it), and only once at least one
        further character has arrived after that point.

        Returns:
            The prefix to speak, or ``None`` if nothing should be sent yet.
        """
        last_termination_index = max(
            sentence.rfind(char) for char in self._SENTENCE_TERMINATORS
        )
        # No terminator at all (this also covers the empty string).
        if last_termination_index < 0:
            return None

        # If the buffer currently ends on a terminator or a closing bracket we
        # wait for the next chunk: more closing punctuation may still follow.
        last_char = sentence[-1]
        if last_char in self._SENTENCE_TERMINATORS or last_char in self._CLOSING_BRACKETS:
            return None

        # Keep closing brackets/quotes that directly follow the terminator.
        while (
            last_termination_index + 1 < len(sentence)
            and sentence[last_termination_index + 1] in self._CLOSING_BRACKETS
        ):
            last_termination_index += 1

        return sentence[: last_termination_index + 1]

    def ignore_sentence(self, text_to_speak):
        """Return True when ``text_to_speak`` has nothing worth speaking.

        That is the case when it contains no letter and no digit, which covers
        the empty string, pure whitespace and lone punctuation/brackets.
        """
        return not any(char.isalpha() or char.isdigit() for char in text_to_speak)

    def _safe_enqueue_text_to_speak(self, text_to_speak):
        """Synthesize ``text_to_speak`` and queue it, unless it is ignorable.

        Requires ``audio_processor`` to have been supplied to the constructor.
        """
        if self.ignore_sentence(text_to_speak):
            return
        stream = self._speech_service.stream(text_to_speak)
        self._audio_processor.add_audio_stream(stream)

    @staticmethod
    def _delta_text(chunk):
        """Return the text carried by one streamed completion chunk ('' if none)."""
        delta = chunk["choices"][0]["delta"]
        # ``content`` may be absent or explicitly None; both mean "no text".
        return delta.get("content") or ""

    def respond_to(self, prompt):
        """Send ``prompt`` to the model and speak the reply as it streams in.

        The prompt and the complete reply are appended to the conversation.
        Each finished sentence is printed and queued on the audio processor.

        Returns:
            The full text of the assistant's reply.
        """
        self._messages.append({"role": "user", "content": prompt})
        response_parts = []
        current_sentence = ""
        response = openai.ChatCompletion.create(
            model=self._model_id,
            messages=self._messages,
            temperature=1.0,  # set to 0.0 for debugging/deterministic results
            stream=True,
        )
        for chunk in response:
            chunk_text = self._delta_text(chunk)
            if not chunk_text:
                continue
            current_sentence += chunk_text
            response_parts.append(chunk_text)
            text_to_speak = self._should_we_send_to_voice(current_sentence)
            if text_to_speak:
                self._safe_enqueue_text_to_speak(text_to_speak)
                print(text_to_speak)
                # Keep only the not-yet-spoken remainder in the buffer.
                current_sentence = current_sentence[len(text_to_speak):]

        # Flush whatever is left once the stream has ended.
        if current_sentence:
            self._safe_enqueue_text_to_speak(current_sentence)
            print(current_sentence)

        agent_response = "".join(response_parts)
        self._messages.append({"role": "assistant", "content": agent_response})
        return agent_response

    async def get_responses_as_sentances_async(self, prompt):
        """Send ``prompt`` to the model and yield the reply sentence by sentence.

        The prompt is appended to the conversation immediately; the complete
        reply is appended only after the generator has been fully consumed.

        Yields:
            Consecutive pieces of the reply, split at sentence boundaries.
        """
        self._messages.append({"role": "user", "content": prompt})
        response_parts = []
        current_sentence = ""
        response = await openai.ChatCompletion.acreate(
            model=self._model_id,
            messages=self._messages,
            temperature=1.0,  # set to 0.0 for debugging/deterministic results
            stream=True,
        )
        async for chunk in response:
            chunk_text = self._delta_text(chunk)
            if not chunk_text:
                continue
            current_sentence += chunk_text
            response_parts.append(chunk_text)
            text_to_speak = self._should_we_send_to_voice(current_sentence)
            if text_to_speak:
                yield text_to_speak
                current_sentence = current_sentence[len(text_to_speak):]

        # Flush whatever is left once the stream has ended.
        if current_sentence:
            yield current_sentence
        self._messages.append({"role": "assistant", "content": "".join(response_parts)})

    async def get_speech_chunks_async(self, text_to_speak):
        """Yield the speech service's chunks for ``text_to_speak`` without blocking.

        Every ``next()`` on the underlying stream runs in a worker thread, so
        the event loop stays free while a chunk is being produced. Iteration
        ends when the stream is exhausted or produces ``None``.
        """
        stream = iter(self._speech_service.stream(text_to_speak))
        while True:
            # The ``None`` default also keeps StopIteration from having to
            # cross the thread/future boundary.
            chunk = await asyncio.to_thread(next, stream, None)
            if chunk is None:
                break
            yield chunk

    def enqueue_speech_bytes_to_play(self, speech_bytes):
        """Queue already-synthesized speech on the audio processor.

        Requires ``audio_processor`` to have been supplied to the constructor.
        """
        self._audio_processor.add_audio_stream(speech_bytes)
|