	animate charles
- agent_response.py +1 -1
- charles_actor.py +20 -2
- charles_animator.py +128 -0
- ffmpeg_converter_actor.py +2 -1
- images/charles-open.png +3 -0
- images/charles.png +3 -0
- images/zoom-background.png +3 -0
- respond_to_prompt_actor.py +2 -1
- streamlit_av_queue.py +27 -9
- tests/test_image.py +192 -0
- tests/test_talking.py +65 -0
- text_to_speech_service.py +3 -2
- webrtc_av_queue_actor.py +17 -7
    	
agent_response.py  CHANGED

@@ -9,7 +9,7 @@ class AgentResponse(dict):
        self['llm_sentence'] = ''
        self['llm_sentence_id'] = 0
        self['llm_sentences'] = []
-        self['
+        self['tts_raw_chunk_ref'] = None
        self['tts_raw_chunk_id'] = 0

    def make_copy(self):
    	
charles_actor.py  CHANGED

@@ -33,7 +33,8 @@ class CharlesActor:
        self._state = "000 - creating StreamlitAVQueue"
        from streamlit_av_queue import StreamlitAVQueue
        self._streamlit_av_queue = StreamlitAVQueue()
-        self._out_audio_queue = self._streamlit_av_queue.get_out_audio_queue()
+        self._out_audio_queue = await self._streamlit_av_queue.get_out_audio_queue()
+        self._out_video_queue = await self._streamlit_av_queue.get_out_video_queue()

        print("001 - create RespondToPromptActor")
        self._state = "001 - creating RespondToPromptActor"

@@ -57,6 +58,12 @@ class CharlesActor:
        self._state = "003 - creating Prototypes"
        from prototypes import Prototypes
        self._prototypes = Prototypes()
+
+        print("004 - create animator")
+        self._state = "004 - creating animator"
+        from charles_animator import CharlesAnimator
+        self._animator = CharlesAnimator()
+
        print("010")
        self._needs_init = True
        self._state = "Initialized"

@@ -184,8 +191,19 @@ class CharlesActor:


            await asyncio.sleep(0.01)
+
+            # add observations to the environment state
+            count = len(self._out_audio_queue)
+            is_talking = bool(count > 0)
+            frame = self._animator.update(is_talking)
+            if self._out_video_queue.full():
+                evicted_item = await self._out_video_queue.get_async()
+                del evicted_item
+            frame_ref = ray.put(frame)
+            await self._out_video_queue.put_async(frame_ref)
+
            loops+=1
-            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
+            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"

def init_ray():
    try:
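The new loop body infers an "is talking" observation from the depth of the outbound audio queue and publishes one animator frame per iteration, evicting the oldest entry when the bounded video queue is full. A minimal standalone sketch of that publish-with-eviction pattern (not part of the commit; it substitutes asyncio.Queue for ray.util.queue.Queue so it runs without a Ray cluster, and the names are illustrative):

    import asyncio

    async def publish_latest(queue: asyncio.Queue, frame) -> None:
        # Bounded queue: drop the oldest entry instead of blocking the render loop.
        if queue.full():
            _evicted = queue.get_nowait()
            del _evicted
        await queue.put(frame)

    async def demo() -> None:
        out_audio_queue: asyncio.Queue = asyncio.Queue(maxsize=3000)
        out_video_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
        for tick in range(30):
            # One observation per loop: pending audio means the bot is speaking.
            is_talking = out_audio_queue.qsize() > 0
            frame = ("mouth-open" if is_talking else "mouth-closed", tick)  # stand-in for an image
            await publish_latest(out_video_queue, frame)
            await asyncio.sleep(0.01)

    asyncio.run(demo())

Dropping the oldest frame keeps the consumer at most maxsize frames behind real time rather than letting the main loop stall on a full queue.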
    	
charles_animator.py  ADDED

@@ -0,0 +1,128 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+import cv2
+import av
+import numpy as np
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert overlay to BGR if it's in RGB
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+        overlay = np.concatenate([overlay, overlay[:, :, 3:]], axis=2)  # Add alpha channel back
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Overlay the images
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image
+    background = cv2.imread(background, cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than backgroun impage in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+class CharlesAnimator:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+        self.was_talking = False
+        # use static frames for pefromance
+        self.static_frames = create_charles_frames("./images/zoom-background.png", [
+            "./images/charles.png", 
+            "./images/charles-open.png"
+            ])
+
+    def update(self, is_talking):
+        start_talking = True if is_talking and not self.was_talking else False
+        self.was_talking = is_talking
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.mouth_open
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        frame = self.static_frames[1] if self.mouth_open else self.static_frames[0]
+        return frame
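The animator composites a "mouth closed" and a "mouth open" portrait onto the background once at startup, then update() simply alternates between the two cached frames on a randomized 0.1-0.5 s cadence while audio is playing. The core of overlay_images is a per-channel alpha blend; a self-contained sketch of that blend on synthetic arrays (the real code reads the PNG assets under ./images/, which this sketch does not need):

    import numpy as np

    # Synthetic stand-ins: a 480x640 RGB background and a 100x100 RGBA overlay
    # with an alpha ramp, so the sketch runs without the repo's image files.
    background = np.full((480, 640, 3), 200, dtype=np.uint8)
    overlay = np.zeros((100, 100, 4), dtype=np.uint8)
    overlay[..., 0] = 255                                                 # a red square
    overlay[..., 3] = np.linspace(0, 255, 100, dtype=np.uint8)[None, :]   # alpha ramp

    # Same per-channel blend as overlay_images(), placed bottom-right like the bot portrait.
    x, y = 640 - 100, 480 - 100
    alpha = overlay[..., 3] / 255.0
    inverse_alpha = 1.0 - alpha
    region = background[y:y + 100, x:x + 100, :].astype(np.float64)
    for c in range(3):
        region[..., c] = alpha * overlay[..., c] + inverse_alpha * region[..., c]
    background[y:y + 100, x:x + 100, :] = region.astype(np.uint8)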
    	
ffmpeg_converter_actor.py  CHANGED

@@ -19,7 +19,8 @@ class FFMpegConverterActor:
        while True:
            chunk = await self.output_pipe.readexactly(self.buffer_size)
            # print(f"FFMpegConverterActor: read {len(chunk)} bytes")
-
+            chunk_ref = ray.put(chunk)
+            await self.output_queue.put_async(chunk_ref)

    async def start_process(self):
        cmd = [
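Across ffmpeg_converter_actor.py, respond_to_prompt_actor.py and text_to_speech_service.py the commit stops passing raw audio bytes between actors and passes Ray ObjectRefs instead, so each chunk lives once in the object store. A minimal round trip of that pattern as a standalone sketch (assumes a local Ray install; the dict keys mirror agent_response.py):

    import ray

    ray.init(ignore_reinit_error=True)

    chunk = b"\x00" * 4096                       # stand-in for one raw audio chunk
    response = {"tts_raw_chunk_id": 0}           # mirrors the AgentResponse dict

    # Producer side (text_to_speech_service.py / ffmpeg_converter_actor.py):
    # park the payload in the object store and carry only the small ObjectRef.
    response["tts_raw_chunk_ref"] = ray.put(chunk)

    # Consumer side (respond_to_prompt_actor.py): resolve the ref back into bytes.
    audio_chunk = ray.get(response["tts_raw_chunk_ref"])
    assert audio_chunk == chunk

    ray.shutdown()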
    	
images/charles-open.png  ADDED  (Git LFS)
images/charles.png  ADDED  (Git LFS)
images/zoom-background.png  ADDED  (Git LFS)
    	
respond_to_prompt_actor.py  CHANGED

@@ -130,7 +130,8 @@ class SpeechToConverterActor:
        self.ffmpeg_converter_actor.run.remote()
        while True:
            chunk_response = await self.input_queue.get_async()
-
+            audio_chunk_ref = chunk_response['tts_raw_chunk_ref']
+            audio_chunk = ray.get(audio_chunk_ref)
            await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)

    async def cancel(self):
    	
streamlit_av_queue.py  CHANGED

@@ -3,9 +3,11 @@ import av
 import asyncio
 from collections import deque
 import threading
+import cv2

 import numpy as np
 import ray
+from ray.util.queue import Queue
 from webrtc_av_queue_actor import WebRtcAVQueueActor
 import pydub
 import torch

@@ -20,7 +22,8 @@ class StreamlitAVQueue:
        self.queue_actor = WebRtcAVQueueActor.options(
            name="WebRtcAVQueueActor", 
            get_if_exists=True,
-            ).remote()
+            ).remote()
+        self._out_video_frame = None

    def set_looking_listening(self, looking, listening: bool):
        with self._lock:

@@ -31,18 +34,33 @@ class StreamlitAVQueue:
            self,
            frames: List[av.VideoFrame],
        ) -> av.VideoFrame:
+        updated_frames = []
        try:
            with self._lock:
                should_look = self._looking
-
-
-
-
+            next_out_video_frame = await self.queue_actor.get_out_video_frame.remote()
+            if next_out_video_frame is not None:
+                self._out_video_frame = next_out_video_frame
+            for i, frame in enumerate(frames):
+                user_image = frame.to_ndarray(format="rgb24")
+                if should_look:
+                    shared_tensor_ref = ray.put(user_image)
                    await self.queue_actor.enqueue_in_video_frame.remote(shared_tensor_ref)
+                if self._out_video_frame is not None:
+                    frame = self._out_video_frame
+                    # resize user image to 1/4 size
+                    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+                    x_user = 0
+                    y_user = frame.shape[0] - user_frame.shape[0]
+                    final_frame = frame.copy()
+                    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+                    frame = av.VideoFrame.from_ndarray(final_frame, format="rgb24")
+
+                updated_frames.append(frame)
            # print (f"tesnor len: {len(shared_tensor)}, tensor shape: {shared_tensor.shape}, tensor type:{shared_tensor.dtype} tensor ref: {shared_tensor_ref}")
        except Exception as e:
            print (e)
-        return
+        return updated_frames

    async def queued_audio_frames_callback(
            self,

@@ -103,8 +121,8 @@ class StreamlitAVQueue:
        shared_tensors = await self.queue_actor.get_in_video_frames.remote()
        return shared_tensors

-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self)->Queue:
        return self.queue_actor.get_out_audio_queue.remote()

-
-
+    def get_out_video_queue(self)->Queue:
+        return self.queue_actor.get_out_video_queue.remote()
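The video callback now prefers the animator's frame and embeds the user's camera image as a quarter-size picture-in-picture in the bottom-left corner. A self-contained sketch of that compositing step on synthetic arrays (stand-ins for the real frames, which arrive as av.VideoFrame objects):

    import cv2
    import numpy as np

    # Synthetic stand-ins for the animator frame and the user's camera image;
    # in the callback both are 640x480 RGB arrays.
    bot_frame = np.zeros((480, 640, 3), dtype=np.uint8)
    user_image = np.full((480, 640, 3), 128, dtype=np.uint8)

    # Shrink the user's image to 1/4 size and paste it into the bottom-left corner,
    # matching the layout the callback builds before re-wrapping the result with
    # av.VideoFrame.from_ndarray(final_frame, format="rgb24").
    user_frame = cv2.resize(user_image,
                            (user_image.shape[1] // 4, user_image.shape[0] // 4),
                            interpolation=cv2.INTER_AREA)
    x_user, y_user = 0, bot_frame.shape[0] - user_frame.shape[0]
    final_frame = bot_frame.copy()
    final_frame[y_user:y_user + user_frame.shape[0], x_user:x_user + user_frame.shape[1]] = user_frame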
    	
tests/test_image.py  ADDED

@@ -0,0 +1,192 @@
+import cv2
+import av
+import numpy as np
+
+def resize_aspect_fit(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+    else:
+        # Original aspect is taller than target
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+    return resized_image
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert overlay to BGR if it's in RGB
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+        overlay = np.concatenate([overlay, overlay[:, :, 3:]], axis=2)  # Add alpha channel back
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Overlay the images
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+
+def transform_frame(user_frame: av.VideoFrame) -> av.VideoFrame:
+    # Convert av.VideoFrame to numpy array (OpenCV format)
+    user_frame_np = np.frombuffer(user_frame.planes[0], np.uint8).reshape(user_frame.height, user_frame.width, -1)
+
+    # Load background image
+    background = cv2.imread("zoom-background.png")
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("bot-image.png", cv2.IMREAD_UNCHANGED)
+
+    # Resize background to match the user frame dimensions
+    aspect_ratio = background.shape[1] / background.shape[0]
+    new_h = user_frame.height
+    new_w = int(new_h * aspect_ratio)
+    background_resized = cv2.resize(background, (new_w, new_h))
+
+    # Crop the background if it exceeds the user frame width
+    if new_w > user_frame.width:
+        crop_x1 = (new_w - user_frame.width) // 2
+        crop_x2 = crop_x1 + user_frame.width
+        background_resized = background_resized[:, crop_x1:crop_x2, :3]
+
+    # Overlay bot image on the right-hand side
+    x_bot = background_resized.shape[1] - bot_image.shape[1]
+    y_bot = 0
+    background_resized = overlay_images(background_resized, bot_image, x_bot, y_bot)
+
+    # Overlay user's video frame in the bottom-left corner
+    x_user = 0
+    y_user = background_resized.shape[0] - user_frame.height
+    background_resized[y_user:user_frame.height+y_user, x_user:user_frame.width+x_user, :3] = user_frame_np
+
+    # Convert the final frame back to av.VideoFrame
+    output_frame = av.VideoFrame.from_ndarray(background_resized, format="bgr24")
+
+    return output_frame
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image
+    background = cv2.imread(background, cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than backgroun impage in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+
+def test_create_bot_frames():
+    frames = create_charles_frames("./images/zoom-background.png", ["./images/charles.png", "./images/charles-open.png"])
+    index = 0
+    for frame in frames:
+        final_frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        cv2.imwrite(f"./images/charles_frame_{index}.jpg", final_frame_bgr)
+        index += 1
+
+def test_overlay():
+    # Load mock user image
+    user_image = cv2.imread("./prototypes/person-016.jpg", cv2.COLOR_BGR2RGB)
+    user_image = cv2.cvtColor(user_image, cv2.COLOR_BGR2RGB)
+    # resize to 640x480, handle that this is smaller and can be cropped
+    user_image = resize_and_crop(user_image, (640, 480))
+
+    # Load background image
+    background = cv2.imread("./images/zoom-background.png", cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (user_image.shape[:2][1], user_image.shape[:2][0]))
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("./images/charles-open.png", cv2.IMREAD_UNCHANGED)
+
+    # resize bot image if it is larger than backgroun impage in any direction
+    if bot_image.shape[0] > background.shape[0]:
+        bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+    # Overlay bot image on the right-hand side
+    x_bot = background.shape[1] - bot_image.shape[1]
+    y_bot = background.shape[0] - bot_image.shape[0]
+    background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+    # Overlay user's frame in the bottom-left corner (1/3 size)
+    # resize user image to 1/4 size
+    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+    x_user = 0
+    y_user = background.shape[0] - user_frame.shape[0]
+    final_frame = background_with_bot.copy()
+    # final_frame[y_user:user_frame.shape[0]+y_user, x_user:user_frame.shape[1]+x_user, :3] = user_frame
+    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+
+    # Save the final frame as JPEG
+    final_frame_bgr = cv2.cvtColor(final_frame, cv2.COLOR_RGB2BGR)
+    cv2.imwrite("./images/final_frame.jpg", final_frame_bgr)
+
+test_overlay()
+test_create_bot_frames()
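Both test helpers assume the repo's image assets exist on disk and write JPEGs back into ./images/. The geometric invariant they exercise is that resize_and_crop always returns exactly the requested dimensions; a quick standalone check on a synthetic 16:9 image (the helper is duplicated here so the check runs outside the test file):

    import cv2
    import numpy as np

    def resize_and_crop(image, dim=(640, 480)):
        # Same fit-then-center-crop logic as the helper in tests/test_image.py.
        h, w = image.shape[:2]
        aspect_ratio = w / h
        target_width, target_height = dim
        if aspect_ratio > target_width / target_height:
            new_height = target_height
            new_width = int(target_height * aspect_ratio)
        else:
            new_width = target_width
            new_height = int(target_width / aspect_ratio)
        resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        x_offset = (new_width - target_width) // 2
        y_offset = (new_height - target_height) // 2
        return resized[y_offset:y_offset + target_height, x_offset:x_offset + target_width]

    image = np.zeros((1080, 1920, 3), dtype=np.uint8)            # synthetic 16:9 input
    assert resize_and_crop(image, (640, 480)).shape[:2] == (480, 640)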
    	
tests/test_talking.py  ADDED

@@ -0,0 +1,65 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+
+
+class CharacterFace:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+
+    def update(self, is_talking, start_talking=False):
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.mouth_open
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        return self.mouth_open
+
+
+def _debug_test():
+    # Example usage
+    face = CharacterFace()
+    output = []
+
+    # Initialize variables to control talk and pause durations
+    next_talk_time = 0
+    next_pause_time = 0
+    is_talking = False
+
+    # Simulate the character talking and not talking with variable durations
+    for _ in range(500):  # Increase the number of iterations for a longer simulation
+        current_time = time.time()
+        start_talking = False
+
+        if is_talking and current_time >= next_talk_time:
+            is_talking = False
+            next_pause_time = current_time + random.uniform(0.5, 3.0)
+
+        if not is_talking and current_time >= next_pause_time:
+            is_talking = True
+            start_talking = True  # Set flag to open mouth at the start of talking
+            next_talk_time = current_time + random.uniform(1.0, 5.0)
+
+        mouth_open = face.update(is_talking, start_talking)
+        print(f"Is Talking: {is_talking}, Mouth Open: {mouth_open}")
+        time.sleep(random.uniform(0.1, 0.5))
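As committed, _debug_test() is defined but never invoked, so running the file directly produces no output, and pytest will not collect it because the name does not start with test_. A conventional entry point (not part of the commit) would be:

    # Hypothetical guard, not in the commit: lets `python tests/test_talking.py`
    # run the simulation directly.
    if __name__ == "__main__":
        _debug_test()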
    	
text_to_speech_service.py  CHANGED

@@ -5,7 +5,7 @@ from elevenlabs import generate, play
 from elevenlabs import set_api_key
 from elevenlabs import generate, stream
 from agent_response import AgentResponse
-
+import ray

 class TextToSpeechService:
     def __init__(self, voice_id="Bella", model_id="eleven_monolingual_v1"):

@@ -60,7 +60,8 @@ class TextToSpeechService:

            # Run next(stream) in a separate thread to avoid blocking the event loop
            chunk = await asyncio.to_thread(next, stream)
-
+            chunk_ref = ray.put(chunk)
+            sentence_response['tts_raw_chunk_ref'] = chunk_ref
            if cancel_event.is_set():
                return
            yield sentence_response
    	
webrtc_av_queue_actor.py  CHANGED

@@ -8,9 +8,10 @@ import numpy as np
 @ray.remote
 class WebRtcAVQueueActor:
     def __init__(self):
-        self.in_audio_queue = Queue(maxsize=
-        self.in_video_queue = Queue(maxsize=
-        self.out_audio_queue = Queue(maxsize=
+        self.in_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.in_video_queue = Queue(maxsize=10)  # Adjust the size as needed
+        self.out_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.out_video_queue = Queue(maxsize=10)  # Adjust the size as needed


    async def enqueue_in_video_frame(self, shared_tensor_ref):

@@ -25,7 +26,6 @@ class WebRtcAVQueueActor:
            del evicted_item
        await self.in_audio_queue.put_async(shared_buffer_ref)

-
    async def get_in_audio_frames(self):
        audio_frames = []
        if self.in_audio_queue.empty():

@@ -44,11 +44,21 @@ class WebRtcAVQueueActor:
            video_frames.append(shared_tensor_ref)
        return video_frames

-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self)->Queue:
        return self.out_audio_queue

+    def get_out_video_queue(self)->Queue:
+        return self.out_video_queue
+
    async def get_out_audio_frame(self):
        if self.out_audio_queue.empty():
            return None
-
-        return
+        frame = await self.out_audio_queue.get_async()
+        return frame
+
+    async def get_out_video_frame(self):
+        if self.out_video_queue.empty():
+            return None
+        while not self.out_video_queue.empty():
+            frame = await self.out_video_queue.get_async()
+        return frame
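The new get_out_video_frame drains the queue and keeps only the newest entry, so a slow WebRTC consumer never renders stale video. A standalone sketch of that drain-to-latest behaviour (using asyncio.Queue in place of the Ray queue so it runs by itself):

    import asyncio

    async def drain_to_latest(queue: asyncio.Queue):
        # Mirrors get_out_video_frame: skip stale frames, return only the newest,
        # or None when nothing is pending.
        if queue.empty():
            return None
        frame = None
        while not queue.empty():
            frame = queue.get_nowait()
        return frame

    async def demo() -> None:
        q: asyncio.Queue = asyncio.Queue(maxsize=10)
        for i in range(5):
            await q.put(f"frame-{i}")
        assert await drain_to_latest(q) == "frame-4"
        assert await drain_to_latest(q) is None

    asyncio.run(demo())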
