	animate charles
- agent_response.py +1 -1
- charles_actor.py +20 -2
- charles_animator.py +128 -0
- ffmpeg_converter_actor.py +2 -1
- images/charles-open.png +3 -0
- images/charles.png +3 -0
- images/zoom-background.png +3 -0
- respond_to_prompt_actor.py +2 -1
- streamlit_av_queue.py +27 -9
- tests/test_image.py +192 -0
- tests/test_talking.py +65 -0
- text_to_speech_service.py +3 -2
- webrtc_av_queue_actor.py +17 -7
    	
agent_response.py  CHANGED

@@ -9,7 +9,7 @@ class AgentResponse(dict):
        self['llm_sentence'] = ''
        self['llm_sentence_id'] = 0
        self['llm_sentences'] = []
-        self['
+        self['tts_raw_chunk_ref'] = None
        self['tts_raw_chunk_id'] = 0

    def make_copy(self):
    	
charles_actor.py  CHANGED

@@ -33,7 +33,8 @@ class CharlesActor:
        self._state = "000 - creating StreamlitAVQueue"
        from streamlit_av_queue import StreamlitAVQueue
        self._streamlit_av_queue = StreamlitAVQueue()
-        self._out_audio_queue = self._streamlit_av_queue.get_out_audio_queue()
+        self._out_audio_queue = await self._streamlit_av_queue.get_out_audio_queue()
+        self._out_video_queue = await self._streamlit_av_queue.get_out_video_queue()

        print("001 - create RespondToPromptActor")
        self._state = "001 - creating RespondToPromptActor"

@@ -57,6 +58,12 @@ class CharlesActor:
        self._state = "003 - creating Prototypes"
        from prototypes import Prototypes
        self._prototypes = Prototypes()
+
+        print("004 - create animator")
+        self._state = "004 - creating animator"
+        from charles_animator import CharlesAnimator
+        self._animator = CharlesAnimator()
+
        print("010")
        self._needs_init = True
        self._state = "Initialized"

@@ -184,8 +191,19 @@ class CharlesActor:


            await asyncio.sleep(0.01)
+
+            # add observations to the environment state
+            count = len(self._out_audio_queue)
+            is_talking = bool(count > 0)
+            frame = self._animator.update(is_talking)
+            if self._out_video_queue.full():
+                evicted_item = await self._out_video_queue.get_async()
+                del evicted_item
+            frame_ref = ray.put(frame)
+            await self._out_video_queue.put_async(frame_ref)
+
            loops+=1
-            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. {vector_debug}"
+            self._state = f"Processed {total_video_frames} video frames and {total_audio_frames} audio frames, loops: {loops}. loops per second: {loops/(time.time()-start_time):.2f}. Is speaking: {is_talking}({count}). {vector_debug}"

def init_ray():
    try:
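The new loop body infers an "is talking" observation from the depth of the outbound audio queue and publishes one animator frame per iteration, evicting the oldest entry when the bounded video queue is full. A minimal standalone sketch of that publish-with-eviction pattern (not part of the commit; it substitutes asyncio.Queue for ray.util.queue.Queue so it runs without a Ray cluster, and the names are illustrative):

    import asyncio

    async def publish_latest(queue: asyncio.Queue, frame) -> None:
        # Bounded queue: drop the oldest entry instead of blocking the render loop.
        if queue.full():
            _evicted = queue.get_nowait()
            del _evicted
        await queue.put(frame)

    async def demo() -> None:
        out_audio_queue: asyncio.Queue = asyncio.Queue(maxsize=3000)
        out_video_queue: asyncio.Queue = asyncio.Queue(maxsize=10)
        for tick in range(30):
            # One observation per loop: pending audio means the bot is speaking.
            is_talking = out_audio_queue.qsize() > 0
            frame = ("mouth-open" if is_talking else "mouth-closed", tick)  # stand-in for an image
            await publish_latest(out_video_queue, frame)
            await asyncio.sleep(0.01)

    asyncio.run(demo())

Dropping the oldest frame keeps the consumer at most maxsize frames behind real time rather than letting the main loop stall on a full queue.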
    	
charles_animator.py  ADDED

@@ -0,0 +1,128 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+import cv2
+import av
+import numpy as np
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert overlay to BGR if it's in RGB
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+        overlay = np.concatenate([overlay, overlay[:, :, 3:]], axis=2)  # Add alpha channel back
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Overlay the images
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image
+    background = cv2.imread(background, cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than backgroun impage in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+class CharlesAnimator:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+        self.was_talking = False
+        # use static frames for pefromance
+        self.static_frames = create_charles_frames("./images/zoom-background.png", [
+            "./images/charles.png", 
+            "./images/charles-open.png"
+            ])
+
+    def update(self, is_talking):
+        start_talking = True if is_talking and not self.was_talking else False
+        self.was_talking = is_talking
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.mouth_open
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        frame = self.static_frames[1] if self.mouth_open else self.static_frames[0]
+        return frame
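The animator composites a "mouth closed" and a "mouth open" portrait onto the background once at startup, then update() simply alternates between the two cached frames on a randomized 0.1-0.5 s cadence while audio is playing. The core of overlay_images is a per-channel alpha blend; a self-contained sketch of that blend on synthetic arrays (the real code reads the PNG assets under ./images/, which this sketch does not need):

    import numpy as np

    # Synthetic stand-ins: a 480x640 RGB background and a 100x100 RGBA overlay
    # with an alpha ramp, so the sketch runs without the repo's image files.
    background = np.full((480, 640, 3), 200, dtype=np.uint8)
    overlay = np.zeros((100, 100, 4), dtype=np.uint8)
    overlay[..., 0] = 255                                                 # a red square
    overlay[..., 3] = np.linspace(0, 255, 100, dtype=np.uint8)[None, :]   # alpha ramp

    # Same per-channel blend as overlay_images(), placed bottom-right like the bot portrait.
    x, y = 640 - 100, 480 - 100
    alpha = overlay[..., 3] / 255.0
    inverse_alpha = 1.0 - alpha
    region = background[y:y + 100, x:x + 100, :].astype(np.float64)
    for c in range(3):
        region[..., c] = alpha * overlay[..., c] + inverse_alpha * region[..., c]
    background[y:y + 100, x:x + 100, :] = region.astype(np.uint8)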
    	
ffmpeg_converter_actor.py  CHANGED

@@ -19,7 +19,8 @@ class FFMpegConverterActor:
        while True:
            chunk = await self.output_pipe.readexactly(self.buffer_size)
            # print(f"FFMpegConverterActor: read {len(chunk)} bytes")
-
+            chunk_ref = ray.put(chunk)
+            await self.output_queue.put_async(chunk_ref)

    async def start_process(self):
        cmd = [
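Across ffmpeg_converter_actor.py, respond_to_prompt_actor.py and text_to_speech_service.py the commit stops passing raw audio bytes between actors and passes Ray ObjectRefs instead, so each chunk lives once in the object store. A minimal round trip of that pattern as a standalone sketch (assumes a local Ray install; the dict keys mirror agent_response.py):

    import ray

    ray.init(ignore_reinit_error=True)

    chunk = b"\x00" * 4096                       # stand-in for one raw audio chunk
    response = {"tts_raw_chunk_id": 0}           # mirrors the AgentResponse dict

    # Producer side (text_to_speech_service.py / ffmpeg_converter_actor.py):
    # park the payload in the object store and carry only the small ObjectRef.
    response["tts_raw_chunk_ref"] = ray.put(chunk)

    # Consumer side (respond_to_prompt_actor.py): resolve the ref back into bytes.
    audio_chunk = ray.get(response["tts_raw_chunk_ref"])
    assert audio_chunk == chunk

    ray.shutdown()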
    	
images/charles-open.png  ADDED  (Git LFS)
images/charles.png  ADDED  (Git LFS)
images/zoom-background.png  ADDED  (Git LFS)
    	
respond_to_prompt_actor.py  CHANGED

@@ -130,7 +130,8 @@ class SpeechToConverterActor:
        self.ffmpeg_converter_actor.run.remote()
        while True:
            chunk_response = await self.input_queue.get_async()
-
+            audio_chunk_ref = chunk_response['tts_raw_chunk_ref']
+            audio_chunk = ray.get(audio_chunk_ref)
            await self.ffmpeg_converter_actor.push_chunk.remote(audio_chunk)

    async def cancel(self):
    	
streamlit_av_queue.py  CHANGED

@@ -3,9 +3,11 @@ import av
 import asyncio
 from collections import deque
 import threading
+import cv2

 import numpy as np
 import ray
+from ray.util.queue import Queue
 from webrtc_av_queue_actor import WebRtcAVQueueActor
 import pydub
 import torch

@@ -20,7 +22,8 @@ class StreamlitAVQueue:
        self.queue_actor = WebRtcAVQueueActor.options(
            name="WebRtcAVQueueActor", 
            get_if_exists=True,
-            ).remote()
+            ).remote()
+        self._out_video_frame = None

    def set_looking_listening(self, looking, listening: bool):
        with self._lock:

@@ -31,18 +34,33 @@ class StreamlitAVQueue:
            self,
            frames: List[av.VideoFrame],
        ) -> av.VideoFrame:
+        updated_frames = []
        try:
            with self._lock:
                should_look = self._looking
-
-
-
-
+            next_out_video_frame = await self.queue_actor.get_out_video_frame.remote()
+            if next_out_video_frame is not None:
+                self._out_video_frame = next_out_video_frame
+            for i, frame in enumerate(frames):
+                user_image = frame.to_ndarray(format="rgb24")
+                if should_look:
+                    shared_tensor_ref = ray.put(user_image)
                    await self.queue_actor.enqueue_in_video_frame.remote(shared_tensor_ref)
+                if self._out_video_frame is not None:
+                    frame = self._out_video_frame
+                    # resize user image to 1/4 size
+                    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+                    x_user = 0
+                    y_user = frame.shape[0] - user_frame.shape[0]
+                    final_frame = frame.copy()
+                    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+                    frame = av.VideoFrame.from_ndarray(final_frame, format="rgb24")
+
+                updated_frames.append(frame)
            # print (f"tesnor len: {len(shared_tensor)}, tensor shape: {shared_tensor.shape}, tensor type:{shared_tensor.dtype} tensor ref: {shared_tensor_ref}")
        except Exception as e:
            print (e)
-        return
+        return updated_frames

    async def queued_audio_frames_callback(
            self,

@@ -103,8 +121,8 @@ class StreamlitAVQueue:
        shared_tensors = await self.queue_actor.get_in_video_frames.remote()
        return shared_tensors

-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self)->Queue:
        return self.queue_actor.get_out_audio_queue.remote()

-
-
+    def get_out_video_queue(self)->Queue:
+        return self.queue_actor.get_out_video_queue.remote()
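The video callback now prefers the animator's frame and embeds the user's camera image as a quarter-size picture-in-picture in the bottom-left corner. A self-contained sketch of that compositing step on synthetic arrays (stand-ins for the real frames, which arrive as av.VideoFrame objects):

    import cv2
    import numpy as np

    # Synthetic stand-ins for the animator frame and the user's camera image;
    # in the callback both are 640x480 RGB arrays.
    bot_frame = np.zeros((480, 640, 3), dtype=np.uint8)
    user_image = np.full((480, 640, 3), 128, dtype=np.uint8)

    # Shrink the user's image to 1/4 size and paste it into the bottom-left corner,
    # matching the layout the callback builds before re-wrapping the result with
    # av.VideoFrame.from_ndarray(final_frame, format="rgb24").
    user_frame = cv2.resize(user_image,
                            (user_image.shape[1] // 4, user_image.shape[0] // 4),
                            interpolation=cv2.INTER_AREA)
    x_user, y_user = 0, bot_frame.shape[0] - user_frame.shape[0]
    final_frame = bot_frame.copy()
    final_frame[y_user:y_user + user_frame.shape[0], x_user:x_user + user_frame.shape[1]] = user_frame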
    	
tests/test_image.py  ADDED

@@ -0,0 +1,192 @@
+import cv2
+import av
+import numpy as np
+
+def resize_aspect_fit(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+    else:
+        # Original aspect is taller than target
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+    return resized_image
+
+def resize_and_crop(image, dim=(640, 480)):
+    h, w = image.shape[:2]
+    aspect_ratio = w / h
+
+    target_width, target_height = dim
+    target_aspect = target_width / target_height
+
+    if aspect_ratio > target_aspect:
+        # Original aspect is wider than target, fit by height
+        new_height = target_height
+        new_width = int(target_height * aspect_ratio)
+    else:
+        # Original aspect is taller than target, fit by width
+        new_width = target_width
+        new_height = int(target_width / aspect_ratio)
+
+    # Resize the image with new dimensions
+    resized_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
+
+    # Crop to target dimensions
+    x_offset = (new_width - target_width) // 2
+    y_offset = (new_height - target_height) // 2
+
+    cropped_image = resized_image[y_offset:y_offset + target_height, x_offset:x_offset + target_width]
+
+    return cropped_image
+
+def overlay_images(background, overlay, x, y):
+    """
+    Overlay an image with transparency over another image.
+    """
+    # Check if overlay dimensions fit within the background at the given (x, y) position
+    if y + overlay.shape[0] > background.shape[0] or x + overlay.shape[1] > background.shape[1]:
+        raise ValueError("Overlay dimensions exceed background dimensions at the specified position.")
+
+    # Extract the alpha channel from the overlay and create an inverse alpha channel
+    alpha = overlay[:, :, 3] / 255.0
+    inverse_alpha = 1.0 - alpha
+
+    # Convert overlay to BGR if it's in RGB
+    if overlay.shape[2] == 4:  # If it has an alpha channel
+        overlay = cv2.cvtColor(overlay[:, :, :3], cv2.COLOR_RGB2BGR)
+        overlay = np.concatenate([overlay, overlay[:, :, 3:]], axis=2)  # Add alpha channel back
+    else:
+        overlay = cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR)
+
+    # Overlay the images
+    for c in range(0, 3):
+        background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c] = (
+            alpha * overlay[:, :, c] + inverse_alpha * background[y:overlay.shape[0]+y, x:overlay.shape[1]+x, c]
+        )
+
+    return background
+
+
+def transform_frame(user_frame: av.VideoFrame) -> av.VideoFrame:
+    # Convert av.VideoFrame to numpy array (OpenCV format)
+    user_frame_np = np.frombuffer(user_frame.planes[0], np.uint8).reshape(user_frame.height, user_frame.width, -1)
+
+    # Load background image
+    background = cv2.imread("zoom-background.png")
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("bot-image.png", cv2.IMREAD_UNCHANGED)
+
+    # Resize background to match the user frame dimensions
+    aspect_ratio = background.shape[1] / background.shape[0]
+    new_h = user_frame.height
+    new_w = int(new_h * aspect_ratio)
+    background_resized = cv2.resize(background, (new_w, new_h))
+
+    # Crop the background if it exceeds the user frame width
+    if new_w > user_frame.width:
+        crop_x1 = (new_w - user_frame.width) // 2
+        crop_x2 = crop_x1 + user_frame.width
+        background_resized = background_resized[:, crop_x1:crop_x2, :3]
+
+    # Overlay bot image on the right-hand side
+    x_bot = background_resized.shape[1] - bot_image.shape[1]
+    y_bot = 0
+    background_resized = overlay_images(background_resized, bot_image, x_bot, y_bot)
+
+    # Overlay user's video frame in the bottom-left corner
+    x_user = 0
+    y_user = background_resized.shape[0] - user_frame.height
+    background_resized[y_user:user_frame.height+y_user, x_user:user_frame.width+x_user, :3] = user_frame_np
+
+    # Convert the final frame back to av.VideoFrame
+    output_frame = av.VideoFrame.from_ndarray(background_resized, format="bgr24")
+
+    return output_frame
+
+def create_charles_frames(background, charles_frames):
+    output_frames = []
+    # Load background image
+    background = cv2.imread(background, cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (640, 480))
+
+    for bot_image_path in charles_frames:
+        bot_image = cv2.imread(bot_image_path, cv2.IMREAD_UNCHANGED)
+
+        # assert bot image is square
+        assert bot_image.shape[0] == bot_image.shape[1]
+
+        # resize bot image if it is larger than backgroun impage in any direction
+        if bot_image.shape[0] > background.shape[0]:
+            bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+        # Overlay bot image on the right-hand side
+        x_bot = background.shape[1] - bot_image.shape[1]
+        y_bot = background.shape[0] - bot_image.shape[0]
+        background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+        output_frames.append(background_with_bot)
+
+    return output_frames
+
+
+def test_create_bot_frames():
+    frames = create_charles_frames("./images/zoom-background.png", ["./images/charles.png", "./images/charles-open.png"])
+    index = 0
+    for frame in frames:
+        final_frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        cv2.imwrite(f"./images/charles_frame_{index}.jpg", final_frame_bgr)
+        index += 1
+
+def test_overlay():
+    # Load mock user image
+    user_image = cv2.imread("./prototypes/person-016.jpg", cv2.COLOR_BGR2RGB)
+    user_image = cv2.cvtColor(user_image, cv2.COLOR_BGR2RGB)
+    # resize to 640x480, handle that this is smaller and can be cropped
+    user_image = resize_and_crop(user_image, (640, 480))
+
+    # Load background image
+    background = cv2.imread("./images/zoom-background.png", cv2.COLOR_BGR2RGB)
+    background = cv2.cvtColor(background, cv2.COLOR_BGR2RGB)
+    # resize background to match user image
+    background = resize_and_crop(background, (user_image.shape[:2][1], user_image.shape[:2][0]))
+
+    # Load bot image (assuming it has an alpha channel for transparency)
+    bot_image = cv2.imread("./images/charles-open.png", cv2.IMREAD_UNCHANGED)
+
+    # resize bot image if it is larger than backgroun impage in any direction
+    if bot_image.shape[0] > background.shape[0]:
+        bot_image = cv2.resize(bot_image, (background.shape[0], background.shape[0]), interpolation=cv2.INTER_AREA)
+
+    # Overlay bot image on the right-hand side
+    x_bot = background.shape[1] - bot_image.shape[1]
+    y_bot = background.shape[0] - bot_image.shape[0]
+    background_with_bot = overlay_images(background.copy(), bot_image, x_bot, y_bot)
+
+    # Overlay user's frame in the bottom-left corner (1/3 size)
+    # resize user image to 1/4 size
+    user_frame = cv2.resize(user_image, (user_image.shape[1]//4, user_image.shape[0]//4), interpolation=cv2.INTER_AREA)
+    x_user = 0
+    y_user = background.shape[0] - user_frame.shape[0]
+    final_frame = background_with_bot.copy()
+    # final_frame[y_user:user_frame.shape[0]+y_user, x_user:user_frame.shape[1]+x_user, :3] = user_frame
+    final_frame[y_user:y_user+user_frame.shape[0], x_user:x_user+user_frame.shape[1]] = user_frame
+
+    # Save the final frame as JPEG
+    final_frame_bgr = cv2.cvtColor(final_frame, cv2.COLOR_RGB2BGR)
+    cv2.imwrite("./images/final_frame.jpg", final_frame_bgr)
+
+test_overlay()
+test_create_bot_frames()
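Both test helpers assume the repo's image assets exist on disk and write JPEGs back into ./images/. The geometric invariant they exercise is that resize_and_crop always returns exactly the requested dimensions; a quick standalone check on a synthetic 16:9 image (the helper is duplicated here so the check runs outside the test file):

    import cv2
    import numpy as np

    def resize_and_crop(image, dim=(640, 480)):
        # Same fit-then-center-crop logic as the helper in tests/test_image.py.
        h, w = image.shape[:2]
        aspect_ratio = w / h
        target_width, target_height = dim
        if aspect_ratio > target_width / target_height:
            new_height = target_height
            new_width = int(target_height * aspect_ratio)
        else:
            new_width = target_width
            new_height = int(target_width / aspect_ratio)
        resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
        x_offset = (new_width - target_width) // 2
        y_offset = (new_height - target_height) // 2
        return resized[y_offset:y_offset + target_height, x_offset:x_offset + target_width]

    image = np.zeros((1080, 1920, 3), dtype=np.uint8)            # synthetic 16:9 input
    assert resize_and_crop(image, (640, 480)).shape[:2] == (480, 640)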
    	
tests/test_talking.py  ADDED

@@ -0,0 +1,65 @@
+# Modifying the code to ensure the mouth is open when the character starts talking
+
+import random
+import time
+
+
+class CharacterFace:
+    def __init__(self):
+        self.mouth_open = False
+        self.last_change_time = 0
+        self.next_change_in = 0
+
+    def update(self, is_talking, start_talking=False):
+        current_time = time.time()
+
+        # Open the mouth when the character starts talking
+        if start_talking:
+            self.mouth_open = True
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+            return self.mouth_open
+
+        # Initialize the next change time if it's zero.
+        if self.next_change_in == 0:
+            self.next_change_in = current_time + random.uniform(0.1, 0.5)
+
+        # Update the mouth state only if the character is talking.
+        if is_talking:
+            # Check if it's time to change the mouth state.
+            if current_time >= self.next_change_in:
+                self.mouth_open = not self.mouth_open
+                self.next_change_in = current_time + random.uniform(0.1, 0.5)
+        else:
+            # Close the mouth if the character is not talking.
+            self.mouth_open = False
+
+        return self.mouth_open
+
+
+def _debug_test():
+    # Example usage
+    face = CharacterFace()
+    output = []
+
+    # Initialize variables to control talk and pause durations
+    next_talk_time = 0
+    next_pause_time = 0
+    is_talking = False
+
+    # Simulate the character talking and not talking with variable durations
+    for _ in range(500):  # Increase the number of iterations for a longer simulation
+        current_time = time.time()
+        start_talking = False
+
+        if is_talking and current_time >= next_talk_time:
+            is_talking = False
+            next_pause_time = current_time + random.uniform(0.5, 3.0)
+
+        if not is_talking and current_time >= next_pause_time:
+            is_talking = True
+            start_talking = True  # Set flag to open mouth at the start of talking
+            next_talk_time = current_time + random.uniform(1.0, 5.0)
+
+        mouth_open = face.update(is_talking, start_talking)
+        print(f"Is Talking: {is_talking}, Mouth Open: {mouth_open}")
+        time.sleep(random.uniform(0.1, 0.5))
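As committed, _debug_test() is defined but never invoked, so running the file directly produces no output, and pytest will not collect it because the name does not start with test_. A conventional entry point (not part of the commit) would be:

    # Hypothetical guard, not in the commit: lets `python tests/test_talking.py`
    # run the simulation directly.
    if __name__ == "__main__":
        _debug_test()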
    	
text_to_speech_service.py  CHANGED

@@ -5,7 +5,7 @@ from elevenlabs import generate, play
 from elevenlabs import set_api_key
 from elevenlabs import generate, stream
 from agent_response import AgentResponse
-
+import ray

 class TextToSpeechService:
     def __init__(self, voice_id="Bella", model_id="eleven_monolingual_v1"):

@@ -60,7 +60,8 @@ class TextToSpeechService:

            # Run next(stream) in a separate thread to avoid blocking the event loop
            chunk = await asyncio.to_thread(next, stream)
-
+            chunk_ref = ray.put(chunk)
+            sentence_response['tts_raw_chunk_ref'] = chunk_ref
            if cancel_event.is_set():
                return
            yield sentence_response
    	
webrtc_av_queue_actor.py  CHANGED

@@ -8,9 +8,10 @@ import numpy as np
 @ray.remote
 class WebRtcAVQueueActor:
     def __init__(self):
-        self.in_audio_queue = Queue(maxsize=
-        self.in_video_queue = Queue(maxsize=
-        self.out_audio_queue = Queue(maxsize=
+        self.in_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.in_video_queue = Queue(maxsize=10)  # Adjust the size as needed
+        self.out_audio_queue = Queue(maxsize=3000)  # Adjust the size as needed
+        self.out_video_queue = Queue(maxsize=10)  # Adjust the size as needed


    async def enqueue_in_video_frame(self, shared_tensor_ref):

@@ -25,7 +26,6 @@ class WebRtcAVQueueActor:
            del evicted_item
        await self.in_audio_queue.put_async(shared_buffer_ref)

-
    async def get_in_audio_frames(self):
        audio_frames = []
        if self.in_audio_queue.empty():

@@ -44,11 +44,21 @@ class WebRtcAVQueueActor:
            video_frames.append(shared_tensor_ref)
        return video_frames

-    def get_out_audio_queue(self):
+    def get_out_audio_queue(self)->Queue:
        return self.out_audio_queue

+    def get_out_video_queue(self)->Queue:
+        return self.out_video_queue
+
    async def get_out_audio_frame(self):
        if self.out_audio_queue.empty():
            return None
-
-        return
+        frame = await self.out_audio_queue.get_async()
+        return frame
+
+    async def get_out_video_frame(self):
+        if self.out_video_queue.empty():
+            return None
+        while not self.out_video_queue.empty():
+            frame = await self.out_video_queue.get_async()
+        return frame
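The new get_out_video_frame drains the queue and keeps only the newest entry, so a slow WebRTC consumer never renders stale video. A standalone sketch of that drain-to-latest behaviour (using asyncio.Queue in place of the Ray queue so it runs by itself):

    import asyncio

    async def drain_to_latest(queue: asyncio.Queue):
        # Mirrors get_out_video_frame: skip stale frames, return only the newest,
        # or None when nothing is pending.
        if queue.empty():
            return None
        frame = None
        while not queue.empty():
            frame = queue.get_nowait()
        return frame

    async def demo() -> None:
        q: asyncio.Queue = asyncio.Queue(maxsize=10)
        for i in range(5):
            await q.put(f"frame-{i}")
        assert await drain_to_latest(q) == "frame-4"
        assert await drain_to_latest(q) is None

    asyncio.run(demo())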
