freddyaboulton HF Staff committed on
Commit
cfb8318
·
verified ·
1 Parent(s): d17661f

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -5
  2. app.py +11 -8
README.md CHANGED
@@ -9,11 +9,7 @@ app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Gemini understands audio and video!
12
- tags:
13
- - webrtc
14
- - websocket
15
- - gradio
16
- - secret|HF_TOKEN secret|GEMINI_API_KEY
17
  ---
18
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
9
  pinned: false
10
  license: mit
11
  short_description: Gemini understands audio and video!
12
+ tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GEMINI_API_KEY]
 
 
 
 
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -63,7 +63,8 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
63
  )
64
  config = {"response_modalities": ["AUDIO"]}
65
  async with client.aio.live.connect(
66
- model="gemini-2.0-flash-exp", config=config
 
67
  ) as session:
68
  self.session = session
69
  while not self.quit.is_set():
@@ -78,6 +79,8 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
78
  break
79
 
80
  async def video_receive(self, frame: np.ndarray):
 
 
81
  if self.session:
82
  # send image every 1 second
83
  print(time.time() - self.last_frame_time)
@@ -87,10 +90,8 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
87
  if self.latest_args[1] is not None:
88
  await self.session.send(input=encode_image(self.latest_args[1]))
89
 
90
- self.video_queue.put_nowait(frame)
91
-
92
  async def video_emit(self):
93
- frame = await self.video_queue.get()
94
  if frame is not None:
95
  return frame
96
  else:
@@ -104,8 +105,10 @@ class GeminiHandler(AsyncAudioVideoStreamHandler):
104
  await self.session.send(input=audio_message)
105
 
106
  async def emit(self):
107
- array = await self.audio_queue.get()
108
- return (self.output_sample_rate, array)
 
 
109
 
110
  async def shutdown(self) -> None:
111
  if self.session:
@@ -119,7 +122,7 @@ stream = Stream(
119
  modality="audio-video",
120
  mode="send-receive",
121
  rtc_configuration=get_cloudflare_turn_credentials_async,
122
- time_limit=90 if get_space() else None,
123
  additional_inputs=[
124
  gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
125
  ],
@@ -172,7 +175,7 @@ with gr.Blocks(css=css) as demo:
172
  GeminiHandler(),
173
  inputs=[webrtc, image_input],
174
  outputs=[webrtc],
175
- time_limit=60 if get_space() else None,
176
  concurrency_limit=2 if get_space() else None,
177
  )
178
 
 
63
  )
64
  config = {"response_modalities": ["AUDIO"]}
65
  async with client.aio.live.connect(
66
+ model="gemini-2.0-flash-exp",
67
+ config=config, # type: ignore
68
  ) as session:
69
  self.session = session
70
  while not self.quit.is_set():
 
79
  break
80
 
81
  async def video_receive(self, frame: np.ndarray):
82
+ self.video_queue.put_nowait(frame)
83
+
84
  if self.session:
85
  # send image every 1 second
86
  print(time.time() - self.last_frame_time)
 
90
  if self.latest_args[1] is not None:
91
  await self.session.send(input=encode_image(self.latest_args[1]))
92
 
 
 
93
  async def video_emit(self):
94
+ frame = await wait_for_item(self.video_queue, 0.01)
95
  if frame is not None:
96
  return frame
97
  else:
 
105
  await self.session.send(input=audio_message)
106
 
107
  async def emit(self):
108
+ array = await wait_for_item(self.audio_queue, 0.01)
109
+ if array is not None:
110
+ return (self.output_sample_rate, array)
111
+ return array
112
 
113
  async def shutdown(self) -> None:
114
  if self.session:
 
122
  modality="audio-video",
123
  mode="send-receive",
124
  rtc_configuration=get_cloudflare_turn_credentials_async,
125
+ time_limit=180 if get_space() else None,
126
  additional_inputs=[
127
  gr.Image(label="Image", type="numpy", sources=["upload", "clipboard"])
128
  ],
 
175
  GeminiHandler(),
176
  inputs=[webrtc, image_input],
177
  outputs=[webrtc],
178
+ time_limit=180 if get_space() else None,
179
  concurrency_limit=2 if get_space() else None,
180
  )
181