aiqcamp committed on
Commit
de04166
·
verified ·
1 Parent(s): 408e58c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -58
app.py CHANGED
@@ -59,46 +59,73 @@ class WebcamProcessor:
59
  self.last_process_time = 0
60
 
61
  def start(self):
62
- self.is_running = True
63
- self.capture = cv2.VideoCapture(0)
64
- self.capture_thread = threading.Thread(target=self._capture_loop)
65
- self.process_thread = threading.Thread(target=self._process_loop)
66
- self.capture_thread.start()
67
- self.process_thread.start()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
  def stop(self):
70
- self.is_running = False
71
- if hasattr(self, 'capture_thread'):
72
- self.capture_thread.join()
73
- self.process_thread.join()
74
- self.capture.release()
 
 
 
 
 
 
75
 
76
  def _capture_loop(self):
77
  while self.is_running:
78
- ret, frame = self.capture.read()
79
- if ret:
80
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
81
- frame = cv2.resize(frame, (640, 480))
82
- current_time = time.time()
83
- if current_time - self.last_process_time >= self.frame_interval:
84
- self.frame_buffer.append(frame)
85
- self.last_process_time = current_time
 
 
 
 
 
86
 
87
  def _process_loop(self):
88
  while self.is_running:
89
- if len(self.frame_buffer) >= self.buffer_size:
90
- frames = list(self.frame_buffer)
91
- try:
92
  result = self.model.predict_forward(
93
  video=frames,
94
  text="<image>Describe what you see",
95
  tokenizer=self.tokenizer
96
  )
97
  self.result_queue.put(result)
98
- except Exception as e:
99
- print(f"Processing error: {e}")
100
- self.frame_buffer.clear()
101
- time.sleep(0.1)
 
 
102
 
103
  from third_parts import VideoReader
104
  def read_video(video_path, video_interval):
@@ -229,28 +256,34 @@ def video_vision(video_input_path, prompt, video_interval):
229
  else:
230
  return prediction, None
231
 
232
- @spaces.GPU
233
  def webcam_vision(prompt):
234
- is_korean = any(ord('가') <= ord(char) <= ord('힣') for char in prompt)
235
-
236
- if not hasattr(webcam_vision, 'processor'):
237
- webcam_vision.processor = WebcamProcessor(model, tokenizer)
238
-
239
- if not webcam_vision.processor.is_running:
240
- webcam_vision.processor.start()
241
-
242
  try:
243
- result = webcam_vision.processor.result_queue.get(timeout=5)
244
- prediction = result['prediction']
245
 
246
- if is_korean:
247
- prediction = translate_to_korean(prediction)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
- return prediction
250
- except queue.Empty:
251
- return "No results available yet"
252
  except Exception as e:
253
- return f"Error: {str(e)}"
 
254
 
255
  # Gradio UI
256
  with gr.Blocks(analytics_enabled=False) as demo:
@@ -292,33 +325,33 @@ with gr.Blocks(analytics_enabled=False) as demo:
292
  outputs = [vid_output_res, output_video]
293
  )
294
 
 
295
  with gr.Tab("Webcam"):
296
  with gr.Row():
297
  with gr.Column():
298
- webcam_input = gr.Image(label="Webcam Input", sources=["webcam"], streaming=True)
 
 
 
 
299
  with gr.Row():
300
- webcam_instruction = gr.Textbox(
301
- label="Instruction",
302
- placeholder="Enter instruction here...",
303
- scale=4
304
- )
305
- start_button = gr.Button("Start", scale=1)
306
- stop_button = gr.Button("Stop", scale=1)
307
  with gr.Column():
308
  webcam_output = gr.Textbox(label="Response")
309
- processed_view = gr.Image(label="Processed View")
310
-
311
- status_text = gr.Textbox(label="Status", value="Ready")
312
-
313
  start_button.click(
314
- fn=lambda x: webcam_vision(x),
315
  inputs=[webcam_instruction],
316
  outputs=[webcam_output]
317
  )
318
-
319
  stop_button.click(
320
- fn=lambda: "Stopped" if hasattr(webcam_vision, 'processor') and webcam_vision.processor.stop() else "Not running",
321
  outputs=[status_text]
322
  )
323
 
 
 
324
  demo.queue().launch(show_api=False, show_error=True)
 
59
  self.last_process_time = 0
60
 
61
  def start(self):
62
+ try:
63
+ self.is_running = True
64
+ self.capture = cv2.VideoCapture(0)
65
+ if not self.capture.isOpened():
66
+ raise Exception("Failed to open webcam")
67
+
68
+ # Set camera properties
69
+ self.capture.set(cv2.CAP_PROP_FRAME_WIDTH, 640)
70
+ self.capture.set(cv2.CAP_PROP_FRAME_HEIGHT, 480)
71
+
72
+ self.capture_thread = threading.Thread(target=self._capture_loop)
73
+ self.process_thread = threading.Thread(target=self._process_loop)
74
+ self.capture_thread.daemon = True
75
+ self.process_thread.daemon = True
76
+ self.capture_thread.start()
77
+ self.process_thread.start()
78
+ return "Webcam started successfully"
79
+ except Exception as e:
80
+ self.is_running = False
81
+ return f"Failed to start webcam: {str(e)}"
82
 
83
  def stop(self):
84
+ try:
85
+ self.is_running = False
86
+ if hasattr(self, 'capture_thread'):
87
+ self.capture_thread.join(timeout=1.0)
88
+ if hasattr(self, 'process_thread'):
89
+ self.process_thread.join(timeout=1.0)
90
+ if hasattr(self, 'capture'):
91
+ self.capture.release()
92
+ return "Webcam stopped successfully"
93
+ except Exception as e:
94
+ return f"Error stopping webcam: {str(e)}"
95
 
96
  def _capture_loop(self):
97
  while self.is_running:
98
+ try:
99
+ ret, frame = self.capture.read()
100
+ if ret:
101
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
102
+ frame = cv2.resize(frame, (640, 480))
103
+ current_time = time.time()
104
+ if current_time - self.last_process_time >= self.frame_interval:
105
+ self.frame_buffer.append(frame)
106
+ self.last_process_time = current_time
107
+ time.sleep(0.01) # Small delay to prevent CPU overuse
108
+ except Exception as e:
109
+ print(f"Capture error: {e}")
110
+ time.sleep(0.1)
111
 
112
  def _process_loop(self):
113
  while self.is_running:
114
+ try:
115
+ if len(self.frame_buffer) >= self.buffer_size:
116
+ frames = list(self.frame_buffer)
117
  result = self.model.predict_forward(
118
  video=frames,
119
  text="<image>Describe what you see",
120
  tokenizer=self.tokenizer
121
  )
122
  self.result_queue.put(result)
123
+ self.frame_buffer.clear()
124
+ time.sleep(0.1)
125
+ except Exception as e:
126
+ print(f"Processing error: {e}")
127
+ time.sleep(0.1)
128
+
129
 
130
  from third_parts import VideoReader
131
  def read_video(video_path, video_interval):
 
256
  else:
257
  return prediction, None
258
 
 
259
  def webcam_vision(prompt):
 
 
 
 
 
 
 
 
260
  try:
261
+ if not hasattr(webcam_vision, 'processor'):
262
+ webcam_vision.processor = WebcamProcessor(model, tokenizer)
263
 
264
+ if not webcam_vision.processor.is_running:
265
+ status = webcam_vision.processor.start()
266
+ if "Failed" in status:
267
+ return f"Error: {status}"
268
+
269
+ try:
270
+ result = webcam_vision.processor.result_queue.get(timeout=5)
271
+ prediction = result['prediction']
272
+
273
+ # Check if Korean translation is needed
274
+ is_korean = any(ord('κ°€') <= ord(char) <= ord('힣') for char in prompt)
275
+ if is_korean:
276
+ prediction = translate_to_korean(prediction)
277
+
278
+ return prediction
279
+ except queue.Empty:
280
+ return "No results available yet. Please try again."
281
+ except Exception as e:
282
+ return f"Processing error: {str(e)}"
283
 
 
 
 
284
  except Exception as e:
285
+ return f"System error: {str(e)}"
286
+
287
 
288
  # Gradio UI
289
  with gr.Blocks(analytics_enabled=False) as demo:
 
325
  outputs = [vid_output_res, output_video]
326
  )
327
 
328
+
329
  with gr.Tab("Webcam"):
330
  with gr.Row():
331
  with gr.Column():
332
+ webcam_instruction = gr.Textbox(
333
+ label="Instruction",
334
+ placeholder="Enter instruction here...",
335
+ scale=4
336
+ )
337
  with gr.Row():
338
+ start_button = gr.Button("Start Processing")
339
+ stop_button = gr.Button("Stop Processing")
 
 
 
 
 
340
  with gr.Column():
341
  webcam_output = gr.Textbox(label="Response")
342
+ status_text = gr.Textbox(label="Status", value="Ready")
343
+
 
 
344
  start_button.click(
345
+ fn=webcam_vision,
346
  inputs=[webcam_instruction],
347
  outputs=[webcam_output]
348
  )
349
+
350
  stop_button.click(
351
+ fn=lambda: webcam_vision.processor.stop() if hasattr(webcam_vision, 'processor') else "Not running",
352
  outputs=[status_text]
353
  )
354
 
355
+
356
+
357
  demo.queue().launch(show_api=False, show_error=True)