Marcus Vinicius Zerbini Canhaço committed on
Commit
479ff3a
·
1 Parent(s): ea56678

feat: atualização do detector com otimizações para GPU T4

Browse files
src/domain/detectors/gpu.py CHANGED
@@ -164,7 +164,7 @@ class WeaponDetectorGPU(BaseDetector):
164
 
165
  # Processar frames em batch
166
  t0 = time.time()
167
- batch_size = 4 # Reduzido para evitar erros de shape
168
  detections_by_frame = []
169
 
170
  for i in range(0, len(frames), batch_size):
@@ -185,17 +185,74 @@ class WeaponDetectorGPU(BaseDetector):
185
  return_tensors="pt",
186
  padding=True
187
  )
188
- batch_inputs = {
189
- key: val.to(self.device)
190
- for key, val in batch_inputs.items()
191
- }
192
 
193
  # Validar shapes antes da inferência
194
  if not self._validate_batch_shapes(batch_inputs):
195
- logger.warning(f"Shape inválido detectado no batch {i}, pulando...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  continue
197
 
198
- # Inferência em batch
 
 
 
 
 
199
  with torch.no_grad():
200
  inputs = {**batch_inputs, **self.processed_text}
201
  outputs = self.owlv2_model(**inputs)
 
164
 
165
  # Processar frames em batch
166
  t0 = time.time()
167
+ batch_size = 2 # Reduzido ainda mais para garantir compatibilidade
168
  detections_by_frame = []
169
 
170
  for i in range(0, len(frames), batch_size):
 
185
  return_tensors="pt",
186
  padding=True
187
  )
 
 
 
 
188
 
189
  # Validar shapes antes da inferência
190
  if not self._validate_batch_shapes(batch_inputs):
191
+ logger.warning(f"Shape inválido detectado no batch {i}, processando frames individualmente...")
192
+ # Processar frames individualmente
193
+ for frame_idx, frame_pil in enumerate(batch_pil_frames):
194
+ try:
195
+ single_input = self.owlv2_processor(
196
+ images=frame_pil,
197
+ return_tensors="pt"
198
+ )
199
+ single_input = {
200
+ key: val.to(self.device)
201
+ for key, val in single_input.items()
202
+ }
203
+
204
+ with torch.no_grad():
205
+ inputs = {**single_input, **self.processed_text}
206
+ outputs = self.owlv2_model(**inputs)
207
+
208
+ target_sizes = torch.tensor([frame_pil.size[::-1]], device=self.device)
209
+ results = self.owlv2_processor.post_process_grounded_object_detection(
210
+ outputs=outputs,
211
+ target_sizes=target_sizes,
212
+ threshold=threshold
213
+ )
214
+
215
+ if len(results[0]["scores"]) > 0:
216
+ scores = results[0]["scores"]
217
+ boxes = results[0]["boxes"]
218
+ labels = results[0]["labels"]
219
+
220
+ frame_detections = []
221
+ for score, box, label in zip(scores, boxes, labels):
222
+ score_val = score.item()
223
+ if score_val >= threshold:
224
+ label_idx = min(label.item(), len(self.text_queries) - 1)
225
+ label_text = self.text_queries[label_idx]
226
+ frame_detections.append({
227
+ "confidence": round(score_val * 100, 2),
228
+ "box": [int(x) for x in box.tolist()],
229
+ "label": label_text,
230
+ "frame": i + frame_idx,
231
+ "timestamp": (i + frame_idx) / (fps or 2)
232
+ })
233
+
234
+ if frame_detections:
235
+ frame_detections = self._apply_nms(frame_detections)
236
+ detections_by_frame.extend(frame_detections)
237
+
238
+ except Exception as e:
239
+ logger.error(f"Erro ao processar frame individual {i + frame_idx}: {str(e)}")
240
+ continue
241
+
242
+ finally:
243
+ if 'single_input' in locals():
244
+ del single_input
245
+ if 'outputs' in locals():
246
+ del outputs
247
+ torch.cuda.empty_cache()
248
  continue
249
 
250
+ # Processar batch normalmente
251
+ batch_inputs = {
252
+ key: val.to(self.device)
253
+ for key, val in batch_inputs.items()
254
+ }
255
+
256
  with torch.no_grad():
257
  inputs = {**batch_inputs, **self.processed_text}
258
  outputs = self.owlv2_model(**inputs)
src/presentation/web/gradio_interface.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  import os
3
- from typing import Tuple, Any
4
  from pathlib import Path
5
  from src.application.use_cases.process_video import ProcessVideoUseCase, ProcessVideoRequest
6
  from src.infrastructure.services.weapon_detector import WeaponDetectorService
@@ -259,74 +259,89 @@ class GradioInterface:
259
  resolution: str = None,
260
  notification_type: str = None,
261
  notification_target: str = None
262
- ) -> Tuple[str, Any]:
263
  """Processa o vídeo usando o caso de uso."""
264
- if not video_path:
265
- return "Erro: Nenhum vídeo fornecido", {}
 
 
 
 
 
266
 
267
- # Usar valores padrão se não especificados
268
- fps = fps or self.default_fps
269
- resolution = resolution or self.default_resolution
270
-
271
- request = ProcessVideoRequest(
272
- video_path=video_path,
273
- threshold=threshold,
274
- fps=fps,
275
- resolution=int(resolution),
276
- notification_type=notification_type,
277
- notification_target=notification_target
278
- )
279
-
280
- response = self.use_case.execute(request)
281
-
282
- # Formatar saída para o Gradio
283
- status_color = "#ff0000" if response.detection_result.detections else "#00ff00"
284
- status_html = f"""
285
- <div style='padding: 1em; background: {status_color}20; border-radius: 8px;'>
286
- <h3 style='color: {status_color}; margin: 0;'>
287
- {"⚠️ RISCO DETECTADO" if response.detection_result.detections else "✅ SEGURO"}
288
- </h3>
289
- <p style='margin: 0.5em 0;'>
290
- Processado em: {response.detection_result.device_type}<br>
291
- Total de detecções: {len(response.detection_result.detections)}<br>
292
- Frames analisados: {response.detection_result.frames_analyzed}<br>
293
- Tempo total: {response.detection_result.total_time:.2f}s
294
- </p>
295
- </div>
296
- """
297
-
298
- if response.detection_result.detections:
299
- status_html += "<div style='margin-top: 1em;'><h4>Detecções:</h4><ul>"
300
- for det in response.detection_result.detections[:5]: # Mostrar até 5 detecções
301
- confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
302
- status_html += f"""
303
- <li style='margin: 0.5em 0;'>
304
- <strong>{det.label}</strong><br>
305
- Confiança: {confidence_pct:.1f}%<br>
306
- Frame: {det.frame}
307
- </li>"""
308
- if len(response.detection_result.detections) > 5:
309
- status_html += f"<li>... e mais {len(response.detection_result.detections) - 5} detecção(ões)</li>"
310
- status_html += "</ul></div>"
311
-
312
- # Preparar JSON técnico
313
- technical_data = {
314
- "device_type": response.detection_result.device_type,
315
- "total_detections": len(response.detection_result.detections),
316
- "frames_analyzed": response.detection_result.frames_analyzed,
317
- "total_time": round(response.detection_result.total_time, 2),
318
- "detections": [
319
- {
320
  "label": det.label,
321
  "confidence": round(det.confidence * 100 if det.confidence <= 1.0 else det.confidence, 2),
322
  "frame": det.frame,
323
- "timestamp": round(det.timestamp, 2) if hasattr(det, "timestamp") else None
324
- }
325
- for det in response.detection_result.detections[:10] # Limitar a 10 detecções no JSON
326
- ]
327
- }
328
-
329
- return (
330
- response.status_message,
331
- technical_data # Retorna dicionário Python em vez de HTML
332
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
  import os
3
+ from typing import Tuple, Any, Dict
4
  from pathlib import Path
5
  from src.application.use_cases.process_video import ProcessVideoUseCase, ProcessVideoRequest
6
  from src.infrastructure.services.weapon_detector import WeaponDetectorService
 
259
  resolution: str = None,
260
  notification_type: str = None,
261
  notification_target: str = None
262
+ ) -> Tuple[str, Dict[str, Any]]:
263
  """Processa o vídeo usando o caso de uso."""
264
+ try:
265
+ if not video_path:
266
+ return "Erro: Nenhum vídeo fornecido", {}
267
+
268
+ # Usar valores padrão se não especificados
269
+ fps = fps or self.default_fps
270
+ resolution = resolution or self.default_resolution
271
 
272
+ request = ProcessVideoRequest(
273
+ video_path=video_path,
274
+ threshold=threshold,
275
+ fps=fps,
276
+ resolution=int(resolution),
277
+ notification_type=notification_type,
278
+ notification_target=notification_target
279
+ )
280
+
281
+ response = self.use_case.execute(request)
282
+
283
+ # Formatar mensagem de status
284
+ status_msg = self._format_status_message(response.detection_result)
285
+
286
+ # Preparar JSON técnico
287
+ technical_data = {
288
+ "device_info": {
289
+ "type": response.detection_result.device_type,
290
+ "memory": response.memory_info,
291
+ "details": response.device_info
292
+ },
293
+ "processing_stats": {
294
+ "total_detections": len(response.detection_result.detections),
295
+ "frames_analyzed": response.detection_result.frames_analyzed,
296
+ "total_time": round(response.detection_result.total_time, 2),
297
+ "frame_extraction_time": round(response.detection_result.frame_extraction_time, 2),
298
+ "analysis_time": round(response.detection_result.analysis_time, 2)
299
+ },
300
+ "detections": []
301
+ }
302
+
303
+ # Adicionar detecções ao JSON
304
+ for det in response.detection_result.detections[:10]: # Limitar a 10 detecções
305
+ technical_data["detections"].append({
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  "label": det.label,
307
  "confidence": round(det.confidence * 100 if det.confidence <= 1.0 else det.confidence, 2),
308
  "frame": det.frame,
309
+ "timestamp": round(det.timestamp, 2) if hasattr(det, "timestamp") else None,
310
+ "box": det.box if hasattr(det, "box") else None
311
+ })
312
+
313
+ return status_msg, technical_data
314
+
315
+ except Exception as e:
316
+ logger.error(f"Erro ao processar vídeo: {str(e)}")
317
+ return "Erro ao processar o vídeo. Por favor, tente novamente.", {
318
+ "error": str(e),
319
+ "device_type": "unknown",
320
+ "total_detections": 0,
321
+ "frames_analyzed": 0
322
+ }
323
+
324
+ def _format_status_message(self, result) -> str:
325
+ """Formata a mensagem de status do processamento."""
326
+ try:
327
+ status = "⚠️ RISCO DETECTADO" if result.detections else "✅ SEGURO"
328
+
329
+ message = f"""Status: {status}
330
+ Processado em: {result.device_type}
331
+ Total de detecções: {len(result.detections)}
332
+ Frames analisados: {result.frames_analyzed}
333
+ Tempo total: {result.total_time:.2f}s"""
334
+
335
+ if result.detections:
336
+ message += "\n\nDetecções encontradas:"
337
+ for i, det in enumerate(result.detections[:5], 1):
338
+ confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
339
+ message += f"\n{i}. {det.label} (Confiança: {confidence_pct:.1f}%, Frame: {det.frame})"
340
+ if len(result.detections) > 5:
341
+ message += f"\n... e mais {len(result.detections) - 5} detecção(ões)"
342
+
343
+ return message
344
+
345
+ except Exception as e:
346
+ logger.error(f"Erro ao formatar mensagem de status: {str(e)}")
347
+ return "Erro ao processar o vídeo. Por favor, tente novamente."