Spaces:

marcuscanhaco
/

weapon-detection-app

Runtime error

App Files Community

Marcus Vinicius Zerbini Canhaço committed on Feb 13

Commit

d78970a

1 Parent(s): eb1a752

feat: atualização do detector com otimizações para GPU T4

Browse files

Files changed (6) hide show

README.md +18 -19
src/application/use_cases/process_video.py +5 -4
src/domain/detectors/gpu.py +68 -21
src/infrastructure/services/weapon_detector.py +13 -11
src/main.py +7 -4
src/presentation/web/gradio_interface.py +31 -1

README.md CHANGED Viewed

@@ -1,29 +1,28 @@
----
-title: Weapon Detection App
-emoji: 🚨
-colorFrom: red
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.15.0
-app_file: app.py
-pinned: false
-license: mit
-hardware: true
-resources:
-  accelerator: T4
-  gpu: true
----
-Sistema de detecção de objetos de risco em vídeos usando OWL-ViT e processamento
 GPU/CPU otimizado.
 [![Open in Hugging Face][hf-badge]][hf-space]
 [![GitHub][gh-badge]][gh-repo]
 [hf-badge]: https://img.shields.io/badge/Hugging%20Face-Spaces-yellow
-[hf-space]: https://huggingface.co/spaces/seu-usuario/seu-espaco
 [gh-badge]: https://img.shields.io/badge/GitHub-Repo-blue
-[gh-repo]: https://github.com/seu-usuario/hackatoon-1iadt
 ## Funcionalidades

+# Detecção de Armas - FIAP VisionGuard - Hackathon 1IADT
+## Sobre o Projeto
+A FIAP VisionGuard é uma empresa especializada em monitoramento de câmeras de segurança que busca inovar através da implementação de tecnologias avançadas de detecção de riscos. Este projeto demonstra a viabilidade de uma nova funcionalidade que utiliza Inteligência Artificial para identificar objetos potencialmente perigosos em tempo real, como armas brancas (facas, tesouras e similares) e outros objetos de risco.
+### Objetivo
+O sistema visa otimizar a segurança de estabelecimentos e comércios através de:
+- Detecção automática de objetos perigosos
+- Emissão de alertas em tempo real para centrais de segurança
+- Análise contínua de feeds de vídeo
+- Redução do tempo de resposta a incidentes
+Sistema de detecção de objetos de risco em vídeos usando OWLv2 e processamento
 GPU/CPU otimizado.
 [![Open in Hugging Face][hf-badge]][hf-space]
 [![GitHub][gh-badge]][gh-repo]
 [hf-badge]: https://img.shields.io/badge/Hugging%20Face-Spaces-yellow
+[hf-space]: https://huggingface.co/spaces/marcuscanhaco/weapon-detection-app
 [gh-badge]: https://img.shields.io/badge/GitHub-Repo-blue
+[gh-repo]: https://github.com/mvzcanhaco/hackatoon-1IADT-fiap
 ## Funcionalidades

src/application/use_cases/process_video.py CHANGED Viewed

@@ -144,10 +144,11 @@ Tempo de análise: {result.analysis_time:.2f}s"""
             # Adicionar detalhes das detecções se houver
             if result.detections:
                 message += "\n\nDetecções encontradas:"
-                for i, det in enumerate(result.detections[:3], 1):  # Mostrar até 3 detecções
-                    message += f"\n{i}. {det.label} (Confiança: {det.confidence:.1%}, Frame: {det.frame})"
-                if len(result.detections) > 3:
-                    message += f"\n... e mais {len(result.detections) - 3} detecção(ões)"
             return message

             # Adicionar detalhes das detecções se houver
             if result.detections:
                 message += "\n\nDetecções encontradas:"
+                for i, det in enumerate(result.detections[:5], 1):  # Mostrar até 5 detecções
+                    confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
+                    message += f"\n{i}. {det.label} (Confiança: {confidence_pct:.1f}%, Frame: {det.frame})"
+                if len(result.detections) > 5:
+                    message += f"\n... e mais {len(result.detections) - 5} detecção(ões)"
             return message

src/domain/detectors/gpu.py CHANGED Viewed

@@ -163,35 +163,82 @@ class WeaponDetectorGPU(BaseDetector):
             # Calcular duração do vídeo
             metrics["video_duration"] = len(frames) / (fps or 2)
-            # Processar frames
             t0 = time.time()
-            for i, frame in enumerate(frames):
-                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-                frame_pil = Image.fromarray(frame_rgb)
-                detections = self.detect_objects(frame_pil, threshold)
-                # Filtrar apenas detecções válidas (sem filtrar unknown)
-                valid_detections = [
-                    {
-                        "confidence": d["confidence"],
-                        "box": d["box"],
-                        "label": d["label"],
-                        "timestamp": i / (fps or 2)
-                    }
-                    for d in detections
-                    if d["confidence"] > threshold
-                ]
-                if valid_detections:
-                    metrics["detections"].append({
-                        "frame": i,
-                        "detections": valid_detections
-                    })
             # Atualizar métricas finais
             metrics["analysis_time"] = time.time() - t0
             metrics["total_time"] = time.time() - start_time
             return video_path, metrics

             # Calcular duração do vídeo
             metrics["video_duration"] = len(frames) / (fps or 2)
+            # Processar frames em batch
             t0 = time.time()
+            batch_size = 16  # Aumentado para T4 dedicada
+            detections_by_frame = []
+            for i in range(0, len(frames), batch_size):
+                batch_frames = frames[i:i + batch_size]
+                batch_pil_frames = []
+                # Preparar batch
+                for frame in batch_frames:
+                    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                    frame_pil = Image.fromarray(frame_rgb)
+                    frame_pil = self._preprocess_image(frame_pil)
+                    batch_pil_frames.append(frame_pil)
+                # Processar batch
+                batch_inputs = self.owlv2_processor(
+                    images=batch_pil_frames,
+                    return_tensors="pt",
+                    padding=True
+                )
+                batch_inputs = {
+                    key: val.to(self.device)
+                    for key, val in batch_inputs.items()
+                }
+                # Inferência em batch
+                with torch.no_grad():
+                    inputs = {**batch_inputs, **self.processed_text}
+                    outputs = self.owlv2_model(**inputs)
+                    target_sizes = torch.tensor(
+                        [frame.size[::-1] for frame in batch_pil_frames],
+                        device=self.device
+                    )
+                    results = self.owlv2_processor.post_process_grounded_object_detection(
+                        outputs=outputs,
+                        target_sizes=target_sizes,
+                        threshold=threshold
+                    )
+                # Processar resultados do batch
+                for frame_idx, frame_results in enumerate(results):
+                    if len(frame_results["scores"]) > 0:
+                        scores = frame_results["scores"]
+                        boxes = frame_results["boxes"]
+                        labels = frame_results["labels"]
+                        frame_detections = []
+                        for score, box, label in zip(scores, boxes, labels):
+                            score_val = score.item()
+                            if score_val >= threshold:
+                                label_idx = min(label.item(), len(self.text_queries) - 1)
+                                label_text = self.text_queries[label_idx]
+                                frame_detections.append({
+                                    "confidence": round(score_val * 100, 2),
+                                    "box": [int(x) for x in box.tolist()],
+                                    "label": label_text
+                                })
+                        if frame_detections:
+                            frame_detections = self._apply_nms(frame_detections)
+                            detections_by_frame.append({
+                                "frame": i + frame_idx,
+                                "detections": frame_detections
+                            })
+                # Liberar memória do batch
+                del batch_inputs, outputs
+                torch.cuda.empty_cache()
             # Atualizar métricas finais
             metrics["analysis_time"] = time.time() - t0
             metrics["total_time"] = time.time() - start_time
+            metrics["detections"] = detections_by_frame
             return video_path, metrics

src/infrastructure/services/weapon_detector.py CHANGED Viewed

@@ -70,17 +70,19 @@ class WeaponDetectorService(DetectorInterface):
             # Converter detecções para entidades do domínio
             detections = []
-            for d in metrics.get('detections', []):
-                try:
-                    detections.append(Detection(
-                        frame=d.get('frame', 0),
-                        confidence=d.get('confidence', 0.0),
-                        label=d.get('label', 'unknown'),
-                        box=d.get('box', [0, 0, 0, 0]),
-                        timestamp=d.get('frame', 0) / fps if fps else 0
-                    ))
-                except Exception as e:
-                    logger.error(f"Erro ao processar detecção: {str(e)}")
             result = DetectionResult(
                 video_path=output_path or video_path,

             # Converter detecções para entidades do domínio
             detections = []
+            for detection_group in metrics.get('detections', []):
+                frame = detection_group.get('frame', 0)
+                for det in detection_group.get('detections', []):
+                    try:
+                        detections.append(Detection(
+                            frame=frame,
+                            confidence=det.get('confidence', 0.0),
+                            label=det.get('label', 'objeto perigoso'),  # Valor padrão mais informativo
+                            box=det.get('box', [0, 0, 0, 0]),
+                            timestamp=frame / fps if fps else 0
+                        ))
+                    except Exception as e:
+                        logger.error(f"Erro ao processar detecção: {str(e)}")
             result = DetectionResult(
                 video_path=output_path or video_path,

src/main.py CHANGED Viewed

@@ -61,19 +61,22 @@ def setup_gpu_environment(gpu_type: str) -> bool:
         gc.collect()
         if gpu_type == "t4_dedicated":
-            # Configurações para T4 dedicada
             logger.info("Configurando para T4 dedicada")
             torch.backends.cuda.matmul.allow_tf32 = True
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.allow_tf32 = True
-            # Usar mais memória pois temos GPU dedicada
-            torch.cuda.set_per_process_memory_fraction(0.9)
-            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:512'
         elif gpu_type == "zero_gpu_shared":
             # Configurações para Zero-GPU compartilhada
             logger.info("Configurando para Zero-GPU compartilhada")
             torch.backends.cudnn.benchmark = False
             # Limitar uso de memória
             torch.cuda.set_per_process_memory_fraction(0.6)
             os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

         gc.collect()
         if gpu_type == "t4_dedicated":
+            # Configurações otimizadas para T4 dedicada
             logger.info("Configurando para T4 dedicada")
             torch.backends.cuda.matmul.allow_tf32 = True
             torch.backends.cudnn.benchmark = True
             torch.backends.cudnn.allow_tf32 = True
+            torch.backends.cudnn.enabled = True
+            torch.backends.cudnn.deterministic = False
+            # Aumentar fração de memória e tamanho do split
+            torch.cuda.set_per_process_memory_fraction(0.95)  # Aumentado para 95%
+            os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:1024'  # Aumentado para 1GB
         elif gpu_type == "zero_gpu_shared":
             # Configurações para Zero-GPU compartilhada
             logger.info("Configurando para Zero-GPU compartilhada")
             torch.backends.cudnn.benchmark = False
+            torch.backends.cudnn.deterministic = True
             # Limitar uso de memória
             torch.cuda.set_per_process_memory_fraction(0.6)
             os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'

src/presentation/web/gradio_interface.py CHANGED Viewed

@@ -279,7 +279,37 @@ class GradioInterface:
         response = self.use_case.execute(request)
         return (
             response.status_message,
-            response.detection_result.__dict__
         )

         response = self.use_case.execute(request)
+        # Formatar saída para o Gradio
+        status_color = "#ff0000" if response.detections else "#00ff00"
+        status_html = f"""
+        <div style='padding: 1em; background: {status_color}20; border-radius: 8px;'>
+            <h3 style='color: {status_color}; margin: 0;'>
+                {"⚠️ RISCO DETECTADO" if response.detections else "✅ SEGURO"}
+            </h3>
+            <p style='margin: 0.5em 0;'>
+                Processado em: {response.device_type}<br>
+                Total de detecções: {len(response.detections)}<br>
+                Frames analisados: {response.frames_analyzed}<br>
+                Tempo total: {response.total_time:.2f}s
+            </p>
+        </div>
+        """
+        if response.detections:
+            status_html += "<div style='margin-top: 1em;'><h4>Detecções:</h4><ul>"
+            for det in response.detections[:5]:  # Mostrar até 5 detecções
+                confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
+                status_html += f"""
+                <li style='margin: 0.5em 0;'>
+                    <strong>{det.label}</strong><br>
+                    Confiança: {confidence_pct:.1f}%<br>
+                    Frame: {det.frame}
+                </li>"""
+            if len(response.detections) > 5:
+                status_html += f"<li>... e mais {len(response.detections) - 5} detecção(ões)</li>"
+            status_html += "</ul></div>"
         return (
             response.status_message,
+            status_html
         )