import torch
import logging
import gc
import time
import cv2
from PIL import Image
from transformers import Owlv2Processor, Owlv2ForObjectDetection
from .base import BaseDetector

logger = logging.getLogger(__name__)

class WeaponDetectorGPU(BaseDetector):
    """GPU-optimized weapon detector."""
    
    def __init__(self):
        """Initialize the detector."""
        super().__init__()
        self.default_resolution = 640
        self.device = None  # Set in _initialize
        self._initialize()
    
    def _initialize(self):
        """Initialize the model."""
        try:
            # Require a CUDA device
            if not torch.cuda.is_available():
                raise RuntimeError("CUDA is not available!")
            
            # Use the integer index of the first GPU as the device
            self.device = 0
            
            # Load model and processor
            logger.info("Loading model and processor...")
            model_name = "google/owlv2-base-patch16"
            
            self.owlv2_processor = Owlv2Processor.from_pretrained(model_name)
            self.owlv2_model = Owlv2ForObjectDetection.from_pretrained(
                model_name,
                torch_dtype=torch.float16,
                device_map={"": self.device}  # Map the whole model to GPU 0
            )
            
            # Inference only
            self.owlv2_model.eval()
            
            # Pre-process the text queries once and keep them on the GPU
            self.text_queries = self._get_detection_queries()
            self.processed_text = self.owlv2_processor(
                text=self.text_queries,
                return_tensors="pt",
                padding=True
            )
            self.processed_text = {
                key: val.to(self.device) 
                for key, val in self.processed_text.items()
            }
            
            logger.info("GPU initialization complete!")
            self._initialized = True
            
        except Exception as e:
            logger.error(f"GPU initialization error: {str(e)}")
            raise

    def detect_objects(self, image: Image.Image, threshold: float = 0.3) -> list:
        """Detect objects in an image."""
        try:
            # Pre-process the image
            if image.mode != 'RGB':
                image = image.convert('RGB')
            
            # Run the processor and move tensors to the GPU; cast floating-point
            # inputs to float16 to match the half-precision model weights
            image_inputs = self.owlv2_processor(
                images=image,
                return_tensors="pt"
            )
            image_inputs = {
                key: val.to(self.device, dtype=torch.float16) if val.is_floating_point()
                else val.to(self.device)
                for key, val in image_inputs.items()
            }
            
            # Inference
            with torch.no_grad():
                inputs = {**image_inputs, **self.processed_text}
                outputs = self.owlv2_model(**inputs)
                
                target_sizes = torch.tensor([image.size[::-1]], device=self.device)
                results = self.owlv2_processor.post_process_grounded_object_detection(
                    outputs=outputs,
                    target_sizes=target_sizes,
                    threshold=threshold
                )[0]
            
            # Collect detections above the threshold
            detections = []
            if len(results["scores"]) > 0:
                scores = results["scores"]
                boxes = results["boxes"]
                labels = results["labels"]
                
                for score, box, label in zip(scores, boxes, labels):
                    if score.item() >= threshold:
                        detections.append({
                            "confidence": score.item(),
                            "box": [int(x) for x in box.tolist()],
                            "label": self.text_queries[label]
                        })
            
            return detections
            
        except Exception as e:
            logger.error(f"Error in detect_objects: {str(e)}")
            return []

    def _get_best_device(self):
        """Return the best available device."""
        return 0  # Integer index for the first GPU

    def _clear_gpu_memory(self):
        """Free cached GPU memory."""
        torch.cuda.empty_cache()
        gc.collect()

    def process_video(self, video_path: str, fps: int = None, threshold: float = 0.3, resolution: int = 640) -> tuple:
        """Process a video, stopping at the first frame that yields detections."""
        start_time = time.time()
        metrics = {
            "total_time": 0,
            "frames_analyzed": 0,
            "detections": []
        }
        
        try:
            frames = self.extract_frames(video_path, fps or 2, resolution)
            metrics["frames_analyzed"] = len(frames)
            
            for i, frame in enumerate(frames):
                # OpenCV frames are BGR; convert to RGB for PIL/the model
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame_pil = Image.fromarray(frame_rgb)
                
                detections = self.detect_objects(frame_pil, threshold)
                if detections:
                    metrics["detections"].append({
                        "frame": i,
                        "detections": detections
                    })
                    # Early exit on the first frame with a detection
                    return video_path, metrics
            
            return video_path, metrics
            
        except Exception as e:
            logger.error(f"Error while processing video: {str(e)}")
            return video_path, metrics
        finally:
            # metrics is returned by reference, so this update is visible to callers
            metrics["total_time"] = time.time() - start_time
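
# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how this detector might be driven, assuming that
# BaseDetector (from .base) provides _get_detection_queries() and
# extract_frames(), that a CUDA GPU is present, and that "sample.jpg" and
# "sample.mp4" are hypothetical paths to replace with real files. Because the
# module uses a relative import, run it as part of its package, e.g.
# `python -m <package>.<module>`.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    detector = WeaponDetectorGPU()

    # Single-image detection
    image = Image.open("sample.jpg")  # hypothetical input image
    for det in detector.detect_objects(image, threshold=0.3):
        print(f"{det['label']}: {det['confidence']:.2f} at {det['box']}")

    # Video processing (stops at the first frame with a detection)
    _, metrics = detector.process_video("sample.mp4", fps=2, threshold=0.3)
    print(f"Frames analyzed: {metrics['frames_analyzed']}, "
          f"time: {metrics['total_time']:.1f}s, "
          f"frames with detections: {len(metrics['detections'])}")

    detector._clear_gpu_memory()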