Marcus Vinicius Zerbini Canhaço committed · 479ff3a
Parent: ea56678
feat: update the detector with T4 GPU optimizations
src/domain/detectors/gpu.py
CHANGED
@@ -164,7 +164,7 @@ class WeaponDetectorGPU(BaseDetector):

         # Process frames in batches
         t0 = time.time()
-        batch_size =
+        batch_size = 2  # Reduced even further to guarantee compatibility
         detections_by_frame = []

         for i in range(0, len(frames), batch_size):
@@ -185,17 +185,74 @@ class WeaponDetectorGPU(BaseDetector):
                 return_tensors="pt",
                 padding=True
             )
-            batch_inputs = {
-                key: val.to(self.device)
-                for key, val in batch_inputs.items()
-            }

             # Validate shapes before inference
             if not self._validate_batch_shapes(batch_inputs):
-                logger.warning(f"Invalid shape detected in batch {i},
+                logger.warning(f"Invalid shape detected in batch {i}, processing frames individually...")
+                # Process frames individually
+                for frame_idx, frame_pil in enumerate(batch_pil_frames):
+                    try:
+                        single_input = self.owlv2_processor(
+                            images=frame_pil,
+                            return_tensors="pt"
+                        )
+                        single_input = {
+                            key: val.to(self.device)
+                            for key, val in single_input.items()
+                        }
+
+                        with torch.no_grad():
+                            inputs = {**single_input, **self.processed_text}
+                            outputs = self.owlv2_model(**inputs)
+
+                        target_sizes = torch.tensor([frame_pil.size[::-1]], device=self.device)
+                        results = self.owlv2_processor.post_process_grounded_object_detection(
+                            outputs=outputs,
+                            target_sizes=target_sizes,
+                            threshold=threshold
+                        )
+
+                        if len(results[0]["scores"]) > 0:
+                            scores = results[0]["scores"]
+                            boxes = results[0]["boxes"]
+                            labels = results[0]["labels"]
+
+                            frame_detections = []
+                            for score, box, label in zip(scores, boxes, labels):
+                                score_val = score.item()
+                                if score_val >= threshold:
+                                    label_idx = min(label.item(), len(self.text_queries) - 1)
+                                    label_text = self.text_queries[label_idx]
+                                    frame_detections.append({
+                                        "confidence": round(score_val * 100, 2),
+                                        "box": [int(x) for x in box.tolist()],
+                                        "label": label_text,
+                                        "frame": i + frame_idx,
+                                        "timestamp": (i + frame_idx) / (fps or 2)
+                                    })
+
+                            if frame_detections:
+                                frame_detections = self._apply_nms(frame_detections)
+                                detections_by_frame.extend(frame_detections)
+
+                    except Exception as e:
+                        logger.error(f"Error processing individual frame {i + frame_idx}: {str(e)}")
+                        continue
+
+                    finally:
+                        if 'single_input' in locals():
+                            del single_input
+                        if 'outputs' in locals():
+                            del outputs
+                        torch.cuda.empty_cache()
                 continue

-            #
+            # Process the batch normally
+            batch_inputs = {
+                key: val.to(self.device)
+                for key, val in batch_inputs.items()
+            }
+
             with torch.no_grad():
                 inputs = {**batch_inputs, **self.processed_text}
                 outputs = self.owlv2_model(**inputs)
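Likewise, `self._apply_nms` is only referenced here. A minimal per-frame stand-in built on `torchvision.ops.nms`, assuming the detection dicts produced above (boxes as [x1, y1, x2, y2], confidence as a percentage) and an assumed IoU threshold of 0.5:

import torch
from torchvision.ops import nms

def apply_nms(detections, iou_threshold=0.5):
    """Illustrative stand-in for self._apply_nms, not the committed code."""
    if not detections:
        return detections
    boxes = torch.tensor([d["box"] for d in detections], dtype=torch.float32)
    scores = torch.tensor([d["confidence"] for d in detections], dtype=torch.float32)
    keep = nms(boxes, scores, iou_threshold)  # indices of kept boxes, by descending score
    return [detections[idx] for idx in keep.tolist()]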
src/presentation/web/gradio_interface.py
CHANGED
@@ -1,6 +1,6 @@
 import gradio as gr
 import os
-from typing import Tuple, Any
+from typing import Tuple, Any, Dict
 from pathlib import Path
 from src.application.use_cases.process_video import ProcessVideoUseCase, ProcessVideoRequest
 from src.infrastructure.services.weapon_detector import WeaponDetectorService
@@ -259,74 +259,89 @@ class GradioInterface:
         resolution: str = None,
         notification_type: str = None,
         notification_target: str = None
-    ) -> Tuple[str, Any]:
+    ) -> Tuple[str, Dict[str, Any]]:
         """Process the video using the use case."""
-        … (old lines 264-300 lost in extraction)
-            confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
-            status_html += f"""
-                <li style='margin: 0.5em 0;'>
-                    <strong>{det.label}</strong><br>
-                    Confidence: {confidence_pct:.1f}%<br>
-                    Frame: {det.frame}
-                </li>"""
-        if len(response.detection_result.detections) > 5:
-            status_html += f"<li>... and {len(response.detection_result.detections) - 5} more detection(s)</li>"
-        status_html += "</ul></div>"
-
-        # Prepare the technical JSON
-        technical_data = {
-            "device_type": response.detection_result.device_type,
-            "total_detections": len(response.detection_result.detections),
-            "frames_analyzed": response.detection_result.frames_analyzed,
-            "total_time": round(response.detection_result.total_time, 2),
-            "detections": [
-                {
+        try:
+            if not video_path:
+                return "Error: no video provided", {}
+
+            # Use default values if none were specified
+            fps = fps or self.default_fps
+            resolution = resolution or self.default_resolution
+
+            request = ProcessVideoRequest(
+                video_path=video_path,
+                threshold=threshold,
+                fps=fps,
+                resolution=int(resolution),
+                notification_type=notification_type,
+                notification_target=notification_target
+            )
+
+            response = self.use_case.execute(request)
+
+            # Format the status message
+            status_msg = self._format_status_message(response.detection_result)
+
+            # Prepare the technical JSON
+            technical_data = {
+                "device_info": {
+                    "type": response.detection_result.device_type,
+                    "memory": response.memory_info,
+                    "details": response.device_info
+                },
+                "processing_stats": {
+                    "total_detections": len(response.detection_result.detections),
+                    "frames_analyzed": response.detection_result.frames_analyzed,
+                    "total_time": round(response.detection_result.total_time, 2),
+                    "frame_extraction_time": round(response.detection_result.frame_extraction_time, 2),
+                    "analysis_time": round(response.detection_result.analysis_time, 2)
+                },
+                "detections": []
+            }
+
+            # Add detections to the JSON
+            for det in response.detection_result.detections[:10]:  # Limit to 10 detections
+                technical_data["detections"].append({
                     "label": det.label,
                     "confidence": round(det.confidence * 100 if det.confidence <= 1.0 else det.confidence, 2),
                     "frame": det.frame,
-                    "timestamp": round(det.timestamp, 2) if hasattr(det, "timestamp") else None
-        … (old lines 324-332 lost in extraction)
+                    "timestamp": round(det.timestamp, 2) if hasattr(det, "timestamp") else None,
+                    "box": det.box if hasattr(det, "box") else None
+                })
+
+            return status_msg, technical_data
+
+        except Exception as e:
+            logger.error(f"Error processing video: {str(e)}")
+            return "Error processing the video. Please try again.", {
+                "error": str(e),
+                "device_type": "unknown",
+                "total_detections": 0,
+                "frames_analyzed": 0
+            }
+
+    def _format_status_message(self, result) -> str:
+        """Format the processing status message."""
+        try:
+            status = "⚠️ RISK DETECTED" if result.detections else "✅ SAFE"
+
+            message = f"""Status: {status}
+Processed on: {result.device_type}
+Total detections: {len(result.detections)}
+Frames analyzed: {result.frames_analyzed}
+Total time: {result.total_time:.2f}s"""
+
+            if result.detections:
+                message += "\n\nDetections found:"
+                for i, det in enumerate(result.detections[:5], 1):
+                    confidence_pct = det.confidence * 100 if det.confidence <= 1.0 else det.confidence
+                    message += f"\n{i}. {det.label} (Confidence: {confidence_pct:.1f}%, Frame: {det.frame})"
+                if len(result.detections) > 5:
+                    message += f"\n... and {len(result.detections) - 5} more detection(s)"
+
+            return message
+
+        except Exception as e:
+            logger.error(f"Error formatting status message: {str(e)}")
+            return "Error processing the video. Please try again."
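The handler builds a `ProcessVideoRequest` from the raw form values. The actual class lives in src/application/use_cases/process_video.py and is not shown in this commit; judging only from the call site, its shape would be roughly the following (field types and defaults are assumptions):

from dataclasses import dataclass
from typing import Optional

@dataclass
class ProcessVideoRequest:
    """Sketch inferred from the call site above, not the actual definition."""
    video_path: str
    threshold: float = 0.5              # default is an assumption
    fps: Optional[int] = None
    resolution: Optional[int] = None
    notification_type: Optional[str] = None
    notification_target: Optional[str] = None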
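For context on how the return values are consumed: `_process_video` now returns a (status string, technical dict) pair, which maps naturally onto a Textbox plus a JSON component in Gradio. A minimal, hypothetical wiring; the component names and layout are illustrative and not taken from this Space:

import gradio as gr

def build_demo(process_fn):
    """Hypothetical harness showing how the (status, dict) pair returned by
    _process_video plugs into Gradio outputs."""
    with gr.Blocks() as demo:
        video = gr.Video(label="Video")
        threshold = gr.Slider(0.1, 0.9, value=0.5, label="Detection threshold")
        run = gr.Button("Analyze")
        status = gr.Textbox(label="Status")
        details = gr.JSON(label="Technical details")
        # fps, resolution and notification settings fall back to their defaults
        run.click(process_fn, inputs=[video, threshold], outputs=[status, details])
    return demo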