|
import gradio as gr |
|
import cv2 |
|
import numpy as np |
|
from gradio_webrtc import WebRTC |
|
from pathlib import Path |
|
|
|
# Label names, looked up by the integer class id that detection() reads
# from each row of the network output.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# One random 3-component colour per class, each component drawn uniformly
# from [0, 255); regenerated every time the module is imported.
COLORS = np.random.uniform(low=0, high=255, size=(len(CLASSES), 3))
|
|
|
# The model files are looked up next to this source file, so the script
# does not depend on the current working directory.
directory = Path(__file__).parent

# Absolute paths as plain strings, which is the form handed to
# cv2.dnn.readNetFromCaffe below.
MODEL = str(directory.joinpath("MobileNetSSD_deploy.caffemodel").resolve())
PROTOTXT = str(directory.joinpath("MobileNetSSD_deploy.prototxt.txt").resolve())

# Loaded once at import time and shared by every detection() call.
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
|
|
|
|
|
def detection(image, conf_threshold=0.3):
    """Run the module-level network on one frame and draw its detections.

    The frame is resized to 300x300 for the forward pass. The returned
    picture is a separate 500x500 resize of the input on which a rectangle
    and a "<class>: <confidence>%" caption are drawn for every detection
    whose confidence is strictly greater than ``conf_threshold``.

    Args:
        image: Frame to analyse; anything ``cv2.resize`` accepts.
            NOTE(review): the channel order the model expects is not
            visible here - confirm against the caller.
        conf_threshold: Detections whose confidence is not above this
            value are ignored.

    Returns:
        The annotated 500x500 resize of ``image``.
    """
    # Positional arguments after the image: scale factor, size, mean.
    # 0.007843 is approximately 1 / 127.5.
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5
    )
    net.setInput(blob)
    detections = net.forward()

    # Boxes are drawn on a fixed-size resize of the frame, not on the
    # original; cv2.resize returns a new array.
    image = cv2.resize(image, (500, 500))
    (h, w) = image.shape[:2]

    for i in range(detections.shape[2]):
        # Per row: index 1 is the class id, 2 the confidence, 3..6 the box.
        confidence = detections[0, 0, i, 2]
        # "not >" (rather than "<=") so that a NaN confidence is skipped too.
        if not confidence > conf_threshold:
            continue

        idx = int(detections[0, 0, i, 1])
        # The box entries are multiplied by (w, h, w, h), i.e. they are
        # treated as fractions of the frame size.
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        label = f"{CLASSES[idx]}: {round(confidence * 100, 2)}%"
        cv2.rectangle(image, (startX, startY), (endX, endY), COLORS[idx], 2)
        # Caption sits 15 px above the box top, or 15 px below it when the
        # box starts within 30 px of the top edge.
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(
            image, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2
        )
    return image
|
|
|
|
|
# Custom CSS handed to gr.Blocks: limits the size of the element tagged
# "my-group" and centres the content of the element tagged "my-column".
# NOTE(review): "max-height: 600" carries no unit, so browsers will
# presumably discard that declaration; it is reproduced unchanged here so
# the rendered layout stays exactly as it was.
css = (
    ".my-group {max-width: 600px !important; max-height: 600 !important;}\n"
    ".my-column {display: flex !important; justify-content: center !important; "
    "align-items: center !important};"
)
|
|
|
|
|
# Page layout: a centred heading, a row of links, then a column holding the
# WebRTC video component and a confidence slider.
with gr.Blocks(css=css) as demo:
    # NOTE(review): the heading and links below name YOLOv10, while the
    # network loaded in this file comes from MobileNetSSD_deploy.* files -
    # confirm which one the page should advertise.
    gr.HTML(
        """
    <h1 style='text-align: center'>
    YOLOv10 Webcam Stream
    </h1>
    """)
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2405.14458' target='_blank'>arXiv</a> | <a href='https://github.com/THU-MIG/yolov10' target='_blank'>github</a>
        </h3>
        """)
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            # Label previously read "Strean" (typo).
            image = WebRTC(label="Stream")
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.30,
            )

        # NOTE(review): only the video component is listed in `inputs`; the
        # slider above is not, so detection() presumably always runs with its
        # default conf_threshold. Confirm that webrtc_stream forwards extra
        # inputs before wiring the slider in.
        image.webrtc_stream(
            fn=detection,
            inputs=[image],
            stream_every=0.05,
            time_limit=30,
        )


# Start the Gradio server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|