Spaces:
Sleeping
Sleeping
File size: 3,442 Bytes
780389c 8b2cbe6 1e8e71b 794b1a6 ccc35d4 8b2cbe6 385e56e 1e8e71b 790227b ccc35d4 790227b ccc35d4 1e8e71b 66947f7 385e56e 790227b 385e56e ccc35d4 8b2cbe6 790227b 8b2cbe6 2172fc2 8b2cbe6 2172fc2 8b2cbe6 f8727f7 8337710 790227b 8b2cbe6 790227b ccc35d4 8b2cbe6 ccc35d4 8b2cbe6 66947f7 8b2cbe6 790227b 8b2cbe6 66947f7 8b2cbe6 790227b 66947f7 790227b 66947f7 790227b ccc35d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import tempfile

import cv2
import gradio as gr
import spaces
import torch
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLOv10
# The image processor and the detector are both loaded from the same
# checkpoint, once, when the module is imported.
# NOTE(review): RTDetrImageProcessor / RTDetrForObjectDetection are not among
# the imports visible at the top of this file — confirm they are imported.
_CHECKPOINT = "PekingU/rtdetr_r50vd"
image_processor = RTDetrImageProcessor.from_pretrained(_CHECKPOINT)
model = RTDetrForObjectDetection.from_pretrained(_CHECKPOINT)
def draw_bounding_boxes(image, results, model, threshold=0.3):
    """Draw a red box and caption on ``image`` for each confident detection.

    Args:
        image: Image handed to ``ImageDraw.Draw``; it is drawn on directly,
            not copied.
        results: Iterable of mappings, each providing parallel ``"scores"``,
            ``"labels"`` and ``"boxes"`` sequences.
        model: Object whose ``config.id2label`` maps a label id to its name.
        threshold: A detection is drawn only when its score is strictly
            greater than this value.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    for detections in results:
        candidates = zip(
            detections["scores"], detections["labels"], detections["boxes"]
        )
        for confidence, class_id, raw_box in candidates:
            if not confidence > threshold:
                continue
            name = model.config.id2label[class_id.item()]
            # Box coordinates are rounded to whole numbers before drawing.
            corners = [round(value) for value in raw_box.tolist()]
            canvas.rectangle(corners, outline="red", width=3)
            # The caption is anchored at the first two box coordinates.
            canvas.text(
                (corners[0], corners[1]),
                f"{name}: {confidence:.2f}",
                fill="red",
            )
    return image
@spaces.GPU
def inference(image, conf_threshold):
    """Detect objects in one frame and return the frame with boxes drawn on.

    Args:
        image: Frame to analyse. Its ``size`` attribute is reversed to build
            the target size for post-processing (the UI supplies PIL images
            via ``type="pil"``). ``None`` is returned unchanged.
        conf_threshold: Score cut-off forwarded to both post-processing and
            drawing.

    Returns:
        The result of ``draw_bounding_boxes`` for ``image``, or ``None`` when
        no frame was supplied.
    """
    # NOTE(review): presumably the stream can deliver an empty frame; skip
    # the model entirely rather than fail inside the processor.
    if image is None:
        return None

    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Filter at the caller's threshold. A fixed cut-off here would silently
    # discard detections whenever the slider is set below that cut-off, so
    # the lower part of the slider range would have no visible effect.
    results = image_processor.post_process_object_detection(
        outputs,
        target_sizes=torch.tensor([image.size[::-1]]),
        threshold=conf_threshold,
    )
    return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
def app():
    """Build a bare webcam-detection UI and return it.

    The layout is a single row/column holding a webcam image component and a
    confidence slider (default 0.25). Each streamed frame is sent through
    ``inference`` and the annotated frame is written back to the same image
    component.

    Note: later in this module the name ``app`` is rebound to a ``gr.Blocks``
    instance by a ``with ... as app`` statement, so after import this
    function is no longer reachable under this name.

    Returns:
        The ``gr.Blocks`` container holding the UI.
    """
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                image = gr.Image(
                    type="pil",
                    label="Image",
                    visible=True,
                    sources="webcam",
                    height=500,
                    width=500,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
        # The callback must be a function that exists in this module; the
        # image component is both the input and the output of the stream.
        image.stream(
            fn=inference,
            inputs=[image, conf_threshold],
            outputs=[image],
            stream_every=0.2,
            time_limit=30,
        )
    return demo
# Custom CSS passed to gr.Blocks: caps the size of elements carrying the
# "my-group" class and centres the contents of elements carrying "my-column".
# Lengths need an explicit unit, and nothing may follow a rule's closing brace.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""
# Top-level UI: a heading, reference links, and a webcam image whose frames
# are streamed through `inference` and written back to the same component.
with gr.Blocks(css=css) as app:
    # Page heading.
    gr.HTML(
        "\n"
        "<h1 style='text-align: center'>\n"
        "Near Real-Time Webcam Stream with RT-DETR\n"
        "</h1>\n"
    )
    # Links to the paper and the reference implementation.
    gr.HTML(
        "\n"
        "<h3 style='text-align: center'>\n"
        "<a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a>"
        " | "
        "<a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>\n"
        "</h3>\n"
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = gr.Image(
                type="pil",
                label="Image",
                visible=True,
                sources="webcam",
                height=500,
                width=500,
            )
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.85,
            )
            # The image component is both the source of frames and the
            # destination for the annotated result.
            image.stream(
                fn=inference,
                inputs=[image, conf_threshold],
                outputs=[image],
                stream_every=0.2,
                time_limit=30,
            )

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()
|