# Web-page residue captured together with this file (not program text):
# freddyaboulton's picture
# format
# 790227b
# raw
# history blame
# 3.44 kB
import spaces
import gradio as gr
import cv2
import tempfile
from ultralytics import YOLOv10
from PIL import Image, ImageDraw, ImageFont
# Checkpoint id shared by the pre/post-processor and the detector so the two
# always come from the same weights.
# NOTE(review): RTDetrImageProcessor / RTDetrForObjectDetection are not imported
# in the visible import block (presumably they come from `transformers`) —
# confirm the import exists at file level.
_CHECKPOINT = "PekingU/rtdetr_r50vd"

image_processor = RTDetrImageProcessor.from_pretrained(_CHECKPOINT)
model = RTDetrForObjectDetection.from_pretrained(_CHECKPOINT)
def draw_bounding_boxes(image, results, model, threshold=0.3):
    """Annotate ``image`` in place with the detections found in ``results``.

    Args:
        image: image handed to ``ImageDraw.Draw``; it is modified in place.
        results: iterable of mappings, each with parallel ``"scores"``,
            ``"labels"`` and ``"boxes"`` entries.
        model: object whose ``config.id2label`` maps a label id to its name.
        threshold: a detection is drawn only when its score is strictly
            greater than this value.

    Returns:
        The same ``image`` object that was passed in.
    """
    canvas = ImageDraw.Draw(image)
    for detections in results:
        candidates = zip(
            detections["scores"], detections["labels"], detections["boxes"]
        )
        for score, label_id, raw_box in candidates:
            # Skip anything that does not clear the threshold.
            if not score > threshold:
                continue
            label = model.config.id2label[label_id.item()]
            # Round the box coordinates to whole numbers before drawing.
            corners = list(map(round, raw_box.tolist()))
            canvas.rectangle(corners, outline="red", width=3)
            # Caption is anchored at the first coordinate pair of the box.
            anchor = (corners[0], corners[1])
            canvas.text(anchor, f"{label}: {score:.2f}", fill="red")
    return image
@spaces.GPU
def inference(image, conf_threshold):
    """Run the detector on one frame and return the frame with boxes drawn.

    Args:
        image: PIL image of the current frame (the Gradio image inputs in this
            file are declared with ``type="pil"``).
        conf_threshold: minimum score a detection must exceed to be kept.

    Returns:
        The same image object, annotated in place by ``draw_bounding_boxes``.
    """
    # NOTE(review): `torch` is not imported in the visible import block —
    # confirm it is imported at file level.
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # PIL reports size as (width, height); reverse it to (height, width) for
    # the post-processor.
    target_sizes = torch.tensor([image.size[::-1]])

    # Filter with the caller's threshold. This used to be hard-coded to 0.3,
    # which discarded every detection scoring 0.3 or less before drawing, so
    # slider values below 0.3 had no effect.
    results = image_processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=conf_threshold
    )
    return draw_bounding_boxes(image, results, model, threshold=conf_threshold)
def app():
    """Build a webcam image + confidence slider layout wired to ``inference``.

    Each streamed webcam frame, together with the slider value, is passed to
    ``inference`` and the annotated frame is written back to the same image
    component.

    NOTE(review): the module-level ``with gr.Blocks(css=css) as app:`` further
    down this file rebinds the name ``app``, so this function is shadowed once
    the module has finished loading — confirm whether it is still needed.
    """
    with gr.Blocks():
        with gr.Row():
            with gr.Column():
                image = gr.Image(
                    type="pil",
                    label="Image",
                    visible=True,
                    sources="webcam",
                    height=500,
                    width=500,
                )
                conf_threshold = gr.Slider(
                    label="Confidence Threshold",
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.25,
                )
        # The callback used to be `yolov10_inference`, which is not defined
        # anywhere in this file; `inference` takes the same
        # (image, conf_threshold) arguments.
        image.stream(
            fn=inference,
            inputs=[image, conf_threshold],
            outputs=[image],
            stream_every=0.2,
            time_limit=30,
        )
# Layout overrides for the elements tagged with `my-group` / `my-column` below.
# Two fixes relative to the earlier stylesheet: `max-height` now carries a unit
# (a bare `600` is not a valid CSS length, so that declaration was ignored), and
# the last rule ends with `;}` instead of the malformed `};`.
# NOTE(review): the max-height rule is now active — check the group still fits.
css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

# Top-level UI: title, reference links, then the webcam image and the
# confidence slider. `app` is the Blocks instance launched at the bottom.
with gr.Blocks(css=css) as app:
    gr.HTML(
        """
    <h1 style='text-align: center'>
    Near Real-Time Webcam Stream with RT-DETR
    </h1>
    """
    )
    gr.HTML(
        """
        <h3 style='text-align: center'>
        <a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
        </h3>
        """
    )
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            image = gr.Image(
                type="pil",
                label="Image",
                visible=True,
                sources="webcam",
                height=500,
                width=500,
            )
            conf_threshold = gr.Slider(
                label="Confidence Threshold",
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.85,
            )
        # Stream webcam frames through `inference` and write the annotated
        # frame back into the same image component.
        image.stream(
            fn=inference,
            inputs=[image, conf_threshold],
            outputs=[image],
            stream_every=0.2,
            time_limit=30,
        )

if __name__ == "__main__":
    app.launch()