YOLOv10-webcam-stream

Sleeping

App Files Files

xet

Community

YOLOv10-webcam-stream / app.py

freddyaboulton HF Staff

format

790227b about 1 year ago

raw

history blame

3.44 kB

	import tempfile

	import cv2
	import gradio as gr
	import spaces
	import torch
	from PIL import Image, ImageDraw, ImageFont
	from ultralytics import YOLOv10

	# Checkpoint id shared by the pre/post-processor and the detection model.
	# NOTE(review): RTDetrImageProcessor and RTDetrForObjectDetection are not
	# imported anywhere in the visible source; presumably they come from
	# `transformers` -- confirm and add the import.
	_RTDETR_CHECKPOINT = "PekingU/rtdetr_r50vd"

	image_processor = RTDetrImageProcessor.from_pretrained(_RTDETR_CHECKPOINT)
	model = RTDetrForObjectDetection.from_pretrained(_RTDETR_CHECKPOINT)


	def draw_bounding_boxes(image, results, model, threshold=0.3):
	    """Outline every sufficiently confident detection on ``image``.

	    The image is annotated in place and the same object is returned.

	    Args:
	        image: Image handed to ``ImageDraw.Draw``; it is modified in place.
	        results: Iterable of mappings, each holding parallel ``"scores"``,
	            ``"labels"`` and ``"boxes"`` sequences.
	        model: Object whose ``config.id2label`` maps a class id to its name.
	        threshold: A detection is drawn only when its score is strictly
	            greater than this value.

	    Returns:
	        The ``image`` argument, with rectangles and captions drawn on it.
	    """
	    canvas = ImageDraw.Draw(image)
	    for detections in results:
	        triples = zip(
	            detections["scores"], detections["labels"], detections["boxes"]
	        )
	        for confidence, class_id, coords in triples:
	            # Guard clause: skip anything that does not clear the threshold.
	            if not confidence > threshold:
	                continue
	            name = model.config.id2label[class_id.item()]
	            # Round the coordinates to whole pixels before drawing.
	            # Presumably ordered (x0, y0, x1, y1) -- TODO confirm.
	            corners = list(map(round, coords.tolist()))
	            canvas.rectangle(corners, outline="red", width=3)
	            # The caption is anchored at the first two box coordinates.
	            anchor = (corners[0], corners[1])
	            canvas.text(anchor, f"{name}: {confidence:.2f}", fill="red")
	    return image


	@spaces.GPU
	def inference(image, conf_threshold):
	    """Detect objects in one frame and return the frame with boxes drawn.

	    Args:
	        image: Frame to analyse. It must expose ``.size``; the UI in this
	            file supplies it from a ``gr.Image(type="pil")`` component.
	            ``None`` is passed straight back without running the model.
	        conf_threshold: Minimum score a detection needs in order to be kept
	            and drawn.

	    Returns:
	        The same ``image`` object annotated by ``draw_bounding_boxes``, or
	        ``None`` when no frame was supplied.
	    """
	    # Robustness: no frame means nothing to run the model on.
	    if image is None:
	        return None

	    # NOTE(review): neither the model nor the inputs are moved to a CUDA
	    # device in the visible source, so this presumably runs on CPU despite
	    # the GPU decorator -- confirm.
	    inputs = image_processor(images=image, return_tensors="pt")

	    with torch.no_grad():
	        outputs = model(**inputs)

	    # `image.size` is reversed before being passed as the target size.
	    # The caller's threshold is used here as well: a fixed 0.3 cut-off would
	    # discard detections before drawing and make lower slider values
	    # ineffective.
	    results = image_processor.post_process_object_detection(
	        outputs,
	        target_sizes=torch.tensor([image.size[::-1]]),
	        threshold=conf_threshold,
	    )

	    return draw_bounding_boxes(image, results, model, threshold=conf_threshold)


	def app():
	    """Build a webcam image, a confidence slider and the streaming hook.

	    The components are created inside a new ``gr.Blocks`` context and the
	    function returns nothing. The stream callback is ``inference``; the
	    previous target, ``yolov10_inference``, is not defined in this file and
	    raised ``NameError`` as soon as the function was called.

	    NOTE(review): later in this module the name ``app`` is rebound to a
	    ``gr.Blocks`` instance, so this function is no longer reachable by name
	    once the module has finished importing -- confirm whether it is still
	    needed.
	    """
	    with gr.Blocks():
	        with gr.Row():
	            with gr.Column():
	                image = gr.Image(
	                    type="pil",
	                    label="Image",
	                    visible=True,
	                    sources="webcam",
	                    height=500,
	                    width=500,
	                )
	                conf_threshold = gr.Slider(
	                    label="Confidence Threshold",
	                    minimum=0.0,
	                    maximum=1.0,
	                    step=0.05,
	                    value=0.25,
	                )
	        # The image component is both input and output of the stream event.
	        image.stream(
	            fn=inference,
	            inputs=[image, conf_threshold],
	            outputs=[image],
	            stream_every=0.2,
	            time_limit=30,
	        )


	# Custom styles passed to gr.Blocks(css=...): size cap for elements tagged
	# "my-group" and centred flex layout for elements tagged "my-column".
	# Fixes: `max-height` now carries a unit (a bare `600` is not a valid CSS
	# length), and the final declaration is terminated inside the closing brace
	# instead of leaving a stray `;` after it.
	css = """.my-group {max-width: 600px !important; max-height: 600px !important;}
	.my-column {display: flex !important; justify-content: center !important; align-items: center !important;}"""

	# Top-level UI: a title, reference links, and a webcam image whose frames
	# are streamed through `inference` together with the slider value.
	with gr.Blocks(css=css) as app:
	    gr.HTML(
	        """
	<h1 style='text-align: center'>
	Near Real-Time Webcam Stream with RT-DETR
	</h1>
	"""
	    )
	    # The link separator is a plain "|": the former "\|" was an invalid
	    # Python escape sequence and rendered a stray backslash on the page.
	    gr.HTML(
	        """
	<h3 style='text-align: center'>
	<a href='https://arxiv.org/abs/2304.08069' target='_blank'>arXiv</a> | <a href='https://github.com/lyuwenyu/RT-DETR' target='_blank'>github</a>
	</h3>
	"""
	    )
	    with gr.Column(elem_classes=["my-column"]):
	        with gr.Group(elem_classes=["my-group"]):
	            image = gr.Image(
	                type="pil",
	                label="Image",
	                visible=True,
	                sources="webcam",
	                height=500,
	                width=500,
	            )
	            conf_threshold = gr.Slider(
	                label="Confidence Threshold",
	                minimum=0.0,
	                maximum=1.0,
	                step=0.05,
	                value=0.85,
	            )
	        # The image component is both the input and the output, so each
	        # annotated frame replaces the one displayed.
	        image.stream(
	            fn=inference,
	            inputs=[image, conf_threshold],
	            outputs=[image],
	            stream_every=0.2,
	            time_limit=30,
	        )

	# Launch the interface only when this file is executed as a script.
	if __name__ == "__main__":
	    app.launch()