"""Yolo11 / app.py: Gradio app for image and video inference with Ultralytics YOLO11 models."""
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
from ultralytics import YOLO
import spaces
import cv2
import numpy as np
import tempfile


@spaces.GPU
def yolo_inference(input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection):
    """Run YOLO11 inference on an image or a video.

    Returns a tuple (annotated_image, annotated_video_path); the slot that does
    not apply to the selected input type is None.
    """
    if input_type == "Image":
        if image is None:
            # No input: return a white placeholder image with a centered message.
            width, height = 640, 480
            blank_image = Image.new("RGB", (width, height), color="white")
            draw = ImageDraw.Draw(blank_image)
            message = "No image provided"
            font = ImageFont.load_default(size=40)
            bbox = draw.textbbox((0, 0), message, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]
            text_x = (width - text_width) / 2
            text_y = (height - text_height) / 2
            draw.text((text_x, text_y), message, fill="black", font=font)
            return blank_image, None
        model = YOLO(model_id)
        results = model.predict(
            source=image,
            conf=conf_threshold,
            iou=iou_threshold,
            imgsz=640,
            max_det=max_detection,
            show_labels=True,
            show_conf=True,
        )
        for r in results:
            # r.plot() returns a BGR array; reverse the channel axis for PIL (RGB).
            image_array = r.plot()
            annotated_image = Image.fromarray(image_array[..., ::-1])
            return annotated_image, None
    elif input_type == "Video":
        if video is None:
            # No input: render the placeholder message and encode it as a
            # one-frame video so the video output component can display it.
            width, height = 640, 480
            blank_image = Image.new("RGB", (width, height), color="white")
            draw = ImageDraw.Draw(blank_image)
            message = "No video provided"
            font = ImageFont.load_default(size=40)
            bbox = draw.textbbox((0, 0), message, font=font)
            text_width = bbox[2] - bbox[0]
            text_height = bbox[3] - bbox[1]
            text_x = (width - text_width) / 2
            text_y = (height - text_height) / 2
            draw.text((text_x, text_y), message, fill="black", font=font)
            temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(temp_video_file, fourcc, 1, (width, height))
            frame = cv2.cvtColor(np.array(blank_image), cv2.COLOR_RGB2BGR)
            out.write(frame)
            out.release()
            return None, temp_video_file
        model = YOLO(model_id)
        cap = cv2.VideoCapture(video)
        # Fall back to 25 fps when the container does not report a frame rate.
        fps = cap.get(cv2.CAP_PROP_FPS) if cap.get(cv2.CAP_PROP_FPS) > 0 else 25
        frames = []
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            pil_frame = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            results = model.predict(
                source=pil_frame,
                conf=conf_threshold,
                iou=iou_threshold,
                imgsz=640,
                max_det=max_detection,
                show_labels=True,
                show_conf=True,
            )
            for r in results:
                annotated_frame_array = r.plot()
                annotated_frame = cv2.cvtColor(annotated_frame_array, cv2.COLOR_BGR2RGB)
                frames.append(annotated_frame)
        cap.release()
        if len(frames) == 0:
            return None, None
        # Encode the buffered frames into an .mp4 at the source frame rate.
        height_out, width_out, _ = frames[0].shape
        temp_video_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        out = cv2.VideoWriter(temp_video_file, fourcc, fps, (width_out, height_out))
        for f in frames:
            f_bgr = cv2.cvtColor(f, cv2.COLOR_RGB2BGR)
            out.write(f_bgr)
        out.release()
        return None, temp_video_file
    else:
        return None, None
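

# The video branch above buffers every annotated frame in memory before
# encoding, which can be costly for long clips. A lower-memory variant (a
# sketch only, not wired into the UI; the helper name is an assumption) writes
# each frame to the VideoWriter as soon as it is annotated:
def _stream_annotate_video(video_path, model, conf, iou, max_det, out_path):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 25  # same 25 fps fallback as above
    out = None
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        # Ultralytics accepts numpy arrays directly (treated as BGR), so the
        # PIL round-trip used above is not strictly required.
        result = model.predict(source=frame, conf=conf, iou=iou,
                               imgsz=640, max_det=max_det)[0]
        annotated = result.plot()  # BGR array, ready for cv2.VideoWriter
        if out is None:
            h, w, _ = annotated.shape
            fourcc = cv2.VideoWriter_fourcc(*"mp4v")
            out = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
        out.write(annotated)
    cap.release()
    if out is not None:
        out.release()
    return out_path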


def update_visibility(input_type):
    """
    Show/hide image/video input and output depending on input_type.
    """
    if input_type == "Image":
        # image, video, output_image, output_video
        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
    else:
        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)


def yolo_inference_for_examples(image, model_id, conf_threshold, iou_threshold, max_detection):
    """
    Called by gr.Examples. Forces the input type radio to 'Image', runs a
    standard image inference, and returns both the updated radio value and
    the annotated image.
    """
    annotated_image, _ = yolo_inference(
        input_type="Image",
        image=image,
        video=None,
        model_id=model_id,
        conf_threshold=conf_threshold,
        iou_threshold=iou_threshold,
        max_detection=max_detection,
    )
    return gr.update(value="Image"), annotated_image
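

# Quick local smoke test for the function above (a sketch only; "bus.jpg" is
# one of the example images listed below and is assumed to be present):
#
#   img = Image.open("bus.jpg")
#   annotated, _ = yolo_inference("Image", img, None, "yolo11n.pt", 0.25, 0.45, 300)
#   annotated.save("bus_annotated.jpg")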


with gr.Blocks() as app:
    gr.Markdown("# YOLO11: Object Detection, Instance Segmentation, Pose/Keypoints, Oriented Object Detection, Classification")
    gr.Markdown("Upload an image or a video for inference with the latest Ultralytics YOLO11 models.")
    with gr.Row():
        with gr.Column():
            image = gr.Image(type="pil", label="Image", visible=True)
            video = gr.Video(label="Video", visible=False)
            input_type = gr.Radio(
                choices=["Image", "Video"],
                value="Image",
                label="Input Type",
            )
            model_id = gr.Dropdown(
                label="Model Name",
                choices=[
                    'yolo11n.pt', 'yolo11s.pt', 'yolo11m.pt', 'yolo11l.pt', 'yolo11x.pt',
                    'yolo11n-seg.pt', 'yolo11s-seg.pt', 'yolo11m-seg.pt', 'yolo11l-seg.pt', 'yolo11x-seg.pt',
                    'yolo11n-pose.pt', 'yolo11s-pose.pt', 'yolo11m-pose.pt', 'yolo11l-pose.pt', 'yolo11x-pose.pt',
                    'yolo11n-obb.pt', 'yolo11s-obb.pt', 'yolo11m-obb.pt', 'yolo11l-obb.pt', 'yolo11x-obb.pt',
                    'yolo11n-cls.pt', 'yolo11s-cls.pt', 'yolo11m-cls.pt', 'yolo11l-cls.pt', 'yolo11x-cls.pt',
                ],
                value="yolo11n.pt",
            )
            conf_threshold = gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
            iou_threshold = gr.Slider(minimum=0, maximum=1, value=0.45, label="IoU Threshold")
            max_detection = gr.Slider(minimum=1, maximum=300, step=1, value=300, label="Max Detection")
            infer_button = gr.Button("Detect Objects")
        with gr.Column():
            output_image = gr.Image(type="pil", label="Annotated Image", visible=True)
            output_video = gr.Video(label="Annotated Video", visible=False)

    # Toggle input/output visibility when the input type changes.
    input_type.change(
        fn=update_visibility,
        inputs=input_type,
        outputs=[image, video, output_image, output_video],
    )

    # Main inference on button click.
    infer_button.click(
        fn=yolo_inference,
        inputs=[input_type, image, video, model_id, conf_threshold, iou_threshold, max_detection],
        outputs=[output_image, output_video],
    )

    # Image-only examples; each row is [image, model, conf, iou, max_det].
    gr.Examples(
        examples=[
            ["zidane.jpg", "yolo11s.pt", 0.25, 0.45, 300],
            ["bus.jpg", "yolo11m.pt", 0.25, 0.45, 300],
            ["yolo_vision.jpg", "yolo11x.pt", 0.25, 0.45, 300],
            ["Tricycle.jpg", "yolo11x-cls.pt", 0.25, 0.45, 300],
            ["tcganadolu.jpg", "yolo11m-obb.pt", 0.25, 0.45, 300],
            ["San Diego Airport.jpg", "yolo11x-seg.pt", 0.25, 0.45, 300],
            ["Theodore_Roosevelt.png", "yolo11l-pose.pt", 0.25, 0.45, 300],
        ],
        fn=yolo_inference_for_examples,
        inputs=[image, model_id, conf_threshold, iou_threshold, max_detection],
        outputs=[input_type, output_image],
        label="Examples (Images)",
        cache_examples=True,
    )
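

# If the Space's API is enabled, the same inference can be driven remotely with
# gradio_client. A minimal sketch; the Space id and the auto-derived endpoint
# name "/yolo_inference" are assumptions, not confirmed by this file:
#
#   from gradio_client import Client, handle_file
#   client = Client("atalaydenknalbant/Yolo11")
#   annotated, _ = client.predict(
#       "Image", handle_file("bus.jpg"), None,
#       "yolo11n.pt", 0.25, 0.45, 300,
#       api_name="/yolo_inference",
#   )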


if __name__ == '__main__':
    app.launch()