"""Gradio app for pollen-vision.

This script creates a Gradio app for pollen-vision. The app allows users to
perform object detection and object segmentation using the OWL-ViT and
MobileSAM models.
"""

from typing import Any, Dict, List

import gradio as gr
import numpy as np
import numpy.typing as npt
from datasets import load_dataset

from pollen_vision.vision_models.object_detection import OwlVitWrapper
from pollen_vision.vision_models.object_segmentation import MobileSamWrapper
from pollen_vision.vision_models.utils import Annotator, get_bboxes
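
# Instantiate the model wrappers and the annotator once, at startup, so that
# every Gradio request reuses the same loaded models.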
owl_vit = OwlVitWrapper()
mobile_sam = MobileSamWrapper()
annotator = Annotator()


def object_detection(
    img: npt.NDArray[np.uint8], text_queries: List[str], score_threshold: float
) -> List[Dict[str, Any]]:
    """Run zero-shot object detection on `img` for the given text queries."""
    predictions: List[Dict[str, Any]] = owl_vit.infer(
        im=img, candidate_labels=text_queries, detection_threshold=score_threshold
    )
    return predictions


def object_segmentation(
    img: npt.NDArray[np.uint8], object_detection_predictions: List[Dict[str, Any]]
) -> List[npt.NDArray[np.uint8]]:
    """Segment the detected objects, prompting MobileSAM with the detection boxes."""
    bboxes = get_bboxes(predictions=object_detection_predictions)
    masks: List[npt.NDArray[np.uint8]] = mobile_sam.infer(im=img, bboxes=bboxes)
    return masks


def query(
    task: str,
    img: npt.NDArray[np.uint8],
    text_queries: List[str],
    score_threshold: float,
) -> npt.NDArray[np.uint8]:
    """Gradio entry point: detect objects, optionally segment them, then annotate."""
    object_detection_predictions = object_detection(
        img=img, text_queries=text_queries, score_threshold=score_threshold
    )

    if task == "Object detection + segmentation (OWL-ViT + MobileSAM)":
        masks = object_segmentation(
            img=img, object_detection_predictions=object_detection_predictions
        )
        img = annotator.annotate(
            im=img, detection_predictions=object_detection_predictions, masks=masks
        )
        return img

    # Detection-only task: draw the bounding boxes, without masks.
    img = annotator.annotate(im=img, detection_predictions=object_detection_predictions)
    return img
description = """ |
|
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam nec purus et nunc tincidunt tincidunt. |
|
""" |
|
|
|
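
# Input components, in the same order as the parameters of `query`.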
demo_inputs = [
    gr.Dropdown(
        [
            "Object detection (OWL-ViT)",
            "Object detection + segmentation (OWL-ViT + MobileSAM)",
        ],
        label="Choose a task",
        value="Object detection (OWL-ViT)",
    ),
    gr.Image(),
    "text",
    gr.Slider(0, 1, value=0.1, label="Score threshold"),
]
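
# The example images come from Pollen Robotics' "reachy-doing-things" dataset.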
rdt_dataset = load_dataset("pollen-robotics/reachy-doing-things", split="train")

img_kitchen_detection = rdt_dataset[11]["image"]
img_kitchen_segmentation = rdt_dataset[12]["image"]

demo_examples = [
    [
        "Object detection (OWL-ViT)",
        img_kitchen_detection,
        ["kettle", "black mug", "sink", "blue mug", "sponge", "bag of chips"],
        0.15,
    ],
    [
        "Object detection + segmentation (OWL-ViT + MobileSAM)",
        img_kitchen_segmentation,
        ["blue mug", "paper cup", "kettle", "sponge"],
        0.12,
    ],
]
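
# Build the Gradio interface and launch the app.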
demo = gr.Interface(
    fn=query,
    inputs=demo_inputs,
    outputs="image",
    title="pollen-vision",
    description=description,
    examples=demo_examples,
)
demo.launch()