Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
machineuser
committed on
Commit
·
e7caf98
1
Parent(s):
7fd9bbe
Sync widgets demo
Browse files
packages/tasks/src/tasks/zero-shot-object-detection/about.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Use Cases
|
| 2 |
+
|
| 3 |
+
### Object Search
|
| 4 |
+
|
| 5 |
+
Zero-shot object detection models can be used in image search. Smartphones, for example, use zero-shot object detection models to detect entities (such as specific places or objects) and allow the user to search for the entity on the internet.
|
| 6 |
+
|
| 7 |
+
### Object Counting
|
| 8 |
+
|
| 9 |
+
Zero-shot object detection models are used to count instances of objects in a given image. This can include counting the objects in warehouses or stores or the number of visitors in a store. They are also used to manage crowds at events to prevent disasters.
|
| 10 |
+
|
| 11 |
+
## Inference
|
| 12 |
+
|
| 13 |
+
You can infer with zero-shot object detection models through the `zero-shot-object-detection` pipeline. When calling the pipeline, you just need to specify a path or HTTP link to an image and the candidate labels.
|
| 14 |
+
|
| 15 |
+
```python
|
| 16 |
+
from transformers import pipeline
|
| 17 |
+
from PIL import Image
|
| 18 |
+
|
| 19 |
+
image = Image.open("my-image.png").convert("RGB")
|
| 20 |
+
|
| 21 |
+
detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
|
| 22 |
+
|
| 23 |
+
predictions = detector(
|
| 24 |
+
image,
|
| 25 |
+
candidate_labels=["a photo of a cat", "a photo of a dog"],
|
| 26 |
+
)
|
| 27 |
+
|
| 28 |
+
# [{'score': 0.95,
|
| 29 |
+
# 'label': 'a photo of a cat',
|
| 30 |
+
# 'box': {'xmin': 180, 'ymin': 71, 'xmax': 271, 'ymax': 178}},
|
| 31 |
+
# ...
|
| 32 |
+
# ]
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
## Useful Resources
|
| 36 |
+
|
| 37 |
+
- [Zero-shot object detection task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_object_detection)
|
| 38 |
+
|
| 39 |
+
This page was made possible thanks to the efforts of [Victor Guichard](https://huggingface.co/VictorGuichard)
|
packages/tasks/src/tasks/zero-shot-object-detection/data.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { TaskDataCustom } from "..";
|
| 2 |
+
|
| 3 |
+
const taskData: TaskDataCustom = {
|
| 4 |
+
datasets: [],
|
| 5 |
+
demo: {
|
| 6 |
+
inputs: [
|
| 7 |
+
{
|
| 8 |
+
filename: "zero-shot-object-detection-input.jpg",
|
| 9 |
+
type: "img",
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
label: "Classes",
|
| 13 |
+
content: "cat, dog, bird",
|
| 14 |
+
type: "text",
|
| 15 |
+
},
|
| 16 |
+
],
|
| 17 |
+
outputs: [
|
| 18 |
+
{
|
| 19 |
+
filename: "zero-shot-object-detection-output.jpg",
|
| 20 |
+
type: "img",
|
| 21 |
+
},
|
| 22 |
+
],
|
| 23 |
+
},
|
| 24 |
+
metrics: [
|
| 25 |
+
{
|
| 26 |
+
description:
|
| 27 |
+
"The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
|
| 28 |
+
id: "Average Precision",
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
|
| 32 |
+
id: "Mean Average Precision",
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
description:
|
| 36 |
+
"The APα metric is the Average Precision at the IoU threshold of a α value, for example, AP50 and AP75",
|
| 37 |
+
id: "APα",
|
| 38 |
+
},
|
| 39 |
+
],
|
| 40 |
+
models: [
|
| 41 |
+
{
|
| 42 |
+
description: "Solid zero-shot object detection model that uses CLIP as backbone.",
|
| 43 |
+
id: "google/owlvit-base-patch32",
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
description: "The improved version of the owlvit model.",
|
| 47 |
+
id: "google/owlv2-base-patch16-ensemble",
|
| 48 |
+
},
|
| 49 |
+
],
|
| 50 |
+
spaces: [],
|
| 51 |
+
summary:
|
| 52 |
+
"Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
|
| 53 |
+
widgetModels: [],
|
| 54 |
+
youtubeId: "",
|
| 55 |
+
};
|
| 56 |
+
|
| 57 |
+
export default taskData;
|