Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
machineuser
committed on
Commit
·
e7caf98
1
Parent(s):
7fd9bbe
Sync widgets demo
Browse files
packages/tasks/src/tasks/zero-shot-object-detection/about.md
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
## Use Cases

### Object Search

Zero-shot object detection models can be used in image search. Smartphones, for example, use zero-shot object detection models to detect entities (such as specific places or objects) and allow the user to search for the entity on the internet.

### Object Counting

Zero-shot object detection models are used to count instances of objects in a given image. This can include counting the objects in warehouses or stores or the number of visitors in a store. They are also used to manage crowds at events to prevent disasters.

## Inference

You can infer with zero-shot object detection models through the `zero-shot-object-detection` pipeline. When calling the pipeline, you just need to specify a path or HTTP link to an image and the candidate labels.

```python
from transformers import pipeline
from PIL import Image

image = Image.open("my-image.png").convert("RGB")

detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")

predictions = detector(
    image,
    candidate_labels=["a photo of a cat", "a photo of a dog"],
)

# [{'score': 0.95,
#   'label': 'a photo of a cat',
#   'box': {'xmin': 180, 'ymin': 71, 'xmax': 271, 'ymax': 178}},
# ...
# ]
```

# Useful Resources

- [Zero-shot object detection task guide](https://huggingface.co/docs/transformers/tasks/zero_shot_object_detection)

This page was made possible thanks to the efforts of [Victor Guichard](https://huggingface.co/VictorGuichard)
packages/tasks/src/tasks/zero-shot-object-detection/data.ts
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import type { TaskDataCustom } from "..";
|
2 |
+
|
3 |
+
const taskData: TaskDataCustom = {
|
4 |
+
datasets: [],
|
5 |
+
demo: {
|
6 |
+
inputs: [
|
7 |
+
{
|
8 |
+
filename: "zero-shot-object-detection-input.jpg",
|
9 |
+
type: "img",
|
10 |
+
},
|
11 |
+
{
|
12 |
+
label: "Classes",
|
13 |
+
content: "cat, dog, bird",
|
14 |
+
type: "text",
|
15 |
+
},
|
16 |
+
],
|
17 |
+
outputs: [
|
18 |
+
{
|
19 |
+
filename: "zero-shot-object-detection-output.jpg",
|
20 |
+
type: "img",
|
21 |
+
},
|
22 |
+
],
|
23 |
+
},
|
24 |
+
metrics: [
|
25 |
+
{
|
26 |
+
description:
|
27 |
+
"The Average Precision (AP) metric is the Area Under the PR Curve (AUC-PR). It is calculated for each class separately",
|
28 |
+
id: "Average Precision",
|
29 |
+
},
|
30 |
+
{
|
31 |
+
description: "The Mean Average Precision (mAP) metric is the overall average of the AP values",
|
32 |
+
id: "Mean Average Precision",
|
33 |
+
},
|
34 |
+
{
|
35 |
+
description:
|
36 |
+
"The APα metric is the Average Precision at the IoU threshold of a α value, for example, AP50 and AP75",
|
37 |
+
id: "APα",
|
38 |
+
},
|
39 |
+
],
|
40 |
+
models: [
|
41 |
+
{
|
42 |
+
description: "Solid zero-shot object detection model that uses CLIP as backbone.",
|
43 |
+
id: "google/owlvit-base-patch32",
|
44 |
+
},
|
45 |
+
{
|
46 |
+
description: "The improved version of the owlvit model.",
|
47 |
+
id: "google/owlv2-base-patch16-ensemble",
|
48 |
+
},
|
49 |
+
],
|
50 |
+
spaces: [],
|
51 |
+
summary:
|
52 |
+
"Zero-shot object detection is a computer vision task to detect objects and their classes in images, without any prior training or knowledge of the classes. Zero-shot object detection models receive an image as input, as well as a list of candidate classes, and output the bounding boxes and labels where the objects have been detected.",
|
53 |
+
widgetModels: [],
|
54 |
+
youtubeId: "",
|
55 |
+
};
|
56 |
+
|
57 |
+
export default taskData;
|