Upload 25 files
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/README.md +48 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin +3 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/fastsam_s.onnx +3 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/dogs.jpg +0 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/onnx_export.py +50 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/prompt.py +456 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/run_test.py +224 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/tools_pt.py +372 -0
- model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/utils.py +86 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/README.md +48 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/models/cutoff_fastsam_s_fp16.qnn216.ctx.bin +3 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/dogs.jpg +0 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/onnx_export.py +50 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/prompt.py +456 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/run_test.py +224 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/tools_pt.py +372 -0
- model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/utils.py +86 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/README.md +48 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin +3 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/dogs.jpg +0 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/onnx_export.py +50 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/prompt.py +456 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/run_test.py +224 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/tools_pt.py +372 -0
- model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/utils.py +86 -0
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/README.md
ADDED
@@ -0,0 +1,48 @@
## Model Information

### Source model

- Input shape: 640x640
- Number of parameters: 11.24M
- Model size: 45.0M
- Output shape: 1x37x8400, 1x32x160x160

Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: FV01 QCS6490

## Inference with AidLite SDK

### SDK installation

Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN package that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK (C++) library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo

```bash
cd fastsam_s/model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite
export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0

python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
```
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fa2680a1ccd80463b4faa7b38801e5bb31b8c9d3060db8822b5f0b23b43dc57e
size 12523024
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/fastsam_s.onnx
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:eff808553c465e3e1aa3ac7753ec22818cb7c67df1f6ed02e9ed036981e09edd
size 47284660
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/dogs.jpg
ADDED
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/onnx_export.py
ADDED
@@ -0,0 +1,50 @@
import torch
import cv2
import os
import sys

from ultralytics.models.fastsam import FastSAM

class Fast_SAM(torch.nn.Module):
    """Exportable FastSAM model, end-to-end."""

    def __init__(self) -> None:
        super().__init__()
        pt_name = './models/FastSAM-s.pt'
        self.model = FastSAM(pt_name).model

    def forward(self, image: torch.Tensor):
        """
        Run FastSAM on `image`, and produce high quality segmentation masks.
        Faster than SAM as it is based on YOLOv8.

        Parameters:
            image: Pixel values pre-processed for encoder consumption.
                   Range: float[0, 1]
                   3-channel Color Space: BGR
        Returns:

        """
        predictions = self.model(image)
        # Return predictions as a tuple instead of nested tuple.
        return (predictions[0], predictions[1][2])


model = Fast_SAM()
num_params = sum(p.numel() for p in model.parameters())
print(f'Number of FastSAM-s parameters: {num_params}')
dummy_input = torch.randn([1, 3, 640, 640], dtype=torch.float32)
source_model = torch.jit.trace(
    model.to("cpu"), dummy_input, check_trace=False
)
torch.onnx.export(model,                     # model being run
                  dummy_input,               # model input (or a tuple for multiple inputs)
                  "./models/fastsam_s.onnx", # where to save the model
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=12,          # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names=['input'],     # the model's input names
                  output_names=['boxes', 'mask'],
                  verbose=True,
                  )
print("Converted to ONNX successfully!")
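A quick way to sanity-check the export above is to run the ONNX file once on random input and compare the output shapes with the ones listed in the README (1x37x8400 and 1x32x160x160). This is a minimal sketch, assuming `onnxruntime` is installed and `./models/fastsam_s.onnx` was produced by the script above.

```python
import numpy as np
import onnxruntime

# Load the exported model and run a single dummy inference.
session = onnxruntime.InferenceSession("./models/fastsam_s.onnx")
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
boxes, mask = session.run(["boxes", "mask"], {"input": dummy})
print(boxes.shape)  # expected: (1, 37, 8400)
print(mask.shape)   # expected: (1, 32, 160, 160)
```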
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/prompt.py
ADDED
@@ -0,0 +1,456 @@
import os
import sys
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from utils import image_to_np_ndarray
from PIL import Image


class FastSAMPrompt:

    def __init__(self, image, results, device='cpu'):
        if isinstance(image, str) or isinstance(image, Image.Image):
            image = image_to_np_ndarray(image)
        self.device = device
        self.results = results
        self.img = image

    def _segment_image(self, image, bbox):
        if isinstance(image, Image.Image):
            image_array = np.array(image)
        else:
            image_array = image
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
        segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
        segmented_image = Image.fromarray(segmented_image_array)
        black_image = Image.new('RGB', image.size, (255, 255, 255))
        # transparency_mask = np.zeros_like((), dtype=np.uint8)
        transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
        transparency_mask[y1:y2, x1:x2] = 255
        transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
        black_image.paste(segmented_image, mask=transparency_mask_image)
        return black_image

    def _format_results(self, result, filter=0):
        annotations = []
        n = len(result.masks.data)
        for i in range(n):
            annotation = {}
            mask = result.masks.data[i] == 1.0

            if torch.sum(mask) < filter:
                continue
            annotation['id'] = i
            annotation['segmentation'] = mask.cpu().numpy()
            annotation['bbox'] = result.boxes.data[i]
            annotation['score'] = result.boxes.conf[i]
            annotation['area'] = annotation['segmentation'].sum()
            annotations.append(annotation)
        return annotations

    def filter_masks(annotations):  # filter overlapping masks
        annotations.sort(key=lambda x: x['area'], reverse=True)
        to_remove = set()
        for i in range(0, len(annotations)):
            a = annotations[i]
            for j in range(i + 1, len(annotations)):
                b = annotations[j]
                if i != j and j not in to_remove:
                    # check if
                    if b['area'] < a['area']:
                        if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
                            to_remove.add(j)

        return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove

    def _get_bbox_from_mask(self, mask):
        mask = mask.astype(np.uint8)
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        x1, y1, w, h = cv2.boundingRect(contours[0])
        x2, y2 = x1 + w, y1 + h
        if len(contours) > 1:
            for b in contours:
                x_t, y_t, w_t, h_t = cv2.boundingRect(b)
                # Merge multiple bounding boxes into one.
                x1 = min(x1, x_t)
                y1 = min(y1, y_t)
                x2 = max(x2, x_t + w_t)
                y2 = max(y2, y_t + h_t)
            h = y2 - y1
            w = x2 - x1
        return [x1, y1, x2, y2]

    def plot_to_result(self,
                       annotations,
                       bboxes=None,
                       points=None,
                       point_label=None,
                       mask_random_color=True,
                       better_quality=True,
                       retina=False,
                       withContours=True) -> np.ndarray:
        if isinstance(annotations[0], dict):
            annotations = [annotation['segmentation'] for annotation in annotations]
        image = self.img
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_h = image.shape[0]
        original_w = image.shape[1]
        if sys.platform == "darwin":
            plt.switch_backend("TkAgg")
        plt.figure(figsize=(original_w / 100, original_h / 100))
        # Add subplot with no margin.
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        plt.imshow(image)
        if better_quality:
            if isinstance(annotations[0], torch.Tensor):
                annotations = np.array(annotations.cpu())
            for i, mask in enumerate(annotations):
                mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
                annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
        if self.device == 'cpu':
            annotations = np.array(annotations)
            self.fast_show_mask(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bboxes=bboxes,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        else:
            if isinstance(annotations[0], np.ndarray):
                annotations = torch.from_numpy(annotations)
            self.fast_show_mask_gpu(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bboxes=bboxes,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        if isinstance(annotations, torch.Tensor):
            annotations = annotations.cpu().numpy()
        if withContours:
            contour_all = []
            temp = np.zeros((original_h, original_w, 1))
            for i, mask in enumerate(annotations):
                if type(mask) == dict:
                    mask = mask['segmentation']
                annotation = mask.astype(np.uint8)
                if not retina:
                    annotation = cv2.resize(
                        annotation,
                        (original_w, original_h),
                        interpolation=cv2.INTER_NEAREST,
                    )
                contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                for contour in contours:
                    contour_all.append(contour)
            cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
            color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
            contour_mask = temp / 255 * color.reshape(1, 1, -1)
            plt.imshow(contour_mask)

        plt.axis('off')
        fig = plt.gcf()
        plt.draw()

        try:
            buf = fig.canvas.tostring_rgb()
        except AttributeError:
            fig.canvas.draw()
            buf = fig.canvas.tostring_rgb()
        cols, rows = fig.canvas.get_width_height()
        img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
        result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        plt.close()
        return result

    # Remark for refactoring: IMO a function should do one thing only; storing the image and plotting should be
    # separated, and these do not necessarily need to be class methods but could be standalone utility functions
    # that the user can chain in their own scripts for more fine-grained control.
    def plot(self,
             annotations,
             output_path,
             bboxes=None,
             points=None,
             point_label=None,
             mask_random_color=True,
             better_quality=True,
             retina=False,
             withContours=True):
        if len(annotations) == 0:
            return None
        result = self.plot_to_result(
            annotations,
            bboxes,
            points,
            point_label,
            mask_random_color,
            better_quality,
            retina,
            withContours,
        )

        path = os.path.dirname(os.path.abspath(output_path))
        if not os.path.exists(path):
            os.makedirs(path)
        result = result[:, :, ::-1]
        cv2.imwrite(output_path, result)

    #   CPU post process
    def fast_show_mask(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        msak_sum = annotation.shape[0]
        height = annotation.shape[1]
        weight = annotation.shape[2]
        # Sort annotations based on area.
        areas = np.sum(annotation, axis=(1, 2))
        sorted_indices = np.argsort(areas)
        annotation = annotation[sorted_indices]

        index = (annotation != 0).argmax(axis=0)
        if random_color:
            color = np.random.random((msak_sum, 1, 1, 3))
        else:
            color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
        transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
        visual = np.concatenate([color, transparency], axis=-1)
        mask_image = np.expand_dims(annotation, -1) * visual

        show = np.zeros((height, weight, 4))
        h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]
        if bboxes is not None:
            for bbox in bboxes:
                x1, y1, x2, y2 = bbox
                ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw points
        if points is not None:
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )

        if not retinamask:
            show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show)

    def fast_show_mask_gpu(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        msak_sum = annotation.shape[0]
        height = annotation.shape[1]
        weight = annotation.shape[2]
        areas = torch.sum(annotation, dim=(1, 2))
        sorted_indices = torch.argsort(areas, descending=False)
        annotation = annotation[sorted_indices]
        # Find the index of the first non-zero value at each position.
        index = (annotation != 0).to(torch.long).argmax(dim=0)
        if random_color:
            color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
        else:
            color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
                30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
        transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
        visual = torch.cat([color, transparency], dim=-1)
        mask_image = torch.unsqueeze(annotation, -1) * visual
        # Select data according to the index. The index indicates which batch's data to choose at each position,
        # converting the mask_image into a single batch form.
        show = torch.zeros((height, weight, 4)).to(annotation.device)
        try:
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
        except:
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight))
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]
        show_cpu = show.cpu().numpy()
        if bboxes is not None:
            for bbox in bboxes:
                x1, y1, x2, y2 = bbox
                ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw points
        if points is not None:
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )
        if not retinamask:
            show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show_cpu)

    # clip
    @torch.no_grad()
    def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
        preprocessed_images = [preprocess(image).to(device) for image in elements]
        try:
            import clip  # for linear_assignment

        except (ImportError, AssertionError, AttributeError):
            from ultralytics.yolo.utils.checks import check_requirements

            check_requirements('git+https://github.com/openai/CLIP.git')  # required before installing lap from source
            import clip

        tokenized_text = clip.tokenize([search_text]).to(device)
        stacked_images = torch.stack(preprocessed_images)
        image_features = model.encode_image(stacked_images)
        text_features = model.encode_text(tokenized_text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        probs = 100.0 * image_features @ text_features.T
        return probs[:, 0].softmax(dim=0)

    def _crop_image(self, format_results):

        image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
        ori_w, ori_h = image.size
        annotations = format_results
        mask_h, mask_w = annotations[0]['segmentation'].shape
        if ori_w != mask_w or ori_h != mask_h:
            image = image.resize((mask_w, mask_h))
        cropped_boxes = []
        cropped_images = []
        not_crop = []
        filter_id = []
        # annotations, _ = filter_masks(annotations)
        # filter_id = list(_)
        for _, mask in enumerate(annotations):
            if np.sum(mask['segmentation']) <= 100:
                filter_id.append(_)
                continue
            bbox = self._get_bbox_from_mask(mask['segmentation'])  # bbox of the mask
            cropped_boxes.append(self._segment_image(image, bbox))
            # cropped_boxes.append(segment_image(image,mask["segmentation"]))
            cropped_images.append(bbox)  # Save the bounding box of the cropped image.

        return cropped_boxes, cropped_images, not_crop, filter_id, annotations

    def box_prompt(self, bbox=None, bboxes=None):
        if self.results is None:
            return []
        assert bbox or bboxes
        if bboxes is None:
            bboxes = [bbox]
        max_iou_index = []
        for bbox in bboxes:
            assert (bbox[2] != 0 and bbox[3] != 0)
            masks = self.results[0].masks.data
            target_height = self.img.shape[0]
            target_width = self.img.shape[1]
            h = masks.shape[1]
            w = masks.shape[2]
            if h != target_height or w != target_width:
                bbox = [
                    int(bbox[0] * w / target_width),
                    int(bbox[1] * h / target_height),
                    int(bbox[2] * w / target_width),
                    int(bbox[3] * h / target_height), ]
            bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
            bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
            bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
            bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h

            # IoUs = torch.zeros(len(masks), dtype=torch.float32)
            bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])

            masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
            orig_masks_area = torch.sum(masks, dim=(1, 2))

            union = bbox_area + orig_masks_area - masks_area
            IoUs = masks_area / union
            max_iou_index.append(int(torch.argmax(IoUs)))
        max_iou_index = list(set(max_iou_index))
        return np.array(masks[max_iou_index].cpu().numpy())

    def point_prompt(self, points, pointlabel):  # numpy
        if self.results is None:
            return []
        masks = self._format_results(self.results[0], 0)
        target_height = self.img.shape[0]
        target_width = self.img.shape[1]
        h = masks[0]['segmentation'].shape[0]
        w = masks[0]['segmentation'].shape[1]
        if h != target_height or w != target_width:
            points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
        onemask = np.zeros((h, w))
        masks = sorted(masks, key=lambda x: x['area'], reverse=True)
        for i, annotation in enumerate(masks):
            if type(annotation) == dict:
                mask = annotation['segmentation']
            else:
                mask = annotation
            for i, point in enumerate(points):
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
                    onemask[mask] = 1
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
                    onemask[mask] = 0
        onemask = onemask >= 1
        return np.array([onemask])

    def text_prompt(self, text):
        if self.results is None:
            return []
        format_results = self._format_results(self.results[0], 0)
        cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
        clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
        scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
        max_idx = scores.argsort()
        max_idx = max_idx[-1]
        max_idx += sum(np.array(filter_id) <= int(max_idx))
        return np.array([annotations[max_idx]['segmentation']])

    def everything_prompt(self):
        if self.results is None:
            return []
        return self.results[0].masks.data
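For reference, `FastSAMPrompt` is driven from `qnn_run` in `run_test.py`; a small standalone wrapper could look like the sketch below. This is only an illustrative assumption: `results` is assumed to be the list of ultralytics `Results` objects returned by `qnn_predict.postprocess`, and `save_point_mask`, its point coordinates, and the output path are hypothetical names chosen here.

```python
from prompt import FastSAMPrompt

def save_point_mask(image_path, results, out_path, point=(320, 320)):
    """Select the mask under `point` and write the visualisation to `out_path`."""
    prompt_process = FastSAMPrompt(image_path, results, device="cpu")
    ann = prompt_process.point_prompt(points=[list(point)], pointlabel=[1])
    prompt_process.plot(annotations=ann, output_path=out_path,
                        mask_random_color=True, withContours=True)
```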
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/run_test.py
ADDED
@@ -0,0 +1,224 @@
import os
import sys
import cv2
import numpy as np
import onnxruntime
import time
import matplotlib.pyplot as plt
import torch
from ultralytics.engine.results import Results
from tools_pt import *
from prompt import FastSAMPrompt
import aidlite
import argparse
import ast

# Define the cosine-similarity metric
def get_acc(onnx_out, other_out):
    cosine_similarity = np.dot(np.array(onnx_out), np.array(other_out)) / (np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
    return cosine_similarity

def cal_sigmoid(x):
    return 1 / (1 + np.exp(-x))

class qnn_predict(object):
    def __init__(self, inputshape, outputshape, args) -> None:
        aidlite.set_log_level(aidlite.LogLevel.INFO)
        aidlite.log_to_stderr()
        print(f"Aidlite library version : {aidlite.get_library_version()}")
        print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
        config = aidlite.Config.create_instance()
        if config is None:
            print("Create config failed !")
        config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        config.framework_type = aidlite.FrameworkType.TYPE_QNN
        config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        config.is_quantify_model = 1

        model = aidlite.Model.create_instance(args.target_model)
        if model is None:
            print("Create model failed !")

        self.input_shape = inputshape
        self.out_shape = outputshape
        model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
        result = self.interpreter.init()
        if result != 0:
            print("interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")
        print("detect model load success!")

        self.conf = 0.4
        self.iou = 0.9
        self.size = 640
        self.agnostic_nms = False
        self.max_det = 300
        self.names = ['object']
        self.classes = None
        self.retina_masks = True

    def pretreat_img(self, img):
        scale = 1 / 255.
        img_size = cv2.resize(img, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        float_img = img_size.astype('float32')
        float_img = float_img * scale
        float_img = float_img[:, :, ::-1]
        return float_img

    def postprocess(self, preds, img, orig_imgs):
        """TODO: filter by classes."""
        p = non_max_suppression(torch.from_numpy(preds[0]),
                                self.conf,
                                self.iou,
                                agnostic=self.agnostic_nms,
                                max_det=self.max_det,
                                nc=len(self.names),
                                classes=self.classes)

        results = []
        if len(p) == 0 or len(p[0]) == 0:
            print("No object detected.")
            return results

        full_box = torch.zeros_like(p[0][0])
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            full_box[0][4] = p[0][critical_iou_index][:, 4]
            full_box[0][6:] = p[0][critical_iou_index][:, 6:]
            p[0][critical_iou_index] = full_box

        # proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        proto = torch.from_numpy(preds[-1])
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            path = img[0]  # self.batch[0]
            img_path = path[i] if isinstance(path, list) else path
            if not len(pred):  # save empty boxes
                results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
                continue
            if self.retina_masks:
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(
                Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
        return results

    def qnn_run(self, orig_imgs, img_path, args):
        input_img_f = self.pretreat_img(orig_imgs)  # resize the image (HWC)
        # print("qnn_input:",input_img_f)
        # encoder texts
        input_img = np.expand_dims(input_img_f, 0)

        invoke_time = []
        for i in range(args.invoke_nums):
            result = self.interpreter.set_input_tensor(0, input_img.data)
            t0 = time.time()
            result = self.interpreter.invoke()
            t1 = time.time()
            cost_time = (t1 - t0) * 1000
            invoke_time.append(cost_time)
        mask_ = self.interpreter.get_output_tensor(0)
        concat_ = self.interpreter.get_output_tensor(1)
        mul_ = self.interpreter.get_output_tensor(3)
        split_ = self.interpreter.get_output_tensor(2)
        mask_ = mask_.reshape(*self.out_shape[3])
        mask_ = mask_.transpose((0, 3, 1, 2))
        concat_ = concat_.reshape(*self.out_shape[2])
        mul_ = mul_.reshape(*self.out_shape[1])
        split_ = split_.reshape(*self.out_shape[0])
        sig_ = cal_sigmoid(split_)

        output_concat = np.concatenate((mul_, sig_), axis=1)
        output_concat = np.concatenate((output_concat, concat_), axis=1)

        # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
        ## timing statistics
        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / args.invoke_nums
        var_invoketime = np.var(invoke_time)
        print("========================================")
        print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("========================================")

        qnn_out = [np.array(output_concat), np.array(mask_)]
        # print("qnn predict out:",qnn_out)

        nchw_img = input_img.transpose(0, 3, 1, 2)
        everything_results = self.postprocess(qnn_out, nchw_img, [orig_imgs])
        # print("everything_results: ",everything_results)

        prompt_process = FastSAMPrompt(img_path, everything_results, device="cpu")

        # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
        try:
            if args.point_prompt == [[0, 0]]:
                ann = prompt_process.everything_prompt()
            else:
                ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
            out_name = os.path.basename(img_path).split(".")[0]
            if True:  # save the picture
                outpath = "python/"
                if not os.path.exists(outpath):
                    os.mkdir(outpath)
                prompt_process.plot(
                    annotations=ann,
                    output_path=os.path.join(outpath, out_name + "_result_int8.jpg"),
                    mask_random_color=True,
                    better_quality=True,
                    retina=False,
                    withContours=True,
                )
            else:
                plt.figure()
                prompt_process.fast_show_mask(annotation=ann,
                                              ax=plt)
        except Exception as e:
            print(f"Warning: An error occurred in the picture {img_path} prediction - {e}")
        return [mask_.reshape(-1), output_concat.reshape(-1)]


def parser_args():
    parser = argparse.ArgumentParser(description="Run model benchmarks")
    parser.add_argument('--target_model', type=str, default='models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin', help="inference model path")
    parser.add_argument('--source_model', type=str, default='models/fastsam_s.onnx', help="original model path")
    parser.add_argument('--imgs', type=str, default='python/dogs.jpg', help="Predict images path")
    parser.add_argument('--invoke_nums', type=int, default=10, help="Inference nums")
    parser.add_argument('--point_prompt', type=str, default="[[0,0]]", help="example:[[x1,y1],[x2,y2]]")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parser_args()
    inputshape = [[1, 640, 640, 3]]
    outputshape = [[1, 1, 8400], [1, 4, 8400], [1, 32, 8400], [1, 160, 160, 32]]
    args.point_prompt = ast.literal_eval(args.point_prompt)

    predict = qnn_predict(inputshape, outputshape, args)
    if os.path.isdir(args.imgs):
        img_files = os.listdir(args.imgs)
        for fi in img_files:
            img_path = os.path.join(args.imgs, fi)
            im0s = cv2.imread(img_path)  # BGR
            im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
            predict.qnn_run(im0s, img_path, args)
    else:
        img_path = args.imgs
        im0s = cv2.imread(img_path)  # BGR
        im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
        qnn_result = predict.qnn_run(im0s, img_path, args)
    print("Prediction completed and the results are saved!")
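The `--point_prompt` flag is passed on the command line as a string and converted back into a nested list with `ast.literal_eval` before `qnn_run` uses it. The snippet below just illustrates that round trip with made-up coordinates.

```python
import ast

# Same parsing step as in the __main__ block above.
points = ast.literal_eval("[[320, 320], [100, 200]]")
print(points)        # [[320, 320], [100, 200]]
print(points[0][0])  # 320
```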
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/tools_pt.py
ADDED
@@ -0,0 +1,372 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torchvision
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def clip_boxes(boxes, shape):
|
10 |
+
"""
|
11 |
+
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
boxes (torch.Tensor): the bounding boxes to clip
|
15 |
+
shape (tuple): the shape of the image
|
16 |
+
"""
|
17 |
+
if isinstance(boxes, torch.Tensor): # faster individually
|
18 |
+
boxes[..., 0].clamp_(0, shape[1]) # x1
|
19 |
+
boxes[..., 1].clamp_(0, shape[0]) # y1
|
20 |
+
boxes[..., 2].clamp_(0, shape[1]) # x2
|
21 |
+
boxes[..., 3].clamp_(0, shape[0]) # y2
|
22 |
+
else: # np.array (faster grouped)
|
23 |
+
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
|
24 |
+
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
|
25 |
+
|
26 |
+
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
27 |
+
"""
|
28 |
+
Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
|
29 |
+
(img1_shape) to the shape of a different image (img0_shape).
|
30 |
+
|
31 |
+
Args:
|
32 |
+
img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
|
33 |
+
boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
|
34 |
+
img0_shape (tuple): the shape of the target image, in the format of (height, width).
|
35 |
+
ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
|
36 |
+
calculated based on the size difference between the two images.
|
37 |
+
padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
|
38 |
+
rescaling.
|
39 |
+
|
40 |
+
Returns:
|
41 |
+
boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
|
42 |
+
"""
|
43 |
+
if ratio_pad is None: # calculate from img0_shape
|
44 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
45 |
+
pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
|
46 |
+
(img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
|
47 |
+
else:
|
48 |
+
gain = ratio_pad[0][0]
|
49 |
+
pad = ratio_pad[1]
|
50 |
+
|
51 |
+
if padding:
|
52 |
+
boxes[..., [0, 2]] -= pad[0] # x padding
|
53 |
+
boxes[..., [1, 3]] -= pad[1] # y padding
|
54 |
+
boxes[..., :4] /= gain
|
55 |
+
clip_boxes(boxes, img0_shape)
|
56 |
+
return boxes
|
57 |
+
|
58 |
+
|
59 |
+
def xywh2xyxy(x):
|
60 |
+
"""
|
61 |
+
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
|
62 |
+
top-left corner and (x2, y2) is the bottom-right corner.
|
63 |
+
|
64 |
+
Args:
|
65 |
+
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
|
66 |
+
|
67 |
+
Returns:
|
68 |
+
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
|
69 |
+
"""
|
70 |
+
assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
|
71 |
+
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
|
72 |
+
dw = x[..., 2] / 2 # half-width
|
73 |
+
dh = x[..., 3] / 2 # half-height
|
74 |
+
y[..., 0] = x[..., 0] - dw # top left x
|
75 |
+
y[..., 1] = x[..., 1] - dh # top left y
|
76 |
+
y[..., 2] = x[..., 0] + dw # bottom right x
|
77 |
+
y[..., 3] = x[..., 1] + dh # bottom right y
|
78 |
+
return y
|
79 |
+
|
80 |
+
|
81 |
+
def non_max_suppression(
|
82 |
+
prediction,
|
83 |
+
conf_thres=0.25,
|
84 |
+
iou_thres=0.45,
|
85 |
+
classes=None,
|
86 |
+
agnostic=False,
|
87 |
+
multi_label=False,
|
88 |
+
labels=(),
|
89 |
+
max_det=300,
|
90 |
+
nc=0, # number of classes (optional)
|
91 |
+
max_time_img=0.05,
|
92 |
+
max_nms=30000,
|
93 |
+
max_wh=7680,
|
94 |
+
):
|
95 |
+
"""
|
96 |
+
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
|
97 |
+
|
98 |
+
Args:
|
99 |
+
prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
|
100 |
+
containing the predicted boxes, classes, and masks. The tensor should be in the format
|
101 |
+
output by a model, such as YOLO.
|
102 |
+
conf_thres (float): The confidence threshold below which boxes will be filtered out.
|
103 |
+
Valid values are between 0.0 and 1.0.
|
104 |
+
iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
|
105 |
+
Valid values are between 0.0 and 1.0.
|
106 |
+
classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
|
107 |
+
agnostic (bool): If True, the model is agnostic to the number of classes, and all
|
108 |
+
classes will be considered as one.
|
109 |
+
multi_label (bool): If True, each box may have multiple labels.
|
110 |
+
labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
|
111 |
+
list contains the apriori labels for a given image. The list should be in the format
|
112 |
+
output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
|
113 |
+
max_det (int): The maximum number of boxes to keep after NMS.
|
114 |
+
nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
|
115 |
+
max_time_img (float): The maximum time (seconds) for processing one image.
|
116 |
+
max_nms (int): The maximum number of boxes into torchvision.ops.nms().
|
117 |
+
max_wh (int): The maximum box width and height in pixels
|
118 |
+
|
119 |
+
Returns:
|
120 |
+
(List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
|
121 |
+
shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
|
122 |
+
(x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
|
123 |
+
"""
|
124 |
+
|
125 |
+
# Checks
|
126 |
+
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
127 |
+
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
128 |
+
if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
|
129 |
+
prediction = prediction[0] # select only inference output
|
130 |
+
|
131 |
+
device = prediction.device
|
132 |
+
mps = 'mps' in device.type # Apple MPS
|
133 |
+
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
|
134 |
+
prediction = prediction.cpu()
|
135 |
+
bs = prediction.shape[0] # batch size
|
136 |
+
nc = nc or (prediction.shape[1] - 4) # number of classes
|
137 |
+
nm = prediction.shape[1] - nc - 4
|
138 |
+
mi = 4 + nc # mask start index
|
139 |
+
xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
|
140 |
+
|
141 |
+
# Settings
|
142 |
+
# min_wh = 2 # (pixels) minimum box width and height
|
143 |
+
time_limit = 0.5 + max_time_img * bs # seconds to quit after
|
144 |
+
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
145 |
+
|
146 |
+
prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
|
147 |
+
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
|
148 |
+
|
149 |
+
t = time.time()
|
150 |
+
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
|
151 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
152 |
+
# Apply constraints
|
153 |
+
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
154 |
+
x = x[xc[xi]] # confidence
|
155 |
+
|
156 |
+
# Cat apriori labels if autolabelling
|
157 |
+
if labels and len(labels[xi]):
|
158 |
+
lb = labels[xi]
|
159 |
+
v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
|
160 |
+
v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
|
161 |
+
v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
|
162 |
+
x = torch.cat((x, v), 0)
|
163 |
+
|
164 |
+
# If none remain process next image
|
165 |
+
if not x.shape[0]:
|
166 |
+
continue
|
167 |
+
|
168 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
169 |
+
box, cls, mask = x.split((4, nc, nm), 1)
|
170 |
+
|
171 |
+
if multi_label:
|
172 |
+
i, j = torch.where(cls > conf_thres)
|
173 |
+
x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
|
174 |
+
else: # best class only
|
175 |
+
conf, j = cls.max(1, keepdim=True)
|
176 |
+
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
|
177 |
+
|
178 |
+
# Filter by class
|
179 |
+
if classes is not None:
|
180 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
181 |
+
|
182 |
+
# Check shape
|
183 |
+
n = x.shape[0] # number of boxes
|
184 |
+
if not n: # no boxes
|
185 |
+
continue
|
186 |
+
if n > max_nms: # excess boxes
|
187 |
+
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
|
188 |
+
|
189 |
+
# Batched NMS
|
190 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
191 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
192 |
+
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
193 |
+
i = i[:max_det] # limit detections
|
194 |
+
|
195 |
+
# # Experimental
|
196 |
+
# merge = False # use merge-NMS
|
197 |
+
# if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
198 |
+
# # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
199 |
+
# from .metrics import box_iou
|
200 |
+
# iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
201 |
+
# weights = iou * scores[None] # box weights
|
202 |
+
# x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
203 |
+
# redundant = True # require redundant detections
|
204 |
+
# if redundant:
|
205 |
+
# i = i[iou.sum(1) > 1] # require redundancy
|
206 |
+
|
207 |
+
output[xi] = x[i]
|
208 |
+
if mps:
|
209 |
+
output[xi] = output[xi].to(device)
|
210 |
+
# if (time.time() - t) > time_limit:
|
211 |
+
# LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
|
212 |
+
# break # time limit exceeded
|
213 |
+
|
214 |
+
return output
|
215 |
+
|
216 |
+
|
217 |
+
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
|
218 |
+
'''Adjust bounding boxes to stick to image border if they are within a certain threshold.
|
219 |
+
Args:
|
220 |
+
boxes: (n, 4)
|
221 |
+
image_shape: (height, width)
|
222 |
+
threshold: pixel threshold
|
223 |
+
Returns:
|
224 |
+
adjusted_boxes: adjusted bounding boxes
|
225 |
+
'''
|
226 |
+
|
227 |
+
# Image dimensions
|
228 |
+
h, w = image_shape
|
229 |
+
|
230 |
+
# Adjust boxes
|
231 |
+
boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
|
232 |
+
0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
|
233 |
+
boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
|
234 |
+
0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
|
235 |
+
boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
|
236 |
+
w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
|
237 |
+
boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
|
238 |
+
h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
|
239 |
+
|
240 |
+
return boxes
|
241 |
+
|
242 |
+
def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
|
243 |
+
'''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
|
244 |
+
Args:
|
245 |
+
box1: (4, )
|
246 |
+
boxes: (n, 4)
|
247 |
+
Returns:
|
248 |
+
high_iou_indices: Indices of boxes with IoU > thres
|
249 |
+
'''
|
250 |
+
boxes = adjust_bboxes_to_image_border(boxes, image_shape)
|
251 |
+
# obtain coordinates for intersections
|
252 |
+
x1 = torch.max(box1[0], boxes[:, 0])
|
253 |
+
y1 = torch.max(box1[1], boxes[:, 1])
|
254 |
+
x2 = torch.min(box1[2], boxes[:, 2])
|
255 |
+
y2 = torch.min(box1[3], boxes[:, 3])
|
256 |
+
|
257 |
+
# compute the area of intersection
|
258 |
+
intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
|
259 |
+
|
260 |
+
# compute the area of both individual boxes
|
261 |
+
box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
|
262 |
+
box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
|
263 |
+
|
264 |
+
# compute the area of union
|
265 |
+
union = box1_area + box2_area - intersection
|
266 |
+
|
267 |
+
# compute the IoU
|
268 |
+
iou = intersection / union # Should be shape (n, )
|
269 |
+
if raw_output:
|
270 |
+
if iou.numel() == 0:
|
271 |
+
return 0
|
272 |
+
return iou
|
273 |
+
|
274 |
+
# get indices of boxes with IoU > thres
|
275 |
+
high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
|
276 |
+
|
277 |
+
return high_iou_indices
|
278 |
+
|
279 |
+
|
def scale_masks(masks, shape, padding=True):
    """
    Rescale segment masks to shape.

    Args:
        masks (torch.Tensor): (N, C, H, W).
        shape (tuple): Height and width.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
    """
    mh, mw = masks.shape[2:]
    gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
    if padding:
        pad[0] /= 2
        pad[1] /= 2
    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # y, x
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
    masks = masks[..., top:bottom, left:right]

    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
    return masks


def process_mask_native(protos, masks_in, bboxes, shape):
    """
    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h, w)

    Returns:
        masks (torch.Tensor): The returned masks with dimensions [h, w, n]
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
    masks = scale_masks(masks[None], shape)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.5)


def crop_mask(masks, boxes):
    """
    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks
        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form

    Returns:
        (torch.Tensor): The masks are being cropped to the bounding box.
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
            are the height and width of the input image. The mask is applied to the bounding boxes.
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
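A minimal usage sketch for the mask helpers above (not part of the uploaded file; the tensor shapes are illustrative and assume the FastSAM-s head with 32 prototypes at 160x160):

```python
import torch

protos = torch.randn(32, 160, 160)        # mask prototypes (second model output)
mask_coeffs = torch.randn(5, 32)          # per-detection mask coefficients kept after NMS
boxes = torch.tensor([[10., 20., 200., 240.]]).repeat(5, 1)  # xyxy boxes in original-image pixels

# Upsample the prototype masks to the original image size, then crop to the boxes.
binary_masks = process_mask_native(protos, mask_coeffs, boxes, shape=(480, 640))
print(binary_masks.shape)  # torch.Size([5, 480, 640]), values thresholded at 0.5
```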
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/utils.py
ADDED
@@ -0,0 +1,86 @@
import numpy as np
import torch
from PIL import Image


def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
    Args:
        boxes: (n, 4)
        image_shape: (height, width)
        threshold: pixel threshold
    Returns:
        adjusted_boxes: adjusted bounding boxes
    '''

    # Image dimensions
    h, w = image_shape

    # Adjust boxes
    boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 0])  # x1
    boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 1])  # y1
    boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
        w, dtype=torch.float, device=boxes.device), boxes[:, 2])  # x2
    boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
        h, dtype=torch.float, device=boxes.device), boxes[:, 3])  # y2

    return boxes


def convert_box_xywh_to_xyxy(box):
    x1 = box[0]
    y1 = box[1]
    x2 = box[0] + box[2]
    y2 = box[1] + box[3]
    return [x1, y1, x2, y2]


def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
    Args:
        box1: (4, )
        boxes: (n, 4)
    Returns:
        high_iou_indices: Indices of boxes with IoU > thres
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
    # obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

    # compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # compute the area of union
    union = box1_area + box2_area - intersection

    # compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        if iou.numel() == 0:
            return 0
        return iou

    # get indices of boxes with IoU > thres
    high_iou_indices = torch.nonzero(iou > iou_thres).flatten()

    return high_iou_indices


def image_to_np_ndarray(image):
    if type(image) is str:
        return np.array(Image.open(image))
    elif issubclass(type(image), Image.Image):
        return np.array(image)
    elif type(image) is np.ndarray:
        return image
    return None
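A short illustrative check of `bbox_iou` (not part of the uploaded file): boxes are first snapped to the image border, then the indices of candidates whose IoU with `box1` exceeds `iou_thres` are returned. Note the in-place adjustment, hence the `clone()`.

```python
import torch

box1 = torch.tensor([0., 0., 640., 640.])                 # full-image box
boxes = torch.tensor([[5., 8., 635., 630.],               # within 20 px of the border -> snapped to it
                      [100., 100., 200., 200.]])
idx = bbox_iou(box1, boxes.clone(), iou_thres=0.9, image_shape=(640, 640))
print(idx)  # tensor([0]) -- only the first (near full-image) box passes the 0.9 threshold
```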
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/README.md
ADDED
@@ -0,0 +1,48 @@
## Model Information
## Source model
- Input shape: 640x640
- Number of parameters: 11.24M
- Model size: 45.0M
- Output shape: 1x37x8400, 1x32x160x160

Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)

### Converted model

- Precision: FP16
- Backend: QNN2.16
- Target Device: SNM972 QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN build that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# AidLite SDK C++ library check
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# AidLite SDK Python library check
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
```bash
cd fastsam_s/model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite
export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0

python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_fp16.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
```

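The demo above runs in "everything" mode. `run_test.py` also exposes a `--point_prompt` argument (parsed with `ast.literal_eval`), so a point-guided run could look like the sketch below; the coordinates are illustrative, not from the repository:

```bash
# Optional: segment around a specific foreground point instead of "everything" mode.
python3 ./python/run_test.py \
    --target_model ./models/cutoff_fastsam_s_fp16.qnn216.ctx.bin \
    --imgs ./python/dogs.jpg \
    --invoke_nums 10 \
    --point_prompt "[[320,320]]"
```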
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/models/cutoff_fastsam_s_fp16.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1efdad137601411928d741ca90c811c07d92709de49676660b225438d7357aa9
size 24249816
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/dogs.jpg
ADDED
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/onnx_export.py
ADDED
@@ -0,0 +1,50 @@
import torch
import cv2
import os
import sys

from ultralytics.models.fastsam import FastSAM

class Fast_SAM(torch.nn.Module):
    """Exportable FastSAM model, end-to-end."""

    def __init__(self) -> None:
        super().__init__()
        pt_name = './models/FastSAM-s.pt'
        self.model = FastSAM(pt_name).model

    def forward(self, image: torch.Tensor):
        """
        Run FastSAM on `image`, and produce high quality segmentation masks.
        Faster than SAM as it is based on YOLOv8.

        Parameters:
            image: Pixel values pre-processed for encoder consumption.
                   Range: float[0, 1]
                   3-channel Color Space: BGR
        Returns:
            A tuple of (box/coefficient predictions, mask prototypes).
        """
        predictions = self.model(image)
        # Return predictions as a tuple instead of nested tuple.
        return (predictions[0], predictions[1][2])


model = Fast_SAM()
num_params = sum(p.numel() for p in model.parameters())
print(f'Number of FastSAM-s parameters: {num_params}')
dummy_input = torch.randn([1, 3, 640, 640], dtype=torch.float32)
source_model = torch.jit.trace(
    model.to("cpu"), dummy_input, check_trace=False
)
torch.onnx.export(model,                      # model being run
                  dummy_input,                # model input (or a tuple for multiple inputs)
                  "./models/fastsam_s.onnx",  # where to save the model
                  export_params=True,         # store the trained parameter weights inside the model file
                  opset_version=12,           # the ONNX version to export the model to
                  do_constant_folding=True,   # whether to execute constant folding for optimization
                  input_names=['input'],      # the model's input names
                  output_names=['boxes', 'mask'],
                  verbose=True,
                  )
print("Converted to ONNX successfully!")
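A quick sanity check of the exported graph (a sketch, not part of the export script; it assumes `./models/fastsam_s.onnx` was produced by the export above and uses the `input`/`boxes`/`mask` names declared there):

```python
import numpy as np
import onnxruntime as ort

# Load the exported model on CPU and run a random 640x640 input through it.
sess = ort.InferenceSession("./models/fastsam_s.onnx", providers=["CPUExecutionProvider"])
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
boxes, mask = sess.run(["boxes", "mask"], {"input": dummy})
print(boxes.shape, mask.shape)  # expected (1, 37, 8400) and (1, 32, 160, 160)
```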
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/prompt.py
ADDED
@@ -0,0 +1,456 @@
import os
import sys
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
from utils import image_to_np_ndarray
from PIL import Image


class FastSAMPrompt:

    def __init__(self, image, results, device='cpu'):
        if isinstance(image, str) or isinstance(image, Image.Image):
            image = image_to_np_ndarray(image)
        self.device = device
        self.results = results
        self.img = image

    def _segment_image(self, image, bbox):
        if isinstance(image, Image.Image):
            image_array = np.array(image)
        else:
            image_array = image
        segmented_image_array = np.zeros_like(image_array)
        x1, y1, x2, y2 = bbox
        segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
        segmented_image = Image.fromarray(segmented_image_array)
        black_image = Image.new('RGB', image.size, (255, 255, 255))
        # transparency_mask = np.zeros_like((), dtype=np.uint8)
        transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
        transparency_mask[y1:y2, x1:x2] = 255
        transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
        black_image.paste(segmented_image, mask=transparency_mask_image)
        return black_image

    def _format_results(self, result, filter=0):
        annotations = []
        n = len(result.masks.data)
        for i in range(n):
            annotation = {}
            mask = result.masks.data[i] == 1.0

            if torch.sum(mask) < filter:
                continue
            annotation['id'] = i
            annotation['segmentation'] = mask.cpu().numpy()
            annotation['bbox'] = result.boxes.data[i]
            annotation['score'] = result.boxes.conf[i]
            annotation['area'] = annotation['segmentation'].sum()
            annotations.append(annotation)
        return annotations

    def filter_masks(annotations):  # filter overlapping masks
        annotations.sort(key=lambda x: x['area'], reverse=True)
        to_remove = set()
        for i in range(0, len(annotations)):
            a = annotations[i]
            for j in range(i + 1, len(annotations)):
                b = annotations[j]
                if i != j and j not in to_remove:
                    # check if the smaller mask is mostly contained in the larger one
                    if b['area'] < a['area']:
                        if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
                            to_remove.add(j)

        return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove

    def _get_bbox_from_mask(self, mask):
        mask = mask.astype(np.uint8)
        contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        x1, y1, w, h = cv2.boundingRect(contours[0])
        x2, y2 = x1 + w, y1 + h
        if len(contours) > 1:
            for b in contours:
                x_t, y_t, w_t, h_t = cv2.boundingRect(b)
                # Merge multiple bounding boxes into one.
                x1 = min(x1, x_t)
                y1 = min(y1, y_t)
                x2 = max(x2, x_t + w_t)
                y2 = max(y2, y_t + h_t)
            h = y2 - y1
            w = x2 - x1
        return [x1, y1, x2, y2]

    def plot_to_result(self,
                       annotations,
                       bboxes=None,
                       points=None,
                       point_label=None,
                       mask_random_color=True,
                       better_quality=True,
                       retina=False,
                       withContours=True) -> np.ndarray:
        if isinstance(annotations[0], dict):
            annotations = [annotation['segmentation'] for annotation in annotations]
        image = self.img
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        original_h = image.shape[0]
        original_w = image.shape[1]
        if sys.platform == "darwin":
            plt.switch_backend("TkAgg")
        plt.figure(figsize=(original_w / 100, original_h / 100))
        # Add subplot with no margin.
        plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
        plt.margins(0, 0)
        plt.gca().xaxis.set_major_locator(plt.NullLocator())
        plt.gca().yaxis.set_major_locator(plt.NullLocator())

        plt.imshow(image)
        if better_quality:
            if isinstance(annotations[0], torch.Tensor):
                annotations = np.array(annotations.cpu())
            for i, mask in enumerate(annotations):
                mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
                annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
        if self.device == 'cpu':
            annotations = np.array(annotations)
            self.fast_show_mask(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bboxes=bboxes,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        else:
            if isinstance(annotations[0], np.ndarray):
                annotations = torch.from_numpy(annotations)
            self.fast_show_mask_gpu(
                annotations,
                plt.gca(),
                random_color=mask_random_color,
                bboxes=bboxes,
                points=points,
                pointlabel=point_label,
                retinamask=retina,
                target_height=original_h,
                target_width=original_w,
            )
        if isinstance(annotations, torch.Tensor):
            annotations = annotations.cpu().numpy()
        if withContours:
            contour_all = []
            temp = np.zeros((original_h, original_w, 1))
            for i, mask in enumerate(annotations):
                if type(mask) == dict:
                    mask = mask['segmentation']
                annotation = mask.astype(np.uint8)
                if not retina:
                    annotation = cv2.resize(
                        annotation,
                        (original_w, original_h),
                        interpolation=cv2.INTER_NEAREST,
                    )
                contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                for contour in contours:
                    contour_all.append(contour)
            cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
            color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
            contour_mask = temp / 255 * color.reshape(1, 1, -1)
            plt.imshow(contour_mask)

        plt.axis('off')
        fig = plt.gcf()
        plt.draw()

        try:
            buf = fig.canvas.tostring_rgb()
        except AttributeError:
            fig.canvas.draw()
            buf = fig.canvas.tostring_rgb()
        cols, rows = fig.canvas.get_width_height()
        img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
        result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
        plt.close()
        return result

    # Remark for refactoring: IMO a function should do one thing only; storing the image and plotting should be
    # separated and do not necessarily need to be class functions, but standalone utility functions that the user
    # can chain in his scripts to have more fine-grained control.
    def plot(self,
             annotations,
             output_path,
             bboxes=None,
             points=None,
             point_label=None,
             mask_random_color=True,
             better_quality=True,
             retina=False,
             withContours=True):
        if len(annotations) == 0:
            return None
        result = self.plot_to_result(
            annotations,
            bboxes,
            points,
            point_label,
            mask_random_color,
            better_quality,
            retina,
            withContours,
        )

        path = os.path.dirname(os.path.abspath(output_path))
        if not os.path.exists(path):
            os.makedirs(path)
        result = result[:, :, ::-1]
        cv2.imwrite(output_path, result)

    # CPU post process
    def fast_show_mask(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        mask_sum = annotation.shape[0]
        height = annotation.shape[1]
        width = annotation.shape[2]
        # Sort annotations based on area.
        areas = np.sum(annotation, axis=(1, 2))
        sorted_indices = np.argsort(areas)
        annotation = annotation[sorted_indices]

        index = (annotation != 0).argmax(axis=0)
        if random_color:
            color = np.random.random((mask_sum, 1, 1, 3))
        else:
            color = np.ones((mask_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
        transparency = np.ones((mask_sum, 1, 1, 1)) * 0.6
        visual = np.concatenate([color, transparency], axis=-1)
        mask_image = np.expand_dims(annotation, -1) * visual

        show = np.zeros((height, width, 4))
        h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(width), indexing='ij')
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]
        if bboxes is not None:
            for bbox in bboxes:
                x1, y1, x2, y2 = bbox
                ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw points
        if points is not None:
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )

        if not retinamask:
            show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show)

    def fast_show_mask_gpu(
        self,
        annotation,
        ax,
        random_color=False,
        bboxes=None,
        points=None,
        pointlabel=None,
        retinamask=True,
        target_height=960,
        target_width=960,
    ):
        mask_sum = annotation.shape[0]
        height = annotation.shape[1]
        width = annotation.shape[2]
        areas = torch.sum(annotation, dim=(1, 2))
        sorted_indices = torch.argsort(areas, descending=False)
        annotation = annotation[sorted_indices]
        # Find the index of the first non-zero value at each position.
        index = (annotation != 0).to(torch.long).argmax(dim=0)
        if random_color:
            color = torch.rand((mask_sum, 1, 1, 3)).to(annotation.device)
        else:
            color = torch.ones((mask_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
                30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
        transparency = torch.ones((mask_sum, 1, 1, 1)).to(annotation.device) * 0.6
        visual = torch.cat([color, transparency], dim=-1)
        mask_image = torch.unsqueeze(annotation, -1) * visual
        # Select data according to the index. The index indicates which batch's data to choose at each position,
        # converting the mask_image into a single batch form.
        show = torch.zeros((height, width, 4)).to(annotation.device)
        try:
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(width), indexing='ij')
        except Exception:
            h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(width))
        indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
        # Use vectorized indexing to update the values of 'show'.
        show[h_indices, w_indices, :] = mask_image[indices]
        show_cpu = show.cpu().numpy()
        if bboxes is not None:
            for bbox in bboxes:
                x1, y1, x2, y2 = bbox
                ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
        # draw points
        if points is not None:
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
                s=20,
                c='y',
            )
            plt.scatter(
                [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
                [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
                s=20,
                c='m',
            )
        if not retinamask:
            show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
        ax.imshow(show_cpu)

    # clip
    @torch.no_grad()
    def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
        preprocessed_images = [preprocess(image).to(device) for image in elements]
        try:
            import clip  # for linear_assignment
        except (ImportError, AssertionError, AttributeError):
            from ultralytics.yolo.utils.checks import check_requirements

            check_requirements('git+https://github.com/openai/CLIP.git')  # required before installing lap from source
            import clip

        tokenized_text = clip.tokenize([search_text]).to(device)
        stacked_images = torch.stack(preprocessed_images)
        image_features = model.encode_image(stacked_images)
        text_features = model.encode_text(tokenized_text)
        image_features /= image_features.norm(dim=-1, keepdim=True)
        text_features /= text_features.norm(dim=-1, keepdim=True)
        probs = 100.0 * image_features @ text_features.T
        return probs[:, 0].softmax(dim=0)

    def _crop_image(self, format_results):

        image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
        ori_w, ori_h = image.size
        annotations = format_results
        mask_h, mask_w = annotations[0]['segmentation'].shape
        if ori_w != mask_w or ori_h != mask_h:
            image = image.resize((mask_w, mask_h))
        cropped_boxes = []
        cropped_images = []
        not_crop = []
        filter_id = []
        # annotations, _ = filter_masks(annotations)
        # filter_id = list(_)
        for _, mask in enumerate(annotations):
            if np.sum(mask['segmentation']) <= 100:
                filter_id.append(_)
                continue
            bbox = self._get_bbox_from_mask(mask['segmentation'])  # bbox of the mask
            cropped_boxes.append(self._segment_image(image, bbox))
            # cropped_boxes.append(segment_image(image,mask["segmentation"]))
            cropped_images.append(bbox)  # Save the bounding box of the cropped image.

        return cropped_boxes, cropped_images, not_crop, filter_id, annotations

    def box_prompt(self, bbox=None, bboxes=None):
        if self.results == None:
            return []
        assert bbox or bboxes
        if bboxes is None:
            bboxes = [bbox]
        max_iou_index = []
        for bbox in bboxes:
            assert (bbox[2] != 0 and bbox[3] != 0)
            masks = self.results[0].masks.data
            target_height = self.img.shape[0]
            target_width = self.img.shape[1]
            h = masks.shape[1]
            w = masks.shape[2]
            if h != target_height or w != target_width:
                bbox = [
                    int(bbox[0] * w / target_width),
                    int(bbox[1] * h / target_height),
                    int(bbox[2] * w / target_width),
                    int(bbox[3] * h / target_height), ]
            bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
            bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
            bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
            bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h

            # IoUs = torch.zeros(len(masks), dtype=torch.float32)
            bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])

            masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
            orig_masks_area = torch.sum(masks, dim=(1, 2))

            union = bbox_area + orig_masks_area - masks_area
            IoUs = masks_area / union
            max_iou_index.append(int(torch.argmax(IoUs)))
        max_iou_index = list(set(max_iou_index))
        return np.array(masks[max_iou_index].cpu().numpy())

    def point_prompt(self, points, pointlabel):  # numpy
        if self.results == None:
            return []
        masks = self._format_results(self.results[0], 0)
        target_height = self.img.shape[0]
        target_width = self.img.shape[1]
        h = masks[0]['segmentation'].shape[0]
        w = masks[0]['segmentation'].shape[1]
        if h != target_height or w != target_width:
            points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
        onemask = np.zeros((h, w))
        masks = sorted(masks, key=lambda x: x['area'], reverse=True)
        for i, annotation in enumerate(masks):
            if type(annotation) == dict:
                mask = annotation['segmentation']
            else:
                mask = annotation
            for i, point in enumerate(points):
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
                    onemask[mask] = 1
                if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
                    onemask[mask] = 0
        onemask = onemask >= 1
        return np.array([onemask])

    def text_prompt(self, text):
        if self.results == None:
            return []
        format_results = self._format_results(self.results[0], 0)
        cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
        clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
        scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
        max_idx = scores.argsort()
        max_idx = max_idx[-1]
        max_idx += sum(np.array(filter_id) <= int(max_idx))
        return np.array([annotations[max_idx]['segmentation']])

    def everything_prompt(self):
        if self.results == None:
            return []
        return self.results[0].masks.data
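An illustrative use of `FastSAMPrompt` (not part of the uploaded file; it mirrors what `run_test.py` does after postprocessing, and assumes `everything_results` is the list of ultralytics `Results` built from the model output):

```python
# Wrap the original image and the postprocessed results, then pick a prompt mode.
prompt_process = FastSAMPrompt("python/dogs.jpg", everything_results, device="cpu")

ann = prompt_process.everything_prompt()                                  # all masks
# ann = prompt_process.point_prompt(points=[[320, 320]], pointlabel=[1])  # or a point prompt
prompt_process.plot(annotations=ann, output_path="python/dogs_result.jpg")
```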
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/run_test.py
ADDED
@@ -0,0 +1,224 @@
import os
import sys
import cv2
import numpy as np
import onnxruntime
import time
import matplotlib.pyplot as plt
import torch
from ultralytics.engine.results import Results
from tools_pt import *
from prompt import FastSAMPrompt
import aidlite
import argparse
import ast

# Cosine-similarity helper used to compare outputs from different runtimes.
def get_acc(onnx_out, other_out):
    cosine_similarity = np.dot(np.array(onnx_out), np.array(other_out)) / (np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
    return cosine_similarity

def cal_sigmoid(x):
    return 1 / (1 + np.exp(-x))

class qnn_predict(object):
    def __init__(self, inputshape, outputshape, args) -> None:
        aidlite.set_log_level(aidlite.LogLevel.INFO)
        aidlite.log_to_stderr()
        print(f"Aidlite library version : {aidlite.get_library_version()}")
        print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
        config = aidlite.Config.create_instance()
        if config is None:
            print("Create config failed !")
        config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        config.framework_type = aidlite.FrameworkType.TYPE_QNN
        config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        config.is_quantify_model = 1

        model = aidlite.Model.create_instance(args.target_model)
        if model is None:
            print("Create model failed !")

        self.input_shape = inputshape
        self.out_shape = outputshape
        model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
        result = self.interpreter.init()
        if result != 0:
            print(f"interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")
        print("detect model load success!")

        self.conf = 0.4
        self.iou = 0.9
        self.size = 640
        self.agnostic_nms = False
        self.max_det = 300
        self.names = ['object']
        self.classes = None
        self.retina_masks = True

    def pretreat_img(self, img):
        scale = 1 / 255.
        img_size = cv2.resize(img, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        float_img = img_size.astype('float32')
        float_img = float_img * scale
        float_img = float_img[:, :, ::-1]
        return float_img

    def postprocess(self, preds, img, orig_imgs):
        """TODO: filter by classes."""
        p = non_max_suppression(torch.from_numpy(preds[0]),
                                self.conf,
                                self.iou,
                                agnostic=self.agnostic_nms,
                                max_det=self.max_det,
                                nc=len(self.names),
                                classes=self.classes)

        results = []
        if len(p) == 0 or len(p[0]) == 0:
            print("No object detected.")
            return results

        full_box = torch.zeros_like(p[0][0])
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            full_box[0][4] = p[0][critical_iou_index][:, 4]
            full_box[0][6:] = p[0][critical_iou_index][:, 6:]
            p[0][critical_iou_index] = full_box

        # proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        proto = torch.from_numpy(preds[-1])
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            path = img[0]  # self.batch[0]
            img_path = path[i] if isinstance(path, list) else path
            if not len(pred):  # save empty boxes
                results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
                continue
            if self.retina_masks:
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(
                Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
        return results

    def qnn_run(self, orig_imgs, img_path, args):
        input_img_f = self.pretreat_img(orig_imgs)  # resize the image, HWC
        # print("qnn_input:", input_img_f)
        # encoder texts
        input_img = np.expand_dims(input_img_f, 0)

        invoke_time = []
        for i in range(args.invoke_nums):
            result = self.interpreter.set_input_tensor(0, input_img.data)
            t0 = time.time()
            result = self.interpreter.invoke()
            t1 = time.time()
            cost_time = (t1 - t0) * 1000
            invoke_time.append(cost_time)
        mask_ = self.interpreter.get_output_tensor(0)
        concat_ = self.interpreter.get_output_tensor(1)
        mul_ = self.interpreter.get_output_tensor(3)
        split_ = self.interpreter.get_output_tensor(2)
        mask_ = mask_.reshape(*self.out_shape[3])
        mask_ = mask_.transpose((0, 3, 1, 2))
        concat_ = concat_.reshape(*self.out_shape[2])
        mul_ = mul_.reshape(*self.out_shape[1])
        split_ = split_.reshape(*self.out_shape[0])
        sig_ = cal_sigmoid(split_)

        output_concat = np.concatenate((mul_, sig_), axis=1)
        output_concat = np.concatenate((output_concat, concat_), axis=1)

        # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
        ## Timing statistics
        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / args.invoke_nums
        var_invoketime = np.var(invoke_time)
        print("========================================")
        print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("========================================")

        qnn_out = [np.array(output_concat), np.array(mask_)]
        # print("qnn predict out:", qnn_out)

        nchw_img = input_img.transpose(0, 3, 1, 2)
        everything_results = self.postprocess(qnn_out, nchw_img, [orig_imgs])
        # print("everything_results: ", everything_results)

        prompt_process = FastSAMPrompt(args.imgs, everything_results, device="cpu")

        # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
        try:
            if args.point_prompt == [[0, 0]]:
                ann = prompt_process.everything_prompt()
            else:
                ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
            out_name = os.path.basename(img_path).split(".")[0]
            if True:  # save the picture
                outpath = "python/"
                if not os.path.exists(outpath):
                    os.mkdir(outpath)
                prompt_process.plot(
                    annotations=ann,
                    output_path=os.path.join(outpath, out_name + "_result.jpg"),
                    mask_random_color=True,
                    better_quality=True,
                    retina=False,
                    withContours=True,
                )
            else:
                plt.figure()
                prompt_process.fast_show_mask(annotation=ann,
                                              ax=plt)
        except Exception as e:
            print(f"Warning : An error occurred in the picture {img_path} prediction -{e}")
        return [mask_.reshape(-1), output_concat.reshape(-1)]


def parser_args():
    parser = argparse.ArgumentParser(description="Run model benchmarks")
    parser.add_argument('--target_model', type=str, default='models/cutoff_fastsam_s_fp16.qnn216.ctx.bin', help="inference model path")
    parser.add_argument('--source_model', type=str, default='models/fastsam_s.onnx', help="original model path")
    parser.add_argument('--imgs', type=str, default='python/dogs.jpg', help="Predict images path")
    parser.add_argument('--invoke_nums', type=int, default=10, help="Inference nums")
    parser.add_argument('--point_prompt', type=str, default="[[0,0]]", help="example:[[x1,y1],[x2,y2]]")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parser_args()
    inputshape = [[1, 640, 640, 3]]
    outputshape = [[1, 1, 8400], [1, 4, 8400], [1, 32, 8400], [1, 160, 160, 32]]
    args.point_prompt = ast.literal_eval(args.point_prompt)

    predict = qnn_predict(inputshape, outputshape, args)
    if os.path.isdir(args.imgs):
        img_files = os.listdir(args.imgs)
        for fi in img_files:
            img_path = os.path.join(args.imgs, fi)
            im0s = cv2.imread(img_path)  # BGR
            im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
            predict.qnn_run(im0s, img_path, args)
    else:
        img_path = args.imgs
        im0s = cv2.imread(img_path)  # BGR
        im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
        qnn_result = predict.qnn_run(im0s, img_path, args)
    print("Prediction completion and the results are saved !")
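`get_acc` is defined in the script but never called in the demo flow. A hypothetical wiring (a sketch only, not part of the repository) could compare the flattened QNN outputs returned by `qnn_run` against an onnxruntime reference of the source model:

```python
import numpy as np
import onnxruntime as ort

# Re-create the NCHW float input the same way pretreat_img does (resize, 1/255 scale, BGR->RGB flip).
img = cv2.resize(cv2.imread(args.imgs), (640, 640), interpolation=cv2.INTER_LINEAR)
nchw = np.expand_dims(img.astype('float32')[:, :, ::-1] / 255., 0).transpose(0, 3, 1, 2)

onnx_sess = ort.InferenceSession(args.source_model, providers=["CPUExecutionProvider"])
onnx_boxes, onnx_mask = onnx_sess.run(None, {"input": np.ascontiguousarray(nchw)})

qnn_mask_flat, qnn_boxes_flat = qnn_result  # flattened arrays returned by qnn_run above
print("mask cosine similarity :", get_acc(onnx_mask.reshape(-1), qnn_mask_flat))
print("boxes cosine similarity:", get_acc(onnx_boxes.reshape(-1), qnn_boxes_flat))
```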
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/tools_pt.py
ADDED
@@ -0,0 +1,372 @@
1 |
+
import numpy as np
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torchvision
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def clip_boxes(boxes, shape):
|
10 |
+
"""
|
11 |
+
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
boxes (torch.Tensor): the bounding boxes to clip
|
15 |
+
shape (tuple): the shape of the image
|
16 |
+
"""
|
17 |
+
if isinstance(boxes, torch.Tensor): # faster individually
|
18 |
+
boxes[..., 0].clamp_(0, shape[1]) # x1
|
19 |
+
boxes[..., 1].clamp_(0, shape[0]) # y1
|
20 |
+
boxes[..., 2].clamp_(0, shape[1]) # x2
|
21 |
+
boxes[..., 3].clamp_(0, shape[0]) # y2
|
22 |
+
else: # np.array (faster grouped)
|
23 |
+
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
|
24 |
+
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
|
25 |
+
|
26 |
+
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
27 |
+
"""
|
28 |
+
Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
|
29 |
+
(img1_shape) to the shape of a different image (img0_shape).
|
30 |
+
|
31 |
+
Args:
|
32 |
+
img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
|
33 |
+
boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
|
34 |
+
img0_shape (tuple): the shape of the target image, in the format of (height, width).
|
35 |
+
ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
|
36 |
+
calculated based on the size difference between the two images.
|
37 |
+
padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
|
38 |
+
rescaling.
|
39 |
+
|
40 |
+
Returns:
|
41 |
+
boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
|
42 |
+
"""
|
43 |
+
if ratio_pad is None: # calculate from img0_shape
|
44 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
45 |
+
pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
|
46 |
+
(img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
|
47 |
+
else:
|
48 |
+
gain = ratio_pad[0][0]
|
49 |
+
pad = ratio_pad[1]
|
50 |
+
|
51 |
+
if padding:
|
52 |
+
boxes[..., [0, 2]] -= pad[0] # x padding
|
53 |
+
boxes[..., [1, 3]] -= pad[1] # y padding
|
54 |
+
boxes[..., :4] /= gain
|
55 |
+
clip_boxes(boxes, img0_shape)
|
56 |
+
return boxes
|
57 |
+
|
58 |
+
|
59 |
+
def xywh2xyxy(x):
|
60 |
+
"""
|
61 |
+
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
|
62 |
+
top-left corner and (x2, y2) is the bottom-right corner.
|
63 |
+
|
64 |
+
Args:
|
65 |
+
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
|
66 |
+
|
67 |
+
Returns:
|
68 |
+
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
|
69 |
+
"""
|
70 |
+
assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
|
71 |
+
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
|
72 |
+
dw = x[..., 2] / 2 # half-width
|
73 |
+
dh = x[..., 3] / 2 # half-height
|
74 |
+
y[..., 0] = x[..., 0] - dw # top left x
|
75 |
+
y[..., 1] = x[..., 1] - dh # top left y
|
76 |
+
y[..., 2] = x[..., 0] + dw # bottom right x
|
77 |
+
y[..., 3] = x[..., 1] + dh # bottom right y
|
78 |
+
return y
|
79 |
+
|
80 |
+
|
81 |
+
def non_max_suppression(
|
82 |
+
prediction,
|
83 |
+
conf_thres=0.25,
|
84 |
+
iou_thres=0.45,
|
85 |
+
classes=None,
|
86 |
+
agnostic=False,
|
87 |
+
multi_label=False,
|
88 |
+
labels=(),
|
89 |
+
max_det=300,
|
90 |
+
nc=0, # number of classes (optional)
|
91 |
+
max_time_img=0.05,
|
92 |
+
max_nms=30000,
|
93 |
+
max_wh=7680,
|
94 |
+
):
|
95 |
+
"""
|
96 |
+
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
|
97 |
+
|
98 |
+
Args:
|
99 |
+
prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
|
100 |
+
containing the predicted boxes, classes, and masks. The tensor should be in the format
|
101 |
+
output by a model, such as YOLO.
|
102 |
+
conf_thres (float): The confidence threshold below which boxes will be filtered out.
|
103 |
+
Valid values are between 0.0 and 1.0.
|
104 |
+
iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
|
105 |
+
Valid values are between 0.0 and 1.0.
|
106 |
+
classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
|
107 |
+
agnostic (bool): If True, the model is agnostic to the number of classes, and all
|
108 |
+
classes will be considered as one.
|
109 |
+
multi_label (bool): If True, each box may have multiple labels.
|
110 |
+
labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
|
111 |
+
list contains the apriori labels for a given image. The list should be in the format
|
112 |
+
output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
|
113 |
+
max_det (int): The maximum number of boxes to keep after NMS.
|
114 |
+
nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
|
115 |
+
max_time_img (float): The maximum time (seconds) for processing one image.
|
116 |
+
max_nms (int): The maximum number of boxes into torchvision.ops.nms().
|
117 |
+
max_wh (int): The maximum box width and height in pixels
|
118 |
+
|
119 |
+
Returns:
|
120 |
+
(List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
|
121 |
+
shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
|
122 |
+
(x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
|
123 |
+
"""
|
124 |
+
|
125 |
+
# Checks
|
126 |
+
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
127 |
+
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
128 |
+
if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
|
129 |
+
prediction = prediction[0] # select only inference output
|
130 |
+
|
131 |
+
device = prediction.device
|
132 |
+
mps = 'mps' in device.type # Apple MPS
|
133 |
+
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
|
134 |
+
prediction = prediction.cpu()
|
135 |
+
bs = prediction.shape[0] # batch size
|
136 |
+
nc = nc or (prediction.shape[1] - 4) # number of classes
|
137 |
+
nm = prediction.shape[1] - nc - 4
|
138 |
+
mi = 4 + nc # mask start index
|
139 |
+
xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
|
140 |
+
|
141 |
+
# Settings
|
142 |
+
# min_wh = 2 # (pixels) minimum box width and height
|
143 |
+
time_limit = 0.5 + max_time_img * bs # seconds to quit after
|
144 |
+
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
145 |
+
|
146 |
+
prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
|
147 |
+
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
|
148 |
+
|
149 |
+
t = time.time()
|
150 |
+
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
|
151 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
152 |
+
# Apply constraints
|
153 |
+
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
154 |
+
x = x[xc[xi]] # confidence
|
155 |
+
|
156 |
+
# Cat apriori labels if autolabelling
|
157 |
+
if labels and len(labels[xi]):
|
158 |
+
lb = labels[xi]
|
159 |
+
v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
|
160 |
+
v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
|
161 |
+
v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
|
162 |
+
x = torch.cat((x, v), 0)
|
163 |
+
|
164 |
+
# If none remain process next image
|
165 |
+
if not x.shape[0]:
|
166 |
+
continue
|
167 |
+
|
168 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
169 |
+
box, cls, mask = x.split((4, nc, nm), 1)
|
170 |
+
|
171 |
+
if multi_label:
|
172 |
+
i, j = torch.where(cls > conf_thres)
|
173 |
+
x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
|
174 |
+
else: # best class only
|
175 |
+
conf, j = cls.max(1, keepdim=True)
|
176 |
+
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
|
177 |
+
|
178 |
+
# Filter by class
|
179 |
+
if classes is not None:
|
180 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
181 |
+
|
182 |
+
# Check shape
|
183 |
+
n = x.shape[0] # number of boxes
|
184 |
+
if not n: # no boxes
|
185 |
+
continue
|
186 |
+
if n > max_nms: # excess boxes
|
187 |
+
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
|
188 |
+
|
189 |
+
# Batched NMS
|
190 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
191 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
192 |
+
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
193 |
+
i = i[:max_det] # limit detections
|
194 |
+
|
195 |
+
# # Experimental
|
196 |
+
# merge = False # use merge-NMS
|
197 |
+
# if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
198 |
+
# # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
199 |
+
# from .metrics import box_iou
|
200 |
+
# iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
201 |
+
# weights = iou * scores[None] # box weights
|
202 |
+
# x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
203 |
+
# redundant = True # require redundant detections
|
204 |
+
# if redundant:
|
205 |
+
# i = i[iou.sum(1) > 1] # require redundancy
|
206 |
+
|
207 |
+
output[xi] = x[i]
|
208 |
+
if mps:
|
209 |
+
output[xi] = output[xi].to(device)
|
210 |
+
# if (time.time() - t) > time_limit:
|
211 |
+
# LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
|
212 |
+
# break # time limit exceeded
|
213 |
+
|
214 |
+
return output
|
215 |
+
|
216 |
+
|
217 |
+
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
|
218 |
+
'''Adjust bounding boxes to stick to image border if they are within a certain threshold.
|
219 |
+
Args:
|
220 |
+
boxes: (n, 4)
|
221 |
+
image_shape: (height, width)
|
222 |
+
threshold: pixel threshold
|
223 |
+
Returns:
|
224 |
+
adjusted_boxes: adjusted bounding boxes
|
225 |
+
'''
|
226 |
+
|
227 |
+
# Image dimensions
|
228 |
+
h, w = image_shape
    # Adjust boxes
    boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 0])  # x1
    boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 1])  # y1
    boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
        w, dtype=torch.float, device=boxes.device), boxes[:, 2])  # x2
    boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
        h, dtype=torch.float, device=boxes.device), boxes[:, 3])  # y2

    return boxes


def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
    Args:
        box1: (4, )
        boxes: (n, 4)
    Returns:
        high_iou_indices: Indices of boxes with IoU > thres
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
    # obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

    # compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # compute the area of union
    union = box1_area + box2_area - intersection

    # compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        if iou.numel() == 0:
            return 0
        return iou

    # get indices of boxes with IoU > thres
    high_iou_indices = torch.nonzero(iou > iou_thres).flatten()

    return high_iou_indices


def scale_masks(masks, shape, padding=True):
    """
    Rescale segment masks to shape.

    Args:
        masks (torch.Tensor): (N, C, H, W).
        shape (tuple): Height and width.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
    """
    mh, mw = masks.shape[2:]
    gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
    if padding:
        pad[0] /= 2
        pad[1] /= 2
    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # y, x
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
    masks = masks[..., top:bottom, left:right]

    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
    return masks


def process_mask_native(protos, masks_in, bboxes, shape):
    """
    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h,w)

    Returns:
        masks (torch.Tensor): The returned masks with dimensions [h, w, n]
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
    masks = scale_masks(masks[None], shape)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.5)


def crop_mask(masks, boxes):
    """
    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks
        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form

    Returns:
        (torch.Tensor): The masks are being cropped to the bounding box.
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
            are the height and width of the input image. The mask is applied to the bounding boxes.
    """
    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)

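The helpers above are easiest to understand on toy tensors. Below is a minimal sketch, not part of the upload, that exercises `bbox_iou` (which first snaps near-border boxes to the image edge) and `crop_mask`; it assumes the package's `python/` directory is on the import path so `tools_pt.py` can be imported.

```python
# Minimal sketch: exercise bbox_iou and crop_mask on toy tensors.
# Assumes it is run from the package's python/ directory so tools_pt.py is importable.
import torch
from tools_pt import bbox_iou, crop_mask

# One reference box covering the whole 640x640 frame, plus two candidates.
full_box = torch.tensor([0., 0., 640., 640.])
candidates = torch.tensor([[5., 5., 635., 638.],       # nearly full-frame -> snapped to the border, high IoU
                           [100., 100., 200., 200.]])  # small box -> low IoU

print(bbox_iou(full_box, candidates.clone(), iou_thres=0.9, image_shape=(640, 640)))
# -> tensor([0]): only the first candidate overlaps the full frame by more than 0.9

# crop_mask zeroes everything outside each box.
masks = torch.ones(2, 8, 8)
boxes = torch.tensor([[0., 0., 4., 4.], [2., 2., 6., 6.]])
print(crop_mask(masks, boxes).sum(dim=(1, 2)))  # -> tensor([16., 16.])
```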
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/utils.py
ADDED
@@ -0,0 +1,86 @@
import numpy as np
import torch
from PIL import Image


def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
    Args:
        boxes: (n, 4)
        image_shape: (height, width)
        threshold: pixel threshold
    Returns:
        adjusted_boxes: adjusted bounding boxes
    '''

    # Image dimensions
    h, w = image_shape

    # Adjust boxes
    boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 0])  # x1
    boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 1])  # y1
    boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
        w, dtype=torch.float, device=boxes.device), boxes[:, 2])  # x2
    boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
        h, dtype=torch.float, device=boxes.device), boxes[:, 3])  # y2

    return boxes


def convert_box_xywh_to_xyxy(box):
    x1 = box[0]
    y1 = box[1]
    x2 = box[0] + box[2]
    y2 = box[1] + box[3]
    return [x1, y1, x2, y2]


def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
    Args:
        box1: (4, )
        boxes: (n, 4)
    Returns:
        high_iou_indices: Indices of boxes with IoU > thres
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
    # obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

    # compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # compute the area of union
    union = box1_area + box2_area - intersection

    # compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        if iou.numel() == 0:
            return 0
        return iou

    # get indices of boxes with IoU > thres
    high_iou_indices = torch.nonzero(iou > iou_thres).flatten()

    return high_iou_indices


def image_to_np_ndarray(image):
    if type(image) is str:
        return np.array(Image.open(image))
    elif issubclass(type(image), Image.Image):
        return np.array(image)
    elif type(image) is np.ndarray:
        return image
    return None
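For reference, a minimal usage sketch of the two helpers that only exist in `utils.py`, `convert_box_xywh_to_xyxy` and `image_to_np_ndarray`; it assumes the script is run from the `python/` directory so the module is importable.

```python
# Minimal sketch of the utils.py helpers; inputs are toy values.
import numpy as np
from PIL import Image
from utils import convert_box_xywh_to_xyxy, image_to_np_ndarray

# xywh (top-left corner plus width/height) -> xyxy corners
print(convert_box_xywh_to_xyxy([100, 150, 50, 80]))  # -> [100, 150, 150, 230]

# image_to_np_ndarray accepts a path, a PIL image, or an ndarray and always returns an ndarray
img = Image.new("RGB", (4, 4))
assert isinstance(image_to_np_ndarray(img), np.ndarray)
assert image_to_np_ndarray(np.zeros((4, 4, 3), np.uint8)).shape == (4, 4, 3)
```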
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/README.md
ADDED
@@ -0,0 +1,48 @@
## Model Information
### Source model
- Input shape: 640x640
- Number of parameters: 11.24M
- Model size: 45.0M
- Output shape: 1x37x8400, 1x32x160x160

Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)

### Converted model

- Precision: INT8
- Backend: QNN2.16
- Target Device: SNM972 QCS8550

## Inference with AidLite SDK

### SDK installation
Model Farm uses the AidLite SDK as its model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).

- Install AidLite SDK

```bash
# Install the appropriate version of the AidLite SDK
sudo aid-pkg update
sudo aid-pkg install aidlite-sdk
# Install the QNN variant that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
sudo aid-pkg install aidlite-{QNN VERSION}
```

- Verify AidLite SDK

```bash
# Check the AidLite SDK C++ library version
python3 -c "import aidlite ; print(aidlite.get_library_version())"

# Check the AidLite SDK Python library version
python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
```

### Run demo
```bash
cd fastsam_s/model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite
export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0

python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
```
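The same SDK check can also be run from a Python session; the sketch below is illustrative and uses only the two version calls that `run_test.py` itself prints at start-up.

```python
# Minimal sketch: the SDK sanity check from inside Python,
# using only calls that ./python/run_test.py also uses.
import aidlite

print("AidLite C++ library   :", aidlite.get_library_version())
print("AidLite Python binding:", aidlite.get_py_library_version())
```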
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1e44ab88e5fd143d0c5e80c8a03955f7411cbd0d61e36e99c03c7eaf32c43e5f
size 12363280
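The entry above is a Git LFS pointer rather than the context binary itself. Once the real file has been fetched, it can be checked against the pointer's `oid` and `size`; the sketch below is illustrative and assumes the repository layout of this commit.

```python
# Minimal sketch: verify the LFS-tracked model file against its pointer metadata.
import hashlib
import os

path = "models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin"
expected_oid = "1e44ab88e5fd143d0c5e80c8a03955f7411cbd0d61e36e99c03c7eaf32c43e5f"
expected_size = 12363280

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

print("size ok  :", os.path.getsize(path) == expected_size)
print("sha256 ok:", sha.hexdigest() == expected_oid)
```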
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/dogs.jpg
ADDED
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/onnx_export.py
ADDED
@@ -0,0 +1,50 @@
import torch
import cv2
import os
import sys

from ultralytics.models.fastsam import FastSAM

class Fast_SAM(torch.nn.Module):
    """Exportable FastSAM model, end-to-end."""

    def __init__(self) -> None:
        super().__init__()
        pt_name = './models/FastSAM-s.pt'
        self.model = FastSAM(pt_name).model

    def forward(self, image: torch.Tensor):
        """
        Run FastSAM on `image`, and produce high quality segmentation masks.
        Faster than SAM as it is based on YOLOv8.

        Parameters:
            image: Pixel values pre-processed for encoder consumption.
                Range: float[0, 1]
                3-channel Color Space: BGR
        Returns:

        """
        predictions = self.model(image)
        # Return predictions as a tuple instead of nested tuple.
        return (predictions[0], predictions[1][2])


model = Fast_SAM()
num_params = sum(p.numel() for p in model.parameters())
print(f'Number of FastSAM-s parameters: {num_params}')
dummy_input = torch.randn([1, 3, 640, 640], dtype=torch.float32)
source_model = torch.jit.trace(
    model.to("cpu"), dummy_input, check_trace=False
)
torch.onnx.export(model,  # model being run
                  dummy_input,  # model input (or a tuple for multiple inputs)
                  "./models/fastsam_s.onnx",  # where to save the model
                  export_params=True,  # store the trained parameter weights inside the model file
                  opset_version=12,  # the ONNX version to export the model to
                  do_constant_folding=True,  # whether to execute constant folding for optimization
                  input_names=['input'],  # the model's input names
                  output_names=['boxes', 'mask'],
                  verbose=True,
                  )
print("Convert to onnx successfully!")
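Before the ONNX file is converted to a QNN context binary, it can be sanity-checked with `onnxruntime` (already imported by `run_test.py`). A minimal sketch, assuming `./models/fastsam_s.onnx` was produced by the export script above:

```python
# Minimal sketch: load the exported ONNX model and confirm the two output shapes
# (1x37x8400 boxes plus mask coefficients, 1x32x160x160 prototypes) listed in the README.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("./models/fastsam_s.onnx", providers=["CPUExecutionProvider"])
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
boxes, mask = sess.run(None, {"input": dummy})
print(boxes.shape, mask.shape)  # expected: (1, 37, 8400) (1, 32, 160, 160)
```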
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/prompt.py
ADDED
@@ -0,0 +1,456 @@
1 |
+
import os
|
2 |
+
import sys
|
3 |
+
import cv2
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import numpy as np
|
6 |
+
import torch
|
7 |
+
from utils import image_to_np_ndarray
|
8 |
+
from PIL import Image
|
9 |
+
|
10 |
+
|
11 |
+
class FastSAMPrompt:
|
12 |
+
|
13 |
+
def __init__(self, image, results, device='cpu'):
|
14 |
+
if isinstance(image, str) or isinstance(image, Image.Image):
|
15 |
+
image = image_to_np_ndarray(image)
|
16 |
+
self.device = device
|
17 |
+
self.results = results
|
18 |
+
self.img = image
|
19 |
+
|
20 |
+
def _segment_image(self, image, bbox):
|
21 |
+
if isinstance(image, Image.Image):
|
22 |
+
image_array = np.array(image)
|
23 |
+
else:
|
24 |
+
image_array = image
|
25 |
+
segmented_image_array = np.zeros_like(image_array)
|
26 |
+
x1, y1, x2, y2 = bbox
|
27 |
+
segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
|
28 |
+
segmented_image = Image.fromarray(segmented_image_array)
|
29 |
+
black_image = Image.new('RGB', image.size, (255, 255, 255))
|
30 |
+
# transparency_mask = np.zeros_like((), dtype=np.uint8)
|
31 |
+
transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
|
32 |
+
transparency_mask[y1:y2, x1:x2] = 255
|
33 |
+
transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
|
34 |
+
black_image.paste(segmented_image, mask=transparency_mask_image)
|
35 |
+
return black_image
|
36 |
+
|
37 |
+
def _format_results(self, result, filter=0):
|
38 |
+
annotations = []
|
39 |
+
n = len(result.masks.data)
|
40 |
+
for i in range(n):
|
41 |
+
annotation = {}
|
42 |
+
mask = result.masks.data[i] == 1.0
|
43 |
+
|
44 |
+
if torch.sum(mask) < filter:
|
45 |
+
continue
|
46 |
+
annotation['id'] = i
|
47 |
+
annotation['segmentation'] = mask.cpu().numpy()
|
48 |
+
annotation['bbox'] = result.boxes.data[i]
|
49 |
+
annotation['score'] = result.boxes.conf[i]
|
50 |
+
annotation['area'] = annotation['segmentation'].sum()
|
51 |
+
annotations.append(annotation)
|
52 |
+
return annotations
|
53 |
+
|
54 |
+
def filter_masks(annotations): # filte the overlap mask
|
55 |
+
annotations.sort(key=lambda x: x['area'], reverse=True)
|
56 |
+
to_remove = set()
|
57 |
+
for i in range(0, len(annotations)):
|
58 |
+
a = annotations[i]
|
59 |
+
for j in range(i + 1, len(annotations)):
|
60 |
+
b = annotations[j]
|
61 |
+
if i != j and j not in to_remove:
|
62 |
+
# check if
|
63 |
+
if b['area'] < a['area']:
|
64 |
+
if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
|
65 |
+
to_remove.add(j)
|
66 |
+
|
67 |
+
return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove
|
68 |
+
|
69 |
+
def _get_bbox_from_mask(self, mask):
|
70 |
+
mask = mask.astype(np.uint8)
|
71 |
+
contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
|
72 |
+
x1, y1, w, h = cv2.boundingRect(contours[0])
|
73 |
+
x2, y2 = x1 + w, y1 + h
|
74 |
+
if len(contours) > 1:
|
75 |
+
for b in contours:
|
76 |
+
x_t, y_t, w_t, h_t = cv2.boundingRect(b)
|
77 |
+
# Merge multiple bounding boxes into one.
|
78 |
+
x1 = min(x1, x_t)
|
79 |
+
y1 = min(y1, y_t)
|
80 |
+
x2 = max(x2, x_t + w_t)
|
81 |
+
y2 = max(y2, y_t + h_t)
|
82 |
+
h = y2 - y1
|
83 |
+
w = x2 - x1
|
84 |
+
return [x1, y1, x2, y2]
|
85 |
+
|
86 |
+
def plot_to_result(self,
|
87 |
+
annotations,
|
88 |
+
bboxes=None,
|
89 |
+
points=None,
|
90 |
+
point_label=None,
|
91 |
+
mask_random_color=True,
|
92 |
+
better_quality=True,
|
93 |
+
retina=False,
|
94 |
+
withContours=True) -> np.ndarray:
|
95 |
+
if isinstance(annotations[0], dict):
|
96 |
+
annotations = [annotation['segmentation'] for annotation in annotations]
|
97 |
+
image = self.img
|
98 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
99 |
+
original_h = image.shape[0]
|
100 |
+
original_w = image.shape[1]
|
101 |
+
if sys.platform == "darwin":
|
102 |
+
plt.switch_backend("TkAgg")
|
103 |
+
plt.figure(figsize=(original_w / 100, original_h / 100))
|
104 |
+
# Add subplot with no margin.
|
105 |
+
plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
|
106 |
+
plt.margins(0, 0)
|
107 |
+
plt.gca().xaxis.set_major_locator(plt.NullLocator())
|
108 |
+
plt.gca().yaxis.set_major_locator(plt.NullLocator())
|
109 |
+
|
110 |
+
plt.imshow(image)
|
111 |
+
if better_quality:
|
112 |
+
if isinstance(annotations[0], torch.Tensor):
|
113 |
+
annotations = np.array(annotations.cpu())
|
114 |
+
for i, mask in enumerate(annotations):
|
115 |
+
mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
|
116 |
+
annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
|
117 |
+
if self.device == 'cpu':
|
118 |
+
annotations = np.array(annotations)
|
119 |
+
self.fast_show_mask(
|
120 |
+
annotations,
|
121 |
+
plt.gca(),
|
122 |
+
random_color=mask_random_color,
|
123 |
+
bboxes=bboxes,
|
124 |
+
points=points,
|
125 |
+
pointlabel=point_label,
|
126 |
+
retinamask=retina,
|
127 |
+
target_height=original_h,
|
128 |
+
target_width=original_w,
|
129 |
+
)
|
130 |
+
else:
|
131 |
+
if isinstance(annotations[0], np.ndarray):
|
132 |
+
annotations = torch.from_numpy(annotations)
|
133 |
+
self.fast_show_mask_gpu(
|
134 |
+
annotations,
|
135 |
+
plt.gca(),
|
136 |
+
random_color=mask_random_color,
|
137 |
+
bboxes=bboxes,
|
138 |
+
points=points,
|
139 |
+
pointlabel=point_label,
|
140 |
+
retinamask=retina,
|
141 |
+
target_height=original_h,
|
142 |
+
target_width=original_w,
|
143 |
+
)
|
144 |
+
if isinstance(annotations, torch.Tensor):
|
145 |
+
annotations = annotations.cpu().numpy()
|
146 |
+
if withContours:
|
147 |
+
contour_all = []
|
148 |
+
temp = np.zeros((original_h, original_w, 1))
|
149 |
+
for i, mask in enumerate(annotations):
|
150 |
+
if type(mask) == dict:
|
151 |
+
mask = mask['segmentation']
|
152 |
+
annotation = mask.astype(np.uint8)
|
153 |
+
if not retina:
|
154 |
+
annotation = cv2.resize(
|
155 |
+
annotation,
|
156 |
+
(original_w, original_h),
|
157 |
+
interpolation=cv2.INTER_NEAREST,
|
158 |
+
)
|
159 |
+
contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
160 |
+
for contour in contours:
|
161 |
+
contour_all.append(contour)
|
162 |
+
cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
|
163 |
+
color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
|
164 |
+
contour_mask = temp / 255 * color.reshape(1, 1, -1)
|
165 |
+
plt.imshow(contour_mask)
|
166 |
+
|
167 |
+
plt.axis('off')
|
168 |
+
fig = plt.gcf()
|
169 |
+
plt.draw()
|
170 |
+
|
171 |
+
try:
|
172 |
+
buf = fig.canvas.tostring_rgb()
|
173 |
+
except AttributeError:
|
174 |
+
fig.canvas.draw()
|
175 |
+
buf = fig.canvas.tostring_rgb()
|
176 |
+
cols, rows = fig.canvas.get_width_height()
|
177 |
+
img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
|
178 |
+
result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
|
179 |
+
plt.close()
|
180 |
+
return result
|
181 |
+
|
182 |
+
# Remark for refactoring: IMO a function should do one thing only, storing the image and plotting should be seperated and do not necessarily need to be class functions but standalone utility functions that the user can chain in his scripts to have more fine-grained control.
|
183 |
+
def plot(self,
|
184 |
+
annotations,
|
185 |
+
output_path,
|
186 |
+
bboxes=None,
|
187 |
+
points=None,
|
188 |
+
point_label=None,
|
189 |
+
mask_random_color=True,
|
190 |
+
better_quality=True,
|
191 |
+
retina=False,
|
192 |
+
withContours=True):
|
193 |
+
if len(annotations) == 0:
|
194 |
+
return None
|
195 |
+
result = self.plot_to_result(
|
196 |
+
annotations,
|
197 |
+
bboxes,
|
198 |
+
points,
|
199 |
+
point_label,
|
200 |
+
mask_random_color,
|
201 |
+
better_quality,
|
202 |
+
retina,
|
203 |
+
withContours,
|
204 |
+
)
|
205 |
+
|
206 |
+
path = os.path.dirname(os.path.abspath(output_path))
|
207 |
+
if not os.path.exists(path):
|
208 |
+
os.makedirs(path)
|
209 |
+
result = result[:, :, ::-1]
|
210 |
+
cv2.imwrite(output_path, result)
|
211 |
+
|
212 |
+
# CPU post process
|
213 |
+
def fast_show_mask(
|
214 |
+
self,
|
215 |
+
annotation,
|
216 |
+
ax,
|
217 |
+
random_color=False,
|
218 |
+
bboxes=None,
|
219 |
+
points=None,
|
220 |
+
pointlabel=None,
|
221 |
+
retinamask=True,
|
222 |
+
target_height=960,
|
223 |
+
target_width=960,
|
224 |
+
):
|
225 |
+
msak_sum = annotation.shape[0]
|
226 |
+
height = annotation.shape[1]
|
227 |
+
weight = annotation.shape[2]
|
228 |
+
#Sort annotations based on area.
|
229 |
+
areas = np.sum(annotation, axis=(1, 2))
|
230 |
+
sorted_indices = np.argsort(areas)
|
231 |
+
annotation = annotation[sorted_indices]
|
232 |
+
|
233 |
+
index = (annotation != 0).argmax(axis=0)
|
234 |
+
if random_color:
|
235 |
+
color = np.random.random((msak_sum, 1, 1, 3))
|
236 |
+
else:
|
237 |
+
color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
|
238 |
+
transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
|
239 |
+
visual = np.concatenate([color, transparency], axis=-1)
|
240 |
+
mask_image = np.expand_dims(annotation, -1) * visual
|
241 |
+
|
242 |
+
show = np.zeros((height, weight, 4))
|
243 |
+
h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
|
244 |
+
indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
|
245 |
+
# Use vectorized indexing to update the values of 'show'.
|
246 |
+
show[h_indices, w_indices, :] = mask_image[indices]
|
247 |
+
if bboxes is not None:
|
248 |
+
for bbox in bboxes:
|
249 |
+
x1, y1, x2, y2 = bbox
|
250 |
+
ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
|
251 |
+
# draw point
|
252 |
+
if points is not None:
|
253 |
+
plt.scatter(
|
254 |
+
[point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
|
255 |
+
[point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
|
256 |
+
s=20,
|
257 |
+
c='y',
|
258 |
+
)
|
259 |
+
plt.scatter(
|
260 |
+
[point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
|
261 |
+
[point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
|
262 |
+
s=20,
|
263 |
+
c='m',
|
264 |
+
)
|
265 |
+
|
266 |
+
if not retinamask:
|
267 |
+
show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
|
268 |
+
ax.imshow(show)
|
269 |
+
|
270 |
+
def fast_show_mask_gpu(
|
271 |
+
self,
|
272 |
+
annotation,
|
273 |
+
ax,
|
274 |
+
random_color=False,
|
275 |
+
bboxes=None,
|
276 |
+
points=None,
|
277 |
+
pointlabel=None,
|
278 |
+
retinamask=True,
|
279 |
+
target_height=960,
|
280 |
+
target_width=960,
|
281 |
+
):
|
282 |
+
msak_sum = annotation.shape[0]
|
283 |
+
height = annotation.shape[1]
|
284 |
+
weight = annotation.shape[2]
|
285 |
+
areas = torch.sum(annotation, dim=(1, 2))
|
286 |
+
sorted_indices = torch.argsort(areas, descending=False)
|
287 |
+
annotation = annotation[sorted_indices]
|
288 |
+
# Find the index of the first non-zero value at each position.
|
289 |
+
index = (annotation != 0).to(torch.long).argmax(dim=0)
|
290 |
+
if random_color:
|
291 |
+
color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
|
292 |
+
else:
|
293 |
+
color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
|
294 |
+
30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
|
295 |
+
transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
|
296 |
+
visual = torch.cat([color, transparency], dim=-1)
|
297 |
+
mask_image = torch.unsqueeze(annotation, -1) * visual
|
298 |
+
# Select data according to the index. The index indicates which batch's data to choose at each position, converting the mask_image into a single batch form.
|
299 |
+
show = torch.zeros((height, weight, 4)).to(annotation.device)
|
300 |
+
try:
|
301 |
+
h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
|
302 |
+
except:
|
303 |
+
h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight))
|
304 |
+
indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
|
305 |
+
# Use vectorized indexing to update the values of 'show'.
|
306 |
+
show[h_indices, w_indices, :] = mask_image[indices]
|
307 |
+
show_cpu = show.cpu().numpy()
|
308 |
+
if bboxes is not None:
|
309 |
+
for bbox in bboxes:
|
310 |
+
x1, y1, x2, y2 = bbox
|
311 |
+
ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
|
312 |
+
# draw point
|
313 |
+
if points is not None:
|
314 |
+
plt.scatter(
|
315 |
+
[point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
|
316 |
+
[point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
|
317 |
+
s=20,
|
318 |
+
c='y',
|
319 |
+
)
|
320 |
+
plt.scatter(
|
321 |
+
[point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
|
322 |
+
[point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
|
323 |
+
s=20,
|
324 |
+
c='m',
|
325 |
+
)
|
326 |
+
if not retinamask:
|
327 |
+
show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
|
328 |
+
ax.imshow(show_cpu)
|
329 |
+
|
330 |
+
# clip
|
331 |
+
@torch.no_grad()
|
332 |
+
def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
|
333 |
+
preprocessed_images = [preprocess(image).to(device) for image in elements]
|
334 |
+
try:
|
335 |
+
import clip # for linear_assignment
|
336 |
+
|
337 |
+
except (ImportError, AssertionError, AttributeError):
|
338 |
+
from ultralytics.yolo.utils.checks import check_requirements
|
339 |
+
|
340 |
+
check_requirements('git+https://github.com/openai/CLIP.git') # required before installing lap from source
|
341 |
+
import clip
|
342 |
+
|
343 |
+
|
344 |
+
tokenized_text = clip.tokenize([search_text]).to(device)
|
345 |
+
stacked_images = torch.stack(preprocessed_images)
|
346 |
+
image_features = model.encode_image(stacked_images)
|
347 |
+
text_features = model.encode_text(tokenized_text)
|
348 |
+
image_features /= image_features.norm(dim=-1, keepdim=True)
|
349 |
+
text_features /= text_features.norm(dim=-1, keepdim=True)
|
350 |
+
probs = 100.0 * image_features @ text_features.T
|
351 |
+
return probs[:, 0].softmax(dim=0)
|
352 |
+
|
353 |
+
def _crop_image(self, format_results):
|
354 |
+
|
355 |
+
image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
|
356 |
+
ori_w, ori_h = image.size
|
357 |
+
annotations = format_results
|
358 |
+
mask_h, mask_w = annotations[0]['segmentation'].shape
|
359 |
+
if ori_w != mask_w or ori_h != mask_h:
|
360 |
+
image = image.resize((mask_w, mask_h))
|
361 |
+
cropped_boxes = []
|
362 |
+
cropped_images = []
|
363 |
+
not_crop = []
|
364 |
+
filter_id = []
|
365 |
+
# annotations, _ = filter_masks(annotations)
|
366 |
+
# filter_id = list(_)
|
367 |
+
for _, mask in enumerate(annotations):
|
368 |
+
if np.sum(mask['segmentation']) <= 100:
|
369 |
+
filter_id.append(_)
|
370 |
+
continue
|
371 |
+
bbox = self._get_bbox_from_mask(mask['segmentation']) # mask 的 bbox
|
372 |
+
cropped_boxes.append(self._segment_image(image, bbox))
|
373 |
+
# cropped_boxes.append(segment_image(image,mask["segmentation"]))
|
374 |
+
cropped_images.append(bbox) # Save the bounding box of the cropped image.
|
375 |
+
|
376 |
+
return cropped_boxes, cropped_images, not_crop, filter_id, annotations
|
377 |
+
|
378 |
+
def box_prompt(self, bbox=None, bboxes=None):
|
379 |
+
if self.results == None:
|
380 |
+
return []
|
381 |
+
assert bbox or bboxes
|
382 |
+
if bboxes is None:
|
383 |
+
bboxes = [bbox]
|
384 |
+
max_iou_index = []
|
385 |
+
for bbox in bboxes:
|
386 |
+
assert (bbox[2] != 0 and bbox[3] != 0)
|
387 |
+
masks = self.results[0].masks.data
|
388 |
+
target_height = self.img.shape[0]
|
389 |
+
target_width = self.img.shape[1]
|
390 |
+
h = masks.shape[1]
|
391 |
+
w = masks.shape[2]
|
392 |
+
if h != target_height or w != target_width:
|
393 |
+
bbox = [
|
394 |
+
int(bbox[0] * w / target_width),
|
395 |
+
int(bbox[1] * h / target_height),
|
396 |
+
int(bbox[2] * w / target_width),
|
397 |
+
int(bbox[3] * h / target_height), ]
|
398 |
+
bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
|
399 |
+
bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
|
400 |
+
bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
|
401 |
+
bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h
|
402 |
+
|
403 |
+
# IoUs = torch.zeros(len(masks), dtype=torch.float32)
|
404 |
+
bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
|
405 |
+
|
406 |
+
masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
|
407 |
+
orig_masks_area = torch.sum(masks, dim=(1, 2))
|
408 |
+
|
409 |
+
union = bbox_area + orig_masks_area - masks_area
|
410 |
+
IoUs = masks_area / union
|
411 |
+
max_iou_index.append(int(torch.argmax(IoUs)))
|
412 |
+
max_iou_index = list(set(max_iou_index))
|
413 |
+
return np.array(masks[max_iou_index].cpu().numpy())
|
414 |
+
|
415 |
+
def point_prompt(self, points, pointlabel): # numpy
|
416 |
+
if self.results == None:
|
417 |
+
return []
|
418 |
+
masks = self._format_results(self.results[0], 0)
|
419 |
+
target_height = self.img.shape[0]
|
420 |
+
target_width = self.img.shape[1]
|
421 |
+
h = masks[0]['segmentation'].shape[0]
|
422 |
+
w = masks[0]['segmentation'].shape[1]
|
423 |
+
if h != target_height or w != target_width:
|
424 |
+
points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
|
425 |
+
onemask = np.zeros((h, w))
|
426 |
+
masks = sorted(masks, key=lambda x: x['area'], reverse=True)
|
427 |
+
for i, annotation in enumerate(masks):
|
428 |
+
if type(annotation) == dict:
|
429 |
+
mask = annotation['segmentation']
|
430 |
+
else:
|
431 |
+
mask = annotation
|
432 |
+
for i, point in enumerate(points):
|
433 |
+
if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
|
434 |
+
onemask[mask] = 1
|
435 |
+
if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
|
436 |
+
onemask[mask] = 0
|
437 |
+
onemask = onemask >= 1
|
438 |
+
return np.array([onemask])
|
439 |
+
|
440 |
+
def text_prompt(self, text):
|
441 |
+
if self.results == None:
|
442 |
+
return []
|
443 |
+
format_results = self._format_results(self.results[0], 0)
|
444 |
+
cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
|
445 |
+
clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
|
446 |
+
scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
|
447 |
+
max_idx = scores.argsort()
|
448 |
+
max_idx = max_idx[-1]
|
449 |
+
max_idx += sum(np.array(filter_id) <= int(max_idx))
|
450 |
+
return np.array([annotations[max_idx]['segmentation']])
|
451 |
+
|
452 |
+
def everything_prompt(self):
|
453 |
+
if self.results == None:
|
454 |
+
return []
|
455 |
+
return self.results[0].masks.data
|
456 |
+
|
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/run_test.py
ADDED
@@ -0,0 +1,224 @@
import os
import sys
import cv2
import numpy as np
import onnxruntime
import time
import matplotlib.pyplot as plt
import torch
from ultralytics.engine.results import Results
from tools_pt import *
from prompt import FastSAMPrompt
import aidlite
import argparse
import ast

# Cosine-similarity helper used to compare outputs from different runtimes.
def get_acc(onnx_out, other_out):
    cosine_similarity = np.dot(np.array(onnx_out), np.array(other_out)) / (np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
    return cosine_similarity

def cal_sigmoid(x):
    return 1 / (1 + np.exp(-x))

class qnn_predict(object):
    def __init__(self, inputshape, outputshape, args) -> None:
        aidlite.set_log_level(aidlite.LogLevel.INFO)
        aidlite.log_to_stderr()
        print(f"Aidlite library version : {aidlite.get_library_version()}")
        print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
        config = aidlite.Config.create_instance()
        if config is None:
            print("Create config failed !")
        config.implement_type = aidlite.ImplementType.TYPE_LOCAL
        config.framework_type = aidlite.FrameworkType.TYPE_QNN
        config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
        config.is_quantify_model = 1

        model = aidlite.Model.create_instance(args.target_model)
        if model is None:
            print("Create model failed !")

        self.input_shape = inputshape
        self.out_shape = outputshape
        model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
        self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
        if self.interpreter is None:
            print("build_interpretper_from_model_and_config failed !")
        result = self.interpreter.init()
        if result != 0:
            print(f"interpreter init failed !")
        result = self.interpreter.load_model()
        if result != 0:
            print("interpreter load model failed !")
        print("detect model load success!")

        self.conf = 0.4
        self.iou = 0.9
        self.size = 640
        self.agnostic_nms = False
        self.max_det = 300
        self.names = ['object']
        self.classes = None
        self.retina_masks = True

    def pretreat_img(self, img):
        scale = 1 / 255.
        img_size = cv2.resize(img, (self.size, self.size), interpolation=cv2.INTER_LINEAR)
        float_img = img_size.astype('float32')
        float_img = float_img * scale
        float_img = float_img[:, :, ::-1]
        return float_img

    def postprocess(self, preds, img, orig_imgs):
        """TODO: filter by classes."""
        p = non_max_suppression(torch.from_numpy(preds[0]),
                                self.conf,
                                self.iou,
                                agnostic=self.agnostic_nms,
                                max_det=self.max_det,
                                nc=len(self.names),
                                classes=self.classes)

        results = []
        if len(p) == 0 or len(p[0]) == 0:
            print("No object detected.")
            return results

        full_box = torch.zeros_like(p[0][0])
        full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
        full_box = full_box.view(1, -1)
        critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
        if critical_iou_index.numel() != 0:
            full_box[0][4] = p[0][critical_iou_index][:, 4]
            full_box[0][6:] = p[0][critical_iou_index][:, 6:]
            p[0][critical_iou_index] = full_box

        # proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
        proto = torch.from_numpy(preds[-1])
        for i, pred in enumerate(p):
            orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
            path = img[0]  # self.batch[0]
            img_path = path[i] if isinstance(path, list) else path
            if not len(pred):  # save empty boxes
                results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
                continue
            if self.retina_masks:
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
                masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
            else:
                masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
                if not isinstance(orig_imgs, torch.Tensor):
                    pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            results.append(
                Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
        return results

    def qnn_run(self, orig_imgs, img_path, args):
        input_img_f = self.pretreat_img(orig_imgs)  # resize image, HWC
        # print("qnn_input:",input_img_f)
        # encoder texts
        input_img = np.expand_dims(input_img_f, 0)

        invoke_time = []
        for i in range(args.invoke_nums):
            result = self.interpreter.set_input_tensor(0, input_img.data)
            t0 = time.time()
            result = self.interpreter.invoke()
            t1 = time.time()
            cost_time = (t1 - t0) * 1000
            invoke_time.append(cost_time)
        mask_ = self.interpreter.get_output_tensor(0)
        concat_ = self.interpreter.get_output_tensor(1)
        mul_ = self.interpreter.get_output_tensor(3)
        split_ = self.interpreter.get_output_tensor(2)
        mask_ = mask_.reshape(*self.out_shape[3])
        mask_ = mask_.transpose((0, 3, 1, 2))
        concat_ = concat_.reshape(*self.out_shape[2])
        mul_ = mul_.reshape(*self.out_shape[1])
        split_ = split_.reshape(*self.out_shape[0])
        sig_ = cal_sigmoid(split_)

        output_concat = np.concatenate((mul_, sig_), axis=1)
        output_concat = np.concatenate((output_concat, concat_), axis=1)

        # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
        # Timing statistics
        max_invoke_time = max(invoke_time)
        min_invoke_time = min(invoke_time)
        mean_invoke_time = sum(invoke_time) / args.invoke_nums
        var_invoketime = np.var(invoke_time)
        print("========================================")
        print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
        print("========================================")

        qnn_out = [np.array(output_concat), np.array(mask_)]
        # print("qnn predict out:",qnn_out)

        nchw_img = input_img.transpose(0, 3, 1, 2)
        everything_results = self.postprocess(qnn_out, nchw_img, [orig_imgs])
        # print("everything_results: ",everything_results)

        prompt_process = FastSAMPrompt(args.imgs, everything_results, device="cpu")

        # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
        try:
            if args.point_prompt == [[0, 0]]:
                ann = prompt_process.everything_prompt()
            else:
                ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
            out_name = os.path.basename(img_path).split(".")[0]
            if True:  # savepic
                outpath = "python/"
                if not os.path.exists(outpath):
                    os.mkdir(outpath)
                prompt_process.plot(
                    annotations=ann,
                    output_path=os.path.join(outpath, out_name + "_result.jpg"),
                    mask_random_color=True,
                    better_quality=True,
                    retina=False,
                    withContours=True,
                )
            else:
                plt.figure()
                prompt_process.fast_show_mask(annotation=ann,
                                              ax=plt)
        except Exception as e:
            print(f"Warning : An error occurred in the picture {img_path} prediction -{e}")
        return [mask_.reshape(-1), output_concat.reshape(-1)]


def parser_args():
    parser = argparse.ArgumentParser(description="Run model benchmarks")
    parser.add_argument('--target_model', type=str, default='models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin', help="inference model path")
    parser.add_argument('--source_model', type=str, default='models/fastsam_s.onnx', help="original model path")
    parser.add_argument('--imgs', type=str, default='python/dogs.jpg', help="Predict images path")
    parser.add_argument('--invoke_nums', type=int, default=10, help="Inference nums")
    parser.add_argument('--point_prompt', type=str, default="[[0,0]]", help="example:[[x1,y1],[x2,y2]]")
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parser_args()
    inputshape = [[1, 640, 640, 3]]
    outputshape = [[1, 1, 8400], [1, 4, 8400], [1, 32, 8400], [1, 160, 160, 32]]
    args.point_prompt = ast.literal_eval(args.point_prompt)

    predict = qnn_predict(inputshape, outputshape, args)
    if os.path.isdir(args.imgs):
        img_files = os.listdir(args.imgs)
        for fi in img_files:
            img_path = os.path.join(args.imgs, fi)
            im0s = cv2.imread(img_path)  # BGR
            im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
            predict.qnn_run(im0s, img_path, args)
    else:
        img_path = args.imgs
        im0s = cv2.imread(img_path)  # BGR
        im0s = cv2.resize(im0s, (640, 640), interpolation=cv2.INTER_LINEAR)
        qnn_result = predict.qnn_run(im0s, img_path, args)
    print("Prediction completion and the results are saved !")
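Two details of `run_test.py` that are easy to miss: `--point_prompt` arrives as a string and is parsed with `ast.literal_eval`, and `get_acc` is a plain cosine similarity between flattened output vectors. The sketch below illustrates both on toy inputs only; the `get_acc` shown here is a re-implementation for demonstration, not imported from the script.

```python
# Minimal sketch of run_test.py's --point_prompt parsing and its cosine-similarity check.
import ast
import numpy as np

# "[[620,360]]" on the command line becomes a list of [x, y] points.
points = ast.literal_eval("[[620,360]]")
print(points, points == [[0, 0]])  # [[620, 360]] False -> the point_prompt branch is taken

# get_acc-style cosine similarity between two flattened output vectors.
def cosine_similarity(a, b):
    a, b = np.asarray(a, dtype=np.float64), np.asarray(b, dtype=np.float64)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))

print(cosine_similarity([1.0, 0.0, 1.0], [1.0, 0.0, 1.0]))  # 1.0 (identical outputs)
print(round(cosine_similarity([1.0, 0.0], [0.0, 1.0]), 3))  # 0.0 (orthogonal outputs)
```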
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/tools_pt.py
ADDED
@@ -0,0 +1,372 @@
1 |
+
import numpy as np
|
2 |
+
import time
|
3 |
+
import torch
|
4 |
+
import torchvision
|
5 |
+
import torch.nn.functional as F
|
6 |
+
|
7 |
+
|
8 |
+
|
9 |
+
def clip_boxes(boxes, shape):
|
10 |
+
"""
|
11 |
+
Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
|
12 |
+
|
13 |
+
Args:
|
14 |
+
boxes (torch.Tensor): the bounding boxes to clip
|
15 |
+
shape (tuple): the shape of the image
|
16 |
+
"""
|
17 |
+
if isinstance(boxes, torch.Tensor): # faster individually
|
18 |
+
boxes[..., 0].clamp_(0, shape[1]) # x1
|
19 |
+
boxes[..., 1].clamp_(0, shape[0]) # y1
|
20 |
+
boxes[..., 2].clamp_(0, shape[1]) # x2
|
21 |
+
boxes[..., 3].clamp_(0, shape[0]) # y2
|
22 |
+
else: # np.array (faster grouped)
|
23 |
+
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
|
24 |
+
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
|
25 |
+
|
26 |
+
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
|
27 |
+
"""
|
28 |
+
Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
|
29 |
+
(img1_shape) to the shape of a different image (img0_shape).
|
30 |
+
|
31 |
+
Args:
|
32 |
+
img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
|
33 |
+
boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
|
34 |
+
img0_shape (tuple): the shape of the target image, in the format of (height, width).
|
35 |
+
ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
|
36 |
+
calculated based on the size difference between the two images.
|
37 |
+
padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
|
38 |
+
rescaling.
|
39 |
+
|
40 |
+
Returns:
|
41 |
+
boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
|
42 |
+
"""
|
43 |
+
if ratio_pad is None: # calculate from img0_shape
|
44 |
+
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
45 |
+
pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
|
46 |
+
(img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
|
47 |
+
else:
|
48 |
+
gain = ratio_pad[0][0]
|
49 |
+
pad = ratio_pad[1]
|
50 |
+
|
51 |
+
if padding:
|
52 |
+
boxes[..., [0, 2]] -= pad[0] # x padding
|
53 |
+
boxes[..., [1, 3]] -= pad[1] # y padding
|
54 |
+
boxes[..., :4] /= gain
|
55 |
+
clip_boxes(boxes, img0_shape)
|
56 |
+
return boxes
|
57 |
+
|
58 |
+
|
59 |
+
def xywh2xyxy(x):
|
60 |
+
"""
|
61 |
+
Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
|
62 |
+
top-left corner and (x2, y2) is the bottom-right corner.
|
63 |
+
|
64 |
+
Args:
|
65 |
+
x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
|
66 |
+
|
67 |
+
Returns:
|
68 |
+
y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
|
69 |
+
"""
|
70 |
+
assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
|
71 |
+
y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
|
72 |
+
dw = x[..., 2] / 2 # half-width
|
73 |
+
dh = x[..., 3] / 2 # half-height
|
74 |
+
y[..., 0] = x[..., 0] - dw # top left x
|
75 |
+
y[..., 1] = x[..., 1] - dh # top left y
|
76 |
+
y[..., 2] = x[..., 0] + dw # bottom right x
|
77 |
+
y[..., 3] = x[..., 1] + dh # bottom right y
|
78 |
+
return y
|
79 |
+
|
80 |
+
|
81 |
+
def non_max_suppression(
|
82 |
+
prediction,
|
83 |
+
conf_thres=0.25,
|
84 |
+
iou_thres=0.45,
|
85 |
+
classes=None,
|
86 |
+
agnostic=False,
|
87 |
+
multi_label=False,
|
88 |
+
labels=(),
|
89 |
+
max_det=300,
|
90 |
+
nc=0, # number of classes (optional)
|
91 |
+
max_time_img=0.05,
|
92 |
+
max_nms=30000,
|
93 |
+
max_wh=7680,
|
94 |
+
):
|
95 |
+
"""
|
96 |
+
Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
|
97 |
+
|
98 |
+
Args:
|
99 |
+
prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
|
100 |
+
containing the predicted boxes, classes, and masks. The tensor should be in the format
|
101 |
+
output by a model, such as YOLO.
|
102 |
+
conf_thres (float): The confidence threshold below which boxes will be filtered out.
|
103 |
+
Valid values are between 0.0 and 1.0.
|
104 |
+
iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
|
105 |
+
Valid values are between 0.0 and 1.0.
|
106 |
+
classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
|
107 |
+
agnostic (bool): If True, the model is agnostic to the number of classes, and all
|
108 |
+
classes will be considered as one.
|
109 |
+
multi_label (bool): If True, each box may have multiple labels.
|
110 |
+
labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
|
111 |
+
list contains the apriori labels for a given image. The list should be in the format
|
112 |
+
output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
|
113 |
+
max_det (int): The maximum number of boxes to keep after NMS.
|
114 |
+
nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
|
115 |
+
max_time_img (float): The maximum time (seconds) for processing one image.
|
116 |
+
max_nms (int): The maximum number of boxes into torchvision.ops.nms().
|
117 |
+
max_wh (int): The maximum box width and height in pixels
|
118 |
+
|
119 |
+
Returns:
|
120 |
+
(List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
|
121 |
+
shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
|
122 |
+
(x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
|
123 |
+
"""
|
124 |
+
|
125 |
+
# Checks
|
126 |
+
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
|
127 |
+
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
|
128 |
+
if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
|
129 |
+
prediction = prediction[0] # select only inference output
|
130 |
+
|
131 |
+
device = prediction.device
|
132 |
+
mps = 'mps' in device.type # Apple MPS
|
133 |
+
if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
|
134 |
+
prediction = prediction.cpu()
|
135 |
+
bs = prediction.shape[0] # batch size
|
136 |
+
nc = nc or (prediction.shape[1] - 4) # number of classes
|
137 |
+
nm = prediction.shape[1] - nc - 4
|
138 |
+
mi = 4 + nc # mask start index
|
139 |
+
xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
|
140 |
+
|
141 |
+
# Settings
|
142 |
+
# min_wh = 2 # (pixels) minimum box width and height
|
143 |
+
time_limit = 0.5 + max_time_img * bs # seconds to quit after
|
144 |
+
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
|
145 |
+
|
146 |
+
prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
|
147 |
+
prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
|
148 |
+
|
149 |
+
t = time.time()
|
150 |
+
output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
|
151 |
+
for xi, x in enumerate(prediction): # image index, image inference
|
152 |
+
# Apply constraints
|
153 |
+
# x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
|
154 |
+
x = x[xc[xi]] # confidence
|
155 |
+
|
156 |
+
# Cat apriori labels if autolabelling
|
157 |
+
if labels and len(labels[xi]):
|
158 |
+
lb = labels[xi]
|
159 |
+
v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
|
160 |
+
v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
|
161 |
+
v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
|
162 |
+
x = torch.cat((x, v), 0)
|
163 |
+
|
164 |
+
# If none remain process next image
|
165 |
+
if not x.shape[0]:
|
166 |
+
continue
|
167 |
+
|
168 |
+
# Detections matrix nx6 (xyxy, conf, cls)
|
169 |
+
box, cls, mask = x.split((4, nc, nm), 1)
|
170 |
+
|
171 |
+
if multi_label:
|
172 |
+
i, j = torch.where(cls > conf_thres)
|
173 |
+
x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
|
174 |
+
else: # best class only
|
175 |
+
conf, j = cls.max(1, keepdim=True)
|
176 |
+
x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
|
177 |
+
|
178 |
+
# Filter by class
|
179 |
+
if classes is not None:
|
180 |
+
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
|
181 |
+
|
182 |
+
# Check shape
|
183 |
+
n = x.shape[0] # number of boxes
|
184 |
+
if not n: # no boxes
|
185 |
+
continue
|
186 |
+
if n > max_nms: # excess boxes
|
187 |
+
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
|
188 |
+
|
189 |
+
# Batched NMS
|
190 |
+
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
|
191 |
+
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
|
192 |
+
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
|
193 |
+
i = i[:max_det] # limit detections
|
194 |
+
|
195 |
+
# # Experimental
|
196 |
+
# merge = False # use merge-NMS
|
197 |
+
# if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
|
198 |
+
# # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
|
199 |
+
# from .metrics import box_iou
|
200 |
+
# iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
|
201 |
+
# weights = iou * scores[None] # box weights
|
202 |
+
# x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
|
203 |
+
# redundant = True # require redundant detections
|
204 |
+
# if redundant:
|
205 |
+
# i = i[iou.sum(1) > 1] # require redundancy
|
206 |
+
|
207 |
+
output[xi] = x[i]
|
208 |
+
if mps:
|
209 |
+
output[xi] = output[xi].to(device)
|
210 |
+
# if (time.time() - t) > time_limit:
|
211 |
+
# LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
|
212 |
+
# break # time limit exceeded
|
213 |
+
|
214 |
+
return output
|
215 |
+
|
216 |
+
|
def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
    Args:
        boxes: (n, 4)
        image_shape: (height, width)
        threshold: pixel threshold
    Returns:
        adjusted_boxes: adjusted bounding boxes
    '''

    # Image dimensions
    h, w = image_shape

    # Adjust boxes
    boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 0])  # x1
    boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 1])  # y1
    boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
        w, dtype=torch.float, device=boxes.device), boxes[:, 2])  # x2
    boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
        h, dtype=torch.float, device=boxes.device), boxes[:, 3])  # y2

    return boxes


def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
    Args:
        box1: (4, )
        boxes: (n, 4)
    Returns:
        high_iou_indices: Indices of boxes with IoU > thres
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
    # obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

    # compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # compute the area of union
    union = box1_area + box2_area - intersection

    # compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        if iou.numel() == 0:
            return 0
        return iou

    # get indices of boxes with IoU > thres
    high_iou_indices = torch.nonzero(iou > iou_thres).flatten()

    return high_iou_indices
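As a usage illustration, `bbox_iou` can match a box prompt against candidate detections. A minimal sketch, assuming this file is importable as `tools_pt` from the `python/` directory; the tensors and thresholds below are made up:

```python
import torch
from tools_pt import bbox_iou  # assumes the python/ directory is the working directory

prompt_box = torch.tensor([100., 100., 300., 300.])      # x1, y1, x2, y2 of a box prompt
candidates = torch.tensor([[ 90.,  95., 310., 305.],     # nearly the same box
                           [  0.,   0., 640., 640.],     # whole image
                           [400., 400., 500., 500.]])    # unrelated box

# Indices of candidates whose IoU with the prompt exceeds iou_thres
# (clone the boxes because they are adjusted in place against the image border)
keep = bbox_iou(prompt_box, candidates.clone(), iou_thres=0.5, image_shape=(640, 640))
# Raw IoU values instead of indices
ious = bbox_iou(prompt_box, candidates.clone(), image_shape=(640, 640), raw_output=True)
print(keep, ious)
```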
def scale_masks(masks, shape, padding=True):
    """
    Rescale segment masks to shape.

    Args:
        masks (torch.Tensor): (N, C, H, W).
        shape (tuple): Height and width.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.
    """
    mh, mw = masks.shape[2:]
    gain = min(mh / shape[0], mw / shape[1])  # gain = old / new
    pad = [mw - shape[1] * gain, mh - shape[0] * gain]  # wh padding
    if padding:
        pad[0] /= 2
        pad[1] /= 2
    top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0)  # y, x
    bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
    masks = masks[..., top:bottom, left:right]

    masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False)  # NCHW
    return masks


def process_mask_native(protos, masks_in, bboxes, shape):
    """
    It takes the output of the mask head, and crops it after upsampling to the bounding boxes.

    Args:
        protos (torch.Tensor): [mask_dim, mask_h, mask_w]
        masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
        bboxes (torch.Tensor): [n, 4], n is number of masks after nms
        shape (tuple): the size of the input image (h,w)

    Returns:
        masks (torch.Tensor): The returned masks with dimensions [h, w, n]
    """
    c, mh, mw = protos.shape  # CHW
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
    masks = scale_masks(masks[None], shape)[0]  # CHW
    masks = crop_mask(masks, bboxes)  # CHW
    return masks.gt_(0.5)


def crop_mask(masks, boxes):
    """
    It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.

    Args:
        masks (torch.Tensor): [n, h, w] tensor of masks
        boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form

    Returns:
        (torch.Tensor): The masks are being cropped to the bounding box.
    """
    _, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
    c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None]  # cols shape(1,h,1)

    return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))


def process_mask(protos, masks_in, bboxes, shape, upsample=False):
    """
    Apply masks to bounding boxes using the output of the mask head.

    Args:
        protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
        masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
        bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
        shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
        upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.

    Returns:
        (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
            are the height and width of the input image. The mask is applied to the bounding boxes.
    """

    c, mh, mw = protos.shape  # CHW
    ih, iw = shape
    masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)  # CHW

    downsampled_bboxes = bboxes.clone()
    downsampled_bboxes[:, 0] *= mw / iw
    downsampled_bboxes[:, 2] *= mw / iw
    downsampled_bboxes[:, 3] *= mh / ih
    downsampled_bboxes[:, 1] *= mh / ih

    masks = crop_mask(masks, downsampled_bboxes)  # CHW
    if upsample:
        masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0]  # CHW
    return masks.gt_(0.5)
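Taken together, these functions form the usual FastSAM post-processing chain: NMS selects detections, and their mask coefficients are then combined with the prototype masks, cropped to the boxes, and thresholded. A minimal sketch with random tensors, assuming `tools_pt` is importable from the `python/` directory and a 32x160x160 prototype tensor as produced by FastSAM-s:

```python
import torch
from tools_pt import process_mask  # assumes the python/ directory is on sys.path

protos = torch.randn(32, 160, 160)   # prototype masks from the mask head
mask_coeff = torch.randn(3, 32)      # coefficients of 3 detections kept after NMS
boxes = torch.tensor([[ 50.,  60., 200., 220.],
                      [300., 300., 500., 480.],
                      [  0.,   0., 640., 640.]])  # x1, y1, x2, y2 at the 640x640 input scale

masks = process_mask(protos, mask_coeff, boxes, shape=(640, 640), upsample=True)
print(masks.shape)  # torch.Size([3, 640, 640]); values thresholded to 0./1.
```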
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/utils.py
ADDED
@@ -0,0 +1,86 @@
import numpy as np
import torch
from PIL import Image


def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
    '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
    Args:
        boxes: (n, 4)
        image_shape: (height, width)
        threshold: pixel threshold
    Returns:
        adjusted_boxes: adjusted bounding boxes
    '''

    # Image dimensions
    h, w = image_shape

    # Adjust boxes
    boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 0])  # x1
    boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
        0, dtype=torch.float, device=boxes.device), boxes[:, 1])  # y1
    boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
        w, dtype=torch.float, device=boxes.device), boxes[:, 2])  # x2
    boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
        h, dtype=torch.float, device=boxes.device), boxes[:, 3])  # y2

    return boxes


def convert_box_xywh_to_xyxy(box):
    x1 = box[0]
    y1 = box[1]
    x2 = box[0] + box[2]
    y2 = box[1] + box[3]
    return [x1, y1, x2, y2]


def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
    '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
    Args:
        box1: (4, )
        boxes: (n, 4)
    Returns:
        high_iou_indices: Indices of boxes with IoU > thres
    '''
    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
    # obtain coordinates for intersections
    x1 = torch.max(box1[0], boxes[:, 0])
    y1 = torch.max(box1[1], boxes[:, 1])
    x2 = torch.min(box1[2], boxes[:, 2])
    y2 = torch.min(box1[3], boxes[:, 3])

    # compute the area of intersection
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)

    # compute the area of both individual boxes
    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

    # compute the area of union
    union = box1_area + box2_area - intersection

    # compute the IoU
    iou = intersection / union  # Should be shape (n, )
    if raw_output:
        if iou.numel() == 0:
            return 0
        return iou

    # get indices of boxes with IoU > thres
    high_iou_indices = torch.nonzero(iou > iou_thres).flatten()

    return high_iou_indices


def image_to_np_ndarray(image):
    if type(image) is str:
        return np.array(Image.open(image))
    elif issubclass(type(image), Image.Image):
        return np.array(image)
    elif type(image) is np.ndarray:
        return image
    return None
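For completeness, a small hedged sketch of how these helpers might be called together; it assumes the `python/` directory is the working directory so `utils` resolves to this file, and that an image such as `dogs.jpg` is available next to it:

```python
from utils import convert_box_xywh_to_xyxy, image_to_np_ndarray

# [x, y, w, h] -> [x1, y1, x2, y2]
box_xyxy = convert_box_xywh_to_xyxy([100, 150, 200, 120])  # -> [100, 150, 300, 270]

# Accepts a path, a PIL.Image or an np.ndarray; returns an np.ndarray (None otherwise)
img = image_to_np_ndarray("dogs.jpg")
print(box_xyxy, None if img is None else img.shape)
```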