qc903113684 committed on
Commit 6011527 · verified · 1 Parent(s): d60797f

Upload 25 files

Files changed (25)
  1. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/README.md +48 -0
  2. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin +3 -0
  3. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/fastsam_s.onnx +3 -0
  4. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/dogs.jpg +0 -0
  5. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/onnx_export.py +50 -0
  6. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/prompt.py +456 -0
  7. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/run_test.py +224 -0
  8. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/tools_pt.py +372 -0
  9. model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/utils.py +86 -0
  10. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/README.md +48 -0
  11. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/models/cutoff_fastsam_s_fp16.qnn216.ctx.bin +3 -0
  12. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/dogs.jpg +0 -0
  13. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/onnx_export.py +50 -0
  14. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/prompt.py +456 -0
  15. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/run_test.py +224 -0
  16. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/tools_pt.py +372 -0
  17. model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/utils.py +86 -0
  18. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/README.md +48 -0
  19. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin +3 -0
  20. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/dogs.jpg +0 -0
  21. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/onnx_export.py +50 -0
  22. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/prompt.py +456 -0
  23. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/run_test.py +224 -0
  24. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/tools_pt.py +372 -0
  25. model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/utils.py +86 -0
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,48 @@
+ ## Model Information
+ ### Source model
+ - Input shape: 640x640
+ - Number of parameters: 11.24M
+ - Model size: 45.0M
+ - Output shape: 1x37x8400, 1x32x160x160
+
+ Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
+
+ ### Converted model
+
+ - Precision: INT8
+ - Backend: QNN2.16
+ - Target Device: FV01 QCS6490
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN package that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # Check the version of the underlying C++ AidLite library
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+ # Check the version of the AidLite Python bindings
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+ ```bash
+ cd fastsam_s/model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite
+ export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0
+
+ python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
+ ```
+
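For reference, the demo script above boils down to the following AidLite flow. This is a condensed, illustrative sketch of what `python/run_test.py` does (the API calls and shapes are copied from that script), not an additional file in this upload.

```python
import numpy as np
import aidlite

# Build a QNN/DSP interpreter for the context binary, mirroring run_test.py.
config = aidlite.Config.create_instance()
config.implement_type = aidlite.ImplementType.TYPE_LOCAL
config.framework_type = aidlite.FrameworkType.TYPE_QNN
config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
config.is_quantify_model = 1

model = aidlite.Model.create_instance("./models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin")
in_shape = [[1, 640, 640, 3]]
out_shape = [[1, 1, 8400], [1, 4, 8400], [1, 32, 8400], [1, 160, 160, 32]]
model.set_model_properties(in_shape, aidlite.DataType.TYPE_FLOAT32,
                           out_shape, aidlite.DataType.TYPE_FLOAT32)

interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
interpreter.init()
interpreter.load_model()

# One inference pass on a stand-in frame (run_test.py feeds a resized BGR image scaled to [0, 1]).
frame = np.random.rand(1, 640, 640, 3).astype(np.float32)
interpreter.set_input_tensor(0, frame.data)
interpreter.invoke()
outputs = [interpreter.get_output_tensor(i) for i in range(4)]  # reassembled into boxes/masks by run_test.py
```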
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fa2680a1ccd80463b4faa7b38801e5bb31b8c9d3060db8822b5f0b23b43dc57e
+ size 12523024
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/models/fastsam_s.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eff808553c465e3e1aa3ac7753ec22818cb7c67df1f6ed02e9ed036981e09edd
+ size 47284660
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/dogs.jpg ADDED
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/onnx_export.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import cv2
+ import os
+ import sys
+
+ from ultralytics.models.fastsam import FastSAM
+
+ class Fast_SAM(torch.nn.Module):
+     """Exportable FastSAM model, end-to-end."""
+
+     def __init__(self) -> None:
+         super().__init__()
+         pt_name = './models/FastSAM-s.pt'
+         self.model = FastSAM(pt_name).model
+
+     def forward(self, image: torch.Tensor):
+         """
+         Run FastSAM on `image` and produce high-quality segmentation masks.
+         Faster than SAM as it is based on YOLOv8.
+
+         Parameters:
+             image: Pixel values pre-processed for encoder consumption.
+                 Range: float[0, 1]
+                 3-channel Color Space: BGR
+         Returns:
+             (boxes, mask): detections of shape 1x37x8400 and mask prototypes of shape 1x32x160x160.
+         """
+         predictions = self.model(image)
+         # Return predictions as a flat tuple instead of a nested tuple.
+         return (predictions[0], predictions[1][2])
+
+
+ model = Fast_SAM()
+ num_params = sum(p.numel() for p in model.parameters())
+ print(f'Number of FastSAM-s parameters: {num_params}')
+ dummy_input = torch.randn([1, 3, 640, 640], dtype=torch.float32)
+ source_model = torch.jit.trace(
+     model.to("cpu"), dummy_input, check_trace=False
+ )
+ torch.onnx.export(model,                       # model being run
+                   dummy_input,                 # model input (or a tuple for multiple inputs)
+                   "./models/fastsam_s.onnx",   # where to save the model
+                   export_params=True,          # store the trained parameter weights inside the model file
+                   opset_version=12,            # the ONNX version to export the model to
+                   do_constant_folding=True,    # whether to execute constant folding for optimization
+                   input_names=['input'],       # the model's input names
+                   output_names=['boxes', 'mask'],
+                   verbose=True,
+                   )
+ print("Convert to onnx successfully!")
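To sanity-check the exported file, one can run a dummy input through onnxruntime (already a dependency of `run_test.py`) and confirm the output shapes listed in the README. This snippet is illustrative and not part of the upload.

```python
import numpy as np
import onnxruntime as ort

# Load the exported model and run one dummy 640x640 frame through it.
session = ort.InferenceSession("./models/fastsam_s.onnx")
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
boxes, mask = session.run(None, {"input": dummy})
print(boxes.shape, mask.shape)  # expected: (1, 37, 8400) and (1, 32, 160, 160)
```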
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/prompt.py ADDED
@@ -0,0 +1,456 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import torch
7
+ from utils import image_to_np_ndarray
8
+ from PIL import Image
9
+
10
+
11
+ class FastSAMPrompt:
12
+
13
+ def __init__(self, image, results, device='cpu'):
14
+ if isinstance(image, str) or isinstance(image, Image.Image):
15
+ image = image_to_np_ndarray(image)
16
+ self.device = device
17
+ self.results = results
18
+ self.img = image
19
+
20
+ def _segment_image(self, image, bbox):
21
+ if isinstance(image, Image.Image):
22
+ image_array = np.array(image)
23
+ else:
24
+ image_array = image
25
+ segmented_image_array = np.zeros_like(image_array)
26
+ x1, y1, x2, y2 = bbox
27
+ segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
28
+ segmented_image = Image.fromarray(segmented_image_array)
29
+ black_image = Image.new('RGB', image.size, (255, 255, 255))
30
+ # transparency_mask = np.zeros_like((), dtype=np.uint8)
31
+ transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
32
+ transparency_mask[y1:y2, x1:x2] = 255
33
+ transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
34
+ black_image.paste(segmented_image, mask=transparency_mask_image)
35
+ return black_image
36
+
37
+ def _format_results(self, result, filter=0):
38
+ annotations = []
39
+ n = len(result.masks.data)
40
+ for i in range(n):
41
+ annotation = {}
42
+ mask = result.masks.data[i] == 1.0
43
+
44
+ if torch.sum(mask) < filter:
45
+ continue
46
+ annotation['id'] = i
47
+ annotation['segmentation'] = mask.cpu().numpy()
48
+ annotation['bbox'] = result.boxes.data[i]
49
+ annotation['score'] = result.boxes.conf[i]
50
+ annotation['area'] = annotation['segmentation'].sum()
51
+ annotations.append(annotation)
52
+ return annotations
53
+
54
+ def filter_masks(self, annotations): # filter out overlapping masks
55
+ annotations.sort(key=lambda x: x['area'], reverse=True)
56
+ to_remove = set()
57
+ for i in range(0, len(annotations)):
58
+ a = annotations[i]
59
+ for j in range(i + 1, len(annotations)):
60
+ b = annotations[j]
61
+ if i != j and j not in to_remove:
62
+ # check if
63
+ if b['area'] < a['area']:
64
+ if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
65
+ to_remove.add(j)
66
+
67
+ return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove
68
+
69
+ def _get_bbox_from_mask(self, mask):
70
+ mask = mask.astype(np.uint8)
71
+ contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
72
+ x1, y1, w, h = cv2.boundingRect(contours[0])
73
+ x2, y2 = x1 + w, y1 + h
74
+ if len(contours) > 1:
75
+ for b in contours:
76
+ x_t, y_t, w_t, h_t = cv2.boundingRect(b)
77
+ # Merge multiple bounding boxes into one.
78
+ x1 = min(x1, x_t)
79
+ y1 = min(y1, y_t)
80
+ x2 = max(x2, x_t + w_t)
81
+ y2 = max(y2, y_t + h_t)
82
+ h = y2 - y1
83
+ w = x2 - x1
84
+ return [x1, y1, x2, y2]
85
+
86
+ def plot_to_result(self,
87
+ annotations,
88
+ bboxes=None,
89
+ points=None,
90
+ point_label=None,
91
+ mask_random_color=True,
92
+ better_quality=True,
93
+ retina=False,
94
+ withContours=True) -> np.ndarray:
95
+ if isinstance(annotations[0], dict):
96
+ annotations = [annotation['segmentation'] for annotation in annotations]
97
+ image = self.img
98
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
99
+ original_h = image.shape[0]
100
+ original_w = image.shape[1]
101
+ if sys.platform == "darwin":
102
+ plt.switch_backend("TkAgg")
103
+ plt.figure(figsize=(original_w / 100, original_h / 100))
104
+ # Add subplot with no margin.
105
+ plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
106
+ plt.margins(0, 0)
107
+ plt.gca().xaxis.set_major_locator(plt.NullLocator())
108
+ plt.gca().yaxis.set_major_locator(plt.NullLocator())
109
+
110
+ plt.imshow(image)
111
+ if better_quality:
112
+ if isinstance(annotations[0], torch.Tensor):
113
+ annotations = np.array(annotations.cpu())
114
+ for i, mask in enumerate(annotations):
115
+ mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
116
+ annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
117
+ if self.device == 'cpu':
118
+ annotations = np.array(annotations)
119
+ self.fast_show_mask(
120
+ annotations,
121
+ plt.gca(),
122
+ random_color=mask_random_color,
123
+ bboxes=bboxes,
124
+ points=points,
125
+ pointlabel=point_label,
126
+ retinamask=retina,
127
+ target_height=original_h,
128
+ target_width=original_w,
129
+ )
130
+ else:
131
+ if isinstance(annotations[0], np.ndarray):
132
+ annotations = torch.from_numpy(annotations)
133
+ self.fast_show_mask_gpu(
134
+ annotations,
135
+ plt.gca(),
136
+ random_color=mask_random_color,
137
+ bboxes=bboxes,
138
+ points=points,
139
+ pointlabel=point_label,
140
+ retinamask=retina,
141
+ target_height=original_h,
142
+ target_width=original_w,
143
+ )
144
+ if isinstance(annotations, torch.Tensor):
145
+ annotations = annotations.cpu().numpy()
146
+ if withContours:
147
+ contour_all = []
148
+ temp = np.zeros((original_h, original_w, 1))
149
+ for i, mask in enumerate(annotations):
150
+ if type(mask) == dict:
151
+ mask = mask['segmentation']
152
+ annotation = mask.astype(np.uint8)
153
+ if not retina:
154
+ annotation = cv2.resize(
155
+ annotation,
156
+ (original_w, original_h),
157
+ interpolation=cv2.INTER_NEAREST,
158
+ )
159
+ contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
160
+ for contour in contours:
161
+ contour_all.append(contour)
162
+ cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
163
+ color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
164
+ contour_mask = temp / 255 * color.reshape(1, 1, -1)
165
+ plt.imshow(contour_mask)
166
+
167
+ plt.axis('off')
168
+ fig = plt.gcf()
169
+ plt.draw()
170
+
171
+ try:
172
+ buf = fig.canvas.tostring_rgb()
173
+ except AttributeError:
174
+ fig.canvas.draw()
175
+ buf = fig.canvas.tostring_rgb()
176
+ cols, rows = fig.canvas.get_width_height()
177
+ img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
178
+ result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
179
+ plt.close()
180
+ return result
181
+
182
+ # Remark for refactoring: a function should do one thing only; storing the image and plotting should be separated and do not necessarily need to be class methods, but standalone utility functions that the user can chain in their own scripts for more fine-grained control.
183
+ def plot(self,
184
+ annotations,
185
+ output_path,
186
+ bboxes=None,
187
+ points=None,
188
+ point_label=None,
189
+ mask_random_color=True,
190
+ better_quality=True,
191
+ retina=False,
192
+ withContours=True):
193
+ if len(annotations) == 0:
194
+ return None
195
+ result = self.plot_to_result(
196
+ annotations,
197
+ bboxes,
198
+ points,
199
+ point_label,
200
+ mask_random_color,
201
+ better_quality,
202
+ retina,
203
+ withContours,
204
+ )
205
+
206
+ path = os.path.dirname(os.path.abspath(output_path))
207
+ if not os.path.exists(path):
208
+ os.makedirs(path)
209
+ result = result[:, :, ::-1]
210
+ cv2.imwrite(output_path, result)
211
+
212
+ # CPU post process
213
+ def fast_show_mask(
214
+ self,
215
+ annotation,
216
+ ax,
217
+ random_color=False,
218
+ bboxes=None,
219
+ points=None,
220
+ pointlabel=None,
221
+ retinamask=True,
222
+ target_height=960,
223
+ target_width=960,
224
+ ):
225
+ msak_sum = annotation.shape[0]
226
+ height = annotation.shape[1]
227
+ weight = annotation.shape[2]
228
+ #Sort annotations based on area.
229
+ areas = np.sum(annotation, axis=(1, 2))
230
+ sorted_indices = np.argsort(areas)
231
+ annotation = annotation[sorted_indices]
232
+
233
+ index = (annotation != 0).argmax(axis=0)
234
+ if random_color:
235
+ color = np.random.random((msak_sum, 1, 1, 3))
236
+ else:
237
+ color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
238
+ transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
239
+ visual = np.concatenate([color, transparency], axis=-1)
240
+ mask_image = np.expand_dims(annotation, -1) * visual
241
+
242
+ show = np.zeros((height, weight, 4))
243
+ h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
244
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
245
+ # Use vectorized indexing to update the values of 'show'.
246
+ show[h_indices, w_indices, :] = mask_image[indices]
247
+ if bboxes is not None:
248
+ for bbox in bboxes:
249
+ x1, y1, x2, y2 = bbox
250
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
251
+ # draw point
252
+ if points is not None:
253
+ plt.scatter(
254
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
255
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
256
+ s=20,
257
+ c='y',
258
+ )
259
+ plt.scatter(
260
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
261
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
262
+ s=20,
263
+ c='m',
264
+ )
265
+
266
+ if not retinamask:
267
+ show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
268
+ ax.imshow(show)
269
+
270
+ def fast_show_mask_gpu(
271
+ self,
272
+ annotation,
273
+ ax,
274
+ random_color=False,
275
+ bboxes=None,
276
+ points=None,
277
+ pointlabel=None,
278
+ retinamask=True,
279
+ target_height=960,
280
+ target_width=960,
281
+ ):
282
+ msak_sum = annotation.shape[0]
283
+ height = annotation.shape[1]
284
+ weight = annotation.shape[2]
285
+ areas = torch.sum(annotation, dim=(1, 2))
286
+ sorted_indices = torch.argsort(areas, descending=False)
287
+ annotation = annotation[sorted_indices]
288
+ # Find the index of the first non-zero value at each position.
289
+ index = (annotation != 0).to(torch.long).argmax(dim=0)
290
+ if random_color:
291
+ color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
292
+ else:
293
+ color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
294
+ 30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
295
+ transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
296
+ visual = torch.cat([color, transparency], dim=-1)
297
+ mask_image = torch.unsqueeze(annotation, -1) * visual
298
+ # Select data according to the index. The index indicates which batch's data to choose at each position, converting the mask_image into a single batch form.
299
+ show = torch.zeros((height, weight, 4)).to(annotation.device)
300
+ try:
301
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
302
+ except:
303
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight))
304
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
305
+ # Use vectorized indexing to update the values of 'show'.
306
+ show[h_indices, w_indices, :] = mask_image[indices]
307
+ show_cpu = show.cpu().numpy()
308
+ if bboxes is not None:
309
+ for bbox in bboxes:
310
+ x1, y1, x2, y2 = bbox
311
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
312
+ # draw point
313
+ if points is not None:
314
+ plt.scatter(
315
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
316
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
317
+ s=20,
318
+ c='y',
319
+ )
320
+ plt.scatter(
321
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
322
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
323
+ s=20,
324
+ c='m',
325
+ )
326
+ if not retinamask:
327
+ show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
328
+ ax.imshow(show_cpu)
329
+
330
+ # clip
331
+ @torch.no_grad()
332
+ def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
333
+ preprocessed_images = [preprocess(image).to(device) for image in elements]
334
+ try:
335
+ import clip # for linear_assignment
336
+
337
+ except (ImportError, AssertionError, AttributeError):
338
+ from ultralytics.yolo.utils.checks import check_requirements
339
+
340
+ check_requirements('git+https://github.com/openai/CLIP.git') # required before installing lap from source
341
+ import clip
342
+
343
+
344
+ tokenized_text = clip.tokenize([search_text]).to(device)
345
+ stacked_images = torch.stack(preprocessed_images)
346
+ image_features = model.encode_image(stacked_images)
347
+ text_features = model.encode_text(tokenized_text)
348
+ image_features /= image_features.norm(dim=-1, keepdim=True)
349
+ text_features /= text_features.norm(dim=-1, keepdim=True)
350
+ probs = 100.0 * image_features @ text_features.T
351
+ return probs[:, 0].softmax(dim=0)
352
+
353
+ def _crop_image(self, format_results):
354
+
355
+ image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
356
+ ori_w, ori_h = image.size
357
+ annotations = format_results
358
+ mask_h, mask_w = annotations[0]['segmentation'].shape
359
+ if ori_w != mask_w or ori_h != mask_h:
360
+ image = image.resize((mask_w, mask_h))
361
+ cropped_boxes = []
362
+ cropped_images = []
363
+ not_crop = []
364
+ filter_id = []
365
+ # annotations, _ = filter_masks(annotations)
366
+ # filter_id = list(_)
367
+ for _, mask in enumerate(annotations):
368
+ if np.sum(mask['segmentation']) <= 100:
369
+ filter_id.append(_)
370
+ continue
371
+ bbox = self._get_bbox_from_mask(mask['segmentation']) # bounding box of the mask
372
+ cropped_boxes.append(self._segment_image(image, bbox))
373
+ # cropped_boxes.append(segment_image(image,mask["segmentation"]))
374
+ cropped_images.append(bbox) # Save the bounding box of the cropped image.
375
+
376
+ return cropped_boxes, cropped_images, not_crop, filter_id, annotations
377
+
378
+ def box_prompt(self, bbox=None, bboxes=None):
379
+ if self.results == None:
380
+ return []
381
+ assert bbox or bboxes
382
+ if bboxes is None:
383
+ bboxes = [bbox]
384
+ max_iou_index = []
385
+ for bbox in bboxes:
386
+ assert (bbox[2] != 0 and bbox[3] != 0)
387
+ masks = self.results[0].masks.data
388
+ target_height = self.img.shape[0]
389
+ target_width = self.img.shape[1]
390
+ h = masks.shape[1]
391
+ w = masks.shape[2]
392
+ if h != target_height or w != target_width:
393
+ bbox = [
394
+ int(bbox[0] * w / target_width),
395
+ int(bbox[1] * h / target_height),
396
+ int(bbox[2] * w / target_width),
397
+ int(bbox[3] * h / target_height), ]
398
+ bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
399
+ bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
400
+ bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
401
+ bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h
402
+
403
+ # IoUs = torch.zeros(len(masks), dtype=torch.float32)
404
+ bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
405
+
406
+ masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
407
+ orig_masks_area = torch.sum(masks, dim=(1, 2))
408
+
409
+ union = bbox_area + orig_masks_area - masks_area
410
+ IoUs = masks_area / union
411
+ max_iou_index.append(int(torch.argmax(IoUs)))
412
+ max_iou_index = list(set(max_iou_index))
413
+ return np.array(masks[max_iou_index].cpu().numpy())
414
+
415
+ def point_prompt(self, points, pointlabel): # numpy
416
+ if self.results == None:
417
+ return []
418
+ masks = self._format_results(self.results[0], 0)
419
+ target_height = self.img.shape[0]
420
+ target_width = self.img.shape[1]
421
+ h = masks[0]['segmentation'].shape[0]
422
+ w = masks[0]['segmentation'].shape[1]
423
+ if h != target_height or w != target_width:
424
+ points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
425
+ onemask = np.zeros((h, w))
426
+ masks = sorted(masks, key=lambda x: x['area'], reverse=True)
427
+ for i, annotation in enumerate(masks):
428
+ if type(annotation) == dict:
429
+ mask = annotation['segmentation']
430
+ else:
431
+ mask = annotation
432
+ for i, point in enumerate(points):
433
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
434
+ onemask[mask] = 1
435
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
436
+ onemask[mask] = 0
437
+ onemask = onemask >= 1
438
+ return np.array([onemask])
439
+
440
+ def text_prompt(self, text):
441
+ if self.results == None:
442
+ return []
443
+ format_results = self._format_results(self.results[0], 0)
444
+ cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
445
+ clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
446
+ scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
447
+ max_idx = scores.argsort()
448
+ max_idx = max_idx[-1]
449
+ max_idx += sum(np.array(filter_id) <= int(max_idx))
450
+ return np.array([annotations[max_idx]['segmentation']])
451
+
452
+ def everything_prompt(self):
453
+ if self.results == None:
454
+ return []
455
+ return self.results[0].masks.data
456
+
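As a usage sketch (the function name and paths are illustrative), `FastSAMPrompt` is typically driven with the `Results` list produced by the detector post-processing, as `run_test.py` does:

```python
from prompt import FastSAMPrompt

def prompt_and_save(image_path, everything_results, point, out_path):
    """Keep only the mask containing `point` and save an overlay image.
    `everything_results` is the list returned by the detector post-processing."""
    pp = FastSAMPrompt(image_path, everything_results, device="cpu")
    masks = pp.point_prompt(points=[point], pointlabel=[1])
    pp.plot(annotations=masks, output_path=out_path,
            mask_random_color=True, better_quality=True,
            retina=False, withContours=True)
    return masks
```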
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/run_test.py ADDED
@@ -0,0 +1,224 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import numpy as np
5
+ import onnxruntime
6
+ import time
7
+ import matplotlib.pyplot as plt
8
+ import torch
9
+ from ultralytics.engine.results import Results
10
+ from tools_pt import *
11
+ from prompt import FastSAMPrompt
12
+ import aidlite
13
+ import argparse
14
+ import ast
15
+
16
+ # cosine similarity between two flattened outputs
17
+ def get_acc(onnx_out,other_out):
18
+ cosine_similarity=np.dot(np.array(onnx_out),np.array(other_out))/(np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
19
+ return cosine_similarity
20
+
21
+ def cal_sigmoid(x):
22
+ return 1 / (1 + np.exp(-x))
23
+
24
+ class qnn_predict(object):
25
+ def __init__(self,inputshape,outputshape,args) -> None:
26
+ aidlite.set_log_level(aidlite.LogLevel.INFO)
27
+ aidlite.log_to_stderr()
28
+ print(f"Aidlite library version : {aidlite.get_library_version()}")
29
+ print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
30
+ config = aidlite.Config.create_instance()
31
+ if config is None:
32
+ print("Create config failed !")
33
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
34
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
35
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
36
+ config.is_quantify_model = 1
37
+
38
+ model = aidlite.Model.create_instance(args.target_model)
39
+ if model is None:
40
+ print("Create model failed !")
41
+
42
+ self.input_shape=inputshape
43
+ self.out_shape = outputshape
44
+ model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
45
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
46
+ if self.interpreter is None:
47
+ print("build_interpretper_from_model_and_config failed !")
48
+ result = self.interpreter.init()
49
+ if result != 0:
50
+ print(f"interpreter init failed !")
51
+ result = self.interpreter.load_model()
52
+ if result != 0:
53
+ print("interpreter load model failed !")
54
+ print("detect model load success!")
55
+
56
+ self.conf = 0.4
57
+ self.iou=0.9
58
+ self.size = 640
59
+ self.agnostic_nms=False
60
+ self.max_det = 300
61
+ self.names=['object']
62
+ self.classes =None
63
+ self.retina_masks=True
64
+
65
+ def pretreat_img(self,img):
66
+ scale = 1/255.
67
+ img_size = cv2.resize(img, (self.size,self.size), interpolation=cv2.INTER_LINEAR)
68
+ float_img = img_size.astype('float32')
69
+ float_img = float_img* scale
70
+ float_img = float_img[:, :, ::-1]
71
+ return float_img
72
+
73
+ def postprocess(self, preds, img, orig_imgs):
74
+ """TODO: filter by classes."""
75
+ p = non_max_suppression(torch.from_numpy(preds[0]),
76
+ self.conf,
77
+ self.iou,
78
+ agnostic=self.agnostic_nms,
79
+ max_det=self.max_det,
80
+ nc=len(self.names),
81
+ classes=self.classes)
82
+
83
+ results = []
84
+ if len(p) == 0 or len(p[0]) == 0:
85
+ print("No object detected.")
86
+ return results
87
+
88
+ full_box = torch.zeros_like(p[0][0])
89
+ full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
90
+ full_box = full_box.view(1, -1)
91
+ critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
92
+ if critical_iou_index.numel() != 0:
93
+ full_box[0][4] = p[0][critical_iou_index][:,4]
94
+ full_box[0][6:] = p[0][critical_iou_index][:,6:]
95
+ p[0][critical_iou_index] = full_box
96
+
97
+ #proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
98
+ proto=torch.from_numpy(preds[-1])
99
+ for i, pred in enumerate(p):
100
+ orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
101
+ path =img[0] #self.batch[0]
102
+ img_path = path[i] if isinstance(path, list) else path
103
+ if not len(pred): # save empty boxes
104
+ results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
105
+ continue
106
+ if self.retina_masks:
107
+ if not isinstance(orig_imgs, torch.Tensor):
108
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
109
+ masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
110
+ else:
111
+ masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
112
+ if not isinstance(orig_imgs, torch.Tensor):
113
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
114
+ results.append(
115
+ Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
116
+ return results
117
+
118
+ def qnn_run(self, orig_imgs,img_path,args):
119
+ input_img_f = self.pretreat_img(orig_imgs) # resize the image (HWC)
120
+ # print("qnn_input:",input_img_f)
121
+ # encoder texts
122
+ input_img = np.expand_dims(input_img_f, 0)
123
+
124
+ invoke_time=[]
125
+ for i in range(args.invoke_nums):
126
+ result = self.interpreter.set_input_tensor(0, input_img.data)
127
+ t0 = time.time()
128
+ result = self.interpreter.invoke()
129
+ t1 = time.time()
130
+ cost_time=(t1-t0)*1000
131
+ invoke_time.append(cost_time)
132
+ mask_ = self.interpreter.get_output_tensor(0)
133
+ concat_ = self.interpreter.get_output_tensor(1)
134
+ mul_ = self.interpreter.get_output_tensor(3)
135
+ split_ = self.interpreter.get_output_tensor(2)
136
+ mask_ = mask_.reshape( * self.out_shape[3])
137
+ mask_=mask_.transpose((0, 3, 1,2))
138
+ concat_ = concat_.reshape( *self.out_shape[2])
139
+ mul_ = mul_.reshape( *self.out_shape[1])
140
+ split_ = split_.reshape( *self.out_shape[0])
141
+ sig_ = cal_sigmoid(split_)
142
+
143
+ output_concat = np.concatenate((mul_,sig_),axis=1)
144
+ output_concat = np.concatenate((output_concat,concat_),axis=1)
145
+
146
+ # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
147
+ ## timing statistics
148
+ max_invoke_time = max(invoke_time)
149
+ min_invoke_time = min(invoke_time)
150
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
151
+ var_invoketime=np.var(invoke_time)
152
+ print("========================================")
153
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
154
+ print("========================================")
155
+
156
+ qnn_out = [np.array(output_concat),np.array(mask_)]
157
+ # print("qnn predict out:",qnn_out)
158
+
159
+ nchw_img = input_img.transpose(0,3,1,2)
160
+ everything_results = self.postprocess( qnn_out, nchw_img, [orig_imgs])
161
+ # print("everything_results: ",everything_results)
162
+
163
+ prompt_process = FastSAMPrompt(args.imgs, everything_results, device="cpu")
164
+
165
+ # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
166
+ try:
167
+ if args.point_prompt ==[[0,0]]:
168
+ ann = prompt_process.everything_prompt()
169
+ else:
170
+ ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
171
+ out_name = os.path.basename(img_path).split(".")[0]
172
+ if True: # savepic
173
+ outpath = "python/"
174
+ if not os.path.exists(outpath):
175
+ os.mkdir(outpath)
176
+ prompt_process.plot(
177
+ annotations=ann,
178
+ output_path=os.path.join(outpath,out_name+"_result_int8.jpg"),
179
+ mask_random_color=True,
180
+ better_quality=True,
181
+ retina=False,
182
+ withContours=True,
183
+ )
184
+ else:
185
+ plt.figure()
186
+ prompt_process.fast_show_mask(annotation=ann,
187
+ ax = plt)
188
+ except Exception as e:
189
+ print(f"Warning: an error occurred while predicting on image {img_path} - {e}")
190
+ return [mask_.reshape(-1),output_concat.reshape(-1)]
191
+
192
+
193
+
194
+ def parser_args():
195
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
196
+ parser.add_argument('--target_model',type=str,default='models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin',help="inference model path")
197
+ parser.add_argument('--source_model',type=str,default='models/fastsam_s.onnx',help="original model path")
198
+ parser.add_argument('--imgs',type=str,default='python/dogs.jpg',help="Predict images path")
199
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
200
+ parser.add_argument('--point_prompt',type=str,default="[[0,0]]",help="example:[[x1,y1],[x2,y2]]")
201
+ args = parser.parse_args()
202
+ return args
203
+
204
+
205
+ if __name__ == "__main__":
206
+ args = parser_args()
207
+ inputshape=[[1,640,640,3]]
208
+ outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
209
+ args.point_prompt = ast.literal_eval(args.point_prompt)
210
+
211
+ predict = qnn_predict(inputshape,outputshape,args)
212
+ if os.path.isdir(args.imgs):
213
+ img_files = os.listdir(args.imgs)
214
+ for fi in img_files:
215
+ img_path = os.path.join(args.imgs,fi)
216
+ im0s = cv2.imread(img_path) # BGR
217
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
218
+ predict.qnn_run(im0s,img_path,args)
219
+ else:
220
+ img_path = args.imgs
221
+ im0s = cv2.imread(img_path) # BGR
222
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
223
+ qnn_result = predict.qnn_run(im0s,img_path,args)
224
+ print("Prediction completed and the results are saved!")
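The output handling inside `qnn_run` can be summarized as a small standalone helper; the shapes follow `outputshape` defined in the script, and the function name is illustrative:

```python
import numpy as np

def assemble_outputs(split_, mul_, concat_, mask_):
    """Reassemble the four QNN outputs into the original ONNX layout:
    (1, 37, 8400) detections and (1, 32, 160, 160) mask prototypes."""
    split_ = split_.reshape(1, 1, 8400)     # raw objectness logits
    mul_ = mul_.reshape(1, 4, 8400)         # box coordinates
    concat_ = concat_.reshape(1, 32, 8400)  # mask coefficients
    sig_ = 1.0 / (1.0 + np.exp(-split_))    # sigmoid, as in cal_sigmoid()
    boxes = np.concatenate((mul_, sig_, concat_), axis=1)          # (1, 37, 8400)
    protos = mask_.reshape(1, 160, 160, 32).transpose(0, 3, 1, 2)  # (1, 32, 160, 160)
    return boxes, protos
```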
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/tools_pt.py ADDED
@@ -0,0 +1,372 @@
1
+ import numpy as np
2
+ import time
3
+ import torch
4
+ import torchvision
5
+ import torch.nn.functional as F
6
+
7
+
8
+
9
+ def clip_boxes(boxes, shape):
10
+ """
11
+ Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
12
+
13
+ Args:
14
+ boxes (torch.Tensor): the bounding boxes to clip
15
+ shape (tuple): the shape of the image
16
+ """
17
+ if isinstance(boxes, torch.Tensor): # faster individually
18
+ boxes[..., 0].clamp_(0, shape[1]) # x1
19
+ boxes[..., 1].clamp_(0, shape[0]) # y1
20
+ boxes[..., 2].clamp_(0, shape[1]) # x2
21
+ boxes[..., 3].clamp_(0, shape[0]) # y2
22
+ else: # np.array (faster grouped)
23
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
24
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
25
+
26
+ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
27
+ """
28
+ Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
29
+ (img1_shape) to the shape of a different image (img0_shape).
30
+
31
+ Args:
32
+ img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
33
+ boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
34
+ img0_shape (tuple): the shape of the target image, in the format of (height, width).
35
+ ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
36
+ calculated based on the size difference between the two images.
37
+ padding (bool): If True, assume the boxes are based on an image augmented in YOLO style. If False, do regular
38
+ rescaling.
39
+
40
+ Returns:
41
+ boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
42
+ """
43
+ if ratio_pad is None: # calculate from img0_shape
44
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
45
+ pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
46
+ (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
47
+ else:
48
+ gain = ratio_pad[0][0]
49
+ pad = ratio_pad[1]
50
+
51
+ if padding:
52
+ boxes[..., [0, 2]] -= pad[0] # x padding
53
+ boxes[..., [1, 3]] -= pad[1] # y padding
54
+ boxes[..., :4] /= gain
55
+ clip_boxes(boxes, img0_shape)
56
+ return boxes
57
+
58
+
59
+ def xywh2xyxy(x):
60
+ """
61
+ Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
62
+ top-left corner and (x2, y2) is the bottom-right corner.
63
+
64
+ Args:
65
+ x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
66
+
67
+ Returns:
68
+ y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
69
+ """
70
+ assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
71
+ y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
72
+ dw = x[..., 2] / 2 # half-width
73
+ dh = x[..., 3] / 2 # half-height
74
+ y[..., 0] = x[..., 0] - dw # top left x
75
+ y[..., 1] = x[..., 1] - dh # top left y
76
+ y[..., 2] = x[..., 0] + dw # bottom right x
77
+ y[..., 3] = x[..., 1] + dh # bottom right y
78
+ return y
79
+
80
+
81
+ def non_max_suppression(
82
+ prediction,
83
+ conf_thres=0.25,
84
+ iou_thres=0.45,
85
+ classes=None,
86
+ agnostic=False,
87
+ multi_label=False,
88
+ labels=(),
89
+ max_det=300,
90
+ nc=0, # number of classes (optional)
91
+ max_time_img=0.05,
92
+ max_nms=30000,
93
+ max_wh=7680,
94
+ ):
95
+ """
96
+ Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
97
+
98
+ Args:
99
+ prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
100
+ containing the predicted boxes, classes, and masks. The tensor should be in the format
101
+ output by a model, such as YOLO.
102
+ conf_thres (float): The confidence threshold below which boxes will be filtered out.
103
+ Valid values are between 0.0 and 1.0.
104
+ iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
105
+ Valid values are between 0.0 and 1.0.
106
+ classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
107
+ agnostic (bool): If True, the model is agnostic to the number of classes, and all
108
+ classes will be considered as one.
109
+ multi_label (bool): If True, each box may have multiple labels.
110
+ labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
111
+ list contains the apriori labels for a given image. The list should be in the format
112
+ output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
113
+ max_det (int): The maximum number of boxes to keep after NMS.
114
+ nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
115
+ max_time_img (float): The maximum time (seconds) for processing one image.
116
+ max_nms (int): The maximum number of boxes into torchvision.ops.nms().
117
+ max_wh (int): The maximum box width and height in pixels
118
+
119
+ Returns:
120
+ (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
121
+ shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
122
+ (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
123
+ """
124
+
125
+ # Checks
126
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
127
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
128
+ if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
129
+ prediction = prediction[0] # select only inference output
130
+
131
+ device = prediction.device
132
+ mps = 'mps' in device.type # Apple MPS
133
+ if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
134
+ prediction = prediction.cpu()
135
+ bs = prediction.shape[0] # batch size
136
+ nc = nc or (prediction.shape[1] - 4) # number of classes
137
+ nm = prediction.shape[1] - nc - 4
138
+ mi = 4 + nc # mask start index
139
+ xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
140
+
141
+ # Settings
142
+ # min_wh = 2 # (pixels) minimum box width and height
143
+ time_limit = 0.5 + max_time_img * bs # seconds to quit after
144
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
145
+
146
+ prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
147
+ prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
148
+
149
+ t = time.time()
150
+ output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
151
+ for xi, x in enumerate(prediction): # image index, image inference
152
+ # Apply constraints
153
+ # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
154
+ x = x[xc[xi]] # confidence
155
+
156
+ # Cat apriori labels if autolabelling
157
+ if labels and len(labels[xi]):
158
+ lb = labels[xi]
159
+ v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
160
+ v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
161
+ v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
162
+ x = torch.cat((x, v), 0)
163
+
164
+ # If none remain process next image
165
+ if not x.shape[0]:
166
+ continue
167
+
168
+ # Detections matrix nx6 (xyxy, conf, cls)
169
+ box, cls, mask = x.split((4, nc, nm), 1)
170
+
171
+ if multi_label:
172
+ i, j = torch.where(cls > conf_thres)
173
+ x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
174
+ else: # best class only
175
+ conf, j = cls.max(1, keepdim=True)
176
+ x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
177
+
178
+ # Filter by class
179
+ if classes is not None:
180
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
181
+
182
+ # Check shape
183
+ n = x.shape[0] # number of boxes
184
+ if not n: # no boxes
185
+ continue
186
+ if n > max_nms: # excess boxes
187
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
188
+
189
+ # Batched NMS
190
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
191
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
192
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
193
+ i = i[:max_det] # limit detections
194
+
195
+ # # Experimental
196
+ # merge = False # use merge-NMS
197
+ # if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
198
+ # # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
199
+ # from .metrics import box_iou
200
+ # iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
201
+ # weights = iou * scores[None] # box weights
202
+ # x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
203
+ # redundant = True # require redundant detections
204
+ # if redundant:
205
+ # i = i[iou.sum(1) > 1] # require redundancy
206
+
207
+ output[xi] = x[i]
208
+ if mps:
209
+ output[xi] = output[xi].to(device)
210
+ # if (time.time() - t) > time_limit:
211
+ # LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
212
+ # break # time limit exceeded
213
+
214
+ return output
215
+
216
+
217
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
218
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
219
+ Args:
220
+ boxes: (n, 4)
221
+ image_shape: (height, width)
222
+ threshold: pixel threshold
223
+ Returns:
224
+ adjusted_boxes: adjusted bounding boxes
225
+ '''
226
+
227
+ # Image dimensions
228
+ h, w = image_shape
229
+
230
+ # Adjust boxes
231
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
232
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
233
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
234
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
235
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
236
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
237
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
238
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
239
+
240
+ return boxes
241
+
242
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
243
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
244
+ Args:
245
+ box1: (4, )
246
+ boxes: (n, 4)
247
+ Returns:
248
+ high_iou_indices: Indices of boxes with IoU > thres
249
+ '''
250
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
251
+ # obtain coordinates for intersections
252
+ x1 = torch.max(box1[0], boxes[:, 0])
253
+ y1 = torch.max(box1[1], boxes[:, 1])
254
+ x2 = torch.min(box1[2], boxes[:, 2])
255
+ y2 = torch.min(box1[3], boxes[:, 3])
256
+
257
+ # compute the area of intersection
258
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
259
+
260
+ # compute the area of both individual boxes
261
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
262
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
263
+
264
+ # compute the area of union
265
+ union = box1_area + box2_area - intersection
266
+
267
+ # compute the IoU
268
+ iou = intersection / union # Should be shape (n, )
269
+ if raw_output:
270
+ if iou.numel() == 0:
271
+ return 0
272
+ return iou
273
+
274
+ # get indices of boxes with IoU > thres
275
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
276
+
277
+ return high_iou_indices
278
+
279
+
280
+ def scale_masks(masks, shape, padding=True):
281
+ """
282
+ Rescale segment masks to shape.
283
+
284
+ Args:
285
+ masks (torch.Tensor): (N, C, H, W).
286
+ shape (tuple): Height and width.
287
+ padding (bool): If True, assume the boxes are based on an image augmented in YOLO style. If False, do regular
288
+ rescaling.
289
+ """
290
+ mh, mw = masks.shape[2:]
291
+ gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
292
+ pad = [mw - shape[1] * gain, mh - shape[0] * gain] # wh padding
293
+ if padding:
294
+ pad[0] /= 2
295
+ pad[1] /= 2
296
+ top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0) # y, x
297
+ bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
298
+ masks = masks[..., top:bottom, left:right]
299
+
300
+ masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False) # NCHW
301
+ return masks
302
+
303
+
304
+ def process_mask_native(protos, masks_in, bboxes, shape):
305
+ """
306
+ It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
307
+
308
+ Args:
309
+ protos (torch.Tensor): [mask_dim, mask_h, mask_w]
310
+ masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
311
+ bboxes (torch.Tensor): [n, 4], n is number of masks after nms
312
+ shape (tuple): the size of the input image (h,w)
313
+
314
+ Returns:
315
+ masks (torch.Tensor): The returned masks with dimensions [h, w, n]
316
+ """
317
+ c, mh, mw = protos.shape # CHW
318
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
319
+ masks = scale_masks(masks[None], shape)[0] # CHW
320
+ masks = crop_mask(masks, bboxes) # CHW
321
+ return masks.gt_(0.5)
322
+
323
+ def crop_mask(masks, boxes):
324
+ """
325
+ It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
326
+
327
+ Args:
328
+ masks (torch.Tensor): [n, h, w] tensor of masks
329
+ boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
330
+
331
+ Returns:
332
+ (torch.Tensor): The masks are being cropped to the bounding box.
333
+ """
334
+ _, h, w = masks.shape
335
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
336
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
337
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
338
+
339
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
340
+
341
+ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
342
+ """
343
+ Apply masks to bounding boxes using the output of the mask head.
344
+
345
+ Args:
346
+ protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
347
+ masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
348
+ bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
349
+ shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
350
+ upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
351
+
352
+ Returns:
353
+ (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
354
+ are the height and width of the input image. The mask is applied to the bounding boxes.
355
+ """
356
+
357
+ c, mh, mw = protos.shape # CHW
358
+ ih, iw = shape
359
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
360
+
361
+ downsampled_bboxes = bboxes.clone()
362
+ downsampled_bboxes[:, 0] *= mw / iw
363
+ downsampled_bboxes[:, 2] *= mw / iw
364
+ downsampled_bboxes[:, 3] *= mh / ih
365
+ downsampled_bboxes[:, 1] *= mh / ih
366
+
367
+ masks = crop_mask(masks, downsampled_bboxes) # CHW
368
+ if upsample:
369
+ masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
370
+ return masks.gt_(0.5)
371
+
372
+
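A minimal smoke test for the two central helpers above, using random tensors shaped like the FastSAM-s outputs (one class plus 32 mask coefficients); purely illustrative:

```python
import torch
from tools_pt import non_max_suppression, process_mask_native

pred = torch.rand(1, 37, 8400)     # (batch, 4 box + 1 class + 32 mask coefficients, anchors)
protos = torch.rand(32, 160, 160)  # mask prototypes for one image
dets = non_max_suppression(pred, conf_thres=0.4, iou_thres=0.9, nc=1)[0]  # (n, 38)
if len(dets):
    masks = process_mask_native(protos, dets[:, 6:], dets[:, :4], (640, 640))
    print(dets.shape, masks.shape)  # e.g. (n, 38) and (n, 640, 640)
```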
model_farm_fastsams_qsc6490_qnn2.16_int8_aidlite/python/utils.py ADDED
@@ -0,0 +1,86 @@
1
+ import numpy as np
2
+ import torch
3
+ from PIL import Image
4
+
5
+
6
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
7
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
8
+ Args:
9
+ boxes: (n, 4)
10
+ image_shape: (height, width)
11
+ threshold: pixel threshold
12
+ Returns:
13
+ adjusted_boxes: adjusted bounding boxes
14
+ '''
15
+
16
+ # Image dimensions
17
+ h, w = image_shape
18
+
19
+ # Adjust boxes
20
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
21
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
22
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
23
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
24
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
25
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
26
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
27
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
28
+
29
+ return boxes
30
+
31
+
32
+
33
+ def convert_box_xywh_to_xyxy(box):
34
+ x1 = box[0]
35
+ y1 = box[1]
36
+ x2 = box[0] + box[2]
37
+ y2 = box[1] + box[3]
38
+ return [x1, y1, x2, y2]
39
+
40
+
41
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
42
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
43
+ Args:
44
+ box1: (4, )
45
+ boxes: (n, 4)
46
+ Returns:
47
+ high_iou_indices: Indices of boxes with IoU > thres
48
+ '''
49
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
50
+ # obtain coordinates for intersections
51
+ x1 = torch.max(box1[0], boxes[:, 0])
52
+ y1 = torch.max(box1[1], boxes[:, 1])
53
+ x2 = torch.min(box1[2], boxes[:, 2])
54
+ y2 = torch.min(box1[3], boxes[:, 3])
55
+
56
+ # compute the area of intersection
57
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
58
+
59
+ # compute the area of both individual boxes
60
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
61
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
62
+
63
+ # compute the area of union
64
+ union = box1_area + box2_area - intersection
65
+
66
+ # compute the IoU
67
+ iou = intersection / union # Should be shape (n, )
68
+ if raw_output:
69
+ if iou.numel() == 0:
70
+ return 0
71
+ return iou
72
+
73
+ # get indices of boxes with IoU > thres
74
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
75
+
76
+ return high_iou_indices
77
+
78
+
79
+ def image_to_np_ndarray(image):
80
+ if type(image) is str:
81
+ return np.array(Image.open(image))
82
+ elif issubclass(type(image), Image.Image):
83
+ return np.array(image)
84
+ elif type(image) is np.ndarray:
85
+ return image
86
+ return None
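For illustration, the helpers compose as follows (the box values are made up):

```python
import torch
from utils import bbox_iou, convert_box_xywh_to_xyxy

# A query box given as xywh and two candidate boxes already in xyxy form.
query = torch.tensor(convert_box_xywh_to_xyxy([100, 100, 200, 200]), dtype=torch.float)
candidates = torch.tensor([[110, 110, 290, 290], [400, 400, 500, 500]], dtype=torch.float)
print(bbox_iou(query, candidates, iou_thres=0.5, image_shape=(640, 640), raw_output=True))
# -> tensor([0.8100, 0.0000]); only the first candidate overlaps the query box.
```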
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/README.md ADDED
@@ -0,0 +1,48 @@
+ ## Model Information
+ ### Source model
+ - Input shape: 640x640
+ - Number of parameters: 11.24M
+ - Model size: 45.0M
+ - Output shape: 1x37x8400, 1x32x160x160
+
+ Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
+
+ ### Converted model
+
+ - Precision: FP16
+ - Backend: QNN2.16
+ - Target Device: SNM972 QCS8550
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses the AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/).
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the AidLite SDK
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Install the QNN package that matches the backend above, e.g. for QNN2.23: sudo aid-pkg install aidlite-qnn223
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # Check the version of the underlying C++ AidLite library
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+ # Check the version of the AidLite Python bindings
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+ ```bash
+ cd fastsam_s/model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite
+ export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0
+
+ python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_fp16.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
+ ```
+
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/models/cutoff_fastsam_s_fp16.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1efdad137601411928d741ca90c811c07d92709de49676660b225438d7357aa9
+ size 24249816
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/dogs.jpg ADDED
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/onnx_export.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import cv2
+ import os
+ import sys
+
+ from ultralytics.models.fastsam import FastSAM
+
+ class Fast_SAM(torch.nn.Module):
+ """Exportable FastSAM model, end-to-end."""
+
+ def __init__(self) -> None:
+ super().__init__()
+ pt_name ='./models/FastSAM-s.pt'
+ self.model =FastSAM(pt_name).model
+
+ def forward(self, image: torch.Tensor):
+ """
+ Run FastSAM on `image`, and produce high quality segmentation masks.
+ Faster than SAM as it is based on YOLOv8.
+
+ Parameters:
+ image: Pixel values pre-processed for encoder consumption.
+ Range: float[0, 1]
+ 3-channel Color Space: BGR
+ Returns:
+
+ """
+ predictions = self.model(image)
+ # Return predictions as a tuple instead of nested tuple.
+ return (predictions[0], predictions[1][2])
+
+
+ model = Fast_SAM()
+ num_params = sum(p.numel() for p in model.parameters())
+ print(f'Number of FastSAM-s parameters: {num_params}')
+ dummy_input = torch.randn( [1,3,640,640],dtype=torch.float32 )
+ source_model = torch.jit.trace(
+ model.to("cpu"), dummy_input, check_trace=False
+ )
+ torch.onnx.export(model, # model being run
+ dummy_input, # model input (or a tuple for multiple inputs)
+ "./models/fastsam_s.onnx", # where to save the model
+ export_params=True, # store the trained parameter weights inside the model file
+ opset_version=12, # the ONNX version to export the model to
+ do_constant_folding=True, # whether to execute constant folding for optimization
+ input_names = ['input'], # the model's input names
+ output_names = ['boxes','mask'],
+ verbose=True,
+ )
+ print("Convert to onnx successfully!")
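A quick way to sanity-check the exported graph is to run it once with `onnxruntime` (already used elsewhere in this repo); this is only a sketch and assumes the export above has been run:

```python
import numpy as np
import onnxruntime as ort

# Load the ONNX file written by the export script above.
session = ort.InferenceSession("./models/fastsam_s.onnx", providers=["CPUExecutionProvider"])

# Random NCHW input matching the dummy shape used for export.
dummy = np.random.rand(1, 3, 640, 640).astype(np.float32)
boxes, mask = session.run(["boxes", "mask"], {"input": dummy})

# Expected shapes per the model card: (1, 37, 8400) and (1, 32, 160, 160).
print(boxes.shape, mask.shape)
```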
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/prompt.py ADDED
@@ -0,0 +1,456 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import torch
7
+ from utils import image_to_np_ndarray
8
+ from PIL import Image
9
+
10
+
11
+ class FastSAMPrompt:
12
+
13
+ def __init__(self, image, results, device='cpu'):
14
+ if isinstance(image, str) or isinstance(image, Image.Image):
15
+ image = image_to_np_ndarray(image)
16
+ self.device = device
17
+ self.results = results
18
+ self.img = image
19
+
20
+ def _segment_image(self, image, bbox):
21
+ if isinstance(image, Image.Image):
22
+ image_array = np.array(image)
23
+ else:
24
+ image_array = image
25
+ segmented_image_array = np.zeros_like(image_array)
26
+ x1, y1, x2, y2 = bbox
27
+ segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
28
+ segmented_image = Image.fromarray(segmented_image_array)
29
+ black_image = Image.new('RGB', image.size, (255, 255, 255))
30
+ # transparency_mask = np.zeros_like((), dtype=np.uint8)
31
+ transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
32
+ transparency_mask[y1:y2, x1:x2] = 255
33
+ transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
34
+ black_image.paste(segmented_image, mask=transparency_mask_image)
35
+ return black_image
36
+
37
+ def _format_results(self, result, filter=0):
38
+ annotations = []
39
+ n = len(result.masks.data)
40
+ for i in range(n):
41
+ annotation = {}
42
+ mask = result.masks.data[i] == 1.0
43
+
44
+ if torch.sum(mask) < filter:
45
+ continue
46
+ annotation['id'] = i
47
+ annotation['segmentation'] = mask.cpu().numpy()
48
+ annotation['bbox'] = result.boxes.data[i]
49
+ annotation['score'] = result.boxes.conf[i]
50
+ annotation['area'] = annotation['segmentation'].sum()
51
+ annotations.append(annotation)
52
+ return annotations
53
+
54
+ def filter_masks(annotations): # filter the overlapping masks
55
+ annotations.sort(key=lambda x: x['area'], reverse=True)
56
+ to_remove = set()
57
+ for i in range(0, len(annotations)):
58
+ a = annotations[i]
59
+ for j in range(i + 1, len(annotations)):
60
+ b = annotations[j]
61
+ if i != j and j not in to_remove:
62
+ # check if
63
+ if b['area'] < a['area']:
64
+ if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
65
+ to_remove.add(j)
66
+
67
+ return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove
68
+
69
+ def _get_bbox_from_mask(self, mask):
70
+ mask = mask.astype(np.uint8)
71
+ contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
72
+ x1, y1, w, h = cv2.boundingRect(contours[0])
73
+ x2, y2 = x1 + w, y1 + h
74
+ if len(contours) > 1:
75
+ for b in contours:
76
+ x_t, y_t, w_t, h_t = cv2.boundingRect(b)
77
+ # Merge multiple bounding boxes into one.
78
+ x1 = min(x1, x_t)
79
+ y1 = min(y1, y_t)
80
+ x2 = max(x2, x_t + w_t)
81
+ y2 = max(y2, y_t + h_t)
82
+ h = y2 - y1
83
+ w = x2 - x1
84
+ return [x1, y1, x2, y2]
85
+
86
+ def plot_to_result(self,
87
+ annotations,
88
+ bboxes=None,
89
+ points=None,
90
+ point_label=None,
91
+ mask_random_color=True,
92
+ better_quality=True,
93
+ retina=False,
94
+ withContours=True) -> np.ndarray:
95
+ if isinstance(annotations[0], dict):
96
+ annotations = [annotation['segmentation'] for annotation in annotations]
97
+ image = self.img
98
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
99
+ original_h = image.shape[0]
100
+ original_w = image.shape[1]
101
+ if sys.platform == "darwin":
102
+ plt.switch_backend("TkAgg")
103
+ plt.figure(figsize=(original_w / 100, original_h / 100))
104
+ # Add subplot with no margin.
105
+ plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
106
+ plt.margins(0, 0)
107
+ plt.gca().xaxis.set_major_locator(plt.NullLocator())
108
+ plt.gca().yaxis.set_major_locator(plt.NullLocator())
109
+
110
+ plt.imshow(image)
111
+ if better_quality:
112
+ if isinstance(annotations[0], torch.Tensor):
113
+ annotations = np.array(annotations.cpu())
114
+ for i, mask in enumerate(annotations):
115
+ mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
116
+ annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
117
+ if self.device == 'cpu':
118
+ annotations = np.array(annotations)
119
+ self.fast_show_mask(
120
+ annotations,
121
+ plt.gca(),
122
+ random_color=mask_random_color,
123
+ bboxes=bboxes,
124
+ points=points,
125
+ pointlabel=point_label,
126
+ retinamask=retina,
127
+ target_height=original_h,
128
+ target_width=original_w,
129
+ )
130
+ else:
131
+ if isinstance(annotations[0], np.ndarray):
132
+ annotations = torch.from_numpy(annotations)
133
+ self.fast_show_mask_gpu(
134
+ annotations,
135
+ plt.gca(),
136
+ random_color=mask_random_color,
137
+ bboxes=bboxes,
138
+ points=points,
139
+ pointlabel=point_label,
140
+ retinamask=retina,
141
+ target_height=original_h,
142
+ target_width=original_w,
143
+ )
144
+ if isinstance(annotations, torch.Tensor):
145
+ annotations = annotations.cpu().numpy()
146
+ if withContours:
147
+ contour_all = []
148
+ temp = np.zeros((original_h, original_w, 1))
149
+ for i, mask in enumerate(annotations):
150
+ if type(mask) == dict:
151
+ mask = mask['segmentation']
152
+ annotation = mask.astype(np.uint8)
153
+ if not retina:
154
+ annotation = cv2.resize(
155
+ annotation,
156
+ (original_w, original_h),
157
+ interpolation=cv2.INTER_NEAREST,
158
+ )
159
+ contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
160
+ for contour in contours:
161
+ contour_all.append(contour)
162
+ cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
163
+ color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
164
+ contour_mask = temp / 255 * color.reshape(1, 1, -1)
165
+ plt.imshow(contour_mask)
166
+
167
+ plt.axis('off')
168
+ fig = plt.gcf()
169
+ plt.draw()
170
+
171
+ try:
172
+ buf = fig.canvas.tostring_rgb()
173
+ except AttributeError:
174
+ fig.canvas.draw()
175
+ buf = fig.canvas.tostring_rgb()
176
+ cols, rows = fig.canvas.get_width_height()
177
+ img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
178
+ result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
179
+ plt.close()
180
+ return result
181
+
182
+ # Remark for refactoring: a function should do one thing only; storing the image and plotting should be separated, and they do not necessarily need to be class methods. Standalone utility functions that users can chain in their own scripts would give more fine-grained control.
183
+ def plot(self,
184
+ annotations,
185
+ output_path,
186
+ bboxes=None,
187
+ points=None,
188
+ point_label=None,
189
+ mask_random_color=True,
190
+ better_quality=True,
191
+ retina=False,
192
+ withContours=True):
193
+ if len(annotations) == 0:
194
+ return None
195
+ result = self.plot_to_result(
196
+ annotations,
197
+ bboxes,
198
+ points,
199
+ point_label,
200
+ mask_random_color,
201
+ better_quality,
202
+ retina,
203
+ withContours,
204
+ )
205
+
206
+ path = os.path.dirname(os.path.abspath(output_path))
207
+ if not os.path.exists(path):
208
+ os.makedirs(path)
209
+ result = result[:, :, ::-1]
210
+ cv2.imwrite(output_path, result)
211
+
212
+ # CPU post process
213
+ def fast_show_mask(
214
+ self,
215
+ annotation,
216
+ ax,
217
+ random_color=False,
218
+ bboxes=None,
219
+ points=None,
220
+ pointlabel=None,
221
+ retinamask=True,
222
+ target_height=960,
223
+ target_width=960,
224
+ ):
225
+ msak_sum = annotation.shape[0]
226
+ height = annotation.shape[1]
227
+ weight = annotation.shape[2]
228
+ #Sort annotations based on area.
229
+ areas = np.sum(annotation, axis=(1, 2))
230
+ sorted_indices = np.argsort(areas)
231
+ annotation = annotation[sorted_indices]
232
+
233
+ index = (annotation != 0).argmax(axis=0)
234
+ if random_color:
235
+ color = np.random.random((msak_sum, 1, 1, 3))
236
+ else:
237
+ color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
238
+ transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
239
+ visual = np.concatenate([color, transparency], axis=-1)
240
+ mask_image = np.expand_dims(annotation, -1) * visual
241
+
242
+ show = np.zeros((height, weight, 4))
243
+ h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
244
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
245
+ # Use vectorized indexing to update the values of 'show'.
246
+ show[h_indices, w_indices, :] = mask_image[indices]
247
+ if bboxes is not None:
248
+ for bbox in bboxes:
249
+ x1, y1, x2, y2 = bbox
250
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
251
+ # draw point
252
+ if points is not None:
253
+ plt.scatter(
254
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
255
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
256
+ s=20,
257
+ c='y',
258
+ )
259
+ plt.scatter(
260
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
261
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
262
+ s=20,
263
+ c='m',
264
+ )
265
+
266
+ if not retinamask:
267
+ show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
268
+ ax.imshow(show)
269
+
270
+ def fast_show_mask_gpu(
271
+ self,
272
+ annotation,
273
+ ax,
274
+ random_color=False,
275
+ bboxes=None,
276
+ points=None,
277
+ pointlabel=None,
278
+ retinamask=True,
279
+ target_height=960,
280
+ target_width=960,
281
+ ):
282
+ msak_sum = annotation.shape[0]
283
+ height = annotation.shape[1]
284
+ weight = annotation.shape[2]
285
+ areas = torch.sum(annotation, dim=(1, 2))
286
+ sorted_indices = torch.argsort(areas, descending=False)
287
+ annotation = annotation[sorted_indices]
288
+ # Find the index of the first non-zero value at each position.
289
+ index = (annotation != 0).to(torch.long).argmax(dim=0)
290
+ if random_color:
291
+ color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
292
+ else:
293
+ color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
294
+ 30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
295
+ transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
296
+ visual = torch.cat([color, transparency], dim=-1)
297
+ mask_image = torch.unsqueeze(annotation, -1) * visual
298
+ # Select data according to the index. The index indicates which batch's data to choose at each position, converting the mask_image into a single batch form.
299
+ show = torch.zeros((height, weight, 4)).to(annotation.device)
300
+ try:
301
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
302
+ except:
303
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight))
304
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
305
+ # Use vectorized indexing to update the values of 'show'.
306
+ show[h_indices, w_indices, :] = mask_image[indices]
307
+ show_cpu = show.cpu().numpy()
308
+ if bboxes is not None:
309
+ for bbox in bboxes:
310
+ x1, y1, x2, y2 = bbox
311
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
312
+ # draw point
313
+ if points is not None:
314
+ plt.scatter(
315
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
316
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
317
+ s=20,
318
+ c='y',
319
+ )
320
+ plt.scatter(
321
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
322
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
323
+ s=20,
324
+ c='m',
325
+ )
326
+ if not retinamask:
327
+ show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
328
+ ax.imshow(show_cpu)
329
+
330
+ # clip
331
+ @torch.no_grad()
332
+ def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
333
+ preprocessed_images = [preprocess(image).to(device) for image in elements]
334
+ try:
335
+ import clip # for linear_assignment
336
+
337
+ except (ImportError, AssertionError, AttributeError):
338
+ from ultralytics.yolo.utils.checks import check_requirements
339
+
340
+ check_requirements('git+https://github.com/openai/CLIP.git') # required before installing lap from source
341
+ import clip
342
+
343
+
344
+ tokenized_text = clip.tokenize([search_text]).to(device)
345
+ stacked_images = torch.stack(preprocessed_images)
346
+ image_features = model.encode_image(stacked_images)
347
+ text_features = model.encode_text(tokenized_text)
348
+ image_features /= image_features.norm(dim=-1, keepdim=True)
349
+ text_features /= text_features.norm(dim=-1, keepdim=True)
350
+ probs = 100.0 * image_features @ text_features.T
351
+ return probs[:, 0].softmax(dim=0)
352
+
353
+ def _crop_image(self, format_results):
354
+
355
+ image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
356
+ ori_w, ori_h = image.size
357
+ annotations = format_results
358
+ mask_h, mask_w = annotations[0]['segmentation'].shape
359
+ if ori_w != mask_w or ori_h != mask_h:
360
+ image = image.resize((mask_w, mask_h))
361
+ cropped_boxes = []
362
+ cropped_images = []
363
+ not_crop = []
364
+ filter_id = []
365
+ # annotations, _ = filter_masks(annotations)
366
+ # filter_id = list(_)
367
+ for _, mask in enumerate(annotations):
368
+ if np.sum(mask['segmentation']) <= 100:
369
+ filter_id.append(_)
370
+ continue
371
+ bbox = self._get_bbox_from_mask(mask['segmentation']) # mask 的 bbox
372
+ cropped_boxes.append(self._segment_image(image, bbox))
373
+ # cropped_boxes.append(segment_image(image,mask["segmentation"]))
374
+ cropped_images.append(bbox) # Save the bounding box of the cropped image.
375
+
376
+ return cropped_boxes, cropped_images, not_crop, filter_id, annotations
377
+
378
+ def box_prompt(self, bbox=None, bboxes=None):
379
+ if self.results == None:
380
+ return []
381
+ assert bbox or bboxes
382
+ if bboxes is None:
383
+ bboxes = [bbox]
384
+ max_iou_index = []
385
+ for bbox in bboxes:
386
+ assert (bbox[2] != 0 and bbox[3] != 0)
387
+ masks = self.results[0].masks.data
388
+ target_height = self.img.shape[0]
389
+ target_width = self.img.shape[1]
390
+ h = masks.shape[1]
391
+ w = masks.shape[2]
392
+ if h != target_height or w != target_width:
393
+ bbox = [
394
+ int(bbox[0] * w / target_width),
395
+ int(bbox[1] * h / target_height),
396
+ int(bbox[2] * w / target_width),
397
+ int(bbox[3] * h / target_height), ]
398
+ bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
399
+ bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
400
+ bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
401
+ bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h
402
+
403
+ # IoUs = torch.zeros(len(masks), dtype=torch.float32)
404
+ bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
405
+
406
+ masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
407
+ orig_masks_area = torch.sum(masks, dim=(1, 2))
408
+
409
+ union = bbox_area + orig_masks_area - masks_area
410
+ IoUs = masks_area / union
411
+ max_iou_index.append(int(torch.argmax(IoUs)))
412
+ max_iou_index = list(set(max_iou_index))
413
+ return np.array(masks[max_iou_index].cpu().numpy())
414
+
415
+ def point_prompt(self, points, pointlabel): # numpy
416
+ if self.results == None:
417
+ return []
418
+ masks = self._format_results(self.results[0], 0)
419
+ target_height = self.img.shape[0]
420
+ target_width = self.img.shape[1]
421
+ h = masks[0]['segmentation'].shape[0]
422
+ w = masks[0]['segmentation'].shape[1]
423
+ if h != target_height or w != target_width:
424
+ points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
425
+ onemask = np.zeros((h, w))
426
+ masks = sorted(masks, key=lambda x: x['area'], reverse=True)
427
+ for i, annotation in enumerate(masks):
428
+ if type(annotation) == dict:
429
+ mask = annotation['segmentation']
430
+ else:
431
+ mask = annotation
432
+ for i, point in enumerate(points):
433
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
434
+ onemask[mask] = 1
435
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
436
+ onemask[mask] = 0
437
+ onemask = onemask >= 1
438
+ return np.array([onemask])
439
+
440
+ def text_prompt(self, text):
441
+ if self.results == None:
442
+ return []
443
+ format_results = self._format_results(self.results[0], 0)
444
+ cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
445
+ clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
446
+ scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
447
+ max_idx = scores.argsort()
448
+ max_idx = max_idx[-1]
449
+ max_idx += sum(np.array(filter_id) <= int(max_idx))
450
+ return np.array([annotations[max_idx]['segmentation']])
451
+
452
+ def everything_prompt(self):
453
+ if self.results == None:
454
+ return []
455
+ return self.results[0].masks.data
456
+
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/run_test.py ADDED
@@ -0,0 +1,224 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import numpy as np
5
+ import onnxruntime
6
+ import time
7
+ import matplotlib.pyplot as plt
8
+ import torch
9
+ from ultralytics.engine.results import Results
10
+ from tools_pt import *
11
+ from prompt import FastSAMPrompt
12
+ import aidlite
13
+ import argparse
14
+ import ast
15
+
16
+ # Cosine-similarity helper used to compare model outputs
17
+ def get_acc(onnx_out,other_out):
18
+ cosine_similarity=np.dot(np.array(onnx_out),np.array(other_out))/(np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
19
+ return cosine_similarity
20
+
21
+ def cal_sigmoid(x):
22
+ return 1 / (1 + np.exp(-x))
23
+
24
+ class qnn_predict(object):
25
+ def __init__(self,inputshape,outputshape,args) -> None:
26
+ aidlite.set_log_level(aidlite.LogLevel.INFO)
27
+ aidlite.log_to_stderr()
28
+ print(f"Aidlite library version : {aidlite.get_library_version()}")
29
+ print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
30
+ config = aidlite.Config.create_instance()
31
+ if config is None:
32
+ print("Create config failed !")
33
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
34
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
35
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
36
+ config.is_quantify_model = 1
37
+
38
+ model = aidlite.Model.create_instance(args.target_model)
39
+ if model is None:
40
+ print("Create model failed !")
41
+
42
+ self.input_shape=inputshape
43
+ self.out_shape = outputshape
44
+ model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
45
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
46
+ if self.interpreter is None:
47
+ print("build_interpretper_from_model_and_config failed !")
48
+ result = self.interpreter.init()
49
+ if result != 0:
50
+ print(f"interpreter init failed !")
51
+ result = self.interpreter.load_model()
52
+ if result != 0:
53
+ print("interpreter load model failed !")
54
+ print("detect model load success!")
55
+
56
+ self.conf = 0.4
57
+ self.iou=0.9
58
+ self.size = 640
59
+ self.agnostic_nms=False
60
+ self.max_det = 300
61
+ self.names=['object']
62
+ self.classes =None
63
+ self.retina_masks=True
64
+
65
+ def pretreat_img(self,img):
66
+ scale = 1/255.
67
+ img_size = cv2.resize(img, (self.size,self.size), interpolation=cv2.INTER_LINEAR)
68
+ float_img = img_size.astype('float32')
69
+ float_img = float_img* scale
70
+ float_img = float_img[:, :, ::-1]
71
+ return float_img
72
+
73
+ def postprocess(self, preds, img, orig_imgs):
74
+ """TODO: filter by classes."""
75
+ p = non_max_suppression(torch.from_numpy(preds[0]),
76
+ self.conf,
77
+ self.iou,
78
+ agnostic=self.agnostic_nms,
79
+ max_det=self.max_det,
80
+ nc=len(self.names),
81
+ classes=self.classes)
82
+
83
+ results = []
84
+ if len(p) == 0 or len(p[0]) == 0:
85
+ print("No object detected.")
86
+ return results
87
+
88
+ full_box = torch.zeros_like(p[0][0])
89
+ full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
90
+ full_box = full_box.view(1, -1)
91
+ critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
92
+ if critical_iou_index.numel() != 0:
93
+ full_box[0][4] = p[0][critical_iou_index][:,4]
94
+ full_box[0][6:] = p[0][critical_iou_index][:,6:]
95
+ p[0][critical_iou_index] = full_box
96
+
97
+ #proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
98
+ proto=torch.from_numpy(preds[-1])
99
+ for i, pred in enumerate(p):
100
+ orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
101
+ path =img[0] #self.batch[0]
102
+ img_path = path[i] if isinstance(path, list) else path
103
+ if not len(pred): # save empty boxes
104
+ results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
105
+ continue
106
+ if self.retina_masks:
107
+ if not isinstance(orig_imgs, torch.Tensor):
108
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
109
+ masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
110
+ else:
111
+ masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
112
+ if not isinstance(orig_imgs, torch.Tensor):
113
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
114
+ results.append(
115
+ Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
116
+ return results
117
+
118
+ def qnn_run(self, orig_imgs,img_path,args):
119
+ input_img_f = self.pretreat_img(orig_imgs) # resize image to model input, HWC layout
120
+ # print("qnn_input:",input_img_f)
121
+ # encoder texts
122
+ input_img = np.expand_dims(input_img_f, 0)
123
+
124
+ invoke_time=[]
125
+ for i in range(args.invoke_nums):
126
+ result = self.interpreter.set_input_tensor(0, input_img.data)
127
+ t0 = time.time()
128
+ result = self.interpreter.invoke()
129
+ t1 = time.time()
130
+ cost_time=(t1-t0)*1000
131
+ invoke_time.append(cost_time)
132
+ mask_ = self.interpreter.get_output_tensor(0)
133
+ concat_ = self.interpreter.get_output_tensor(1)
134
+ mul_ = self.interpreter.get_output_tensor(3)
135
+ split_ = self.interpreter.get_output_tensor(2)
136
+ mask_ = mask_.reshape( * self.out_shape[3])
137
+ mask_=mask_.transpose((0, 3, 1,2))
138
+ concat_ = concat_.reshape( *self.out_shape[2])
139
+ mul_ = mul_.reshape( *self.out_shape[1])
140
+ split_ = split_.reshape( *self.out_shape[0])
141
+ sig_ = cal_sigmoid(split_)
142
+
143
+ output_concat = np.concatenate((mul_,sig_),axis=1)
144
+ output_concat = np.concatenate((output_concat,concat_),axis=1)
145
+
146
+ # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
147
+ ## timing statistics
148
+ max_invoke_time = max(invoke_time)
149
+ min_invoke_time = min(invoke_time)
150
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
151
+ var_invoketime=np.var(invoke_time)
152
+ print("========================================")
153
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
154
+ print("========================================")
155
+
156
+ qnn_out = [np.array(output_concat),np.array(mask_)]
157
+ # print("qnn predict out:",qnn_out)
158
+
159
+ nchw_img = input_img.transpose(0,3,1,2)
160
+ everything_results = self.postprocess( qnn_out, nchw_img, [orig_imgs])
161
+ # print("everything_results: ",everything_results)
162
+
163
+ prompt_process = FastSAMPrompt(args.imgs, everything_results, device="cpu")
164
+
165
+ # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
166
+ try:
167
+ if args.point_prompt ==[[0,0]]:
168
+ ann = prompt_process.everything_prompt()
169
+ else:
170
+ ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
171
+ out_name = os.path.basename(img_path).split(".")[0]
172
+ if True: # savepic
173
+ outpath = "python/"
174
+ if not os.path.exists(outpath):
175
+ os.mkdir(outpath)
176
+ prompt_process.plot(
177
+ annotations=ann,
178
+ output_path=os.path.join(outpath,out_name+"_result.jpg"),
179
+ mask_random_color=True,
180
+ better_quality=True,
181
+ retina=False,
182
+ withContours=True,
183
+ )
184
+ else:
185
+ plt.figure()
186
+ prompt_process.fast_show_mask(annotation=ann,
187
+ ax = plt)
188
+ except Exception as e:
189
+ print(f"Warning: an error occurred while predicting on {img_path} - {e}")
190
+ return [mask_.reshape(-1),output_concat.reshape(-1)]
191
+
192
+
193
+
194
+ def parser_args():
195
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
196
+ parser.add_argument('--target_model',type=str,default='models/cutoff_fastsam_s_fp16.qnn216.ctx.bin',help="inference model path")
197
+ parser.add_argument('--source_model',type=str,default='models/fastsam_s.onnx',help="original model path")
198
+ parser.add_argument('--imgs',type=str,default='python/dogs.jpg',help="Predict images path")
199
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
200
+ parser.add_argument('--point_prompt',type=str,default="[[0,0]]",help="example:[[x1,y1],[x2,y2]]")
201
+ args = parser.parse_args()
202
+ return args
203
+
204
+
205
+ if __name__ == "__main__":
206
+ args = parser_args()
207
+ inputshape=[[1,640,640,3]]
208
+ outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
209
+ args.point_prompt = ast.literal_eval(args.point_prompt)
210
+
211
+ predict = qnn_predict(inputshape,outputshape,args)
212
+ if os.path.isdir(args.imgs):
213
+ img_files = os.listdir(args.imgs)
214
+ for fi in img_files:
215
+ img_path = os.path.join(args.imgs,fi)
216
+ im0s = cv2.imread(img_path) # BGR
217
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
218
+ predict.qnn_run(im0s,img_path,args)
219
+ else:
220
+ img_path = args.imgs
221
+ im0s = cv2.imread(img_path) # BGR
222
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
223
+ qnn_result = predict.qnn_run(im0s,img_path,args)
224
+ print("Prediction completed and the results are saved!")
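The head reassembly done in `qnn_run` boils down to concatenating the three 8400-anchor tensors back into the single 1x37x8400 output of the source model; a shape-only sketch with random stand-in arrays (the real values come from the four QNN output buffers):

```python
import numpy as np

def cal_sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Stand-ins for the four QNN outputs, in the shapes declared in `outputshape`.
split_ = np.random.randn(1, 1, 8400).astype(np.float32)      # objectness logits
mul_ = np.random.randn(1, 4, 8400).astype(np.float32)        # box regression
concat_ = np.random.randn(1, 32, 8400).astype(np.float32)    # mask coefficients
mask_ = np.random.randn(1, 32, 160, 160).astype(np.float32)  # prototypes, already NCHW

# Same recombination as qnn_run: boxes, sigmoid(score), coefficients -> 37 channels.
head = np.concatenate((mul_, cal_sigmoid(split_), concat_), axis=1)
print(head.shape, mask_.shape)  # (1, 37, 8400) (1, 32, 160, 160)
```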
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/tools_pt.py ADDED
@@ -0,0 +1,372 @@
1
+ import numpy as np
2
+ import time
3
+ import torch
4
+ import torchvision
5
+ import torch.nn.functional as F
6
+
7
+
8
+
9
+ def clip_boxes(boxes, shape):
10
+ """
11
+ Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
12
+
13
+ Args:
14
+ boxes (torch.Tensor): the bounding boxes to clip
15
+ shape (tuple): the shape of the image
16
+ """
17
+ if isinstance(boxes, torch.Tensor): # faster individually
18
+ boxes[..., 0].clamp_(0, shape[1]) # x1
19
+ boxes[..., 1].clamp_(0, shape[0]) # y1
20
+ boxes[..., 2].clamp_(0, shape[1]) # x2
21
+ boxes[..., 3].clamp_(0, shape[0]) # y2
22
+ else: # np.array (faster grouped)
23
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
24
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
25
+
26
+ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
27
+ """
28
+ Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
29
+ (img1_shape) to the shape of a different image (img0_shape).
30
+
31
+ Args:
32
+ img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
33
+ boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
34
+ img0_shape (tuple): the shape of the target image, in the format of (height, width).
35
+ ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
36
+ calculated based on the size difference between the two images.
37
+ padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
38
+ rescaling.
39
+
40
+ Returns:
41
+ boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
42
+ """
43
+ if ratio_pad is None: # calculate from img0_shape
44
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
45
+ pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
46
+ (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
47
+ else:
48
+ gain = ratio_pad[0][0]
49
+ pad = ratio_pad[1]
50
+
51
+ if padding:
52
+ boxes[..., [0, 2]] -= pad[0] # x padding
53
+ boxes[..., [1, 3]] -= pad[1] # y padding
54
+ boxes[..., :4] /= gain
55
+ clip_boxes(boxes, img0_shape)
56
+ return boxes
57
+
58
+
59
+ def xywh2xyxy(x):
60
+ """
61
+ Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
62
+ top-left corner and (x2, y2) is the bottom-right corner.
63
+
64
+ Args:
65
+ x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
66
+
67
+ Returns:
68
+ y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
69
+ """
70
+ assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
71
+ y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
72
+ dw = x[..., 2] / 2 # half-width
73
+ dh = x[..., 3] / 2 # half-height
74
+ y[..., 0] = x[..., 0] - dw # top left x
75
+ y[..., 1] = x[..., 1] - dh # top left y
76
+ y[..., 2] = x[..., 0] + dw # bottom right x
77
+ y[..., 3] = x[..., 1] + dh # bottom right y
78
+ return y
79
+
80
+
81
+ def non_max_suppression(
82
+ prediction,
83
+ conf_thres=0.25,
84
+ iou_thres=0.45,
85
+ classes=None,
86
+ agnostic=False,
87
+ multi_label=False,
88
+ labels=(),
89
+ max_det=300,
90
+ nc=0, # number of classes (optional)
91
+ max_time_img=0.05,
92
+ max_nms=30000,
93
+ max_wh=7680,
94
+ ):
95
+ """
96
+ Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
97
+
98
+ Args:
99
+ prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
100
+ containing the predicted boxes, classes, and masks. The tensor should be in the format
101
+ output by a model, such as YOLO.
102
+ conf_thres (float): The confidence threshold below which boxes will be filtered out.
103
+ Valid values are between 0.0 and 1.0.
104
+ iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
105
+ Valid values are between 0.0 and 1.0.
106
+ classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
107
+ agnostic (bool): If True, the model is agnostic to the number of classes, and all
108
+ classes will be considered as one.
109
+ multi_label (bool): If True, each box may have multiple labels.
110
+ labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
111
+ list contains the apriori labels for a given image. The list should be in the format
112
+ output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
113
+ max_det (int): The maximum number of boxes to keep after NMS.
114
+ nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
115
+ max_time_img (float): The maximum time (seconds) for processing one image.
116
+ max_nms (int): The maximum number of boxes into torchvision.ops.nms().
117
+ max_wh (int): The maximum box width and height in pixels
118
+
119
+ Returns:
120
+ (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
121
+ shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
122
+ (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
123
+ """
124
+
125
+ # Checks
126
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
127
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
128
+ if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
129
+ prediction = prediction[0] # select only inference output
130
+
131
+ device = prediction.device
132
+ mps = 'mps' in device.type # Apple MPS
133
+ if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
134
+ prediction = prediction.cpu()
135
+ bs = prediction.shape[0] # batch size
136
+ nc = nc or (prediction.shape[1] - 4) # number of classes
137
+ nm = prediction.shape[1] - nc - 4
138
+ mi = 4 + nc # mask start index
139
+ xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
140
+
141
+ # Settings
142
+ # min_wh = 2 # (pixels) minimum box width and height
143
+ time_limit = 0.5 + max_time_img * bs # seconds to quit after
144
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
145
+
146
+ prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
147
+ prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
148
+
149
+ t = time.time()
150
+ output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
151
+ for xi, x in enumerate(prediction): # image index, image inference
152
+ # Apply constraints
153
+ # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
154
+ x = x[xc[xi]] # confidence
155
+
156
+ # Cat apriori labels if autolabelling
157
+ if labels and len(labels[xi]):
158
+ lb = labels[xi]
159
+ v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
160
+ v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
161
+ v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
162
+ x = torch.cat((x, v), 0)
163
+
164
+ # If none remain process next image
165
+ if not x.shape[0]:
166
+ continue
167
+
168
+ # Detections matrix nx6 (xyxy, conf, cls)
169
+ box, cls, mask = x.split((4, nc, nm), 1)
170
+
171
+ if multi_label:
172
+ i, j = torch.where(cls > conf_thres)
173
+ x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
174
+ else: # best class only
175
+ conf, j = cls.max(1, keepdim=True)
176
+ x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
177
+
178
+ # Filter by class
179
+ if classes is not None:
180
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
181
+
182
+ # Check shape
183
+ n = x.shape[0] # number of boxes
184
+ if not n: # no boxes
185
+ continue
186
+ if n > max_nms: # excess boxes
187
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
188
+
189
+ # Batched NMS
190
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
191
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
192
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
193
+ i = i[:max_det] # limit detections
194
+
195
+ # # Experimental
196
+ # merge = False # use merge-NMS
197
+ # if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
198
+ # # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
199
+ # from .metrics import box_iou
200
+ # iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
201
+ # weights = iou * scores[None] # box weights
202
+ # x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
203
+ # redundant = True # require redundant detections
204
+ # if redundant:
205
+ # i = i[iou.sum(1) > 1] # require redundancy
206
+
207
+ output[xi] = x[i]
208
+ if mps:
209
+ output[xi] = output[xi].to(device)
210
+ # if (time.time() - t) > time_limit:
211
+ # LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
212
+ # break # time limit exceeded
213
+
214
+ return output
215
+
216
+
217
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
218
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
219
+ Args:
220
+ boxes: (n, 4)
221
+ image_shape: (height, width)
222
+ threshold: pixel threshold
223
+ Returns:
224
+ adjusted_boxes: adjusted bounding boxes
225
+ '''
226
+
227
+ # Image dimensions
228
+ h, w = image_shape
229
+
230
+ # Adjust boxes
231
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
232
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
233
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
234
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
235
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
236
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
237
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
238
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
239
+
240
+ return boxes
241
+
242
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
243
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
244
+ Args:
245
+ box1: (4, )
246
+ boxes: (n, 4)
247
+ Returns:
248
+ high_iou_indices: Indices of boxes with IoU > thres
249
+ '''
250
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
251
+ # obtain coordinates for intersections
252
+ x1 = torch.max(box1[0], boxes[:, 0])
253
+ y1 = torch.max(box1[1], boxes[:, 1])
254
+ x2 = torch.min(box1[2], boxes[:, 2])
255
+ y2 = torch.min(box1[3], boxes[:, 3])
256
+
257
+ # compute the area of intersection
258
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
259
+
260
+ # compute the area of both individual boxes
261
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
262
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
263
+
264
+ # compute the area of union
265
+ union = box1_area + box2_area - intersection
266
+
267
+ # compute the IoU
268
+ iou = intersection / union # Should be shape (n, )
269
+ if raw_output:
270
+ if iou.numel() == 0:
271
+ return 0
272
+ return iou
273
+
274
+ # get indices of boxes with IoU > thres
275
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
276
+
277
+ return high_iou_indices
278
+
279
+
280
+ def scale_masks(masks, shape, padding=True):
281
+ """
282
+ Rescale segment masks to shape.
283
+
284
+ Args:
285
+ masks (torch.Tensor): (N, C, H, W).
286
+ shape (tuple): Height and width.
287
+ padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
288
+ rescaling.
289
+ """
290
+ mh, mw = masks.shape[2:]
291
+ gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
292
+ pad = [mw - shape[1] * gain, mh - shape[0] * gain] # wh padding
293
+ if padding:
294
+ pad[0] /= 2
295
+ pad[1] /= 2
296
+ top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0) # y, x
297
+ bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
298
+ masks = masks[..., top:bottom, left:right]
299
+
300
+ masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False) # NCHW
301
+ return masks
302
+
303
+
304
+ def process_mask_native(protos, masks_in, bboxes, shape):
305
+ """
306
+ It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
307
+
308
+ Args:
309
+ protos (torch.Tensor): [mask_dim, mask_h, mask_w]
310
+ masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
311
+ bboxes (torch.Tensor): [n, 4], n is number of masks after nms
312
+ shape (tuple): the size of the input image (h,w)
313
+
314
+ Returns:
315
+ masks (torch.Tensor): The returned masks with dimensions [h, w, n]
316
+ """
317
+ c, mh, mw = protos.shape # CHW
318
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
319
+ masks = scale_masks(masks[None], shape)[0] # CHW
320
+ masks = crop_mask(masks, bboxes) # CHW
321
+ return masks.gt_(0.5)
322
+
323
+ def crop_mask(masks, boxes):
324
+ """
325
+ It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
326
+
327
+ Args:
328
+ masks (torch.Tensor): [n, h, w] tensor of masks
329
+ boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
330
+
331
+ Returns:
332
+ (torch.Tensor): The masks are being cropped to the bounding box.
333
+ """
334
+ _, h, w = masks.shape
335
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
336
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
337
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
338
+
339
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
340
+
341
+ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
342
+ """
343
+ Apply masks to bounding boxes using the output of the mask head.
344
+
345
+ Args:
346
+ protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
347
+ masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
348
+ bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
349
+ shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
350
+ upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
351
+
352
+ Returns:
353
+ (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
354
+ are the height and width of the input image. The mask is applied to the bounding boxes.
355
+ """
356
+
357
+ c, mh, mw = protos.shape # CHW
358
+ ih, iw = shape
359
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
360
+
361
+ downsampled_bboxes = bboxes.clone()
362
+ downsampled_bboxes[:, 0] *= mw / iw
363
+ downsampled_bboxes[:, 2] *= mw / iw
364
+ downsampled_bboxes[:, 3] *= mh / ih
365
+ downsampled_bboxes[:, 1] *= mh / ih
366
+
367
+ masks = crop_mask(masks, downsampled_bboxes) # CHW
368
+ if upsample:
369
+ masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
370
+ return masks.gt_(0.5)
371
+
372
+
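A quick numeric check of `xywh2xyxy` above (the box values are arbitrary, and the import assumes `python/tools_pt.py` is on the import path):

```python
import torch

from tools_pt import xywh2xyxy

# One box given as centre (320, 240) with width 100 and height 50.
xywh = torch.tensor([[320., 240., 100., 50.]])
print(xywh2xyxy(xywh))  # tensor([[270., 215., 370., 265.]])
```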
model_farm_fastsams_qsc8550_qnn2.16_fp16_aidlite/python/utils.py ADDED
@@ -0,0 +1,86 @@
1
+ import numpy as np
2
+ import torch
3
+ from PIL import Image
4
+
5
+
6
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
7
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
8
+ Args:
9
+ boxes: (n, 4)
10
+ image_shape: (height, width)
11
+ threshold: pixel threshold
12
+ Returns:
13
+ adjusted_boxes: adjusted bounding boxes
14
+ '''
15
+
16
+ # Image dimensions
17
+ h, w = image_shape
18
+
19
+ # Adjust boxes
20
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
21
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
22
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
23
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
24
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
25
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
26
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
27
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
28
+
29
+ return boxes
30
+
31
+
32
+
33
+ def convert_box_xywh_to_xyxy(box):
34
+ x1 = box[0]
35
+ y1 = box[1]
36
+ x2 = box[0] + box[2]
37
+ y2 = box[1] + box[3]
38
+ return [x1, y1, x2, y2]
39
+
40
+
41
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
42
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
43
+ Args:
44
+ box1: (4, )
45
+ boxes: (n, 4)
46
+ Returns:
47
+ high_iou_indices: Indices of boxes with IoU > thres
48
+ '''
49
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
50
+ # obtain coordinates for intersections
51
+ x1 = torch.max(box1[0], boxes[:, 0])
52
+ y1 = torch.max(box1[1], boxes[:, 1])
53
+ x2 = torch.min(box1[2], boxes[:, 2])
54
+ y2 = torch.min(box1[3], boxes[:, 3])
55
+
56
+ # compute the area of intersection
57
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
58
+
59
+ # compute the area of both individual boxes
60
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
61
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
62
+
63
+ # compute the area of union
64
+ union = box1_area + box2_area - intersection
65
+
66
+ # compute the IoU
67
+ iou = intersection / union # Should be shape (n, )
68
+ if raw_output:
69
+ if iou.numel() == 0:
70
+ return 0
71
+ return iou
72
+
73
+ # get indices of boxes with IoU > thres
74
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
75
+
76
+ return high_iou_indices
77
+
78
+
79
+ def image_to_np_ndarray(image):
80
+ if type(image) is str:
81
+ return np.array(Image.open(image))
82
+ elif issubclass(type(image), Image.Image):
83
+ return np.array(image)
84
+ elif type(image) is np.ndarray:
85
+ return image
86
+ return None
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/README.md ADDED
@@ -0,0 +1,48 @@
+ ## Model Information
+ ## Source model
+ - Input shape: 640x640
+ - Number of parameters: 11.24M
+ - Model size: 45.0M
+ - Output shape: 1x37x8400,1x32x160x160
+
+ Source model repository: [FastSAM](https://github.com/CASIA-IVA-Lab/FastSAM)
+
+ ### Converted model
+
+ - Precision: INT8
+ - Backend: QNN2.16
+ - Target Device: SNM972 QCS8550
+
+ ## Inference with AidLite SDK
+
+ ### SDK installation
+ Model Farm uses AidLite SDK as the model inference SDK. For details, please refer to the [AidLite Developer Documentation](https://v2.docs.aidlux.com/en/sdk-api/aidlite-sdk/)
+
+ - Install AidLite SDK
+
+ ```bash
+ # Install the appropriate version of the aidlite sdk
+ sudo aid-pkg update
+ sudo aid-pkg install aidlite-sdk
+ # Download the qnn version that matches the above backend. Eg Install QNN2.23 Aidlite: sudo aid-pkg install aidlite-qnn223
+ sudo aid-pkg install aidlite-{QNN VERSION}
+ ```
+
+ - Verify AidLite SDK
+
+ ```bash
+ # aidlite sdk c++ check
+ python3 -c "import aidlite ; print(aidlite.get_library_version())"
+
+ # aidlite sdk python check
+ python3 -c "import aidlite ; print(aidlite.get_py_library_version())"
+ ```
+
+ ### Run demo
+ ```bash
+ cd fastsam_s/model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite
+ export LD_PRELOAD=/home/aidlux/.local/lib/python3.8/site-packages/torch/lib/../../torch.libs/libgomp-804f19d4.so.1.0.0
+
+ python3 ./python/run_test.py --target_model ./models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin --imgs ./python/dogs.jpg --invoke_nums 10
+ ```
+
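When comparing this INT8 build against a floating-point reference, the `get_acc` helper defined in `python/run_test.py` computes a cosine similarity between two flattened outputs; a minimal sketch with placeholder arrays standing in for the real model outputs:

```python
import numpy as np

def get_acc(onnx_out, other_out):
    # Cosine similarity between two flattened output vectors (as in run_test.py).
    return np.dot(np.array(onnx_out), np.array(other_out)) / (
        np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))

ref = np.random.rand(37 * 8400).astype(np.float32)                  # placeholder FP reference
quant = ref + 0.01 * np.random.rand(37 * 8400).astype(np.float32)   # placeholder INT8 output
print(get_acc(ref, quant))  # close to 1.0 when the two outputs agree
```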
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1e44ab88e5fd143d0c5e80c8a03955f7411cbd0d61e36e99c03c7eaf32c43e5f
+ size 12363280
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/dogs.jpg ADDED
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/onnx_export.py ADDED
@@ -0,0 +1,50 @@
+ import torch
+ import cv2
+ import os
+ import sys
+
+ from ultralytics.models.fastsam import FastSAM
+
+ class Fast_SAM(torch.nn.Module):
+ """Exportable FastSAM model, end-to-end."""
+
+ def __init__(self) -> None:
+ super().__init__()
+ pt_name ='./models/FastSAM-s.pt'
+ self.model =FastSAM(pt_name).model
+
+ def forward(self, image: torch.Tensor):
+ """
+ Run FastSAM on `image`, and produce high quality segmentation masks.
+ Faster than SAM as it is based on YOLOv8.
+
+ Parameters:
+ image: Pixel values pre-processed for encoder consumption.
+ Range: float[0, 1]
+ 3-channel Color Space: BGR
+ Returns:
+
+ """
+ predictions = self.model(image)
+ # Return predictions as a tuple instead of nested tuple.
+ return (predictions[0], predictions[1][2])
+
+
+ model = Fast_SAM()
+ num_params = sum(p.numel() for p in model.parameters())
+ print(f'Number of FastSAM-s parameters: {num_params}')
+ dummy_input = torch.randn( [1,3,640,640],dtype=torch.float32 )
+ source_model = torch.jit.trace(
+ model.to("cpu"), dummy_input, check_trace=False
+ )
+ torch.onnx.export(model, # model being run
+ dummy_input, # model input (or a tuple for multiple inputs)
+ "./models/fastsam_s.onnx", # where to save the model
+ export_params=True, # store the trained parameter weights inside the model file
+ opset_version=12, # the ONNX version to export the model to
+ do_constant_folding=True, # whether to execute constant folding for optimization
+ input_names = ['input'], # the model's input names
+ output_names = ['boxes','mask'],
+ verbose=True,
+ )
+ print("Convert to onnx successfully!")
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/prompt.py ADDED
@@ -0,0 +1,456 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import matplotlib.pyplot as plt
5
+ import numpy as np
6
+ import torch
7
+ from utils import image_to_np_ndarray
8
+ from PIL import Image
9
+
10
+
11
+ class FastSAMPrompt:
12
+
13
+ def __init__(self, image, results, device='cpu'):
14
+ if isinstance(image, str) or isinstance(image, Image.Image):
15
+ image = image_to_np_ndarray(image)
16
+ self.device = device
17
+ self.results = results
18
+ self.img = image
19
+
20
+ def _segment_image(self, image, bbox):
21
+ if isinstance(image, Image.Image):
22
+ image_array = np.array(image)
23
+ else:
24
+ image_array = image
25
+ segmented_image_array = np.zeros_like(image_array)
26
+ x1, y1, x2, y2 = bbox
27
+ segmented_image_array[y1:y2, x1:x2] = image_array[y1:y2, x1:x2]
28
+ segmented_image = Image.fromarray(segmented_image_array)
29
+ black_image = Image.new('RGB', image.size, (255, 255, 255))
30
+ # transparency_mask = np.zeros_like((), dtype=np.uint8)
31
+ transparency_mask = np.zeros((image_array.shape[0], image_array.shape[1]), dtype=np.uint8)
32
+ transparency_mask[y1:y2, x1:x2] = 255
33
+ transparency_mask_image = Image.fromarray(transparency_mask, mode='L')
34
+ black_image.paste(segmented_image, mask=transparency_mask_image)
35
+ return black_image
36
+
37
+ def _format_results(self, result, filter=0):
38
+ annotations = []
39
+ n = len(result.masks.data)
40
+ for i in range(n):
41
+ annotation = {}
42
+ mask = result.masks.data[i] == 1.0
43
+
44
+ if torch.sum(mask) < filter:
45
+ continue
46
+ annotation['id'] = i
47
+ annotation['segmentation'] = mask.cpu().numpy()
48
+ annotation['bbox'] = result.boxes.data[i]
49
+ annotation['score'] = result.boxes.conf[i]
50
+ annotation['area'] = annotation['segmentation'].sum()
51
+ annotations.append(annotation)
52
+ return annotations
53
+
54
+ def filter_masks(annotations): # filter the overlapping masks
55
+ annotations.sort(key=lambda x: x['area'], reverse=True)
56
+ to_remove = set()
57
+ for i in range(0, len(annotations)):
58
+ a = annotations[i]
59
+ for j in range(i + 1, len(annotations)):
60
+ b = annotations[j]
61
+ if i != j and j not in to_remove:
62
+ # check if
63
+ if b['area'] < a['area']:
64
+ if (a['segmentation'] & b['segmentation']).sum() / b['segmentation'].sum() > 0.8:
65
+ to_remove.add(j)
66
+
67
+ return [a for i, a in enumerate(annotations) if i not in to_remove], to_remove
68
+
69
+ def _get_bbox_from_mask(self, mask):
70
+ mask = mask.astype(np.uint8)
71
+ contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
72
+ x1, y1, w, h = cv2.boundingRect(contours[0])
73
+ x2, y2 = x1 + w, y1 + h
74
+ if len(contours) > 1:
75
+ for b in contours:
76
+ x_t, y_t, w_t, h_t = cv2.boundingRect(b)
77
+ # Merge multiple bounding boxes into one.
78
+ x1 = min(x1, x_t)
79
+ y1 = min(y1, y_t)
80
+ x2 = max(x2, x_t + w_t)
81
+ y2 = max(y2, y_t + h_t)
82
+ h = y2 - y1
83
+ w = x2 - x1
84
+ return [x1, y1, x2, y2]
85
+
86
+ def plot_to_result(self,
87
+ annotations,
88
+ bboxes=None,
89
+ points=None,
90
+ point_label=None,
91
+ mask_random_color=True,
92
+ better_quality=True,
93
+ retina=False,
94
+ withContours=True) -> np.ndarray:
95
+ if isinstance(annotations[0], dict):
96
+ annotations = [annotation['segmentation'] for annotation in annotations]
97
+ image = self.img
98
+ image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
99
+ original_h = image.shape[0]
100
+ original_w = image.shape[1]
101
+ if sys.platform == "darwin":
102
+ plt.switch_backend("TkAgg")
103
+ plt.figure(figsize=(original_w / 100, original_h / 100))
104
+ # Add subplot with no margin.
105
+ plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)
106
+ plt.margins(0, 0)
107
+ plt.gca().xaxis.set_major_locator(plt.NullLocator())
108
+ plt.gca().yaxis.set_major_locator(plt.NullLocator())
109
+
110
+ plt.imshow(image)
111
+ if better_quality:
112
+ if isinstance(annotations[0], torch.Tensor):
113
+ annotations = np.array(annotations.cpu())
114
+ for i, mask in enumerate(annotations):
115
+ mask = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8))
116
+ annotations[i] = cv2.morphologyEx(mask.astype(np.uint8), cv2.MORPH_OPEN, np.ones((8, 8), np.uint8))
117
+ if self.device == 'cpu':
118
+ annotations = np.array(annotations)
119
+ self.fast_show_mask(
120
+ annotations,
121
+ plt.gca(),
122
+ random_color=mask_random_color,
123
+ bboxes=bboxes,
124
+ points=points,
125
+ pointlabel=point_label,
126
+ retinamask=retina,
127
+ target_height=original_h,
128
+ target_width=original_w,
129
+ )
130
+ else:
131
+ if isinstance(annotations[0], np.ndarray):
132
+ annotations = torch.from_numpy(annotations)
133
+ self.fast_show_mask_gpu(
134
+ annotations,
135
+ plt.gca(),
136
+ random_color=mask_random_color,
137
+ bboxes=bboxes,
138
+ points=points,
139
+ pointlabel=point_label,
140
+ retinamask=retina,
141
+ target_height=original_h,
142
+ target_width=original_w,
143
+ )
144
+ if isinstance(annotations, torch.Tensor):
145
+ annotations = annotations.cpu().numpy()
146
+ if withContours:
147
+ contour_all = []
148
+ temp = np.zeros((original_h, original_w, 1))
149
+ for i, mask in enumerate(annotations):
150
+ if type(mask) == dict:
151
+ mask = mask['segmentation']
152
+ annotation = mask.astype(np.uint8)
153
+ if not retina:
154
+ annotation = cv2.resize(
155
+ annotation,
156
+ (original_w, original_h),
157
+ interpolation=cv2.INTER_NEAREST,
158
+ )
159
+ contours, hierarchy = cv2.findContours(annotation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
160
+ for contour in contours:
161
+ contour_all.append(contour)
162
+ cv2.drawContours(temp, contour_all, -1, (255, 255, 255), 2)
163
+ color = np.array([0 / 255, 0 / 255, 255 / 255, 0.8])
164
+ contour_mask = temp / 255 * color.reshape(1, 1, -1)
165
+ plt.imshow(contour_mask)
166
+
167
+ plt.axis('off')
168
+ fig = plt.gcf()
169
+ plt.draw()
170
+
171
+ try:
172
+ buf = fig.canvas.tostring_rgb()
173
+ except AttributeError:
174
+ fig.canvas.draw()
175
+ buf = fig.canvas.tostring_rgb()
176
+ cols, rows = fig.canvas.get_width_height()
177
+ img_array = np.frombuffer(buf, dtype=np.uint8).reshape(rows, cols, 3)
178
+ result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
179
+ plt.close()
180
+ return result
181
+
182
+ # Remark for refactoring: a function should ideally do one thing only. Storing the image and plotting should be separated, and they do not necessarily need to be class methods; standalone utility functions would let users chain them in their own scripts for more fine-grained control.
183
+ def plot(self,
184
+ annotations,
185
+ output_path,
186
+ bboxes=None,
187
+ points=None,
188
+ point_label=None,
189
+ mask_random_color=True,
190
+ better_quality=True,
191
+ retina=False,
192
+ withContours=True):
193
+ if len(annotations) == 0:
194
+ return None
195
+ result = self.plot_to_result(
196
+ annotations,
197
+ bboxes,
198
+ points,
199
+ point_label,
200
+ mask_random_color,
201
+ better_quality,
202
+ retina,
203
+ withContours,
204
+ )
205
+
206
+ path = os.path.dirname(os.path.abspath(output_path))
207
+ if not os.path.exists(path):
208
+ os.makedirs(path)
209
+ result = result[:, :, ::-1]
210
+ cv2.imwrite(output_path, result)
211
+
212
+ # CPU post process
213
+ def fast_show_mask(
214
+ self,
215
+ annotation,
216
+ ax,
217
+ random_color=False,
218
+ bboxes=None,
219
+ points=None,
220
+ pointlabel=None,
221
+ retinamask=True,
222
+ target_height=960,
223
+ target_width=960,
224
+ ):
225
+ msak_sum = annotation.shape[0]
226
+ height = annotation.shape[1]
227
+ weight = annotation.shape[2]
228
+ #Sort annotations based on area.
229
+ areas = np.sum(annotation, axis=(1, 2))
230
+ sorted_indices = np.argsort(areas)
231
+ annotation = annotation[sorted_indices]
232
+
233
+ index = (annotation != 0).argmax(axis=0)
234
+ if random_color:
235
+ color = np.random.random((msak_sum, 1, 1, 3))
236
+ else:
237
+ color = np.ones((msak_sum, 1, 1, 3)) * np.array([30 / 255, 144 / 255, 255 / 255])
238
+ transparency = np.ones((msak_sum, 1, 1, 1)) * 0.6
239
+ visual = np.concatenate([color, transparency], axis=-1)
240
+ mask_image = np.expand_dims(annotation, -1) * visual
241
+
242
+ show = np.zeros((height, weight, 4))
243
+ h_indices, w_indices = np.meshgrid(np.arange(height), np.arange(weight), indexing='ij')
244
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
245
+ # Use vectorized indexing to update the values of 'show'.
246
+ show[h_indices, w_indices, :] = mask_image[indices]
247
+ if bboxes is not None:
248
+ for bbox in bboxes:
249
+ x1, y1, x2, y2 = bbox
250
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
251
+ # draw point
252
+ if points is not None:
253
+ plt.scatter(
254
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
255
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
256
+ s=20,
257
+ c='y',
258
+ )
259
+ plt.scatter(
260
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
261
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
262
+ s=20,
263
+ c='m',
264
+ )
265
+
266
+ if not retinamask:
267
+ show = cv2.resize(show, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
268
+ ax.imshow(show)
269
+
270
+ def fast_show_mask_gpu(
271
+ self,
272
+ annotation,
273
+ ax,
274
+ random_color=False,
275
+ bboxes=None,
276
+ points=None,
277
+ pointlabel=None,
278
+ retinamask=True,
279
+ target_height=960,
280
+ target_width=960,
281
+ ):
282
+ msak_sum = annotation.shape[0]
283
+ height = annotation.shape[1]
284
+ weight = annotation.shape[2]
285
+ areas = torch.sum(annotation, dim=(1, 2))
286
+ sorted_indices = torch.argsort(areas, descending=False)
287
+ annotation = annotation[sorted_indices]
288
+ # Find the index of the first non-zero value at each position.
289
+ index = (annotation != 0).to(torch.long).argmax(dim=0)
290
+ if random_color:
291
+ color = torch.rand((msak_sum, 1, 1, 3)).to(annotation.device)
292
+ else:
293
+ color = torch.ones((msak_sum, 1, 1, 3)).to(annotation.device) * torch.tensor([
294
+ 30 / 255, 144 / 255, 255 / 255]).to(annotation.device)
295
+ transparency = torch.ones((msak_sum, 1, 1, 1)).to(annotation.device) * 0.6
296
+ visual = torch.cat([color, transparency], dim=-1)
297
+ mask_image = torch.unsqueeze(annotation, -1) * visual
298
+ # Select data according to the index. The index indicates which batch's data to choose at each position, converting the mask_image into a single batch form.
299
+ show = torch.zeros((height, weight, 4)).to(annotation.device)
300
+ try:
301
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight), indexing='ij')
302
+ except:
303
+ h_indices, w_indices = torch.meshgrid(torch.arange(height), torch.arange(weight))
304
+ indices = (index[h_indices, w_indices], h_indices, w_indices, slice(None))
305
+ # Use vectorized indexing to update the values of 'show'.
306
+ show[h_indices, w_indices, :] = mask_image[indices]
307
+ show_cpu = show.cpu().numpy()
308
+ if bboxes is not None:
309
+ for bbox in bboxes:
310
+ x1, y1, x2, y2 = bbox
311
+ ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1, fill=False, edgecolor='b', linewidth=1))
312
+ # draw point
313
+ if points is not None:
314
+ plt.scatter(
315
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 1],
316
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 1],
317
+ s=20,
318
+ c='y',
319
+ )
320
+ plt.scatter(
321
+ [point[0] for i, point in enumerate(points) if pointlabel[i] == 0],
322
+ [point[1] for i, point in enumerate(points) if pointlabel[i] == 0],
323
+ s=20,
324
+ c='m',
325
+ )
326
+ if not retinamask:
327
+ show_cpu = cv2.resize(show_cpu, (target_width, target_height), interpolation=cv2.INTER_NEAREST)
328
+ ax.imshow(show_cpu)
329
+
330
+ # clip
331
+ @torch.no_grad()
332
+ def retrieve(self, model, preprocess, elements, search_text: str, device) -> torch.Tensor:
333
+ preprocessed_images = [preprocess(image).to(device) for image in elements]
334
+ try:
335
+ import clip # OpenAI CLIP, used for text-to-image matching
336
+
337
+ except (ImportError, AssertionError, AttributeError):
338
+ from ultralytics.yolo.utils.checks import check_requirements
339
+
340
+ check_requirements('git+https://github.com/openai/CLIP.git') # install CLIP if it is missing
341
+ import clip
342
+
343
+
344
+ tokenized_text = clip.tokenize([search_text]).to(device)
345
+ stacked_images = torch.stack(preprocessed_images)
346
+ image_features = model.encode_image(stacked_images)
347
+ text_features = model.encode_text(tokenized_text)
348
+ image_features /= image_features.norm(dim=-1, keepdim=True)
349
+ text_features /= text_features.norm(dim=-1, keepdim=True)
350
+ probs = 100.0 * image_features @ text_features.T
351
+ return probs[:, 0].softmax(dim=0)
352
+
353
+ def _crop_image(self, format_results):
354
+
355
+ image = Image.fromarray(cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB))
356
+ ori_w, ori_h = image.size
357
+ annotations = format_results
358
+ mask_h, mask_w = annotations[0]['segmentation'].shape
359
+ if ori_w != mask_w or ori_h != mask_h:
360
+ image = image.resize((mask_w, mask_h))
361
+ cropped_boxes = []
362
+ cropped_images = []
363
+ not_crop = []
364
+ filter_id = []
365
+ # annotations, _ = filter_masks(annotations)
366
+ # filter_id = list(_)
367
+ for _, mask in enumerate(annotations):
368
+ if np.sum(mask['segmentation']) <= 100:
369
+ filter_id.append(_)
370
+ continue
371
+ bbox = self._get_bbox_from_mask(mask['segmentation']) # bbox of the mask
372
+ cropped_boxes.append(self._segment_image(image, bbox))
373
+ # cropped_boxes.append(segment_image(image,mask["segmentation"]))
374
+ cropped_images.append(bbox) # Save the bounding box of the cropped image.
375
+
376
+ return cropped_boxes, cropped_images, not_crop, filter_id, annotations
377
+
378
+ def box_prompt(self, bbox=None, bboxes=None):
379
+ if self.results is None:
380
+ return []
381
+ assert bbox or bboxes
382
+ if bboxes is None:
383
+ bboxes = [bbox]
384
+ max_iou_index = []
385
+ for bbox in bboxes:
386
+ assert (bbox[2] != 0 and bbox[3] != 0)
387
+ masks = self.results[0].masks.data
388
+ target_height = self.img.shape[0]
389
+ target_width = self.img.shape[1]
390
+ h = masks.shape[1]
391
+ w = masks.shape[2]
392
+ if h != target_height or w != target_width:
393
+ bbox = [
394
+ int(bbox[0] * w / target_width),
395
+ int(bbox[1] * h / target_height),
396
+ int(bbox[2] * w / target_width),
397
+ int(bbox[3] * h / target_height), ]
398
+ bbox[0] = round(bbox[0]) if round(bbox[0]) > 0 else 0
399
+ bbox[1] = round(bbox[1]) if round(bbox[1]) > 0 else 0
400
+ bbox[2] = round(bbox[2]) if round(bbox[2]) < w else w
401
+ bbox[3] = round(bbox[3]) if round(bbox[3]) < h else h
402
+
403
+ # IoUs = torch.zeros(len(masks), dtype=torch.float32)
404
+ bbox_area = (bbox[3] - bbox[1]) * (bbox[2] - bbox[0])
405
+
406
+ masks_area = torch.sum(masks[:, bbox[1]:bbox[3], bbox[0]:bbox[2]], dim=(1, 2))
407
+ orig_masks_area = torch.sum(masks, dim=(1, 2))
408
+
409
+ union = bbox_area + orig_masks_area - masks_area
410
+ IoUs = masks_area / union
411
+ max_iou_index.append(int(torch.argmax(IoUs)))
412
+ max_iou_index = list(set(max_iou_index))
413
+ return np.array(masks[max_iou_index].cpu().numpy())
414
+
415
+ def point_prompt(self, points, pointlabel): # numpy
416
+ if self.results is None:
417
+ return []
418
+ masks = self._format_results(self.results[0], 0)
419
+ target_height = self.img.shape[0]
420
+ target_width = self.img.shape[1]
421
+ h = masks[0]['segmentation'].shape[0]
422
+ w = masks[0]['segmentation'].shape[1]
423
+ if h != target_height or w != target_width:
424
+ points = [[int(point[0] * w / target_width), int(point[1] * h / target_height)] for point in points]
425
+ onemask = np.zeros((h, w))
426
+ masks = sorted(masks, key=lambda x: x['area'], reverse=True)
427
+ for i, annotation in enumerate(masks):
428
+ if type(annotation) == dict:
429
+ mask = annotation['segmentation']
430
+ else:
431
+ mask = annotation
432
+ for i, point in enumerate(points):
433
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 1:
434
+ onemask[mask] = 1
435
+ if mask[point[1], point[0]] == 1 and pointlabel[i] == 0:
436
+ onemask[mask] = 0
437
+ onemask = onemask >= 1
438
+ return np.array([onemask])
439
+
440
+ def text_prompt(self, text):
441
+ if self.results is None:
442
+ return []
443
+ format_results = self._format_results(self.results[0], 0)
444
+ cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
445
+ import clip # imported here as well, since retrieve() only imports it locally
+ clip_model, preprocess = clip.load('ViT-B/32', device=self.device)
446
+ scores = self.retrieve(clip_model, preprocess, cropped_boxes, text, device=self.device)
447
+ max_idx = scores.argsort()
448
+ max_idx = max_idx[-1]
449
+ max_idx += sum(np.array(filter_id) <= int(max_idx))
450
+ return np.array([annotations[max_idx]['segmentation']])
451
+
452
+ def everything_prompt(self):
453
+ if self.results is None:
454
+ return []
455
+ return self.results[0].masks.data
456
+
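For reference, a minimal usage sketch of the `FastSAMPrompt` class above. It assumes `everything_results` is the list of `Results` produced by the detector post-processing in `run_test.py`; the image path and prompt coordinates below are illustrative only.

```python
# Minimal sketch (assumes everything_results comes from qnn_predict.postprocess).
from prompt import FastSAMPrompt

prompt = FastSAMPrompt("python/dogs.jpg", everything_results, device="cpu")

masks_all = prompt.everything_prompt()                                # all masks
masks_pt = prompt.point_prompt(points=[[320, 320]], pointlabel=[1])   # example foreground point
masks_box = prompt.box_prompt(bbox=[100, 100, 400, 400])              # example xyxy box

# Render the chosen masks onto the original image and save the overlay.
prompt.plot(annotations=masks_pt, output_path="python/dogs_point_result.jpg")
```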
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/run_test.py ADDED
@@ -0,0 +1,224 @@
1
+ import os
2
+ import sys
3
+ import cv2
4
+ import numpy as np
5
+ import onnxruntime
6
+ import time
7
+ import matplotlib.pyplot as plt
8
+ import torch
9
+ from ultralytics.engine.results import Results
10
+ from tools_pt import *
11
+ from prompt import FastSAMPrompt
12
+ import aidlite
13
+ import argparse
14
+ import ast
15
+
16
+ # Cosine-similarity helper for comparing model outputs
17
+ def get_acc(onnx_out,other_out):
18
+ cosine_similarity=np.dot(np.array(onnx_out),np.array(other_out))/(np.linalg.norm(np.array(onnx_out)) * np.linalg.norm(np.array(other_out)))
19
+ return cosine_similarity
20
+
21
+ def cal_sigmoid(x):
22
+ return 1 / (1 + np.exp(-x))
23
+
24
+ class qnn_predict(object):
25
+ def __init__(self,inputshape,outputshape,args) -> None:
26
+ aidlite.set_log_level(aidlite.LogLevel.INFO)
27
+ aidlite.log_to_stderr()
28
+ print(f"Aidlite library version : {aidlite.get_library_version()}")
29
+ print(f"Aidlite python library version : {aidlite.get_py_library_version()}")
30
+ config = aidlite.Config.create_instance()
31
+ if config is None:
32
+ print("Create model failed !")
33
+ config.implement_type = aidlite.ImplementType.TYPE_LOCAL
34
+ config.framework_type = aidlite.FrameworkType.TYPE_QNN
35
+ config.accelerate_type = aidlite.AccelerateType.TYPE_DSP
36
+ config.is_quantify_model = 1
37
+
38
+ model = aidlite.Model.create_instance(args.target_model)
39
+ if model is None:
40
+ print("Create model failed !")
41
+
42
+ self.input_shape=inputshape
43
+ self.out_shape = outputshape
44
+ model.set_model_properties(self.input_shape, aidlite.DataType.TYPE_FLOAT32, self.out_shape, aidlite.DataType.TYPE_FLOAT32)
45
+ self.interpreter = aidlite.InterpreterBuilder.build_interpretper_from_model_and_config(model, config)
46
+ if self.interpreter is None:
47
+ print("build_interpretper_from_model_and_config failed !")
48
+ result = self.interpreter.init()
49
+ if result != 0:
50
+ print(f"interpreter init failed !")
51
+ result = self.interpreter.load_model()
52
+ if result != 0:
53
+ print("interpreter load model failed !")
54
+ print("detect model load success!")
55
+
56
+ self.conf = 0.4
57
+ self.iou=0.9
58
+ self.size = 640
59
+ self.agnostic_nms=False
60
+ self.max_det = 300
61
+ self.names=['object']
62
+ self.classes =None
63
+ self.retina_masks=True
64
+
65
+ def pretreat_img(self,img):
66
+ scale = 1/255.
67
+ img_size = cv2.resize(img, (self.size,self.size), interpolation=cv2.INTER_LINEAR)
68
+ float_img = img_size.astype('float32')
69
+ float_img = float_img* scale
70
+ float_img = float_img[:, :, ::-1]
71
+ return float_img
72
+
73
+ def postprocess(self, preds, img, orig_imgs):
74
+ """TODO: filter by classes."""
75
+ p = non_max_suppression(torch.from_numpy(preds[0]),
76
+ self.conf,
77
+ self.iou,
78
+ agnostic=self.agnostic_nms,
79
+ max_det=self.max_det,
80
+ nc=len(self.names),
81
+ classes=self.classes)
82
+
83
+ results = []
84
+ if len(p) == 0 or len(p[0]) == 0:
85
+ print("No object detected.")
86
+ return results
87
+
88
+ full_box = torch.zeros_like(p[0][0])
89
+ full_box[2], full_box[3], full_box[4], full_box[6:] = img.shape[3], img.shape[2], 1.0, 1.0
90
+ full_box = full_box.view(1, -1)
91
+ critical_iou_index = bbox_iou(full_box[0][:4], p[0][:, :4], iou_thres=0.9, image_shape=img.shape[2:])
92
+ if critical_iou_index.numel() != 0:
93
+ full_box[0][4] = p[0][critical_iou_index][:,4]
94
+ full_box[0][6:] = p[0][critical_iou_index][:,6:]
95
+ p[0][critical_iou_index] = full_box
96
+
97
+ #proto = preds[1][-1] if len(preds[1]) == 3 else preds[1] # second output is len 3 if pt, but only 1 if exported
98
+ proto=torch.from_numpy(preds[-1])
99
+ for i, pred in enumerate(p):
100
+ orig_img = orig_imgs[i] if isinstance(orig_imgs, list) else orig_imgs
101
+ path =img[0] #self.batch[0]
102
+ img_path = path[i] if isinstance(path, list) else path
103
+ if not len(pred): # save empty boxes
104
+ results.append(Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6]))
105
+ continue
106
+ if self.retina_masks:
107
+ if not isinstance(orig_imgs, torch.Tensor):
108
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
109
+ masks = process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2]) # HWC
110
+ else:
111
+ masks = process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True) # HWC
112
+ if not isinstance(orig_imgs, torch.Tensor):
113
+ pred[:, :4] = scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
114
+ results.append(
115
+ Results(orig_img=orig_img, path=img_path, names=self.names, boxes=pred[:, :6], masks=masks))
116
+ return results
117
+
118
+ def qnn_run(self, orig_imgs,img_path,args):
119
+ input_img_f = self.pretreat_img(orig_imgs) # resize image, HWC layout
120
+ # print("qnn_input:",input_img_f)
121
+ # add batch dimension: HWC -> NHWC
122
+ input_img = np.expand_dims(input_img_f, 0)
123
+
124
+ invoke_time=[]
125
+ for i in range(args.invoke_nums):
126
+ result = self.interpreter.set_input_tensor(0, input_img.data)
127
+ t0 = time.time()
128
+ result = self.interpreter.invoke()
129
+ t1 = time.time()
130
+ cost_time=(t1-t0)*1000
131
+ invoke_time.append(cost_time)
132
+ mask_ = self.interpreter.get_output_tensor(0)
133
+ concat_ = self.interpreter.get_output_tensor(1)
134
+ mul_ = self.interpreter.get_output_tensor(3)
135
+ split_ = self.interpreter.get_output_tensor(2)
136
+ mask_ = mask_.reshape( * self.out_shape[3])
137
+ mask_=mask_.transpose((0, 3, 1,2))
138
+ concat_ = concat_.reshape( *self.out_shape[2])
139
+ mul_ = mul_.reshape( *self.out_shape[1])
140
+ split_ = split_.reshape( *self.out_shape[0])
141
+ sig_ = cal_sigmoid(split_)
142
+
143
+ output_concat = np.concatenate((mul_,sig_),axis=1)
144
+ output_concat = np.concatenate((output_concat,concat_),axis=1)
145
+
146
+ # outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
147
+ ## timing statistics
148
+ max_invoke_time = max(invoke_time)
149
+ min_invoke_time = min(invoke_time)
150
+ mean_invoke_time = sum(invoke_time)/args.invoke_nums
151
+ var_invoketime=np.var(invoke_time)
152
+ print("========================================")
153
+ print(f"QNN inference {args.invoke_nums} times :\n --mean_invoke_time is {mean_invoke_time} \n --max_invoke_time is {max_invoke_time} \n --min_invoke_time is {min_invoke_time} \n --var_invoketime is {var_invoketime}")
154
+ print("========================================")
155
+
156
+ qnn_out = [np.array(output_concat),np.array(mask_)]
157
+ # print("qnn predict out:",qnn_out)
158
+
159
+ nchw_img = input_img.transpose(0,3,1,2)
160
+ everything_results = self.postprocess( qnn_out, nchw_img, [orig_imgs])
161
+ # print("everything_results: ",everything_results)
162
+
163
+ prompt_process = FastSAMPrompt(args.imgs, everything_results, device="cpu")
164
+
165
+ # ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])
166
+ try:
167
+ if args.point_prompt ==[[0,0]]:
168
+ ann = prompt_process.everything_prompt()
169
+ else:
170
+ ann = prompt_process.point_prompt(points=args.point_prompt, pointlabel=[1])
171
+ out_name = os.path.basename(img_path).split(".")[0]
172
+ if True: # savepic
173
+ outpath = "python/"
174
+ if not os.path.exists(outpath):
175
+ os.mkdir(outpath)
176
+ prompt_process.plot(
177
+ annotations=ann,
178
+ output_path=os.path.join(outpath,out_name+"_result.jpg"),
179
+ mask_random_color=True,
180
+ better_quality=True,
181
+ retina=False,
182
+ withContours=True,
183
+ )
184
+ else:
185
+ plt.figure()
186
+ prompt_process.fast_show_mask(annotation=ann,
187
+ ax = plt)
188
+ except Exception as e:
189
+ print(f"Waning : An error occurred in the picture {img_path} prediction -{e}")
190
+ return [mask_.reshape(-1),output_concat.reshape(-1)]
191
+
192
+
193
+
194
+ def parser_args():
195
+ parser = argparse.ArgumentParser(description="Run model benchmarks")
196
+ parser.add_argument('--target_model',type=str,default='models/cutoff_fastsam_s_w8a8.qnn216.ctx.bin',help="inference model path")
197
+ parser.add_argument('--source_model',type=str,default='models/fastsam_s.onnx',help="original model path")
198
+ parser.add_argument('--imgs',type=str,default='python/dogs.jpg',help="Predict images path")
199
+ parser.add_argument('--invoke_nums',type=int,default=10,help="Inference nums")
200
+ parser.add_argument('--point_prompt',type=str,default="[[0,0]]",help="example:[[x1,y1],[x2,y2]]")
201
+ args = parser.parse_args()
202
+ return args
203
+
204
+
205
+ if __name__ == "__main__":
206
+ args = parser_args()
207
+ inputshape=[[1,640,640,3]]
208
+ outputshape=[[1,1,8400],[1,4,8400],[1,32,8400],[1,160,160,32]]
209
+ args.point_prompt = ast.literal_eval(args.point_prompt)
210
+
211
+ predict = qnn_predict(inputshape,outputshape,args)
212
+ if os.path.isdir(args.imgs):
213
+ img_files = os.listdir(args.imgs)
214
+ for fi in img_files:
215
+ img_path = os.path.join(args.imgs,fi)
216
+ im0s = cv2.imread(img_path) # BGR
217
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
218
+ predict.qnn_run(im0s,img_path,args)
219
+ else:
220
+ img_path = args.imgs
221
+ im0s = cv2.imread(img_path) # BGR
222
+ im0s = cv2.resize(im0s, (640,640), interpolation=cv2.INTER_LINEAR)
223
+ qnn_result = predict.qnn_run(im0s,img_path,args)
224
+ print("Prediction completion and the results are saved !")
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/tools_pt.py ADDED
@@ -0,0 +1,372 @@
1
+ import numpy as np
2
+ import time
3
+ import torch
4
+ import torchvision
5
+ import torch.nn.functional as F
6
+
7
+
8
+
9
+ def clip_boxes(boxes, shape):
10
+ """
11
+ Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.
12
+
13
+ Args:
14
+ boxes (torch.Tensor): the bounding boxes to clip
15
+ shape (tuple): the shape of the image
16
+ """
17
+ if isinstance(boxes, torch.Tensor): # faster individually
18
+ boxes[..., 0].clamp_(0, shape[1]) # x1
19
+ boxes[..., 1].clamp_(0, shape[0]) # y1
20
+ boxes[..., 2].clamp_(0, shape[1]) # x2
21
+ boxes[..., 3].clamp_(0, shape[0]) # y2
22
+ else: # np.array (faster grouped)
23
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
24
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
25
+
26
+ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
27
+ """
28
+ Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
29
+ (img1_shape) to the shape of a different image (img0_shape).
30
+
31
+ Args:
32
+ img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
33
+ boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
34
+ img0_shape (tuple): the shape of the target image, in the format of (height, width).
35
+ ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
36
+ calculated based on the size difference between the two images.
37
+ padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
38
+ rescaling.
39
+
40
+ Returns:
41
+ boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
42
+ """
43
+ if ratio_pad is None: # calculate from img0_shape
44
+ gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
45
+ pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
46
+ (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1) # wh padding
47
+ else:
48
+ gain = ratio_pad[0][0]
49
+ pad = ratio_pad[1]
50
+
51
+ if padding:
52
+ boxes[..., [0, 2]] -= pad[0] # x padding
53
+ boxes[..., [1, 3]] -= pad[1] # y padding
54
+ boxes[..., :4] /= gain
55
+ clip_boxes(boxes, img0_shape)
56
+ return boxes
57
+
58
+
59
+ def xywh2xyxy(x):
60
+ """
61
+ Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
62
+ top-left corner and (x2, y2) is the bottom-right corner.
63
+
64
+ Args:
65
+ x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.
66
+
67
+ Returns:
68
+ y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
69
+ """
70
+ assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
71
+ y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x) # faster than clone/copy
72
+ dw = x[..., 2] / 2 # half-width
73
+ dh = x[..., 3] / 2 # half-height
74
+ y[..., 0] = x[..., 0] - dw # top left x
75
+ y[..., 1] = x[..., 1] - dh # top left y
76
+ y[..., 2] = x[..., 0] + dw # bottom right x
77
+ y[..., 3] = x[..., 1] + dh # bottom right y
78
+ return y
79
+
80
+
81
+ def non_max_suppression(
82
+ prediction,
83
+ conf_thres=0.25,
84
+ iou_thres=0.45,
85
+ classes=None,
86
+ agnostic=False,
87
+ multi_label=False,
88
+ labels=(),
89
+ max_det=300,
90
+ nc=0, # number of classes (optional)
91
+ max_time_img=0.05,
92
+ max_nms=30000,
93
+ max_wh=7680,
94
+ ):
95
+ """
96
+ Perform non-maximum suppression (NMS) on a set of boxes, with support for masks and multiple labels per box.
97
+
98
+ Args:
99
+ prediction (torch.Tensor): A tensor of shape (batch_size, num_classes + 4 + num_masks, num_boxes)
100
+ containing the predicted boxes, classes, and masks. The tensor should be in the format
101
+ output by a model, such as YOLO.
102
+ conf_thres (float): The confidence threshold below which boxes will be filtered out.
103
+ Valid values are between 0.0 and 1.0.
104
+ iou_thres (float): The IoU threshold below which boxes will be filtered out during NMS.
105
+ Valid values are between 0.0 and 1.0.
106
+ classes (List[int]): A list of class indices to consider. If None, all classes will be considered.
107
+ agnostic (bool): If True, the model is agnostic to the number of classes, and all
108
+ classes will be considered as one.
109
+ multi_label (bool): If True, each box may have multiple labels.
110
+ labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
111
+ list contains the apriori labels for a given image. The list should be in the format
112
+ output by a dataloader, with each label being a tuple of (class_index, x1, y1, x2, y2).
113
+ max_det (int): The maximum number of boxes to keep after NMS.
114
+ nc (int, optional): The number of classes output by the model. Any indices after this will be considered masks.
115
+ max_time_img (float): The maximum time (seconds) for processing one image.
116
+ max_nms (int): The maximum number of boxes into torchvision.ops.nms().
117
+ max_wh (int): The maximum box width and height in pixels
118
+
119
+ Returns:
120
+ (List[torch.Tensor]): A list of length batch_size, where each element is a tensor of
121
+ shape (num_boxes, 6 + num_masks) containing the kept boxes, with columns
122
+ (x1, y1, x2, y2, confidence, class, mask1, mask2, ...).
123
+ """
124
+
125
+ # Checks
126
+ assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
127
+ assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
128
+ if isinstance(prediction, (list, tuple)): # YOLOv8 model in validation model, output = (inference_out, loss_out)
129
+ prediction = prediction[0] # select only inference output
130
+
131
+ device = prediction.device
132
+ mps = 'mps' in device.type # Apple MPS
133
+ if mps: # MPS not fully supported yet, convert tensors to CPU before NMS
134
+ prediction = prediction.cpu()
135
+ bs = prediction.shape[0] # batch size
136
+ nc = nc or (prediction.shape[1] - 4) # number of classes
137
+ nm = prediction.shape[1] - nc - 4
138
+ mi = 4 + nc # mask start index
139
+ xc = prediction[:, 4:mi].amax(1) > conf_thres # candidates
140
+
141
+ # Settings
142
+ # min_wh = 2 # (pixels) minimum box width and height
143
+ time_limit = 0.5 + max_time_img * bs # seconds to quit after
144
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
145
+
146
+ prediction = prediction.transpose(-1, -2) # shape(1,84,6300) to shape(1,6300,84)
147
+ prediction[..., :4] = xywh2xyxy(prediction[..., :4]) # xywh to xyxy
148
+
149
+ t = time.time()
150
+ output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
151
+ for xi, x in enumerate(prediction): # image index, image inference
152
+ # Apply constraints
153
+ # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0 # width-height
154
+ x = x[xc[xi]] # confidence
155
+
156
+ # Cat apriori labels if autolabelling
157
+ if labels and len(labels[xi]):
158
+ lb = labels[xi]
159
+ v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
160
+ v[:, :4] = xywh2xyxy(lb[:, 1:5]) # box
161
+ v[range(len(lb)), lb[:, 0].long() + 4] = 1.0 # cls
162
+ x = torch.cat((x, v), 0)
163
+
164
+ # If none remain process next image
165
+ if not x.shape[0]:
166
+ continue
167
+
168
+ # Detections matrix nx6 (xyxy, conf, cls)
169
+ box, cls, mask = x.split((4, nc, nm), 1)
170
+
171
+ if multi_label:
172
+ i, j = torch.where(cls > conf_thres)
173
+ x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
174
+ else: # best class only
175
+ conf, j = cls.max(1, keepdim=True)
176
+ x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]
177
+
178
+ # Filter by class
179
+ if classes is not None:
180
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
181
+
182
+ # Check shape
183
+ n = x.shape[0] # number of boxes
184
+ if not n: # no boxes
185
+ continue
186
+ if n > max_nms: # excess boxes
187
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence and remove excess boxes
188
+
189
+ # Batched NMS
190
+ c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
191
+ boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
192
+ i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
193
+ i = i[:max_det] # limit detections
194
+
195
+ # # Experimental
196
+ # merge = False # use merge-NMS
197
+ # if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
198
+ # # Update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
199
+ # from .metrics import box_iou
200
+ # iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
201
+ # weights = iou * scores[None] # box weights
202
+ # x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
203
+ # redundant = True # require redundant detections
204
+ # if redundant:
205
+ # i = i[iou.sum(1) > 1] # require redundancy
206
+
207
+ output[xi] = x[i]
208
+ if mps:
209
+ output[xi] = output[xi].to(device)
210
+ # if (time.time() - t) > time_limit:
211
+ # LOGGER.warning(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
212
+ # break # time limit exceeded
213
+
214
+ return output
215
+
216
+
217
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
218
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
219
+ Args:
220
+ boxes: (n, 4)
221
+ image_shape: (height, width)
222
+ threshold: pixel threshold
223
+ Returns:
224
+ adjusted_boxes: adjusted bounding boxes
225
+ '''
226
+
227
+ # Image dimensions
228
+ h, w = image_shape
229
+
230
+ # Adjust boxes
231
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
232
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
233
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
234
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
235
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
236
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
237
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
238
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
239
+
240
+ return boxes
241
+
242
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
243
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
244
+ Args:
245
+ box1: (4, )
246
+ boxes: (n, 4)
247
+ Returns:
248
+ high_iou_indices: Indices of boxes with IoU > thres
249
+ '''
250
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
251
+ # obtain coordinates for intersections
252
+ x1 = torch.max(box1[0], boxes[:, 0])
253
+ y1 = torch.max(box1[1], boxes[:, 1])
254
+ x2 = torch.min(box1[2], boxes[:, 2])
255
+ y2 = torch.min(box1[3], boxes[:, 3])
256
+
257
+ # compute the area of intersection
258
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
259
+
260
+ # compute the area of both individual boxes
261
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
262
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
263
+
264
+ # compute the area of union
265
+ union = box1_area + box2_area - intersection
266
+
267
+ # compute the IoU
268
+ iou = intersection / union # Should be shape (n, )
269
+ if raw_output:
270
+ if iou.numel() == 0:
271
+ return 0
272
+ return iou
273
+
274
+ # get indices of boxes with IoU > thres
275
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
276
+
277
+ return high_iou_indices
278
+
279
+
280
+ def scale_masks(masks, shape, padding=True):
281
+ """
282
+ Rescale segment masks to shape.
283
+
284
+ Args:
285
+ masks (torch.Tensor): (N, C, H, W).
286
+ shape (tuple): Height and width.
287
+ padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
288
+ rescaling.
289
+ """
290
+ mh, mw = masks.shape[2:]
291
+ gain = min(mh / shape[0], mw / shape[1]) # gain = old / new
292
+ pad = [mw - shape[1] * gain, mh - shape[0] * gain] # wh padding
293
+ if padding:
294
+ pad[0] /= 2
295
+ pad[1] /= 2
296
+ top, left = (int(pad[1]), int(pad[0])) if padding else (0, 0) # y, x
297
+ bottom, right = (int(mh - pad[1]), int(mw - pad[0]))
298
+ masks = masks[..., top:bottom, left:right]
299
+
300
+ masks = F.interpolate(masks, shape, mode="bilinear", align_corners=False) # NCHW
301
+ return masks
302
+
303
+
304
+ def process_mask_native(protos, masks_in, bboxes, shape):
305
+ """
306
+ It takes the output of the mask head, and crops it after upsampling to the bounding boxes.
307
+
308
+ Args:
309
+ protos (torch.Tensor): [mask_dim, mask_h, mask_w]
310
+ masks_in (torch.Tensor): [n, mask_dim], n is number of masks after nms
311
+ bboxes (torch.Tensor): [n, 4], n is number of masks after nms
312
+ shape (tuple): the size of the input image (h,w)
313
+
314
+ Returns:
315
+ masks (torch.Tensor): The returned masks with dimensions [h, w, n]
316
+ """
317
+ c, mh, mw = protos.shape # CHW
318
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
319
+ masks = scale_masks(masks[None], shape)[0] # CHW
320
+ masks = crop_mask(masks, bboxes) # CHW
321
+ return masks.gt_(0.5)
322
+
323
+ def crop_mask(masks, boxes):
324
+ """
325
+ It takes a mask and a bounding box, and returns a mask that is cropped to the bounding box.
326
+
327
+ Args:
328
+ masks (torch.Tensor): [n, h, w] tensor of masks
329
+ boxes (torch.Tensor): [n, 4] tensor of bbox coordinates in relative point form
330
+
331
+ Returns:
332
+ (torch.Tensor): The masks are being cropped to the bounding box.
333
+ """
334
+ _, h, w = masks.shape
335
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(n,1,1)
336
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,1,w)
337
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(1,h,1)
338
+
339
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
340
+
341
+ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
342
+ """
343
+ Apply masks to bounding boxes using the output of the mask head.
344
+
345
+ Args:
346
+ protos (torch.Tensor): A tensor of shape [mask_dim, mask_h, mask_w].
347
+ masks_in (torch.Tensor): A tensor of shape [n, mask_dim], where n is the number of masks after NMS.
348
+ bboxes (torch.Tensor): A tensor of shape [n, 4], where n is the number of masks after NMS.
349
+ shape (tuple): A tuple of integers representing the size of the input image in the format (h, w).
350
+ upsample (bool): A flag to indicate whether to upsample the mask to the original image size. Default is False.
351
+
352
+ Returns:
353
+ (torch.Tensor): A binary mask tensor of shape [n, h, w], where n is the number of masks after NMS, and h and w
354
+ are the height and width of the input image. The mask is applied to the bounding boxes.
355
+ """
356
+
357
+ c, mh, mw = protos.shape # CHW
358
+ ih, iw = shape
359
+ masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW
360
+
361
+ downsampled_bboxes = bboxes.clone()
362
+ downsampled_bboxes[:, 0] *= mw / iw
363
+ downsampled_bboxes[:, 2] *= mw / iw
364
+ downsampled_bboxes[:, 3] *= mh / ih
365
+ downsampled_bboxes[:, 1] *= mh / ih
366
+
367
+ masks = crop_mask(masks, downsampled_bboxes) # CHW
368
+ if upsample:
369
+ masks = F.interpolate(masks[None], shape, mode='bilinear', align_corners=False)[0] # CHW
370
+ return masks.gt_(0.5)
371
+
372
+
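A small self-contained exercise of the helpers above, using a dummy prediction tensor laid out like the 1x37x8400 head in `run_test.py` (4 box coordinates, 1 class score, 32 mask coefficients); the box values are arbitrary.

```python
# Sketch: xywh2xyxy, non_max_suppression and bbox_iou on toy data.
import torch
from tools_pt import xywh2xyxy, non_max_suppression, bbox_iou

pred = torch.zeros(1, 37, 8400)
pred[0, :4, 0] = torch.tensor([320.0, 320.0, 100.0, 80.0])  # one box, xywh
pred[0, 4, 0] = 0.9                                         # its confidence

dets = non_max_suppression(pred, conf_thres=0.4, iou_thres=0.9, nc=1)
print(dets[0].shape)  # torch.Size([1, 38]): xyxy, conf, cls, 32 mask coefficients

box = xywh2xyxy(torch.tensor([320.0, 320.0, 100.0, 80.0]))
ious = bbox_iou(box, dets[0][:, :4].clone(), image_shape=(640, 640), raw_output=True)
print(ious)           # ~1.0 for the matching box
```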
model_farm_fastsams_qsc8550_qnn2.16_int8_aidlite/python/utils.py ADDED
@@ -0,0 +1,86 @@
1
+ import numpy as np
2
+ import torch
3
+ from PIL import Image
4
+
5
+
6
+ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
7
+ '''Adjust bounding boxes to stick to image border if they are within a certain threshold.
8
+ Args:
9
+ boxes: (n, 4)
10
+ image_shape: (height, width)
11
+ threshold: pixel threshold
12
+ Returns:
13
+ adjusted_boxes: adjusted bounding boxes
14
+ '''
15
+
16
+ # Image dimensions
17
+ h, w = image_shape
18
+
19
+ # Adjust boxes
20
+ boxes[:, 0] = torch.where(boxes[:, 0] < threshold, torch.tensor(
21
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 0]) # x1
22
+ boxes[:, 1] = torch.where(boxes[:, 1] < threshold, torch.tensor(
23
+ 0, dtype=torch.float, device=boxes.device), boxes[:, 1]) # y1
24
+ boxes[:, 2] = torch.where(boxes[:, 2] > w - threshold, torch.tensor(
25
+ w, dtype=torch.float, device=boxes.device), boxes[:, 2]) # x2
26
+ boxes[:, 3] = torch.where(boxes[:, 3] > h - threshold, torch.tensor(
27
+ h, dtype=torch.float, device=boxes.device), boxes[:, 3]) # y2
28
+
29
+ return boxes
30
+
31
+
32
+
33
+ def convert_box_xywh_to_xyxy(box):
34
+ x1 = box[0]
35
+ y1 = box[1]
36
+ x2 = box[0] + box[2]
37
+ y2 = box[1] + box[3]
38
+ return [x1, y1, x2, y2]
39
+
40
+
41
+ def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
42
+ '''Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
43
+ Args:
44
+ box1: (4, )
45
+ boxes: (n, 4)
46
+ Returns:
47
+ high_iou_indices: Indices of boxes with IoU > thres
48
+ '''
49
+ boxes = adjust_bboxes_to_image_border(boxes, image_shape)
50
+ # obtain coordinates for intersections
51
+ x1 = torch.max(box1[0], boxes[:, 0])
52
+ y1 = torch.max(box1[1], boxes[:, 1])
53
+ x2 = torch.min(box1[2], boxes[:, 2])
54
+ y2 = torch.min(box1[3], boxes[:, 3])
55
+
56
+ # compute the area of intersection
57
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
58
+
59
+ # compute the area of both individual boxes
60
+ box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
61
+ box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
62
+
63
+ # compute the area of union
64
+ union = box1_area + box2_area - intersection
65
+
66
+ # compute the IoU
67
+ iou = intersection / union # Should be shape (n, )
68
+ if raw_output:
69
+ if iou.numel() == 0:
70
+ return 0
71
+ return iou
72
+
73
+ # get indices of boxes with IoU > thres
74
+ high_iou_indices = torch.nonzero(iou > iou_thres).flatten()
75
+
76
+ return high_iou_indices
77
+
78
+
79
+ def image_to_np_ndarray(image):
80
+ if type(image) is str:
81
+ return np.array(Image.open(image))
82
+ elif issubclass(type(image), Image.Image):
83
+ return np.array(image)
84
+ elif type(image) is np.ndarray:
85
+ return image
86
+ return None
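A quick check of the two standalone helpers in `utils.py`; the box and image values are illustrative.

```python
# Sketch: convert_box_xywh_to_xyxy and image_to_np_ndarray in isolation.
import numpy as np
from PIL import Image
from utils import convert_box_xywh_to_xyxy, image_to_np_ndarray

print(convert_box_xywh_to_xyxy([100, 150, 200, 120]))  # [100, 150, 300, 270]

img = image_to_np_ndarray(Image.new("RGB", (64, 64)))  # PIL.Image -> np.ndarray
print(type(img), img.shape)                            # <class 'numpy.ndarray'> (64, 64, 3)
```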