Delete tool
tool/__pycache__/detector.cpython-311.pyc
DELETED
Binary file (6.36 kB)

tool/__pycache__/segmentor.cpython-311.pyc
DELETED
Binary file (5.54 kB)

tool/__pycache__/transfer_tools.cpython-311.pyc
DELETED
Binary file (3.53 kB)
tool/detector.py
DELETED
@@ -1,93 +0,0 @@
-import torch
-import numpy as np
-import cv2
-import PIL
-
-from groundingdino.models import build_model as build_grounding_dino
-from groundingdino.util.slconfig import SLConfig
-from groundingdino.util.utils import clean_state_dict
-from groundingdino.util.inference import annotate, load_image, predict
-import groundingdino.datasets.transforms as T
-
-from torchvision.ops import box_convert
-
-class Detector:
-    def __init__(self, device):
-        config_file = "src/groundingdino/groundingdino/config/GroundingDINO_SwinT_OGC.py"
-        grounding_dino_ckpt = './ckpt/groundingdino_swint_ogc.pth'
-        args = SLConfig.fromfile(config_file)
-        args.device = device
-        self.device = device
-        self.gd = build_grounding_dino(args)
-
-        checkpoint = torch.load(grounding_dino_ckpt, map_location='cpu')
-        log = self.gd.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
-        print("Model loaded from {} \n => {}".format(grounding_dino_ckpt, log))
-        self.gd.eval()
-
-    def image_transform_grounding(self, init_image):
-        transform = T.Compose([
-            T.RandomResize([800], max_size=1333),
-            T.ToTensor(),
-            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-        ])
-        image, _ = transform(init_image, None)  # 3, h, w
-        return init_image, image
-
-    def image_transform_grounding_for_vis(self, init_image):
-        transform = T.Compose([
-            T.RandomResize([800], max_size=1333),
-        ])
-        image, _ = transform(init_image, None)  # 3, h, w
-        return image
-
-    def transfer_boxes_format(self, boxes, height, width):
-        boxes = boxes * torch.Tensor([width, height, width, height])
-        boxes = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy")
-
-        transfered_boxes = []
-        for i in range(len(boxes)):
-            box = boxes[i]
-            transfered_box = [[int(box[0]), int(box[1])], [int(box[2]), int(box[3])]]
-            transfered_boxes.append(transfered_box)
-
-        transfered_boxes = np.array(transfered_boxes)
-        return transfered_boxes
-
-    @torch.no_grad()
-    def run_grounding(self, origin_frame, grounding_caption, box_threshold, text_threshold):
-        '''
-        return:
-            annotated_frame: np.ndarray
-            transfered_boxes: np.ndarray [N, 2, 2]: [[x0, y0], [x1, y1]]
-        '''
-        height, width, _ = origin_frame.shape
-        img_pil = PIL.Image.fromarray(origin_frame)
-        re_width, re_height = img_pil.size
-        _, image_tensor = self.image_transform_grounding(img_pil)
-        # img_pil = self.image_transform_grounding_for_vis(img_pil)
-
-        # run grounding
-        boxes, logits, phrases = predict(self.gd, image_tensor, grounding_caption, box_threshold, text_threshold, device=self.device)
-        annotated_frame = annotate(image_source=np.asarray(img_pil), boxes=boxes, logits=logits, phrases=phrases)[:, :, ::-1]
-        annotated_frame = cv2.resize(annotated_frame, (width, height), interpolation=cv2.INTER_LINEAR)
-
-        # transfer boxes to sam-format
-        transfered_boxes = self.transfer_boxes_format(boxes, re_height, re_width)
-        return annotated_frame, transfered_boxes
-
-if __name__ == "__main__":
-    detector = Detector("cuda")
-    origin_frame = cv2.imread('./debug/point.png')
-    origin_frame = cv2.cvtColor(origin_frame, cv2.COLOR_BGR2RGB)
-    grounding_caption = "swan.water"
-    box_threshold = 0.25
-    text_threshold = 0.25
-
-    annotated_frame, boxes = detector.run_grounding(origin_frame, grounding_caption, box_threshold, text_threshold)
-    cv2.imwrite('./debug/x.png', annotated_frame)
-
-    for i in range(len(boxes)):
-        bbox = boxes[i]
-        origin_frame = cv2.rectangle(origin_frame, bbox[0], bbox[1], (0, 0, 255))
-    cv2.imwrite('./debug/bbox_frame.png', origin_frame)
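Aside (not part of the diff): GroundingDINO's predict returns boxes normalized to [0, 1] in (cx, cy, w, h) order, and transfer_boxes_format rescales them to pixels and re-packs them as corner pairs for SAM. A minimal sketch of that conversion with made-up values:

import torch
import numpy as np
from torchvision.ops import box_convert

# One hypothetical GroundingDINO box: normalized (cx, cy, w, h) in [0, 1].
boxes = torch.tensor([[0.5, 0.5, 0.4, 0.2]])
height, width = 600, 800

# Scale to pixel units, then convert center format to corner format.
boxes = boxes * torch.tensor([width, height, width, height])
xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy")

# Re-pack as [[x0, y0], [x1, y1]] pairs, the layout segment_with_box expects.
pairs = np.array([[[int(b[0]), int(b[1])], [int(b[2]), int(b[3])]] for b in xyxy])
print(pairs)  # -> [[240, 240], [560, 360]] for the single box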
tool/segmentor.py
DELETED
@@ -1,96 +0,0 @@
-import torch
-import cv2
-import numpy as np
-from sam.segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
-
-class Segmentor:
-    def __init__(self, sam_args):
-        """
-        sam_args:
-            sam_checkpoint: path of SAM checkpoint
-            generator_args: args for everything_generator
-            gpu_id: device
-        """
-        self.device = sam_args["gpu_id"]
-        self.sam = sam_model_registry[sam_args["model_type"]](checkpoint=sam_args["sam_checkpoint"])
-        self.sam.to(device=self.device)
-        self.everything_generator = SamAutomaticMaskGenerator(model=self.sam, **sam_args['generator_args'])
-        self.interactive_predictor = self.everything_generator.predictor
-        self.have_embedded = False
-
-    @torch.no_grad()
-    def set_image(self, image):
-        # calculate the embedding only once per frame.
-        if not self.have_embedded:
-            self.interactive_predictor.set_image(image)
-            self.have_embedded = True
-    @torch.no_grad()
-    def interactive_predict(self, prompts, mode, multimask=True):
-        assert self.have_embedded, 'the image embedding for SAM must be set before predicting.'
-
-        if mode == 'point':
-            masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
-                                                                       point_labels=prompts['point_modes'],
-                                                                       multimask_output=multimask)
-        elif mode == 'mask':
-            masks, scores, logits = self.interactive_predictor.predict(mask_input=prompts['mask_prompt'],
-                                                                       multimask_output=multimask)
-        elif mode == 'point_mask':
-            masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
-                                                                       point_labels=prompts['point_modes'],
-                                                                       mask_input=prompts['mask_prompt'],
-                                                                       multimask_output=multimask)
-
-        return masks, scores, logits
-
-    @torch.no_grad()
-    def segment_with_click(self, origin_frame, coords, modes, multimask=True):
-        '''
-
-        return:
-            mask: one-hot
-        '''
-        self.set_image(origin_frame)
-
-        prompts = {
-            'point_coords': coords,
-            'point_modes': modes,
-        }
-        masks, scores, logits = self.interactive_predict(prompts, 'point', multimask)
-        mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
-        prompts = {
-            'point_coords': coords,
-            'point_modes': modes,
-            'mask_prompt': logit[None, :, :]
-        }
-        masks, scores, logits = self.interactive_predict(prompts, 'point_mask', multimask)
-        mask = masks[np.argmax(scores)]
-
-        return mask.astype(np.uint8)
-
-    def segment_with_box(self, origin_frame, bbox, reset_image=False):
-        if reset_image:
-            self.interactive_predictor.set_image(origin_frame)
-        else:
-            self.set_image(origin_frame)
-        # coord = np.array([[int((bbox[1][0] - bbox[0][0]) / 2.), int((bbox[1][1] - bbox[0][1]) / 2)]])
-        # point_label = np.array([1])
-
-        masks, scores, logits = self.interactive_predictor.predict(
-            point_coords=None,
-            point_labels=None,
-            box=np.array([bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]),
-            multimask_output=True
-        )
-        mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
-
-        masks, scores, logits = self.interactive_predictor.predict(
-            point_coords=None,
-            point_labels=None,
-            box=np.array([[bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]]),
-            mask_input=logit[None, :, :],
-            multimask_output=True
-        )
-        mask = masks[np.argmax(scores)]
-
-        return [mask]
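Aside (not part of the diff): the two deleted classes were presumably wired together as text prompt -> grounded boxes -> SAM masks. A hedged sketch of that wiring; the SAM checkpoint path, model_type, and generator_args below are assumptions, and draw_outline comes from tool/transfer_tools.py (also deleted, see next file):

import cv2

# Hypothetical wiring of Detector + Segmentor; paths and args are assumed.
detector = Detector("cuda")
segmentor = Segmentor({
    "gpu_id": "cuda",
    "model_type": "vit_b",                            # assumed SAM variant
    "sam_checkpoint": "./ckpt/sam_vit_b_01ec64.pth",  # assumed path
    "generator_args": {},                             # SamAutomaticMaskGenerator defaults
})

frame = cv2.cvtColor(cv2.imread("./debug/point.png"), cv2.COLOR_BGR2RGB)
annotated, boxes = detector.run_grounding(frame, "swan.water", 0.25, 0.25)

for bbox in boxes:  # each bbox is [[x0, y0], [x1, y1]]
    mask = segmentor.segment_with_box(frame, bbox)[0]
    frame = draw_outline(mask.astype("uint8") * 255, frame)
cv2.imwrite("./debug/grounded_masks.png", frame)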
tool/transfer_tools.py
DELETED
@@ -1,51 +0,0 @@
-import cv2
-import numpy as np
-
-def mask2bbox(mask):
-    if len(np.where(mask > 0)[0]) == 0:
-        print('no mask found')
-        return np.array([[0, 0], [0, 0]]).astype(np.int64)
-
-    x_ = np.sum(mask, axis=0)
-    y_ = np.sum(mask, axis=1)
-
-    x0 = np.min(np.nonzero(x_)[0])
-    x1 = np.max(np.nonzero(x_)[0])
-    y0 = np.min(np.nonzero(y_)[0])
-    y1 = np.max(np.nonzero(y_)[0])
-
-    return np.array([[x0, y0], [x1, y1]]).astype(np.int64)
-
-def draw_outline(mask, frame):
-    _, binary_mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
-
-    contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-    cv2.drawContours(frame, contours, -1, (0, 0, 255), 2)
-
-    return frame
-
-def draw_points(points, modes, frame):
-    neg_points = points[np.argwhere(modes == 0)[:, 0]]
-    pos_points = points[np.argwhere(modes == 1)[:, 0]]
-
-    for i in range(len(neg_points)):
-        point = neg_points[i]
-        cv2.circle(frame, (point[0], point[1]), 8, (255, 80, 80), -1)
-
-    for i in range(len(pos_points)):
-        point = pos_points[i]
-        cv2.circle(frame, (point[0], point[1]), 8, (0, 153, 255), -1)
-
-    return frame
-
-if __name__ == '__main__':
-    mask = cv2.imread('./debug/mask.jpg', cv2.IMREAD_GRAYSCALE)
-    frame = cv2.imread('./debug/frame.jpg')
-    draw_frame = draw_outline(mask, frame)
-
-    cv2.imwrite('./debug/outline.jpg', draw_frame)
-
-    # bbox = mask2bbox(mask)
-    # draw_0 = cv2.rectangle(mask, bbox[0], bbox[1], (0, 0, 255))
-    # cv2.imwrite('./debug/rect.png', draw_0)
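Aside (not part of the diff): mask2bbox takes a binary mask and returns the tight bounding box in the same [[x0, y0], [x1, y1]] corner-pair layout used throughout these files. A tiny worked example on a toy mask:

import numpy as np

# Toy mask: a 2x3 blob of ones inside a 5x5 grid.
mask = np.zeros((5, 5), dtype=np.uint8)
mask[1:3, 2:5] = 1  # rows 1-2, columns 2-4

print(mask2bbox(mask))  # [[2 1] [4 2]], i.e. x0=2, y0=1, x1=4, y1=2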