aikenml committed
Commit 26779ff · 1 Parent(s): 2145598

Delete tool

tool/__pycache__/detector.cpython-311.pyc DELETED
Binary file (6.36 kB)
 
tool/__pycache__/segmentor.cpython-311.pyc DELETED
Binary file (5.54 kB)
 
tool/__pycache__/transfer_tools.cpython-311.pyc DELETED
Binary file (3.53 kB)
 
tool/detector.py DELETED
@@ -1,93 +0,0 @@
- import torch
- import numpy as np
- import cv2
- import PIL
-
- from groundingdino.models import build_model as build_grounding_dino
- from groundingdino.util.slconfig import SLConfig
- from groundingdino.util.utils import clean_state_dict
- from groundingdino.util.inference import annotate, load_image, predict
- import groundingdino.datasets.transforms as T
-
- from torchvision.ops import box_convert
-
- class Detector:
-     def __init__(self, device):
-         config_file = "src/groundingdino/groundingdino/config/GroundingDINO_SwinT_OGC.py"
-         grounding_dino_ckpt = './ckpt/groundingdino_swint_ogc.pth'
-         args = SLConfig.fromfile(config_file)
-         args.device = device
-         self.deivce = device
-         self.gd = build_grounding_dino(args)
-
-         checkpoint = torch.load(grounding_dino_ckpt, map_location='cpu')
-         log = self.gd.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
-         print("Model loaded from {} \n => {}".format(grounding_dino_ckpt, log))
-         self.gd.eval()
-
-     def image_transform_grounding(self, init_image):
-         transform = T.Compose([
-             T.RandomResize([800], max_size=1333),
-             T.ToTensor(),
-             T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
-         ])
-         image, _ = transform(init_image, None)  # 3, h, w
-         return init_image, image
-
-     def image_transform_grounding_for_vis(self, init_image):
-         transform = T.Compose([
-             T.RandomResize([800], max_size=1333),
-         ])
-         image, _ = transform(init_image, None)  # 3, h, w
-         return image
-
-     def transfer_boxes_format(self, boxes, height, width):
-         boxes = boxes * torch.Tensor([width, height, width, height])
-         boxes = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy")
-
-         transfered_boxes = []
-         for i in range(len(boxes)):
-             box = boxes[i]
-             transfered_box = [[int(box[0]), int(box[1])], [int(box[2]), int(box[3])]]
-             transfered_boxes.append(transfered_box)
-
-         transfered_boxes = np.array(transfered_boxes)
-         return transfered_boxes
-
-     @torch.no_grad()
-     def run_grounding(self, origin_frame, grounding_caption, box_threshold, text_threshold):
-         '''
-         return:
-             annotated_frame: nd.array
-             transfered_boxes: nd.array [N, 4]: [[x0, y0], [x1, y1]]
-         '''
-         height, width, _ = origin_frame.shape
-         img_pil = PIL.Image.fromarray(origin_frame)
-         re_width, re_height = img_pil.size
-         _, image_tensor = self.image_transform_grounding(img_pil)
-         # img_pil = self.image_transform_grounding_for_vis(img_pil)
-
-         # run grounding
-         boxes, logits, phrases = predict(self.gd, image_tensor, grounding_caption, box_threshold, text_threshold, device=self.deivce)
-         annotated_frame = annotate(image_source=np.asarray(img_pil), boxes=boxes, logits=logits, phrases=phrases)[:, :, ::-1]
-         annotated_frame = cv2.resize(annotated_frame, (width, height), interpolation=cv2.INTER_LINEAR)
-
-         # transfer boxes to sam-format
-         transfered_boxes = self.transfer_boxes_format(boxes, re_height, re_width)
-         return annotated_frame, transfered_boxes
-
- if __name__ == "__main__":
-     detector = Detector("cuda")
-     origin_frame = cv2.imread('./debug/point.png')
-     origin_frame = cv2.cvtColor(origin_frame, cv2.COLOR_BGR2RGB)
-     grounding_caption = "swan.water"
-     box_threshold = 0.25
-     text_threshold = 0.25
-
-     annotated_frame, boxes = detector.run_grounding(origin_frame, grounding_caption, box_threshold, text_threshold)
-     cv2.imwrite('./debug/x.png', annotated_frame)
-
-     for i in range(len(boxes)):
-         bbox = boxes[i]
-         origin_frame = cv2.rectangle(origin_frame, bbox[0], bbox[1], (0, 0, 255))
-     cv2.imwrite('./debug/bbox_frame.png', origin_frame)
 
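For reference, a minimal sketch of how the deleted Detector was driven, mirroring its own __main__ block; the image path, caption, and thresholds below are illustrative assumptions, and the import path assumes the pre-deletion layout:

import cv2
from tool.detector import Detector  # assumed pre-deletion import path

# Build the GroundingDINO-based detector; per __init__ this loads ./ckpt/groundingdino_swint_ogc.pth onto the given device.
detector = Detector("cuda")

# run_grounding expects an RGB array, so convert OpenCV's BGR frame first.
frame = cv2.cvtColor(cv2.imread("./debug/point.png"), cv2.COLOR_BGR2RGB)

# Text-prompted detection: returns the annotated frame and boxes as [[x0, y0], [x1, y1]] pairs in pixel coordinates.
annotated_frame, boxes = detector.run_grounding(frame, "swan.water", box_threshold=0.25, text_threshold=0.25)
cv2.imwrite("./debug/annotated.png", annotated_frame)  # assumed output path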
tool/segmentor.py DELETED
@@ -1,96 +0,0 @@
- import torch
- import cv2
- import numpy as np
- from sam.segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator
-
- class Segmentor:
-     def __init__(self, sam_args):
-         """
-         sam_args:
-             sam_checkpoint: path of SAM checkpoint
-             generator_args: args for everything_generator
-             gpu_id: device
-         """
-         self.device = sam_args["gpu_id"]
-         self.sam = sam_model_registry[sam_args["model_type"]](checkpoint=sam_args["sam_checkpoint"])
-         self.sam.to(device=self.device)
-         self.everything_generator = SamAutomaticMaskGenerator(model=self.sam, **sam_args['generator_args'])
-         self.interactive_predictor = self.everything_generator.predictor
-         self.have_embedded = False
-
-     @torch.no_grad()
-     def set_image(self, image):
-         # calculate the embedding only once per frame.
-         if not self.have_embedded:
-             self.interactive_predictor.set_image(image)
-             self.have_embedded = True
-     @torch.no_grad()
-     def interactive_predict(self, prompts, mode, multimask=True):
-         assert self.have_embedded, 'image embedding for sam need be set before predict.'
-
-         if mode == 'point':
-             masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
-                                                                        point_labels=prompts['point_modes'],
-                                                                        multimask_output=multimask)
-         elif mode == 'mask':
-             masks, scores, logits = self.interactive_predictor.predict(mask_input=prompts['mask_prompt'],
-                                                                        multimask_output=multimask)
-         elif mode == 'point_mask':
-             masks, scores, logits = self.interactive_predictor.predict(point_coords=prompts['point_coords'],
-                                                                        point_labels=prompts['point_modes'],
-                                                                        mask_input=prompts['mask_prompt'],
-                                                                        multimask_output=multimask)
-
-         return masks, scores, logits
-
-     @torch.no_grad()
-     def segment_with_click(self, origin_frame, coords, modes, multimask=True):
-         '''
-
-         return:
-             mask: one-hot
-         '''
-         self.set_image(origin_frame)
-
-         prompts = {
-             'point_coords': coords,
-             'point_modes': modes,
-         }
-         masks, scores, logits = self.interactive_predict(prompts, 'point', multimask)
-         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
-         prompts = {
-             'point_coords': coords,
-             'point_modes': modes,
-             'mask_prompt': logit[None, :, :]
-         }
-         masks, scores, logits = self.interactive_predict(prompts, 'point_mask', multimask)
-         mask = masks[np.argmax(scores)]
-
-         return mask.astype(np.uint8)
-
-     def segment_with_box(self, origin_frame, bbox, reset_image=False):
-         if reset_image:
-             self.interactive_predictor.set_image(origin_frame)
-         else:
-             self.set_image(origin_frame)
-         # coord = np.array([[int((bbox[1][0] - bbox[0][0]) / 2.), int((bbox[1][1] - bbox[0][1]) / 2)]])
-         # point_label = np.array([1])
-
-         masks, scores, logits = self.interactive_predictor.predict(
-             point_coords=None,
-             point_labels=None,
-             box=np.array([bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]),
-             multimask_output=True
-         )
-         mask, logit = masks[np.argmax(scores)], logits[np.argmax(scores), :, :]
-
-         masks, scores, logits = self.interactive_predictor.predict(
-             point_coords=None,
-             point_labels=None,
-             box=np.array([[bbox[0][0], bbox[0][1], bbox[1][0], bbox[1][1]]]),
-             mask_input=logit[None, :, :],
-             multimask_output=True
-         )
-         mask = masks[np.argmax(scores)]
-
-         return [mask]
 
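For reference, a minimal sketch of how the deleted Segmentor was presumably constructed and prompted with a box; the SAM model type, checkpoint path, generator settings, and box coordinates below are assumptions, not values taken from this repository:

import cv2
import numpy as np
from tool.segmentor import Segmentor  # assumed pre-deletion import path

# sam_args follows the constructor's docstring; the concrete values are assumptions.
sam_args = {
    "gpu_id": "cuda",
    "model_type": "vit_h",                            # assumed SAM variant
    "sam_checkpoint": "./ckpt/sam_vit_h_4b8939.pth",  # assumed checkpoint location
    "generator_args": {},
}
segmentor = Segmentor(sam_args)

frame = cv2.cvtColor(cv2.imread("./debug/frame.jpg"), cv2.COLOR_BGR2RGB)

# Box prompt in the [[x0, y0], [x1, y1]] format produced by Detector.run_grounding (coordinates are illustrative).
bbox = np.array([[100, 100], [300, 300]])
masks = segmentor.segment_with_box(frame, bbox)  # returns a one-element list containing the selected mask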
tool/transfer_tools.py DELETED
@@ -1,51 +0,0 @@
- import cv2
- import numpy as np
-
- def mask2bbox(mask):
-     if len(np.where(mask > 0)[0]) == 0:
-         print(f'not mask')
-         return np.array([[0, 0], [0, 0]]).astype(np.int64)
-
-     x_ = np.sum(mask, axis=0)
-     y_ = np.sum(mask, axis=1)
-
-     x0 = np.min(np.nonzero(x_)[0])
-     x1 = np.max(np.nonzero(x_)[0])
-     y0 = np.min(np.nonzero(y_)[0])
-     y1 = np.max(np.nonzero(y_)[0])
-
-     return np.array([[x0, y0], [x1, y1]]).astype(np.int64)
-
- def draw_outline(mask, frame):
-     _, binary_mask = cv2.threshold(mask, 0, 255, cv2.THRESH_BINARY)
-
-     contours, _ = cv2.findContours(binary_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-
-     cv2.drawContours(frame, contours, -1, (0, 0, 255), 2)
-
-     return frame
-
- def draw_points(points, modes, frame):
-     neg_points = points[np.argwhere(modes==0)[:, 0]]
-     pos_points = points[np.argwhere(modes==1)[:, 0]]
-
-     for i in range(len(neg_points)):
-         point = neg_points[i]
-         cv2.circle(frame, (point[0], point[1]), 8, (255, 80, 80), -1)
-
-     for i in range(len(pos_points)):
-         point = pos_points[i]
-         cv2.circle(frame, (point[0], point[1]), 8, (0, 153, 255), -1)
-
-     return frame
-
- if __name__ == '__main__':
-     mask = cv2.imread('./debug/mask.jpg', cv2.IMREAD_GRAYSCALE)
-     frame = cv2.imread('./debug/frame.jpg')
-     draw_frame = draw_outline(mask, frame)
-
-     cv2.imwrite('./debug/outline.jpg', draw_frame)
-
-     # bbox = mask2bbox(mask)
-     # draw_0 = cv2.rectangle(mask, bbox[0], bbox[1], (0, 0, 255))
-     # cv2.imwrite('./debug/rect.png', draw_0)
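For reference, a minimal sketch tying the deleted helpers together, following the module's own __main__ example: convert a binary mask to a box with mask2bbox and overlay its contour with draw_outline. The debug input paths come from that example; the output filename is an assumption:

import cv2
from tool.transfer_tools import mask2bbox, draw_outline  # assumed pre-deletion import path

mask = cv2.imread("./debug/mask.jpg", cv2.IMREAD_GRAYSCALE)  # binary (0/255) mask
frame = cv2.imread("./debug/frame.jpg")

bbox = mask2bbox(mask)             # [[x0, y0], [x1, y1]], or zeros if the mask is empty
frame = draw_outline(mask, frame)  # draws red contours onto the frame in place

x0, y0 = int(bbox[0][0]), int(bbox[0][1])
x1, y1 = int(bbox[1][0]), int(bbox[1][1])
frame = cv2.rectangle(frame, (x0, y0), (x1, y1), (0, 0, 255), 2)
cv2.imwrite("./debug/outline_with_box.png", frame)  # assumed output path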