File size: 3,704 Bytes
2a4462d
 
 
 
 
 
0869998
7b271ac
3cf5303
e80b224
2a4462d
98375b6
2e620cf
0869998
f0b653f
 
7b3d454
18ff9ad
98375b6
18ff9ad
98375b6
2a4462d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98375b6
 
7b271ac
98375b6
 
 
 
 
7b271ac
98375b6
165b257
2a4462d
 
 
 
 
 
 
 
 
5fd0532
2a4462d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5fd0532
2a4462d
 
 
5fd0532
2a4462d
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import random

import albumentations as A
import cv2
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from pytorch_grad_cam.utils.image import show_cam_on_image

from model import YOLOv3
from utils import *
from utils import YoloCAM, cells_to_bboxes, non_max_suppression
from yolov3 import YOLOV3_PL

def inference(image: np.ndarray, iou_thresh: float = 0.5, thresh: float = 0.5,  transparency: float = 0.5):
    """Run YOLOv3 inference on a single image and return it annotated with boxes.

    Args:
        image: input image as an HxWx3 uint8 array (RGB assumed — confirm with caller).
        iou_thresh: IoU threshold passed to non-max suppression.
        thresh: confidence threshold for keeping predicted boxes.
        transparency: CAM overlay weight; currently unused (CAM path disabled)
            but kept for interface compatibility.

    Returns:
        A single-element list containing the annotated image.
    """
    model = YOLOV3_PL()
    model.load_state_dict(torch.load("model.pth", map_location=torch.device('cpu')), strict=False)
    # BUG FIX: switch to inference mode so BatchNorm/Dropout use running
    # statistics — without this, predictions are computed in training mode.
    model.eval()

    scaled_anchors = config.SCALED_ANCHORS

    # Resize the longest side to IMAGE_SIZE, pad to a square, normalize to [0, 1].
    transforms = A.Compose(
        [
            A.LongestMaxSize(max_size=config.IMAGE_SIZE),
            A.PadIfNeeded(
                min_height=config.IMAGE_SIZE, min_width=config.IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
            ),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255,),
            ToTensorV2(),
        ],
    )

    with torch.no_grad():
        transformed_image = transforms(image=image)["image"].unsqueeze(0)
        output = model(transformed_image)

        # Single-image batch: one accumulator list of boxes.
        bboxes = [[] for _ in range(1)]
        for i in range(3):  # three YOLO prediction scales
            S = output[i].shape[2]  # grid size at this scale
            anchor = scaled_anchors[i].to('cpu')
            boxes_scale_i = cells_to_bboxes(
                output[i].to('cpu'), anchor, S=S, is_preds=True
            )
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box

    nms_boxes = non_max_suppression(
        bboxes[0], iou_threshold=iou_thresh, threshold=thresh, box_format="midpoint",
    )
    plot_img = draw_predictions(image, nms_boxes, class_labels=config.PASCAL_CLASSES)
    return [plot_img]



def draw_predictions(image: np.ndarray, boxes: list[list], class_labels: list[str]) -> np.ndarray:
    """Draw predicted bounding boxes and class labels onto the image in place.

    Args:
        image: HxWx3 image array; annotated in place and also returned.
        boxes: each box is [class_pred, confidence, cx, cy, w, h] with
            midpoint-format coordinates normalized to [0, 1].
        class_labels: class names indexed by integer class id.

    Returns:
        The same ``image`` array with boxes and labels drawn on it.
    """
    # One random color per class (regenerated on every call — colors are not
    # stable across invocations).
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in class_labels]

    height, width, _ = image.shape
    # Scale line thickness with image size.
    # BUG FIX: clamp thicknesses to >= 1 — the old int((h+w)/500) and its
    # //2 derivative could be 0 for small images, which cv2 rejects.
    bbox_thick = max(1, int((height + width) / 500))
    text_thick = max(1, bbox_thick // 2)

    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_pred = int(box[0])
        conf = box[1]
        cx, cy, w, h = box[2:]

        # Convert normalized midpoint coords to integer pixel corners.
        x1 = int((cx - w / 2) * width)
        y1 = int((cy - h / 2) * height)
        x2 = x1 + int(w * width)
        y2 = y1 + int(h * height)

        color = colors[class_pred]
        cv2.rectangle(image, (x1, y1), (x2, y2), color=color, thickness=bbox_thick)

        # Filled label background sized to the rendered text, then the text.
        text = f"{class_labels[class_pred]}: {conf:.2f}"
        t_size = cv2.getTextSize(text, 0, 0.7, thickness=text_thick)[0]
        c3 = (x1 + t_size[0], y1 - t_size[1] - 3)
        cv2.rectangle(image, (x1, y1), c3, color, -1)
        cv2.putText(
            image,
            text,
            (x1, y1 - 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 0, 0),
            text_thick,
            lineType=cv2.LINE_AA,
        )

    return image