import random

import albumentations as A
import cv2
import numpy as np
import torch
from albumentations.pytorch import ToTensorV2
from pytorch_grad_cam.utils.image import show_cam_on_image

import config
from model import YOLOv3
from utils import YoloCAM, cells_to_bboxes, non_max_suppression
from yolov3 import YOLOV3_PL


def inference(image: np.ndarray, iou_thresh: float = 0.5, thresh: float = 0.5, transparency: float = 0.5):
    """Run YOLOv3 on one RGB image and return it with predicted boxes drawn."""
    model = YOLOV3_PL()  # alternatively: YOLOv3(num_classes=20)
    # strict=False tolerates key mismatches between the checkpoint and the model.
    model.load_state_dict(torch.load("model.pth", map_location=torch.device("cpu")), strict=False)
    model.eval()  # freeze batch-norm statistics and disable dropout for inference
    scaled_anchors = config.SCALED_ANCHORS
    # Optional Grad-CAM setup, used by the commented-out overlay below:
    # cam = YoloCAM(model=model, target_layers=[model.layers[-2]], use_cuda=False)
    # Resize the longest side to IMAGE_SIZE, pad to a square, scale pixels to [0, 1].
    transforms = A.Compose(
        [
            A.LongestMaxSize(max_size=config.IMAGE_SIZE),
            A.PadIfNeeded(
                min_height=config.IMAGE_SIZE,
                min_width=config.IMAGE_SIZE,
                border_mode=cv2.BORDER_CONSTANT,
            ),
            A.Normalize(mean=[0, 0, 0], std=[1, 1, 1], max_pixel_value=255),
            ToTensorV2(),
        ]
    )
    with torch.no_grad():
        transformed_image = transforms(image=image)["image"].unsqueeze(0)
        output = model(transformed_image)

        bboxes = [[] for _ in range(1)]  # single-image batch
        # Decode each of the three prediction scales into image-relative boxes.
        for i in range(3):
            _, _, S, _, _ = output[i].shape
            anchor = scaled_anchors[i].to("cpu")
            boxes_scale_i = cells_to_bboxes(output[i].to("cpu"), anchor, S=S, is_preds=True)
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box

    # Merge boxes from all scales and suppress overlapping detections.
    nms_boxes = non_max_suppression(
        bboxes[0],
        iou_threshold=iou_thresh,
        threshold=thresh,
        box_format="midpoint",
    )
    plot_img = draw_predictions(image, nms_boxes, class_labels=config.PASCAL_CLASSES)
    # Optional Grad-CAM overlay (requires the `cam` object above):
    # grayscale_cam = cam(transformed_image, scaled_anchors)[0, :, :]
    # img = np.float32(cv2.resize(image, (416, 416))) / 255
    # cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True, image_weight=transparency)
    # return [plot_img, cam_image]
    return [plot_img]


def draw_predictions(image: np.ndarray, boxes: list[list], class_labels: list[str]) -> np.ndarray:
    """Draw predicted bounding boxes and class/confidence labels on the image."""
    # One random color per class so detections are visually distinguishable.
    colors = [[random.randint(0, 255) for _ in range(3)] for _ in class_labels]
    im = np.array(image)
    height, width, _ = im.shape
    bbox_thick = max(1, int((height + width) / 500))  # guard against zero thickness on small images
    for box in boxes:
        assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
        class_pred, conf = box[0], box[1]
        box = box[2:]
        # Convert normalized midpoint format (cx, cy, w, h) to pixel corner coordinates.
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        x1 = int(upper_left_x * width)
        y1 = int(upper_left_y * height)
        x2 = x1 + int(box[2] * width)
        y2 = y1 + int(box[3] * height)
        cv2.rectangle(
            image,
            (x1, y1),
            (x2, y2),
            color=colors[int(class_pred)],
            thickness=bbox_thick,
        )
        text = f"{class_labels[int(class_pred)]}: {conf:.2f}"
        font_thick = max(1, bbox_thick // 2)  # cv2 requires text thickness >= 1
        t_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.7, thickness=font_thick)[0]
        c3 = (x1 + t_size[0], y1 - t_size[1] - 3)
        # Filled rectangle as a background for the label text.
        cv2.rectangle(image, (x1, y1), c3, colors[int(class_pred)], -1)
        cv2.putText(
            image,
            text,
            (x1, y1 - 2),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            (0, 0, 0),
            font_thick,
            lineType=cv2.LINE_AA,
        )
    return image
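

# Minimal usage sketch (illustrative, not part of the original module): assumes the
# "model.pth" checkpoint loaded inside inference() sits next to this file, and uses
# the placeholder image path "sample.jpg".
if __name__ == "__main__":
    bgr = cv2.imread("sample.jpg")
    assert bgr is not None, "could not read sample.jpg"
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)  # the transform pipeline expects RGB input
    annotated = inference(rgb, iou_thresh=0.5, thresh=0.4)[0]
    cv2.imwrite("prediction.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))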