Spaces:

rootstrap-org
/

waste-classifier

Sleeping

File size: 9,939 Bytes

fa84113

'''

EfficientDet demo: detect litter in a single image or in the frames of a video.

'''
import argparse
import cv2
import os
import time

from PIL import Image
import PIL.ImageColor as ImageColor
import requests
import matplotlib.pyplot as plt

import torch
import torchvision.transforms as T
from tqdm import tqdm

from effdet import create_model


def get_args_parser():
    """Build the command-line parser for the EfficientDet demo.

    Returns:
        argparse.ArgumentParser: parser exposing ``--img``, ``--save``,
        ``--classes``, ``--checkpoint``, ``--device``, ``--prob_threshold``
        and ``--video``. ``redundant_bias`` is additionally preset to None.
    """
    # The text must be passed as ``description``: the first positional
    # argument of ArgumentParser is ``prog`` and would replace the program
    # name in the usage line.
    parser = argparse.ArgumentParser(
        description='Test EfficientDet on one image or video')
    parser.add_argument(
        '--img', metavar='IMG',
        help='path to image, could be url',
        default='https://www.fyidenmark.com/images/denmark-litter.jpg')
    parser.add_argument(
        '--save', metavar='OUTPUT',
        help='path to save image with predictions (if None show image)',
        default=None)
    parser.add_argument(
        '--classes', nargs='+', default=['Litter'],
        help='class names (default: %(default)s)')
    parser.add_argument(
        '--checkpoint', type=str,
        help='path to checkpoint')
    # ``%(default)s`` lets argparse print the real default, so the help text
    # cannot drift away from the value again.
    parser.add_argument(
        '--device', type=str, default='cpu',
        help='device to evaluate model (default: %(default)s)')
    parser.add_argument(
        '--prob_threshold', type=float, default=0.3,
        help='probability threshold to show results (default: %(default)s)')
    parser.add_argument(
        '--video', action='store_true', default=False,
        help="If true, we treat input as video (default: %(default)s)")
    parser.set_defaults(redundant_bias=None)
    return parser


# standard PyTorch mean-std input image normalization
def get_transforms(im, size=768):
    """Resize, tensorize and normalize an image into a one-item batch.

    Args:
        im: image accepted by the torchvision transforms used below
            (``main`` passes an RGB PIL image).
        size: edge length of the square the image is resized to.

    Returns:
        The transformed image with a leading batch dimension added.
    """
    resize = T.Resize((size, size))
    to_tensor = T.ToTensor()
    normalize = T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    pipeline = T.Compose([resize, to_tensor, normalize])
    single = pipeline(im)
    return single.unsqueeze(0)


def rescale_bboxes(out_bbox, size, resize):
    """Map boxes from the network input resolution to the image resolution.

    Args:
        out_bbox: tensor whose last dimension holds four box coordinates;
            columns 0 and 2 are scaled horizontally, 1 and 3 vertically.
        size: ``(width, height)`` of the original image.
        resize: ``(width, height)`` of the resized network input.

    Returns:
        ``out_bbox`` multiplied element-wise by the per-axis scale factors.
    """
    img_w, img_h = size
    out_w, out_h = resize
    scale_x = img_w / out_w
    scale_y = img_h / out_h
    # build the factors directly on the boxes' device
    factors = torch.tensor([scale_x, scale_y, scale_x, scale_y],
                           dtype=torch.float32,
                           device=out_bbox.device)
    return out_bbox * factors


# from https://deepdrive.pl/
def get_output(img, prob, boxes, classes=['Litter'], stat_text=None):
    """Draw detections (and an optional status line) onto ``img`` with OpenCV.

    Args:
        img: image array handed to the OpenCV drawing calls.
        prob: iterable of per-detection entries; element 0 is used as the
            score and element 1 as the class id (1 is subtracted before it
            indexes ``classes`` and the color palette).
        boxes: object whose ``tolist()`` yields ``(x0, y0, x1, y1)`` rows.
        classes: class names, indexed by ``class id - 1``.
        stat_text: optional text rendered at position (30, 30).

    Returns:
        The ``img`` object that was passed in, after drawing.
    """
    # colors for visualization
    STANDARD_COLORS = [
        'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
        'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
        'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk',
        'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey',
        'DarkKhaki', 'DarkOrange', 'DarkOrchid', 'DarkSalmon',
        'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet',
        'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
        'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold',
        'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink',
        'IndianRed', 'Ivory', 'Khaki', 'Lavender', 'LavenderBlush',
        'LawnGreen', 'LemonChiffon', 'LightBlue',
        'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray',
        'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen',
        'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue',
        'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta',
        'MediumAquaMarine', 'MediumOrchid', 'MediumPurple',
        'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
        'MediumTurquoise', 'MediumVioletRed', 'MintCream',
        'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive',
        'OliveDrab', 'Orange', 'OrangeRed', 'Orchid', 'PaleGoldenRod',
        'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip',
        'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
        'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
        'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
        'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue',
        'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet',
        'Wheat', 'White', 'WhiteSmoke', 'Yellow', 'YellowGreen'
    ]
    # NOTE(review): these are RGB tuples from PIL, while OpenCV drawing calls
    # conventionally interpret color tuples as BGR -- confirm the channel
    # order is intended.
    palette = list(map(ImageColor.getrgb, STANDARD_COLORS))

    def draw_outlined(text, origin, fg_color, fg_thickness):
        # thick black pass first, then the colored text on top of it
        cv2.putText(img, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 0, 0), 10)
        cv2.putText(img, text, origin, cv2.FONT_HERSHEY_SIMPLEX, 1,
                    fg_color, fg_thickness)

    for detection, (x0, y0, x1, y1) in zip(prob, boxes.tolist()):
        cl = int(detection[1] - 1)
        color = palette[cl]
        top_left = (int(x0), int(y0))
        bottom_right = (int(x1), int(y1))
        cv2.rectangle(img, top_left, bottom_right, color, 2)
        label = "%s %.1f%%" % (classes[cl], detection[0]*100)
        draw_outlined(label, top_left, color, 2)

    if stat_text is not None:
        draw_outlined(stat_text, (30, 30), (255, 255, 255), 3)
    return img


# from https://deepdrive.pl/
def save_frames(args, num_iter=45913):
    """Run detection on every frame of a video and save annotated frames.

    Frames are read from ``args.img`` with OpenCV, resized to 768x768 for
    inference, annotated via ``get_output`` and written to ``args.save`` as
    ``img%08d.jpg`` files numbered from 0.

    Args:
        args: parsed command-line namespace; reads ``save``, ``img``,
            ``classes``, ``checkpoint``, ``device`` and ``prob_threshold``.
        num_iter: expected number of frames; only used to size the progress
            bar (its total is ``num_iter + 1``).
    """
    os.makedirs(args.save, exist_ok=True)

    num_classes = len(args.classes)
    model_name = args.checkpoint.split('-')[-1].split('/')[0]
    model = set_model(model_name, num_classes, args.checkpoint, args.device)
    model.eval()

    # Only query CUDA when it is actually in use: get_device_name() raises on
    # machines without a GPU, and the default device is 'cpu'.
    device = torch.device(args.device)
    if device.type == 'cuda' and torch.cuda.is_available():
        device_name = torch.cuda.get_device_name(device)
    else:
        device_name = str(device)

    # loop invariants: inference resolution and the normalization pipeline
    inference_size = (768, 768)
    transform = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    cap = cv2.VideoCapture(args.img)
    pbar = tqdm(total=num_iter+1)
    counter = 0
    try:
        while cap.isOpened():
            ret, img = cap.read()
            if not ret or img is None:
                print("END")
                break

            # scale + BGR to RGB
            scaled_img = cv2.resize(img[:, :, ::-1], inference_size)

            # mean-std normalize the input image (batch-size: 1)
            img_tens = transform(scaled_img).unsqueeze(0).to(args.device)

            # Inference
            t0 = time.time()
            with torch.no_grad():
                # propagate through the model
                output = model(img_tens)
            t1 = time.time()

            # keep only predictions above set confidence; column 4 is
            # compared against the threshold, columns 4: are passed on as
            # (score, class id) pairs
            bboxes_keep = output[0, output[0, :, 4] > args.prob_threshold]
            probas = bboxes_keep[:, 4:]

            # convert boxes to image scales
            bboxes_scaled = rescale_bboxes(bboxes_keep[:, :4],
                                           (img.shape[1], img.shape[0]),
                                           inference_size)

            txt = "Detect-waste %s Threshold=%.2f " \
                  "Inference %dx%d  GPU: %s Inference time %.3fs" % \
                  (model_name, args.prob_threshold, inference_size[0],
                   inference_size[1], device_name,
                   t1 - t0)
            result = get_output(img, probas, bboxes_scaled,
                                args.classes, txt)
            cv2.imwrite(os.path.join(args.save, 'img%08d.jpg' % counter),
                        result)
            counter += 1
            pbar.update(1)
    finally:
        # release the capture and the progress bar even if a frame fails
        cap.release()
        pbar.close()


def plot_results(pil_img, prob, boxes, classes=['Litter'],
                 save_path=None, colors=None):
    """Plot detections on an image with matplotlib, then save or show it.

    Args:
        pil_img: image passed to ``plt.imshow``.
        prob: iterable of per-detection entries; element 0 is the score and
            element 1 the class id, treated as 1-based (as in ``get_output``).
        boxes: iterable of ``(xmin, ymin, xmax, ymax)`` rows.
        classes: class names, indexed by ``class id - 1``.
        save_path: if given, the figure is saved there and closed;
            otherwise it is shown.
        colors: optional sequence of matplotlib colors, reused cyclically
            across detections; a built-in six-color palette is used if None.
    """
    from itertools import cycle

    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    ax = plt.gca()
    if colors is None:
        # colors for visualization
        colors = [
           [0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
           [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]
    # cycle() keeps zip from silently dropping detections once the color
    # list is exhausted
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes, cycle(colors)):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        # class ids are 1-based (get_output subtracts 1 as well); using the
        # raw id would index past the end of ``classes``
        cl = int(p[1]) - 1
        text = f'{classes[cl]}: {p[0]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    if save_path is not None:
        plt.savefig(save_path, bbox_inches='tight',
                    transparent=True, pad_inches=0)
        plt.close()
        print(f'Image saved at {save_path}')
    else:
        plt.show()


def set_model(model_type, num_classes, checkpoint_path, device):
    """Create an effdet prediction model, report its size, move it to device.

    Args:
        model_type: model name forwarded to ``create_model``.
        num_classes: number of classes forwarded to ``create_model``.
        checkpoint_path: checkpoint path forwarded to ``create_model``.
        device: target passed to ``model.to``.

    Returns:
        The result of ``model.to(device)``.
    """
    # create model
    model_kwargs = dict(
        bench_task='predict',
        num_classes=num_classes,
        pretrained=False,
        redundant_bias=True,
        checkpoint_path=checkpoint_path,
    )
    model = create_model(model_type, **model_kwargs)

    param_count = sum(p.numel() for p in model.parameters())
    print('Model %s created, param count: %d' % (model_type, param_count))
    return model.to(device)


def main(args):
    """Detect objects in a single image and plot or save the result.

    Autograd is disabled globally via ``torch.set_grad_enabled(False)``.

    Args:
        args: parsed command-line namespace; reads ``classes``,
            ``checkpoint``, ``device``, ``img``, ``prob_threshold`` and
            ``save``. ``img`` may be a local path or an http(s) URL.

    Raises:
        requests.HTTPError: if ``img`` is a URL and the download fails.
    """
    # prepare model for evaluation
    torch.set_grad_enabled(False)
    num_classes = len(args.classes)
    model_name = args.checkpoint.split('-')[-1].split('/')[0]
    model = set_model(model_name, num_classes, args.checkpoint, args.device)

    model.eval()
    # get image; plain http URLs are remote too, not local paths
    if args.img.startswith(('http://', 'https://')):
        response = requests.get(args.img, stream=True)
        # fail on 4xx/5xx instead of handing an error page to PIL
        response.raise_for_status()
        im = Image.open(response.raw).convert('RGB')
    else:
        im = Image.open(args.img).convert('RGB')

    # mean-std normalize the input image (batch-size: 1)
    img = get_transforms(im)

    # propagate through the model
    outputs = model(img.to(args.device))

    # keep only predictions above set confidence
    bboxes_keep = outputs[0, outputs[0, :, 4] > args.prob_threshold]
    probas = bboxes_keep[:, 4:]

    # convert boxes to image scales; the tensor's trailing dims are
    # (height, width) while rescale_bboxes expects (width, height)
    net_h, net_w = img.shape[2:]
    bboxes_scaled = rescale_bboxes(bboxes_keep[:, :4], im.size,
                                   (net_w, net_h))

    # plot and save demo image
    plot_results(im, probas, bboxes_scaled.tolist(), args.classes, args.save)


if __name__ == '__main__':
    # parse the CLI once, then dispatch: video frames or a single image
    args = get_args_parser().parse_args()
    run = save_frames if args.video else main
    run(args)