File size: 3,058 Bytes
29a65ce
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import torch


from matplotlib import pyplot as plt
import matplotlib.patches as patches
import gradio as gr


# Example images offered in the demo: (source URL, local file name) pairs,
# fetched in the order listed.
_EXAMPLE_IMAGES = (
    ('http://images.cocodataset.org/val2017/000000397133.jpg', 'example1.jpg'),
    ('http://images.cocodataset.org/val2017/000000037777.jpg', 'example2.jpg'),
    ('http://images.cocodataset.org/val2017/000000252219.jpg', 'example3.jpg'),
)
for _url, _filename in _EXAMPLE_IMAGES:
    torch.hub.download_url_to_file(_url, _filename)

# Build the SSD network without pretrained weights; the weights are loaded
# explicitly from the checkpoint fetched below.
ssd_model = torch.hub.load(
    'AK391/DeepLearningExamples:torchhub',
    'nvidia_ssd',
    pretrained=False,
    force_reload=True,
)

# map_location="cpu" so the checkpoint tensors are loaded onto the CPU.
checkpoint = torch.hub.load_state_dict_from_url(
    'https://api.ngc.nvidia.com/v2/models/nvidia/ssd_pyt_ckpt_amp/versions/20.06.0/files/nvidia_ssdpyt_amp_200703.pt',
    map_location="cpu",
)
ssd_model.load_state_dict(checkpoint['model'])

# Helper object providing the pre-/post-processing calls used by `inference`
# (prepare_input, prepare_tensor, decode_results, pick_best, ...).
utils = torch.hub.load(
    'AK391/DeepLearningExamples',
    'nvidia_ssd_processing_utils',
    force_reload=True,
)

# Keep the model on the CPU and switch it to evaluation mode.
ssd_model.to('cpu')
ssd_model.eval()


def inference(img):
    """Run the SSD model on one uploaded image and plot its detections.

    Args:
        img: Object whose ``name`` attribute is handed to
            ``utils.prepare_input`` (presumably the path of the uploaded
            file -- confirm against the Gradio input component).

    Returns:
        The ``matplotlib.pyplot`` module, with the annotated figure drawn
        as its current figure.
    """
    # A single-element batch built from the uploaded file.
    prepared = [utils.prepare_input(img.name)]
    batch = utils.prepare_tensor(prepared)

    # Forward pass only, so gradient tracking is disabled.
    with torch.no_grad():
        raw_detections = ssd_model(batch)

    decoded = utils.decode_results(raw_detections)
    # 0.40 is the threshold given to pick_best (presumably a minimum
    # confidence -- confirm against the processing utils).
    kept = [utils.pick_best(per_image, 0.40) for per_image in decoded]

    label_names = utils.get_coco_object_dictionary()
    for picture, detections in zip(prepared, kept):
        _, axes = plt.subplots(1)
        # Undo the input normalisation before display
        # (assumes pixel values were scaled to [-1, 1] -- TODO confirm).
        axes.imshow(picture / 2 + 0.5)

        boxes, class_ids, scores = detections
        for box, class_id, score in zip(boxes, class_ids, scores):
            left, bot, right, top = box
            # Box coordinates are multiplied by 300 before plotting
            # (presumably the side length of the prepared image -- confirm).
            x = left * 300
            y = bot * 300
            w = (right - left) * 300
            h = (top - bot) * 300

            outline = patches.Rectangle(
                (x, y), w, h, linewidth=1, edgecolor='r', facecolor='none'
            )
            axes.add_patch(outline)

            # Class ids are shifted by one to index the label list.
            caption = "{} {:.0f}%".format(label_names[class_id - 1], score * 100)
            axes.text(x, y, caption, bbox={'facecolor': 'white', 'alpha': 0.5})

    plt.axis('off')
    plt.draw()
    return plt

# Gradio I/O components: the uploaded image is passed to `inference` with
# type='file', and the value it returns is rendered with type="plot".
inputs = gr.inputs.Image(type='file', label="Original Image")
outputs = gr.outputs.Image(type="plot", label="Output Image")

# Text shown on the demo page.
title = "Single Shot MultiBox Detector model for object detection"
description = "Gradio demo for Single Shot MultiBox Detector model for object detection by Nvidia. To use it, upload an image or click one of the example images. Read more at the links below."
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/1512.02325'>SSD: Single Shot MultiBox Detector</a> | <a href='https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/Detection/SSD'>Github Repo</a></p>"

# One row per example; each row holds the value for the single image input.
# These are the files downloaded at the top of this script.
examples = [
    ['example1.jpg'],
    ['example2.jpg'],
    ['example3.jpg'],
]

gr.Interface(
    inference,
    inputs,
    outputs,
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch(debug=True, enable_queue=True)