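"""Gradio demo for DETR object detection (facebook/detr-resnet-50).

The app exposes two tabbed views of the same model: one draws the detected
bounding boxes and class labels on the uploaded image, the other returns the
detected class names as plain text.
"""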
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image, ImageDraw
import gradio as gr
import random

# Load the pre-trained DETR model and its processor once at import time,
# instead of re-loading them on every request.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

def detect_objects(image):

    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)

    # Convert the raw outputs (class logits and boxes) into per-image detections,
    # keeping only detections with a confidence score > 0.9
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

    # Draw bounding boxes and labels on the image
    draw = ImageDraw.Draw(image)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(coord, 2) for coord in box.tolist()]
        # Pick a random color per detection so neighboring boxes stay distinguishable
        color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        draw.rectangle(box, outline=color, width=3)
        label_text = f"{model.config.id2label[label.item()]}: {round(score.item(), 2)}"
        # Draw the class name and score at the box's top-left corner
        draw.text((box[0], box[1]), label_text, fill=color)

    return image

def detect_labels(image):

    inputs = processor(images=image, return_tensors="pt")
    outputs = model(**inputs)

    # Convert the raw outputs (class logits and boxes) into per-image detections,
    # keeping only detections with a confidence score > 0.9
    target_sizes = torch.tensor([image.size[::-1]])
    results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

    labels = []
    for label_id in results["labels"]:
        labels.append(model.config.id2label[label_id.item()])

    return labels

def upload_image_with_boxes(file):
    image = Image.open(file.name).convert("RGB")
    image_with_boxes = detect_objects(image)
    return image_with_boxes

def upload_image_with_labels(file):
    image = Image.open(file.name).convert("RGB")
    labels = detect_labels(image)
    return ", ".join(labels)

iface_switch = gr.Interface(
    fn=upload_image_with_boxes,
    inputs="file",
    outputs="image",
    title="Object Detection with Boxes",
    description="Upload an image and detect objects using DETR model. Tap the image to switch to labels view.",
    allow_flagging=False
)

iface_labels = gr.Interface(
    fn=upload_image_with_labels,
    inputs="file",
    outputs="text",
    title="Detected Object Labels",
    description="Upload an image and get the detected object labels using DETR model. Tap the text to switch back to object detection with boxes view.",
    allow_flagging=False
)

# gr.Interface cannot combine existing interfaces, so use gr.TabbedInterface
# to let the user switch between the boxes view and the labels view.
iface_tapped = gr.TabbedInterface(
    [iface_switch, iface_labels],
    tab_names=["Boxes", "Labels"],
    title="Object Detection and Labels",
)

iface_tapped.launch()
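# Note: launch() serves the app on a local URL by default; calling
# iface_tapped.launch(share=True) instead would also create a temporary
# public Gradio link for sharing the demo.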