"""EigenCAM-based explainability for YOLOv5 object detections."""
import cv2
import gradio as gr
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image

# Global color palette: one random color per COCO class (80 classes).
COLORS = np.random.uniform(0, 255, size=(80, 3))


def parse_detections(results):
    """Extract boxes, colors, names, and class IDs from YOLOv5 results."""
    detections = results.pandas().xyxy[0].to_dict()
    boxes, colors, names, classes = [], [], [], []
    for i in range(len(detections["xmin"])):
        confidence = detections["confidence"][i]
        if confidence < 0.2:  # Skip low-confidence detections.
            continue
        xmin, ymin = int(detections["xmin"][i]), int(detections["ymin"][i])
        xmax, ymax = int(detections["xmax"][i]), int(detections["ymax"][i])
        name, category = detections["name"][i], int(detections["class"][i])
        boxes.append((xmin, ymin, xmax, ymax))
        colors.append(COLORS[category])
        names.append(name)
        classes.append(category)
    return boxes, colors, names, classes


def draw_detections(boxes, colors, names, classes, img):
    """Draw labeled bounding boxes ("<class id>: <name>") onto img in place."""
    for box, color, name, cls in zip(boxes, colors, names, classes):
        xmin, ymin, xmax, ymax = box
        color = tuple(int(c) for c in color)  # cv2 expects a plain numeric tuple
        label = f"{cls}: {name}"  # Combine class ID and name
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, 2)
        cv2.putText(
            img, label, (xmin, ymin - 5),
            cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
            lineType=cv2.LINE_AA
        )
    return img


def generate_cam_image(model, target_layers, tensor, rgb_img, boxes):
    """Compute an EigenCAM heatmap plus a per-box renormalized variant."""
    cam = EigenCAM(model, target_layers)
    grayscale_cam = cam(tensor)[0, :, :]
    img_float = np.float32(rgb_img) / 255
    cam_image = show_cam_on_image(img_float, grayscale_cam, use_rgb=True)

    # Renormalize the CAM inside each detected box so every object gets full contrast.
    renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
    for x1, y1, x2, y2 in boxes:
        renormalized_cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())
    renormalized_cam = scale_cam_image(renormalized_cam)
    renormalized_cam_image = show_cam_on_image(img_float, renormalized_cam, use_rgb=True)

    return cam_image, renormalized_cam_image


def xai_yolov5(image):
    """Run YOLOv5s on an RGB image and return a side-by-side visualization plus caption."""
    # Note: loading the model on every call is slow; a long-running app would
    # load it once at module scope instead.
    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
    model.eval()
    model.cpu()

    target_layers = [model.model.model.model[-2]]  # EigenCAM target: last block before the Detect head

    # Run YOLO detection
    results = model([image])
    boxes, colors, names, classes = parse_detections(results)
    detections_img = draw_detections(boxes, colors, names, classes, image.copy())

    # Prepare the input tensor for EigenCAM (HWC float in [0, 1] -> 1xCxHxW)
    img_float = np.float32(image) / 255
    tensor = transforms.ToTensor()(img_float).unsqueeze(0)

    # EigenCAM visualization
    cam_image, renormalized_cam_image = generate_cam_image(model, target_layers, tensor, image, boxes)

    # Original, detections, and box-renormalized CAM side by side
    final_image = np.hstack((image, detections_img, renormalized_cam_image))
    caption = "Results using YOLOv5"
    return Image.fromarray(final_image), caption


"""
import yaml
import torch
import warnings
warnings.filterwarnings('ignore')
from PIL import Image
import numpy as np
import requests
import cv2
import torch
from pytorch_grad_cam import DeepFeatureFactorization
from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image
from pytorch_grad_cam.utils.image import deprocess_image, show_factorization_on_image

# Run on GPU when available, otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
mean = [0.485, 0.456, 0.406]  # ImageNet RGB channel means (unused in this draft)
std = [0.229, 0.224, 0.225]   # ImageNet RGB channel std devs (unused in this draft)

# Load YOLOv5 and move it to the selected device.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True).to(device)
print(f"Loaded YOLOv5 model on {device}")
def create_labels(concept_scores, top_k=2):
    """Label each DFF concept with its top_k COCO categories and scores."""
    yolov5_categories_url = \
        "https://github.com/ultralytics/yolov5/raw/master/data/coco128.yaml"  # YOLOv5 category names
    yaml_data = requests.get(yolov5_categories_url).text
    labels = yaml.safe_load(yaml_data)['names']  # class index -> class name

    # For each concept, take the indices of the top_k highest-scoring categories.
    concept_categories = np.argsort(concept_scores, axis=1)[:, ::-1][:, :top_k]
    concept_labels_topk = []
    for concept_index in range(concept_categories.shape[0]):
        categories = concept_categories[concept_index, :]
        concept_labels = []
        for category in categories:
            score = concept_scores[concept_index, category]
            concept_labels.append(f"{labels[category]}:{score:.2f}")
        concept_labels_topk.append("\n".join(concept_labels))
    return concept_labels_topk

def get_image_from_url(url, device):
    """Download an image, resize it to 640x640, and build a 1xCxHxW float tensor."""
    img = np.array(Image.open(requests.get(url, stream=True).raw).convert("RGB"))
    img = cv2.resize(img, (640, 640))
    rgb_img_float = np.float32(img) / 255.0
    input_tensor = torch.from_numpy(rgb_img_float).permute(2, 0, 1).unsqueeze(0).to(device)
    return img, rgb_img_float, input_tensor

def visualize_image(model, img_url, n_components=20, top_k=1, lyr_idx=2):
    img, rgb_img_float, input_tensor = get_image_from_url(img_url, device)

    # Target layer for DeepFeatureFactorization, counted back from the Detect head.
    target_layer = model.model.model.model[-lyr_idx]

    dff = DeepFeatureFactorization(model=model.model, target_layer=target_layer)

    # Run DFF on the input tensor.
    concepts, batch_explanations = dff(input_tensor, n_components)

    # Softmax-normalize concept scores, then build per-concept labels.
    concept_outputs = torch.softmax(torch.from_numpy(concepts), dim=-1).numpy()
    concept_label_strings = create_labels(concept_outputs, top_k=top_k)

    # Overlay the factorized concepts on the image.
    visualization = show_factorization_on_image(rgb_img_float,
                                                batch_explanations[0],
                                                image_weight=0.2,
                                                concept_labels=concept_label_strings)

    plt.imshow(visualization)
    plt.savefig(f"test{lyr_idx}.png")
    result = np.hstack((img, visualization))

    # Downscale tall results for easier viewing.
    if result.shape[0] > 500:
        result = cv2.resize(result, (result.shape[1] // 4, result.shape[0] // 4))

    return result

# Sweep candidate target layers; visualize_image() writes "test<lyr_idx>.png" for each.
for indx in range(2, 12):
    visualize_image(
        model,
        "https://github.com/jacobgil/pytorch-grad-cam/blob/master/examples/both.png?raw=true",
        lyr_idx=indx,
    )
"""