# Spaces: BhumikaMak/NeuralVista (Running)
# File size: 2,276 Bytes

from ultralytics import YOLO
import cv2
import numpy as np
from PIL import Image
import torch
from torchcam.methods import GradCAM
from torchcam.utils import overlay_mask

# Palette for bounding boxes: 80 rows of three random channel values in [0, 255),
# indexed by class id when detections are parsed.
COLORS = np.random.uniform(low=0, high=255, size=(80, 3))

def parse_detections_yolov8(results, conf_threshold=0.2):
    """Extract boxes, colours and class names from one YOLOv8 result.

    Args:
        results: A single result object exposing ``boxes`` (an iterable of
            detections, each with ``conf``, ``xyxy`` and ``cls``) and
            ``names`` (a mapping from class index to label).
        conf_threshold: Detections whose confidence is strictly below this
            value are dropped. Defaults to 0.2.

    Returns:
        A ``(boxes, colors, names)`` tuple of three parallel lists: integer
        ``(xmin, ymin, xmax, ymax)`` tuples, rows of ``COLORS`` and class
        labels. All three are empty when there is nothing to report.
    """
    boxes, colors, names = [], [], []
    detections = results.boxes
    if detections is None:
        # No box output at all: report "no detections" instead of failing
        # on iteration.
        return boxes, colors, names

    palette_size = len(COLORS)
    for box in detections:
        confidence = box.conf[0].item()
        if confidence < conf_threshold:  # Filter out low-confidence detections
            continue
        # int() truncates the coordinates toward zero.
        xmin, ymin, xmax, ymax = map(int, box.xyxy[0].tolist())
        category = int(box.cls[0].item())
        boxes.append((xmin, ymin, xmax, ymax))
        # Wrap around the palette so class ids beyond its length reuse a
        # colour instead of raising IndexError.
        colors.append(COLORS[category % palette_size])
        names.append(results.names[category])
    return boxes, colors, names

def draw_detections(boxes, colors, names, img):
    """Draw labelled bounding boxes onto ``img`` and return it.

    Args:
        boxes: Iterable of ``(xmin, ymin, xmax, ymax)`` integer tuples.
        colors: Iterable of colours, one per box, passed straight to OpenCV.
        names: Iterable of label strings, one per box.
        img: Image array to draw on. It is modified in place; pass a copy to
            keep the original untouched.

    Returns:
        The same ``img`` object that was passed in.
    """
    font_scale = 0.8
    thickness = 2
    for (xmin, ymin, xmax, ymax), color, name in zip(boxes, colors, names):
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), color, thickness)
        # putText anchors the text at its bottom-left corner. For a box that
        # touches the top edge, "ymin - 5" would put the label above the
        # canvas, so clamp the baseline to leave room for the glyph height.
        (_, text_height), _ = cv2.getTextSize(
            name, cv2.FONT_HERSHEY_SIMPLEX, font_scale, thickness)
        label_y = max(ymin - 5, text_height + 5)
        cv2.putText(img, name, (xmin, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, font_scale, color, thickness,
                    lineType=cv2.LINE_AA)
    return img

def xai_yolov8(image):
    """Detect objects with YOLOv8 and show them next to a Grad-CAM heat map.

    Args:
        image: Input picture, either a PIL image or a NumPy array that
            ``PIL.Image.fromarray`` can read (e.g. ``H x W x 3`` uint8).

    Returns:
        A ``(PIL.Image.Image, str)`` tuple: a side-by-side picture (left: the
        image with the detections drawn on it, right: the Grad-CAM overlay)
        and a caption string.
    """
    import copy  # Local import: only this function needs it.

    # Load YOLOv8 model
    model = YOLO('yolov8n.pt')  # Load YOLOv8 nano model
    model.to('cpu')
    model.eval()

    # Grad-CAM needs a network that autograd can differentiate through.
    # NOTE(review): the ultralytics predictor is believed to fuse layers and
    # run under inference mode, which can leave the module unusable for
    # backward passes - hence a private copy taken *before* detection.
    cam_net = copy.deepcopy(model.model).eval()

    # Normalise the input to an RGB uint8 array so that OpenCV drawing, the
    # tensor conversion and the final hstack all see the same H x W x 3 shape.
    if isinstance(image, Image.Image):
        pil_image = image.convert('RGB')
    else:
        pil_image = Image.fromarray(np.asarray(image)).convert('RGB')
    rgb = np.array(pil_image)

    # Run YOLO detection
    results = model(image)
    boxes, colors, names = parse_detections_yolov8(results[0])
    detections_img = draw_detections(boxes, colors, names, rgb.copy())

    # Convert image to PyTorch tensor for Grad-CAM. The raw network has no
    # letterboxing of its own, so resize to a fixed 640 x 640 input
    # (assumed to satisfy the network's stride requirement - TODO confirm);
    # the heat map is stretched back to the image size by overlay_mask.
    image_tensor = torch.from_numpy(rgb).permute(2, 0, 1).unsqueeze(0).float() / 255.0
    image_tensor = torch.nn.functional.interpolate(
        image_tensor, size=(640, 640), mode='bilinear', align_corners=False)
    # The input must require grad so the hooked activation is part of the
    # autograd graph even if the network's own parameters are frozen.
    image_tensor = image_tensor.to('cpu').requires_grad_(True)

    with torch.enable_grad():
        # Pass the layer module itself; a string here must be a module name
        # as listed by named_modules(), not a Python expression.
        grad_cam = GradCAM(cam_net, target_layer=cam_net.model[-4])

        # torchcam hooks fire during this forward pass.
        preds = cam_net(image_tensor)
        if isinstance(preds, (list, tuple)):
            preds = preds[0]
        # NOTE(review): assumes the head output is (batch, 4 + classes,
        # anchors) with box coordinates in the first four channels - confirm
        # against the installed ultralytics version.
        class_scores = preds[:, 4:, :].amax(dim=2)
        class_idx = int(class_scores.argmax(dim=1)[0])

        # Perform Grad-CAM for the strongest class
        cam_maps = grad_cam(class_idx, class_scores)

    # torchcam returns one map per target layer; take the only one and drop
    # the batch dimension.
    cam_map = cam_maps[0].squeeze(0).detach().cpu().numpy()
    cam_map = np.nan_to_num(cam_map).astype(np.float32)

    # Overlay Grad-CAM mask onto original image (overlay_mask takes and
    # returns PIL images).
    cam_overlay = overlay_mask(pil_image, Image.fromarray(cam_map), alpha=0.5)
    cam_image = np.array(cam_overlay.convert('RGB'))

    # Combine detections image and Grad-CAM image
    final_image = np.hstack((detections_img, cam_image))
    caption = "Results using YOLOv8 and Grad-CAM via torchcam"
    return Image.fromarray(final_image), caption