Spaces:

BhumikaMak
/

NeuralVista

Sleeping

File size: 16,113 Bytes

import gradio as gr
import os

# Locations of the pre-rendered XAI figures, resolved against the current
# working directory at import time.
yolov5_result, yolov8_result, yolov5_dff, yolov8_dff = (
    os.path.join(os.getcwd(), relative_path)
    for relative_path in (
        "data/xai/yolov5.png",
        "data/xai/yolov8.png",
        "data/xai/yolov5_dff.png",
        "data/xai/yolov8_dff.png",
    )
)

# Markdown summary of the YOLOv5 architecture; rendered with gr.Markdown at the
# top of the YOLOv5 column of the comparison row.
architecture_description_yolov5 = """
### YOLOv5 Architecture

- **Backbone**: Uses **CSPDarknet53** for feature extraction with **ResNet**-like residual connections.
- **Neck**: **PANet** and **FPN** aggregate features at multiple scales.
- **Head**: Predicts bounding boxes (x, y, w, h), class probabilities, and objectness scores.
- **Loss Functions**: **CIoU** for bounding box regression, **cross-entropy** for classification.
- **Grid-based Detection**: Divides input into grid cells predicting multiple bounding boxes.
- **Non-Maximum Suppression (NMS)**: Filters overlapping boxes with high confidence.
"""

# Markdown summary of the YOLOv8s architecture; rendered with gr.Markdown at
# the top of the YOLOv8 column of the comparison row.
# The head/prediction bullets describe YOLOv8 as anchor-free without a separate
# objectness output, which is how the Ultralytics documentation describes it.
architecture_description_yolov8s = """
### YOLOv8s Architecture

- **Backbone**: Uses **CSPDarknet** with efficient feature extraction layers.
- **Neck**: Incorporates **FPN** and **PANet** for multi-scale feature aggregation.
- **Head**: Decoupled, anchor-free head that predicts bounding boxes and class probabilities (no separate objectness score).
- **Loss Functions**: **CIoU** for bounding box regression, **cross-entropy** for classification.
- **Anchor-free Prediction**: Outputs box predictions directly per grid cell, without predefined anchor boxes.
- **Post-Processing**: **NMS** removes redundant boxes with high overlap.

"""

# Markdown commentary on the YOLOv5 interpretability figures; rendered in the
# YOLOv5 column between the detection image and the factorization image.
# Lines end with two spaces on purpose (Markdown hard line breaks).
description_yolov5 = """
#### **1. Feature Focus for Dogs:**  
- The model primarily focuses on:  
  - **Face and Snout:** Key identifiers like eyes, nose, and mouth for dog recognition.  
  - **Ears:** Distinctive ear shapes, such as pointed or floppy ears, are highlighted.  
  - **Body Shape:** Focuses on the overall body contour, including legs and tails.  
  - **Fur Texture:** Highlights curly fur in poodles and smooth fur in corgis.  

#### **2. Feature Focus for Cats:**  
- The network highlights:  
  - **Face and Eyes:** Sharp eyes and whiskers as distinguishing features.  
  - **Ears:** Pointy ears emphasized for identification.  
  - **Body Size and Posture:** Compact body shape and sitting posture are captured.  
  - **Tail and Paws:** Occasionally highlights tails or paws for finer details.  

#### **3. Incorrect Focus or Ambiguity:**  
- Some panels show **misfocused areas**, such as:  
  - **Background:** Irrelevant parts receive attention, causing confusion.  
  - **Shared Features:** Overlap in fur patterns or body shapes may lead to errors.  

#### **4. Insights:**  
- The visualizations explain **why the model classifies certain parts as important** for distinguishing dogs and cats.  
- They highlight **potential biases** or areas for improvement, e.g., reliance on shared features instead of species-specific traits.  
"""

# Markdown commentary on the YOLOv8 interpretability figures; rendered in the
# YOLOv8 column between the detection image and the factorization image.
# Lines end with two spaces on purpose (Markdown hard line breaks).
description_yolov8 = """

#### **1. Feature Focus for Dogs:**  
- **Facial Features:**  Strong emphasis on **eyes, nose, and mouth**, aiding in species identification.  
- **Ears and Fur Texture:** Highlights distinctive **fluffy or smooth fur patterns** and **ear shapes** (pointed or floppy).  
- **Body and Legs:** Focus on the **overall contour, legs, and tails**, distinguishing body structures of breeds like poodles and corgis.  

#### **2. Feature Focus for Cats:**  
- **Face and Eyes:**  Sharp focus on **eyes and whiskers**, key characteristics of feline features.  
- **Body Shape and Tail:**  Emphasizes the **compact size and tail curvature**.  
- **Paws and Posture:**  Captures sitting postures and detailed paw structures.  

#### **3. Incorrect Focus or Ambiguity:**  
- **Background Highlights:** Some heatmaps show attention to **background regions**, leading to irrelevant feature extraction.  
- **Shared Features:** Overlapping fur patterns and body structures between dogs and cats occasionally result in misclassification risks.  
- **Edge and Border Effects:** Emphasis on **image boundaries** could reflect dataset biases during training.  

#### **4. Insights:**  
- Refinements, such as **attention-based mechanisms**, can improve focus on discriminative features and reduce errors.  
"""



# Netron HTML templates
def get_netron_html(model_url):
    """Return an HTML snippet that embeds *model_url* in a bordered iframe.

    The URL is inserted verbatim as the iframe's ``src`` attribute; the
    iframe is wrapped in a black ``<div>`` with a thin white border.
    """
    # Spelled out line by line so the whitespace of the markup is explicit.
    markup_lines = (
        '',
        '        <div style="background-color: black; padding: 1px; border: 0.5px solid white;">',
        '            <iframe ',
        f'                src="{model_url}" ',
        '                width="100%" ',
        '                height="800" ',
        '                frameborder="0">',
        '            </iframe>',
        '        </div>',
        '    ',
    )
    return "\n".join(markup_lines)

# Netron viewer links: each one hands a model file location to netron.app
# through its `url` query parameter.
# NOTE(review): the YOLOv5 link names a VAE encoder file in the
# FFusion/FFusionXL-BASE repository rather than a YOLOv5 file -- confirm that
# this is the model that should be displayed.
yolov5_url = (
    "https://netron.app/?url="
    "https://huggingface.co/FFusion/FFusionXL-BASE/blob/main/vae_encoder/model.onnx"
)
yolov8_url = (
    "https://netron.app/?url="
    "https://huggingface.co/spaces/BhumikaMak/NeuralVista/resolve/main/weight_files/yolov8s.pt"
)

# Stylesheet handed to gr.Blocks(css=...).
# Each selector is defined exactly once: the sheet used to repeat the
# #highlighted-text, #neural-vista-title and #neural-vista-text rules with
# identical declarations, which invited the copies drifting apart.
# NOTE(review): `border: 3px #800000 !important` has no border-style, so the
# style falls back to `none` and no border is drawn; add `solid` if a visible
# border is actually wanted.
custom_css = """
body {
    background-color: #fcf4ee;
    background-size: 1800px 1800px;
    height: 100%;
    margin: 0;
    overflow-y: auto;
}
#neural-vista-title {
    color: #800000 !important;
    font-size: 32px;
    font-weight: bold;
    text-align: center;
}
#neural-vista-text {
    color: #800000  !important;
    font-size: 18px;
    font-weight: bold;
    text-align: center;
}
#highlighted-text {
    font-weight: bold;
    color: #1976d2;
}
.custom-row {
    display: flex;
    justify-content: center; /* Align horizontally */
    align-items: center;     /* Align vertically */
    padding: 10px;           /* Adjust as needed for spacing */
}
.custom-button {
    background-color: #800000;
    color: white;
    font-size: 12px;         /* Small font size */
    width: 100px !important;            /* Fixed width */
    height: 35px !important;            /* Fixed height */
    border-radius: 6px;      /* Slightly rounded corners */
    padding: 0 !important;              /* Remove extra padding */
    cursor: pointer;
    text-align: center;
    margin: 0 auto;          /* Center within its container */
    box-sizing: border-box;  /* Ensure consistent sizing */
}
#run-button {
    background-color: #800000 !important;
    color: white !important;
    font-size: 12px !important;  /* Small font size */
    width: 100px !important;     /* Fixed width */
    height: 35px !important;     /* Fixed height */
    border-radius: 6px !important;
    padding: 0 !important;
    text-align: center !important;
    display: block !important;   /* Ensure block-level alignment */
    margin: 0 auto !important;   /* Center horizontally */
    box-sizing: border-box !important;
}
/* Custom border styles for all Gradio components */
.gradio-container, .gradio-row, .gradio-column, .gradio-input, .gradio-image, .gradio-checkgroup, .gradio-button, .gradio-markdown {
    border: 3px #800000 !important;  /* Border width and color */
    border-radius: 8px !important;      /* Rounded corners */
}
/* Additional customizations for images to enhance visibility of the border */
.gradio-image img {
    border-radius: 8px !important;
    border: 3px solid black !important;  /* Border for image */
}
/* Custom Row for images and buttons */
.custom-row img {
    border-radius: 10px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
}
.gradio-block {
    max-height: 100vh;  /* Allow scrolling within the Gradio blocks */
    overflow-y: auto;   /* Enable scrolling for the content if it overflows */
}
"""

import netron
import threading
import gradio as gr
import os
from PIL import Image
import cv2
import numpy as np
from yolov5 import xai_yolov5
from yolov8 import xai_yolov8s

# Bundled sample images: display name -> path resolved against the current
# working directory at import time.
sample_images = {
    label: os.path.join(os.getcwd(), relative_path)
    for label, relative_path in (
        ("Sample 1", "data/xai/sample1.jpeg"),
        ("Sample 2", "data/xai/sample2.jpg"),
    )
}

def load_sample_image(sample_name):
    """Open the bundled sample registered under *sample_name*.

    Returns the opened PIL image, or ``None`` when the name is not a key of
    ``sample_images`` or the file it points at does not exist.
    """
    path = sample_images.get(sample_name)
    if not path or not os.path.exists(path):
        return None
    return Image.open(path)

def process_image(sample_choice, uploaded_image, yolo_versions, target_lyr=-5, n_components=8):
    """Run the selected YOLO explainability pipelines on a single image.

    Args:
        sample_choice: Key of ``sample_images``; only used when no image was
            uploaded.
        uploaded_image: Image supplied by the user, or ``None`` to fall back
            to the chosen sample.
        yolo_versions: Iterable of model names. ``"yolov5"`` and ``"yolov8s"``
            are supported; any other name yields a placeholder entry.
        target_lyr: Forwarded to ``xai_yolov5`` as ``target_lyr``.
        n_components: Forwarded to ``xai_yolov5`` as ``n_components``.

    Returns:
        A list with one entry per requested model, in request order. Entries
        for supported models are whatever ``xai_yolov5`` / ``xai_yolov8s``
        return. Unsupported names produce ``(image, message, [])`` so that
        every entry can be unpacked into three values, as the Run callback
        does.

    Raises:
        ValueError: If no image was uploaded and the sample cannot be loaded.
    """
    # Load sample or uploaded image
    image = uploaded_image if uploaded_image is not None else load_sample_image(sample_choice)
    if image is None:
        # Fail with a clear message instead of an opaque error from cv2.resize.
        raise ValueError(
            f"No image to process: nothing was uploaded and sample {sample_choice!r} could not be loaded."
        )

    # Uploads may be RGBA, palette or grayscale; normalise PIL input to three
    # channels before it is turned into an array.
    if isinstance(image, Image.Image):
        image = image.convert("RGB")

    # Preprocess image
    image = cv2.resize(np.array(image), (640, 640))

    # Apply selected models
    result_images = []
    for yolo_version in yolo_versions:
        if yolo_version == "yolov5":
            # Pass the caller's settings through instead of repeating literals.
            result_images.append(
                xai_yolov5(image, target_lyr=target_lyr, n_components=n_components)
            )
        elif yolo_version == "yolov8s":
            # NOTE(review): xai_yolov8s is still called with the image only;
            # confirm whether it accepts target_lyr / n_components as well.
            result_images.append(xai_yolov8s(image))
        else:
            result_images.append(
                (Image.fromarray(image), f"{yolo_version} not implemented.", [])
            )
    return result_images

def view_model(selected_models):
    """Generate Netron viewer markup for the selected models.

    Args:
        selected_models: Iterable of model names (``None`` is treated as
            empty). Names without a known Netron link are ignored.

    Returns:
        One ``<iframe>`` per recognised model, concatenated in selection
        order, or a short ``<p>`` notice when none of the names is recognised.
        Earlier versions kept only the last recognised model because each
        match overwrote the previous markup.
    """
    # NOTE(review): the "yolov5" link names a VAE encoder file in the
    # FFusion/FFusionXL-BASE repository rather than a YOLOv5 file -- confirm.
    netron_urls = {
        "yolov8s": "https://netron.app/?url=https://huggingface.co/spaces/BhumikaMak/NeuralVista/resolve/main/weight_files/yolov8s.pt",
        "yolov5": "https://netron.app/?url=https://huggingface.co/FFusion/FFusionXL-BASE/blob/main/vae_encoder/model.onnx",
    }
    iframe_template = """
            <iframe 
                src="{url}" 
                width="100%" 
                height="800" 
                frameborder="0">
            </iframe>
            """
    frames = [
        iframe_template.format(url=netron_urls[model])
        for model in (selected_models or ())
        if model in netron_urls
    ]
    if not frames:
        return "<p>No valid models selected for visualization.</p>"
    return "".join(frames)

# Build the page. Components are created in the order they appear here, so the
# statement order inside this block is the page layout.
with gr.Blocks(css=custom_css) as demo:
    # Title banner (inline-styled HTML).
    gr.HTML("""
      <div style="border: 2px solid #a05252; padding: 20px; border-radius: 8px;">
        <span style="color: #800000; font-family: 'Papyrus', cursive; font-weight: bold; font-size: 32px;">NeuralVista</span><br><br>
        <span style="color: black; font-family: 'Papyrus', cursive; font-size: 18px;">A harmonious framework of tools <span style="color: red; font-family: 'Papyrus', cursive; font-size: 18px;">☼</span> designed to illuminate the inner workings of AI.</span>
      </div>
    """)
    
    # Static side-by-side comparison built from the pre-rendered figures:
    # architecture text, embedded Netron view, detection figure, commentary,
    # factorization figure.
    with gr.Row():
        # YOLOv5 column
        with gr.Column():
            gr.Markdown(architecture_description_yolov5)
            gr.HTML(get_netron_html(yolov5_url))
            gr.Image(yolov5_result, label="Detections & Interpretability Map")
            gr.Markdown(description_yolov5)
            gr.Image(yolov5_dff, label="Feature Factorization & discovered concept")
            

        # YOLOv8 column
        with gr.Column():
            gr.Markdown(architecture_description_yolov8s)
            gr.HTML(get_netron_html(yolov8_url))
            gr.Image(yolov8_result, label="Detections & Interpretability Map")
            gr.Markdown(description_yolov8)
            gr.Image(yolov8_dff, label="Feature Factorization & discovered concept")

    # Sample that is pre-selected in the radio group and pre-loaded into the
    # preview image; it has to be one of the keys of sample_images.
    default_sample = "Sample 1"

    with gr.Row():
        # Left side: Sample selection and image upload
        with gr.Column():
            sample_selection = gr.Radio(
                choices=list(sample_images.keys()),
                label="Select a Sample Image",
                value=default_sample,
            )

            upload_image = gr.Image(
                label="Upload an Image",
                type="pil",  # callbacks receive the upload as a PIL image
            )

            selected_models = gr.CheckboxGroup(
                choices=["yolov5", "yolov8s"],
                value=["yolov5"],
                label="Select Model(s)",
            )
            #with gr.Row(elem_classes="custom-row"):
            # elem_id ties the button to the #run-button rule in custom_css.
            run_button = gr.Button("Run", elem_id="run-button")


        # Right side: preview of the currently selected sample
        with gr.Column():
            sample_display = gr.Image(
                value=load_sample_image(default_sample),  
                label="Selected Sample Image",
            )
    
    # Caption introducing the results row below.
    gr.HTML("""
        <span style="font-size: 14px; font-weight: bold;">The visualization demonstrates object detection and interpretability. Detected objects are highlighted with bounding boxes, while the heatmap reveals regions of focus, offering insights into the model's decision-making process.</span>
        """)
    # Results and visualization
    with gr.Row(elem_classes="custom-row"):
        result_gallery = gr.Gallery(
            label="Results",
            rows=1, 
            height="auto",       # Adjust height automatically based on content
            columns=1 ,
            object_fit="contain"
        ) 
        # Filled by the Run callback with the Netron markup.
        netron_display = gr.HTML(label="Netron Visualization")

    # Update sample image: reload the preview whenever the radio selection
    # changes (load_sample_image returns None for a missing file).
    sample_selection.change(
        fn=load_sample_image,
        inputs=sample_selection,
        outputs=sample_display,
    )
    

    # Background text for the factorization gallery that follows.
    gr.HTML("""
        <span style="font-size: 14px; ">
            <span style="color: #800000;">Concept Discovery</span> is the process of uncovering the hidden, high-level features that a deep learning model has learned. It provides a way to understand the essence of its internal representations, akin to peering into the mind of the model and revealing the meaningful patterns it detects in the data.
            <br><br>
            <span style="color: #800000;">Deep Feature Factorization</span> (DFF) serves as a tool for breaking down these complex features into simpler, more interpretable components. By applying matrix factorization on activation maps, it untangles the intricate web of learned representations, making it easier to comprehend what the model is truly focusing on. Together, these methods bring us closer to understanding the underlying logic of neural networks, shedding light on the often enigmatic decisions they make.
        </span>
    """)

    with gr.Row(elem_classes="custom-row"):
        dff_gallery = gr.Gallery(
            label="Deep Feature Factorization",
            rows=2,                # 2 rows x 4 columns = 8 tiles
            columns=4,
            # "fit" is not a valid object-fit value; use the same setting as
            # result_gallery so images are shown whole.
            object_fit="contain",
            height="auto",
        )

    # Callback for the Run button: Netron markup plus model results.
    def run_both(sample_choice, uploaded_image, selected_models):
        """Produce the values for the results gallery, Netron panel and DFF gallery.

        Args:
            sample_choice: Name of the selected sample image.
            uploaded_image: Uploaded image, or ``None`` to use the sample.
            selected_models: Model names ticked in the checkbox group.

        Returns:
            A 3-tuple ``(detections, netron_html, dff_images)``.
            ``detections`` is a list of ``(image, caption)`` pairs, one per
            entry returned by ``process_image`` (previously only the first
            entry was shown). ``dff_images`` is a flat list of the images
            found in the third element of each entry.

        Raises:
            gr.Error: If no model is selected.
        """
        if not selected_models:
            # Used to surface as an IndexError on results[0].
            raise gr.Error("Please select at least one model.")

        # Both steps run in the calling thread: building the markup is plain
        # string work, and an exception raised while processing now reaches
        # Gradio instead of being lost inside a worker thread.
        netron_html = view_model(selected_models)
        results = process_image(sample_choice, uploaded_image, selected_models)

        detections = []
        dff_images = []
        for entry in results:
            # Entries are (image, caption) optionally followed by the
            # factorization images; tolerate the two-element form.
            image, caption, *extra = entry
            detections.append((image, caption))
            factors = extra[0] if extra else None
            if isinstance(factors, (list, tuple)):
                dff_images.extend(factors)
            elif factors is not None:
                # A single image rather than a list: still show it.
                dff_images.append(factors)
        return detections, netron_html, dff_images

    # Run button click: the three values returned by run_both are assigned, in
    # order, to the results gallery, the Netron panel and the DFF gallery.
    run_button.click(
        fn=run_both,
        inputs=[sample_selection, upload_image, selected_models],
        outputs=[result_gallery, netron_display, dff_gallery],
    )
            

# Start the app as soon as the module is executed (no __main__ guard).
demo.launch()