shravvvv committed
Commit 41a1cb5 · Parent: 4708396

Removed Files

.gitignore DELETED
@@ -1,3 +0,0 @@
- data/
- __pycache__
- tests/__pycache__
 
LICENSE DELETED
@@ -1,21 +0,0 @@
- MIT License
-
- Copyright (c) 2024 Shravan Venkatraman
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
 
data_loader.py DELETED
@@ -1,47 +0,0 @@
- import os
- from torch.utils.data import DataLoader, random_split
- from torchvision import datasets, transforms
-
- def get_dataloaders(data_dir="path/to/data/dir", batch_size=512, train_split=0.8, img_size=224, num_workers=4):
-     """
-     Returns training and validation dataloaders for an image classification dataset.
-
-     Parameters:
-     - data_dir (str): Path to the directory containing image data in a folder structure compatible with ImageFolder.
-     - batch_size (int): Number of samples per batch.
-     - train_split (float): Fraction of data to use for training. The remainder is used for validation.
-     - img_size (int): Side length to which all images are resized (img_size x img_size), after the size check below.
-     - num_workers (int): Number of worker processes for data loading.
-
-     Image Size Validation:
-     - Minimum allowed image size: 49x49 pixels.
-     - If img_size is less than 49, a ValueError is raised.
-
-     Returns:
-     - train_dataloader (DataLoader): DataLoader for the training split.
-     - val_dataloader (DataLoader): DataLoader for the validation split.
-     """
-
-     # Check that the requested image size is valid
-     if img_size < 49:
-         raise ValueError(f"Image size must be at least 49x49 pixels, but got {img_size}x{img_size}.")
-
-     transform = transforms.Compose([
-         transforms.Resize((img_size, img_size)),
-         transforms.ToTensor(),
-         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-     ])
-
-     # Load the full dataset
-     full_dataset = datasets.ImageFolder(root=data_dir, transform=transform)
-
-     # Split into training and validation sets
-     train_size = int(train_split * len(full_dataset))
-     val_size = len(full_dataset) - train_size
-     train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])
-
-     # Create dataloaders; only the training split needs shuffling
-     train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
-     val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
-
-     return train_dataloader, val_dataloader
 
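For context, a minimal usage sketch of the deleted loader, assuming an ImageFolder-style directory with one subfolder per class (the "data/PlantVillage" path is the example used in train.py below):

import torch
from data_loader import get_dataloaders

train_loader, val_loader = get_dataloaders(
    data_dir="data/PlantVillage",  # substitute your own dataset root
    batch_size=32,
    train_split=0.8,
    img_size=224,  # must be at least 49, per the check above
    num_workers=4,
)

images, labels = next(iter(train_loader))
print(images.shape)  # torch.Size([32, 3, 224, 224])
print(labels.shape)  # torch.Size([32])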
 
graph_construction.py DELETED
@@ -1,138 +0,0 @@
- import torch
- import networkx as nx
- from torch_geometric.utils import from_networkx
-
- ####################################################################
- # These functions reflect the methods described in Sections 3.1 and 3.2
- # of the SAG-ViT paper, where high-fidelity feature patches are extracted
- # from the CNN feature maps and organized into a graph structure.
- ####################################################################
-
- def extract_patches(feature_map, patch_size=(4, 4)):
-     """
-     Extracts non-overlapping patches from a feature map to form nodes in a graph.
-
-     Parameters:
-     - feature_map (Tensor): The feature map from the CNN of shape (B, C, H', W').
-       H' and W' are reduced spatial dimensions after CNN feature extraction.
-     - patch_size (tuple): Spatial size (height, width) of each patch.
-
-     Returns:
-     - patches (Tensor): Tensor of shape (B, N, C, patch_h, patch_w), where N is the number of patches per image.
-     """
-     b, c, h, w = feature_map.size()
-     patch_h, patch_w = patch_size
-
-     # Unfold extracts sliding patches; a step equal to the patch size makes them non-overlapping
-     patches = feature_map.unfold(2, patch_h, patch_h).unfold(3, patch_w, patch_w)
-
-     # Rearrange to have patches as separate units
-     patches = patches.permute(0, 2, 3, 1, 4, 5).contiguous()
-     patches = patches.view(b, -1, c, patch_h, patch_w)
-     return patches
-
- def construct_graph_from_patch(patch_index, patch_shape, image_shape):
-     """
-     Constructs edges between patch nodes based on spatial adjacency (k-connectivity).
-     This follows the approach described in Section 3.2 of SAG-ViT, where patches
-     are arranged in a grid and connected to their spatial neighbors.
-
-     Parameters:
-     - patch_index (int): Index of the current patch node.
-     - patch_shape (tuple): (patch_height, patch_width).
-     - image_shape (tuple): (height, width) of the feature map.
-
-     Returns:
-     - G (nx.Graph): A graph with a single node and edges to its neighbors (to be composed globally).
-     """
-     G = nx.Graph()
-
-     # Compute grid dimensions (how many patches along height and width)
-     grid_height = image_shape[0] // patch_shape[0]
-     grid_width = image_shape[1] // patch_shape[1]
-
-     # Current node index in a flattened grid
-     current_node = patch_index
-
-     G.add_node(current_node)
-
-     # 8-neighborhood connectivity (up, down, left, right, diagonals)
-     neighbor_offsets = [(-1, 0), (1, 0), (0, -1), (0, 1),
-                         (-1, -1), (-1, 1), (1, -1), (1, 1)]
-
-     # Recover row, col from patch_index
-     row = current_node // grid_width
-     col = current_node % grid_width
-
-     for dr, dc in neighbor_offsets:
-         neighbor_row = row + dr
-         neighbor_col = col + dc
-         if 0 <= neighbor_row < grid_height and 0 <= neighbor_col < grid_width:
-             neighbor_node = neighbor_row * grid_width + neighbor_col
-             G.add_edge(current_node, neighbor_node)
-
-     return G
-
- def build_graph_from_patches(feature_map, patch_size=(4, 4)):
-     """
-     Builds a global graph for each image in the batch, where each node corresponds
-     to a patch and edges represent spatial adjacency. This graph captures local
-     spatial relationships of the patches, as outlined in Sections 3.1 and 3.2 of SAG-ViT.
-
-     Parameters:
-     - feature_map (Tensor): CNN output (B, C, H', W').
-     - patch_size (tuple): Size of each patch (patch_h, patch_w).
-
-     Returns:
-     - G_global_batch (list): A list of NetworkX graphs, one per image in the batch.
-     - patches (Tensor): The extracted patches (B, N, C, patch_h, patch_w).
-     """
-     patches = extract_patches(feature_map, patch_size)
-     batch_size = patches.size(0)
-
-     grid_height = feature_map.size(2) // patch_size[0]
-     grid_width = feature_map.size(3) // patch_size[1]
-     num_patches = grid_height * grid_width
-
-     G_global_batch = []
-     for batch_idx in range(batch_size):
-         G_global = nx.Graph()
-         # Construct a global graph by composing individual patch-based graphs
-         for patch_idx in range(num_patches):
-             G_patch = construct_graph_from_patch(
-                 patch_index=patch_idx,
-                 patch_shape=patch_size,
-                 image_shape=(feature_map.size(2), feature_map.size(3))
-             )
-             G_global = nx.compose(G_global, G_patch)
-         G_global_batch.append(G_global)
-
-     return G_global_batch, patches
-
- def build_graph_data_from_patches(G_global_batch, patches):
-     """
-     Converts NetworkX graphs and associated patches into PyTorch Geometric Data objects.
-     Each node's features are the corresponding patch flattened into a vector.
-
-     Parameters:
-     - G_global_batch (list): List of global graphs (one per image) in NetworkX form.
-     - patches (Tensor): (B, N, C, patch_h, patch_w) patch tensor.
-
-     Returns:
-     - data_list (list): List of PyTorch Geometric Data objects, where data.x holds the node features
-       and data.edge_index holds the adjacency from the constructed graph.
-     """
-     from_networkx_ = from_networkx  # local alias to avoid confusion
-
-     data_list = []
-     batch_size, num_patches, channels, patch_h, patch_w = patches.size()
-
-     for batch_idx, G_global in enumerate(G_global_batch):
-         # Flatten each patch into a feature vector
-         node_features = patches[batch_idx].view(num_patches, -1)
-
-         G_pygeom = from_networkx_(G_global)
-         G_pygeom.x = node_features
-         data_list.append(G_pygeom)
-
-     return data_list
 
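A minimal sketch of the patch-to-graph pipeline above, mirroring the deleted unit tests (a dummy tensor stands in for the CNN feature map):

import torch
from graph_construction import build_graph_from_patches, build_graph_data_from_patches

# Dummy CNN feature map: batch of 2, 16 channels, 32x32 spatial grid.
feature_map = torch.randn(2, 16, 32, 32)

# 4x4 patches on a 32x32 map -> an 8x8 grid of 64 nodes per image.
graphs, patches = build_graph_from_patches(feature_map, patch_size=(4, 4))
data_list = build_graph_data_from_patches(graphs, patches)

print(len(data_list))                 # 2 (one PyG Data object per image)
print(data_list[0].x.shape)           # torch.Size([64, 256]): C * 4 * 4 = 256 features per node
print(data_list[0].edge_index.shape)  # (2, E) adjacency from the 8-neighborhood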
 
hubconf.py DELETED
@@ -1,23 +0,0 @@
- dependencies = ['torch']
-
- from sag_vit_model import SAGViTClassifier
- import torch
-
- def SAGViT(pretrained=False, **kwargs):
-     """
-     SAG-ViT model endpoint.
-     Args:
-         pretrained (bool): If True, loads pretrained weights.
-         **kwargs: Additional arguments for the model.
-     Returns:
-         model (nn.Module): The SAG-ViT model as proposed in the paper
-         "SAG-ViT: A Scale-Aware, High-Fidelity Patching Approach with
-         Graph Attention for Vision Transformers".
-         https://doi.org/10.48550/arXiv.2411.09420
-     """
-     model = SAGViTClassifier(**kwargs)
-     if pretrained:
-         checkpoint = ''  # no checkpoint URL is set in this commit, so pretrained=True cannot succeed as-is
-         state_dict = torch.hub.load_state_dict_from_url(checkpoint, progress=True)
-         model.load_state_dict(state_dict)
-     return model
 
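With this hubconf.py in place, the model could be pulled via torch.hub; a hedged sketch (the "shravvvv/SAG-ViT" repo path is an assumption based on the commit author, and pretrained=True would fail since no checkpoint URL is wired up):

import torch

# Hypothetical hub path; adjust to the actual GitHub repo hosting hubconf.py.
model = torch.hub.load("shravvvv/SAG-ViT", "SAGViT", pretrained=False, num_classes=10)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(1, 3, 224, 224))
print(logits.shape)  # torch.Size([1, 10])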
 
images/SAG-ViT.png DELETED
Binary file (358 kB)
 
model_components.py DELETED
@@ -1,119 +0,0 @@
- import torch
- from torch import nn
- import torch.nn.functional as F
- from torch_geometric.nn import GATConv, global_mean_pool
-
- from torchvision import models
-
- ###############################################################
- # These modules correspond to core building blocks of SAG-ViT:
- # 1. A CNN feature extractor for high-fidelity multi-scale feature maps.
- # 2. A Graph Attention Network (GAT) to refine patch embeddings.
- # 3. A Transformer Encoder to capture global long-range dependencies.
- # 4. An MLP classifier head.
- ###############################################################
-
- class EfficientNetV2FeatureExtractor(nn.Module):
-     """
-     Extracts multi-scale, spatially rich, and semantically meaningful feature maps
-     from images using an (optionally pre-trained) EfficientNetV2-S model. This
-     corresponds to Section 3.1, where a CNN backbone (EfficientNetV2-S) produces
-     rich feature maps that preserve semantic information at multiple scales.
-     """
-     def __init__(self, pretrained=False):
-         super(EfficientNetV2FeatureExtractor, self).__init__()
-
-         # Load EfficientNetV2-S, optionally with ImageNet weights
-         efficientnet = models.efficientnet_v2_s(
-             weights="IMAGENET1K_V1" if pretrained else None
-         )
-
-         # Keep layers up to the last block before downsampling below 16x16
-         self.extractor = nn.Sequential(*list(efficientnet.features.children())[:-2])
-
-         # Freeze the extractor parameters so the backbone is not fine-tuned
-         for param in self.extractor.parameters():
-             param.requires_grad = False
-
-     def forward(self, x):
-         """
-         Forward pass through the CNN backbone.
-
-         Input:
-         - x (Tensor): Input images of shape (B, 3, H, W)
-
-         Output:
-         - features (Tensor): Extracted feature map of shape (B, C, H', W'),
-           where H' and W' are reduced spatial dimensions.
-         """
-         features = self.extractor(x)
-         return features
-
- class GATGNN(nn.Module):
-     """
-     A Graph Attention Network (GAT) that processes patch-graph embeddings.
-     This module corresponds to the Graph Attention stage (Section 3.3),
-     refining local relationships between patches in a learned manner.
-     """
-     def __init__(self, in_channels, hidden_channels, out_channels, heads=8):
-         super(GATGNN, self).__init__()
-         # GAT layers:
-         # The first layer maps raw patch embeddings to a higher-level representation.
-         self.conv1 = GATConv(in_channels, hidden_channels, heads=heads)
-         # The second layer produces final node embeddings with a single head.
-         self.conv2 = GATConv(hidden_channels * heads, out_channels, heads=1)
-         self.pool = global_mean_pool
-
-     def forward(self, data):
-         """
-         Input:
-         - data (PyG Data): Contains x (node features), edge_index (graph edges), and batch indexing.
-
-         Output:
-         - x (Tensor): Aggregated graph-level embedding after mean pooling.
-         """
-         x, edge_index, batch = data.x, data.edge_index, data.batch
-         x = F.elu(self.conv1(x, edge_index))
-         x = self.conv2(x, edge_index)
-         x = self.pool(x, batch)
-         return x
-
- class TransformerEncoder(nn.Module):
-     """
-     A Transformer encoder to capture long-range dependencies among patch embeddings.
-     Integrates global dependencies after GAT processing, as per Section 3.3.
-     """
-     def __init__(self, d_model, nhead, num_layers, dim_feedforward):
-         super(TransformerEncoder, self).__init__()
-         encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward)
-         self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
-
-     def forward(self, x):
-         """
-         Input:
-         - x (Tensor): Sequence of patch embeddings with shape (B, N, D).
-
-         Output:
-         - (Tensor): Transformed embeddings with global relationships integrated (B, N, D).
-         """
-         # nn.TransformerEncoder defaults to (N, B, D) input, so transpose first
-         x = x.transpose(0, 1)  # (N, B, D)
-         x = self.transformer_encoder(x)
-         x = x.transpose(0, 1)  # (B, N, D)
-         return x
-
- class MLPBlock(nn.Module):
-     """
-     An MLP classification head that maps final global embeddings to classification logits.
-     """
-     def __init__(self, in_features, hidden_features, out_features):
-         super(MLPBlock, self).__init__()
-         self.mlp = nn.Sequential(
-             nn.Linear(in_features, hidden_features),
-             nn.ReLU(),
-             nn.Linear(hidden_features, out_features)
-         )
-
-     def forward(self, x):
-         return self.mlp(x)
 
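Each block can be exercised in isolation; a minimal sketch (the backbone's output shape is an expectation for this particular truncation of EfficientNetV2-S at 224x224 input, consistent with the in_channels=2560 = 160*4*4 default used below):

import torch
from model_components import EfficientNetV2FeatureExtractor, TransformerEncoder, MLPBlock

backbone = EfficientNetV2FeatureExtractor(pretrained=False)
backbone.eval()
with torch.no_grad():
    fmap = backbone(torch.randn(1, 3, 224, 224))
print(fmap.shape)  # expected: torch.Size([1, 160, 14, 14])

encoder = TransformerEncoder(d_model=64, nhead=4, num_layers=2, dim_feedforward=64)
tokens = torch.randn(1, 5, 64)   # (B, N, D)
print(encoder(tokens).shape)     # shape-preserving: torch.Size([1, 5, 64])

head = MLPBlock(in_features=64, hidden_features=64, out_features=10)
print(head(torch.randn(1, 64)).shape)  # torch.Size([1, 10])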
 
requirements.txt DELETED
@@ -1,12 +0,0 @@
- numpy==1.26.4
- pandas==2.2.3
- matplotlib==3.7.5
- seaborn==0.12.2
- tqdm==4.66.4
- psutil==5.9.3
- pynvml==11.4.1
- scikit-learn==1.2.2
- torch==2.4.0
- torch-geometric==2.6.1
- torchvision==0.19.0
- networkx==3.3
 
sag_vit_model.py DELETED
@@ -1,106 +0,0 @@
- import torch
- from torch import nn
-
- from torch_geometric.data import Batch
- from model_components import EfficientNetV2FeatureExtractor, GATGNN, TransformerEncoder, MLPBlock
- from graph_construction import build_graph_from_patches, build_graph_data_from_patches
-
- ###############################################################################
- # SAG-ViT Model:
- # This class combines:
- # 1) a CNN backbone to produce high-fidelity feature maps (Section 3.1),
- # 2) graph construction and a GAT to refine local patch embeddings (Sections 3.2 and 3.3),
- # 3) a Transformer encoder to capture global relationships (Section 3.3),
- # 4) a final MLP classifier.
- ###############################################################################
-
- class SAGViTClassifier(nn.Module):
-     """
-     SAG-ViT: Scale-Aware Graph Attention Vision Transformer
-
-     This model integrates the following steps:
-     - Extract multi-scale features from images using a CNN backbone (EfficientNetV2-S here).
-     - Partition the feature map into patches and build a graph where each node is a patch.
-     - Use a Graph Attention Network (GAT) to refine patch embeddings based on local spatial relationships.
-     - Utilize a Transformer encoder to model long-range dependencies and integrate multi-scale information.
-     - Finally, classify the resulting representation into the desired classes.
-
-     Inputs:
-     - x (Tensor): Input images (B, 3, H, W)
-
-     Outputs:
-     - out (Tensor): Classification logits (B, num_classes)
-     """
-     def __init__(
-         self,
-         patch_size=(4, 4),
-         num_classes=10,
-         d_model=64,
-         nhead=4,
-         num_layers=2,
-         dim_feedforward=64,
-         hidden_mlp_features=64,
-         in_channels=2560,  # flattened patch dim: C * patch_h * patch_w (160 * 4 * 4 for the EfficientNetV2-S backbone)
-         gcn_hidden=128,
-         gcn_out=64
-     ):
-         super(SAGViTClassifier, self).__init__()
-
-         # CNN feature extractor (frozen EfficientNetV2-S backbone)
-         self.cnn = EfficientNetV2FeatureExtractor()
-
-         # Graph Attention Network to process patch embeddings
-         self.gcn = GATGNN(in_channels=in_channels, hidden_channels=gcn_hidden, out_channels=gcn_out)
-
-         # Learnable positional embedding for Transformer input
-         self.positional_embedding = nn.Parameter(torch.randn(1, 1, d_model))
-         # Extra embedding token (similar to a class token) to summarize global info
-         self.extra_embedding = nn.Parameter(torch.randn(1, d_model))
-
-         # Transformer encoder to capture long-range global dependencies
-         self.transformer_encoder = TransformerEncoder(d_model, nhead, num_layers, dim_feedforward)
-
-         # MLP classification head
-         self.mlp = MLPBlock(d_model, hidden_mlp_features, num_classes)
-
-         self.patch_size = patch_size
-
-     def forward(self, x):
-         # Step 1: High-fidelity feature extraction from the CNN
-         feature_map = self.cnn(x)
-
-         # Step 2: Build graphs from patches
-         G_global_batch, patches = build_graph_from_patches(feature_map, self.patch_size)
-
-         # Step 3: Convert to PyG Data format and batch
-         data_list = build_graph_data_from_patches(G_global_batch, patches)
-         device = x.device
-         batch = Batch.from_data_list(data_list).to(device)
-
-         # Step 4: GAT stage; global mean pooling reduces each patch graph to a
-         # single vector, so x_gcn has shape (B, gcn_out)
-         x_gcn = self.gcn(batch)
-
-         # Step 5: The Transformer expects a sequence, so treat each image-level
-         # embedding as one "patch token"
-         B = x.size(0)
-         patch_embeddings = x_gcn.unsqueeze(1)  # (B, 1, D)
-
-         # Add positional embedding
-         patch_embeddings = patch_embeddings + self.positional_embedding  # (B, 1, D)
-
-         # Append an extra learnable embedding (like a CLS token)
-         patch_embeddings = torch.cat([patch_embeddings, self.extra_embedding.unsqueeze(0).expand(B, -1, -1)], dim=1)  # (B, 2, D)
-
-         # Step 6: Transformer encoder
-         x_trans = self.transformer_encoder(patch_embeddings)
-
-         # Step 7: Global pooling (mean over the token dimension)
-         x_pooled = x_trans.mean(dim=1)  # (B, D)
-
-         # Classification
-         out = self.mlp(x_pooled)
-         return out
 
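End to end, the classifier runs as in the deleted test suite; a minimal sketch:

import torch
from sag_vit_model import SAGViTClassifier

# Defaults match a 224x224 input with the EfficientNetV2-S backbone.
model = SAGViTClassifier(num_classes=10)
model.eval()
with torch.no_grad():
    logits = model(torch.randn(2, 3, 224, 224))
print(logits.shape)  # torch.Size([2, 10])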
 
tests/test_graph_construction.py DELETED
@@ -1,39 +0,0 @@
- import unittest
- import torch
- from graph_construction import extract_patches, build_graph_from_patches, build_graph_data_from_patches
-
- class TestGraphConstruction(unittest.TestCase):
-     def test_extract_patches_shape(self):
-         # Create a dummy feature map: B=2, C=16, H=32, W=32
-         feature_map = torch.randn(2, 16, 32, 32)
-         patches = extract_patches(feature_map, patch_size=(4, 4))
-         # Check dimensions: number_of_patches = (H/4)*(W/4) = 8*8 = 64 per image
-         self.assertEqual(patches.shape, (2, 64, 16, 4, 4))
-
-     def test_build_graph_from_patches_graph_structure(self):
-         feature_map = torch.randn(1, 16, 32, 32)
-         G_batch, patches = build_graph_from_patches(feature_map, patch_size=(4, 4))
-         # 1 image => G_batch[0] is the graph
-         G = G_batch[0]
-         # We have 64 patches
-         self.assertEqual(len(G.nodes), 64)
-         # Check that edges exist (8-neighborhood):
-         # interior nodes should have edges to their neighbors,
-         # so check a node in the interior of the 8x8 grid.
-         node_index = 9  # row=1, col=1 in an 8x8 grid
-         self.assertTrue(len(list(G.neighbors(node_index))) > 0)
-
-     def test_build_graph_data_from_patches_conversion(self):
-         feature_map = torch.randn(2, 16, 32, 32)
-         G_batch, patches = build_graph_from_patches(feature_map, patch_size=(4, 4))
-         data_list = build_graph_data_from_patches(G_batch, patches)
-         self.assertEqual(len(data_list), 2)
-         # Check node feature shape: C * patch_h * patch_w = 16*4*4 = 256
-         self.assertEqual(data_list[0].x.shape[1], 16 * 4 * 4)
-         # Check that edges are present
-         self.assertTrue(data_list[0].edge_index.shape[1] > 0)
-
- if __name__ == '__main__':
-     unittest.main()
 
tests/test_model_components.py DELETED
@@ -1,53 +0,0 @@
- import unittest
- import torch
- from model_components import EfficientNetV2FeatureExtractor, GATGNN, TransformerEncoder, MLPBlock
- from torch_geometric.data import Data
-
- class TestModelComponents(unittest.TestCase):
-     def test_efficientnetv2_extractor_output_shape(self):
-         model = EfficientNetV2FeatureExtractor()
-         model.eval()
-         x = torch.randn(2, 3, 224, 224)
-         with torch.no_grad():
-             features = model(x)
-         # The exact output shape depends on where the EfficientNetV2 backbone is
-         # truncated (here (2, 160, 14, 14) for 224x224 input), so assert only
-         # generic properties.
-         self.assertEqual(features.size(0), 2)
-         self.assertTrue(features.size(1) > 0)
-         self.assertTrue(features.size(2) > 0)
-         self.assertTrue(features.size(3) > 0)
-
-     def test_gatgnn_forward(self):
-         # Graph with 4 nodes, each node feature dim=256
-         x = torch.randn(4, 256)
-         edge_index = torch.tensor([[0, 1, 1, 2], [1, 0, 2, 3]], dtype=torch.long)
-         batch = torch.tensor([0, 0, 0, 0])
-         data = Data(x=x, edge_index=edge_index, batch=batch)
-
-         gnn = GATGNN(in_channels=256, hidden_channels=64, out_channels=32)
-         output = gnn(data)
-         # After pooling: should be (batch_size, out_channels) = (1, 32)
-         self.assertEqual(output.shape, (1, 32))
-
-     def test_transformer_encoder(self):
-         # (B, N, D) = (2, 10, 64)
-         x = torch.randn(2, 10, 64)
-         encoder = TransformerEncoder(d_model=64, nhead=4, num_layers=2, dim_feedforward=64)
-         out = encoder(x)
-         # Same shape as input
-         self.assertEqual(out.shape, (2, 10, 64))
-
-     def test_mlp_block(self):
-         mlp = MLPBlock(in_features=64, hidden_features=128, out_features=10)
-         x = torch.randn(2, 64)
-         out = mlp(x)
-         self.assertEqual(out.shape, (2, 10))
-
-     def test_efficientnetv2_freeze(self):
-         # Ensure the backbone parameters are frozen
-         model = EfficientNetV2FeatureExtractor()
-         for param in model.parameters():
-             self.assertFalse(param.requires_grad)
-
- if __name__ == '__main__':
-     unittest.main()
 
tests/test_sag_vit_model.py DELETED
@@ -1,39 +0,0 @@
- import unittest
- import torch
- from sag_vit_model import SAGViTClassifier
-
- class TestSAGViTModel(unittest.TestCase):
-     def test_forward_pass(self):
-         model = SAGViTClassifier(
-             patch_size=(4, 4),
-             num_classes=10,  # smaller number of classes for the test
-             d_model=64,
-             nhead=4,
-             num_layers=2,
-             dim_feedforward=64,
-             hidden_mlp_features=64,
-             in_channels=2560,  # matches the default flattened patch dimension
-             gcn_hidden=128,
-             gcn_out=64
-         )
-         model.eval()
-         x = torch.randn(2, 3, 224, 224)
-         with torch.no_grad():
-             out = model(x)
-         # Check output shape: (B, num_classes) = (2, 10)
-         self.assertEqual(out.shape, (2, 10))
-
-     def test_empty_input(self):
-         model = SAGViTClassifier()
-         # Passing an empty tensor should fail gracefully
-         with self.assertRaises(Exception):
-             model(torch.empty(0, 3, 224, 224))
-
-     def test_invalid_input_dimensions(self):
-         model = SAGViTClassifier()
-         # Incorrect dimensions (e.g., missing channel dimension)
-         with self.assertRaises(RuntimeError):
-             model(torch.randn(2, 224, 224))  # no channel dimension
-
- if __name__ == '__main__':
-     unittest.main()
 
tests/test_train.py DELETED
@@ -1,54 +0,0 @@
- import unittest
- from unittest.mock import MagicMock, patch
- import torch
- import torch.nn as nn
- from train import train_model
- from sag_vit_model import SAGViTClassifier
-
- class TestTrain(unittest.TestCase):
-     @patch("train.optim.Adam")
-     def test_train_model_loop(self, mock_adam):
-         # Mock the optimizer
-         mock_optimizer = MagicMock()
-         mock_adam.return_value = mock_optimizer
-
-         # Mock dataloaders with a small dummy dataset:
-         # just one batch with a couple of samples
-         train_dataloader = [(torch.randn(2, 3, 224, 224), torch.tensor([0, 1]))]
-         val_dataloader = [(torch.randn(2, 3, 224, 224), torch.tensor([0, 1]))]
-
-         model = SAGViTClassifier(num_classes=2)
-
-         criterion = nn.CrossEntropyLoss()
-         device = torch.device("cpu")
-
-         # Test a single training epoch
-         history = train_model(model, "TestModel", train_dataloader, val_dataloader,
-                               num_epochs=1, criterion=criterion, optimizer=mock_optimizer, device=device, patience=2, verbose=False)
-
-         # Check that history is properly recorded
-         self.assertIn("train_loss", history)
-         self.assertIn("val_loss", history)
-         self.assertGreaterEqual(len(history["train_loss"]), 1)
-         self.assertGreaterEqual(len(history["val_loss"]), 1)
-
-     def test_early_stopping(self):
-         # Dataloaders of random data, so the validation loss is unlikely to keep improving
-         model = SAGViTClassifier(num_classes=2)
-         criterion = nn.CrossEntropyLoss()
-         optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-         device = torch.device("cpu")
-
-         train_dataloader = [(torch.randn(2, 3, 224, 224), torch.tensor([0, 1]))]
-         val_dataloader = [(torch.randn(2, 3, 224, 224), torch.tensor([0, 1]))]
-
-         history = train_model(model, "TestModelEarlyStop", train_dataloader, val_dataloader,
-                               num_epochs=5, criterion=criterion, optimizer=optimizer, device=device, patience=1, verbose=False)
-
-         # With patience=1, training should stop at or before the 5-epoch cap
-         self.assertLessEqual(len(history["train_loss"]), 5)
-
- if __name__ == '__main__':
-     unittest.main()
 
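The test modules import the top-level files directly (e.g. from train import train_model), so the suite was meant to run from the repository root; a minimal runner sketch:

import unittest

# Discover tests/test_*.py; run this from the repo root so that
# sag_vit_model, model_components, graph_construction, and train are importable.
suite = unittest.defaultTestLoader.discover("tests", pattern="test_*.py")
unittest.TextTestRunner(verbosity=2).run(suite)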
 
train.py DELETED
@@ -1,189 +0,0 @@
- import os
- import torch
- from torch import nn, optim
- from tqdm import tqdm
- from sklearn.metrics import (precision_score, recall_score, f1_score,
-                              roc_auc_score, cohen_kappa_score, matthews_corrcoef,
-                              confusion_matrix)
-
- from sag_vit_model import SAGViTClassifier
- from data_loader import get_dataloaders
-
- #####################################################################
- # This file provides the training loop and metric computation. It uses
- # the SAG-ViT model defined in sag_vit_model.py and the data from data_loader.py.
- # The training loop implements early stopping and tracks various metrics.
- #####################################################################
-
- def train_model(model, model_name, train_loader, val_loader, num_epochs, criterion, optimizer, device, patience=8, verbose=True):
-     """
-     Trains the SAG-ViT model and evaluates it on the validation set.
-     Implements early stopping based on validation loss.
-
-     Parameters:
-     - model (nn.Module): The SAG-ViT model.
-     - model_name (str): A name to identify the model (used for saving checkpoints).
-     - train_loader, val_loader: DataLoaders for training and validation.
-     - num_epochs (int): Maximum number of epochs.
-     - criterion (nn.Module): Loss function.
-     - optimizer (torch.optim.Optimizer): Optimization algorithm.
-     - device (torch.device): Device to run the computations on (CPU/GPU).
-     - patience (int): Early stopping patience.
-     - verbose (bool): Whether to print a per-epoch summary.
-
-     Returns:
-     - history (dict): Dictionary containing training and validation metrics per epoch.
-       (Note: the 'train_auc'/'val_auc' keys are allocated but not populated by this
-       loop; class probabilities are collected in all_probs should AUC be needed.)
-     """
-
-     history = {
-         'train_loss': [], 'train_acc': [], 'train_prec': [], 'train_rec': [], 'train_f1': [],
-         'train_auc': [], 'train_mcc': [], 'train_cohen_kappa': [], 'train_confusion_matrix': [],
-         'val_loss': [], 'val_acc': [], 'val_prec': [], 'val_rec': [], 'val_f1': [],
-         'val_auc': [], 'val_mcc': [], 'val_cohen_kappa': [], 'val_confusion_matrix': []
-     }
-
-     best_val_loss = float('inf')
-     patience_counter = 0
-     best_model_state = None
-
-     for epoch in range(num_epochs):
-         print(f'Epoch {epoch+1}/{num_epochs}')
-         model.train()
-
-         train_loss_total, correct, total = 0, 0, 0
-         all_preds, all_labels, all_probs = [], [], []
-
-         # Training loop
-         for batch_idx, (X, y) in enumerate(tqdm(train_loader)):
-             inputs, labels = X.to(device), y.to(device)
-             optimizer.zero_grad()
-
-             outputs = model(inputs)
-             loss = criterion(outputs, labels)
-             loss.backward()
-             optimizer.step()
-
-             train_loss_total += loss.item()
-
-             probs = torch.softmax(outputs, dim=1)
-             _, preds = torch.max(outputs, 1)
-             correct += (preds == labels).sum().item()
-             total += labels.size(0)
-
-             all_preds.extend(preds.cpu().numpy())
-             all_labels.extend(labels.cpu().numpy())
-             all_probs.extend(probs.detach().cpu().numpy())
-
-         # Compute training metrics
-         train_acc = correct / total
-         train_prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
-         train_rec = recall_score(all_labels, all_preds, average='macro')
-         train_f1 = f1_score(all_labels, all_preds, average='macro')
-         train_cohen_kappa = cohen_kappa_score(all_labels, all_preds)
-         train_mcc = matthews_corrcoef(all_labels, all_preds)
-         train_confusion = confusion_matrix(all_labels, all_preds)
-
-         history['train_loss'].append(train_loss_total / len(train_loader))
-         history['train_acc'].append(train_acc)
-         history['train_prec'].append(train_prec)
-         history['train_rec'].append(train_rec)
-         history['train_f1'].append(train_f1)
-         history['train_cohen_kappa'].append(train_cohen_kappa)
-         history['train_mcc'].append(train_mcc)
-         history['train_confusion_matrix'].append(train_confusion)
-
-         # Validation
-         model.eval()
-         val_loss_total, correct, total = 0, 0, 0
-         all_preds, all_labels, all_probs = [], [], []
-
-         with torch.no_grad():
-             for batch_idx, (X, y) in enumerate(tqdm(val_loader)):
-                 inputs, labels = X.to(device), y.to(device)
-                 outputs = model(inputs)
-                 loss = criterion(outputs, labels)
-
-                 val_loss_total += loss.item()
-                 probs = torch.softmax(outputs, dim=1)
-                 _, preds = torch.max(outputs, 1)
-                 correct += (preds == labels).sum().item()
-                 total += labels.size(0)
-
-                 all_preds.extend(preds.cpu().numpy())
-                 all_labels.extend(labels.cpu().numpy())
-                 all_probs.extend(probs.detach().cpu().numpy())
-
-         # Compute validation metrics
-         val_acc = correct / total
-         val_prec = precision_score(all_labels, all_preds, average='macro', zero_division=0)
-         val_rec = recall_score(all_labels, all_preds, average='macro')
-         val_f1 = f1_score(all_labels, all_preds, average='macro')
-         val_cohen_kappa = cohen_kappa_score(all_labels, all_preds)
-         val_mcc = matthews_corrcoef(all_labels, all_preds)
-         val_confusion = confusion_matrix(all_labels, all_preds)
-
-         history['val_loss'].append(val_loss_total / len(val_loader))
-         history['val_acc'].append(val_acc)
-         history['val_prec'].append(val_prec)
-         history['val_rec'].append(val_rec)
-         history['val_f1'].append(val_f1)
-         history['val_cohen_kappa'].append(val_cohen_kappa)
-         history['val_mcc'].append(val_mcc)
-         history['val_confusion_matrix'].append(val_confusion)
-
-         # Print epoch summary
-         if verbose:
-             print(f"Train Loss: {history['train_loss'][-1]:.4f}, Train Acc: {history['train_acc'][-1]:.4f}, "
-                   f"Val Loss: {history['val_loss'][-1]:.4f}, Val Acc: {history['val_acc'][-1]:.4f}")
-
-         # Early stopping
-         current_val_loss = history['val_loss'][-1]
-         if current_val_loss < best_val_loss:
-             best_val_loss = current_val_loss
-             best_model_state = model.state_dict()
-             patience_counter = 0
-         else:
-             patience_counter += 1
-             print(f"Patience counter: {patience_counter}/{patience}")
-             if patience_counter >= patience:
-                 print("Early stopping triggered.")
-                 model.load_state_dict(best_model_state)
-                 torch.save(model.state_dict(), f'{model_name}-best.pth')
-                 return history
-
-     model.load_state_dict(best_model_state)
-     torch.save(model.state_dict(), f'{model_name}-{num_epochs}_epochs.pth')
-
-     return history
-
-
- if __name__ == "__main__":
-     # Example usage:
-     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-     print(f"Training on device: {device}")
-     data_dir = "data/PlantVillage"  # "path/to/data/dir"
-     num_classes = len(os.listdir(data_dir))
-     train_loader, val_loader = get_dataloaders(data_dir=data_dir, img_size=224, batch_size=32)  # img_size must be at least 49
-
-     model = SAGViTClassifier(num_classes=num_classes).to(device)
-
-     criterion = nn.CrossEntropyLoss()
-     optimizer = optim.Adam(model.parameters(), lr=0.0001)
-     num_epochs = 100
-
-     history = train_model(
-         model,
-         'SAGViT',
-         train_loader,
-         val_loader,
-         num_epochs,
-         criterion,
-         optimizer,
-         device
-     )
-
-     # You may save history to a CSV or analyze it further as needed. For example:
-     # import pandas as pd
-     # history_df = pd.DataFrame(history)
-     # history_df.to_csv("training_history.csv", index=False)
- # history_df.to_csv("training_history.csv", index=False)