ummtushar's picture
initial commit
60b0ddc verified
# # # # Imports
# # # import torch
# # # import numpy as np
# # # import pandas as pd
# # # import matplotlib.pyplot as plt
# # # import seaborn as sns
# # # # Imports
# # # import torch
# # # import numpy as np
# # # import pandas as pd
# # # import matplotlib.pyplot as plt
# # # import seaborn as sns
# # # from sklearn.metrics import confusion_matrix, roc_curve, auc
# # # from typing import Callable, List, Tuple
# # # import torch.nn as nn
# # # from pathlib import Path
# # # import torch.nn.functional as F
# # # from yaml import FlowSequenceStartToken
# # # from sklearn.metrics import confusion_matrix, roc_curve, auc
# # # from typing import Callable, List, Tuple
# # # import torch.nn as nn
# # # from pathlib import Path
# # # import torch.nn.functional as F
# # # from yaml import FlowSequenceStartToken
# # Import files
# from image_dataset import ImageDataset
# from net import Net, ResNetModel, EfficientNetModel
# from train_test import train_model, test_model
# from batch_sampler import BatchSampler
# NOTE: File used in the very beginning of the project. Please ignore!
# maincolor = '#4a8cffff'
# secondcolor = '#e06666'
# # Train data
# labels_train_path = 'dc1/data/Y_train.npy'
# data_train_path = 'dc1/data/X_train.npy'
# # Test data
# labels_test_path = 'dc1/data/Y_test.npy'
# data_test_path = 'dc1/data/X_test.npy'
# y_train = np.load(labels_train_path)
# unique_labels = np.unique(y_train)
# data_train = np.load(data_train_path)
# # Data verification: check that everything loaded as expected
# data_shape = data_train.shape
# data_type = data_train.dtype
# labels_shape = y_train.shape
# labels_type = y_train.dtype
# print(f"Data Shape: {data_shape}, Data Type: {data_type}")
# print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")
# # Check the range and distribution of features
# data_range = (np.min(data_train), np.max(data_train))
# # Label Encoding in accordance to the diseases
# class_names_mapping = {
# 0: 'Atelectasis',
# 1: 'Effusion',
# 2: 'Infiltration',
# 3: 'No Finding',
# 4: 'Nodule',
# 5: 'Pneumonia'
# }
# print("Unique classes in the training set:")
# for class_id in unique_labels:
# print(f"Class ID {class_id}: {class_names_mapping[class_id]}")
# # df for distribution analysis
# df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))
# ###################################################################
# ########### A D V A N C E D  A N A L Y S I S ###########
# ##################################################################
# # Y test data (labels)
# y_test = np.load(labels_test_path)
# # Initialize model (NET)
# n_classes = 6
# # NOTE : change the nn here!
# model = Net(n_classes=n_classes)
# # model = ResNetModel(n_classes=n_classes)
# # model = EfficientNetModel(n_classes=n_classes)
# # Device for test_model function call
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model.to(device)
# # Initialize the loss function
# loss_function = nn.CrossEntropyLoss() # we could use another loss; I found this one on the internet, but I was getting errors with it...
# # # Data Verification to check if we all have everything good
# # data_shape = data_train.shape
# # data_type = data_train.dtype
# # labels_shape = y_train.shape
# # labels_type = y_train.dtype
# # print(f"Data Shape: {data_shape}, Data Type: {data_type}")
# # print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")
# # # Check the range and distribution of features
# # data_range = (np.min(data_train), np.max(data_train))
# # # Label Encoding in accordance to the diseases
# # class_names_mapping = {
# # 0: 'Atelectasis',
# # 1: 'Effusion',
# # 2: 'Infiltration',
# # 3: 'No Finding',
# # 4: 'Nodule',
# # 5: 'Pneumonia'
# # }
# # print("Unique classes in the training set:")
# # for class_id in unique_labels:
# # print(f"Class ID {class_id}: {class_names_mapping[class_id]}")
# # # df for distribution analysis
# # df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))
# # ###################################################################
# # ########### A D V A N C E D  A N A L Y S I S ###########
# # ##################################################################
# # # Y test data (labels)
# # y_test = np.load(labels_test_path)
# # # Initialize model (NET)
# # n_classes = 6
# # # NOTE : change the nn here!
# # model = Net(n_classes=n_classes)
# # # model = ResNetModel(n_classes=n_classes)
# # # model = EfficientNetModel(n_classes=n_classes)
# # # Device for test_model function call
# # device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# # model.to(device)
# # # Initialize the loss function
# # loss_function = nn.CrossEntropyLoss() # we could use another loss; I found this one on the internet, but I was getting errors with it...
# # # Load test dataset w function
# # test_dataset = ImageDataset(Path("dc1/data/X_test.npy"), Path("dc1/data/Y_test.npy"))
# # # Initialize the BatchSampler
# # batch_size = 32
# # test_loader = BatchSampler(batch_size=batch_size, dataset=test_dataset, balanced=False) # 'balanced' or not we can choose depending on what we want
# # # Function call
# # losses, predicted_labels, true_labels, probabilities = test_model(model, test_loader, loss_function, device)
# ##################### R O C C U R V E #####################
# def plot_multiclass_roc_curve(y_true, y_scores, num_classes):
# # Compute ROC curve and ROC area for each class
# fpr = dict()
# tpr = dict()
# roc_auc = dict()
# for i in range(num_classes):
# fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_scores[:, i])
# roc_auc[i] = auc(fpr[i], tpr[i])
# # Plot all ROC curves
# plt.figure()
# for i in range(num_classes):
# plt.plot(fpr[i], tpr[i], label=f'ROC curve of class {i} (area = {roc_auc[i]:.2f})')
# plt.plot([0, 1], [0, 1], 'k--')
# plt.xlim([0.0, 1.0])
# plt.ylim([0.0, 1.05])
# plt.xlabel('False Positive Rate')
# plt.ylabel('True Positive Rate')
# plt.title('Multiclass ROC Curve')
# plt.legend(loc="lower right")
# plt.show()
# # Calculate the probabilities for each class
# model_predictions = []
# model_probabilities = []
# model_probabilities = F.softmax(torch.tensor(model_predictions), dim=0).numpy()
# plot_multiclass_roc_curve(y_test_binarized, model_probabilities, n_classes)
# model.eval() # Set the model to evaluation mode
# with torch.no_grad(): # Turn off gradients for the following block
# for data, target in test_loader:
# data, target = data.to(device), target.to(device)
# output = model(data)
# # Get class predictions
# _, preds = torch.max(output, 1)
# model_predictions.extend(preds.cpu().numpy())
# # Get probabilities for the positive class
# probs = F.softmax(output, dim=1)[:, 1] # Adjust the index based on your positive class
# model_probabilities.extend(probs.cpu().numpy())
# # # Specificity = Number of true negatives / (Number of true negatives + number of false positives) =
# # # = Number of true negatives / Total number of individuals without the illness
# # def sensitivity_specificity(conf_matrix):
# # num_classes = conf_matrix.shape[0]
# # sensitivity = np.zeros(num_classes)
# # specificity = np.zeros(num_classes)
# # for i in range(num_classes):
# # TP = conf_matrix[i, i]
# # FN = sum(conf_matrix[i, :]) - TP
# # FP = sum(conf_matrix[:, i]) - TP
# # TN = conf_matrix.sum() - (TP + FP + FN)
# # sensitivity[i] = TP / (TP + FN) if (TP + FN) != 0 else 0
# # specificity[i] = TN / (TN + FP) if (TN + FP) != 0 else 0
# # return sensitivity, specificity
# # from sklearn.preprocessing import label_binarize
# # # Binarize the labels for multiclass (suggestion of LLM)
# # y_test_binarized = label_binarize(y_test, classes=np.unique(y_test))
# # ##################### R O C C U R V E #####################
# # def plot_multiclass_roc_curve(y_true, y_scores, num_classes):
# # # Compute ROC curve and ROC area for each class
# # fpr = dict()
# # tpr = dict()
# # roc_auc = dict()
# # for i in range(num_classes):
# # fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_scores[:, i])
# # roc_auc[i] = auc(fpr[i], tpr[i])
# # # Plot all ROC curves
# # plt.figure()
# # for i in range(num_classes):
# # plt.plot(fpr[i], tpr[i], label=f'ROC curve of class {i} (area = {roc_auc[i]:.2f})')
# # plt.plot([0, 1], [0, 1], 'k--')
# # plt.xlim([0.0, 1.0])
# # plt.ylim([0.0, 1.05])
# # plt.xlabel('False Positive Rate')
# # plt.ylabel('True Positive Rate')
# # plt.title('Multiclass ROC Curve')
# # plt.legend(loc="lower right")
# # plt.show()
# # # Calculate the probabilities for each class
# # model_predictions = []
# # model_probabilities = []
# # model_probabilities = F.softmax(torch.tensor(model_predictions), dim=0).numpy()
# # plot_multiclass_roc_curve(y_test_binarized, model_probabilities, n_classes)
# # model.eval() # Set the model to evaluation mode
# # with torch.no_grad(): # Turn off gradients for the following block
# # for data, target in test_loader:
# # data, target = data.to(device), target.to(device)
# # output = model(data)
# # # Get class predictions
# # _, preds = torch.max(output, 1)
# # model_predictions.extend(preds.cpu().numpy())
# # # Get probabilities for the positive class
# # probs = F.softmax(output, dim=1)[:, 1] # Adjust the index based on your positive class
# # model_probabilities.extend(probs.cpu().numpy())
# # # Calculate sensitivity and specificity
# # sensitivity, specificity = sensitivity_specificity(y_test, model_predictions)
# # print(f"Sensitivity: {sensitivity}")
# # print(f"Specificity: {specificity}")
# # ##################################################################################################################################################################
# # # # Display the images, 1 for each class
# # # def display_images(images, titles, num_images):
# # # plt.figure(figsize=(15, 5))
# # # for i in range(num_images):
# # # image = np.squeeze(images[i]) # squeeze to make it easy to display in 2D
# # # plt.subplot(1, num_images, i + 1)
# # # plt.imshow(image, cmap='gray')
# # # plt.title(titles[i])
# # # plt.axis('off')
# # # plt.show()
# # >>>>>>> ab59272 (Net / ResNet / EfficientNet Experiments)
# # # data_train = np.load(data_train_path)
# # # # Data Verification to check if we all have everything good
# # # data_shape = data_train.shape
# # # data_type = data_train.dtype
# # # labels_shape = y_train.shape
# # # labels_type = y_train.dtype
# # # print(f"Data Shape: {data_shape}, Data Type: {data_type}")
# # # print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")
# # # # Check the range and distribution of features
# # # data_range = (np.min(data_train), np.max(data_train))
# # # # Label Encoding in accordance to the diseases
# # # class_names_mapping = {
# # # 0: 'Atelectasis',
# # # 1: 'Effusion',
# # # 2: 'Infiltration',
# # # 3: 'No Finding',
# # # 4: 'Nodule',
# # # 5: 'Pneumonia'
# # # }
# # # print("Unique classes in the training set:")
# # # for class_id in unique_labels:
# # # print(f"Class ID {class_id}: {class_names_mapping[class_id]}")
# # # # df for distribution analysis
# # # df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))
# # # Calculate the probabilities for each class
# # model_predictions = []
# # model_probabilities = []
# # model_probabilities = F.softmax(torch.tensor(model_predictions), dim=0).numpy()
# # plot_multiclass_roc_curve(y_test_binarized, model_probabilities, n_classes)
# # model.eval() # Set the model to evaluation mode
# # with torch.no_grad(): # Turn off gradients for the following block
# # for data, target in test_loader:
# # data, target = data.to(device), target.to(device)
# # output = model(data)
# # # Get class predictions
# # _, preds = torch.max(output, 1)
# # model_predictions.extend(preds.cpu().numpy())
# # # Get probabilities for the positive class
# # probs = F.softmax(output, dim=1)[:, 1] # Adjust the index based on your positive class
# # model_probabilities.extend(probs.cpu().numpy())
# # # Calculate sensitivity and specificity
# # sensitivity, specificity = sensitivity_specificity(y_test, model_predictions)
# # print(f"Sensitivity: {sensitivity}")
# # print(f"Specificity: {specificity}")
# # ##################################################################################################################################################################
# # # # Display the images, 1 for each class
# # # def display_images(images, titles, num_images):
# # # plt.figure(figsize=(15, 5))
# # # for i in range(num_images):
# # # image = np.squeeze(images[i]) # squeeze to make it easy to display in 2D
# # # plt.subplot(1, num_images, i + 1)
# # # plt.imshow(image, cmap='gray')
# # # plt.title(titles[i])
# # # plt.axis('off')
# # # plt.show()
# # >>>>>>> ab59272 (Net / ResNet / EfficientNet Experiments)
# # # data_train = np.load(data_train_path)
# # # # Data Verification to check if we all have everything good
# # # data_shape = data_train.shape
# # # data_type = data_train.dtype
# # # labels_shape = y_train.shape
# # # labels_type = y_train.dtype
# # # print(f"Data Shape: {data_shape}, Data Type: {data_type}")
# # # print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")
# # # # Check the range and distribution of features
# # # data_range = (np.min(data_train), np.max(data_train))
# # # # Label Encoding in accordance to the diseases
# # # class_names_mapping = {
# # # 0: 'Atelectasis',
# # # 1: 'Effusion',
# # # 2: 'Infiltration',
# # # 3: 'No Finding',
# # # 4: 'Nodule',
# # # 5: 'Pneumonia'
# # # }
# # # print("Unique classes in the training set:")
# # # for class_id in unique_labels:
# # # print(f"Class ID {class_id}: {class_names_mapping[class_id]}")
# # # # df for distribution analysis
# # # df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))