# Imports
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize
from typing import Callable, List, Tuple
import torch.nn as nn
from pathlib import Path
import torch.nn.functional as F
# Import files
from image_dataset import ImageDataset
from net import Net, ResNetModel, EfficientNetModel
from train_test import train_model, test_model
from batch_sampler import BatchSampler
# NOTE: File used at the very beginning of the project. Please ignore!
maincolor = '#4a8cffff'
secondcolor = '#e06666'

# Train data
labels_train_path = 'dc1/data/Y_train.npy'
data_train_path = 'dc1/data/X_train.npy'
# Test data
labels_test_path = 'dc1/data/Y_test.npy'
data_test_path = 'dc1/data/X_test.npy'

y_train = np.load(labels_train_path)
unique_labels = np.unique(y_train)
data_train = np.load(data_train_path)
# Data verification: check that shapes and dtypes are as expected
data_shape = data_train.shape
data_type = data_train.dtype
labels_shape = y_train.shape
labels_type = y_train.dtype
print(f"Data Shape: {data_shape}, Data Type: {data_type}")
print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")
# Check the range and distribution of features
data_range = (np.min(data_train), np.max(data_train))
print(f"Pixel value range: {data_range}")
# Label encoding: map class IDs to disease names
class_names_mapping = {
    0: 'Atelectasis',
    1: 'Effusion',
    2: 'Infiltration',
    3: 'No Finding',
    4: 'Nodule',
    5: 'Pneumonia'
}
print("Unique classes in the training set:")
for class_id in unique_labels:
    print(f"Class ID {class_id}: {class_names_mapping[class_id]}")
# DataFrame of flattened images for distribution analysis
df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))
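
# A minimal sketch of the distribution analysis hinted at above: histogram of
# pixel intensities over a random sample of training images. The sample size
# of 1000 is an arbitrary choice to keep the plot fast.
sample_size = min(1000, df_data_range.shape[0])
sample_idx = np.random.choice(df_data_range.shape[0], size=sample_size, replace=False)
pixel_sample = df_data_range.iloc[sample_idx].values.ravel()
plt.figure(figsize=(8, 4))
sns.histplot(pixel_sample, bins=50, color=maincolor)
plt.xlabel('Pixel intensity')
plt.ylabel('Count')
plt.title('Pixel intensity distribution (training sample)')
plt.show()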
###################################################################
###########    A D V A N C E D   A N A L Y S I S    ##############
###################################################################
# Test labels
y_test = np.load(labels_test_path)

# Initialize the model
n_classes = 6
# NOTE: switch the network architecture here if needed
model = Net(n_classes=n_classes)
# model = ResNetModel(n_classes=n_classes)
# model = EfficientNetModel(n_classes=n_classes)

# Device for the test_model call
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Initialize the loss function (standard choice for single-label multi-class classification)
loss_function = nn.CrossEntropyLoss()
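
# A possible alternative, shown only as a sketch (not the loss used in the
# project): if the classes are imbalanced, CrossEntropyLoss can be weighted
# by inverse class frequency computed from y_train.
class_counts = np.bincount(y_train.astype(int).ravel(), minlength=n_classes)
class_weights = class_counts.sum() / (n_classes * np.maximum(class_counts, 1))
weighted_loss = nn.CrossEntropyLoss(
    weight=torch.tensor(class_weights, dtype=torch.float32).to(device)
)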
# Load the test dataset
test_dataset = ImageDataset(Path("dc1/data/X_test.npy"), Path("dc1/data/Y_test.npy"))
# Initialize the BatchSampler
batch_size = 32
test_loader = BatchSampler(batch_size=batch_size, dataset=test_dataset, balanced=False)  # set balanced=True for class-balanced batches
# Evaluate the model on the test set
losses, predicted_labels, true_labels, probabilities = test_model(model, test_loader, loss_function, device)
#####################   R O C   C U R V E   #####################
def plot_multiclass_roc_curve(y_true, y_scores, num_classes):
    # Compute ROC curve and ROC area for each class
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_scores[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    # Plot all ROC curves
    plt.figure()
    for i in range(num_classes):
        plt.plot(fpr[i], tpr[i], label=f'ROC curve of class {i} (area = {roc_auc[i]:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Multiclass ROC Curve')
    plt.legend(loc="lower right")
    plt.show()
# Collect class predictions and per-class probabilities on the test set
model_predictions = []
model_probabilities = []
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Turn off gradients for the following block
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Get class predictions
        _, preds = torch.max(output, 1)
        model_predictions.extend(preds.cpu().numpy())
        # Get the full softmax probability vector per sample (needed for the multiclass ROC curve)
        probs = F.softmax(output, dim=1)
        model_probabilities.extend(probs.cpu().numpy())
model_predictions = np.array(model_predictions)
model_probabilities = np.array(model_probabilities)
# Sensitivity = TP / (TP + FN): fraction of individuals with the illness correctly identified.
# Specificity = TN / (TN + FP): fraction of individuals without the illness correctly identified.
def sensitivity_specificity(conf_matrix):
    num_classes = conf_matrix.shape[0]
    sensitivity = np.zeros(num_classes)
    specificity = np.zeros(num_classes)
    for i in range(num_classes):
        TP = conf_matrix[i, i]
        FN = conf_matrix[i, :].sum() - TP
        FP = conf_matrix[:, i].sum() - TP
        TN = conf_matrix.sum() - (TP + FP + FN)
        sensitivity[i] = TP / (TP + FN) if (TP + FN) != 0 else 0
        specificity[i] = TN / (TN + FP) if (TN + FP) != 0 else 0
    return sensitivity, specificity
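
# Quick sanity check on a made-up 3x3 confusion matrix (illustration only,
# not project data): rows are true classes, columns are predicted classes.
_demo_cm = np.array([[8, 1, 1],
                     [2, 6, 2],
                     [0, 1, 9]])
_demo_sens, _demo_spec = sensitivity_specificity(_demo_cm)
print(f"Demo sensitivity per class: {_demo_sens}")  # class 0: 8 / 10 = 0.8
print(f"Demo specificity per class: {_demo_spec}")  # class 0: 18 / 20 = 0.9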
# Binarize the labels for the multiclass ROC curve (one-vs-rest)
y_test_binarized = label_binarize(y_test, classes=np.unique(y_test))
# Plot one ROC curve per class
plot_multiclass_roc_curve(y_test_binarized, model_probabilities, n_classes)
# Calculate per-class sensitivity and specificity from the confusion matrix
conf_matrix = confusion_matrix(y_test, model_predictions)
sensitivity, specificity = sensitivity_specificity(conf_matrix)
print(f"Sensitivity: {sensitivity}")
print(f"Specificity: {specificity}")
##################################################################
# Display the images, one for each class
def display_images(images, titles, num_images):
    plt.figure(figsize=(15, 5))
    for i in range(num_images):
        image = np.squeeze(images[i])  # squeeze to 2D so imshow can render it
        plt.subplot(1, num_images, i + 1)
        plt.imshow(image, cmap='gray')
        plt.title(titles[i])
        plt.axis('off')
    plt.show()
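
# A minimal usage sketch: show the first training image of each class side by
# side. Assumes y_train holds integer class IDs matching class_names_mapping.
example_images = []
example_titles = []
for class_id in sorted(class_names_mapping):
    idx = np.where(y_train == class_id)[0]
    if idx.size > 0:
        example_images.append(data_train[idx[0]])
        example_titles.append(class_names_mapping[class_id])
display_images(example_images, example_titles, len(example_images))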