# NOTE: File used at the very beginning of the project. Please ignore!

# Imports
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from typing import Callable, List, Tuple
from sklearn.metrics import confusion_matrix, roc_curve, auc
from sklearn.preprocessing import label_binarize

# Project files
from image_dataset import ImageDataset
from net import Net, ResNetModel, EfficientNetModel
from train_test import train_model, test_model
from batch_sampler import BatchSampler

# Plot colors
maincolor = '#4a8cffff'
secondcolor = '#e06666'

# Train data
labels_train_path = 'dc1/data/Y_train.npy'
data_train_path = 'dc1/data/X_train.npy'
# Test data
labels_test_path = 'dc1/data/Y_test.npy'
data_test_path = 'dc1/data/X_test.npy'

y_train = np.load(labels_train_path)
unique_labels = np.unique(y_train)
data_train = np.load(data_train_path)

# Data verification: check that shapes and dtypes are what we all expect
data_shape = data_train.shape
data_type = data_train.dtype
labels_shape = y_train.shape
labels_type = y_train.dtype
print(f"Data Shape: {data_shape}, Data Type: {data_type}")
print(f"Labels Shape: {labels_shape}, Labels Type: {labels_type}")

# Check the range of the feature (pixel) values
data_range = (np.min(data_train), np.max(data_train))

# Label encoding in accordance with the diseases
class_names_mapping = {
    0: 'Atelectasis',
    1: 'Effusion',
    2: 'Infiltration',
    3: 'No Finding',
    4: 'Nodule',
    5: 'Pneumonia',
}
print("Unique classes in the training set:")
for class_id in unique_labels:
    print(f"Class ID {class_id}: {class_names_mapping[class_id]}")

# DataFrame for distribution analysis (one flattened image per row)
df_data_range = pd.DataFrame(data_train.reshape(data_train.shape[0], -1))
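# Optional sketch (not part of the original pipeline): df_data_range is built for
# distribution analysis but never used, so plot the overall pixel-intensity
# distribution with seaborn. The random subsampling of pixels is my own addition,
# purely to keep the plot fast on a large array.
pixels = df_data_range.to_numpy().ravel()
rng = np.random.default_rng(0)
pixel_sample = rng.choice(pixels, size=min(100_000, pixels.size), replace=False)
plt.figure(figsize=(8, 4))
sns.histplot(pixel_sample, bins=50, color=maincolor)
plt.title(f"Pixel intensity distribution (range {data_range[0]} to {data_range[1]})")
plt.xlabel("Pixel value")
plt.ylabel("Count")
plt.show()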
###################################################################
###########   A D V A N C E D   A N A L Y S I S   ################
###################################################################

# Test labels
y_test = np.load(labels_test_path)

# Initialize the model (swap the class here to experiment with other architectures)
n_classes = 6
model = Net(n_classes=n_classes)
# model = ResNetModel(n_classes=n_classes)
# model = EfficientNetModel(n_classes=n_classes)

# Device for the test_model function call
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Loss function: CrossEntropyLoss is the standard choice for multiclass
# classification; it can be swapped for another criterion if needed
loss_function = nn.CrossEntropyLoss()

# Load the test dataset
test_dataset = ImageDataset(Path("dc1/data/X_test.npy"), Path("dc1/data/Y_test.npy"))

# Initialize the BatchSampler ('balanced' can be toggled depending on what we want;
# keep it False here so the batches follow the original test-set order)
batch_size = 32
test_loader = BatchSampler(batch_size=batch_size, dataset=test_dataset, balanced=False)

# Run the shared evaluation routine
losses, predicted_labels, true_labels, probabilities = test_model(model, test_loader, loss_function, device)

#####################   R O C   C U R V E   #####################

def plot_multiclass_roc_curve(y_true, y_scores, num_classes):
    # Compute ROC curve and ROC area for each class (one-vs-rest)
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(num_classes):
        fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_scores[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])
    # Plot all ROC curves
    plt.figure()
    for i in range(num_classes):
        plt.plot(fpr[i], tpr[i], label=f'ROC curve of class {i} (area = {roc_auc[i]:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Multiclass ROC Curve')
    plt.legend(loc="lower right")
    plt.show()

# Binarize the labels for the one-vs-rest ROC curves
y_test_binarized = label_binarize(y_test, classes=np.unique(y_test))

# Collect class predictions and softmax probabilities for every test image
model_predictions = []
model_probabilities = []
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Turn off gradients for the following block
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        output = model(data)
        # Class predictions
        _, preds = torch.max(output, 1)
        model_predictions.extend(preds.cpu().numpy())
        # Full softmax probabilities (all classes are needed for the multiclass ROC)
        probs = F.softmax(output, dim=1)
        model_probabilities.append(probs.cpu().numpy())

model_predictions = np.array(model_predictions)
model_probabilities = np.vstack(model_probabilities)

# NOTE: this assumes the unbalanced BatchSampler yields the test set in its
# original order, so the rows line up with y_test / y_test_binarized.
plot_multiclass_roc_curve(y_test_binarized, model_probabilities, n_classes)
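# Optional addition (not in the original script): report per-class AUC with the
# disease names from class_names_mapping, recomputed from the same arrays used for
# the ROC plot above. Assumes all six classes appear in y_test.
for i in range(n_classes):
    fpr_i, tpr_i, _ = roc_curve(y_test_binarized[:, i], model_probabilities[:, i])
    print(f"AUC for {class_names_mapping[i]}: {auc(fpr_i, tpr_i):.3f}")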
# Sensitivity = TP / (TP + FN); Specificity = TN / (TN + FP),
# i.e. the fraction of individuals without the illness that are correctly identified.
def sensitivity_specificity(conf_matrix):
    num_classes = conf_matrix.shape[0]
    sensitivity = np.zeros(num_classes)
    specificity = np.zeros(num_classes)
    for i in range(num_classes):
        TP = conf_matrix[i, i]
        FN = conf_matrix[i, :].sum() - TP
        FP = conf_matrix[:, i].sum() - TP
        TN = conf_matrix.sum() - (TP + FP + FN)
        sensitivity[i] = TP / (TP + FN) if (TP + FN) != 0 else 0
        specificity[i] = TN / (TN + FP) if (TN + FP) != 0 else 0
    return sensitivity, specificity

# Calculate per-class sensitivity and specificity from the confusion matrix
conf_matrix = confusion_matrix(y_test, model_predictions)
sensitivity, specificity = sensitivity_specificity(conf_matrix)
print(f"Sensitivity: {sensitivity}")
print(f"Specificity: {specificity}")
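# Optional addition (not in the original script): visualize the confusion matrix
# computed above as a seaborn heatmap, labelled with the disease names.
class_names = [class_names_mapping[i] for i in range(n_classes)]
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted label')
plt.ylabel('True label')
plt.title('Confusion matrix on the test set')
plt.tight_layout()
plt.show()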
##################################################################################

# Display the images, one for each class
def display_images(images, titles, num_images):
    plt.figure(figsize=(15, 5))
    for i in range(num_images):
        image = np.squeeze(images[i])  # squeeze to 2D so it is easy to plot
        plt.subplot(1, num_images, i + 1)
        plt.imshow(image, cmap='gray')
        plt.title(titles[i])
        plt.axis('off')
    plt.show()
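# Optional usage sketch (not in the original script): show one training image per
# class with display_images. Assumes data_train is laid out as loaded above
# (one image per row, channel dimension handled by np.squeeze).
example_images = []
example_titles = []
for class_id in unique_labels:
    idx = int(np.where(y_train == class_id)[0][0])  # first image of this class
    example_images.append(data_train[idx])
    example_titles.append(class_names_mapping[class_id])
display_images(example_images, example_titles, num_images=len(example_images))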