thoracic-disease-classifier / dc1 /image_dataset.py

initial commit

60b0ddc verified 10 months ago

3.58 kB

	import numpy as np
	import torch
	import torchvision.transforms as T
	import torchvision.transforms.functional as TF
	import requests
	import io
	from os import path
	from typing import Tuple, List
	from pathlib import Path
	import os


	class ImageDataset:
	    """
	    Indexable dataset built from two ``.npy`` files (images and labels).

	    The data is kept in numpy arrays for as long as possible; an image is
	    only converted to a torch tensor (the object that is passed through the
	    neural network) at the moment it is requested via ``dataset[idx]``.
	    """

	    def __init__(self, x: Path, y: Path) -> None:
	        """
	        Loads the image and label arrays from local storage.

	        Input:
	        x: local path of the ``.npy`` file with the images
	        y: local path of the ``.npy`` file with the target labels
	        """
	        # Target labels
	        self.targets = ImageDataset.load_numpy_arr_from_npy(y)
	        # Images
	        self.imgs = ImageDataset.load_numpy_arr_from_npy(x)

	    def __len__(self) -> int:
	        """Returns the number of samples (one per target label)."""
	        return len(self.targets)

	    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, np.ndarray]:
	        """
	        Returns the preprocessed image and its label for one sample.

	        The random flips and the additive noise are re-drawn on every
	        access, so requesting the same index twice generally yields two
	        different tensors.

	        Input:
	        idx: index of the sample

	        Outputs:
	        image: float tensor after normalization, resizing, cropping,
	        rotation, random flips and additive noise
	        label: entry of the target array at position idx
	        """
	        # Division by 255 presumably maps 8-bit pixel values to [0, 1]
	        # (assumption: verify against the stored dtype of the images).
	        image = torch.from_numpy(self.imgs[idx] / 255).float()
	        label = self.targets[idx]

	        # Per-image statistics; std is guarded so that a constant image
	        # does not make T.Normalize raise a division-by-zero ValueError.
	        mean, std = self._normalization_stats(image)

	        # The pipeline is rebuilt per item because mean/std differ per image.
	        # NOTE(review): T.Normalize expects a (C, H, W)-shaped tensor, so each
	        # stored image is assumed to carry a channel axis - confirm.
	        compose = T.Compose([
	            T.Normalize(mean, std),  # Zero mean, unit variance per image
	            T.Resize(156),  # Shorter edge becomes 156 px (aspect ratio kept)
	            T.CenterCrop(128),  # Keep the central 128x128 region
	            T.Lambda(lambda x: TF.rotate(x, angle=90)),  # Fixed 90 degree rotation
	            T.RandomHorizontalFlip(p=0.5),  # Horizontal flip with 50% probability
	            T.RandomVerticalFlip(p=0.5),  # Vertical flip with 50% probability
	            T.Lambda(lambda x: x + torch.randn_like(x) * 0.1),  # Gaussian noise, sigma 0.1
	        ])

	        # Apply the transformations bundled in compose
	        image = compose(image)

	        return image, label

	    def get_labels(self) -> list:
	        """Returns all target labels as a plain (possibly nested) Python list."""
	        return self.targets.tolist()

	    @staticmethod
	    def _normalization_stats(image: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
	        """
	        Computes the mean and standard deviation used to normalize one image.

	        Input:
	        image: image tensor

	        Outputs:
	        mean: scalar tensor with the mean over all elements
	        std: scalar tensor with the standard deviation over all elements,
	        replaced by 1 when it is exactly 0 (constant image) so that the
	        normalization never divides by zero
	        """
	        mean = image.mean()
	        std = image.std()
	        if std == 0:
	            # Constant image: normalizing with std=1 simply centers it at 0.
	            std = torch.ones_like(std)
	        return mean, std

	    @staticmethod
	    def load_numpy_arr_from_npy(path: Path) -> np.ndarray:
	        """
	        Loads a numpy array from local storage.

	        Input:
	        path: local path of file

	        Outputs:
	        dataset: numpy array with input features or labels
	        """

	        return np.load(path)


	def load_numpy_arr_from_url(url: str, timeout: float = 60.0) -> np.ndarray:
	    """
	    Downloads a ``.npy`` file (e.g. from surfdrive) and loads it as a numpy array.

	    Input:
	    url: Download link of dataset
	    timeout: seconds to wait for the server to connect / send data before
	    giving up; without it a stalled connection would block forever

	    Outputs:
	    dataset: numpy array with input features or labels

	    Raises:
	    requests.HTTPError: if the server answers with an error status
	    requests.Timeout: if the server does not respond within ``timeout``
	    """

	    response = requests.get(url, timeout=timeout)
	    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
	    response.raise_for_status()

	    # np.load needs a file-like object, so wrap the downloaded bytes.
	    return np.load(io.BytesIO(response.content))


	if __name__ == "__main__":
	    # Script entry point: download the train/test labels and images and
	    # store them as .npy files in a "data" directory below the current
	    # working directory.
	    data_dir = path.join(os.getcwd(), "data")
	    if path.isdir(data_dir):
	        print("Data directory exists, files may be overwritten!")
	    else:
	        # The directory path is built with path.join (not string
	        # concatenation), so the check above looks at the same directory
	        # that is created here and re-running the script no longer fails.
	        os.makedirs(data_dir, exist_ok=True)

	    # Target file name -> download link; labels first, then the images.
	    downloads = {
	        "Y_train.npy": "https://surfdrive.surf.nl/files/index.php/s/i6MvQ8nqoiQ9Tci/download",
	        "Y_test.npy": "https://surfdrive.surf.nl/files/index.php/s/wLXiOjVAW4AWlXY/download",
	        "X_train.npy": "https://surfdrive.surf.nl/files/index.php/s/4rwSf9SYO1ydGtK/download",
	        "X_test.npy": "https://surfdrive.surf.nl/files/index.php/s/dvY2LpvFo6dHef0/download",
	    }
	    for filename, url in downloads.items():
	        array = load_numpy_arr_from_url(url=url)
	        np.save(path.join(data_dir, filename), array)