|
|
|
|
|
|
|
|
|
|
|
import os |
|
from pathlib import Path |
|
|
|
import numpy as np |
|
import pandas as pd |
|
from torch.utils.data import Dataset |
|
from torchvision.datasets.folder import default_loader |
|
|
|
from dataset_classes.utils import txt_load |
|
|
|
|
|
class CUB200Class(Dataset):
    """CUB-200-2011 birds dataset.

    Joins the standard ``images.txt`` / ``image_class_labels.txt`` /
    ``train_test_split.txt`` metadata files into one DataFrame and serves
    ``(transformed_image, zero_based_label)`` pairs.  When ``crop=True`` the
    dataset reads from a pre-cropped copy of the images under ``crop_root``.
    """

    # Default locations; ``root``/``base_folder`` are rebound on the instance
    # by adapt_to_crop() when the cropped copy is requested.
    root = Path.home() / "tmp/Datasets/CUB200"
    crop_root = Path.home() / "tmp/Datasets/PPCUB200"
    base_folder = 'CUB_200_2011/images'

    def __init__(self, train, transform, crop=True):
        """
        Args:
            train: if True use the training split, otherwise the test split.
            transform: callable applied to every loaded PIL image.
            crop: if True, serve images from the pre-cropped dataset copy.
        """
        self.train = train
        self.transform = transform
        self.crop = crop
        self._load_metadata()
        self.loader = default_loader

        if crop:
            self.adapt_to_crop()

    def _load_metadata(self):
        """Read metadata files, merge them on img_id and keep one split."""
        images = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'images.txt'), sep=' ',
                             names=['img_id', 'filepath'])
        image_class_labels = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'image_class_labels.txt'),
                                         sep=' ', names=['img_id', 'target'])
        train_test_split = pd.read_csv(os.path.join(self.root, 'CUB_200_2011', 'train_test_split.txt'),
                                       sep=' ', names=['img_id', 'is_training_img'])
        data = images.merge(image_class_labels, on='img_id')
        self.data = data.merge(train_test_split, on='img_id')
        # is_training_img is 1 for train rows, 0 for test rows.
        wanted = 1 if self.train else 0
        self.data = self.data[self.data.is_training_img == wanted]

    def __len__(self):
        return len(self.data)

    def adapt_to_crop(self):
        """Rebind root/base_folder (on the instance) to the cropped copy."""
        self.root = self.crop_root
        split = "train" if self.train else "test"
        self.base_folder = 'CUB_200_2011/' + split + "_cropped"

    def __getitem__(self, idx):
        """Return ``(transform(image), target)`` for row ``idx``.

        Targets in the metadata are 1-based; they are shifted to 0-based
        class indices here.
        """
        sample = self.data.iloc[idx]
        path = os.path.join(self.root, self.base_folder, sample.filepath)
        target = sample.target - 1  # file stores 1-based class ids
        img = self.loader(path)
        img = self.transform(img)
        return img, target

    @classmethod
    def get_image_attribute_labels(cls, train=False):
        """Load per-image attribute annotations for one split.

        Args:
            train: if True keep rows of training images, else test images.

        Returns:
            DataFrame with columns img_id, attribute, is_present, certainty,
            time and is_training_img.
        """
        # NOTE: previously this hard-coded '/home/qixuan/tmp/Datasets/CUB200';
        # cls.root resolves to the same location for that user but stays
        # portable for everyone else and consistent with _load_metadata().
        image_attribute_labels = pd.read_csv(
            os.path.join(cls.root, 'CUB_200_2011', "attributes",
                         'image_attribute_labels.txt'),
            sep=' ', names=['img_id', 'attribute', "is_present", "certainty", "time"], on_bad_lines="skip")
        train_test_split = pd.read_csv(os.path.join(cls.root, 'CUB_200_2011', 'train_test_split.txt'),
                                       sep=' ', names=['img_id', 'is_training_img'])
        merged = image_attribute_labels.merge(train_test_split, on="img_id")
        filtered_data = merged[merged["is_training_img"] == train]
        return filtered_data

    @staticmethod
    def filter_attribute_labels(labels, min_certainty=3):
        """Clean raw attribute annotations.

        Keeps rows whose certainty is at least ``min_certainty``, re-bases
        ``img_id`` and ``attribute`` to dense zero-based indices and casts
        ``is_present`` to bool.  Certainty level 1 means "not visible": such
        rows must never be marked present, and they are treated as maximally
        certain *absence* (certainty 4) so the threshold keeps them.

        Args:
            labels: DataFrame with img_id/attribute/is_present/certainty.
            min_certainty: minimum certainty level required to keep a row.

        Returns:
            New DataFrame with columns img_id, attribute, is_present.

        Raises:
            ValueError: if any "not visible" row is marked present.
        """
        invisible = labels["certainty"] == 1
        if labels.loc[invisible, "is_present"].sum() != 0:
            raise ValueError("Invisible present")
        # Work on a copy so the caller's frame is not mutated in place.
        labels = labels.copy()
        labels["img_id"] -= labels["img_id"].min()
        labels["img_id"] = fillholes_in_array(labels["img_id"])
        # BUGFIX: the original chained assignment
        # ``labels[mask]["certainty"] = 4`` wrote to a temporary copy (a
        # pandas no-op), silently dropping the "not visible" rows below.
        labels.loc[invisible, "certainty"] = 4
        labels = labels[labels["certainty"] >= min_certainty].copy()
        labels["attribute"] -= labels["attribute"].min()
        labels = labels[["img_id", "attribute", "is_present"]]
        labels["is_present"] = labels["is_present"].astype(bool)
        return labels
|
|
|
|
|
|
|
def fillholes_in_array(array):
    """Map each distinct value of *array* to its sorted rank (0..k-1).

    Closes gaps in a pandas Series of ids so the result is a dense,
    zero-based index preserving the original relative order of values.
    """
    rank_by_value = {value: rank for rank, value in enumerate(np.unique(array))}
    return array.map(rank_by_value)
|
|