# NOTE: stray non-Python artifact lines ("Spaces:" / "Runtime error") removed from the top of this file.
# 24 joints instead of 20!! | |
import gzip | |
import json | |
import os | |
import random | |
import math | |
import numpy as np | |
import torch | |
import torch.utils.data as data | |
from importlib_resources import open_binary | |
from scipy.io import loadmat | |
from tabulate import tabulate | |
import itertools | |
import json | |
from scipy import ndimage | |
from csv import DictReader | |
from pycocotools.mask import decode as decode_RLE | |
import sys | |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..', '..')) | |
# import stacked_hourglass.res | |
# from stacked_hourglass.datasets.common import DataInfo | |
# from configs.data_info import COMPLETE_DATA_INFO | |
# from configs.anipose_data_info import COMPLETE_DATA_INFO_24 | |
from src.configs.data_info import COMPLETE_DATA_INFO_24 | |
from src.stacked_hourglass.utils.imutils import load_image, draw_labelmap, draw_multiple_labelmaps | |
from src.stacked_hourglass.utils.misc import to_torch | |
from src.stacked_hourglass.utils.transforms import shufflelr, crop, color_normalize, fliplr, transform | |
import src.stacked_hourglass.datasets.utils_stanext as utils_stanext | |
from src.stacked_hourglass.utils.visualization import save_input_image_with_keypoints | |
class DogsVOC(data.Dataset):
    """Dog crops from the Pascal VOC parts data, prepared for a stacked-hourglass
    network using the 24-joint StanExt keypoint layout.

    Each sample is a color-normalized, cropped input image plus an all-zero
    24-channel keypoint target (VOC dogs carry no keypoint annotations here) and
    a meta dict; in 'keyp_and_seg_and_partseg' mode the meta dict additionally
    carries the binary silhouette and a 3-channel body-part label matrix.

    NOTE(review): __len__ and the returned image/mask are driven by the VOC
    lists (self.train_set / self.val_set), while `name` in meta comes from the
    StanExt name lists indexed with the same `index` — the two lists have
    different lengths and orderings, so the pairing looks vestigial; confirm
    before relying on meta['name'].
    """

    # Keypoint/data statistics for the 24-joint setup ("24 joints instead of 20").
    DATA_INFO = COMPLETE_DATA_INFO_24

    # Suggested joints to use for average PCK calculations.
    ACC_JOINTS = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 16]  # don't know ...

    def __init__(self, image_path=None, is_train=True, inp_res=256, out_res=64, sigma=1,
                 scale_factor=0.25, rot_factor=30, label_type='Gaussian',
                 do_augment='default', shorten_dataset_to=None, dataset_mode='keyp_only', V12=None):
        """Load the StanExt split dictionaries and the VOC dog image/mask lists.

        Args:
            image_path: unused here; images/masks are read from hard-coded roots.
            is_train: select the training split (else the validation split).
            inp_res: side length of the network input crop.
            out_res: side length of the heatmap targets.
            sigma: Gaussian sigma for heatmap targets (not used by this class's
                __getitem__, which emits zero targets).
            scale_factor: range of the random-scale augmentation.
            rot_factor: range of the random-rotation augmentation (degrees).
            label_type: heatmap label type (e.g. 'Gaussian'); stored only.
            do_augment: 'yes' | 'no' | 'default' (default = augment iff training).
            shorten_dataset_to: optionally truncate both name lists to this length.
            dataset_mode: which annotations to assemble; segmentation masks are
                loaded for 'complete', 'keyp_and_seg' and 'keyp_and_seg_and_partseg'.
            V12: flag forwarded to the StanExt loading utilities (version switch).

        Raises:
            ValueError: if do_augment is not one of 'yes' / 'no' / 'default'.
        """
        # self.img_folder_mpii = image_path # root image folders
        self.V12 = V12
        self.is_train = is_train # training set or test set
        if do_augment == 'yes':
            self.do_augment = True
        elif do_augment == 'no':
            self.do_augment = False
        elif do_augment=='default':
            # Default policy: augment only when training.
            if self.is_train:
                self.do_augment = True
            else:
                self.do_augment = False
        else:
            raise ValueError
        self.inp_res = inp_res
        self.out_res = out_res
        self.sigma = sigma
        self.scale_factor = scale_factor
        self.rot_factor = rot_factor
        self.label_type = label_type
        self.dataset_mode = dataset_mode
        # Segmentation masks are only loaded for the modes that consume them.
        if self.dataset_mode=='complete' or self.dataset_mode=='keyp_and_seg' or self.dataset_mode=='keyp_and_seg_and_partseg':
            self.calc_seg = True
        else:
            self.calc_seg = False
        # create train/val split
        # REMARK: I assume we should have a different train / test split here
        self.img_folder = utils_stanext.get_img_dir(V12=self.V12)
        self.train_dict, self.test_dict, self.val_dict = utils_stanext.load_stanext_json_as_dict(split_train_test=True, V12=self.V12)
        self.train_name_list = list(self.train_dict.keys())    # 7004
        self.test_name_list = list(self.test_dict.keys())    # 5031
        # breed json_path (cluster-local path; only meaningful on that filesystem)
        breed_json_path = '/ps/scratch/nrueegg/new_projects/Animals/data/dog_datasets/Stanford_Dogs_Dataset/StanfordExtra/StanExt_breed_dict_v2.json'
        # only use images that show fully visible dogs in standing or walking poses
        '''path_easy_images_list = '/ps/scratch/nrueegg/new_projects/Animals/data/dog_datasets/Stanford_Dogs_Dataset/StanfordExtra/AMT_StanExt_easy_images.txt'
        easy_images_list = [line.rstrip('\n') for line in open(path_easy_images_list)]
        self.train_name_list = sorted(list(set(easy_images_list) & set(self.train_name_list)))
        self.test_name_list = sorted(list(set(easy_images_list) & set(self.test_name_list)))'''
        # Sort then shuffle with a fixed seed so the ordering is reproducible
        # across runs.
        self.train_name_list = sorted(self.train_name_list)
        self.test_name_list = sorted(self.test_name_list)
        random.seed(4)
        random.shuffle(self.train_name_list)
        random.shuffle(self.test_name_list)
        if shorten_dataset_to is not None:
            self.train_name_list = self.train_name_list[0 : min(len(self.train_name_list), shorten_dataset_to)]
            self.test_name_list = self.test_name_list[0 : min(len(self.test_name_list), shorten_dataset_to)]
            # NOTE(review): debug behavior — with shorten_dataset_to == 12 the
            # whole test list is replaced by 12 copies of one sample.
            if shorten_dataset_to == 12:
                # my_sample = self.test_name_list[2]     # black haired dog
                my_sample = self.test_name_list[2]
                for ind in range(0, 12):
                    self.test_name_list[ind] = my_sample
        # add results for eyes, whithers and throat as obtained through anipose
        self.path_anipose_out_root = '/ps/scratch/nrueegg/new_projects/Animals/data/dog_datasets/Stanford_Dogs_Dataset/StanfordExtra/animalpose_hg8_v0_results_on_StanExt/'
        ###############################################
        # Pascal VOC parts: images, per-instance masks, and body-part label info.
        self.dogvoc_path_root = '/ps/scratch/nrueegg/new_projects/Animals/data/pascal_voc_parts/'
        self.dogvoc_path_images = self.dogvoc_path_root + 'dog_images/'
        self.dogvoc_path_masks = self.dogvoc_path_root + 'dog_masks/'
        with open(self.dogvoc_path_masks + 'voc_dogs_bodypart_info.json', 'r') as file:
            self.body_part_info = json.load(file)
        with open(self.dogvoc_path_masks + 'voc_dogs_train.json', 'r') as file:
            train_set_init = json.load(file)    # 707
        with open(self.dogvoc_path_masks + 'voc_dogs_val.json', 'r') as file:
            val_set_init = json.load(file)  # 709
        # Custom split: training set = VOC train + all but the last 36 val
        # images; validation set = those last 36 val images.
        self.train_set = train_set_init + val_set_init[:-36]
        self.val_set = val_set_init[-36:]
        print('len(dataset): ' + str(self.__len__()))
        # print(self.test_name_list[0:10])

    def get_body_part_indices(self):
        """Return the grouping of raw VOC-parts mask pixel values into body parts.

        Returns:
            dict mapping group name ('silh', 'full_body', 'head', 'torso') to an
            ordered list of (part_name, [raw mask label ids]) tuples. The raw ids
            are the integer pixel values found in the .npz.npy part masks; the
            position of each tuple within its list is the sub-part index written
            into the body-part matrix in __getitem__.
        """
        silh = [
            ('background', [0]),
            ('foreground', [255, 21, 57, 30, 59, 34, 48, 50, 79, 49, 61, 60, 54, 53, 36, 35, 27, 26, 78])]
        full_body = [
            ('other', [255]),
            ('head', [21, 57, 30, 59, 34, 48, 50]),
            ('torso', [79, 49]),
            ('right front leg', [61, 60]),
            ('right back leg', [54, 53]),
            ('left front leg', [36, 35]),
            ('left back leg', [27, 26]),
            ('tail', [78])]
        head = [
            ('other', [21, 59, 34]),
            ('right ear', [57]),
            ('left ear', [30]),
            ('muzzle', [48]),
            ('nose', [50])]
        torso = [
            ('other', [79]),    # wrong 34
            ('neck', [49])]
        all_parts = {
            'silh': silh,
            'full_body': full_body,
            'head': head,
            'torso': torso}
        return all_parts

    def __getitem__(self, index):
        """Assemble one sample.

        Returns (only for dataset_mode == 'keyp_and_seg_and_partseg'; any other
        mode raises ValueError):
            inp: color-normalized input crop, (3, inp_res, inp_res).
            target: zero keypoint heatmaps, (24, out_res, out_res) — VOC dogs
                have no keypoint labels in this class.
            meta: dict with center/scale, zero pts/tpts/target_weight,
                ind_dataset=3 (dataset identifier for downstream code —
                presumably distinguishes this dataset; confirm against trainer),
                plus 'silh', 'name' and 'body_part_matrix'.
        """
        # NOTE(review): `name`/`data` come from the StanExt dicts but the actual
        # image/mask below come from the VOC lists — see class docstring.
        if self.is_train:
            name = self.train_name_list[index]
            data = self.train_dict[name]
            # data = utils_stanext.get_dog(self.train_dict, name)
        else:
            name = self.test_name_list[index]
            data = self.test_dict[name]
            # data = utils_stanext.get_dog(self.test_dict, name)
        # self.do_augment = False
        # index = 5  ##########################
        if self.is_train:
            img_info = self.train_set[index]
        else:
            img_info = self.val_set[index]
        sf = self.scale_factor
        rf = self.rot_factor

        img_path = os.path.join(self.dogvoc_path_images, img_info['img_name'])
        # bbox_yxhw = img_info['bbox']
        # bbox_xywh = [bbox_yxhw[1], bbox_yxhw[0], bbox_yxhw[2], bbox_yxhw[3]]
        # bbox is assumed to be [x, y, w, h] — TODO confirm against the json files.
        bbox_xywh = img_info['bbox']
        bbox_c = [bbox_xywh[0]+0.5*bbox_xywh[2], bbox_xywh[1]+0.5*bbox_xywh[3]]
        bbox_max = max(bbox_xywh[2], bbox_xywh[3])
        bbox_diag = math.sqrt(bbox_xywh[2]**2 + bbox_xywh[3]**2)
        # bbox_s = bbox_max / 200.  # the dog will fill the image -> bbox_max = 256
        # bbox_s = bbox_diag / 200.  # diagonal of the boundingbox will be 200
        bbox_s = bbox_max / 200. * 256. / 200.  # maximum side of the bbox will be 200
        c = torch.Tensor(bbox_c)
        s = bbox_s

        # For single-person pose estimation with a centered/scaled figure
        img = load_image(img_path)  # CxHxW
        # img_test = img[0, img_info['bbox'][1]:img_info['bbox'][1]+img_info['bbox'][3], img_info['bbox'][0]:img_info['bbox'][0]+img_info['bbox'][2]]
        # import cv2
        # cv2.imwrite('/ps/scratch/nrueegg/new_projects/Animals/dog_project/pytorch-stacked-hourglass/yy.png', np.asarray(img_test*255, np.uint8))

        # segmentation map (we reshape it to 3xHxW, such that we can do the
        # same transformations as with the image)
        # The flip decision is made up-front because the body-part sub-part
        # indices must be mirrored (left<->right) before the pixel flip happens.
        if self.do_augment and (random.random() <= 0.5):
            do_flip = True
        else:
            do_flip = False
        if self.calc_seg:
            # Per-instance part mask; pixel values are the raw VOC-parts label
            # ids listed in get_body_part_indices().
            mask = np.load(os.path.join(self.dogvoc_path_masks, img_info['img_name'].split('.')[0] + '_' + str(img_info['ind_bbox']) + '.npz.npy'))
            # Binary silhouette, replicated to 3 channels so it can go through
            # the same image transforms (flip/crop) as the RGB image.
            seg_np = mask.copy()
            seg_np[mask==0] = 0
            seg_np[mask>0] = 1
            seg = torch.Tensor(seg_np[None, :, :])
            seg = torch.cat(3*[seg])

            # NEW: body parts
            all_parts = self.get_body_part_indices()
            body_part_index_list = []
            body_part_name_list = []
            n_tbp = 3   # number of "top-level" body part groups (channels)
            n_bp = 15   # total number of sub-parts across all groups
            # body_part_matrix_multiple_hot = np.zeros((n_bp, mask.shape[0], mask.shape[1]))
            # One channel per group (0: full_body, 1: head, 2: torso); pixel
            # value = sub-part index within the group, -1 where unassigned.
            body_part_matrix_np = np.ones((n_tbp, mask.shape[0], mask.shape[1])) * (-1)
            ind_bp = 0
            for ind_tbp, part in enumerate(['full_body', 'head', 'torso']):
                # import pdb; pdb.set_trace()
                # Left/right swap tables: when the image will be mirrored, a
                # sub-part index is replaced by its opposite-side counterpart.
                if part == 'full_body':
                    inds_mirr = [0, 1, 2, 5, 6, 3, 4, 7]
                elif part == 'head':
                    inds_mirr = [0, 2, 1, 3, 4]
                else:
                    inds_mirr = [0, 1]
                for ind_sbp, subpart in enumerate(all_parts[part]):
                    if do_flip:
                        ind_sbp_corr = inds_mirr[ind_sbp]   # we use this if the image is mirrored later on
                    else:
                        ind_sbp_corr = ind_sbp
                    bp_name = subpart[0]
                    bp_indices = subpart[1]
                    body_part_index_list.append(bp_indices)
                    body_part_name_list.append(bp_name)
                    # create matrix slice: union of all raw label ids belonging
                    # to this sub-part
                    xx = [mask==ind for ind in bp_indices]
                    xx_mat = (np.stack(xx).sum(axis=0))
                    # body_part_matrix_multiple_hot[ind_bp, :, :] = xx_mat
                    # add to matrix
                    body_part_matrix_np[ind_tbp, xx_mat>0] = ind_sbp_corr
                    ind_bp += 1
            # Per-channel validity masks: full-body labels are valid on the
            # whole silhouette; head/torso labels only inside the corresponding
            # full-body region (full_body sub-part 1 = head, 2 = torso).
            body_part_weight_masks_np = np.zeros((n_tbp, mask.shape[0], mask.shape[1]))
            body_part_weight_masks_np[0, mask>0] = 1    # full body
            body_part_weight_masks_np[1, body_part_matrix_np[0, :, :]==1] = 1   # head
            body_part_weight_masks_np[2, body_part_matrix_np[0, :, :]==2] = 1   # torso
            # 16 is a sentinel for "outside the valid region of this channel".
            body_part_matrix_np[body_part_weight_masks_np==0] = 16
            # Shift all labels by +2 so that, after crop() rescales by /255 and
            # zero-pads, the original values can be recovered (padding -> -2).
            body_part_matrix = torch.Tensor(body_part_matrix_np + 2.0)  # / 100

            # import pdb; pdb.set_trace()
            bbox_c_int0 = [int(bbox_c[0]), int(bbox_c[1])]
            bbox_c_int1 = [int(bbox_c[0])+10, int(bbox_c[1])+10]
            '''bpm_c0 = body_part_matrix[:, bbox_c_int0[1], bbox_c_int0[0]].clone()
            bpm_c1 = body_part_matrix[:, bbox_c_int1[1], bbox_c_int1[0]].clone()
            zero_replacement = torch.Tensor([0, 0, 0.99])
            body_part_matrix[:, bbox_c_int0[1], bbox_c_int0[0]] = zero_replacement
            body_part_matrix[:, bbox_c_int1[1], bbox_c_int1[0]] = 1'''
            # Stamp small 0-valued and 255-valued patches near the bbox center
            # so the tensor always spans the full [0, 255] range — presumably to
            # pin crop()'s internal rescaling; confirm against crop()'s
            # implementation. The 255 marker is mapped to -1 again below (253
            # after the -2 shift).
            ii = 3
            # NOTE(review): bpm_c0 / bpm_c1 are saved but never restored or
            # read afterwards — leftover from the commented-out variant above?
            bpm_c0 = body_part_matrix[2, bbox_c_int0[1]-ii:bbox_c_int0[1]+ii, bbox_c_int0[0]-ii:bbox_c_int0[0]+ii]
            bpm_c1 = body_part_matrix[2, bbox_c_int1[1]-ii:bbox_c_int1[1]+ii, bbox_c_int1[0]-ii:bbox_c_int1[0]+ii]
            body_part_matrix[2, bbox_c_int0[1]-ii:bbox_c_int0[1]+ii, bbox_c_int0[0]-ii:bbox_c_int0[0]+ii] = 0
            body_part_matrix[2, bbox_c_int1[1]-ii:bbox_c_int1[1]+ii, bbox_c_int1[0]-ii:bbox_c_int1[0]+ii] = 255
            body_part_matrix = (body_part_matrix).long()

            # body_part_name_list
            # ['other', 'head', 'torso', 'right front leg', 'right back leg', 'left front leg', 'left back leg', 'tail', 'other', 'right ear', 'left ear', 'muzzle', 'nose', 'other', 'neck']
            # swap indices:
            # bp_mirroring_inds = [0, 1, 2, 5, 6, 3, 4, 7, 8, 10, 9, 11, 12, 13, 14]

        r = 0
        # self.is_train = False
        if self.do_augment:
            # Random scale in [1-sf, 1+sf]; random rotation in [-2*rf, 2*rf]
            # applied with probability 0.6.
            s = s*torch.randn(1).mul_(sf).add_(1).clamp(1-sf, 1+sf)[0]
            r = torch.randn(1).mul_(rf).clamp(-2*rf, 2*rf)[0] if random.random() <= 0.6 else 0

            # Flip
            if do_flip:
                img = fliplr(img)
                if self.calc_seg:
                    seg = fliplr(seg)
                    body_part_matrix = fliplr(body_part_matrix)
                c[0] = img.size(2) - c[0]

            # Color: independent per-channel brightness jitter in [0.8, 1.2].
            img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.DATA_INFO.rgb_mean, self.DATA_INFO.rgb_stddev)

        # import pdb; pdb.set_trace()
        if self.calc_seg:
            seg = crop(seg, c, s, [self.inp_res, self.inp_res], rot=r)
            # 'crop' will divide by 255 and perform zero padding (
            #  -> weird function that tries to rescale! Because of that I add zeros and ones in the beginning
            # NOTE(review): `xx` is assigned but never used — debug leftover.
            xx = body_part_matrix.clone()
            # import pdb; pdb.set_trace()
            body_part_matrix = crop(body_part_matrix, c, s, [self.inp_res, self.inp_res], rot=r, interp='nearest')
            # Undo the /255 rescale and the +2 label shift, then map all
            # sentinels back to -1: -2 = zero padding, 16 = outside the valid
            # channel region, 253 = the 255 marker patch stamped above.
            body_part_matrix = body_part_matrix*255 - 2
            body_part_matrix[body_part_matrix == -2] = -1
            body_part_matrix[body_part_matrix == 16] = -1
            body_part_matrix[body_part_matrix == 253] = -1

            '''print(np.unique(body_part_matrix.numpy()))
            print(np.unique(body_part_matrix[0, :, :].numpy()))
            print(np.unique(body_part_matrix[1, :, :].numpy()))
            print(np.unique(body_part_matrix[2, :, :].numpy()))'''

        # import cv2
        # cv2.imwrite('/ps/scratch/nrueegg/new_projects/Animals/dog_project/pytorch-stacked-hourglass/yy2.png', np.asarray((inp[0, :, :]+1)*100, np.uint8))
        # cv2.imwrite('/ps/scratch/nrueegg/new_projects/Animals/dog_project/pytorch-stacked-hourglass/yy3.png', (40*(1+body_part_matrix[0, :, :].numpy())).astype(np.uint8))

        # Generate ground truth — all zeros: no keypoint labels for VOC dogs.
        nparts = 24
        target_weight = torch.zeros(nparts, 1)
        target = torch.zeros(nparts, self.out_res, self.out_res)
        pts = torch.zeros((nparts, 3))
        tpts = torch.zeros((nparts, 3))

        # import pdb; pdb.set_trace()
        # meta = {'index' : index, 'center' : c, 'scale' : s, 'do_flip' : do_flip, 'rot' : r, 'resolution' : [self.out_res, self.out_res], 'name' : name,
        # 'pts' : pts, 'tpts' : tpts, 'target_weight': target_weight, 'breed_index': this_breed['index']}
        # meta = {'index' : index, 'center' : c, 'scale' : s, 'do_flip' : do_flip, 'rot' : r, 'resolution' : self.out_res,
        # 'pts' : pts, 'tpts' : tpts, 'target_weight': target_weight, 'breed_index': this_breed['index']}
        # meta = {'index' : index, 'center' : c, 'scale' : s,
        #         'pts' : pts, 'tpts' : tpts, 'target_weight': target_weight,
        #         'breed_index': this_breed['index'], 'sim_breed_index': sim_breed_index,
        #         'ind_dataset': 0}   # ind_dataset: 0 for stanext or stanexteasy or stanext 24
        meta = {'index' : index, 'center' : c, 'scale' : s,
                'pts' : pts, 'tpts' : tpts, 'target_weight': target_weight,
                'ind_dataset': 3}

        # import pdb; pdb.set_trace()
        if self.dataset_mode=='keyp_and_seg_and_partseg':
            # meta = {}
            meta['silh'] = seg[0, :, :]
            meta['name'] = name
            meta['body_part_matrix'] = body_part_matrix.long()
            # meta['body_part_weights'] = body_part_weight_masks
            # import pdb; pdb.set_trace()
            return inp, target, meta
        else:
            # Only the part-segmentation mode is wired up for this dataset.
            raise ValueError

    def __len__(self):
        """Return the number of VOC samples in the active split."""
        if self.is_train:
            return len(self.train_set)  # len(self.train_list)
        else:
            return len(self.val_set)    # len(self.valid_list)