import os |
import random |
import xml.etree.ElementTree as ET |
from typing import List, Tuple |
import numpy as np |
import torch.utils.data |
from PIL import Image, ImageOps |
from torch import Tensor |
from bbox import BBox |
from dataset.base import Base |
from voc_eval import voc_eval |
class VOC2007(Base):
    """PASCAL VOC 2007 detection dataset.

    Image ids are read from ``ImageSets/Main/trainval.txt`` (TRAIN mode) or
    ``ImageSets/Main/test.txt`` (EVAL mode) under
    ``<path_to_data_dir>/VOCdevkit/VOC2007``; every annotation XML is parsed
    eagerly in the constructor, while images are opened lazily per item.
    """

    class Annotation(object):
        """Parsed content of a single annotation XML file."""

        class Object(object):
            """One annotated object: category name, ``difficult`` flag and box."""

            def __init__(self, name: str, difficult: bool, bbox: BBox):
                super().__init__()
                self.name = name
                self.difficult = difficult
                self.bbox = bbox

            def __repr__(self) -> str:
                return 'Object[name={:s}, difficult={!s}, bbox={!s}]'.format(
                    self.name, self.difficult, self.bbox)

        def __init__(self, filename: str, objects: List[Object]):
            super().__init__()
            self.filename = filename
            self.objects = objects

    # Label 0 is reserved for the background; labels 1..20 are the object categories.
    CATEGORY_TO_LABEL_DICT = {
        'background': 0,
        'aeroplane': 1, 'bicycle': 2, 'bird': 3, 'boat': 4, 'bottle': 5,
        'bus': 6, 'car': 7, 'cat': 8, 'chair': 9, 'cow': 10,
        'diningtable': 11, 'dog': 12, 'horse': 13, 'motorbike': 14, 'person': 15,
        'pottedplant': 16, 'sheep': 17, 'sofa': 18, 'train': 19, 'tvmonitor': 20
    }

    LABEL_TO_CATEGORY_DICT = {v: k for k, v in CATEGORY_TO_LABEL_DICT.items()}

    def __init__(self, path_to_data_dir: str, mode: Base.Mode, image_min_side: float, image_max_side: float):
        """Index the image ids for ``mode`` and parse all of their annotations.

        Raises:
            ValueError: if ``mode`` is neither ``Mode.TRAIN`` nor ``Mode.EVAL``.
        """
        super().__init__(path_to_data_dir, mode, image_min_side, image_max_side)

        path_to_voc2007_dir = os.path.join(self._path_to_data_dir, 'VOCdevkit', 'VOC2007')
        path_to_imagesets_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
        path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')
        self._path_to_jpeg_images_dir = os.path.join(path_to_voc2007_dir, 'JPEGImages')

        if self._mode == VOC2007.Mode.TRAIN:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'trainval.txt')
        elif self._mode == VOC2007.Mode.EVAL:
            path_to_image_ids_txt = os.path.join(path_to_imagesets_main_dir, 'test.txt')
        else:
            raise ValueError('invalid mode')

        with open(path_to_image_ids_txt, 'r') as f:
            # Skip blank lines so a stray empty line does not become an empty image id.
            self._image_ids = [image_id for image_id in (line.rstrip() for line in f) if image_id]

        self._image_id_to_annotation_dict = {}
        self._image_ratios = []

        for image_id in self._image_ids:
            path_to_annotation_xml = os.path.join(path_to_annotations_dir, f'{image_id}.xml')
            root = ET.parse(path_to_annotation_xml).getroot()

            self._image_id_to_annotation_dict[image_id] = VOC2007._parse_annotation(root)

            # Width / height ratio, recorded in the same order as the image ids.
            width = int(root.find('size/width').text)
            height = int(root.find('size/height').text)
            self._image_ratios.append(width / height)

    @staticmethod
    def _parse_annotation(root: ET.Element) -> 'VOC2007.Annotation':
        """Build an ``Annotation`` from the root element of one annotation XML tree."""
        objects = []
        for tag_object in root.iterfind('object'):
            tag_bndbox = tag_object.find('bndbox')
            objects.append(VOC2007.Annotation.Object(
                name=tag_object.find('name').text,
                difficult=tag_object.find('difficult').text == '1',
                # Every coordinate is shifted by -1 (VOC pixel indices are
                # presumably 1-based; this makes them 0-based).
                bbox=BBox(
                    left=float(tag_bndbox.find('xmin').text) - 1,
                    top=float(tag_bndbox.find('ymin').text) - 1,
                    right=float(tag_bndbox.find('xmax').text) - 1,
                    bottom=float(tag_bndbox.find('ymax').text) - 1
                )
            ))
        return VOC2007.Annotation(filename=root.find('filename').text, objects=objects)

    def __len__(self) -> int:
        return len(self._image_id_to_annotation_dict)

    def __getitem__(self, index: int) -> Tuple[str, Tensor, Tensor, Tensor, Tensor]:
        """Return ``(image_id, image, scale, bboxes, labels)`` for the ``index``-th image id.

        Objects flagged ``difficult`` are dropped. In TRAIN mode the image and its
        boxes are mirrored horizontally with probability 0.5. Boxes are multiplied
        by the scale factor returned by ``preprocess``.
        """
        image_id = self._image_ids[index]
        annotation = self._image_id_to_annotation_dict[image_id]

        kept_objects = [obj for obj in annotation.objects if not obj.difficult]

        # Force an (N, 4) layout: without the reshape an image whose objects are all
        # difficult yields a shape-(0,) tensor and the column indexing below fails.
        bboxes = torch.tensor([obj.bbox.tolist() for obj in kept_objects], dtype=torch.float).reshape(-1, 4)
        labels = torch.tensor([VOC2007.CATEGORY_TO_LABEL_DICT[obj.name] for obj in kept_objects], dtype=torch.long)

        image = Image.open(os.path.join(self._path_to_jpeg_images_dir, annotation.filename))

        # random flip on only training mode
        if self._mode == VOC2007.Mode.TRAIN and random.random() > 0.5:
            image = ImageOps.mirror(image)
            # Mirror the x-coordinates; left/right swap so that left <= right still holds.
            bboxes[:, [0, 2]] = image.width - bboxes[:, [2, 0]]

        image, scale = VOC2007.preprocess(image, self._image_min_side, self._image_max_side)
        scale = torch.tensor(scale, dtype=torch.float)
        bboxes *= scale

        return image_id, image, scale, bboxes, labels

    def evaluate(self, path_to_results_dir: str, image_ids: List[str], bboxes: List[List[float]], classes: List[int], probs: List[float]) -> Tuple[float, str]:
        """Write detections to ``path_to_results_dir`` and score them with ``voc_eval``.

        Returns:
            ``(mean_ap, detail)`` where ``mean_ap`` is the mean of the per-class APs
            over labels 1..num_classes()-1 and ``detail`` holds one
            ``"<label>: <category> AP = <ap>"`` line per class.
        """
        self._write_results(path_to_results_dir, image_ids, bboxes, classes, probs)

        path_to_voc2007_dir = os.path.join(self._path_to_data_dir, 'VOCdevkit', 'VOC2007')
        path_to_main_dir = os.path.join(path_to_voc2007_dir, 'ImageSets', 'Main')
        path_to_annotations_dir = os.path.join(path_to_voc2007_dir, 'Annotations')

        # The cache directory is the same for every class, so create it once.
        path_to_cache_dir = os.path.join('caches', 'voc2007')
        os.makedirs(path_to_cache_dir, exist_ok=True)

        class_to_ap_dict = {}
        for c in range(1, VOC2007.num_classes()):
            category = VOC2007.LABEL_TO_CATEGORY_DICT[c]
            try:
                # Paths are built with os.path.join so they match the files
                # produced by _write_results.
                _, _, ap = voc_eval(detpath=os.path.join(path_to_results_dir, 'comp3_det_test_{:s}.txt'.format(category)),
                                    annopath=os.path.join(path_to_annotations_dir, '{:s}.xml'),
                                    imagesetfile=os.path.join(path_to_main_dir, 'test.txt'),
                                    classname=category,
                                    cachedir=path_to_cache_dir,
                                    ovthresh=0.5,
                                    use_07_metric=True)
            except IndexError:
                # NOTE(review): presumably raised by voc_eval when a class has no
                # detections; such a class is scored as AP 0 - confirm against voc_eval.
                ap = 0

            class_to_ap_dict[c] = ap

        mean_ap = np.mean(list(class_to_ap_dict.values())).item()

        detail = ''.join(
            '{:d}: {:s} AP = {:.4f}\n'.format(c, VOC2007.LABEL_TO_CATEGORY_DICT[c], class_to_ap_dict[c])
            for c in range(1, VOC2007.num_classes())
        )

        return mean_ap, detail

    def _write_results(self, path_to_results_dir: str, image_ids: List[str], bboxes: List[List[float]], classes: List[int], probs: List[float]):
        """Write one ``comp3_det_test_<category>.txt`` file per non-background class.

        Each detection becomes a line ``"<image_id> <prob> <b0> <b1> <b2> <b3>"`` in
        the file of its class. A file is created for every class, even when it
        receives no detections.

        Raises:
            KeyError: if ``classes`` contains a label outside 1..num_classes()-1.
        """
        if path_to_results_dir:
            os.makedirs(path_to_results_dir, exist_ok=True)

        class_to_txt_files_dict = {}
        try:
            for c in range(1, VOC2007.num_classes()):
                filename = 'comp3_det_test_{:s}.txt'.format(VOC2007.LABEL_TO_CATEGORY_DICT[c])
                class_to_txt_files_dict[c] = open(os.path.join(path_to_results_dir, filename), 'w')

            for image_id, bbox, cls, prob in zip(image_ids, bboxes, classes, probs):
                class_to_txt_files_dict[cls].write('{:s} {:f} {:f} {:f} {:f} {:f}\n'.format(image_id, prob,
                                                                                            bbox[0], bbox[1], bbox[2], bbox[3]))
        finally:
            # Close whatever was opened, even if opening or writing failed part-way.
            for f in class_to_txt_files_dict.values():
                f.close()

    @property
    def image_ratios(self) -> List[float]:
        """Width / height ratio of every image, in the same order as the image ids."""
        return self._image_ratios

    @staticmethod
    def num_classes() -> int:
        """Number of labels, counting the background label 0."""
        return 21