from .np_mask_list import *
from .metrics import *


class PerImageEvaluation:
    """Evaluate detection result of a single image."""

    def __init__(self,
                 num_gt_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=0.3,
                 nms_max_output_boxes=50,
                 group_of_weight=0.0):
        """Initialized PerImageEvaluation by evaluation parameters.
        Args:
            num_gt_classes: Number of ground truth object classes
            matching_iou_threshold: A ratio of area intersection to union, which is
                the threshold to consider whether a detection is true positive or not
            nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
            nms_max_output_boxes: Number of maximum output boxes in NMS.
            group_of_weight: Weight of the group-of boxes.
        """
        self.matching_iou_threshold = matching_iou_threshold
        self.nms_iou_threshold = nms_iou_threshold
        self.nms_max_output_boxes = nms_max_output_boxes
        self.num_gt_classes = num_gt_classes
        self.group_of_weight = group_of_weight

    def compute_object_detection_metrics(
            self, detected_boxes, detected_scores, detected_class_labels,
            gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list,
            detected_masks=None, gt_masks=None):
        """Evaluates detections as being tp, fp or weighted from a single image.
        The evaluation is done in two stages:
            1. All detections are matched to non group-of boxes; true positives are
               determined and detections matched to difficult boxes are ignored.
            2. Detections that are determined as false positives are matched against
               group-of boxes and weighted if matched.
        Args:
            detected_boxes: A float numpy array of shape [N, 4], representing N
                regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max]
            detected_scores: A float numpy array of shape [N, 1], representing the
                confidence scores of the detected N object instances.
            detected_class_labels: A integer numpy array of shape [N, 1], repreneting
                the class labels of the detected N object instances.
            gt_boxes: A float numpy array of shape [M, 4], representing M
                regions of object instances in ground truth
            gt_class_labels: An integer numpy array of shape [M, 1],
                representing M class labels of object instances in ground truth
            gt_is_difficult_list: A boolean numpy array of length M denoting
                whether a ground truth box is a difficult instance or not
            gt_is_group_of_list: A boolean numpy array of length M denoting
                whether a ground truth box has group-of tag
            detected_masks: (optional) A uint8 numpy array of shape [N, height,
                width]. If not None, the metrics will be computed based on masks.
            gt_masks: (optional) A uint8 numpy array of shape [M, height,
                width]. Can have empty masks, i.e. where all values are 0.
        Returns:
            scores: A list of C float numpy arrays. Each numpy array is of
                shape [K, 1], representing K scores detected with object class label c
            tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
                is of shape [K, 1], representing K True/False positive label of
                object instances detected with class label c
            is_class_correctly_detected_in_image: a numpy integer array of
                shape [C, 1], indicating whether the correponding class has a least
                one instance being correctly detected in the image
        """
        detected_boxes, detected_scores, detected_class_labels, detected_masks = (
            self._remove_invalid_boxes(detected_boxes, detected_scores, detected_class_labels, detected_masks))

        scores, tp_fp_labels = self._compute_tp_fp(
            detected_boxes=detected_boxes,
            detected_scores=detected_scores,
            detected_class_labels=detected_class_labels,
            gt_boxes=gt_boxes,
            gt_class_labels=gt_class_labels,
            gt_is_difficult_list=gt_is_difficult_list,
            gt_is_group_of_list=gt_is_group_of_list,
            detected_masks=detected_masks,
            gt_masks=gt_masks)

        is_class_correctly_detected_in_image = self._compute_cor_loc(
            detected_boxes=detected_boxes,
            detected_scores=detected_scores,
            detected_class_labels=detected_class_labels,
            gt_boxes=gt_boxes,
            gt_class_labels=gt_class_labels,
            detected_masks=detected_masks,
            gt_masks=gt_masks)

        return scores, tp_fp_labels, is_class_correctly_detected_in_image

    def _compute_cor_loc(
            self, detected_boxes, detected_scores, detected_class_labels,
            gt_boxes, gt_class_labels, detected_masks=None, gt_masks=None):
        """Compute CorLoc score for object detection result.
        Args:
            detected_boxes: A float numpy array of shape [N, 4], representing N
                regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max]
            detected_scores: A float numpy array of shape [N, 1], representing the
                confidence scores of the detected N object instances.
            detected_class_labels: A integer numpy array of shape [N, 1], repreneting
                the class labels of the detected N object instances.
            gt_boxes: A float numpy array of shape [M, 4], representing M
                regions of object instances in ground truth
            gt_class_labels: An integer numpy array of shape [M, 1],
                representing M class labels of object instances in ground truth
            detected_masks: (optional) A uint8 numpy array of shape [N, height, width].
                If not None, the scores will be computed based on masks.
            gt_masks: (optional) A uint8 numpy array of shape [M, height, width].
        Returns:
            is_class_correctly_detected_in_image: a numpy integer array of
                shape [C, 1], indicating whether the correponding class has a least
                one instance being correctly detected in the image
        Raises:
            ValueError: If detected masks is not None but groundtruth masks are None,
                or the other way around.
        """
        if (detected_masks is not None and gt_masks is None) or (
                detected_masks is None and gt_masks is not None):
            raise ValueError(
                'If `detected_masks` is provided, then `gt_masks` should also be provided.')

        is_class_correctly_detected_in_image = np.zeros(
            self.num_gt_classes, dtype=int)
        for i in range(self.num_gt_classes):
            (gt_boxes_at_ith_class, gt_masks_at_ith_class,
             detected_boxes_at_ith_class, detected_scores_at_ith_class,
             detected_masks_at_ith_class) = self._get_ith_class_arrays(
                detected_boxes, detected_scores, detected_masks,
                detected_class_labels, gt_boxes, gt_masks,
                gt_class_labels, i)
            is_class_correctly_detected_in_image[i] = (
                self._compute_is_class_correctly_detected_in_image(
                    detected_boxes=detected_boxes_at_ith_class,
                    detected_scores=detected_scores_at_ith_class,
                    gt_boxes=gt_boxes_at_ith_class,
                    detected_masks=detected_masks_at_ith_class,
                    gt_masks=gt_masks_at_ith_class))

        return is_class_correctly_detected_in_image

    def _compute_is_class_correctly_detected_in_image(
            self, detected_boxes, detected_scores, gt_boxes, detected_masks=None, gt_masks=None):
        """Compute CorLoc score for a single class.
        Args:
            detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
            detected_scores: A 1-d numpy array of length N representing classification score
            gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
            detected_masks: (optional) A np.uint8 numpy array of shape [N, height, width].
                If not None, the scores will be computed based on masks.
            gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width].
        Returns:
            is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether a
                class is correctly detected in the image or not
        """
        if detected_boxes.size > 0:
            if gt_boxes.size > 0:
                max_score_id = np.argmax(detected_scores)
                mask_mode = False
                if detected_masks is not None and gt_masks is not None:
                    mask_mode = True
                if mask_mode:
                    detected_boxlist = MaskList(
                        box_data=np.expand_dims(detected_boxes[max_score_id], axis=0),
                        mask_data=np.expand_dims(detected_masks[max_score_id], axis=0))
                    gt_boxlist = MaskList(box_data=gt_boxes, mask_data=gt_masks)
                    iou = iou_masklist(detected_boxlist, gt_boxlist)
                else:
                    detected_boxlist = BoxList(np.expand_dims(detected_boxes[max_score_id, :], axis=0))
                    gt_boxlist = BoxList(gt_boxes)
                    iou = iou_boxlist(detected_boxlist, gt_boxlist)
                if np.max(iou) >= self.matching_iou_threshold:
                    return 1
        return 0

    def _compute_tp_fp(
            self, detected_boxes, detected_scores, detected_class_labels,
            gt_boxes, gt_class_labels, gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None):
        """Labels true/false positives of detections of an image across all classes.
        Args:
            detected_boxes: A float numpy array of shape [N, 4], representing N
                regions of detected object regions. Each row is of the format [y_min, x_min, y_max, x_max]
            detected_scores: A float numpy array of shape [N, 1], representing the
                confidence scores of the detected N object instances.
            detected_class_labels: A integer numpy array of shape [N, 1], representing
                the class labels of the detected N object instances.
            gt_boxes: A float numpy array of shape [M, 4], representing M
                regions of object instances in ground truth
            gt_class_labels: An integer numpy array of shape [M, 1],
                representing M class labels of object instances in ground truth
            gt_is_difficult_list: A boolean numpy array of length M denoting
                whether a ground truth box is a difficult instance or not
            gt_is_group_of_list: A boolean numpy array of length M denoting
                whether a ground truth box has group-of tag
            detected_masks: (optional) A np.uint8 numpy array of shape [N, height,
                width]. If not None, the scores will be computed based on masks.
            gt_masks: (optional) A np.uint8 numpy array of shape [M, height, width].
        Returns:
            result_scores: A list of float numpy arrays. Each numpy array is of
                 shape [K, 1], representing K scores detected with object class label c
            result_tp_fp_labels: A list of boolean numpy array. Each numpy array is of
                shape [K, 1], representing K True/False positive label of object
                instances detected with class label c
        Raises:
            ValueError: If detected masks is not None but groundtruth masks are None,
                or the other way around.
        """
        if detected_masks is not None and gt_masks is None:
            raise ValueError(
                'Detected masks is available but groundtruth masks is not.')
        if detected_masks is None and gt_masks is not None:
            raise ValueError(
                'Groundtruth masks is available but detected masks is not.')

        result_scores = []
        result_tp_fp_labels = []
        for i in range(self.num_gt_classes):
            gt_is_difficult_list_at_ith_class = (
                gt_is_difficult_list[gt_class_labels == i])
            gt_is_group_of_list_at_ith_class = (
                gt_is_group_of_list[gt_class_labels == i])
            (gt_boxes_at_ith_class, gt_masks_at_ith_class,
             detected_boxes_at_ith_class, detected_scores_at_ith_class,
             detected_masks_at_ith_class) = self._get_ith_class_arrays(
                detected_boxes, detected_scores, detected_masks,
                detected_class_labels, gt_boxes, gt_masks,
                gt_class_labels, i)
            scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
                detected_boxes=detected_boxes_at_ith_class,
                detected_scores=detected_scores_at_ith_class,
                gt_boxes=gt_boxes_at_ith_class,
                gt_is_difficult_list=gt_is_difficult_list_at_ith_class,
                gt_is_group_of_list=gt_is_group_of_list_at_ith_class,
                detected_masks=detected_masks_at_ith_class,
                gt_masks=gt_masks_at_ith_class)
            result_scores.append(scores)
            result_tp_fp_labels.append(tp_fp_labels)
        return result_scores, result_tp_fp_labels

    def _get_overlaps_and_scores_mask_mode(
            self, detected_boxes, detected_scores, detected_masks,
            gt_boxes, gt_masks, gt_is_group_of_list):
        """Computes overlaps and scores between detected and groudntruth masks.
        Args:
            detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
            detected_scores: A 1-d numpy array of length N representing classification score
            detected_masks: A uint8 numpy array of shape [N, height, width]. If not
                None, the scores will be computed based on masks.
            gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
            gt_masks: A uint8 numpy array of shape [M, height, width].
            gt_is_group_of_list: A boolean numpy array of length M denoting
                whether a ground truth box has group-of tag. If a groundtruth box is
                group-of box, every detection matching this box is ignored.
        Returns:
            iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
                gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
            ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
                gt_group_of_boxlist.num_boxes() == 0 it will be None.
            scores: The score of the detected boxlist.
            num_boxes: Number of non-maximum suppressed detected boxes.
        """
        detected_boxlist = MaskList(box_data=detected_boxes, mask_data=detected_masks)
        detected_boxlist.add_field('scores', detected_scores)
        detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
        gt_non_group_of_boxlist = MaskList(
            box_data=gt_boxes[~gt_is_group_of_list], mask_data=gt_masks[~gt_is_group_of_list])
        gt_group_of_boxlist = MaskList(
            box_data=gt_boxes[gt_is_group_of_list], mask_data=gt_masks[gt_is_group_of_list])
        iou_b = iou_masklist(detected_boxlist, gt_non_group_of_boxlist)
        ioa_b = np.transpose(ioa_masklist(gt_group_of_boxlist, detected_boxlist))
        scores = detected_boxlist.get_field('scores')
        num_boxes = detected_boxlist.num_boxes()
        return iou_b, ioa_b, scores, num_boxes

    def _get_overlaps_and_scores_box_mode(
            self, detected_boxes, detected_scores, gt_boxes, gt_is_group_of_list):
        """Computes overlaps and scores between detected and groudntruth boxes.
        Args:
            detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
            detected_scores: A 1-d numpy array of length N representing classification score
            gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
            gt_is_group_of_list: A boolean numpy array of length M denoting
                whether a ground truth box has group-of tag. If a groundtruth box is
                group-of box, every detection matching this box is ignored.
        Returns:
            iou: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
                gt_non_group_of_boxlist.num_boxes() == 0 it will be None.
            ioa: A float numpy array of size [num_detected_boxes, num_gt_boxes]. If
                gt_group_of_boxlist.num_boxes() == 0 it will be None.
            scores: The score of the detected boxlist.
            num_boxes: Number of non-maximum suppressed detected boxes.
        """
        detected_boxlist = BoxList(detected_boxes)
        detected_boxlist.add_field('scores', detected_scores)
        detected_boxlist = non_max_suppression(detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)
        gt_non_group_of_boxlist = BoxList(gt_boxes[~gt_is_group_of_list])
        gt_group_of_boxlist = BoxList(gt_boxes[gt_is_group_of_list])
        iou_b = iou_boxlist(detected_boxlist, gt_non_group_of_boxlist)
        ioa_b = np.transpose(ioa_boxlist(gt_group_of_boxlist, detected_boxlist))
        scores = detected_boxlist.get_field('scores')
        num_boxes = detected_boxlist.num_boxes()
        return iou_b, ioa_b, scores, num_boxes

    def _compute_tp_fp_for_single_class(
            self, detected_boxes, detected_scores, gt_boxes,
            gt_is_difficult_list, gt_is_group_of_list, detected_masks=None, gt_masks=None):
        """Labels boxes detected with the same class from the same image as tp/fp.
        Args:
            detected_boxes: A numpy array of shape [N, 4] representing detected box coordinates
            detected_scores: A 1-d numpy array of length N representing classification score
            gt_boxes: A numpy array of shape [M, 4] representing ground truth box coordinates
            gt_is_difficult_list: A boolean numpy array of length M denoting
                whether a ground truth box is a difficult instance or not. If a
                groundtruth box is difficult, every detection matching this box is ignored.
            gt_is_group_of_list: A boolean numpy array of length M denoting
                whether a ground truth box has group-of tag. If a groundtruth box is
                group-of box, every detection matching this box is ignored.
            detected_masks: (optional) A uint8 numpy array of shape [N, height,
                width]. If not None, the scores will be computed based on masks.
            gt_masks: (optional) A uint8 numpy array of shape [M, height, width].
        Returns:
            Two arrays of the same size, containing all boxes that were evaluated as
            being true positives or false positives; if a box matched to a difficult
            box or to a group-of box, it is ignored.
            scores: A numpy array representing the detection scores.
            tp_fp_labels: a boolean numpy array indicating whether a detection is a true positive.
        """
        if detected_boxes.size == 0:
            return np.array([], dtype=float), np.array([], dtype=bool)

        mask_mode = False
        if detected_masks is not None and gt_masks is not None:
            mask_mode = True

        iou_b = np.ndarray([0, 0])
        ioa_b = np.ndarray([0, 0])
        iou_m = np.ndarray([0, 0])
        ioa_m = np.ndarray([0, 0])
        if mask_mode:
            # For Instance Segmentation Evaluation on Open Images V5, not all boxed
            # instances have corresponding segmentation annotations. Those boxes that
            # dont have segmentation annotations are represented as empty masks in
            # gt_masks nd array.
            mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) > 0)

            iou_m, ioa_m, scores, num_detected_boxes = self._get_overlaps_and_scores_mask_mode(
                detected_boxes=detected_boxes,
                detected_scores=detected_scores,
                detected_masks=detected_masks,
                gt_boxes=gt_boxes[mask_presence_indicator, :],
                gt_masks=gt_masks[mask_presence_indicator, :],
                gt_is_group_of_list=gt_is_group_of_list[mask_presence_indicator])

            if sum(mask_presence_indicator) < len(mask_presence_indicator):
                # Not all masks are present - some masks are empty
                iou_b, ioa_b, _, num_detected_boxes = self._get_overlaps_and_scores_box_mode(
                    detected_boxes=detected_boxes,
                    detected_scores=detected_scores,
                    gt_boxes=gt_boxes[~mask_presence_indicator, :],
                    gt_is_group_of_list=gt_is_group_of_list[~mask_presence_indicator])
            num_detected_boxes = detected_boxes.shape[0]
        else:
            mask_presence_indicator = np.zeros(gt_is_group_of_list.shape, dtype=bool)
            iou_b, ioa_b, scores, num_detected_boxes = self._get_overlaps_and_scores_box_mode(
                detected_boxes=detected_boxes,
                detected_scores=detected_scores,
                gt_boxes=gt_boxes,
                gt_is_group_of_list=gt_is_group_of_list)

        if gt_boxes.size == 0:
            return scores, np.zeros(num_detected_boxes, dtype=bool)

        tp_fp_labels = np.zeros(num_detected_boxes, dtype=bool)
        is_matched_to_box = np.zeros(num_detected_boxes, dtype=bool)
        is_matched_to_difficult = np.zeros(num_detected_boxes, dtype=bool)
        is_matched_to_group_of = np.zeros(num_detected_boxes, dtype=bool)

        def compute_match_iou(iou_matrix, gt_nongroup_of_is_difficult_list, is_box):
            """Computes TP/FP for non group-of box matching.
            The function updates the following local variables:
                tp_fp_labels - if a box is matched to group-of
                is_matched_to_difficult - the detections that were processed at this are
                    matched to difficult box.
                is_matched_to_box - the detections that were processed at this stage are marked as is_box.
            Args:
                iou_matrix: intersection-over-union matrix [num_gt_boxes]x[num_det_boxes].
                gt_nongroup_of_is_difficult_list: boolean that specifies if gt box is difficult.
                is_box: boolean that specifies if currently boxes or masks are processed.
            """
            max_overlap_gt_ids = np.argmax(iou_matrix, axis=1)
            is_gt_detected = np.zeros(iou_matrix.shape[1], dtype=bool)
            for i in range(num_detected_boxes):
                gt_id = max_overlap_gt_ids[i]
                is_evaluatable = (
                    not tp_fp_labels[i] and
                    not is_matched_to_difficult[i] and
                    iou_matrix[i, gt_id] >= self.matching_iou_threshold and
                    not is_matched_to_group_of[i])
                if is_evaluatable:
                    if not gt_nongroup_of_is_difficult_list[gt_id]:
                        if not is_gt_detected[gt_id]:
                            tp_fp_labels[i] = True
                            is_gt_detected[gt_id] = True
                            is_matched_to_box[i] = is_box
                    else:
                        is_matched_to_difficult[i] = True

        def compute_match_ioa(ioa_matrix, is_box):
            """Computes TP/FP for group-of box matching.
            The function updates the following local variables:
                is_matched_to_group_of - if a box is matched to group-of
                is_matched_to_box - the detections that were processed at this stage are marked as is_box.
            Args:
                ioa_matrix: intersection-over-area matrix [num_gt_boxes]x[num_det_boxes].
                is_box: boolean that specifies if currently boxes or masks are processed.
            Returns:
                scores_group_of: of detections matched to group-of boxes[num_groupof_matched].
                tp_fp_labels_group_of: boolean array of size [num_groupof_matched], all values are True.
            """
            scores_group_of = np.zeros(ioa_matrix.shape[1], dtype=float)
            tp_fp_labels_group_of = self.group_of_weight * np.ones(ioa_matrix.shape[1], dtype=float)
            max_overlap_group_of_gt_ids = np.argmax(ioa_matrix, axis=1)
            for i in range(num_detected_boxes):
                gt_id = max_overlap_group_of_gt_ids[i]
                is_evaluatable = (
                    not tp_fp_labels[i] and
                    not is_matched_to_difficult[i] and
                    ioa_matrix[i, gt_id] >= self.matching_iou_threshold and
                    not is_matched_to_group_of[i])
                if is_evaluatable:
                    is_matched_to_group_of[i] = True
                    is_matched_to_box[i] = is_box
                    scores_group_of[gt_id] = max(scores_group_of[gt_id], scores[i])
            selector = np.where((scores_group_of > 0) & (tp_fp_labels_group_of > 0))
            scores_group_of = scores_group_of[selector]
            tp_fp_labels_group_of = tp_fp_labels_group_of[selector]

            return scores_group_of, tp_fp_labels_group_of

        # The evaluation is done in two stages:
        # 1. Evaluate all objects that actually have instance level masks.
        # 2. Evaluate all objects that are not already evaluated as boxes.
        if iou_m.shape[1] > 0:
            gt_is_difficult_mask_list = gt_is_difficult_list[mask_presence_indicator]
            gt_is_group_of_mask_list = gt_is_group_of_list[mask_presence_indicator]
            compute_match_iou(iou_m, gt_is_difficult_mask_list[~gt_is_group_of_mask_list], is_box=False)

        scores_mask_group_of = np.ndarray([0], dtype=float)
        tp_fp_labels_mask_group_of = np.ndarray([0], dtype=float)
        if ioa_m.shape[1] > 0:
            scores_mask_group_of, tp_fp_labels_mask_group_of = compute_match_ioa(ioa_m, is_box=False)

        # Tp-fp evaluation for non-group of boxes (if any).
        if iou_b.shape[1] > 0:
            gt_is_difficult_box_list = gt_is_difficult_list[~mask_presence_indicator]
            gt_is_group_of_box_list = gt_is_group_of_list[~mask_presence_indicator]
            compute_match_iou(iou_b, gt_is_difficult_box_list[~gt_is_group_of_box_list], is_box=True)

        scores_box_group_of = np.ndarray([0], dtype=float)
        tp_fp_labels_box_group_of = np.ndarray([0], dtype=float)
        if ioa_b.shape[1] > 0:
            scores_box_group_of, tp_fp_labels_box_group_of = compute_match_ioa(ioa_b, is_box=True)

        if mask_mode:
            # Note: here crowds are treated as ignore regions.
            valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of & ~is_matched_to_box)
            return np.concatenate((scores[valid_entries], scores_mask_group_of)),\
                   np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_mask_group_of))
        else:
            valid_entries = (~is_matched_to_difficult & ~is_matched_to_group_of)
            return np.concatenate((scores[valid_entries], scores_box_group_of)),\
                   np.concatenate((tp_fp_labels[valid_entries].astype(float), tp_fp_labels_box_group_of))

    def _get_ith_class_arrays(
            self, detected_boxes, detected_scores, detected_masks, detected_class_labels,
            gt_boxes, gt_masks, gt_class_labels, class_index):
        """Returns numpy arrays belonging to class with index `class_index`.
        Args:
            detected_boxes: A numpy array containing detected boxes.
            detected_scores: A numpy array containing detected scores.
            detected_masks: A numpy array containing detected masks.
            detected_class_labels: A numpy array containing detected class labels.
            gt_boxes: A numpy array containing groundtruth boxes.
            gt_masks: A numpy array containing groundtruth masks.
            gt_class_labels: A numpy array containing groundtruth class labels.
            class_index: An integer index.
        Returns:
            gt_boxes_at_ith_class: A numpy array containing groundtruth boxes labeled as ith class.
            gt_masks_at_ith_class: A numpy array containing groundtruth masks labeled as ith class.
            detected_boxes_at_ith_class: A numpy array containing detected boxes corresponding to the ith class.
            detected_scores_at_ith_class: A numpy array containing detected scores corresponding to the ith class.
            detected_masks_at_ith_class: A numpy array containing detected masks corresponding to the ith class.
        """
        selected_groundtruth = (gt_class_labels == class_index)
        gt_boxes_at_ith_class = gt_boxes[selected_groundtruth]
        if gt_masks is not None:
            gt_masks_at_ith_class = gt_masks[selected_groundtruth]
        else:
            gt_masks_at_ith_class = None
        selected_detections = (detected_class_labels == class_index)
        detected_boxes_at_ith_class = detected_boxes[selected_detections]
        detected_scores_at_ith_class = detected_scores[selected_detections]
        if detected_masks is not None:
            detected_masks_at_ith_class = detected_masks[selected_detections]
        else:
            detected_masks_at_ith_class = None
        return (gt_boxes_at_ith_class, gt_masks_at_ith_class,
                detected_boxes_at_ith_class, detected_scores_at_ith_class,
                detected_masks_at_ith_class)

    def _remove_invalid_boxes(
            self, detected_boxes, detected_scores, detected_class_labels, detected_masks=None):
        """Removes entries with invalid boxes.
        A box is invalid if either its xmax is smaller than its xmin, or its ymax is smaller than its ymin.
        Args:
            detected_boxes: A float numpy array of size [num_boxes, 4] containing box
                coordinates in [ymin, xmin, ymax, xmax] format.
            detected_scores: A float numpy array of size [num_boxes].
            detected_class_labels: A int32 numpy array of size [num_boxes].
            detected_masks: A uint8 numpy array of size [num_boxes, height, width].
        Returns:
            valid_detected_boxes: A float numpy array of size [num_valid_boxes, 4]
                containing box coordinates in [ymin, xmin, ymax, xmax] format.
            valid_detected_scores: A float numpy array of size [num_valid_boxes].
            valid_detected_class_labels: A int32 numpy array of size [num_valid_boxes].
            valid_detected_masks: A uint8 numpy array of size [num_valid_boxes, height, width].
        """
        valid_indices = np.logical_and(
            detected_boxes[:, 0] < detected_boxes[:, 2], detected_boxes[:, 1] < detected_boxes[:, 3])
        detected_boxes = detected_boxes[valid_indices]
        detected_scores = detected_scores[valid_indices]
        detected_class_labels = detected_class_labels[valid_indices]
        if detected_masks is not None:
            detected_masks = detected_masks[valid_indices]
        return [detected_boxes, detected_scores, detected_class_labels, detected_masks]