import logging

import numpy as np

from effdet.evaluation.metrics import compute_precision_recall, compute_average_precision, compute_cor_loc
from effdet.evaluation.per_image_evaluation import PerImageEvaluation


class ObjectDetectionEvaluation:
    """Internal implementation of Pascal object detection metrics."""

    def __init__(self,
                 num_gt_classes,
                 matching_iou_threshold=0.5,
                 nms_iou_threshold=1.0,
                 nms_max_output_boxes=10000,
                 recall_lower_bound=0.0,
                 recall_upper_bound=1.0,
                 use_weighted_mean_ap=False,
                 label_id_offset=0,
                 group_of_weight=0.0,
                 per_image_eval_class=PerImageEvaluation):
        """Constructor.

        Args:
            num_gt_classes: Number of ground-truth classes.
            matching_iou_threshold: IOU threshold used for matching detected boxes to ground-truth boxes.
            nms_iou_threshold: IOU threshold used for non-maximum suppression.
            nms_max_output_boxes: Maximum number of boxes returned by non-maximum suppression.
            recall_lower_bound: Lower bound of the recall operating area.
            recall_upper_bound: Upper bound of the recall operating area.
            use_weighted_mean_ap: (optional) boolean which determines if the mean average precision is
                computed directly from the scores and tp_fp_labels of all classes.
            label_id_offset: The label id offset.
            group_of_weight: Weight of group-of boxes. If set to 0, detections of the correct class
                within a group-of box are ignored. If weight is > 0, then if at least one detection
                falls within a group-of box with matching_iou_threshold, weight group_of_weight is
                added to true positives. Consequently, if no detection falls within a group-of box,
                weight group_of_weight is added to false negatives.
            per_image_eval_class: The class that contains functions for computing per image metrics.

        Raises:
            ValueError: if num_gt_classes is smaller than 1.
        """
        if num_gt_classes < 1:
            raise ValueError('Need at least 1 groundtruth class for evaluation.')

        self.per_image_eval = per_image_eval_class(
            num_gt_classes=num_gt_classes,
            matching_iou_threshold=matching_iou_threshold,
            nms_iou_threshold=nms_iou_threshold,
            nms_max_output_boxes=nms_max_output_boxes,
            group_of_weight=group_of_weight)
        self.recall_lower_bound = recall_lower_bound
        self.recall_upper_bound = recall_upper_bound
        self.group_of_weight = group_of_weight
        self.num_class = num_gt_classes
        self.use_weighted_mean_ap = use_weighted_mean_ap
        self.label_id_offset = label_id_offset

        self.gt_boxes = {}
        self.gt_class_labels = {}
        self.gt_masks = {}
        self.gt_is_difficult_list = {}
        self.gt_is_group_of_list = {}
        self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=float)
        self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

        self._initialize_detections()

    def _initialize_detections(self):
        """Initializes internal data structures."""
        self.detection_keys = set()
        self.scores_per_class = [[] for _ in range(self.num_class)]
        self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
        self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
        self.average_precision_per_class = np.empty(self.num_class, dtype=float)
        self.average_precision_per_class.fill(np.nan)
        self.precisions_per_class = [np.nan] * self.num_class
        self.recalls_per_class = [np.nan] * self.num_class
        self.sum_tp_class = [np.nan] * self.num_class
        self.corloc_per_class = np.ones(self.num_class, dtype=float)

    def clear_detections(self):
        self._initialize_detections()
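    # Illustrative configuration (a sketch; the values and the `evaluator` name
    # are hypothetical). A Pascal VOC style evaluation over 20 classes at the
    # default 0.5 matching IoU could be set up as:
    #
    #   evaluator = ObjectDetectionEvaluation(num_gt_classes=20, matching_iou_threshold=0.5)
    #
    # recall_lower_bound / recall_upper_bound restrict the operating area: e.g.
    # recall_lower_bound=0.2, recall_upper_bound=0.8 limits the precision/recall
    # points used for AP to recalls within [0.2, 0.8].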
    def add_single_ground_truth_image_info(
            self, image_key, gt_boxes, gt_class_labels,
            gt_is_difficult_list=None, gt_is_group_of_list=None, gt_masks=None):
        """Adds groundtruth for a single image to be used for evaluation.

        Args:
            image_key: A unique string/integer identifier for the image.
            gt_boxes: float32 numpy array of shape [num_boxes, 4] containing `num_boxes` groundtruth
                boxes of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
            gt_class_labels: integer numpy array of shape [num_boxes] containing 0-indexed groundtruth
                classes for the boxes.
            gt_is_difficult_list: A numpy boolean array of shape [num_boxes] denoting whether a ground
                truth box is a difficult instance or not. To support the case that no boxes are
                difficult, it is by default set as None.
            gt_is_group_of_list: A numpy boolean array of shape [num_boxes] denoting whether a ground
                truth box is a group-of box or not. To support the case that no boxes are groups-of,
                it is by default set as None.
            gt_masks: uint8 numpy array of shape [num_boxes, height, width] containing `num_boxes`
                groundtruth masks. The mask values range from 0 to 1.
        """
        if image_key in self.gt_boxes:
            logging.warning('image %s has already been added to the ground truth database.', image_key)
            return

        self.gt_boxes[image_key] = gt_boxes
        self.gt_class_labels[image_key] = gt_class_labels
        self.gt_masks[image_key] = gt_masks
        if gt_is_difficult_list is None:
            num_boxes = gt_boxes.shape[0]
            gt_is_difficult_list = np.zeros(num_boxes, dtype=bool)
        gt_is_difficult_list = gt_is_difficult_list.astype(dtype=bool)
        self.gt_is_difficult_list[image_key] = gt_is_difficult_list
        if gt_is_group_of_list is None:
            num_boxes = gt_boxes.shape[0]
            gt_is_group_of_list = np.zeros(num_boxes, dtype=bool)
        if gt_masks is None:
            num_boxes = gt_boxes.shape[0]
            mask_presence_indicator = np.zeros(num_boxes, dtype=bool)
        else:
            mask_presence_indicator = (np.sum(gt_masks, axis=(1, 2)) == 0).astype(dtype=bool)
        gt_is_group_of_list = gt_is_group_of_list.astype(dtype=bool)
        self.gt_is_group_of_list[image_key] = gt_is_group_of_list

        # Boxes with an empty mask are treated like difficult instances and ignored in the counts.
        masked_gt_is_difficult_list = gt_is_difficult_list | mask_presence_indicator
        for class_index in range(self.num_class):
            num_gt_instances = np.sum(
                gt_class_labels[~masked_gt_is_difficult_list & ~gt_is_group_of_list] == class_index)
            num_groupof_gt_instances = self.group_of_weight * np.sum(
                gt_class_labels[gt_is_group_of_list & ~masked_gt_is_difficult_list] == class_index)
            self.num_gt_instances_per_class[class_index] += num_gt_instances + num_groupof_gt_instances
            if np.any(gt_class_labels == class_index):
                self.num_gt_imgs_per_class[class_index] += 1
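    # Example groundtruth registration (a sketch with synthetic values): two
    # boxes in absolute [ymin, xmin, ymax, xmax] coordinates, the second marked
    # group-of so it contributes group_of_weight instead of a full instance.
    #
    #   evaluator.add_single_ground_truth_image_info(
    #       image_key='image_0',
    #       gt_boxes=np.array([[10., 10., 50., 60.], [0., 0., 100., 100.]], dtype=np.float32),
    #       gt_class_labels=np.array([0, 1], dtype=int),
    #       gt_is_group_of_list=np.array([False, True]))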
    def add_single_detected_image_info(
            self, image_key, detected_boxes, detected_scores, detected_class_labels, detected_masks=None):
        """Adds detections for a single image to be used for evaluation.

        Args:
            image_key: A unique string/integer identifier for the image.
            detected_boxes: float32 numpy array of shape [num_boxes, 4] containing `num_boxes`
                detection boxes of the format [ymin, xmin, ymax, xmax] in absolute image coordinates.
            detected_scores: float32 numpy array of shape [num_boxes] containing detection scores
                for the boxes.
            detected_class_labels: integer numpy array of shape [num_boxes] containing 0-indexed
                detection classes for the boxes.
            detected_masks: np.uint8 numpy array of shape [num_boxes, height, width] containing
                `num_boxes` detection masks with values ranging between 0 and 1.

        Raises:
            ValueError: if the number of boxes, scores and class labels differ in length.
        """
        if len(detected_boxes) != len(detected_scores) or len(detected_boxes) != len(detected_class_labels):
            raise ValueError(
                'detected_boxes, detected_scores and detected_class_labels should all have the '
                'same lengths. Got [%d, %d, %d]' % (
                    len(detected_boxes), len(detected_scores), len(detected_class_labels)))

        if image_key in self.detection_keys:
            logging.warning('image %s has already been added to the detection result database', image_key)
            return

        self.detection_keys.add(image_key)
        if image_key in self.gt_boxes:
            gt_boxes = self.gt_boxes[image_key]
            gt_class_labels = self.gt_class_labels[image_key]
            # Masks are popped instead of looked up. The reason is that we do not want
            # to keep all masks in memory which can cause memory overflow.
            gt_masks = self.gt_masks.pop(image_key)
            gt_is_difficult_list = self.gt_is_difficult_list[image_key]
            gt_is_group_of_list = self.gt_is_group_of_list[image_key]
        else:
            # No groundtruth was registered for this image; evaluate against empty groundtruth.
            gt_boxes = np.empty(shape=[0, 4], dtype=float)
            gt_class_labels = np.array([], dtype=int)
            if detected_masks is None:
                gt_masks = None
            else:
                gt_masks = np.empty(shape=[0, 1, 1], dtype=float)
            gt_is_difficult_list = np.array([], dtype=bool)
            gt_is_group_of_list = np.array([], dtype=bool)

        scores, tp_fp_labels, is_class_correctly_detected_in_image = \
            self.per_image_eval.compute_object_detection_metrics(
                detected_boxes=detected_boxes,
                detected_scores=detected_scores,
                detected_class_labels=detected_class_labels,
                gt_boxes=gt_boxes,
                gt_class_labels=gt_class_labels,
                gt_is_difficult_list=gt_is_difficult_list,
                gt_is_group_of_list=gt_is_group_of_list,
                detected_masks=detected_masks,
                gt_masks=gt_masks)

        for i in range(self.num_class):
            if scores[i].shape[0] > 0:
                self.scores_per_class[i].append(scores[i])
                self.tp_fp_labels_per_class[i].append(tp_fp_labels[i])
        self.num_images_correctly_detected_per_class += is_class_correctly_detected_in_image
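    # Example detection registration (a sketch with synthetic values). The box,
    # score and label arrays must have equal lengths, and an image_key with no
    # registered groundtruth is scored against empty groundtruth, so every
    # surviving detection counts as a false positive.
    #
    #   evaluator.add_single_detected_image_info(
    #       image_key='image_0',
    #       detected_boxes=np.array([[12., 11., 48., 58.]], dtype=np.float32),
    #       detected_scores=np.array([0.9], dtype=np.float32),
    #       detected_class_labels=np.array([0], dtype=int))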
    def evaluate(self):
        """Compute evaluation result.

        Returns:
            A dict with the following fields -
                per_class_ap: float numpy array of average precision for each class.
                mean_ap: mean average precision of all classes, float scalar.
                per_class_precision: List of precisions, each precision is a float numpy array.
                per_class_recall: List of recalls, each recall is a float numpy array.
                per_class_corlocs: float numpy array of CorLoc scores for each class.
                mean_corloc: mean CorLoc score over all classes, float scalar.
        """
        if (self.num_gt_instances_per_class == 0).any():
            logging.warning(
                'The following classes have no ground truth examples: %s',
                np.squeeze(np.argwhere(self.num_gt_instances_per_class == 0)) + self.label_id_offset)

        if self.use_weighted_mean_ap:
            all_scores = np.array([], dtype=float)
            all_tp_fp_labels = np.array([], dtype=bool)

        for class_index in range(self.num_class):
            if self.num_gt_instances_per_class[class_index] == 0:
                continue
            if not self.scores_per_class[class_index]:
                scores = np.array([], dtype=float)
                tp_fp_labels = np.array([], dtype=float)
            else:
                scores = np.concatenate(self.scores_per_class[class_index])
                tp_fp_labels = np.concatenate(self.tp_fp_labels_per_class[class_index])
            if self.use_weighted_mean_ap:
                all_scores = np.append(all_scores, scores)
                all_tp_fp_labels = np.append(all_tp_fp_labels, tp_fp_labels)

            precision, recall = compute_precision_recall(
                scores, tp_fp_labels, self.num_gt_instances_per_class[class_index])

            recall_within_bound_indices = [
                index for index, value in enumerate(recall)
                if value >= self.recall_lower_bound and value <= self.recall_upper_bound
            ]
            recall_within_bound = recall[recall_within_bound_indices]
            precision_within_bound = precision[recall_within_bound_indices]

            self.precisions_per_class[class_index] = precision_within_bound
            self.recalls_per_class[class_index] = recall_within_bound
            self.sum_tp_class[class_index] = tp_fp_labels.sum()
            average_precision = compute_average_precision(precision_within_bound, recall_within_bound)
            self.average_precision_per_class[class_index] = average_precision
            logging.debug('average_precision: %f', average_precision)

        self.corloc_per_class = compute_cor_loc(
            self.num_gt_imgs_per_class, self.num_images_correctly_detected_per_class)

        if self.use_weighted_mean_ap:
            num_gt_instances = np.sum(self.num_gt_instances_per_class)
            precision, recall = compute_precision_recall(all_scores, all_tp_fp_labels, num_gt_instances)
            recall_within_bound_indices = [
                index for index, value in enumerate(recall)
                if value >= self.recall_lower_bound and value <= self.recall_upper_bound
            ]
            recall_within_bound = recall[recall_within_bound_indices]
            precision_within_bound = precision[recall_within_bound_indices]
            mean_ap = compute_average_precision(precision_within_bound, recall_within_bound)
        else:
            mean_ap = np.nanmean(self.average_precision_per_class)
        mean_corloc = np.nanmean(self.corloc_per_class)
        return dict(
            per_class_ap=self.average_precision_per_class,
            mean_ap=mean_ap,
            per_class_precision=self.precisions_per_class,
            per_class_recall=self.recalls_per_class,
            per_class_corlocs=self.corloc_per_class,
            mean_corloc=mean_corloc)
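
# Minimal end-to-end usage sketch (synthetic data, illustrative only; assumes
# the effdet per-image evaluation dependencies imported above are available).
if __name__ == '__main__':
    evaluator = ObjectDetectionEvaluation(num_gt_classes=1)
    evaluator.add_single_ground_truth_image_info(
        image_key='demo',
        gt_boxes=np.array([[10., 10., 50., 50.]], dtype=np.float32),
        gt_class_labels=np.array([0], dtype=int))
    evaluator.add_single_detected_image_info(
        image_key='demo',
        detected_boxes=np.array([[10., 10., 50., 50.]], dtype=np.float32),
        detected_scores=np.array([0.8], dtype=np.float32),
        detected_class_labels=np.array([0], dtype=int))
    metrics = evaluator.evaluate()
    # A single perfectly matched detection should yield mean_ap == 1.0.
    print('mean_ap: %.3f' % metrics['mean_ap'])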