import copy

import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from .data import ImageData, ReactionImageData, CorefImageData
class CocoEvaluator(object):

    def __init__(self, coco_gt):
        # Work on a copy so repeated evaluations do not mutate the caller's COCO object.
        self.coco_gt = copy.deepcopy(coco_gt)

    def evaluate(self, predictions):
        img_ids, results = self.prepare(predictions, 'bbox')
        if len(results) == 0:
            # No detections at all: return the 12 COCO summary metrics as zeros.
            return np.zeros((12,))
        coco_dt = self.coco_gt.loadRes(results)
        cocoEval = COCOeval(self.coco_gt, coco_dt, 'bbox')
        cocoEval.params.imgIds = img_ids
        cocoEval.params.catIds = [1]  # evaluate a single category
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        self.cocoEval = cocoEval
        return cocoEval.stats
    def prepare(self, predictions, iou_type):
        if iou_type == "bbox":
            return self.prepare_for_coco_detection(predictions)
        else:
            raise ValueError("Unknown iou type {}".format(iou_type))
    def prepare_for_coco_detection(self, predictions):
        # Predictions are assumed to be index-aligned with the images in the
        # ground-truth annotations; images without predictions are skipped.
        img_ids = []
        coco_results = []
        for idx, prediction in enumerate(predictions):
            if len(prediction) == 0:
                continue
            image = self.coco_gt.dataset['images'][idx]
            img_ids.append(image['id'])
            width = image['width']
            height = image['height']
            coco_results.extend(
                [
                    {
                        "image_id": image['id'],
                        "category_id": pred['category_id'],
                        "bbox": convert_to_xywh(pred['bbox'], width, height),
                        "score": pred['score'],
                    }
                    for pred in prediction
                ]
            )
        return img_ids, coco_results
def convert_to_xywh(box, width, height):
    # Convert a normalized [xmin, ymin, xmax, ymax] box to absolute COCO [x, y, w, h].
    xmin, ymin, xmax, ymax = box
    return [xmin * width, ymin * height, (xmax - xmin) * width, (ymax - ymin) * height]
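
# A minimal usage sketch for CocoEvaluator, kept out of the import path. The
# annotation path and prediction values below are hypothetical; each entry of
# `predictions` is assumed to be index-aligned with coco_gt.dataset['images']
# and to carry normalized [xmin, ymin, xmax, ymax] boxes, as
# prepare_for_coco_detection expects.
def _demo_coco_evaluator():
    coco_gt = COCO('annotations.json')  # hypothetical annotation file
    predictions = [
        [{'category_id': 1, 'bbox': [0.1, 0.2, 0.5, 0.6], 'score': 0.9}],  # image 0
        [],  # image 1: no detections, skipped during preparation
    ]
    evaluator = CocoEvaluator(coco_gt)
    stats = evaluator.evaluate(predictions)  # 12 COCO summary metrics; stats[0] is AP@[.50:.95]
    print(stats)
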
# Running counts for micro-averaged precision/recall, plus the number of images seen.
EMPTY_STATS = {'gold_hits': 0, 'gold_total': 0, 'pred_hits': 0, 'pred_total': 0, 'image': 0}
class ReactionEvaluator(object):

    def evaluate_image(self, gold_image, pred_image, **kwargs):
        data = ReactionImageData(gold_image, pred_image)
        return data.evaluate(**kwargs)

    def compute_metrics(self, gold_hits, gold_total, pred_hits, pred_total):
        # Micro-averaged precision/recall; max(..., 1) guards against division by zero.
        precision = pred_hits / max(pred_total, 1)
        recall = gold_hits / max(gold_total, 1)
        f1 = precision * recall * 2 / max(precision + recall, 1e-6)
        return {'precision': precision, 'recall': recall, 'f1': f1}
    def evaluate(self, groundtruths, predictions, **kwargs):
        gold_hits, gold_total, pred_hits, pred_total = 0, 0, 0, 0
        for gold_image, pred_image in zip(groundtruths, predictions):
            # gh/ph are per-reaction hit flags for the gold and predicted reactions.
            gh, ph = self.evaluate_image(gold_image, pred_image, **kwargs)
            gold_hits += sum(gh)
            gold_total += len(gh)
            pred_hits += sum(ph)
            pred_total += len(ph)
        return self.compute_metrics(gold_hits, gold_total, pred_hits, pred_total)
    def evaluate_by_size(self, groundtruths, predictions, **kwargs):
        # Group statistics by the number of gold reactions in each image.
        group_stats = {}
        for gold_image, pred_image in zip(groundtruths, predictions):
            gh, ph = self.evaluate_image(gold_image, pred_image, **kwargs)
            gtotal = len(gh)
            if gtotal not in group_stats:
                group_stats[gtotal] = copy.deepcopy(EMPTY_STATS)
            group_stats[gtotal]['gold_hits'] += sum(gh)
            group_stats[gtotal]['gold_total'] += len(gh)
            group_stats[gtotal]['pred_hits'] += sum(ph)
            group_stats[gtotal]['pred_total'] += len(ph)
            group_stats[gtotal]['image'] += 1
        group_scores = {}
        for gtotal, stats in group_stats.items():
            group_scores[gtotal] = self.compute_metrics(
                stats['gold_hits'], stats['gold_total'], stats['pred_hits'], stats['pred_total'])
        return group_scores, group_stats
    def evaluate_by_group(self, groundtruths, predictions, **kwargs):
        # Group statistics by the diagram type recorded in the gold annotation.
        group_stats = {}
        for gold_image, pred_image in zip(groundtruths, predictions):
            gh, ph = self.evaluate_image(gold_image, pred_image, **kwargs)
            diagram_type = gold_image['diagram_type']
            if diagram_type not in group_stats:
                group_stats[diagram_type] = copy.deepcopy(EMPTY_STATS)
            group_stats[diagram_type]['gold_hits'] += sum(gh)
            group_stats[diagram_type]['gold_total'] += len(gh)
            group_stats[diagram_type]['pred_hits'] += sum(ph)
            group_stats[diagram_type]['pred_total'] += len(ph)
            group_stats[diagram_type]['image'] += 1
        group_scores = {}
        for group, stats in group_stats.items():
            group_scores[group] = self.compute_metrics(
                stats['gold_hits'], stats['gold_total'], stats['pred_hits'], stats['pred_total'])
        return group_scores, group_stats
    def evaluate_summarize(self, groundtruths, predictions, **kwargs):
        # Collapse the per-size breakdown into overall scores.
        size_scores, size_stats = self.evaluate_by_size(groundtruths, predictions, **kwargs)
        summarize = {
            'overall': copy.deepcopy(EMPTY_STATS),
            # 'single': copy.deepcopy(EMPTY_STATS),
            # 'multiple': copy.deepcopy(EMPTY_STATS)
        }
        for size, stats in size_stats.items():
            if isinstance(size, int):
                # output = summarize['single'] if size <= 1 else summarize['multiple']
                for key in stats:
                    # output[key] += stats[key]
                    summarize['overall'][key] += stats[key]
        scores = {}
        for key, val in summarize.items():
            scores[key] = self.compute_metrics(
                val['gold_hits'], val['gold_total'], val['pred_hits'], val['pred_total'])
        return scores, summarize, size_stats
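
# A minimal sketch of driving ReactionEvaluator end to end. The structure of
# each image dict is defined by ReactionImageData in .data and is not shown
# here; `groundtruths` and `predictions` are assumed to be parallel lists.
def _demo_reaction_evaluator(groundtruths, predictions):
    evaluator = ReactionEvaluator()
    scores = evaluator.evaluate(groundtruths, predictions)
    print(scores)  # {'precision': ..., 'recall': ..., 'f1': ...}
    # Per-size breakdown: keys are the number of gold reactions in an image.
    group_scores, group_stats = evaluator.evaluate_by_size(groundtruths, predictions)
    for size in sorted(group_scores):
        print(size, group_scores[size], group_stats[size]['image'])
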
class CorefEvaluator(object):

    def evaluate_image(self, gold_image, pred_image, **kwargs):
        data = CorefImageData(gold_image, predictions=pred_image)
        return data.evaluate(**kwargs)

    def evaluate(self, groundtruths, predictions):
        hits, gold_total, pred_total = 0, 0, 0
        for idx, (gold_image, pred_image) in enumerate(zip(groundtruths, predictions)):
            try:
                hit, gold_pairs, pred_pairs = self.evaluate_image(gold_image, pred_image)
            except Exception:
                # Skip images that fail to evaluate, but report which one.
                print('CorefEvaluator: failed on image {}'.format(idx))
                continue
            hits += hit
            gold_total += gold_pairs
            pred_total += pred_pairs
        return hits, gold_total, pred_total
    def evaluate_summarize(self, groundtruths, predictions):
        hits, gold_total, pred_total = self.evaluate(groundtruths, predictions)
        precision = hits / max(pred_total, 1)
        recall = hits / max(gold_total, 1)
        f1 = precision * recall * 2 / max(precision + recall, 1e-6)
        return precision, recall, f1
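
# A minimal sketch for CorefEvaluator. The gold/pred image structures come
# from CorefImageData in .data; evaluate_summarize micro-averages
# coreference-pair hits over all images.
def _demo_coref_evaluator(groundtruths, predictions):
    evaluator = CorefEvaluator()
    precision, recall, f1 = evaluator.evaluate_summarize(groundtruths, predictions)
    print('precision={:.3f} recall={:.3f} f1={:.3f}'.format(precision, recall, f1))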