# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np
import json


class Metric(object):
    def __init__(self, config, metric_names):
        self.metric_names = metric_names

    def best_metric(self, metric):
        return metric[self.metric_names[0]]

    def save_metrics(self, fn, metrics):
        with open(fn, "w") as fw:
            json.dump(metrics, fw)

    def print_computed_metrics(self, metrics):
        raise NotImplementedError


class RetrievalMetric(Metric):
    """
    Modified from `howto100m/metrics.py`.
    History of changes:
        refactored as a class.
        added metric_key in __init__.
    """

    def __init__(self, config, metric_names=["R1", "R5", "R10", "MR"]):
        super().__init__(config, metric_names)
        self.error = False  # TODO(huxu): add to config to print error.

    def compute_metrics(self, outputs, texts, **kwargs):
        x = outputs
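        # `x` is a (num_queries, num_candidates) similarity matrix in which
        # x[i, i] scores the ground-truth pair. Sorting each row of -x and
        # locating the (negated) diagonal value yields, for every query, the
        # 0-based rank of its correct match among all candidates.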
        sx = np.sort(-x, axis=1)
        d = np.diag(-x)
        d = d[:, np.newaxis]
        ind = sx - d
        ind = np.where(ind == 0)
        ind = ind[1]
        metrics = {}
        metrics["R1"] = float(np.sum(ind == 0)) / len(ind)
        metrics["R5"] = float(np.sum(ind < 5)) / len(ind)
        metrics["R10"] = float(np.sum(ind < 10)) / len(ind)
        metrics["MR"] = np.median(ind) + 1

        max_idx = np.argmax(outputs, axis=1)
        if self.error:
            # print top-20 errors.
            error = []
            for ex_idx in range(20):
                error.append((texts[ex_idx], texts[max_idx[ex_idx]]))
            metrics["error"] = error
        return metrics

    def print_computed_metrics(self, metrics):
        r1 = metrics["R1"]
        r5 = metrics["R5"]
        r10 = metrics["R10"]
        mr = metrics["MR"]
        print(
            "R@1: {:.4f} - R@5: {:.4f} - R@10: {:.4f} - Median R: {}".format(
                r1, r5, r10, mr
            )
        )
        if "error" in metrics:
            print(metrics["error"])


class DiDeMoMetric(Metric):
    """
    History of changes:
        python 2.x to python 3.x.
        merged utils.py into eval to save one file.
    Reference: https://github.com/LisaAnne/LocalizingMoments/blob/master/utils/eval.py
    Code to evaluate your results on the DiDeMo dataset.
    """

    def __init__(self, config, metric_names=["rank1", "rank5", "miou"]):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        assert len(outputs) == len(targets)
        rank1, rank5, miou = self._eval_predictions(outputs, targets)
        metrics = {
            "rank1": rank1,
            "rank5": rank5,
            "miou": miou,
        }
        return metrics

    def print_computed_metrics(self, metrics):
        rank1 = metrics["rank1"]
        rank5 = metrics["rank5"]
        miou = metrics["miou"]
        print(
            "Average rank@1: {:.4f} Average rank@5: {:.4f} Average iou: {:.4f}".format(
                rank1, rank5, miou
            )
        )

    def _iou(self, pred, gt):
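        # DiDeMo moments are (start, end) indices over fixed-length chunks with
        # inclusive endpoints, so a segment [s, e] covers e - s + 1 chunks;
        # hence the +1 in both the intersection and the union below.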
        intersection = max(0, min(pred[1], gt[1]) + 1 - max(pred[0], gt[0]))
        union = max(pred[1], gt[1]) + 1 - min(pred[0], gt[0])
        return float(intersection) / union

    def _rank(self, pred, gt):
        return pred.index(tuple(gt)) + 1

    def _eval_predictions(self, segments, data):
        '''
        Inputs:
        segments: For each item in the ground truth data, rank possible video segments given the description and video.
            In DiDeMo, there are 21 possible moments extracted for each video, so the list of video segments will be of length 21.
            The first video segment should be the video segment that best corresponds to the text query.
            There are 4180 sentences in the validation data, so when evaluating a model on the val dataset,
            segments should be a list of length 4180, and each item in segments should be a list of length 21.
        data: ground truth data
        '''
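        # Each ground-truth item carries several human-annotated time segments
        # in d['times']; following the original DiDeMo evaluation, the score
        # for an example averages over its three most favorable annotations.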
        average_ranks = []
        average_iou = []
        for s, d in zip(segments, data):
            pred = s[0]
            ious = [self._iou(pred, t) for t in d['times']]
            average_iou.append(np.mean(np.sort(ious)[-3:]))
            # skip annotations whose (start, end) pair is not among the ranked segments.
            ranks = [self._rank(s, t) for t in d['times'] if tuple(t) in s]
            average_ranks.append(np.mean(np.sort(ranks)[:3]))
        rank1 = np.sum(np.array(average_ranks) <= 1) / float(len(average_ranks))
        rank5 = np.sum(np.array(average_ranks) <= 5) / float(len(average_ranks))
        miou = np.mean(average_iou)
        return rank1, rank5, miou


class NLGMetric(Metric):
    def __init__(
        self,
        config,
        metric_names=[
            "Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4",
            "METEOR", "ROUGE_L", "CIDEr"
        ]
    ):
        super().__init__(config, metric_names)
        # please install NLGEval from `https://github.com/Maluuba/nlg-eval`.
        from nlgeval import NLGEval
        self.nlg = NLGEval()

    def compute_metrics(self, outputs, targets, **kwargs):
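        # note: nlg-eval expects `hyp_list` to be the list of generated
        # sentences and `ref_list` to be a list of reference lists (one inner
        # list per reference source), each aligned index-wise with `hyp_list`.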
        return self.nlg.compute_metrics(
            hyp_list=outputs, ref_list=targets)

    def print_computed_metrics(self, metrics):
        Bleu_1 = metrics["Bleu_1"]
        Bleu_2 = metrics["Bleu_2"]
        Bleu_3 = metrics["Bleu_3"]
        Bleu_4 = metrics["Bleu_4"]
        METEOR = metrics["METEOR"]
        ROUGE_L = metrics["ROUGE_L"]
        CIDEr = metrics["CIDEr"]
        print(
            "Bleu_1: {:.4f} - Bleu_2: {:.4f} - Bleu_3: {:.4f} - Bleu_4: {:.4f}"
            " - METEOR: {:.4f} - ROUGE_L: {:.4f} - CIDEr: {:.4f}".format(
                Bleu_1, Bleu_2, Bleu_3, Bleu_4, METEOR, ROUGE_L, CIDEr
            )
        )


class QAMetric(Metric):
    def __init__(
        self,
        config,
        metric_names=["acc"]
    ):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        from sklearn.metrics import accuracy_score
        return {"acc": accuracy_score(targets, outputs)}

    def print_computed_metrics(self, metrics):
        print("acc: {:.4f}".format(metrics["acc"]))


class COINActionSegmentationMetric(Metric):
    """
    The COIN dataset lists 3 repos for action segmentation:
    Action Sets, NeuralNetwork-Viterbi and TCFPN-ISBA.
    The first two share the same evaluation code:
    https://github.com/alexanderrichard/action-sets/blob/master/eval.py
    Future reference for the third:
    `https://github.com/Zephyr-D/TCFPN-ISBA/blob/master/utils/metrics.py`
    """

    def __init__(self, config, metric_name=["frame_acc"]):
        super().__init__(config, metric_name)

    def compute_metrics(self, outputs, targets):
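        # `outputs` and `targets` are aligned per-frame label arrays; frame
        # accuracy is the fraction of frames whose predicted label matches
        # the ground truth.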
        n_errors = np.sum(outputs != targets)
        n_frames = len(targets)
        return {"frame_acc": 1.0 - float(n_errors) / n_frames}

    def print_computed_metrics(self, metrics):
        fa = metrics["frame_acc"]
        print("frame accuracy:", fa)


class CrossTaskMetric(Metric):
    def __init__(self, config, metric_names=["recall"]):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, **kwargs):
        """refactored from line 166:
        https://github.com/DmZhukov/CrossTask/blob/master/train.py"""
        recalls = self._get_recalls(Y_true=targets, Y_pred=outputs)
        results = {}
        for task, rec in recalls.items():
            results[str(task)] = rec

        avg_recall = np.mean(list(recalls.values()))
        results["recall"] = avg_recall
        return results

    def print_computed_metrics(self, metrics):
        print('Recall: {0:0.3f}'.format(metrics["recall"]))
        for task in metrics:
            if task != "recall":
                print('Task {0}. Recall = {1:0.3f}'.format(
                    task, metrics[task]))

    def _get_recalls(self, Y_true, Y_pred):
        """refactored from
        https://github.com/DmZhukov/CrossTask/blob/master/train.py"""
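        # Y_true[task][vid] and Y_pred[task][vid] are binary matrices over
        # (frames, steps); a step counts as matched when a predicted frame for
        # that step coincides with a ground-truth frame, and recall is the
        # number of matched steps over the number of ground-truth steps.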
        step_match = {task: 0 for task in Y_true.keys()}
        step_total = {task: 0 for task in Y_true.keys()}
        for task, ys_true in Y_true.items():
            ys_pred = Y_pred[task]
            for vid in set(ys_pred.keys()).intersection(set(ys_true.keys())):
                y_true = ys_true[vid]
                y_pred = ys_pred[vid]
                step_total[task] += (y_true.sum(axis=0) > 0).sum()
                step_match[task] += (y_true * y_pred).sum()
        recalls = {
            task: step_match[task] / n for task, n in step_total.items()}
        return recalls


class ActionRecognitionMetric(Metric):
    def __init__(
        self,
        config,
        metric_names=["acc", "acc_splits", "r1_splits", "r5_splits", "r10_splits"]
    ):
        super().__init__(config, metric_names)

    def compute_metrics(self, outputs, targets, splits, **kwargs):
        all_video_embd = outputs
        labels = targets
        split1, split2, split3 = splits
        accs = []
        r1s = []
        r5s = []
        r10s = []
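        # Evaluate on each of the three standard train/test splits; a value of
        # 2 in a split array is assumed to mark test videos.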
        for split in range(3):
            if split == 0:
                s = split1
            elif split == 1:
                s = split2
            else:
                s = split3
            X_pred = all_video_embd[np.where(s == 2)[0]]
            label_test = labels[np.where(s == 2)[0]]
            logits = X_pred
            X_pred = np.argmax(X_pred, axis=1)
            acc = np.sum(X_pred == label_test) / float(len(X_pred))
            accs.append(acc)
            # compute recall.
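            # R@k here is the fraction of test clips whose ground-truth label
            # appears among the top-k scoring classes.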
            sorted_pred = (-logits).argsort(axis=-1)
            label_test_sp = label_test.reshape(-1, 1)
            r1 = np.mean((sorted_pred[:, :1] == label_test_sp).sum(axis=1), axis=0)
            r5 = np.mean((sorted_pred[:, :5] == label_test_sp).sum(axis=1), axis=0)
            r10 = np.mean((sorted_pred[:, :10] == label_test_sp).sum(axis=1), axis=0)
            r1s.append(r1)
            r5s.append(r5)
            r10s.append(r10)
        return {
            "acc": accs[0],
            "acc_splits": accs,
            "r1_splits": r1s,
            "r5_splits": r5s,
            "r10_splits": r10s,
        }

    def print_computed_metrics(self, metrics):
        for split, acc in enumerate(metrics["acc_splits"]):
            print(
                "Top 1 accuracy on split {}: {}; r1 {}; r5 {}; r10 {}".format(
                    split + 1, acc,
                    metrics["r1_splits"][split],
                    metrics["r5_splits"][split],
                    metrics["r10_splits"][split],
                )
            )