from typing import Dict, Any
import datasets
import evaluate
import numpy as np
from evaluate.utils.file_utils import add_start_docstrings
_DESCRIPTION = """
The "top-5 error" is the percentage of times that the target label does not appear among the 5 highest-probability predictions. It can be computed with:
Top-5 Error Rate = 1 - Top-5 Accuracy
or equivalently:
Top-5 Error Rate = (Number of incorrect top-5 predictions) / (Total number of cases processed)
Where:
- Top-5 Accuracy: The proportion of cases where the true label is among the model's top 5 predicted classes.
- Incorrect top-5 prediction: The true label is not in the top 5 predicted classes (ranked by probability).
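For example (an illustrative case, not real data): if the true label falls inside the model's top-5 predictions for 3 out of 4 examples, then Top-5 Accuracy = 3/4 = 0.75 and Top-5 Error Rate = 1 - 0.75 = 0.25.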
"""
_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `list` of `float`): Per-class prediction scores (probabilities or logits), one inner list of scores per example.
    references (`list` of `int`): Ground truth class indices, one per example.
Returns:
    accuracy (`float`): Top-1 accuracy. Minimum possible value is 0. Maximum possible value is 1.0.
    top5_error_rate (`float`): Top-5 Error Rate score. Minimum possible value is 0. Maximum possible value is 1.0.
Examples:
    >>> metric = evaluate.load("top5_error_rate")
    >>> results = metric.compute(
    ...     references=[2, 0],
    ...     predictions=[
    ...         [0.05, 0.10, 0.60, 0.05, 0.05, 0.15],
    ...         [0.01, 0.02, 0.03, 0.04, 0.40, 0.50],
    ...     ]
    ... )
    >>> print(results)
    {'accuracy': 0.5, 'top5_error_rate': 0.5}
"""
_CITATION = """
"""
@add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Top5ErrorRate(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Sequence(datasets.Value("float32")),
                    "references": datasets.Sequence(datasets.Value("int32")),
                }
                if self.config_name == "multilabel"
                else {
                    # Default config: per-class scores for each example and a single
                    # ground-truth class index per example, matching _compute below.
                    "predictions": datasets.Sequence(datasets.Value("float32")),
                    "references": datasets.Value("int32"),
                }
            ),
            reference_urls=[],
        )
    def _compute(
        self,
        *,
        predictions: list[list[float]] = None,
        references: list[int] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        # Convert inputs to arrays: (n_samples, n_classes) scores and (n_samples,) labels.
        outputs = np.array(predictions, dtype=np.float32)
        labels = np.array(references)
        # Top-1 accuracy: the highest-scoring class must match the reference label.
        pred = outputs.argmax(axis=1)
        acc = (pred == labels).mean()
        # Top-5 error rate: the reference label is absent from the 5 highest-scoring classes.
        top5_indices = outputs.argsort(axis=1)[:, -5:]
        correct = (labels.reshape(-1, 1) == top5_indices).any(axis=1)
        top5_error_rate = 1 - correct.mean()
        return {
            "accuracy": float(acc),
            "top5_error_rate": float(top5_error_rate),
        }
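# A minimal local smoke test: an illustrative sketch, not part of the metric module's
# required interface. It assumes this file can be run directly (e.g. saved as
# top5_error_rate.py) and instantiates the class without going through evaluate.load;
# the score vectors below are made-up demonstration data.
if __name__ == "__main__":
    metric = Top5ErrorRate()
    results = metric.compute(
        references=[0, 1, 2],
        predictions=[
            [0.40, 0.20, 0.15, 0.10, 0.10, 0.05],  # true class 0 is ranked 1st
            [0.05, 0.50, 0.20, 0.10, 0.10, 0.05],  # true class 1 is ranked 1st
            [0.30, 0.25, 0.01, 0.20, 0.14, 0.10],  # true class 2 is ranked last -> top-5 miss
        ],
    )
    print(results)  # expected: accuracy 2/3, top5_error_rate 1/3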