"""
Common code for all metrics.
"""

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

from itertools import combinations

import numpy as np

from ..utils import check_array, check_consistent_length
from ..utils.multiclass import type_of_target


def _average_binary_score(binary_metric, y_true, y_score, average, sample_weight=None):
"""Average a binary metric for multilabel classification.
Parameters
----------
y_true : array, shape = [n_samples] or [n_samples, n_classes]
True binary labels in binary label indicators.
y_score : array, shape = [n_samples] or [n_samples, n_classes]
Target scores, can either be probability estimates of the positive
class, confidence values, or binary decisions.
average : {None, 'micro', 'macro', 'samples', 'weighted'}, default='macro'
If ``None``, the scores for each class are returned. Otherwise,
this determines the type of averaging performed on the data:
``'micro'``:
Calculate metrics globally by considering each element of the label
indicator matrix as a label.
``'macro'``:
Calculate metrics for each label, and find their unweighted
mean. This does not take label imbalance into account.
``'weighted'``:
Calculate metrics for each label, and find their average, weighted
by support (the number of true instances for each label).
``'samples'``:
Calculate metrics for each instance, and find their average.
Will be ignored when ``y_true`` is binary.
sample_weight : array-like of shape (n_samples,), default=None
Sample weights.
binary_metric : callable, returns shape [n_classes]
The binary metric function to use.
Returns
-------
score : float or array of shape [n_classes]
If not ``None``, average the score, else return the score for each
classes.
"""
average_options = (None, "micro", "macro", "weighted", "samples")
if average not in average_options:
raise ValueError("average has to be one of {0}".format(average_options))
y_type = type_of_target(y_true)
if y_type not in ("binary", "multilabel-indicator"):
raise ValueError("{0} format is not supported".format(y_type))
if y_type == "binary":
return binary_metric(y_true, y_score, sample_weight=sample_weight)
check_consistent_length(y_true, y_score, sample_weight)
y_true = check_array(y_true)
y_score = check_array(y_score)
not_average_axis = 1
score_weight = sample_weight
average_weight = None
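
    # ``score_weight`` is forwarded to every call of ``binary_metric``;
    # ``average_weight`` (set below for 'weighted' and 'samples') weights the
    # per-class scores when they are averaged at the end.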
if average == "micro":
if score_weight is not None:
score_weight = np.repeat(score_weight, y_true.shape[1])
y_true = y_true.ravel()
y_score = y_score.ravel()
elif average == "weighted":
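        # The averaging weights are the per-class supports, i.e. the (possibly
        # sample-weighted) number of true instances of each label.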
if score_weight is not None:
average_weight = np.sum(
np.multiply(y_true, np.reshape(score_weight, (-1, 1))), axis=0
)
else:
average_weight = np.sum(y_true, axis=0)
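        # With no positive support for any label, a weighted average is
        # undefined; return 0 by convention.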
if np.isclose(average_weight.sum(), 0.0):
return 0
elif average == "samples":
# swap average_weight <-> score_weight
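        # The metric itself is then computed per sample (row), and
        # ``sample_weight`` is applied when averaging the per-sample scores.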
average_weight = score_weight
score_weight = None
not_average_axis = 0
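
    # y_true / y_score are 1-D only after the 'micro' ravel above (the binary
    # case returned early); reshape them to a single column for the loop below.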
if y_true.ndim == 1:
y_true = y_true.reshape((-1, 1))
if y_score.ndim == 1:
y_score = y_score.reshape((-1, 1))
n_classes = y_score.shape[not_average_axis]
score = np.zeros((n_classes,))
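    # Compute the binary metric for each class column (or for each sample row
    # when ``average='samples'``).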
for c in range(n_classes):
y_true_c = y_true.take([c], axis=not_average_axis).ravel()
y_score_c = y_score.take([c], axis=not_average_axis).ravel()
score[c] = binary_metric(y_true_c, y_score_c, sample_weight=score_weight)
# Average the results
if average is not None:
if average_weight is not None:
# Scores with 0 weights are forced to be 0, preventing the average
# score from being affected by 0-weighted NaN elements.
average_weight = np.asarray(average_weight)
score[average_weight == 0] = 0
return np.average(score, weights=average_weight)
else:
return score
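

# Illustrative usage sketch, kept as a comment so that nothing runs at import
# time. Using ``roc_auc_score`` as the binary metric and importing from
# ``sklearn.metrics._base`` are assumptions for the example, not requirements
# of the helper above.
#
#     >>> import numpy as np
#     >>> from sklearn.metrics import roc_auc_score
#     >>> from sklearn.metrics._base import _average_binary_score
#     >>> y_true = np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
#     >>> y_score = np.array([[0.9, 0.2], [0.2, 0.8], [0.7, 0.6], [0.1, 0.3]])
#     >>> float(_average_binary_score(roc_auc_score, y_true, y_score, "macro"))
#     1.0
#     >>> _average_binary_score(roc_auc_score, y_true, y_score, None)
#     array([1., 1.])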


def _average_multiclass_ovo_score(binary_metric, y_true, y_score, average="macro"):
    """Average one-versus-one scores for multiclass classification.

    Uses the binary metric for one-vs-one multiclass classification,
    where the score is computed according to the Hand & Till (2001) algorithm.

    Parameters
    ----------
    binary_metric : callable
        The binary metric function to use that accepts the following as input:
            y_true_target : array, shape = [n_samples_target]
                Some sub-array of y_true for a pair of classes designated
                positive and negative in the one-vs-one scheme.
            y_score_target : array, shape = [n_samples_target]
                Scores corresponding to the probability estimates of a sample
                belonging to the designated positive class label.

    y_true : array-like of shape (n_samples,)
        True multiclass labels.

    y_score : array-like of shape (n_samples, n_classes)
        Target scores corresponding to probability estimates of a sample
        belonging to a particular class.

    average : {'macro', 'weighted'}, default='macro'
        Determines the type of averaging performed on the pairwise binary
        metric scores:

        ``'macro'``:
            Calculate metrics for each label, and find their unweighted
            mean. This does not take label imbalance into account. Classes
            are assumed to be uniformly distributed.

        ``'weighted'``:
            Calculate metrics for each label, taking into account the
            prevalence of the classes.

    Returns
    -------
    score : float
        Average of the pairwise binary metric scores.
    """
check_consistent_length(y_true, y_score)
y_true_unique = np.unique(y_true)
n_classes = y_true_unique.shape[0]
n_pairs = n_classes * (n_classes - 1) // 2
pair_scores = np.empty(n_pairs)
is_weighted = average == "weighted"
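    # For average='weighted', each pair's score is weighted by the pair's
    # prevalence, i.e. the fraction of all samples belonging to either of its
    # two classes.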
prevalence = np.empty(n_pairs) if is_weighted else None
# Compute scores treating a as positive class and b as negative class,
# then b as positive class and a as negative class
for ix, (a, b) in enumerate(combinations(y_true_unique, 2)):
a_mask = y_true == a
b_mask = y_true == b
ab_mask = np.logical_or(a_mask, b_mask)
if is_weighted:
prevalence[ix] = np.average(ab_mask)
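        # Binary targets restricted to the samples of this pair: ``a_true``
        # marks class ``a`` as positive, ``b_true`` marks class ``b``.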
a_true = a_mask[ab_mask]
b_true = b_mask[ab_mask]
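        # The labels ``a`` and ``b`` index the columns of ``y_score`` directly,
        # so class labels must be integers in ``0..n_classes - 1``.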
a_true_score = binary_metric(a_true, y_score[ab_mask, a])
b_true_score = binary_metric(b_true, y_score[ab_mask, b])
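        # Hand & Till (2001): the score for the pair is the mean of the two
        # one-directional scores.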
pair_scores[ix] = (a_true_score + b_true_score) / 2
return np.average(pair_scores, weights=prevalence)
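

# Illustrative usage sketch for the one-vs-one helper, kept as a comment so
# that nothing runs at import time. ``roc_auc_score`` as the binary metric and
# the ``sklearn.metrics._base`` import path are assumptions for the example;
# labels must be encoded as 0..n_classes-1 because they index ``y_score``.
#
#     >>> import numpy as np
#     >>> from sklearn.metrics import roc_auc_score
#     >>> from sklearn.metrics._base import _average_multiclass_ovo_score
#     >>> y_true = np.array([0, 1, 2, 0, 1, 2])
#     >>> y_score = np.array(
#     ...     [[0.8, 0.1, 0.1],
#     ...      [0.2, 0.7, 0.1],
#     ...      [0.1, 0.2, 0.7],
#     ...      [0.6, 0.3, 0.1],
#     ...      [0.3, 0.5, 0.2],
#     ...      [0.2, 0.2, 0.6]]
#     ... )
#     >>> float(_average_multiclass_ovo_score(roc_auc_score, y_true, y_score))
#     1.0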