|
"""Metrics to assess performance on regression task. |
|
|
|
Functions named as ``*_score`` return a scalar value to maximize: the higher |
|
the better. |
|
|
|
Functions named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
|
the lower the better. |
|
""" |
|
|
|
|
|
|
|
|
|
import warnings |
|
from numbers import Real |
|
|
|
import numpy as np |
|
from scipy.special import xlogy |
|
|
|
from ..exceptions import UndefinedMetricWarning |
|
from ..utils._array_api import ( |
|
_average, |
|
_find_matching_floating_dtype, |
|
get_namespace, |
|
get_namespace_and_device, |
|
size, |
|
) |
|
from ..utils._param_validation import Interval, StrOptions, validate_params |
|
from ..utils.stats import _weighted_percentile |
|
from ..utils.validation import ( |
|
_check_sample_weight, |
|
_num_samples, |
|
check_array, |
|
check_consistent_length, |
|
column_or_1d, |
|
) |
|
|
|
__all__ = [
|
"max_error", |
|
"mean_absolute_error", |
|
"mean_squared_error", |
|
"mean_squared_log_error", |
|
"median_absolute_error", |
|
"mean_absolute_percentage_error", |
|
"mean_pinball_loss", |
|
"r2_score", |
|
"root_mean_squared_log_error", |
|
"root_mean_squared_error", |
|
"explained_variance_score", |
|
"mean_tweedie_deviance", |
|
"mean_poisson_deviance", |
|
"mean_gamma_deviance", |
|
"d2_tweedie_score", |
|
"d2_pinball_score", |
|
"d2_absolute_error_score", |
|
] |
|
|
|
|
|
def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None): |
|
"""Check that y_true and y_pred belong to the same regression task. |
|
|
|
To reduce redundancy when calling `_find_matching_floating_dtype`, |
|
please use `_check_reg_targets_with_floating_dtype` instead. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
    multioutput : array-like or string in ['raw_values', 'uniform_average',
        'variance_weighted'] or None
|
None is accepted due to backward compatibility of r2_score(). |
|
|
|
dtype : str or list, default="numeric" |
|
        The dtype argument passed to check_array.
|
|
|
xp : module, default=None |
|
Precomputed array namespace module. When passed, typically from a caller |
|
that has already performed inspection of its own inputs, skips array |
|
namespace inspection. |
|
|
|
Returns |
|
------- |
|
    type_true : one of {'continuous', 'continuous-multioutput'}
|
The type of the true target data, as output by |
|
'utils.multiclass.type_of_target'. |
|
|
|
y_true : array-like of shape (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
    multioutput : array-like of shape (n_outputs) or string in ['raw_values',
        'uniform_average', 'variance_weighted'] or None
|
Custom output weights if ``multioutput`` is array-like or |
|
just the corresponding argument if ``multioutput`` is a |
|
correct keyword. |
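
    Examples
    --------
    A small sketch of the normalization this private helper performs
    (internal API, subject to change between versions):

    >>> from sklearn.metrics._regression import _check_reg_targets
    >>> y_type, y_true, y_pred, multioutput = _check_reg_targets(
    ...     [1, 2, 3], [1.0, 2.0, 2.5], "uniform_average"
    ... )
    >>> y_type
    'continuous'
    >>> y_true.shape, y_pred.shape
    ((3, 1), (3, 1))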
|
""" |
|
xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp) |
|
|
|
check_consistent_length(y_true, y_pred) |
|
y_true = check_array(y_true, ensure_2d=False, dtype=dtype) |
|
y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype) |
|
|
|
if y_true.ndim == 1: |
|
y_true = xp.reshape(y_true, (-1, 1)) |
|
|
|
if y_pred.ndim == 1: |
|
y_pred = xp.reshape(y_pred, (-1, 1)) |
|
|
|
if y_true.shape[1] != y_pred.shape[1]: |
|
raise ValueError( |
|
"y_true and y_pred have different number of output ({0}!={1})".format( |
|
y_true.shape[1], y_pred.shape[1] |
|
) |
|
) |
|
|
|
n_outputs = y_true.shape[1] |
|
allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted") |
|
if isinstance(multioutput, str): |
|
if multioutput not in allowed_multioutput_str: |
|
raise ValueError( |
|
"Allowed 'multioutput' string values are {}. " |
|
"You provided multioutput={!r}".format( |
|
allowed_multioutput_str, multioutput |
|
) |
|
) |
|
elif multioutput is not None: |
|
multioutput = check_array(multioutput, ensure_2d=False) |
|
if n_outputs == 1: |
|
raise ValueError("Custom weights are useful only in multi-output cases.") |
|
elif n_outputs != multioutput.shape[0]: |
|
raise ValueError( |
|
"There must be equally many custom weights " |
|
f"({multioutput.shape[0]}) as outputs ({n_outputs})." |
|
) |
|
y_type = "continuous" if n_outputs == 1 else "continuous-multioutput" |
|
|
|
return y_type, y_true, y_pred, multioutput |
|
|
|
|
|
def _check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=None |
|
): |
|
"""Ensures that y_true, y_pred, and sample_weight correspond to the same |
|
regression task. |
|
|
|
Extends `_check_reg_targets` by automatically selecting a suitable floating-point |
|
data type for inputs using `_find_matching_floating_dtype`. |
|
|
|
    Use this private function only when converting inputs to array
    API-compatible floating-point arrays.
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
    sample_weight : array-like of shape (n_samples,) or None
        Sample weights.
|
|
|
multioutput : array-like or string in ['raw_values', 'uniform_average', \ |
|
'variance_weighted'] or None |
|
None is accepted due to backward compatibility of r2_score(). |
|
|
|
xp : module, default=None |
|
Precomputed array namespace module. When passed, typically from a caller |
|
that has already performed inspection of its own inputs, skips array |
|
namespace inspection. |
|
|
|
Returns |
|
------- |
|
type_true : one of {'continuous', 'continuous-multioutput'} |
|
The type of the true target data, as output by |
|
'utils.multiclass.type_of_target'. |
|
|
|
y_true : array-like of shape (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : array-like of shape (n_outputs) or string in ['raw_values', \ |
|
'uniform_average', 'variance_weighted'] or None |
|
Custom output weights if ``multioutput`` is array-like or |
|
just the corresponding argument if ``multioutput`` is a |
|
correct keyword. |
|
""" |
|
dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp) |
|
|
|
y_type, y_true, y_pred, multioutput = _check_reg_targets( |
|
y_true, y_pred, multioutput, dtype=dtype_name, xp=xp |
|
) |
|
|
|
|
|
|
|
if sample_weight is not None: |
|
sample_weight = xp.asarray(sample_weight, dtype=dtype_name) |
|
|
|
return y_type, y_true, y_pred, sample_weight, multioutput |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_absolute_error( |
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" |
|
): |
|
"""Mean absolute error regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_absolute_error>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or array of floats |
|
If multioutput is 'raw_values', then mean absolute error is returned |
|
for each output separately. |
|
If multioutput is 'uniform_average' or an ndarray of weights, then the |
|
weighted average of all output errors is returned. |
|
|
|
MAE output is non-negative floating point. The best value is 0.0. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_absolute_error |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> mean_absolute_error(y_true, y_pred) |
|
0.5 |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> mean_absolute_error(y_true, y_pred) |
|
0.75 |
|
>>> mean_absolute_error(y_true, y_pred, multioutput='raw_values') |
|
array([0.5, 1. ]) |
|
>>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) |
|
0.85... |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) |
|
|
|
_, y_true, y_pred, sample_weight, multioutput = ( |
|
_check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
) |
|
|
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
|
|
output_errors = _average( |
|
xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp |
|
) |
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
return output_errors |
|
elif multioutput == "uniform_average": |
|
|
|
            multioutput = None

    # Average across the outputs (possibly with weights).
    mean_absolute_error = _average(output_errors, weights=multioutput)
|
|
|
return float(mean_absolute_error) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"alpha": [Interval(Real, 0, 1, closed="both")], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_pinball_loss( |
|
y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" |
|
): |
|
"""Pinball loss for quantile regression. |
|
|
|
Read more in the :ref:`User Guide <pinball_loss>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
    alpha : float, default=0.5
        Slope of the pinball loss. This loss is equivalent to
        :ref:`mean_absolute_error` when `alpha=0.5`; `alpha=0.95` is
        minimized by estimators of the 95th percentile.
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
        If multioutput is 'raw_values', then the pinball loss is returned
|
for each output separately. |
|
If multioutput is 'uniform_average' or an ndarray of weights, then the |
|
weighted average of all output errors is returned. |
|
|
|
The pinball loss output is a non-negative floating point. The best |
|
value is 0.0. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_pinball_loss |
|
>>> y_true = [1, 2, 3] |
|
>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1) |
|
np.float64(0.03...) |
|
>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1) |
|
np.float64(0.3...) |
|
>>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9) |
|
np.float64(0.3...) |
|
>>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9) |
|
np.float64(0.03...) |
|
>>> mean_pinball_loss(y_true, y_true, alpha=0.1) |
|
np.float64(0.0) |
|
>>> mean_pinball_loss(y_true, y_true, alpha=0.9) |
|
np.float64(0.0) |
|
""" |
|
y_type, y_true, y_pred, multioutput = _check_reg_targets( |
|
y_true, y_pred, multioutput |
|
) |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
diff = y_true - y_pred |
|
sign = (diff >= 0).astype(diff.dtype) |
|
loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff |
|
output_errors = np.average(loss, weights=sample_weight, axis=0) |
|
|
|
if isinstance(multioutput, str) and multioutput == "raw_values": |
|
return output_errors |
|
|
|
if isinstance(multioutput, str) and multioutput == "uniform_average": |
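        # Pass None as weights to np.average: uniform mean.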
|
|
|
multioutput = None |
|
|
|
return np.average(output_errors, weights=multioutput) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_absolute_percentage_error( |
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" |
|
): |
|
"""Mean absolute percentage error (MAPE) regression loss. |
|
|
|
Note that we are not using the common "percentage" definition: the percentage |
|
in the range [0, 100] is converted to a relative value in the range [0, 1] |
|
by dividing by 100. Thus, an error of 200% corresponds to a relative error of 2. |
|
|
|
Read more in the :ref:`User Guide <mean_absolute_percentage_error>`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
        If input is a list, then its shape must be (n_outputs,).
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
If multioutput is 'raw_values', then mean absolute percentage error |
|
is returned for each output separately. |
|
If multioutput is 'uniform_average' or an ndarray of weights, then the |
|
weighted average of all output errors is returned. |
|
|
|
MAPE output is non-negative floating point. The best value is 0.0. |
|
But note that bad predictions can lead to arbitrarily large |
|
MAPE values, especially if some `y_true` values are very close to zero. |
|
Note that we return a large value instead of `inf` when `y_true` is zero. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_absolute_percentage_error |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> mean_absolute_percentage_error(y_true, y_pred) |
|
0.3273... |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> mean_absolute_percentage_error(y_true, y_pred) |
|
0.5515... |
|
>>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7]) |
|
0.6198... |
|
>>> # the value when some element of the y_true is zero is arbitrarily high because |
|
>>> # of the division by epsilon |
|
>>> y_true = [1., 0., 2.4, 7.] |
|
>>> y_pred = [1.2, 0.1, 2.4, 8.] |
|
>>> mean_absolute_percentage_error(y_true, y_pred) |
|
112589990684262.48 |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) |
|
_, y_true, y_pred, sample_weight, multioutput = ( |
|
_check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
) |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype) |
|
y_true_abs = xp.abs(y_true) |
|
mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon) |
|
output_errors = _average(mape, weights=sample_weight, axis=0) |
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
return output_errors |
|
elif multioutput == "uniform_average": |
|
|
|
            multioutput = None

    # Average across the outputs (possibly with weights).
    mean_absolute_percentage_error = _average(output_errors, weights=multioutput)
|
|
|
return float(mean_absolute_percentage_error) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_squared_error( |
|
y_true, |
|
y_pred, |
|
*, |
|
sample_weight=None, |
|
multioutput="uniform_average", |
|
): |
|
"""Mean squared error regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_squared_error>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or array of floats |
|
A non-negative floating point value (the best value is 0.0), or an |
|
array of floating point values, one for each individual target. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_squared_error |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> mean_squared_error(y_true, y_pred) |
|
0.375 |
|
>>> y_true = [[0.5, 1],[-1, 1],[7, -6]] |
|
>>> y_pred = [[0, 2],[-1, 2],[8, -5]] |
|
>>> mean_squared_error(y_true, y_pred) |
|
0.708... |
|
>>> mean_squared_error(y_true, y_pred, multioutput='raw_values') |
|
array([0.41666667, 1. ]) |
|
>>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7]) |
|
0.825... |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) |
|
_, y_true, y_pred, sample_weight, multioutput = ( |
|
_check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
) |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
output_errors = _average((y_true - y_pred) ** 2, axis=0, weights=sample_weight) |
|
|
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
return output_errors |
|
elif multioutput == "uniform_average": |
|
|
|
            multioutput = None

    # Average across the outputs (possibly with weights).
    mean_squared_error = _average(output_errors, weights=multioutput)
|
|
|
return float(mean_squared_error) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def root_mean_squared_error( |
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" |
|
): |
|
"""Root mean squared error regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_squared_error>`. |
|
|
|
.. versionadded:: 1.4 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
A non-negative floating point value (the best value is 0.0), or an |
|
array of floating point values, one for each individual target. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import root_mean_squared_error |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> root_mean_squared_error(y_true, y_pred) |
|
0.612... |
|
>>> y_true = [[0.5, 1],[-1, 1],[7, -6]] |
|
>>> y_pred = [[0, 2],[-1, 2],[8, -5]] |
|
>>> root_mean_squared_error(y_true, y_pred) |
|
0.822... |
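
    Custom weights can be used to combine the per-output errors; the value
    below is the weighted average of the two per-output root mean squared
    errors:

    >>> root_mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.893...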
|
""" |
|
|
|
xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput) |
|
|
|
output_errors = xp.sqrt( |
|
mean_squared_error( |
|
y_true, y_pred, sample_weight=sample_weight, multioutput="raw_values" |
|
) |
|
) |
|
|
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
return output_errors |
|
elif multioutput == "uniform_average": |
|
|
|
            multioutput = None

    # Average across the outputs (possibly with weights).
    root_mean_squared_error = _average(output_errors, weights=multioutput)
|
|
|
return float(root_mean_squared_error) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_squared_log_error( |
|
y_true, |
|
y_pred, |
|
*, |
|
sample_weight=None, |
|
multioutput="uniform_average", |
|
): |
|
"""Mean squared logarithmic error regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_squared_log_error>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
|
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors when the input is of multioutput |
|
format. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
A non-negative floating point value (the best value is 0.0), or an |
|
array of floating point values, one for each individual target. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_squared_log_error |
|
>>> y_true = [3, 5, 2.5, 7] |
|
>>> y_pred = [2.5, 5, 4, 8] |
|
>>> mean_squared_log_error(y_true, y_pred) |
|
0.039... |
|
>>> y_true = [[0.5, 1], [1, 2], [7, 6]] |
|
>>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]] |
|
>>> mean_squared_log_error(y_true, y_pred) |
|
0.044... |
|
>>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values') |
|
array([0.00462428, 0.08377444]) |
|
>>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7]) |
|
0.060... |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
|
|
_, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
|
|
if xp.any(y_true <= -1) or xp.any(y_pred <= -1): |
|
raise ValueError( |
|
"Mean Squared Logarithmic Error cannot be used when " |
|
"targets contain values less than or equal to -1." |
|
) |
|
|
|
return mean_squared_error( |
|
xp.log1p(y_true), |
|
xp.log1p(y_pred), |
|
sample_weight=sample_weight, |
|
multioutput=multioutput, |
|
) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def root_mean_squared_log_error( |
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" |
|
): |
|
"""Root mean squared logarithmic error regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_squared_log_error>`. |
|
|
|
.. versionadded:: 1.4 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
|
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors when the input is of multioutput |
|
format. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
A non-negative floating point value (the best value is 0.0), or an |
|
array of floating point values, one for each individual target. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import root_mean_squared_log_error |
|
>>> y_true = [3, 5, 2.5, 7] |
|
>>> y_pred = [2.5, 5, 4, 8] |
|
>>> root_mean_squared_log_error(y_true, y_pred) |
|
0.199... |
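
    The result matches :func:`root_mean_squared_error` applied to
    ``log1p``-transformed targets, which is how this metric is computed:

    >>> import numpy as np
    >>> from sklearn.metrics import root_mean_squared_error
    >>> root_mean_squared_error(np.log1p(y_true), np.log1p(y_pred))
    0.199...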
|
""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
|
|
_, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
|
|
if xp.any(y_true <= -1) or xp.any(y_pred <= -1): |
|
raise ValueError( |
|
"Root Mean Squared Logarithmic Error cannot be used when " |
|
"targets contain values less than or equal to -1." |
|
) |
|
|
|
return root_mean_squared_error( |
|
xp.log1p(y_true), |
|
xp.log1p(y_pred), |
|
sample_weight=sample_weight, |
|
multioutput=multioutput, |
|
) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], |
|
"sample_weight": ["array-like", None], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def median_absolute_error( |
|
y_true, y_pred, *, multioutput="uniform_average", sample_weight=None |
|
): |
|
"""Median absolute error regression loss. |
|
|
|
Median absolute error output is non-negative floating point. The best value |
|
is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. Array-like value defines |
|
weights used to average errors. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Errors of all outputs are averaged with uniform weight. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
Returns |
|
------- |
|
loss : float or ndarray of floats |
|
        If multioutput is 'raw_values', then the median absolute error is returned
|
for each output separately. |
|
If multioutput is 'uniform_average' or an ndarray of weights, then the |
|
weighted average of all output errors is returned. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import median_absolute_error |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> median_absolute_error(y_true, y_pred) |
|
np.float64(0.5) |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> median_absolute_error(y_true, y_pred) |
|
np.float64(0.75) |
|
>>> median_absolute_error(y_true, y_pred, multioutput='raw_values') |
|
array([0.5, 1. ]) |
|
>>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7]) |
|
np.float64(0.85) |
|
""" |
|
y_type, y_true, y_pred, multioutput = _check_reg_targets( |
|
y_true, y_pred, multioutput |
|
) |
|
if sample_weight is None: |
|
output_errors = np.median(np.abs(y_pred - y_true), axis=0) |
|
else: |
|
sample_weight = _check_sample_weight(sample_weight, y_pred) |
|
output_errors = _weighted_percentile( |
|
np.abs(y_pred - y_true), sample_weight=sample_weight |
|
) |
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
return output_errors |
|
elif multioutput == "uniform_average": |
|
|
|
multioutput = None |
|
|
|
return np.average(output_errors, weights=multioutput) |
|
|
|
|
|
def _assemble_r2_explained_variance( |
|
numerator, denominator, n_outputs, multioutput, force_finite, xp, device |
|
): |
|
"""Common part used by explained variance score and :math:`R^2` score.""" |
|
dtype = numerator.dtype |
|
|
|
nonzero_denominator = denominator != 0 |
|
|
|
    if not force_finite:
        # Standard formula; may produce NaN or -Inf when the denominator is
        # zero.
        output_scores = 1 - (numerator / denominator)
|
else: |
|
        nonzero_numerator = numerator != 0

        # Perfect predictions (zero numerator) default to a score of 1.0,
        # even when the denominator is also zero (this avoids NaN scores).
        output_scores = xp.ones([n_outputs], device=device, dtype=dtype)

        # Non-zero numerator and non-zero denominator: use the standard
        # formula.
        valid_score = nonzero_denominator & nonzero_numerator
|
|
|
output_scores[valid_score] = 1 - ( |
|
numerator[valid_score] / denominator[valid_score] |
|
        )

        # Non-zero numerator with a zero denominator: arbitrarily set the
        # score to 0.0 to avoid -Inf.
        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0
|
|
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
|
|
return output_scores |
|
elif multioutput == "uniform_average": |
|
|
|
avg_weights = None |
|
elif multioutput == "variance_weighted": |
|
avg_weights = denominator |
|
            if not xp.any(nonzero_denominator):
                # All weights are zero; _average would raise a
                # ZeroDivisionError. This only happens when all y values are
                # constant, and since the weights are then all equal, fall
                # back to uniform weights.
                avg_weights = None
|
else: |
|
avg_weights = multioutput |
|
|
|
result = _average(output_scores, weights=avg_weights) |
|
if size(result) == 1: |
|
return float(result) |
|
return result |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [ |
|
StrOptions({"raw_values", "uniform_average", "variance_weighted"}), |
|
"array-like", |
|
], |
|
"force_finite": ["boolean"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def explained_variance_score( |
|
y_true, |
|
y_pred, |
|
*, |
|
sample_weight=None, |
|
multioutput="uniform_average", |
|
force_finite=True, |
|
): |
|
"""Explained variance regression score function. |
|
|
|
Best possible score is 1.0, lower values are worse. |
|
|
|
In the particular case when ``y_true`` is constant, the explained variance |
|
score is not finite: it is either ``NaN`` (perfect predictions) or |
|
    ``-Inf`` (imperfect predictions). To prevent such non-finite numbers from
    polluting higher-level experiments such as a grid search cross-validation,
|
by default these cases are replaced with 1.0 (perfect predictions) or 0.0 |
|
(imperfect predictions) respectively. If ``force_finite`` |
|
is set to ``False``, this score falls back on the original :math:`R^2` |
|
definition. |
|
|
|
.. note:: |
|
The Explained Variance score is similar to the |
|
:func:`R^2 score <r2_score>`, with the notable difference that it |
|
does not account for systematic offsets in the prediction. Most often |
|
the :func:`R^2 score <r2_score>` should be preferred. |
|
|
|
Read more in the :ref:`User Guide <explained_variance_score>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \ |
|
array-like of shape (n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output scores. |
|
Array-like value defines weights used to average scores. |
|
|
|
'raw_values' : |
|
Returns a full set of scores in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Scores of all outputs are averaged with uniform weight. |
|
|
|
'variance_weighted' : |
|
Scores of all outputs are averaged, weighted by the variances |
|
of each individual output. |
|
|
|
force_finite : bool, default=True |
|
Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant |
|
data should be replaced with real numbers (``1.0`` if prediction is |
|
perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting |
|
for hyperparameters' search procedures (e.g. grid search |
|
cross-validation). |
|
|
|
.. versionadded:: 1.1 |
|
|
|
Returns |
|
------- |
|
score : float or ndarray of floats |
|
The explained variance or ndarray if 'multioutput' is 'raw_values'. |
|
|
|
See Also |
|
-------- |
|
r2_score : |
|
Similar metric, but accounting for systematic offsets in |
|
prediction. |
|
|
|
Notes |
|
----- |
|
This is not a symmetric function. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import explained_variance_score |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> explained_variance_score(y_true, y_pred) |
|
0.957... |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> explained_variance_score(y_true, y_pred, multioutput='uniform_average') |
|
0.983... |
|
>>> y_true = [-2, -2, -2] |
|
>>> y_pred = [-2, -2, -2] |
|
>>> explained_variance_score(y_true, y_pred) |
|
1.0 |
|
>>> explained_variance_score(y_true, y_pred, force_finite=False) |
|
nan |
|
>>> y_true = [-2, -2, -2] |
|
>>> y_pred = [-2, -2, -2 + 1e-8] |
|
>>> explained_variance_score(y_true, y_pred) |
|
0.0 |
|
>>> explained_variance_score(y_true, y_pred, force_finite=False) |
|
-inf |
|
""" |
|
y_type, y_true, y_pred, multioutput = _check_reg_targets( |
|
y_true, y_pred, multioutput |
|
) |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
|
|
y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0) |
|
numerator = np.average( |
|
(y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0 |
|
) |
|
|
|
y_true_avg = np.average(y_true, weights=sample_weight, axis=0) |
|
denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0) |
|
|
|
return _assemble_r2_explained_variance( |
|
numerator=numerator, |
|
denominator=denominator, |
|
n_outputs=y_true.shape[1], |
|
multioutput=multioutput, |
|
force_finite=force_finite, |
|
xp=get_namespace(y_true)[0], |
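        # Array API dispatch is not enabled for explained_variance_score yet.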
|
|
|
device=None, |
|
) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [ |
|
StrOptions({"raw_values", "uniform_average", "variance_weighted"}), |
|
"array-like", |
|
None, |
|
], |
|
"force_finite": ["boolean"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def r2_score( |
|
y_true, |
|
y_pred, |
|
*, |
|
sample_weight=None, |
|
multioutput="uniform_average", |
|
force_finite=True, |
|
): |
|
""":math:`R^2` (coefficient of determination) regression score function. |
|
|
|
Best possible score is 1.0 and it can be negative (because the |
|
model can be arbitrarily worse). In the general case when the true y is |
|
non-constant, a constant model that always predicts the average y |
|
disregarding the input features would get a :math:`R^2` score of 0.0. |
|
|
|
In the particular case when ``y_true`` is constant, the :math:`R^2` score |
|
is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf`` |
|
    (imperfect predictions). To prevent such non-finite numbers from polluting
|
higher-level experiments such as a grid search cross-validation, by default |
|
these cases are replaced with 1.0 (perfect predictions) or 0.0 (imperfect |
|
predictions) respectively. You can set ``force_finite`` to ``False`` to |
|
prevent this fix from happening. |
|
|
|
Note: when the prediction residuals have zero mean, the :math:`R^2` score |
|
is identical to the |
|
:func:`Explained Variance score <explained_variance_score>`. |
|
|
|
Read more in the :ref:`User Guide <r2_score>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \ |
|
array-like of shape (n_outputs,) or None, default='uniform_average' |
|
|
|
Defines aggregating of multiple output scores. |
|
Array-like value defines weights used to average scores. |
|
Default is "uniform_average". |
|
|
|
'raw_values' : |
|
Returns a full set of scores in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Scores of all outputs are averaged with uniform weight. |
|
|
|
'variance_weighted' : |
|
Scores of all outputs are averaged, weighted by the variances |
|
of each individual output. |
|
|
|
.. versionchanged:: 0.19 |
|
Default value of multioutput is 'uniform_average'. |
|
|
|
force_finite : bool, default=True |
|
Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant |
|
data should be replaced with real numbers (``1.0`` if prediction is |
|
perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting |
|
for hyperparameters' search procedures (e.g. grid search |
|
cross-validation). |
|
|
|
.. versionadded:: 1.1 |
|
|
|
Returns |
|
------- |
|
z : float or ndarray of floats |
|
The :math:`R^2` score or ndarray of scores if 'multioutput' is |
|
'raw_values'. |
|
|
|
Notes |
|
----- |
|
This is not a symmetric function. |
|
|
|
Unlike most other scores, :math:`R^2` score may be negative (it need not |
|
actually be the square of a quantity R). |
|
|
|
This metric is not well-defined for single samples and will return a NaN |
|
value if n_samples is less than two. |
|
|
|
References |
|
---------- |
|
.. [1] `Wikipedia entry on the Coefficient of determination |
|
<https://en.wikipedia.org/wiki/Coefficient_of_determination>`_ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import r2_score |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> r2_score(y_true, y_pred) |
|
0.948... |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> r2_score(y_true, y_pred, |
|
... multioutput='variance_weighted') |
|
0.938... |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [1, 2, 3] |
|
>>> r2_score(y_true, y_pred) |
|
1.0 |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [2, 2, 2] |
|
>>> r2_score(y_true, y_pred) |
|
0.0 |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [3, 2, 1] |
|
>>> r2_score(y_true, y_pred) |
|
-3.0 |
|
>>> y_true = [-2, -2, -2] |
|
>>> y_pred = [-2, -2, -2] |
|
>>> r2_score(y_true, y_pred) |
|
1.0 |
|
>>> r2_score(y_true, y_pred, force_finite=False) |
|
nan |
|
>>> y_true = [-2, -2, -2] |
|
>>> y_pred = [-2, -2, -2 + 1e-8] |
|
>>> r2_score(y_true, y_pred) |
|
0.0 |
|
>>> r2_score(y_true, y_pred, force_finite=False) |
|
-inf |
|
""" |
|
xp, _, device_ = get_namespace_and_device( |
|
y_true, y_pred, sample_weight, multioutput |
|
) |
|
|
|
_, y_true, y_pred, sample_weight, multioutput = ( |
|
_check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput, xp=xp |
|
) |
|
) |
|
|
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
|
|
if _num_samples(y_pred) < 2: |
|
msg = "R^2 score is not well-defined with less than two samples." |
|
warnings.warn(msg, UndefinedMetricWarning) |
|
return float("nan") |
|
|
|
if sample_weight is not None: |
|
sample_weight = column_or_1d(sample_weight) |
|
weight = sample_weight[:, None] |
|
else: |
|
weight = 1.0 |
|
|
|
numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0) |
|
denominator = xp.sum( |
|
weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2, |
|
axis=0, |
|
) |
|
|
|
return _assemble_r2_explained_variance( |
|
numerator=numerator, |
|
denominator=denominator, |
|
n_outputs=y_true.shape[1], |
|
multioutput=multioutput, |
|
force_finite=force_finite, |
|
xp=xp, |
|
device=device_, |
|
) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def max_error(y_true, y_pred): |
|
""" |
|
The max_error metric calculates the maximum residual error. |
|
|
|
Read more in the :ref:`User Guide <max_error>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) |
|
Estimated target values. |
|
|
|
Returns |
|
------- |
|
max_error : float |
|
A positive floating point value (the best value is 0.0). |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import max_error |
|
>>> y_true = [3, 2, 7, 1] |
|
>>> y_pred = [4, 2, 7, 1] |
|
>>> max_error(y_true, y_pred) |
|
np.int64(1) |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None, xp=xp) |
|
if y_type == "continuous-multioutput": |
|
raise ValueError("Multioutput not supported in max_error") |
|
return xp.max(xp.abs(y_true - y_pred)) |
|
|
|
|
|
def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power): |
|
"""Mean Tweedie deviance regression loss.""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
p = power |
|
zero = xp.asarray(0, dtype=y_true.dtype) |
|
if p < 0: |
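        # 'Extreme stable' distribution: y_true any real number, y_pred > 0.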
|
|
|
dev = 2 * ( |
|
xp.pow(xp.where(y_true > 0, y_true, zero), xp.asarray(2 - p)) |
|
/ ((1 - p) * (2 - p)) |
|
- y_true * xp.pow(y_pred, xp.asarray(1 - p)) / (1 - p) |
|
+ xp.pow(y_pred, xp.asarray(2 - p)) / (2 - p) |
|
) |
|
elif p == 0: |
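        # Normal distribution: plain squared error.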
|
|
|
dev = (y_true - y_pred) ** 2 |
|
elif p == 1: |
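        # Poisson distribution.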
|
|
|
dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred) |
|
elif p == 2: |
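        # Gamma distribution.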
|
|
|
dev = 2 * (xp.log(y_pred / y_true) + y_true / y_pred - 1) |
|
else: |
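        # General Tweedie deviance for the remaining admissible powers.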
|
dev = 2 * ( |
|
xp.pow(y_true, xp.asarray(2 - p)) / ((1 - p) * (2 - p)) |
|
- y_true * xp.pow(y_pred, xp.asarray(1 - p)) / (1 - p) |
|
+ xp.pow(y_pred, xp.asarray(2 - p)) / (2 - p) |
|
) |
|
return float(_average(dev, weights=sample_weight)) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"power": [ |
|
Interval(Real, None, 0, closed="right"), |
|
Interval(Real, 1, None, closed="left"), |
|
], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): |
|
"""Mean Tweedie deviance regression loss. |
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
power : float, default=0 |
|
Tweedie power parameter. Either power <= 0 or power >= 1. |
|
|
|
        The higher `power`, the less weight is given to extreme
        deviations between true and predicted targets.
|
|
|
- power < 0: Extreme stable distribution. Requires: y_pred > 0. |
|
- power = 0 : Normal distribution, output corresponds to |
|
mean_squared_error. y_true and y_pred can be any real numbers. |
|
- power = 1 : Poisson distribution. Requires: y_true >= 0 and |
|
y_pred > 0. |
|
        - 1 < power < 2 : Compound Poisson distribution. Requires: y_true >= 0
|
and y_pred > 0. |
|
- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. |
|
- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 |
|
and y_pred > 0. |
|
- otherwise : Positive stable distribution. Requires: y_true > 0 |
|
and y_pred > 0. |
|
|
|
Returns |
|
------- |
|
loss : float |
|
A non-negative floating point value (the best value is 0.0). |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_tweedie_deviance |
|
>>> y_true = [2, 0, 1, 4] |
|
>>> y_pred = [0.5, 0.5, 2., 2.] |
|
>>> mean_tweedie_deviance(y_true, y_pred, power=1) |
|
1.4260... |
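
    With `power=0` the Tweedie deviance reduces to the squared error, so the
    result matches :func:`mean_squared_error`:

    >>> mean_tweedie_deviance(y_true, y_pred, power=0)
    1.875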
|
""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput=None, xp=xp |
|
) |
|
if y_type == "continuous-multioutput": |
|
raise ValueError("Multioutput not supported in mean_tweedie_deviance") |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
|
|
if sample_weight is not None: |
|
sample_weight = column_or_1d(sample_weight) |
|
sample_weight = sample_weight[:, np.newaxis] |
|
|
|
message = f"Mean Tweedie deviance error with power={power} can only be used on " |
|
if power < 0: |
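        # 'Extreme stable' distribution: y_true any real number, y_pred > 0.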
|
|
|
if xp.any(y_pred <= 0): |
|
raise ValueError(message + "strictly positive y_pred.") |
|
elif power == 0: |
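        # Normal distribution: y_true and y_pred can be any real number.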
|
|
|
pass |
|
elif 1 <= power < 2: |
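        # Poisson and compound Poisson distributions: y_true >= 0, y_pred > 0.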
|
|
|
if xp.any(y_true < 0) or xp.any(y_pred <= 0): |
|
raise ValueError(message + "non-negative y and strictly positive y_pred.") |
|
elif power >= 2: |
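        # Gamma and extreme stable distributions: y_true > 0 and y_pred > 0.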
|
|
|
if xp.any(y_true <= 0) or xp.any(y_pred <= 0): |
|
raise ValueError(message + "strictly positive y and y_pred.") |
|
else: |
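        # Unreachable: validate_params restricts power to <= 0 or >= 1.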
|
|
|
raise ValueError |
|
|
|
return _mean_tweedie_deviance( |
|
y_true, y_pred, sample_weight=sample_weight, power=power |
|
) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): |
|
"""Mean Poisson deviance regression loss. |
|
|
|
Poisson deviance is equivalent to the Tweedie deviance with |
|
the power parameter `power=1`. |
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) |
|
Ground truth (correct) target values. Requires y_true >= 0. |
|
|
|
y_pred : array-like of shape (n_samples,) |
|
Estimated target values. Requires y_pred > 0. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
Returns |
|
------- |
|
loss : float |
|
A non-negative floating point value (the best value is 0.0). |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_poisson_deviance |
|
>>> y_true = [2, 0, 1, 4] |
|
>>> y_pred = [0.5, 0.5, 2., 2.] |
|
>>> mean_poisson_deviance(y_true, y_pred) |
|
1.4260... |
|
""" |
|
return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): |
|
"""Mean Gamma deviance regression loss. |
|
|
|
Gamma deviance is equivalent to the Tweedie deviance with |
|
the power parameter `power=2`. It is invariant to scaling of |
|
the target variable, and measures relative errors. |
|
|
|
Read more in the :ref:`User Guide <mean_tweedie_deviance>`. |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) |
|
Ground truth (correct) target values. Requires y_true > 0. |
|
|
|
y_pred : array-like of shape (n_samples,) |
|
Estimated target values. Requires y_pred > 0. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
Returns |
|
------- |
|
loss : float |
|
A non-negative floating point value (the best value is 0.0). |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import mean_gamma_deviance |
|
>>> y_true = [2, 0.5, 1, 4] |
|
>>> y_pred = [0.5, 0.5, 2., 2.] |
|
>>> mean_gamma_deviance(y_true, y_pred) |
|
1.0568... |
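
    The Gamma deviance depends only on the ratio ``y_pred / y_true``, so
    rescaling both by a common factor leaves the loss unchanged:

    >>> import numpy as np
    >>> mean_gamma_deviance(10 * np.asarray(y_true), 10 * np.asarray(y_pred))
    1.0568...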
|
""" |
|
return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"power": [ |
|
Interval(Real, None, 0, closed="right"), |
|
Interval(Real, 1, None, closed="left"), |
|
], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): |
|
""" |
|
:math:`D^2` regression score function, fraction of Tweedie deviance explained. |
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be |
|
arbitrarily worse). A model that always uses the empirical mean of `y_true` as |
|
constant prediction, disregarding the input features, gets a D^2 score of 0.0. |
|
|
|
Read more in the :ref:`User Guide <d2_score>`. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
power : float, default=0 |
|
Tweedie power parameter. Either power <= 0 or power >= 1. |
|
|
|
        The higher `power`, the less weight is given to extreme
        deviations between true and predicted targets.
|
|
|
- power < 0: Extreme stable distribution. Requires: y_pred > 0. |
|
- power = 0 : Normal distribution, output corresponds to r2_score. |
|
y_true and y_pred can be any real numbers. |
|
- power = 1 : Poisson distribution. Requires: y_true >= 0 and |
|
y_pred > 0. |
|
        - 1 < power < 2 : Compound Poisson distribution. Requires: y_true >= 0
|
and y_pred > 0. |
|
- power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0. |
|
- power = 3 : Inverse Gaussian distribution. Requires: y_true > 0 |
|
and y_pred > 0. |
|
- otherwise : Positive stable distribution. Requires: y_true > 0 |
|
and y_pred > 0. |
|
|
|
Returns |
|
------- |
|
z : float or ndarray of floats |
|
The D^2 score. |
|
|
|
Notes |
|
----- |
|
This is not a symmetric function. |
|
|
|
Like R^2, D^2 score may be negative (it need not actually be the square of |
|
a quantity D). |
|
|
|
This metric is not well-defined for single samples and will return a NaN |
|
value if n_samples is less than two. |
|
|
|
References |
|
---------- |
|
.. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. |
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and |
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import d2_tweedie_score |
|
>>> y_true = [0.5, 1, 2.5, 7] |
|
>>> y_pred = [1, 1, 5, 3.5] |
|
>>> d2_tweedie_score(y_true, y_pred) |
|
0.285... |
|
>>> d2_tweedie_score(y_true, y_pred, power=1) |
|
0.487... |
|
>>> d2_tweedie_score(y_true, y_pred, power=2) |
|
0.630... |
|
>>> d2_tweedie_score(y_true, y_true, power=2) |
|
1.0 |
|
""" |
|
xp, _ = get_namespace(y_true, y_pred) |
|
|
|
y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype( |
|
y_true, y_pred, sample_weight, multioutput=None, xp=xp |
|
) |
|
if y_type == "continuous-multioutput": |
|
raise ValueError("Multioutput not supported in d2_tweedie_score") |
|
|
|
if _num_samples(y_pred) < 2: |
|
msg = "D^2 score is not well-defined with less than two samples." |
|
warnings.warn(msg, UndefinedMetricWarning) |
|
return float("nan") |
|
|
|
y_true, y_pred = xp.squeeze(y_true, axis=1), xp.squeeze(y_pred, axis=1) |
|
numerator = mean_tweedie_deviance( |
|
y_true, y_pred, sample_weight=sample_weight, power=power |
|
) |
|
|
|
y_avg = _average(y_true, weights=sample_weight, xp=xp) |
|
denominator = _mean_tweedie_deviance( |
|
y_true, y_avg, sample_weight=sample_weight, power=power |
|
) |
|
|
|
return 1 - numerator / denominator |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"alpha": [Interval(Real, 0, 1, closed="both")], |
|
"multioutput": [ |
|
StrOptions({"raw_values", "uniform_average"}), |
|
"array-like", |
|
], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def d2_pinball_score( |
|
y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" |
|
): |
|
""" |
|
:math:`D^2` regression score function, fraction of pinball loss explained. |
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be |
|
arbitrarily worse). A model that always uses the empirical alpha-quantile of |
|
`y_true` as constant prediction, disregarding the input features, |
|
gets a :math:`D^2` score of 0.0. |
|
|
|
Read more in the :ref:`User Guide <d2_score>`. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
alpha : float, default=0.5 |
|
Slope of the pinball deviance. It determines the quantile level alpha |
|
for which the pinball deviance and also D2 are optimal. |
|
The default `alpha=0.5` is equivalent to `d2_absolute_error_score`. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average scores. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Scores of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
score : float or ndarray of floats |
|
The :math:`D^2` score with a pinball deviance |
|
or ndarray of scores if `multioutput='raw_values'`. |
|
|
|
Notes |
|
----- |
|
Like :math:`R^2`, :math:`D^2` score may be negative |
|
(it need not actually be the square of a quantity D). |
|
|
|
This metric is not well-defined for a single point and will return a NaN |
|
value if n_samples is less than two. |
|
|
|
References |
|
---------- |
|
.. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999). |
|
"Goodness of Fit and Related Inference Processes for Quantile Regression" |
|
<https://doi.org/10.1080/01621459.1999.10473882>`_ |
|
.. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. |
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and |
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import d2_pinball_score |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [1, 3, 3] |
|
>>> d2_pinball_score(y_true, y_pred) |
|
np.float64(0.5) |
|
>>> d2_pinball_score(y_true, y_pred, alpha=0.9) |
|
np.float64(0.772...) |
|
>>> d2_pinball_score(y_true, y_pred, alpha=0.1) |
|
np.float64(-1.045...) |
|
>>> d2_pinball_score(y_true, y_true, alpha=0.1) |
|
np.float64(1.0) |
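
    The default `alpha=0.5` coincides with :func:`d2_absolute_error_score`,
    which simply calls this function with `alpha=0.5`:

    >>> from sklearn.metrics import d2_absolute_error_score
    >>> d2_absolute_error_score(y_true, y_pred)
    np.float64(0.5)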
|
""" |
|
y_type, y_true, y_pred, multioutput = _check_reg_targets( |
|
y_true, y_pred, multioutput |
|
) |
|
check_consistent_length(y_true, y_pred, sample_weight) |
|
|
|
if _num_samples(y_pred) < 2: |
|
msg = "D^2 score is not well-defined with less than two samples." |
|
warnings.warn(msg, UndefinedMetricWarning) |
|
return float("nan") |
|
|
|
numerator = mean_pinball_loss( |
|
y_true, |
|
y_pred, |
|
sample_weight=sample_weight, |
|
alpha=alpha, |
|
multioutput="raw_values", |
|
) |
|
|
|
if sample_weight is None: |
|
y_quantile = np.tile( |
|
np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1) |
|
) |
|
else: |
|
sample_weight = _check_sample_weight(sample_weight, y_true) |
|
y_quantile = np.tile( |
|
_weighted_percentile( |
|
y_true, sample_weight=sample_weight, percentile=alpha * 100 |
|
), |
|
(len(y_true), 1), |
|
) |
|
|
|
denominator = mean_pinball_loss( |
|
y_true, |
|
y_quantile, |
|
sample_weight=sample_weight, |
|
alpha=alpha, |
|
multioutput="raw_values", |
|
) |
|
|
|
nonzero_numerator = numerator != 0 |
|
nonzero_denominator = denominator != 0 |
|
valid_score = nonzero_numerator & nonzero_denominator |
|
output_scores = np.ones(y_true.shape[1]) |
|
|
|
    output_scores[valid_score] = 1 - (
        numerator[valid_score] / denominator[valid_score]
    )
|
output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0 |
|
|
|
if isinstance(multioutput, str): |
|
if multioutput == "raw_values": |
|
|
|
return output_scores |
|
        else:
            # Passing None as weights results in a uniform mean.
            avg_weights = None
|
else: |
|
avg_weights = multioutput |
|
|
|
return np.average(output_scores, weights=avg_weights) |
|
|
|
|
|
@validate_params( |
|
{ |
|
"y_true": ["array-like"], |
|
"y_pred": ["array-like"], |
|
"sample_weight": ["array-like", None], |
|
"multioutput": [ |
|
StrOptions({"raw_values", "uniform_average"}), |
|
"array-like", |
|
], |
|
}, |
|
prefer_skip_nested_validation=True, |
|
) |
|
def d2_absolute_error_score( |
|
y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" |
|
): |
|
""" |
|
:math:`D^2` regression score function, fraction of absolute error explained. |
|
|
|
Best possible score is 1.0 and it can be negative (because the model can be |
|
arbitrarily worse). A model that always uses the empirical median of `y_true` |
|
as constant prediction, disregarding the input features, |
|
gets a :math:`D^2` score of 0.0. |
|
|
|
Read more in the :ref:`User Guide <d2_score>`. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
Parameters |
|
---------- |
|
y_true : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Ground truth (correct) target values. |
|
|
|
y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs) |
|
Estimated target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
multioutput : {'raw_values', 'uniform_average'} or array-like of shape \ |
|
(n_outputs,), default='uniform_average' |
|
Defines aggregating of multiple output values. |
|
Array-like value defines weights used to average scores. |
|
|
|
'raw_values' : |
|
Returns a full set of errors in case of multioutput input. |
|
|
|
'uniform_average' : |
|
Scores of all outputs are averaged with uniform weight. |
|
|
|
Returns |
|
------- |
|
score : float or ndarray of floats |
|
The :math:`D^2` score with an absolute error deviance |
|
or ndarray of scores if 'multioutput' is 'raw_values'. |
|
|
|
Notes |
|
----- |
|
Like :math:`R^2`, :math:`D^2` score may be negative |
|
(it need not actually be the square of a quantity D). |
|
|
|
This metric is not well-defined for single samples and will return a NaN |
|
value if n_samples is less than two. |
|
|
|
References |
|
---------- |
|
.. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J. |
|
Wainwright. "Statistical Learning with Sparsity: The Lasso and |
|
Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.metrics import d2_absolute_error_score |
|
>>> y_true = [3, -0.5, 2, 7] |
|
>>> y_pred = [2.5, 0.0, 2, 8] |
|
>>> d2_absolute_error_score(y_true, y_pred) |
|
np.float64(0.764...) |
|
>>> y_true = [[0.5, 1], [-1, 1], [7, -6]] |
|
>>> y_pred = [[0, 2], [-1, 2], [8, -5]] |
|
>>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average') |
|
np.float64(0.691...) |
|
>>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values') |
|
array([0.8125 , 0.57142857]) |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [1, 2, 3] |
|
>>> d2_absolute_error_score(y_true, y_pred) |
|
np.float64(1.0) |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [2, 2, 2] |
|
>>> d2_absolute_error_score(y_true, y_pred) |
|
np.float64(0.0) |
|
>>> y_true = [1, 2, 3] |
|
>>> y_pred = [3, 2, 1] |
|
>>> d2_absolute_error_score(y_true, y_pred) |
|
np.float64(-1.0) |
|
""" |
|
return d2_pinball_score( |
|
y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput |
|
) |
|
|