File size: 64,959 Bytes

7885a28

"""Metrics to assess performance on regression task.

Functions named as ``*_score`` return a scalar value to maximize: the higher
the better.

Function named as ``*_error`` or ``*_loss`` return a scalar value to minimize:
the lower the better.
"""

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import warnings
from numbers import Real

import numpy as np
from scipy.special import xlogy

from ..exceptions import UndefinedMetricWarning
from ..utils._array_api import (
    _average,
    _find_matching_floating_dtype,
    get_namespace,
    get_namespace_and_device,
    size,
)
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.stats import _weighted_percentile
from ..utils.validation import (
    _check_sample_weight,
    _num_samples,
    check_array,
    check_consistent_length,
    column_or_1d,
)

__ALL__ = [
    "max_error",
    "mean_absolute_error",
    "mean_squared_error",
    "mean_squared_log_error",
    "median_absolute_error",
    "mean_absolute_percentage_error",
    "mean_pinball_loss",
    "r2_score",
    "root_mean_squared_log_error",
    "root_mean_squared_error",
    "explained_variance_score",
    "mean_tweedie_deviance",
    "mean_poisson_deviance",
    "mean_gamma_deviance",
    "d2_tweedie_score",
    "d2_pinball_score",
    "d2_absolute_error_score",
]


def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric", xp=None):
    """Check that y_true and y_pred belong to the same regression task.

    To reduce redundancy when calling `_find_matching_floating_dtype`,
    please use `_check_reg_targets_with_floating_dtype` instead.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like or string in ['raw_values', uniform_average',
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    dtype : str or list, default="numeric"
        the dtype argument passed to check_array.

    xp : module, default=None
        Precomputed array namespace module. When passed, typically from a caller
        that has already performed inspection of its own inputs, skips array
        namespace inspection.

    Returns
    -------
    type_true : one of {'continuous', continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'.

    y_true : array-like of shape (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples, n_outputs)
        Estimated target values.

    multioutput : array-like of shape (n_outputs) or string in ['raw_values',
        uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.
    """
    xp, _ = get_namespace(y_true, y_pred, multioutput, xp=xp)

    check_consistent_length(y_true, y_pred)
    y_true = check_array(y_true, ensure_2d=False, dtype=dtype)
    y_pred = check_array(y_pred, ensure_2d=False, dtype=dtype)

    if y_true.ndim == 1:
        y_true = xp.reshape(y_true, (-1, 1))

    if y_pred.ndim == 1:
        y_pred = xp.reshape(y_pred, (-1, 1))

    if y_true.shape[1] != y_pred.shape[1]:
        raise ValueError(
            "y_true and y_pred have different number of output ({0}!={1})".format(
                y_true.shape[1], y_pred.shape[1]
            )
        )

    n_outputs = y_true.shape[1]
    allowed_multioutput_str = ("raw_values", "uniform_average", "variance_weighted")
    if isinstance(multioutput, str):
        if multioutput not in allowed_multioutput_str:
            raise ValueError(
                "Allowed 'multioutput' string values are {}. "
                "You provided multioutput={!r}".format(
                    allowed_multioutput_str, multioutput
                )
            )
    elif multioutput is not None:
        multioutput = check_array(multioutput, ensure_2d=False)
        if n_outputs == 1:
            raise ValueError("Custom weights are useful only in multi-output cases.")
        elif n_outputs != multioutput.shape[0]:
            raise ValueError(
                "There must be equally many custom weights "
                f"({multioutput.shape[0]}) as outputs ({n_outputs})."
            )
    y_type = "continuous" if n_outputs == 1 else "continuous-multioutput"

    return y_type, y_true, y_pred, multioutput


def _check_reg_targets_with_floating_dtype(
    y_true, y_pred, sample_weight, multioutput, xp=None
):
    """Ensures that y_true, y_pred, and sample_weight correspond to the same
    regression task.

    Extends `_check_reg_targets` by automatically selecting a suitable floating-point
    data type for inputs using `_find_matching_floating_dtype`.

    Use this private method only when converting inputs to array API-compatibles.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,)

    multioutput : array-like or string in ['raw_values', 'uniform_average', \
        'variance_weighted'] or None
        None is accepted due to backward compatibility of r2_score().

    xp : module, default=None
        Precomputed array namespace module. When passed, typically from a caller
        that has already performed inspection of its own inputs, skips array
        namespace inspection.

    Returns
    -------
    type_true : one of {'continuous', 'continuous-multioutput'}
        The type of the true target data, as output by
        'utils.multiclass.type_of_target'.

    y_true : array-like of shape (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : array-like of shape (n_outputs) or string in ['raw_values', \
        'uniform_average', 'variance_weighted'] or None
        Custom output weights if ``multioutput`` is array-like or
        just the corresponding argument if ``multioutput`` is a
        correct keyword.
    """
    dtype_name = _find_matching_floating_dtype(y_true, y_pred, sample_weight, xp=xp)

    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput, dtype=dtype_name, xp=xp
    )

    # _check_reg_targets does not accept sample_weight as input.
    # Convert sample_weight's data type separately to match dtype_name.
    if sample_weight is not None:
        sample_weight = xp.asarray(sample_weight, dtype=dtype_name)

    return y_type, y_true, y_pred, sample_weight, multioutput


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Mean absolute error regression loss.

    Read more in the :ref:`User Guide <mean_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'}  or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or array of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAE output is non-negative floating point. The best value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_error(y_true, y_pred)
    0.5
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_error(y_true, y_pred)
    0.75
    >>> mean_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> mean_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.85...
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    check_consistent_length(y_true, y_pred, sample_weight)

    output_errors = _average(
        xp.abs(y_pred - y_true), weights=sample_weight, axis=0, xp=xp
    )
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to np.average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    mean_absolute_error = _average(output_errors, weights=multioutput)

    return float(mean_absolute_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "alpha": [Interval(Real, 0, 1, closed="both")],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_pinball_loss(
    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
):
    """Pinball loss for quantile regression.

    Read more in the :ref:`User Guide <pinball_loss>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, slope of the pinball loss, default=0.5,
        This loss is equivalent to :ref:`mean_absolute_error` when `alpha=0.5`,
        `alpha=0.95` is minimized by estimators of the 95th percentile.

    multioutput : {'raw_values', 'uniform_average'}  or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        The pinball loss output is a non-negative floating point. The best
        value is 0.0.

    Examples
    --------
    >>> from sklearn.metrics import mean_pinball_loss
    >>> y_true = [1, 2, 3]
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.1)
    np.float64(0.03...)
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.1)
    np.float64(0.3...)
    >>> mean_pinball_loss(y_true, [0, 2, 3], alpha=0.9)
    np.float64(0.3...)
    >>> mean_pinball_loss(y_true, [1, 2, 4], alpha=0.9)
    np.float64(0.03...)
    >>> mean_pinball_loss(y_true, y_true, alpha=0.1)
    np.float64(0.0)
    >>> mean_pinball_loss(y_true, y_true, alpha=0.9)
    np.float64(0.0)
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput
    )
    check_consistent_length(y_true, y_pred, sample_weight)
    diff = y_true - y_pred
    sign = (diff >= 0).astype(diff.dtype)
    loss = alpha * sign * diff - (1 - alpha) * (1 - sign) * diff
    output_errors = np.average(loss, weights=sample_weight, axis=0)

    if isinstance(multioutput, str) and multioutput == "raw_values":
        return output_errors

    if isinstance(multioutput, str) and multioutput == "uniform_average":
        # pass None as weights to np.average: uniform mean
        multioutput = None

    return np.average(output_errors, weights=multioutput)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_absolute_percentage_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Mean absolute percentage error (MAPE) regression loss.

    Note that we are not using the common "percentage" definition: the percentage
    in the range [0, 100] is converted to a relative value in the range [0, 1]
    by dividing by 100. Thus, an error of 200% corresponds to a relative error of 2.

    Read more in the :ref:`User Guide <mean_absolute_percentage_error>`.

    .. versionadded:: 0.24

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.
        If input is list then the shape must be (n_outputs,).

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute percentage error
        is returned for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

        MAPE output is non-negative floating point. The best value is 0.0.
        But note that bad predictions can lead to arbitrarily large
        MAPE values, especially if some `y_true` values are very close to zero.
        Note that we return a large value instead of `inf` when `y_true` is zero.

    Examples
    --------
    >>> from sklearn.metrics import mean_absolute_percentage_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.3273...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    0.5515...
    >>> mean_absolute_percentage_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.6198...
    >>> # the value when some element of the y_true is zero is arbitrarily high because
    >>> # of the division by epsilon
    >>> y_true = [1., 0., 2.4, 7.]
    >>> y_pred = [1.2, 0.1, 2.4, 8.]
    >>> mean_absolute_percentage_error(y_true, y_pred)
    112589990684262.48
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )
    check_consistent_length(y_true, y_pred, sample_weight)
    epsilon = xp.asarray(xp.finfo(xp.float64).eps, dtype=y_true.dtype)
    y_true_abs = xp.abs(y_true)
    mape = xp.abs(y_pred - y_true) / xp.maximum(y_true_abs, epsilon)
    output_errors = _average(mape, weights=sample_weight, axis=0)
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    mean_absolute_percentage_error = _average(output_errors, weights=multioutput)

    return float(mean_absolute_percentage_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Mean squared error regression loss.

    Read more in the :ref:`User Guide <mean_squared_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or array of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> mean_squared_error(y_true, y_pred)
    0.375
    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
    >>> mean_squared_error(y_true, y_pred)
    0.708...
    >>> mean_squared_error(y_true, y_pred, multioutput='raw_values')
    array([0.41666667, 1.        ])
    >>> mean_squared_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.825...
    """
    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)
    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )
    check_consistent_length(y_true, y_pred, sample_weight)
    output_errors = _average((y_true - y_pred) ** 2, axis=0, weights=sample_weight)

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    mean_squared_error = _average(output_errors, weights=multioutput)

    return float(mean_squared_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def root_mean_squared_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Root mean squared error regression loss.

    Read more in the :ref:`User Guide <mean_squared_error>`.

    .. versionadded:: 1.4

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> root_mean_squared_error(y_true, y_pred)
    0.612...
    >>> y_true = [[0.5, 1],[-1, 1],[7, -6]]
    >>> y_pred = [[0, 2],[-1, 2],[8, -5]]
    >>> root_mean_squared_error(y_true, y_pred)
    0.822...
    """

    xp, _ = get_namespace(y_true, y_pred, sample_weight, multioutput)

    output_errors = xp.sqrt(
        mean_squared_error(
            y_true, y_pred, sample_weight=sample_weight, multioutput="raw_values"
        )
    )

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to _average: uniform mean
            multioutput = None

    # Average across the outputs (if needed).
    # The second call to `_average` should always return
    # a scalar array that we convert to a Python float to
    # consistently return the same eager evaluated value.
    # Therefore, `axis=None`.
    root_mean_squared_error = _average(output_errors, weights=multioutput)

    return float(root_mean_squared_error)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def mean_squared_log_error(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
):
    """Mean squared logarithmic error regression loss.

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'

        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> mean_squared_log_error(y_true, y_pred)
    0.039...
    >>> y_true = [[0.5, 1], [1, 2], [7, 6]]
    >>> y_pred = [[0.5, 2], [1, 2.5], [8, 8]]
    >>> mean_squared_log_error(y_true, y_pred)
    0.044...
    >>> mean_squared_log_error(y_true, y_pred, multioutput='raw_values')
    array([0.00462428, 0.08377444])
    >>> mean_squared_log_error(y_true, y_pred, multioutput=[0.3, 0.7])
    0.060...
    """
    xp, _ = get_namespace(y_true, y_pred)

    _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput, xp=xp
    )

    if xp.any(y_true <= -1) or xp.any(y_pred <= -1):
        raise ValueError(
            "Mean Squared Logarithmic Error cannot be used when "
            "targets contain values less than or equal to -1."
        )

    return mean_squared_error(
        xp.log1p(y_true),
        xp.log1p(y_pred),
        sample_weight=sample_weight,
        multioutput=multioutput,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
    },
    prefer_skip_nested_validation=True,
)
def root_mean_squared_log_error(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """Root mean squared logarithmic error regression loss.

    Read more in the :ref:`User Guide <mean_squared_log_error>`.

    .. versionadded:: 1.4

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'

        Defines aggregating of multiple output values.
        Array-like value defines weights used to average errors.

        'raw_values' :
            Returns a full set of errors when the input is of multioutput
            format.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    Returns
    -------
    loss : float or ndarray of floats
        A non-negative floating point value (the best value is 0.0), or an
        array of floating point values, one for each individual target.

    Examples
    --------
    >>> from sklearn.metrics import root_mean_squared_log_error
    >>> y_true = [3, 5, 2.5, 7]
    >>> y_pred = [2.5, 5, 4, 8]
    >>> root_mean_squared_log_error(y_true, y_pred)
    0.199...
    """
    xp, _ = get_namespace(y_true, y_pred)

    _, y_true, y_pred, _, _ = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput, xp=xp
    )

    if xp.any(y_true <= -1) or xp.any(y_pred <= -1):
        raise ValueError(
            "Root Mean Squared Logarithmic Error cannot be used when "
            "targets contain values less than or equal to -1."
        )

    return root_mean_squared_error(
        xp.log1p(y_true),
        xp.log1p(y_pred),
        sample_weight=sample_weight,
        multioutput=multioutput,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def median_absolute_error(
    y_true, y_pred, *, multioutput="uniform_average", sample_weight=None
):
    """Median absolute error regression loss.

    Median absolute error output is non-negative floating point. The best value
    is 0.0. Read more in the :ref:`User Guide <median_absolute_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values. Array-like value defines
        weights used to average errors.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Errors of all outputs are averaged with uniform weight.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

        .. versionadded:: 0.24

    Returns
    -------
    loss : float or ndarray of floats
        If multioutput is 'raw_values', then mean absolute error is returned
        for each output separately.
        If multioutput is 'uniform_average' or an ndarray of weights, then the
        weighted average of all output errors is returned.

    Examples
    --------
    >>> from sklearn.metrics import median_absolute_error
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> median_absolute_error(y_true, y_pred)
    np.float64(0.5)
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> median_absolute_error(y_true, y_pred)
    np.float64(0.75)
    >>> median_absolute_error(y_true, y_pred, multioutput='raw_values')
    array([0.5, 1. ])
    >>> median_absolute_error(y_true, y_pred, multioutput=[0.3, 0.7])
    np.float64(0.85)
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput
    )
    if sample_weight is None:
        output_errors = np.median(np.abs(y_pred - y_true), axis=0)
    else:
        sample_weight = _check_sample_weight(sample_weight, y_pred)
        output_errors = _weighted_percentile(
            np.abs(y_pred - y_true), sample_weight=sample_weight
        )
    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            return output_errors
        elif multioutput == "uniform_average":
            # pass None as weights to np.average: uniform mean
            multioutput = None

    return np.average(output_errors, weights=multioutput)


def _assemble_r2_explained_variance(
    numerator, denominator, n_outputs, multioutput, force_finite, xp, device
):
    """Common part used by explained variance score and :math:`R^2` score."""
    dtype = numerator.dtype

    nonzero_denominator = denominator != 0

    if not force_finite:
        # Standard formula, that may lead to NaN or -Inf
        output_scores = 1 - (numerator / denominator)
    else:
        nonzero_numerator = numerator != 0
        # Default = Zero Numerator = perfect predictions. Set to 1.0
        # (note: even if denominator is zero, thus avoiding NaN scores)
        output_scores = xp.ones([n_outputs], device=device, dtype=dtype)
        # Non-zero Numerator and Non-zero Denominator: use the formula
        valid_score = nonzero_denominator & nonzero_numerator

        output_scores[valid_score] = 1 - (
            numerator[valid_score] / denominator[valid_score]
        )

        # Non-zero Numerator and Zero Denominator:
        # arbitrary set to 0.0 to avoid -inf scores
        output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # return scores individually
            return output_scores
        elif multioutput == "uniform_average":
            # Passing None as weights to np.average results is uniform mean
            avg_weights = None
        elif multioutput == "variance_weighted":
            avg_weights = denominator
            if not xp.any(nonzero_denominator):
                # All weights are zero, np.average would raise a ZeroDiv error.
                # This only happens when all y are constant (or 1-element long)
                # Since weights are all equal, fall back to uniform weights.
                avg_weights = None
    else:
        avg_weights = multioutput

    result = _average(output_scores, weights=avg_weights)
    if size(result) == 1:
        return float(result)
    return result


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def explained_variance_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """Explained variance regression score function.

    Best possible score is 1.0, lower values are worse.

    In the particular case when ``y_true`` is constant, the explained variance
    score is not finite: it is either ``NaN`` (perfect predictions) or
    ``-Inf`` (imperfect predictions). To prevent such non-finite numbers to
    pollute higher-level experiments such as a grid search cross-validation,
    by default these cases are replaced with 1.0 (perfect predictions) or 0.0
    (imperfect predictions) respectively. If ``force_finite``
    is set to ``False``, this score falls back on the original :math:`R^2`
    definition.

    .. note::
       The Explained Variance score is similar to the
       :func:`R^2 score <r2_score>`, with the notable difference that it
       does not account for systematic offsets in the prediction. Most often
       the :func:`R^2 score <r2_score>` should be preferred.

    Read more in the :ref:`User Guide <explained_variance_score>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'} or \
            array-like of shape (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

    force_finite : bool, default=True
        Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
        data should be replaced with real numbers (``1.0`` if prediction is
        perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
        for hyperparameters' search procedures (e.g. grid search
        cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    score : float or ndarray of floats
        The explained variance or ndarray if 'multioutput' is 'raw_values'.

    See Also
    --------
    r2_score :
        Similar metric, but accounting for systematic offsets in
        prediction.

    Notes
    -----
    This is not a symmetric function.

    Examples
    --------
    >>> from sklearn.metrics import explained_variance_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> explained_variance_score(y_true, y_pred)
    0.957...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> explained_variance_score(y_true, y_pred, multioutput='uniform_average')
    0.983...
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> explained_variance_score(y_true, y_pred)
    1.0
    >>> explained_variance_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> explained_variance_score(y_true, y_pred)
    0.0
    >>> explained_variance_score(y_true, y_pred, force_finite=False)
    -inf
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput
    )
    check_consistent_length(y_true, y_pred, sample_weight)

    y_diff_avg = np.average(y_true - y_pred, weights=sample_weight, axis=0)
    numerator = np.average(
        (y_true - y_pred - y_diff_avg) ** 2, weights=sample_weight, axis=0
    )

    y_true_avg = np.average(y_true, weights=sample_weight, axis=0)
    denominator = np.average((y_true - y_true_avg) ** 2, weights=sample_weight, axis=0)

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=get_namespace(y_true)[0],
        # TODO: update once Array API support is added to explained_variance_score.
        device=None,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average", "variance_weighted"}),
            "array-like",
            None,
        ],
        "force_finite": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def r2_score(
    y_true,
    y_pred,
    *,
    sample_weight=None,
    multioutput="uniform_average",
    force_finite=True,
):
    """:math:`R^2` (coefficient of determination) regression score function.

    Best possible score is 1.0 and it can be negative (because the
    model can be arbitrarily worse). In the general case when the true y is
    non-constant, a constant model that always predicts the average y
    disregarding the input features would get a :math:`R^2` score of 0.0.

    In the particular case when ``y_true`` is constant, the :math:`R^2` score
    is not finite: it is either ``NaN`` (perfect predictions) or ``-Inf``
    (imperfect predictions). To prevent such non-finite numbers to pollute
    higher-level experiments such as a grid search cross-validation, by default
    these cases are replaced with 1.0 (perfect predictions) or 0.0 (imperfect
    predictions) respectively. You can set ``force_finite`` to ``False`` to
    prevent this fix from happening.

    Note: when the prediction residuals have zero mean, the :math:`R^2` score
    is identical to the
    :func:`Explained Variance score <explained_variance_score>`.

    Read more in the :ref:`User Guide <r2_score>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, \
            array-like of shape (n_outputs,) or None, default='uniform_average'

        Defines aggregating of multiple output scores.
        Array-like value defines weights used to average scores.
        Default is "uniform_average".

        'raw_values' :
            Returns a full set of scores in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

        'variance_weighted' :
            Scores of all outputs are averaged, weighted by the variances
            of each individual output.

        .. versionchanged:: 0.19
            Default value of multioutput is 'uniform_average'.

    force_finite : bool, default=True
        Flag indicating if ``NaN`` and ``-Inf`` scores resulting from constant
        data should be replaced with real numbers (``1.0`` if prediction is
        perfect, ``0.0`` otherwise). Default is ``True``, a convenient setting
        for hyperparameters' search procedures (e.g. grid search
        cross-validation).

        .. versionadded:: 1.1

    Returns
    -------
    z : float or ndarray of floats
        The :math:`R^2` score or ndarray of scores if 'multioutput' is
        'raw_values'.

    Notes
    -----
    This is not a symmetric function.

    Unlike most other scores, :math:`R^2` score may be negative (it need not
    actually be the square of a quantity R).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] `Wikipedia entry on the Coefficient of determination
            <https://en.wikipedia.org/wiki/Coefficient_of_determination>`_

    Examples
    --------
    >>> from sklearn.metrics import r2_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> r2_score(y_true, y_pred)
    0.948...
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> r2_score(y_true, y_pred,
    ...          multioutput='variance_weighted')
    0.938...
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> r2_score(y_true, y_pred)
    -3.0
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2]
    >>> r2_score(y_true, y_pred)
    1.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    nan
    >>> y_true = [-2, -2, -2]
    >>> y_pred = [-2, -2, -2 + 1e-8]
    >>> r2_score(y_true, y_pred)
    0.0
    >>> r2_score(y_true, y_pred, force_finite=False)
    -inf
    """
    xp, _, device_ = get_namespace_and_device(
        y_true, y_pred, sample_weight, multioutput
    )

    _, y_true, y_pred, sample_weight, multioutput = (
        _check_reg_targets_with_floating_dtype(
            y_true, y_pred, sample_weight, multioutput, xp=xp
        )
    )

    check_consistent_length(y_true, y_pred, sample_weight)

    if _num_samples(y_pred) < 2:
        msg = "R^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        weight = sample_weight[:, None]
    else:
        weight = 1.0

    numerator = xp.sum(weight * (y_true - y_pred) ** 2, axis=0)
    denominator = xp.sum(
        weight * (y_true - _average(y_true, axis=0, weights=sample_weight, xp=xp)) ** 2,
        axis=0,
    )

    return _assemble_r2_explained_variance(
        numerator=numerator,
        denominator=denominator,
        n_outputs=y_true.shape[1],
        multioutput=multioutput,
        force_finite=force_finite,
        xp=xp,
        device=device_,
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
    },
    prefer_skip_nested_validation=True,
)
def max_error(y_true, y_pred):
    """
    The max_error metric calculates the maximum residual error.

    Read more in the :ref:`User Guide <max_error>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    Returns
    -------
    max_error : float
        A positive floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import max_error
    >>> y_true = [3, 2, 7, 1]
    >>> y_pred = [4, 2, 7, 1]
    >>> max_error(y_true, y_pred)
    np.int64(1)
    """
    xp, _ = get_namespace(y_true, y_pred)
    y_type, y_true, y_pred, _ = _check_reg_targets(y_true, y_pred, None, xp=xp)
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in max_error")
    return xp.max(xp.abs(y_true - y_pred))


def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power):
    """Mean Tweedie deviance regression loss."""
    xp, _ = get_namespace(y_true, y_pred)
    p = power
    zero = xp.asarray(0, dtype=y_true.dtype)
    if p < 0:
        # 'Extreme stable', y any real number, y_pred > 0
        dev = 2 * (
            xp.pow(xp.where(y_true > 0, y_true, zero), xp.asarray(2 - p))
            / ((1 - p) * (2 - p))
            - y_true * xp.pow(y_pred, xp.asarray(1 - p)) / (1 - p)
            + xp.pow(y_pred, xp.asarray(2 - p)) / (2 - p)
        )
    elif p == 0:
        # Normal distribution, y and y_pred any real number
        dev = (y_true - y_pred) ** 2
    elif p == 1:
        # Poisson distribution
        dev = 2 * (xlogy(y_true, y_true / y_pred) - y_true + y_pred)
    elif p == 2:
        # Gamma distribution
        dev = 2 * (xp.log(y_pred / y_true) + y_true / y_pred - 1)
    else:
        dev = 2 * (
            xp.pow(y_true, xp.asarray(2 - p)) / ((1 - p) * (2 - p))
            - y_true * xp.pow(y_pred, xp.asarray(1 - p)) / (1 - p)
            + xp.pow(y_pred, xp.asarray(2 - p)) / (2 - p)
        )
    return float(_average(dev, weights=sample_weight))


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "power": [
            Interval(Real, None, 0, closed="right"),
            Interval(Real, 1, None, closed="left"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0):
    """Mean Tweedie deviance regression loss.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    power : float, default=0
        Tweedie power parameter. Either power <= 0 or power >= 1.

        The higher `p` the less weight is given to extreme
        deviations between true and predicted targets.

        - power < 0: Extreme stable distribution. Requires: y_pred > 0.
        - power = 0 : Normal distribution, output corresponds to
          mean_squared_error. y_true and y_pred can be any real numbers.
        - power = 1 : Poisson distribution. Requires: y_true >= 0 and
          y_pred > 0.
        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
          and y_pred > 0.
        - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise : Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_tweedie_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_tweedie_deviance(y_true, y_pred, power=1)
    1.4260...
    """
    xp, _ = get_namespace(y_true, y_pred)
    y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput=None, xp=xp
    )
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in mean_tweedie_deviance")
    check_consistent_length(y_true, y_pred, sample_weight)

    if sample_weight is not None:
        sample_weight = column_or_1d(sample_weight)
        sample_weight = sample_weight[:, np.newaxis]

    message = f"Mean Tweedie deviance error with power={power} can only be used on "
    if power < 0:
        # 'Extreme stable', y any real number, y_pred > 0
        if xp.any(y_pred <= 0):
            raise ValueError(message + "strictly positive y_pred.")
    elif power == 0:
        # Normal, y and y_pred can be any real number
        pass
    elif 1 <= power < 2:
        # Poisson and compound Poisson distribution, y >= 0, y_pred > 0
        if xp.any(y_true < 0) or xp.any(y_pred <= 0):
            raise ValueError(message + "non-negative y and strictly positive y_pred.")
    elif power >= 2:
        # Gamma and Extreme stable distribution, y and y_pred > 0
        if xp.any(y_true <= 0) or xp.any(y_pred <= 0):
            raise ValueError(message + "strictly positive y and y_pred.")
    else:  # pragma: nocover
        # Unreachable statement
        raise ValueError

    return _mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=power
    )


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None):
    """Mean Poisson deviance regression loss.

    Poisson deviance is equivalent to the Tweedie deviance with
    the power parameter `power=1`.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true >= 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_poisson_deviance
    >>> y_true = [2, 0, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_poisson_deviance(y_true, y_pred)
    1.4260...
    """
    return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=1)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
    },
    prefer_skip_nested_validation=True,
)
def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None):
    """Mean Gamma deviance regression loss.

    Gamma deviance is equivalent to the Tweedie deviance with
    the power parameter `power=2`. It is invariant to scaling of
    the target variable, and measures relative errors.

    Read more in the :ref:`User Guide <mean_tweedie_deviance>`.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values. Requires y_true > 0.

    y_pred : array-like of shape (n_samples,)
        Estimated target values. Requires y_pred > 0.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    Returns
    -------
    loss : float
        A non-negative floating point value (the best value is 0.0).

    Examples
    --------
    >>> from sklearn.metrics import mean_gamma_deviance
    >>> y_true = [2, 0.5, 1, 4]
    >>> y_pred = [0.5, 0.5, 2., 2.]
    >>> mean_gamma_deviance(y_true, y_pred)
    1.0568...
    """
    return mean_tweedie_deviance(y_true, y_pred, sample_weight=sample_weight, power=2)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "power": [
            Interval(Real, None, 0, closed="right"),
            Interval(Real, 1, None, closed="left"),
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0):
    """
    :math:`D^2` regression score function, fraction of Tweedie deviance explained.

    Best possible score is 1.0 and it can be negative (because the model can be
    arbitrarily worse). A model that always uses the empirical mean of `y_true` as
    constant prediction, disregarding the input features, gets a D^2 score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.0

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    power : float, default=0
        Tweedie power parameter. Either power <= 0 or power >= 1.

        The higher `p` the less weight is given to extreme
        deviations between true and predicted targets.

        - power < 0: Extreme stable distribution. Requires: y_pred > 0.
        - power = 0 : Normal distribution, output corresponds to r2_score.
          y_true and y_pred can be any real numbers.
        - power = 1 : Poisson distribution. Requires: y_true >= 0 and
          y_pred > 0.
        - 1 < p < 2 : Compound Poisson distribution. Requires: y_true >= 0
          and y_pred > 0.
        - power = 2 : Gamma distribution. Requires: y_true > 0 and y_pred > 0.
        - power = 3 : Inverse Gaussian distribution. Requires: y_true > 0
          and y_pred > 0.
        - otherwise : Positive stable distribution. Requires: y_true > 0
          and y_pred > 0.

    Returns
    -------
    z : float or ndarray of floats
        The D^2 score.

    Notes
    -----
    This is not a symmetric function.

    Like R^2, D^2 score may be negative (it need not actually be the square of
    a quantity D).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

    References
    ----------
    .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_tweedie_score
    >>> y_true = [0.5, 1, 2.5, 7]
    >>> y_pred = [1, 1, 5, 3.5]
    >>> d2_tweedie_score(y_true, y_pred)
    0.285...
    >>> d2_tweedie_score(y_true, y_pred, power=1)
    0.487...
    >>> d2_tweedie_score(y_true, y_pred, power=2)
    0.630...
    >>> d2_tweedie_score(y_true, y_true, power=2)
    1.0
    """
    xp, _ = get_namespace(y_true, y_pred)

    y_type, y_true, y_pred, sample_weight, _ = _check_reg_targets_with_floating_dtype(
        y_true, y_pred, sample_weight, multioutput=None, xp=xp
    )
    if y_type == "continuous-multioutput":
        raise ValueError("Multioutput not supported in d2_tweedie_score")

    if _num_samples(y_pred) < 2:
        msg = "D^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    y_true, y_pred = xp.squeeze(y_true, axis=1), xp.squeeze(y_pred, axis=1)
    numerator = mean_tweedie_deviance(
        y_true, y_pred, sample_weight=sample_weight, power=power
    )

    y_avg = _average(y_true, weights=sample_weight, xp=xp)
    denominator = _mean_tweedie_deviance(
        y_true, y_avg, sample_weight=sample_weight, power=power
    )

    return 1 - numerator / denominator


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "alpha": [Interval(Real, 0, 1, closed="both")],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average"}),
            "array-like",
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_pinball_score(
    y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average"
):
    """
    :math:`D^2` regression score function, fraction of pinball loss explained.

    Best possible score is 1.0 and it can be negative (because the model can be
    arbitrarily worse). A model that always uses the empirical alpha-quantile of
    `y_true` as constant prediction, disregarding the input features,
    gets a :math:`D^2` score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    alpha : float, default=0.5
        Slope of the pinball deviance. It determines the quantile level alpha
        for which the pinball deviance and also D2 are optimal.
        The default `alpha=0.5` is equivalent to `d2_absolute_error_score`.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average scores.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

    Returns
    -------
    score : float or ndarray of floats
        The :math:`D^2` score with a pinball deviance
        or ndarray of scores if `multioutput='raw_values'`.

    Notes
    -----
    Like :math:`R^2`, :math:`D^2` score may be negative
    (it need not actually be the square of a quantity D).

    This metric is not well-defined for a single point and will return a NaN
    value if n_samples is less than two.

     References
    ----------
    .. [1] Eq. (7) of `Koenker, Roger; Machado, José A. F. (1999).
           "Goodness of Fit and Related Inference Processes for Quantile Regression"
           <https://doi.org/10.1080/01621459.1999.10473882>`_
    .. [2] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_pinball_score
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 3, 3]
    >>> d2_pinball_score(y_true, y_pred)
    np.float64(0.5)
    >>> d2_pinball_score(y_true, y_pred, alpha=0.9)
    np.float64(0.772...)
    >>> d2_pinball_score(y_true, y_pred, alpha=0.1)
    np.float64(-1.045...)
    >>> d2_pinball_score(y_true, y_true, alpha=0.1)
    np.float64(1.0)
    """
    y_type, y_true, y_pred, multioutput = _check_reg_targets(
        y_true, y_pred, multioutput
    )
    check_consistent_length(y_true, y_pred, sample_weight)

    if _num_samples(y_pred) < 2:
        msg = "D^2 score is not well-defined with less than two samples."
        warnings.warn(msg, UndefinedMetricWarning)
        return float("nan")

    numerator = mean_pinball_loss(
        y_true,
        y_pred,
        sample_weight=sample_weight,
        alpha=alpha,
        multioutput="raw_values",
    )

    if sample_weight is None:
        y_quantile = np.tile(
            np.percentile(y_true, q=alpha * 100, axis=0), (len(y_true), 1)
        )
    else:
        sample_weight = _check_sample_weight(sample_weight, y_true)
        y_quantile = np.tile(
            _weighted_percentile(
                y_true, sample_weight=sample_weight, percentile=alpha * 100
            ),
            (len(y_true), 1),
        )

    denominator = mean_pinball_loss(
        y_true,
        y_quantile,
        sample_weight=sample_weight,
        alpha=alpha,
        multioutput="raw_values",
    )

    nonzero_numerator = numerator != 0
    nonzero_denominator = denominator != 0
    valid_score = nonzero_numerator & nonzero_denominator
    output_scores = np.ones(y_true.shape[1])

    output_scores[valid_score] = 1 - (numerator[valid_score] / denominator[valid_score])
    output_scores[nonzero_numerator & ~nonzero_denominator] = 0.0

    if isinstance(multioutput, str):
        if multioutput == "raw_values":
            # return scores individually
            return output_scores
        else:  # multioutput == "uniform_average"
            # passing None as weights to np.average results in uniform mean
            avg_weights = None
    else:
        avg_weights = multioutput

    return np.average(output_scores, weights=avg_weights)


@validate_params(
    {
        "y_true": ["array-like"],
        "y_pred": ["array-like"],
        "sample_weight": ["array-like", None],
        "multioutput": [
            StrOptions({"raw_values", "uniform_average"}),
            "array-like",
        ],
    },
    prefer_skip_nested_validation=True,
)
def d2_absolute_error_score(
    y_true, y_pred, *, sample_weight=None, multioutput="uniform_average"
):
    """
    :math:`D^2` regression score function, fraction of absolute error explained.

    Best possible score is 1.0 and it can be negative (because the model can be
    arbitrarily worse). A model that always uses the empirical median of `y_true`
    as constant prediction, disregarding the input features,
    gets a :math:`D^2` score of 0.0.

    Read more in the :ref:`User Guide <d2_score>`.

    .. versionadded:: 1.1

    Parameters
    ----------
    y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values.

    y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)
        Estimated target values.

    sample_weight : array-like of shape (n_samples,), default=None
        Sample weights.

    multioutput : {'raw_values', 'uniform_average'} or array-like of shape \
            (n_outputs,), default='uniform_average'
        Defines aggregating of multiple output values.
        Array-like value defines weights used to average scores.

        'raw_values' :
            Returns a full set of errors in case of multioutput input.

        'uniform_average' :
            Scores of all outputs are averaged with uniform weight.

    Returns
    -------
    score : float or ndarray of floats
        The :math:`D^2` score with an absolute error deviance
        or ndarray of scores if 'multioutput' is 'raw_values'.

    Notes
    -----
    Like :math:`R^2`, :math:`D^2` score may be negative
    (it need not actually be the square of a quantity D).

    This metric is not well-defined for single samples and will return a NaN
    value if n_samples is less than two.

     References
    ----------
    .. [1] Eq. (3.11) of Hastie, Trevor J., Robert Tibshirani and Martin J.
           Wainwright. "Statistical Learning with Sparsity: The Lasso and
           Generalizations." (2015). https://hastie.su.domains/StatLearnSparsity/

    Examples
    --------
    >>> from sklearn.metrics import d2_absolute_error_score
    >>> y_true = [3, -0.5, 2, 7]
    >>> y_pred = [2.5, 0.0, 2, 8]
    >>> d2_absolute_error_score(y_true, y_pred)
    np.float64(0.764...)
    >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]
    >>> y_pred = [[0, 2], [-1, 2], [8, -5]]
    >>> d2_absolute_error_score(y_true, y_pred, multioutput='uniform_average')
    np.float64(0.691...)
    >>> d2_absolute_error_score(y_true, y_pred, multioutput='raw_values')
    array([0.8125    , 0.57142857])
    >>> y_true = [1, 2, 3]
    >>> y_pred = [1, 2, 3]
    >>> d2_absolute_error_score(y_true, y_pred)
    np.float64(1.0)
    >>> y_true = [1, 2, 3]
    >>> y_pred = [2, 2, 2]
    >>> d2_absolute_error_score(y_true, y_pred)
    np.float64(0.0)
    >>> y_true = [1, 2, 3]
    >>> y_pred = [3, 2, 1]
    >>> d2_absolute_error_score(y_true, y_pred)
    np.float64(-1.0)
    """
    return d2_pinball_score(
        y_true, y_pred, sample_weight=sample_weight, alpha=0.5, multioutput=multioutput
    )