|
|
|
|
|
|
|
import warnings |
|
|
|
import numpy as np |
|
|
|
from ..base import BaseEstimator, RegressorMixin, _fit_context, clone |
|
from ..exceptions import NotFittedError |
|
from ..linear_model import LinearRegression |
|
from ..preprocessing import FunctionTransformer |
|
from ..utils import Bunch, _safe_indexing, check_array |
|
from ..utils._metadata_requests import ( |
|
MetadataRouter, |
|
MethodMapping, |
|
_routing_enabled, |
|
process_routing, |
|
) |
|
from ..utils._param_validation import HasMethods |
|
from ..utils._tags import get_tags |
|
from ..utils.validation import check_is_fitted |
|
|
|
__all__ = ["TransformedTargetRegressor"] |
|
|
|
|
|
class TransformedTargetRegressor(RegressorMixin, BaseEstimator): |
|
"""Meta-estimator to regress on a transformed target. |
|
|
|
Useful for applying a non-linear transformation to the target `y` in |
|
regression problems. This transformation can be given as a Transformer |
|
such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a |
|
function and its inverse such as `np.log` and `np.exp`. |
|
|
|
The computation during :meth:`fit` is:: |
|
|
|
regressor.fit(X, func(y)) |
|
|
|
or:: |
|
|
|
regressor.fit(X, transformer.transform(y)) |
|
|
|
The computation during :meth:`predict` is:: |
|
|
|
inverse_func(regressor.predict(X)) |
|
|
|
or:: |
|
|
|
transformer.inverse_transform(regressor.predict(X)) |
|
|
|
Read more in the :ref:`User Guide <transformed_target_regressor>`. |
|
|
|
.. versionadded:: 0.20 |
|
|
|
Parameters |
|
---------- |
|
regressor : object, default=None |
|
Regressor object such as derived from |
|
:class:`~sklearn.base.RegressorMixin`. This regressor will |
|
automatically be cloned each time prior to fitting. If `regressor is |
|
None`, :class:`~sklearn.linear_model.LinearRegression` is created and used. |
|
|
|
transformer : object, default=None |
|
Estimator object such as derived from |
|
:class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time |
|
as `func` and `inverse_func`. If `transformer is None` as well as |
|
`func` and `inverse_func`, the transformer will be an identity |
|
transformer. Note that the transformer will be cloned during fitting. |
|
Also, the transformer is restricting `y` to be a numpy array. |
|
|
|
func : function, default=None |
|
Function to apply to `y` before passing to :meth:`fit`. Cannot be set |
|
at the same time as `transformer`. If `func is None`, the function used will be |
|
the identity function. If `func` is set, `inverse_func` also needs to be |
|
provided. The function needs to return a 2-dimensional array. |
|
|
|
inverse_func : function, default=None |
|
Function to apply to the prediction of the regressor. Cannot be set at |
|
the same time as `transformer`. The inverse function is used to return |
|
predictions to the same space of the original training labels. If |
|
`inverse_func` is set, `func` also needs to be provided. The inverse |
|
function needs to return a 2-dimensional array. |
|
|
|
check_inverse : bool, default=True |
|
Whether to check that `transform` followed by `inverse_transform` |
|
or `func` followed by `inverse_func` leads to the original targets. |
|
|
|
Attributes |
|
---------- |
|
regressor_ : object |
|
Fitted regressor. |
|
|
|
transformer_ : object |
|
Transformer used in :meth:`fit` and :meth:`predict`. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. Only defined if the |
|
underlying regressor exposes such an attribute when fit. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
sklearn.preprocessing.FunctionTransformer : Construct a transformer from an |
|
arbitrary callable. |
|
|
|
Notes |
|
----- |
|
Internally, the target `y` is always converted into a 2-dimensional array |
|
to be used by scikit-learn transformers. At the time of prediction, the |
|
output will be reshaped to a have the same number of dimensions as `y`. |
|
|
|
Examples |
|
-------- |
|
>>> import numpy as np |
|
>>> from sklearn.linear_model import LinearRegression |
|
>>> from sklearn.compose import TransformedTargetRegressor |
|
>>> tt = TransformedTargetRegressor(regressor=LinearRegression(), |
|
... func=np.log, inverse_func=np.exp) |
|
>>> X = np.arange(4).reshape(-1, 1) |
|
>>> y = np.exp(2 * X).ravel() |
|
>>> tt.fit(X, y) |
|
TransformedTargetRegressor(...) |
|
>>> tt.score(X, y) |
|
1.0 |
|
>>> tt.regressor_.coef_ |
|
array([2.]) |
|
|
|
For a more detailed example use case refer to |
|
:ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`. |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"regressor": [HasMethods(["fit", "predict"]), None], |
|
"transformer": [HasMethods("transform"), None], |
|
"func": [callable, None], |
|
"inverse_func": [callable, None], |
|
"check_inverse": ["boolean"], |
|
} |
|
|
|
def __init__( |
|
self, |
|
regressor=None, |
|
*, |
|
transformer=None, |
|
func=None, |
|
inverse_func=None, |
|
check_inverse=True, |
|
): |
|
self.regressor = regressor |
|
self.transformer = transformer |
|
self.func = func |
|
self.inverse_func = inverse_func |
|
self.check_inverse = check_inverse |
|
|
|
def _fit_transformer(self, y): |
|
"""Check transformer and fit transformer. |
|
|
|
Create the default transformer, fit it and make additional inverse |
|
check on a subset (optional). |
|
|
|
""" |
|
if self.transformer is not None and ( |
|
self.func is not None or self.inverse_func is not None |
|
): |
|
raise ValueError( |
|
"'transformer' and functions 'func'/'inverse_func' cannot both be set." |
|
) |
|
elif self.transformer is not None: |
|
self.transformer_ = clone(self.transformer) |
|
else: |
|
if (self.func is not None and self.inverse_func is None) or ( |
|
self.func is None and self.inverse_func is not None |
|
): |
|
lacking_param, existing_param = ( |
|
("func", "inverse_func") |
|
if self.func is None |
|
else ("inverse_func", "func") |
|
) |
|
raise ValueError( |
|
f"When '{existing_param}' is provided, '{lacking_param}' must also" |
|
f" be provided. If {lacking_param} is supposed to be the default," |
|
" you need to explicitly pass it the identity function." |
|
) |
|
self.transformer_ = FunctionTransformer( |
|
func=self.func, |
|
inverse_func=self.inverse_func, |
|
validate=True, |
|
check_inverse=self.check_inverse, |
|
) |
|
|
|
|
|
|
|
self.transformer_.set_output(transform="default") |
|
|
|
|
|
|
|
|
|
self.transformer_.fit(y) |
|
if self.check_inverse: |
|
idx_selected = slice(None, None, max(1, y.shape[0] // 10)) |
|
y_sel = _safe_indexing(y, idx_selected) |
|
y_sel_t = self.transformer_.transform(y_sel) |
|
if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)): |
|
warnings.warn( |
|
( |
|
"The provided functions or transformer are" |
|
" not strictly inverse of each other. If" |
|
" you are sure you want to proceed regardless" |
|
", set 'check_inverse=False'" |
|
), |
|
UserWarning, |
|
) |
|
|
|
@_fit_context( |
|
|
|
prefer_skip_nested_validation=False |
|
) |
|
def fit(self, X, y, **fit_params): |
|
"""Fit the model according to the given training data. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) |
|
Target values. |
|
|
|
**fit_params : dict |
|
- If `enable_metadata_routing=False` (default): Parameters directly passed |
|
to the `fit` method of the underlying regressor. |
|
|
|
- If `enable_metadata_routing=True`: Parameters safely routed to the `fit` |
|
method of the underlying regressor. |
|
|
|
.. versionchanged:: 1.6 |
|
See :ref:`Metadata Routing User Guide <metadata_routing>` for |
|
more details. |
|
|
|
Returns |
|
------- |
|
self : object |
|
Fitted estimator. |
|
""" |
|
if y is None: |
|
raise ValueError( |
|
f"This {self.__class__.__name__} estimator " |
|
"requires y to be passed, but the target y is None." |
|
) |
|
y = check_array( |
|
y, |
|
input_name="y", |
|
accept_sparse=False, |
|
ensure_all_finite=True, |
|
ensure_2d=False, |
|
dtype="numeric", |
|
allow_nd=True, |
|
) |
|
|
|
|
|
|
|
self._training_dim = y.ndim |
|
|
|
|
|
|
|
if y.ndim == 1: |
|
y_2d = y.reshape(-1, 1) |
|
else: |
|
y_2d = y |
|
self._fit_transformer(y_2d) |
|
|
|
|
|
y_trans = self.transformer_.transform(y_2d) |
|
|
|
|
|
|
|
if y_trans.ndim == 2 and y_trans.shape[1] == 1: |
|
y_trans = y_trans.squeeze(axis=1) |
|
|
|
self.regressor_ = self._get_regressor(get_clone=True) |
|
if _routing_enabled(): |
|
routed_params = process_routing(self, "fit", **fit_params) |
|
else: |
|
routed_params = Bunch(regressor=Bunch(fit=fit_params)) |
|
|
|
self.regressor_.fit(X, y_trans, **routed_params.regressor.fit) |
|
|
|
if hasattr(self.regressor_, "feature_names_in_"): |
|
self.feature_names_in_ = self.regressor_.feature_names_in_ |
|
|
|
return self |
|
|
|
def predict(self, X, **predict_params): |
|
"""Predict using the base regressor, applying inverse. |
|
|
|
The regressor is used to predict and the `inverse_func` or |
|
`inverse_transform` is applied before returning the prediction. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Samples. |
|
|
|
**predict_params : dict of str -> object |
|
- If `enable_metadata_routing=False` (default): Parameters directly passed |
|
to the `predict` method of the underlying regressor. |
|
|
|
- If `enable_metadata_routing=True`: Parameters safely routed to the |
|
`predict` method of the underlying regressor. |
|
|
|
.. versionchanged:: 1.6 |
|
See :ref:`Metadata Routing User Guide <metadata_routing>` |
|
for more details. |
|
|
|
Returns |
|
------- |
|
y_hat : ndarray of shape (n_samples,) |
|
Predicted values. |
|
""" |
|
check_is_fitted(self) |
|
if _routing_enabled(): |
|
routed_params = process_routing(self, "predict", **predict_params) |
|
else: |
|
routed_params = Bunch(regressor=Bunch(predict=predict_params)) |
|
|
|
pred = self.regressor_.predict(X, **routed_params.regressor.predict) |
|
if pred.ndim == 1: |
|
pred_trans = self.transformer_.inverse_transform(pred.reshape(-1, 1)) |
|
else: |
|
pred_trans = self.transformer_.inverse_transform(pred) |
|
if ( |
|
self._training_dim == 1 |
|
and pred_trans.ndim == 2 |
|
and pred_trans.shape[1] == 1 |
|
): |
|
pred_trans = pred_trans.squeeze(axis=1) |
|
|
|
return pred_trans |
|
|
|
def __sklearn_tags__(self): |
|
regressor = self._get_regressor() |
|
tags = super().__sklearn_tags__() |
|
tags.regressor_tags.poor_score = True |
|
tags.input_tags.sparse = get_tags(regressor).input_tags.sparse |
|
tags.target_tags.multi_output = get_tags(regressor).target_tags.multi_output |
|
return tags |
|
|
|
@property |
|
def n_features_in_(self): |
|
"""Number of features seen during :term:`fit`.""" |
|
|
|
|
|
try: |
|
check_is_fitted(self) |
|
except NotFittedError as nfe: |
|
raise AttributeError( |
|
"{} object has no n_features_in_ attribute.".format( |
|
self.__class__.__name__ |
|
) |
|
) from nfe |
|
|
|
return self.regressor_.n_features_in_ |
|
|
|
def get_metadata_routing(self): |
|
"""Get metadata routing of this object. |
|
|
|
Please check :ref:`User Guide <metadata_routing>` on how the routing |
|
mechanism works. |
|
|
|
.. versionadded:: 1.6 |
|
|
|
Returns |
|
------- |
|
routing : MetadataRouter |
|
A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating |
|
routing information. |
|
""" |
|
router = MetadataRouter(owner=self.__class__.__name__).add( |
|
regressor=self._get_regressor(), |
|
method_mapping=MethodMapping() |
|
.add(caller="fit", callee="fit") |
|
.add(caller="predict", callee="predict"), |
|
) |
|
return router |
|
|
|
def _get_regressor(self, get_clone=False): |
|
if self.regressor is None: |
|
return LinearRegression() |
|
|
|
return clone(self.regressor) if get_clone else self.regressor |
|
|