|
"""Weight Boosting. |
|
|
|
This module contains weight boosting estimators for both classification and |
|
regression. |
|
|
|
The module structure is the following: |
|
|
|
- The `BaseWeightBoosting` base class implements a common ``fit`` method |
|
for all the estimators in the module. Regression and classification |
|
only differ from each other in the loss function that is optimized. |
|
|
|
- :class:`~sklearn.ensemble.AdaBoostClassifier` implements adaptive boosting |
|
(AdaBoost-SAMME) for classification problems. |
|
|
|
- :class:`~sklearn.ensemble.AdaBoostRegressor` implements adaptive boosting |
|
(AdaBoost.R2) for regression problems. |
|
""" |
|
|
|
|
|
|
|
|
|
import warnings |
|
from abc import ABCMeta, abstractmethod |
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
|
|
from ..base import ( |
|
ClassifierMixin, |
|
RegressorMixin, |
|
_fit_context, |
|
is_classifier, |
|
is_regressor, |
|
) |
|
from ..metrics import accuracy_score, r2_score |
|
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor |
|
from ..utils import _safe_indexing, check_random_state |
|
from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions |
|
from ..utils.extmath import softmax, stable_cumsum |
|
from ..utils.metadata_routing import ( |
|
_raise_for_unsupported_routing, |
|
_RoutingNotSupportedMixin, |
|
) |
|
from ..utils.validation import ( |
|
_check_sample_weight, |
|
_num_samples, |
|
check_is_fitted, |
|
has_fit_parameter, |
|
validate_data, |
|
) |
|
from ._base import BaseEnsemble |
|
|
|
__all__ = [ |
|
"AdaBoostClassifier", |
|
"AdaBoostRegressor", |
|
] |
|
|
|
|
|
class BaseWeightBoosting(BaseEnsemble, metaclass=ABCMeta): |
|
"""Base class for AdaBoost estimators. |
|
|
|
Warning: This class should not be used directly. Use derived classes |
|
instead. |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"estimator": [HasMethods(["fit", "predict"]), None], |
|
"n_estimators": [Interval(Integral, 1, None, closed="left")], |
|
"learning_rate": [Interval(Real, 0, None, closed="neither")], |
|
"random_state": ["random_state"], |
|
} |
|
|
|
@abstractmethod |
|
def __init__( |
|
self, |
|
estimator=None, |
|
*, |
|
n_estimators=50, |
|
estimator_params=tuple(), |
|
learning_rate=1.0, |
|
random_state=None, |
|
): |
|
super().__init__( |
|
estimator=estimator, |
|
n_estimators=n_estimators, |
|
estimator_params=estimator_params, |
|
) |
|
|
|
self.learning_rate = learning_rate |
|
self.random_state = random_state |
|
|
|
def _check_X(self, X): |
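        """Validate the input samples against the metadata recorded during ``fit``."""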
|
|
|
return validate_data( |
|
self, |
|
X, |
|
accept_sparse=["csr", "csc"], |
|
ensure_2d=True, |
|
allow_nd=True, |
|
dtype=None, |
|
reset=False, |
|
) |
|
|
|
    @_fit_context(prefer_skip_nested_validation=False)
|
def fit(self, X, y, sample_weight=None): |
|
"""Build a boosted classifier/regressor from the training set (X, y). |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
y : array-like of shape (n_samples,) |
|
The target values. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. If None, the sample weights are initialized to |
|
1 / n_samples. |
|
|
|
Returns |
|
------- |
|
self : object |
|
Fitted estimator. |
|
""" |
|
_raise_for_unsupported_routing(self, "fit", sample_weight=sample_weight) |
|
X, y = validate_data( |
|
self, |
|
X, |
|
y, |
|
accept_sparse=["csr", "csc"], |
|
ensure_2d=True, |
|
allow_nd=True, |
|
dtype=None, |
|
y_numeric=is_regressor(self), |
|
) |
|
|
|
sample_weight = _check_sample_weight( |
|
sample_weight, X, np.float64, copy=True, ensure_non_negative=True |
|
) |
|
sample_weight /= sample_weight.sum() |
|
|
|
|
|
self._validate_estimator() |
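        # Clear any previous fit results and allocate per-iteration storage
        # for the estimators, their weights and their errors.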
|
|
|
|
|
self.estimators_ = [] |
|
self.estimator_weights_ = np.zeros(self.n_estimators, dtype=np.float64) |
|
self.estimator_errors_ = np.ones(self.n_estimators, dtype=np.float64) |
|
|
|
|
|
|
|
random_state = check_random_state(self.random_state) |
|
epsilon = np.finfo(sample_weight.dtype).eps |
|
|
|
zero_weight_mask = sample_weight == 0.0 |
|
for iboost in range(self.n_estimators): |
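            # Clip vanishingly small weights to machine epsilon so later logs
            # and divisions stay well defined, while keeping weights that were
            # exactly zero at the start pinned to zero.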
|
|
|
sample_weight = np.clip(sample_weight, a_min=epsilon, a_max=None) |
|
|
|
sample_weight[zero_weight_mask] = 0.0 |
|
|
|
|
|
sample_weight, estimator_weight, estimator_error = self._boost( |
|
iboost, X, y, sample_weight, random_state |
|
) |
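            # Early termination: ``_boost`` returns ``None`` when the weak
            # learner could not do better than random guessing.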
|
|
|
|
|
if sample_weight is None: |
|
break |
|
self.estimator_weights_[iboost] = estimator_weight |
|
self.estimator_errors_[iboost] = estimator_error |
|
|
|
|
|
if estimator_error == 0: |
|
break |
|
|
|
sample_weight_sum = np.sum(sample_weight) |
|
|
|
if not np.isfinite(sample_weight_sum): |
|
warnings.warn( |
|
( |
|
"Sample weights have reached infinite values," |
|
f" at iteration {iboost}, causing overflow. " |
|
"Iterations stopped. Try lowering the learning rate." |
|
), |
|
stacklevel=2, |
|
) |
|
break |
|
|
|
|
|
if sample_weight_sum <= 0: |
|
break |
|
|
|
if iboost < self.n_estimators - 1: |
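                # Renormalize so the sample weights sum to one for the next
                # boosting iteration.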
|
|
|
sample_weight /= sample_weight_sum |
|
|
|
return self |
|
|
|
@abstractmethod |
|
def _boost(self, iboost, X, y, sample_weight, random_state): |
|
"""Implement a single boost. |
|
|
|
Warning: This method needs to be overridden by subclasses. |
|
|
|
Parameters |
|
---------- |
|
iboost : int |
|
The index of the current boost iteration. |
|
|
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
y : array-like of shape (n_samples,) |
|
The target values (class labels). |
|
|
|
sample_weight : array-like of shape (n_samples,) |
|
The current sample weights. |
|
|
|
random_state : RandomState |
|
            The current random number generator.
|
|
|
Returns |
|
------- |
|
sample_weight : array-like of shape (n_samples,) or None |
|
The reweighted sample weights. |
|
If None then boosting has terminated early. |
|
|
|
estimator_weight : float |
|
The weight for the current boost. |
|
If None then boosting has terminated early. |
|
|
|
error : float |
|
The classification error for the current boost. |
|
If None then boosting has terminated early. |
|
""" |
|
pass |
|
|
|
def staged_score(self, X, y, sample_weight=None): |
|
"""Return staged scores for X, y. |
|
|
|
This generator method yields the ensemble score after each iteration of |
|
boosting and therefore allows monitoring, such as to determine the |
|
score on a test set after each boost. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
y : array-like of shape (n_samples,) |
|
Labels for X. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Sample weights. |
|
|
|
Yields |
|
------ |
|
        z : float
            Score after each boosting iteration: accuracy for classifiers,
            R^2 for regressors.
|
""" |
|
X = self._check_X(X) |
|
|
|
for y_pred in self.staged_predict(X): |
|
if is_classifier(self): |
|
yield accuracy_score(y, y_pred, sample_weight=sample_weight) |
|
else: |
|
yield r2_score(y, y_pred, sample_weight=sample_weight) |
|
|
|
@property |
|
def feature_importances_(self): |
|
"""The impurity-based feature importances. |
|
|
|
The higher, the more important the feature. |
|
The importance of a feature is computed as the (normalized) |
|
total reduction of the criterion brought by that feature. It is also |
|
known as the Gini importance. |
|
|
|
Warning: impurity-based feature importances can be misleading for |
|
high cardinality features (many unique values). See |
|
:func:`sklearn.inspection.permutation_importance` as an alternative. |
|
|
|
Returns |
|
------- |
|
feature_importances_ : ndarray of shape (n_features,) |
|
The feature importances. |
|
""" |
|
if self.estimators_ is None or len(self.estimators_) == 0: |
|
raise ValueError( |
|
"Estimator not fitted, call `fit` before `feature_importances_`." |
|
) |
|
|
|
try: |
|
norm = self.estimator_weights_.sum() |
|
return ( |
|
sum( |
|
weight * clf.feature_importances_ |
|
for weight, clf in zip(self.estimator_weights_, self.estimators_) |
|
) |
|
/ norm |
|
) |
|
|
|
except AttributeError as e: |
|
raise AttributeError( |
|
"Unable to compute feature importances " |
|
"since estimator does not have a " |
|
"feature_importances_ attribute" |
|
) from e |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
def _samme_proba(estimator, n_classes, X): |
|
"""Calculate algorithm 4, step 2, equation c) of Zhu et al [1]. |
|
|
|
References |
|
---------- |
|
.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009. |
|
|
|
""" |
|
proba = estimator.predict_proba(X) |
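    # Displace zero (or negative) probabilities so the logarithm below is
    # defined; such values can occur e.g. with negative sample weights.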
|
|
|
|
|
|
|
|
|
np.clip(proba, np.finfo(proba.dtype).eps, None, out=proba) |
|
log_proba = np.log(proba) |
|
|
|
return (n_classes - 1) * ( |
|
log_proba - (1.0 / n_classes) * log_proba.sum(axis=1)[:, np.newaxis] |
|
) |
|
|
|
|
|
class AdaBoostClassifier( |
|
_RoutingNotSupportedMixin, ClassifierMixin, BaseWeightBoosting |
|
): |
|
"""An AdaBoost classifier. |
|
|
|
An AdaBoost [1]_ classifier is a meta-estimator that begins by fitting a |
|
classifier on the original dataset and then fits additional copies of the |
|
classifier on the same dataset but where the weights of incorrectly |
|
classified instances are adjusted such that subsequent classifiers focus |
|
more on difficult cases. |
|
|
|
This class implements the algorithm based on [2]_. |
|
|
|
Read more in the :ref:`User Guide <adaboost>`. |
|
|
|
.. versionadded:: 0.14 |
|
|
|
Parameters |
|
---------- |
|
estimator : object, default=None |
|
The base estimator from which the boosted ensemble is built. |
|
Support for sample weighting is required, as well as proper |
|
``classes_`` and ``n_classes_`` attributes. If ``None``, then |
|
the base estimator is :class:`~sklearn.tree.DecisionTreeClassifier` |
|
initialized with `max_depth=1`. |
|
|
|
.. versionadded:: 1.2 |
|
`base_estimator` was renamed to `estimator`. |
|
|
|
n_estimators : int, default=50 |
|
The maximum number of estimators at which boosting is terminated. |
|
In case of perfect fit, the learning procedure is stopped early. |
|
Values must be in the range `[1, inf)`. |
|
|
|
learning_rate : float, default=1.0 |
|
Weight applied to each classifier at each boosting iteration. A higher |
|
learning rate increases the contribution of each classifier. There is |
|
a trade-off between the `learning_rate` and `n_estimators` parameters. |
|
Values must be in the range `(0.0, inf)`. |
|
|
|
algorithm : {'SAMME'}, default='SAMME' |
|
Use the SAMME discrete boosting algorithm. |
|
|
|
.. deprecated:: 1.6 |
|
`algorithm` is deprecated and will be removed in version 1.8. This |
|
estimator only implements the 'SAMME' algorithm. |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
        Controls the random seed given to each `estimator` at each
|
boosting iteration. |
|
Thus, it is only used when `estimator` exposes a `random_state`. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
estimator_ : estimator |
|
The base estimator from which the ensemble is grown. |
|
|
|
.. versionadded:: 1.2 |
|
`base_estimator_` was renamed to `estimator_`. |
|
|
|
estimators_ : list of classifiers |
|
The collection of fitted sub-estimators. |
|
|
|
classes_ : ndarray of shape (n_classes,) |
|
        The class labels.
|
|
|
n_classes_ : int |
|
The number of classes. |
|
|
|
estimator_weights_ : ndarray of floats |
|
Weights for each estimator in the boosted ensemble. |
|
|
|
estimator_errors_ : ndarray of floats |
|
Classification error for each estimator in the boosted |
|
ensemble. |
|
|
|
feature_importances_ : ndarray of shape (n_features,) |
|
The impurity-based feature importances if supported by the |
|
``estimator`` (when based on decision trees). |
|
|
|
Warning: impurity-based feature importances can be misleading for |
|
high cardinality features (many unique values). See |
|
:func:`sklearn.inspection.permutation_importance` as an alternative. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdaBoostRegressor : An AdaBoost regressor that begins by fitting a |
|
regressor on the original dataset and then fits additional copies of |
|
the regressor on the same dataset but where the weights of instances |
|
are adjusted according to the error of the current prediction. |
|
|
|
GradientBoostingClassifier : GB builds an additive model in a forward |
|
stage-wise fashion. Regression trees are fit on the negative gradient |
|
of the binomial or multinomial deviance loss function. Binary |
|
classification is a special case where only a single regression tree is |
|
induced. |
|
|
|
sklearn.tree.DecisionTreeClassifier : A non-parametric supervised learning |
|
method used for classification. |
|
Creates a model that predicts the value of a target variable by |
|
learning simple decision rules inferred from the data features. |
|
|
|
References |
|
---------- |
|
.. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of |
|
           On-Line Learning and an Application to Boosting", 1995.
|
|
|
.. [2] :doi:`J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class adaboost." |
|
Statistics and its Interface 2.3 (2009): 349-360. |
|
<10.4310/SII.2009.v2.n3.a8>` |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.ensemble import AdaBoostClassifier |
|
>>> from sklearn.datasets import make_classification |
|
>>> X, y = make_classification(n_samples=1000, n_features=4, |
|
... n_informative=2, n_redundant=0, |
|
... random_state=0, shuffle=False) |
|
>>> clf = AdaBoostClassifier(n_estimators=100, random_state=0) |
|
>>> clf.fit(X, y) |
|
AdaBoostClassifier(n_estimators=100, random_state=0) |
|
>>> clf.predict([[0, 0, 0, 0]]) |
|
array([1]) |
|
>>> clf.score(X, y) |
|
0.96... |
|
|
|
For a detailed example of using AdaBoost to fit a sequence of DecisionTrees |
|
    as weak learners, please refer to
|
:ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py`. |
|
|
|
    For a detailed example of using AdaBoost to fit a non-linearly separable
|
classification dataset composed of two Gaussian quantiles clusters, please |
|
refer to :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py`. |
|
""" |
|
|
|
|
|
_parameter_constraints: dict = { |
|
**BaseWeightBoosting._parameter_constraints, |
|
"algorithm": [StrOptions({"SAMME"}), Hidden(StrOptions({"deprecated"}))], |
|
} |
|
|
|
def __init__( |
|
self, |
|
estimator=None, |
|
*, |
|
n_estimators=50, |
|
learning_rate=1.0, |
|
algorithm="deprecated", |
|
random_state=None, |
|
): |
|
super().__init__( |
|
estimator=estimator, |
|
n_estimators=n_estimators, |
|
learning_rate=learning_rate, |
|
random_state=random_state, |
|
) |
|
|
|
self.algorithm = algorithm |
|
|
|
def _validate_estimator(self): |
|
"""Check the estimator and set the estimator_ attribute.""" |
|
super()._validate_estimator(default=DecisionTreeClassifier(max_depth=1)) |
|
|
|
if self.algorithm != "deprecated": |
|
warnings.warn( |
|
"The parameter 'algorithm' is deprecated in 1.6 and has no effect. " |
|
"It will be removed in version 1.8.", |
|
FutureWarning, |
|
) |
|
|
|
if not has_fit_parameter(self.estimator_, "sample_weight"): |
|
raise ValueError( |
|
f"{self.estimator.__class__.__name__} doesn't support sample_weight." |
|
) |
|
|
|
def _boost(self, iboost, X, y, sample_weight, random_state): |
|
"""Implement a single boost. |
|
|
|
Perform a single boost according to the discrete SAMME algorithm and return the |
|
updated sample weights. |
|
|
|
Parameters |
|
---------- |
|
iboost : int |
|
The index of the current boost iteration. |
|
|
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. |
|
|
|
y : array-like of shape (n_samples,) |
|
The target values (class labels). |
|
|
|
sample_weight : array-like of shape (n_samples,) |
|
The current sample weights. |
|
|
|
random_state : RandomState instance |
|
The RandomState instance used if the base estimator accepts a |
|
`random_state` attribute. |
|
|
|
Returns |
|
------- |
|
sample_weight : array-like of shape (n_samples,) or None |
|
The reweighted sample weights. |
|
If None then boosting has terminated early. |
|
|
|
estimator_weight : float |
|
The weight for the current boost. |
|
If None then boosting has terminated early. |
|
|
|
estimator_error : float |
|
The classification error for the current boost. |
|
If None then boosting has terminated early. |
|
""" |
|
estimator = self._make_estimator(random_state=random_state) |
|
|
|
estimator.fit(X, y, sample_weight=sample_weight) |
|
|
|
y_predict = estimator.predict(X) |
|
|
|
if iboost == 0: |
|
self.classes_ = getattr(estimator, "classes_", None) |
|
self.n_classes_ = len(self.classes_) |
|
|
|
|
|
incorrect = y_predict != y |
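        # Weighted misclassification rate of the current weak learner.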
|
|
|
|
|
estimator_error = np.mean(np.average(incorrect, weights=sample_weight, axis=0)) |
|
|
|
|
|
if estimator_error <= 0: |
|
return sample_weight, 1.0, 0.0 |
|
|
|
n_classes = self.n_classes_ |
|
|
|
|
|
if estimator_error >= 1.0 - (1.0 / n_classes): |
|
self.estimators_.pop(-1) |
|
if len(self.estimators_) == 0: |
|
raise ValueError( |
|
"BaseClassifier in AdaBoostClassifier " |
|
"ensemble is worse than random, ensemble " |
|
"can not be fit." |
|
) |
|
return None, None, None |
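        # SAMME estimator weight: log((1 - err) / err) + log(n_classes - 1),
        # scaled by the learning rate.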
|
|
|
|
|
estimator_weight = self.learning_rate * ( |
|
np.log((1.0 - estimator_error) / estimator_error) + np.log(n_classes - 1.0) |
|
) |
|
|
|
|
|
        if iboost != self.n_estimators - 1:
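            # Increase the weights of misclassified samples; the update is done
            # in log space and only applied where the weight is strictly positive.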
|
|
|
sample_weight = np.exp( |
|
np.log(sample_weight) |
|
+ estimator_weight * incorrect * (sample_weight > 0) |
|
) |
|
|
|
return sample_weight, estimator_weight, estimator_error |
|
|
|
def predict(self, X): |
|
"""Predict classes for X. |
|
|
|
The predicted class of an input sample is computed as the weighted mean |
|
prediction of the classifiers in the ensemble. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Returns |
|
------- |
|
y : ndarray of shape (n_samples,) |
|
The predicted classes. |
|
""" |
|
pred = self.decision_function(X) |
|
|
|
if self.n_classes_ == 2: |
|
return self.classes_.take(pred > 0, axis=0) |
|
|
|
return self.classes_.take(np.argmax(pred, axis=1), axis=0) |
|
|
|
def staged_predict(self, X): |
|
"""Return staged predictions for X. |
|
|
|
The predicted class of an input sample is computed as the weighted mean |
|
prediction of the classifiers in the ensemble. |
|
|
|
This generator method yields the ensemble prediction after each |
|
iteration of boosting and therefore allows monitoring, such as to |
|
determine the prediction on a test set after each boost. |
|
|
|
Parameters |
|
---------- |
|
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
|
The input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Yields |
|
------ |
|
y : generator of ndarray of shape (n_samples,) |
|
The predicted classes. |
|
""" |
|
X = self._check_X(X) |
|
|
|
n_classes = self.n_classes_ |
|
classes = self.classes_ |
|
|
|
if n_classes == 2: |
|
for pred in self.staged_decision_function(X): |
|
yield np.array(classes.take(pred > 0, axis=0)) |
|
|
|
else: |
|
for pred in self.staged_decision_function(X): |
|
yield np.array(classes.take(np.argmax(pred, axis=1), axis=0)) |
|
|
|
def decision_function(self, X): |
|
"""Compute the decision function of ``X``. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Returns |
|
------- |
|
        score : ndarray of shape (n_samples, k)
|
The decision function of the input samples. The order of |
|
outputs is the same as that of the :term:`classes_` attribute. |
|
            Binary classification is a special case with ``k == 1``,
|
otherwise ``k==n_classes``. For binary classification, |
|
values closer to -1 or 1 mean more like the first or second |
|
class in ``classes_``, respectively. |
|
""" |
|
check_is_fitted(self) |
|
X = self._check_X(X) |
|
|
|
n_classes = self.n_classes_ |
|
classes = self.classes_[:, np.newaxis] |
|
|
|
if n_classes == 1: |
|
return np.zeros_like(X, shape=(X.shape[0], 1)) |
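        # SAMME decision function: each weak learner votes +w for its predicted
        # class and -w / (n_classes - 1) for every other class; the weighted
        # votes are summed and normalized below.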
|
|
|
pred = sum( |
|
np.where( |
|
(estimator.predict(X) == classes).T, |
|
w, |
|
-1 / (n_classes - 1) * w, |
|
) |
|
for estimator, w in zip(self.estimators_, self.estimator_weights_) |
|
) |
|
|
|
pred /= self.estimator_weights_.sum() |
|
if n_classes == 2: |
|
pred[:, 0] *= -1 |
|
return pred.sum(axis=1) |
|
return pred |
|
|
|
def staged_decision_function(self, X): |
|
"""Compute decision function of ``X`` for each boosting iteration. |
|
|
|
        This method allows monitoring (i.e. determining the error on a test set)
|
after each boosting iteration. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Yields |
|
------ |
|
score : generator of ndarray of shape (n_samples, k) |
|
The decision function of the input samples. The order of |
|
            outputs is the same as that of the :term:`classes_` attribute.

            Binary classification is a special case with ``k == 1``,
|
otherwise ``k==n_classes``. For binary classification, |
|
values closer to -1 or 1 mean more like the first or second |
|
class in ``classes_``, respectively. |
|
""" |
|
check_is_fitted(self) |
|
X = self._check_X(X) |
|
|
|
n_classes = self.n_classes_ |
|
classes = self.classes_[:, np.newaxis] |
|
pred = None |
|
norm = 0.0 |
|
|
|
for weight, estimator in zip(self.estimator_weights_, self.estimators_): |
|
norm += weight |
|
|
|
current_pred = np.where( |
|
(estimator.predict(X) == classes).T, |
|
weight, |
|
-1 / (n_classes - 1) * weight, |
|
) |
|
|
|
if pred is None: |
|
pred = current_pred |
|
else: |
|
pred += current_pred |
|
|
|
if n_classes == 2: |
|
tmp_pred = np.copy(pred) |
|
tmp_pred[:, 0] *= -1 |
|
yield (tmp_pred / norm).sum(axis=1) |
|
else: |
|
yield pred / norm |
|
|
|
@staticmethod |
|
def _compute_proba_from_decision(decision, n_classes): |
|
"""Compute probabilities from the decision function. |
|
|
|
        This is based on eq. (15) of [1] where:

            p(y=c|X) = exp((1 / (K-1)) f_c(X)) / sum_k(exp((1 / (K-1)) f_k(X)))

                     = softmax((1 / (K-1)) * f(X))
|
|
|
References |
|
---------- |
|
.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", |
|
2009. |
|
""" |
|
if n_classes == 2: |
|
decision = np.vstack([-decision, decision]).T / 2 |
|
else: |
|
decision /= n_classes - 1 |
|
return softmax(decision, copy=False) |
|
|
|
def predict_proba(self, X): |
|
"""Predict class probabilities for X. |
|
|
|
        The predicted class probabilities of an input sample are computed as
|
the weighted mean predicted class probabilities of the classifiers |
|
in the ensemble. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Returns |
|
------- |
|
p : ndarray of shape (n_samples, n_classes) |
|
The class probabilities of the input samples. The order of |
|
            outputs is the same as that of the :term:`classes_` attribute.
|
""" |
|
check_is_fitted(self) |
|
n_classes = self.n_classes_ |
|
|
|
if n_classes == 1: |
|
return np.ones((_num_samples(X), 1)) |
|
|
|
decision = self.decision_function(X) |
|
return self._compute_proba_from_decision(decision, n_classes) |
|
|
|
def staged_predict_proba(self, X): |
|
"""Predict class probabilities for X. |
|
|
|
        The predicted class probabilities of an input sample are computed as
|
the weighted mean predicted class probabilities of the classifiers |
|
in the ensemble. |
|
|
|
This generator method yields the ensemble predicted class probabilities |
|
after each iteration of boosting and therefore allows monitoring, such |
|
as to determine the predicted class probabilities on a test set after |
|
each boost. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Yields |
|
------ |
|
        p : generator of ndarray of shape (n_samples, n_classes)
|
The class probabilities of the input samples. The order of |
|
            outputs is the same as that of the :term:`classes_` attribute.
|
""" |
|
|
|
n_classes = self.n_classes_ |
|
|
|
for decision in self.staged_decision_function(X): |
|
yield self._compute_proba_from_decision(decision, n_classes) |
|
|
|
def predict_log_proba(self, X): |
|
"""Predict class log-probabilities for X. |
|
|
|
        The predicted class log-probabilities of an input sample are computed as
|
the weighted mean predicted class log-probabilities of the classifiers |
|
in the ensemble. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Returns |
|
------- |
|
p : ndarray of shape (n_samples, n_classes) |
|
            The class log-probabilities of the input samples. The order of

            outputs is the same as that of the :term:`classes_` attribute.
|
""" |
|
return np.log(self.predict_proba(X)) |
|
|
|
|
|
class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoosting): |
|
"""An AdaBoost regressor. |
|
|
|
    An AdaBoost [1]_ regressor is a meta-estimator that begins by fitting a
|
regressor on the original dataset and then fits additional copies of the |
|
regressor on the same dataset but where the weights of instances are |
|
adjusted according to the error of the current prediction. As such, |
|
subsequent regressors focus more on difficult cases. |
|
|
|
    This class implements the algorithm known as AdaBoost.R2 [2]_.
|
|
|
Read more in the :ref:`User Guide <adaboost>`. |
|
|
|
.. versionadded:: 0.14 |
|
|
|
Parameters |
|
---------- |
|
estimator : object, default=None |
|
The base estimator from which the boosted ensemble is built. |
|
If ``None``, then the base estimator is |
|
:class:`~sklearn.tree.DecisionTreeRegressor` initialized with |
|
`max_depth=3`. |
|
|
|
.. versionadded:: 1.2 |
|
`base_estimator` was renamed to `estimator`. |
|
|
|
n_estimators : int, default=50 |
|
The maximum number of estimators at which boosting is terminated. |
|
In case of perfect fit, the learning procedure is stopped early. |
|
Values must be in the range `[1, inf)`. |
|
|
|
learning_rate : float, default=1.0 |
|
Weight applied to each regressor at each boosting iteration. A higher |
|
learning rate increases the contribution of each regressor. There is |
|
a trade-off between the `learning_rate` and `n_estimators` parameters. |
|
Values must be in the range `(0.0, inf)`. |
|
|
|
loss : {'linear', 'square', 'exponential'}, default='linear' |
|
The loss function to use when updating the weights after each |
|
boosting iteration. |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
        Controls the random seed given to each `estimator` at each
|
boosting iteration. |
|
Thus, it is only used when `estimator` exposes a `random_state`. |
|
In addition, it controls the bootstrap of the weights used to train the |
|
`estimator` at each boosting iteration. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
estimator_ : estimator |
|
The base estimator from which the ensemble is grown. |
|
|
|
.. versionadded:: 1.2 |
|
`base_estimator_` was renamed to `estimator_`. |
|
|
|
estimators_ : list of regressors |
|
The collection of fitted sub-estimators. |
|
|
|
estimator_weights_ : ndarray of floats |
|
Weights for each estimator in the boosted ensemble. |
|
|
|
estimator_errors_ : ndarray of floats |
|
Regression error for each estimator in the boosted ensemble. |
|
|
|
feature_importances_ : ndarray of shape (n_features,) |
|
The impurity-based feature importances if supported by the |
|
``estimator`` (when based on decision trees). |
|
|
|
Warning: impurity-based feature importances can be misleading for |
|
high cardinality features (many unique values). See |
|
:func:`sklearn.inspection.permutation_importance` as an alternative. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdaBoostClassifier : An AdaBoost classifier. |
|
    GradientBoostingRegressor : Gradient Boosting Regression Tree.
|
sklearn.tree.DecisionTreeRegressor : A decision tree regressor. |
|
|
|
References |
|
---------- |
|
.. [1] Y. Freund, R. Schapire, "A Decision-Theoretic Generalization of |
|
           On-Line Learning and an Application to Boosting", 1995.
|
|
|
.. [2] H. Drucker, "Improving Regressors using Boosting Techniques", 1997. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.ensemble import AdaBoostRegressor |
|
>>> from sklearn.datasets import make_regression |
|
>>> X, y = make_regression(n_features=4, n_informative=2, |
|
... random_state=0, shuffle=False) |
|
>>> regr = AdaBoostRegressor(random_state=0, n_estimators=100) |
|
>>> regr.fit(X, y) |
|
AdaBoostRegressor(n_estimators=100, random_state=0) |
|
>>> regr.predict([[0, 0, 0, 0]]) |
|
array([4.7972...]) |
|
>>> regr.score(X, y) |
|
0.9771... |
|
|
|
For a detailed example of utilizing :class:`~sklearn.ensemble.AdaBoostRegressor` |
|
to fit a sequence of decision trees as weak learners, please refer to |
|
:ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py`. |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
**BaseWeightBoosting._parameter_constraints, |
|
"loss": [StrOptions({"linear", "square", "exponential"})], |
|
} |
|
|
|
def __init__( |
|
self, |
|
estimator=None, |
|
*, |
|
n_estimators=50, |
|
learning_rate=1.0, |
|
loss="linear", |
|
random_state=None, |
|
): |
|
super().__init__( |
|
estimator=estimator, |
|
n_estimators=n_estimators, |
|
learning_rate=learning_rate, |
|
random_state=random_state, |
|
) |
|
|
|
self.loss = loss |
|
self.random_state = random_state |
|
|
|
def _validate_estimator(self): |
|
"""Check the estimator and set the estimator_ attribute.""" |
|
super()._validate_estimator(default=DecisionTreeRegressor(max_depth=3)) |
|
|
|
def _boost(self, iboost, X, y, sample_weight, random_state): |
|
"""Implement a single boost for regression |
|
|
|
Perform a single boost according to the AdaBoost.R2 algorithm and |
|
return the updated sample weights. |
|
|
|
Parameters |
|
---------- |
|
iboost : int |
|
The index of the current boost iteration. |
|
|
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. |
|
|
|
y : array-like of shape (n_samples,) |
|
            The target values (real numbers).
|
|
|
sample_weight : array-like of shape (n_samples,) |
|
The current sample weights. |
|
|
|
random_state : RandomState |
|
The RandomState instance used if the base estimator accepts a |
|
`random_state` attribute. |
|
            It also controls the bootstrap sampling of the weights used to

            train the weak learner.
|
|
|
Returns |
|
------- |
|
sample_weight : array-like of shape (n_samples,) or None |
|
The reweighted sample weights. |
|
If None then boosting has terminated early. |
|
|
|
estimator_weight : float |
|
The weight for the current boost. |
|
If None then boosting has terminated early. |
|
|
|
estimator_error : float |
|
The regression error for the current boost. |
|
If None then boosting has terminated early. |
|
""" |
|
estimator = self._make_estimator(random_state=random_state) |
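        # AdaBoost.R2 trains the weak learner on a bootstrap sample drawn with
        # replacement, with probabilities proportional to the sample weights.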
|
|
|
|
|
bootstrap_idx = random_state.choice( |
|
np.arange(_num_samples(X)), |
|
size=_num_samples(X), |
|
replace=True, |
|
p=sample_weight, |
|
) |
|
|
|
|
|
|
|
X_ = _safe_indexing(X, bootstrap_idx) |
|
y_ = _safe_indexing(y, bootstrap_idx) |
|
estimator.fit(X_, y_) |
|
y_predict = estimator.predict(X) |
|
|
|
error_vect = np.abs(y_predict - y) |
|
sample_mask = sample_weight > 0 |
|
masked_sample_weight = sample_weight[sample_mask] |
|
masked_error_vector = error_vect[sample_mask] |
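        # Normalize the absolute errors to [0, 1] by the largest error, then
        # apply the configured loss to obtain the per-sample losses.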
|
|
|
error_max = masked_error_vector.max() |
|
if error_max != 0: |
|
masked_error_vector /= error_max |
|
|
|
if self.loss == "square": |
|
masked_error_vector **= 2 |
|
elif self.loss == "exponential": |
|
masked_error_vector = 1.0 - np.exp(-masked_error_vector) |
|
|
|
|
|
estimator_error = (masked_sample_weight * masked_error_vector).sum() |
|
|
|
if estimator_error <= 0: |
|
|
|
return sample_weight, 1.0, 0.0 |
|
|
|
elif estimator_error >= 0.5: |
|
|
|
if len(self.estimators_) > 1: |
|
self.estimators_.pop(-1) |
|
return None, None, None |
|
|
|
beta = estimator_error / (1.0 - estimator_error) |
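        # AdaBoost.R2 update: the estimator weight is log(1 / beta), and each
        # sample weight is multiplied by beta ** ((1 - loss) * learning_rate),
        # so well-predicted samples are down-weighted for the next iteration.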
|
|
|
|
|
estimator_weight = self.learning_rate * np.log(1.0 / beta) |
|
|
|
        if iboost != self.n_estimators - 1:
|
sample_weight[sample_mask] *= np.power( |
|
beta, (1.0 - masked_error_vector) * self.learning_rate |
|
) |
|
|
|
return sample_weight, estimator_weight, estimator_error |
|
|
|
def _get_median_predict(self, X, limit): |
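        """Return the weighted median of the first ``limit`` estimators' predictions."""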
|
|
|
predictions = np.array([est.predict(X) for est in self.estimators_[:limit]]).T |
|
|
|
|
|
sorted_idx = np.argsort(predictions, axis=1) |
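        # Weighted median: accumulate the estimator weights in the sorted order
        # of the predictions and pick, per sample, the first prediction whose
        # cumulative weight reaches half of the total weight.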
|
|
|
|
|
weight_cdf = stable_cumsum(self.estimator_weights_[sorted_idx], axis=1) |
|
median_or_above = weight_cdf >= 0.5 * weight_cdf[:, -1][:, np.newaxis] |
|
median_idx = median_or_above.argmax(axis=1) |
|
|
|
median_estimators = sorted_idx[np.arange(_num_samples(X)), median_idx] |
|
|
|
|
|
return predictions[np.arange(_num_samples(X)), median_estimators] |
|
|
|
def predict(self, X): |
|
"""Predict regression value for X. |
|
|
|
The predicted regression value of an input sample is computed |
|
as the weighted median prediction of the regressors in the ensemble. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. Sparse matrix can be CSC, CSR, COO, |
|
DOK, or LIL. COO, DOK, and LIL are converted to CSR. |
|
|
|
Returns |
|
------- |
|
y : ndarray of shape (n_samples,) |
|
The predicted regression values. |
|
""" |
|
check_is_fitted(self) |
|
X = self._check_X(X) |
|
|
|
return self._get_median_predict(X, len(self.estimators_)) |
|
|
|
def staged_predict(self, X): |
|
"""Return staged predictions for X. |
|
|
|
The predicted regression value of an input sample is computed |
|
as the weighted median prediction of the regressors in the ensemble. |
|
|
|
This generator method yields the ensemble prediction after each |
|
iteration of boosting and therefore allows monitoring, such as to |
|
determine the prediction on a test set after each boost. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
The training input samples. |
|
|
|
Yields |
|
------ |
|
y : generator of ndarray of shape (n_samples,) |
|
The predicted regression values. |
|
""" |
|
check_is_fitted(self) |
|
X = self._check_X(X) |
|
|
|
for i, _ in enumerate(self.estimators_, 1): |
|
yield self._get_median_predict(X, limit=i) |
|
|