"""Bagging meta-estimator."""

import itertools
import numbers
from abc import ABCMeta, abstractmethod
from functools import partial
from numbers import Integral
from warnings import warn

import numpy as np

from ..base import ClassifierMixin, RegressorMixin, _fit_context
from ..metrics import accuracy_score, r2_score
from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
from ..utils import (
    Bunch,
    _safe_indexing,
    check_random_state,
    column_or_1d,
)
from ..utils._mask import indices_to_mask
from ..utils._param_validation import HasMethods, Interval, RealNotInt
from ..utils._tags import get_tags
from ..utils.metadata_routing import (
    MetadataRouter,
    MethodMapping,
    _raise_for_params,
    _routing_enabled,
    get_routing_for_object,
    process_routing,
)
from ..utils.metaestimators import available_if
from ..utils.multiclass import check_classification_targets
from ..utils.parallel import Parallel, delayed
from ..utils.random import sample_without_replacement
from ..utils.validation import (
    _check_method_params,
    _check_sample_weight,
    _deprecate_positional_args,
    _estimator_has,
    check_is_fitted,
    has_fit_parameter,
    validate_data,
)
from ._base import BaseEnsemble, _partition_estimators

__all__ = ["BaggingClassifier", "BaggingRegressor"]

MAX_INT = np.iinfo(np.int32).max


def _generate_indices(random_state, bootstrap, n_population, n_samples):
    """Draw randomly sampled indices."""
    # Draw indices, with or without replacement
    if bootstrap:
        indices = random_state.randint(0, n_population, n_samples)
    else:
        indices = sample_without_replacement(
            n_population, n_samples, random_state=random_state
        )

    return indices
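
# A minimal sketch of what `_generate_indices` returns under each sampling
# mode, assuming a seeded NumPy RandomState (illustrative only, kept as a
# comment so it is not executed at import time):
#
#     import numpy as np
#     from sklearn.utils.random import sample_without_replacement
#
#     rng = np.random.RandomState(0)
#     # bootstrap=True: draw with replacement, duplicates are possible
#     with_replacement = rng.randint(0, 10, 10)
#     # bootstrap=False: draw without replacement, all indices distinct
#     without_replacement = sample_without_replacement(10, 7, random_state=rng)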


def _generate_bagging_indices(
    random_state,
    bootstrap_features,
    bootstrap_samples,
    n_features,
    n_samples,
    max_features,
    max_samples,
):
    """Randomly draw feature and sample indices."""
    # Get valid random state
    random_state = check_random_state(random_state)

    # Draw feature and sample indices
    feature_indices = _generate_indices(
        random_state, bootstrap_features, n_features, max_features
    )
    sample_indices = _generate_indices(
        random_state, bootstrap_samples, n_samples, max_samples
    )

    return feature_indices, sample_indices
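
# Because both index sets are derived from one RandomState seeded above,
# replaying the same integer seed reproduces the exact draw; this is what
# `BaseBagging._get_estimators_indices` relies on. A sketch (illustrative
# only; exact values depend on the seed):
#
#     a = _generate_bagging_indices(42, False, True, 8, 100, 4, 100)
#     b = _generate_bagging_indices(42, False, True, 8, 100, 4, 100)
#     # a and b contain identical feature and sample index arrays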


def _parallel_build_estimators(
    n_estimators,
    ensemble,
    X,
    y,
    seeds,
    total_n_estimators,
    verbose,
    check_input,
    fit_params,
):
    """Private function used to build a batch of estimators within a job."""
    # Retrieve settings
    n_samples, n_features = X.shape
    max_features = ensemble._max_features
    max_samples = ensemble._max_samples
    bootstrap = ensemble.bootstrap
    bootstrap_features = ensemble.bootstrap_features
    has_check_input = has_fit_parameter(ensemble.estimator_, "check_input")
    requires_feature_indexing = bootstrap_features or max_features != n_features

    # Build estimators
    estimators = []
    estimators_features = []

    # Without metadata routing, inspecting the fit signature is the only way
    # to know whether sample_weight can be forwarded to the base estimator.
    support_sample_weight = has_fit_parameter(ensemble.estimator_, "sample_weight")
    if not _routing_enabled() and (
        not support_sample_weight and fit_params.get("sample_weight") is not None
    ):
        raise ValueError(
            "The base estimator doesn't support sample weight, but sample_weight is "
            "passed to the fit method."
        )

    for i in range(n_estimators):
        if verbose > 1:
            print(
                "Building estimator %d of %d for this parallel run (total %d)..."
                % (i + 1, n_estimators, total_n_estimators)
            )

        random_state = seeds[i]
        estimator = ensemble._make_estimator(append=False, random_state=random_state)

        if has_check_input:
            estimator_fit = partial(estimator.fit, check_input=check_input)
        else:
            estimator_fit = estimator.fit

        # Draw random feature, sample indices
        features, indices = _generate_bagging_indices(
            random_state,
            bootstrap_features,
            bootstrap,
            n_features,
            n_samples,
            max_features,
            max_samples,
        )

        fit_params_ = fit_params.copy()

        # Draw samples, using sample weights when the base estimator consumes
        # them. With metadata routing enabled, consumption is determined by
        # the estimator's request; otherwise it falls back to the signature
        # inspection above.
        if _routing_enabled():
            request_or_router = get_routing_for_object(ensemble.estimator_)
            consumes_sample_weight = request_or_router.consumes(
                "fit", ("sample_weight",)
            )
        else:
            consumes_sample_weight = support_sample_weight
        if consumes_sample_weight:
            # Encode the resampling as sample weights: repeated bootstrap
            # draws increase a sample's weight, out-of-bag samples get zero.
            curr_sample_weight = _check_sample_weight(
                fit_params_.pop("sample_weight", None), X
            ).copy()

            if bootstrap:
                sample_counts = np.bincount(indices, minlength=n_samples)
                curr_sample_weight *= sample_counts
            else:
                not_indices_mask = ~indices_to_mask(indices, n_samples)
                curr_sample_weight[not_indices_mask] = 0

            fit_params_["sample_weight"] = curr_sample_weight
            X_ = X[:, features] if requires_feature_indexing else X
            estimator_fit(X_, y, **fit_params_)
        else:
            # Cannot use sample weights: materialize the resampled subset
            y_ = _safe_indexing(y, indices)
            X_ = _safe_indexing(X, indices)
            fit_params_ = _check_method_params(X, params=fit_params_, indices=indices)
            if requires_feature_indexing:
                X_ = X_[:, features]
            estimator_fit(X_, y_, **fit_params_)

        estimators.append(estimator)
        estimators_features.append(features)

    return estimators, estimators_features
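
# When the base estimator accepts `sample_weight`, the resampling above is
# encoded as weights instead of materializing X[indices]: fitting on
# (X, weights) is then equivalent to fitting on the resampled rows. A sketch
# of that bookkeeping (illustrative only):
#
#     import numpy as np
#
#     indices = np.array([0, 0, 2, 3, 3])        # one bootstrap draw, n=5
#     counts = np.bincount(indices, minlength=5) # -> [2, 0, 1, 2, 0]
#     weights = np.ones(5) * counts              # samples 1 and 4 are OOB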


def _parallel_predict_proba(estimators, estimators_features, X, n_classes):
    """Private function used to compute (proba-)predictions within a job."""
    n_samples = X.shape[0]
    proba = np.zeros((n_samples, n_classes))

    for estimator, features in zip(estimators, estimators_features):
        if hasattr(estimator, "predict_proba"):
            proba_estimator = estimator.predict_proba(X[:, features])

            if n_classes == len(estimator.classes_):
                proba += proba_estimator
            else:
                # The estimator saw only a subset of the classes: accumulate
                # its probabilities in the matching columns
                proba[:, estimator.classes_] += proba_estimator[
                    :, range(len(estimator.classes_))
                ]
        else:
            # Resort to voting
            predictions = estimator.predict(X[:, features])

            for i in range(n_samples):
                proba[i, predictions[i]] += 1

    return proba
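
# Each job returns the *sum* of its estimators' probability matrices; the
# caller (`BaggingClassifier.predict_proba`) divides by `n_estimators` to get
# the mean. For estimators without `predict_proba`, the hard vote adds 1 to
# the predicted class column, so the final average is the vote share. Sketch
# (illustrative only):
#
#     import numpy as np
#
#     p1 = np.array([[0.9, 0.1]])   # estimator 1, one sample, two classes
#     p2 = np.array([[0.4, 0.6]])   # estimator 2
#     mean = (p1 + p2) / 2          # -> [[0.65, 0.35]]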


def _parallel_predict_log_proba(estimators, estimators_features, X, n_classes):
    """Private function used to compute log probabilities within a job."""
    n_samples = X.shape[0]
    log_proba = np.empty((n_samples, n_classes))
    log_proba.fill(-np.inf)
    all_classes = np.arange(n_classes, dtype=int)

    for estimator, features in zip(estimators, estimators_features):
        log_proba_estimator = estimator.predict_log_proba(X[:, features])

        if n_classes == len(estimator.classes_):
            # Accumulate in log space: logaddexp(a, b) == log(e**a + e**b)
            log_proba = np.logaddexp(log_proba, log_proba_estimator)
        else:
            log_proba[:, estimator.classes_] = np.logaddexp(
                log_proba[:, estimator.classes_],
                log_proba_estimator[:, range(len(estimator.classes_))],
            )

            missing = np.setdiff1d(all_classes, estimator.classes_)
            log_proba[:, missing] = np.logaddexp(log_proba[:, missing], -np.inf)

    return log_proba
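
# Aggregation happens in log space with `np.logaddexp`, which computes
# log(exp(a) + exp(b)) without underflow, so summing probabilities never
# leaves log space. Sketch (illustrative only):
#
#     import numpy as np
#
#     a, b = np.log(0.9), np.log(0.1)
#     np.logaddexp(a, b)   # == log(0.9 + 0.1) == 0.0
#     # log_proba starts at -inf == log(0), the additive identity here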


def _parallel_decision_function(estimators, estimators_features, X):
    """Private function used to compute decisions within a job."""
    return sum(
        estimator.decision_function(X[:, features])
        for estimator, features in zip(estimators, estimators_features)
    )


def _parallel_predict_regression(estimators, estimators_features, X):
    """Private function used to compute predictions within a job."""
    return sum(
        estimator.predict(X[:, features])
        for estimator, features in zip(estimators, estimators_features)
    )


class BaseBagging(BaseEnsemble, metaclass=ABCMeta):
    """Base class for Bagging meta-estimator.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    _parameter_constraints: dict = {
        "estimator": [HasMethods(["fit", "predict"]), None],
        "n_estimators": [Interval(Integral, 1, None, closed="left")],
        "max_samples": [
            Interval(Integral, 1, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="right"),
        ],
        "max_features": [
            Interval(Integral, 1, None, closed="left"),
            Interval(RealNotInt, 0, 1, closed="right"),
        ],
        "bootstrap": ["boolean"],
        "bootstrap_features": ["boolean"],
        "oob_score": ["boolean"],
        "warm_start": ["boolean"],
        "n_jobs": [None, Integral],
        "random_state": ["random_state"],
        "verbose": ["verbose"],
    }

    @abstractmethod
    def __init__(
        self,
        estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        super().__init__(
            estimator=estimator,
            n_estimators=n_estimators,
        )
        self.max_samples = max_samples
        self.max_features = max_features
        self.bootstrap = bootstrap
        self.bootstrap_features = bootstrap_features
        self.oob_score = oob_score
        self.warm_start = warm_start
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose

    @_deprecate_positional_args(version="1.7")
    @_fit_context(
        # BaseBagging.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, *, sample_weight=None, **fit_params):
        """Build a Bagging ensemble of estimators from the training set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
            Note that this is supported only if the base estimator supports
            sample weighting.

        **fit_params : dict
            Parameters to pass to the underlying estimators.

            .. versionadded:: 1.5

                Only available if `enable_metadata_routing=True`,
                which can be set by using
                ``sklearn.set_config(enable_metadata_routing=True)``.
                See :ref:`Metadata Routing User Guide <metadata_routing>` for
                more details.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        _raise_for_params(fit_params, self, "fit")

        # Convert data (X is required to be 2d and indexable)
        X, y = validate_data(
            self,
            X,
            y,
            accept_sparse=["csr", "csc"],
            dtype=None,
            ensure_all_finite=False,
            multi_output=True,
        )

        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
            fit_params["sample_weight"] = sample_weight

        return self._fit(X, y, max_samples=self.max_samples, **fit_params)

    def _parallel_args(self):
        return {}

    def _fit(
        self,
        X,
        y,
        max_samples=None,
        max_depth=None,
        check_input=True,
        **fit_params,
    ):
        """Build a Bagging ensemble of estimators from the training
        set (X, y).

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        y : array-like of shape (n_samples,)
            The target values (class labels in classification, real numbers in
            regression).

        max_samples : int or float, default=None
            Argument to use instead of self.max_samples.

        max_depth : int, default=None
            Override value used when constructing base estimator. Only
            supported if the base estimator has a max_depth parameter.

        check_input : bool, default=True
            Override value used when fitting base estimator. Only supported
            if the base estimator has a check_input parameter for fit function.
            If the meta-estimator already checks the input, set this value to
            False to prevent redundant input validation.

        **fit_params : dict
            Parameters to pass to the :term:`fit` method of the underlying
            estimator.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        random_state = check_random_state(self.random_state)

        # Remap output
        n_samples = X.shape[0]
        self._n_samples = n_samples
        y = self._validate_y(y)

        # Check parameters
        self._validate_estimator(self._get_estimator())

        if _routing_enabled():
            routed_params = process_routing(self, "fit", **fit_params)
        else:
            routed_params = Bunch()
            routed_params.estimator = Bunch(fit=fit_params)
            if "sample_weight" in fit_params:
                routed_params.estimator.fit["sample_weight"] = fit_params[
                    "sample_weight"
                ]

        if max_depth is not None:
            self.estimator_.max_depth = max_depth

        # Validate max_samples
        if max_samples is None:
            max_samples = self.max_samples
        elif not isinstance(max_samples, numbers.Integral):
            max_samples = int(max_samples * X.shape[0])

        if max_samples > X.shape[0]:
            raise ValueError("max_samples must be <= n_samples")

        # Store validated integer row sampling value
        self._max_samples = max_samples

        # Validate max_features
        if isinstance(self.max_features, numbers.Integral):
            max_features = self.max_features
        elif isinstance(self.max_features, float):
            max_features = int(self.max_features * self.n_features_in_)

        if max_features > self.n_features_in_:
            raise ValueError("max_features must be <= n_features")

        max_features = max(1, int(max_features))

        # Store validated integer feature sampling value
        self._max_features = max_features

        # Other checks
        if not self.bootstrap and self.oob_score:
            raise ValueError("Out of bag estimation only available if bootstrap=True")

        if self.warm_start and self.oob_score:
            raise ValueError("Out of bag estimate only available if warm_start=False")

        if hasattr(self, "oob_score_") and self.warm_start:
            del self.oob_score_

        if not self.warm_start or not hasattr(self, "estimators_"):
            # Free allocated memory, if any
            self.estimators_ = []
            self.estimators_features_ = []

        n_more_estimators = self.n_estimators - len(self.estimators_)

        if n_more_estimators < 0:
            raise ValueError(
                "n_estimators=%d must be larger or equal to "
                "len(estimators_)=%d when warm_start==True"
                % (self.n_estimators, len(self.estimators_))
            )
        elif n_more_estimators == 0:
            warn(
                "Warm-start fitting without increasing n_estimators does not "
                "fit new trees."
            )
            return self

        # Parallel loop
        n_jobs, n_estimators, starts = _partition_estimators(
            n_more_estimators, self.n_jobs
        )
        total_n_estimators = sum(n_estimators)

        # Advance the random state past the seeds already consumed by the
        # previously trained estimators, so warm starts are reproducible
        if self.warm_start and len(self.estimators_) > 0:
            random_state.randint(MAX_INT, size=len(self.estimators_))

        seeds = random_state.randint(MAX_INT, size=n_more_estimators)
        self._seeds = seeds

        all_results = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_build_estimators)(
                n_estimators[i],
                self,
                X,
                y,
                seeds[starts[i] : starts[i + 1]],
                total_n_estimators,
                verbose=self.verbose,
                check_input=check_input,
                fit_params=routed_params.estimator.fit,
            )
            for i in range(n_jobs)
        )

        # Reduce
        self.estimators_ += list(
            itertools.chain.from_iterable(t[0] for t in all_results)
        )
        self.estimators_features_ += list(
            itertools.chain.from_iterable(t[1] for t in all_results)
        )

        if self.oob_score:
            self._set_oob_score(X, y)

        return self
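
    # With `warm_start=True`, successive calls to `fit` replay the previous
    # random-state stream (see the `random_state.randint` fast-forward above)
    # and only train the missing estimators. A usage sketch, assuming a
    # classifier subclass and some data `X, y` (illustrative only):
    #
    #     ens = BaggingClassifier(n_estimators=10, warm_start=True,
    #                             random_state=0).fit(X, y)
    #     ens.n_estimators = 15
    #     ens.fit(X, y)             # trains only 5 additional estimators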

    @abstractmethod
    def _set_oob_score(self, X, y):
        """Calculate out of bag predictions and score."""

    def _validate_y(self, y):
        if len(y.shape) == 1 or y.shape[1] == 1:
            return column_or_1d(y, warn=True)
        return y

    def _get_estimators_indices(self):
        # Get drawn indices along both sample and feature axes
        for seed in self._seeds:
            # Operations accessing random_state must be performed identically
            # to those in `_parallel_build_estimators()`
            feature_indices, sample_indices = _generate_bagging_indices(
                seed,
                self.bootstrap_features,
                self.bootstrap,
                self.n_features_in_,
                self._n_samples,
                self._max_features,
                self._max_samples,
            )

            yield feature_indices, sample_indices

    @property
    def estimators_samples_(self):
        """
        The subset of drawn samples for each base estimator.

        Returns a dynamically generated list of indices identifying
        the samples used for fitting each member of the ensemble, i.e.,
        the in-bag samples.

        Note: the list is re-created at each call to the property in order
        to reduce the object memory footprint by not storing the sampling
        data. Thus fetching the property may be slower than expected.
        """
        return [sample_indices for _, sample_indices in self._get_estimators_indices()]
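
    # The complement of `estimators_samples_` gives each estimator's
    # out-of-bag rows. A sketch of recovering them, where `ens` is a fitted
    # bagging ensemble and `n_samples` its training set size (both
    # hypothetical names, illustrative only):
    #
    #     import numpy as np
    #
    #     for est, in_bag in zip(ens.estimators_, ens.estimators_samples_):
    #         oob_mask = np.ones(n_samples, dtype=bool)
    #         oob_mask[in_bag] = False     # rows never drawn for `est`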

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        .. versionadded:: 1.5

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = MetadataRouter(owner=self.__class__.__name__)
        router.add(
            estimator=self._get_estimator(),
            method_mapping=MethodMapping().add(callee="fit", caller="fit"),
        )
        return router
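
    # Routing `sample_weight` (or any other fit parameter) through to the
    # base estimator requires opting in on both ends. A usage sketch, with
    # `X`, `y`, `w` as hypothetical training data (illustrative only):
    #
    #     import sklearn
    #     from sklearn.linear_model import LogisticRegression
    #
    #     sklearn.set_config(enable_metadata_routing=True)
    #     base = LogisticRegression().set_fit_request(sample_weight=True)
    #     ens = BaggingClassifier(estimator=base).fit(X, y, sample_weight=w)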

    @abstractmethod
    def _get_estimator(self):
        """Resolve which estimator to return."""

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.input_tags.sparse = get_tags(self._get_estimator()).input_tags.sparse
        tags.input_tags.allow_nan = get_tags(self._get_estimator()).input_tags.allow_nan
        return tags


class BaggingClassifier(ClassifierMixin, BaseBagging):
    """A Bagging classifier.

    A Bagging classifier is an ensemble meta-estimator that fits base
    classifiers each on random subsets of the original dataset and then
    aggregates their individual predictions (either by voting or by averaging)
    to form a final prediction. Such a meta-estimator can typically be used as
    a way to reduce the variance of a black-box estimator (e.g., a decision
    tree), by introducing randomization into its construction procedure and
    then making an ensemble out of it.

    This algorithm encompasses several works from the literature. When random
    subsets of the dataset are drawn as random subsets of the samples, then
    this algorithm is known as Pasting [1]_. If samples are drawn with
    replacement, then the method is known as Bagging [2]_. When random subsets
    of the dataset are drawn as random subsets of the features, then the method
    is known as Random Subspaces [3]_. Finally, when base estimators are built
    on subsets of both samples and features, then the method is known as
    Random Patches [4]_.

    Read more in the :ref:`User Guide <bagging>`.

    .. versionadded:: 0.15

    Parameters
    ----------
    estimator : object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a
        :class:`~sklearn.tree.DecisionTreeClassifier`.

        .. versionadded:: 1.2
           `base_estimator` was renamed to `estimator`.

    n_estimators : int, default=10
        The number of base estimators in the ensemble.

    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator (with
        replacement by default, see `bootstrap` for more details).

        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.

    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator
        (without replacement by default, see `bootstrap_features` for more
        details).

        - If int, then draw `max_features` features.
        - If float, then draw `max(1, int(max_features * n_features_in_))` features.

    bootstrap : bool, default=True
        Whether samples are drawn with replacement. If False, sampling
        without replacement is performed.

    bootstrap_features : bool, default=False
        Whether features are drawn with replacement.

    oob_score : bool, default=False
        Whether to use out-of-bag samples to estimate
        the generalization error. Only available if bootstrap=True.

    warm_start : bool, default=False
        When set to True, reuse the solution of the previous call to fit
        and add more estimators to the ensemble, otherwise, just fit
        a whole new ensemble. See :term:`the Glossary <warm_start>`.

        .. versionadded:: 0.17
           *warm_start* constructor parameter.

    n_jobs : int, default=None
        The number of jobs to run in parallel for both :meth:`fit` and
        :meth:`predict`. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    random_state : int, RandomState instance or None, default=None
        Controls the random resampling of the original dataset
        (sample wise and feature wise).
        If the base estimator accepts a `random_state` attribute, a different
        seed is generated for each instance in the ensemble.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    verbose : int, default=0
        Controls the verbosity when fitting and predicting.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. versionadded:: 1.2
           `base_estimator_` was renamed to `estimator_`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    estimators_ : list of estimators
        The collection of fitted base estimators.

    estimators_samples_ : list of arrays
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    estimators_features_ : list of arrays
        The subset of drawn features for each base estimator.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_classes_ : int or list
        The number of classes.

    oob_score_ : float
        Score of the training dataset obtained using an out-of-bag estimate.
        This attribute exists only when ``oob_score`` is True.

    oob_decision_function_ : ndarray of shape (n_samples, n_classes)
        Decision function computed with out-of-bag estimate on the training
        set. If n_estimators is small it might be possible that a data point
        was never left out during the bootstrap. In this case,
        `oob_decision_function_` might contain NaN. This attribute exists
        only when ``oob_score`` is True.

    See Also
    --------
    BaggingRegressor : A Bagging regressor.

    References
    ----------

    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.

    .. [2] L. Breiman, "Bagging predictors", Machine Learning, 24(2), 123-140,
           1996.

    .. [3] T. Ho, "The random subspace method for constructing decision
           forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844,
           1998.

    .. [4] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.

    Examples
    --------
    >>> from sklearn.svm import SVC
    >>> from sklearn.ensemble import BaggingClassifier
    >>> from sklearn.datasets import make_classification
    >>> X, y = make_classification(n_samples=100, n_features=4,
    ...                            n_informative=2, n_redundant=0,
    ...                            random_state=0, shuffle=False)
    >>> clf = BaggingClassifier(estimator=SVC(),
    ...                         n_estimators=10, random_state=0).fit(X, y)
    >>> clf.predict([[0, 0, 0, 0]])
    array([1])
    """

    def __init__(
        self,
        estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        super().__init__(
            estimator=estimator,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            bootstrap=bootstrap,
            bootstrap_features=bootstrap_features,
            oob_score=oob_score,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
        )

    def _get_estimator(self):
        """Resolve which estimator to return (default is DecisionTreeClassifier)."""
        if self.estimator is None:
            return DecisionTreeClassifier()
        return self.estimator

    def _set_oob_score(self, X, y):
        n_samples = y.shape[0]
        n_classes_ = self.n_classes_

        predictions = np.zeros((n_samples, n_classes_))

        for estimator, samples, features in zip(
            self.estimators_, self.estimators_samples_, self.estimators_features_
        ):
            # Create mask for OOB samples
            mask = ~indices_to_mask(samples, n_samples)

            if hasattr(estimator, "predict_proba"):
                predictions[mask, :] += estimator.predict_proba(
                    (X[mask, :])[:, features]
                )
            else:
                p = estimator.predict((X[mask, :])[:, features])
                j = 0

                for i in range(n_samples):
                    if mask[i]:
                        predictions[i, p[j]] += 1
                        j += 1

        if (predictions.sum(axis=1) == 0).any():
            warn(
                "Some inputs do not have OOB scores. "
                "This probably means too few estimators were used "
                "to compute any reliable oob estimates."
            )

        oob_decision_function = predictions / predictions.sum(axis=1)[:, np.newaxis]
        oob_score = accuracy_score(y, np.argmax(predictions, axis=1))

        self.oob_decision_function_ = oob_decision_function
        self.oob_score_ = oob_score
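
    # The resulting attributes give a held-out estimate without a separate
    # validation split. A usage sketch with hypothetical data `X, y`
    # (illustrative only):
    #
    #     clf = BaggingClassifier(n_estimators=50, oob_score=True,
    #                             random_state=0).fit(X, y)
    #     clf.oob_score_                 # OOB accuracy on the training set
    #     clf.oob_decision_function_    # per-sample OOB class probabilities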

    def _validate_y(self, y):
        y = column_or_1d(y, warn=True)
        check_classification_targets(y)
        self.classes_, y = np.unique(y, return_inverse=True)
        self.n_classes_ = len(self.classes_)

        return y

    def predict(self, X):
        """Predict class for X.

        The predicted class of an input sample is computed as the class with
        the highest mean predicted probability. If base estimators do not
        implement a ``predict_proba`` method, then it resorts to voting.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted classes.
        """
        predicted_probability = self.predict_proba(X)
        return self.classes_.take(np.argmax(predicted_probability, axis=1), axis=0)

    def predict_proba(self, X):
        """Predict class probabilities for X.

        The predicted class probabilities of an input sample are computed as
        the mean predicted class probabilities of the base estimators in the
        ensemble. If base estimators do not implement a ``predict_proba``
        method, then it resorts to voting and the predicted class probabilities
        of an input sample represent the proportion of estimators predicting
        each class.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)

        # Check data
        X = validate_data(
            self,
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            ensure_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_proba = Parallel(
            n_jobs=n_jobs, verbose=self.verbose, **self._parallel_args()
        )(
            delayed(_parallel_predict_proba)(
                self.estimators_[starts[i] : starts[i + 1]],
                self.estimators_features_[starts[i] : starts[i + 1]],
                X,
                self.n_classes_,
            )
            for i in range(n_jobs)
        )

        # Reduce
        proba = sum(all_proba) / self.n_estimators

        return proba

    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        The predicted class log-probabilities of an input sample are computed
        as the log of the mean predicted class probabilities of the base
        estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        p : ndarray of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        if hasattr(self.estimator_, "predict_log_proba"):
            # Check data
            X = validate_data(
                self,
                X,
                accept_sparse=["csr", "csc"],
                dtype=None,
                ensure_all_finite=False,
                reset=False,
            )

            # Parallel loop
            n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

            all_log_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
                delayed(_parallel_predict_log_proba)(
                    self.estimators_[starts[i] : starts[i + 1]],
                    self.estimators_features_[starts[i] : starts[i + 1]],
                    X,
                    self.n_classes_,
                )
                for i in range(n_jobs)
            )

            # Reduce in log space, then take the log of the mean
            log_proba = all_log_proba[0]

            for j in range(1, len(all_log_proba)):
                log_proba = np.logaddexp(log_proba, all_log_proba[j])

            log_proba -= np.log(self.n_estimators)
        else:
            log_proba = np.log(self.predict_proba(X))

        return log_proba

    @available_if(
        _estimator_has("decision_function", delegates=("estimators_", "estimator"))
    )
    def decision_function(self, X):
        """Average of the decision functions of the base classifiers.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        score : ndarray of shape (n_samples, k)
            The decision function of the input samples. The columns correspond
            to the classes in sorted order, as they appear in the attribute
            ``classes_``. Regression and binary classification are special
            cases with ``k == 1``, otherwise ``k == n_classes``.
        """
        check_is_fitted(self)

        # Check data
        X = validate_data(
            self,
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            ensure_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_decisions = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_decision_function)(
                self.estimators_[starts[i] : starts[i + 1]],
                self.estimators_features_[starts[i] : starts[i + 1]],
                X,
            )
            for i in range(n_jobs)
        )

        # Reduce
        decisions = sum(all_decisions) / self.n_estimators

        return decisions
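
    # `decision_function` is only exposed when the base estimator provides it
    # (see the `available_if` guard above). A usage sketch with a margin-based
    # base estimator and hypothetical data `X, y` (illustrative only):
    #
    #     from sklearn.svm import LinearSVC
    #
    #     clf = BaggingClassifier(estimator=LinearSVC()).fit(X, y)
    #     clf.decision_function(X)   # mean margin across the 10 estimators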


class BaggingRegressor(RegressorMixin, BaseBagging):
    """A Bagging regressor.

    A Bagging regressor is an ensemble meta-estimator that fits base
    regressors each on random subsets of the original dataset and then
    aggregates their individual predictions by averaging to form a final
    prediction. Such a meta-estimator can typically be used as a way to
    reduce the variance of a black-box estimator (e.g., a decision tree),
    by introducing randomization into its construction procedure and then
    making an ensemble out of it.

    This algorithm encompasses several works from the literature. When random
    subsets of the dataset are drawn as random subsets of the samples, then
    this algorithm is known as Pasting [1]_. If samples are drawn with
    replacement, then the method is known as Bagging [2]_. When random subsets
    of the dataset are drawn as random subsets of the features, then the method
    is known as Random Subspaces [3]_. Finally, when base estimators are built
    on subsets of both samples and features, then the method is known as
    Random Patches [4]_.

    Read more in the :ref:`User Guide <bagging>`.

    .. versionadded:: 0.15

    Parameters
    ----------
    estimator : object, default=None
        The base estimator to fit on random subsets of the dataset.
        If None, then the base estimator is a
        :class:`~sklearn.tree.DecisionTreeRegressor`.

        .. versionadded:: 1.2
           `base_estimator` was renamed to `estimator`.

    n_estimators : int, default=10
        The number of base estimators in the ensemble.

    max_samples : int or float, default=1.0
        The number of samples to draw from X to train each base estimator (with
        replacement by default, see `bootstrap` for more details).

        - If int, then draw `max_samples` samples.
        - If float, then draw `max_samples * X.shape[0]` samples.

    max_features : int or float, default=1.0
        The number of features to draw from X to train each base estimator
        (without replacement by default, see `bootstrap_features` for more
        details).

        - If int, then draw `max_features` features.
        - If float, then draw `max(1, int(max_features * n_features_in_))` features.

    bootstrap : bool, default=True
        Whether samples are drawn with replacement. If False, sampling
        without replacement is performed.

    bootstrap_features : bool, default=False
        Whether features are drawn with replacement.

    oob_score : bool, default=False
        Whether to use out-of-bag samples to estimate
        the generalization error. Only available if bootstrap=True.

    warm_start : bool, default=False
        When set to True, reuse the solution of the previous call to fit
        and add more estimators to the ensemble, otherwise, just fit
        a whole new ensemble. See :term:`the Glossary <warm_start>`.

    n_jobs : int, default=None
        The number of jobs to run in parallel for both :meth:`fit` and
        :meth:`predict`. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    random_state : int, RandomState instance or None, default=None
        Controls the random resampling of the original dataset
        (sample wise and feature wise).
        If the base estimator accepts a `random_state` attribute, a different
        seed is generated for each instance in the ensemble.
        Pass an int for reproducible output across multiple function calls.
        See :term:`Glossary <random_state>`.

    verbose : int, default=0
        Controls the verbosity when fitting and predicting.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

        .. versionadded:: 1.2
           `base_estimator_` was renamed to `estimator_`.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    estimators_ : list of estimators
        The collection of fitted sub-estimators.

    estimators_samples_ : list of arrays
        The subset of drawn samples (i.e., the in-bag samples) for each base
        estimator. Each subset is defined by an array of the indices selected.

    estimators_features_ : list of arrays
        The subset of drawn features for each base estimator.

    oob_score_ : float
        Score of the training dataset obtained using an out-of-bag estimate.
        This attribute exists only when ``oob_score`` is True.

    oob_prediction_ : ndarray of shape (n_samples,)
        Prediction computed with out-of-bag estimate on the training
        set. If n_estimators is small it might be possible that a data point
        was never left out during the bootstrap. In this case,
        `oob_prediction_` might contain NaN. This attribute exists only
        when ``oob_score`` is True.

    See Also
    --------
    BaggingClassifier : A Bagging classifier.

    References
    ----------

    .. [1] L. Breiman, "Pasting small votes for classification in large
           databases and on-line", Machine Learning, 36(1), 85-103, 1999.

    .. [2] L. Breiman, "Bagging predictors", Machine Learning, 24(2), 123-140,
           1996.

    .. [3] T. Ho, "The random subspace method for constructing decision
           forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844,
           1998.

    .. [4] G. Louppe and P. Geurts, "Ensembles on Random Patches", Machine
           Learning and Knowledge Discovery in Databases, 346-361, 2012.

    Examples
    --------
    >>> from sklearn.svm import SVR
    >>> from sklearn.ensemble import BaggingRegressor
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_samples=100, n_features=4,
    ...                        n_informative=2, n_targets=1,
    ...                        random_state=0, shuffle=False)
    >>> regr = BaggingRegressor(estimator=SVR(),
    ...                         n_estimators=10, random_state=0).fit(X, y)
    >>> regr.predict([[0, 0, 0, 0]])
    array([-2.8720...])
    """

    def __init__(
        self,
        estimator=None,
        n_estimators=10,
        *,
        max_samples=1.0,
        max_features=1.0,
        bootstrap=True,
        bootstrap_features=False,
        oob_score=False,
        warm_start=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
    ):
        super().__init__(
            estimator=estimator,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            bootstrap=bootstrap,
            bootstrap_features=bootstrap_features,
            oob_score=oob_score,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
        )

    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean of the predicted regression targets of the estimators in the
        ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples. Sparse matrices are accepted only if
            they are supported by the base estimator.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        check_is_fitted(self)

        # Check data
        X = validate_data(
            self,
            X,
            accept_sparse=["csr", "csc"],
            dtype=None,
            ensure_all_finite=False,
            reset=False,
        )

        # Parallel loop
        n_jobs, _, starts = _partition_estimators(self.n_estimators, self.n_jobs)

        all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose)(
            delayed(_parallel_predict_regression)(
                self.estimators_[starts[i] : starts[i + 1]],
                self.estimators_features_[starts[i] : starts[i + 1]],
                X,
            )
            for i in range(n_jobs)
        )

        # Reduce
        y_hat = sum(all_y_hat) / self.n_estimators

        return y_hat

    def _set_oob_score(self, X, y):
        n_samples = y.shape[0]

        predictions = np.zeros((n_samples,))
        n_predictions = np.zeros((n_samples,))

        for estimator, samples, features in zip(
            self.estimators_, self.estimators_samples_, self.estimators_features_
        ):
            # Create mask for OOB samples
            mask = ~indices_to_mask(samples, n_samples)

            predictions[mask] += estimator.predict((X[mask, :])[:, features])
            n_predictions[mask] += 1

        if (n_predictions == 0).any():
            warn(
                "Some inputs do not have OOB scores. "
                "This probably means too few estimators were used "
                "to compute any reliable oob estimates."
            )
            n_predictions[n_predictions == 0] = 1

        predictions /= n_predictions

        self.oob_prediction_ = predictions
        self.oob_score_ = r2_score(y, predictions)
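
    # As with the classifier, the OOB attributes provide a generalization
    # estimate without a separate validation split. A usage sketch with
    # hypothetical data `X, y` (illustrative only):
    #
    #     reg = BaggingRegressor(n_estimators=50, oob_score=True,
    #                            random_state=0).fit(X, y)
    #     reg.oob_score_         # OOB R^2 on the training set
    #     reg.oob_prediction_    # per-sample OOB predictions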

    def _get_estimator(self):
        """Resolve which estimator to return (default is DecisionTreeRegressor)."""
        if self.estimator is None:
            return DecisionTreeRegressor()
        return self.estimator