File size: 10,408 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 |
"""Base class for ensemble-based estimators."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from abc import ABCMeta, abstractmethod
import numpy as np
from joblib import effective_n_jobs
from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier, is_regressor
from ..utils import Bunch, check_random_state
from ..utils._tags import get_tags
from ..utils._user_interface import _print_elapsed_time
from ..utils.metadata_routing import _routing_enabled
from ..utils.metaestimators import _BaseComposition
def _fit_single_estimator(
estimator, X, y, fit_params, message_clsname=None, message=None
):
"""Private function used to fit an estimator within a job."""
# TODO(SLEP6): remove if-condition for unrouted sample_weight when metadata
# routing can't be disabled.
if not _routing_enabled() and "sample_weight" in fit_params:
try:
with _print_elapsed_time(message_clsname, message):
estimator.fit(X, y, sample_weight=fit_params["sample_weight"])
except TypeError as exc:
if "unexpected keyword argument 'sample_weight'" in str(exc):
raise TypeError(
"Underlying estimator {} does not support sample weights.".format(
estimator.__class__.__name__
)
) from exc
raise
else:
with _print_elapsed_time(message_clsname, message):
estimator.fit(X, y, **fit_params)
return estimator
def _set_random_states(estimator, random_state=None):
"""Set fixed random_state parameters for an estimator.
Finds all parameters ending ``random_state`` and sets them to integers
derived from ``random_state``.
Parameters
----------
estimator : estimator supporting get/set_params
Estimator with potential randomness managed by random_state
parameters.
random_state : int, RandomState instance or None, default=None
Pseudo-random number generator to control the generation of the random
integers. Pass an int for reproducible output across multiple function
calls.
See :term:`Glossary <random_state>`.
Notes
-----
This does not necessarily set *all* ``random_state`` attributes that
control an estimator's randomness, only those accessible through
``estimator.get_params()``. ``random_state``s not controlled include
those belonging to:
* cross-validation splitters
* ``scipy.stats`` rvs
"""
random_state = check_random_state(random_state)
to_set = {}
for key in sorted(estimator.get_params(deep=True)):
if key == "random_state" or key.endswith("__random_state"):
to_set[key] = random_state.randint(np.iinfo(np.int32).max)
if to_set:
estimator.set_params(**to_set)
class BaseEnsemble(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):
"""Base class for all ensemble classes.
Warning: This class should not be used directly. Use derived classes
instead.
Parameters
----------
estimator : object
The base estimator from which the ensemble is built.
n_estimators : int, default=10
The number of estimators in the ensemble.
estimator_params : list of str, default=tuple()
The list of attributes to use as parameters when instantiating a
new base estimator. If none are given, default parameters are used.
Attributes
----------
estimator_ : estimator
The base estimator from which the ensemble is grown.
estimators_ : list of estimators
The collection of fitted base estimators.
"""
@abstractmethod
def __init__(
self,
estimator=None,
*,
n_estimators=10,
estimator_params=tuple(),
):
# Set parameters
self.estimator = estimator
self.n_estimators = n_estimators
self.estimator_params = estimator_params
# Don't instantiate estimators now! Parameters of estimator might
# still change. Eg., when grid-searching with the nested object syntax.
# self.estimators_ needs to be filled by the derived classes in fit.
def _validate_estimator(self, default=None):
"""Check the base estimator.
Sets the `estimator_` attributes.
"""
if self.estimator is not None:
self.estimator_ = self.estimator
else:
self.estimator_ = default
def _make_estimator(self, append=True, random_state=None):
"""Make and configure a copy of the `estimator_` attribute.
Warning: This method should be used to properly instantiate new
sub-estimators.
"""
estimator = clone(self.estimator_)
estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params})
if random_state is not None:
_set_random_states(estimator, random_state)
if append:
self.estimators_.append(estimator)
return estimator
def __len__(self):
"""Return the number of estimators in the ensemble."""
return len(self.estimators_)
def __getitem__(self, index):
"""Return the index'th estimator in the ensemble."""
return self.estimators_[index]
def __iter__(self):
"""Return iterator over estimators in the ensemble."""
return iter(self.estimators_)
def _partition_estimators(n_estimators, n_jobs):
"""Private function used to partition estimators between jobs."""
# Compute the number of jobs
n_jobs = min(effective_n_jobs(n_jobs), n_estimators)
# Partition estimators between jobs
n_estimators_per_job = np.full(n_jobs, n_estimators // n_jobs, dtype=int)
n_estimators_per_job[: n_estimators % n_jobs] += 1
starts = np.cumsum(n_estimators_per_job)
return n_jobs, n_estimators_per_job.tolist(), [0] + starts.tolist()
class _BaseHeterogeneousEnsemble(
MetaEstimatorMixin, _BaseComposition, metaclass=ABCMeta
):
"""Base class for heterogeneous ensemble of learners.
Parameters
----------
estimators : list of (str, estimator) tuples
The ensemble of estimators to use in the ensemble. Each element of the
list is defined as a tuple of string (i.e. name of the estimator) and
an estimator instance. An estimator can be set to `'drop'` using
`set_params`.
Attributes
----------
estimators_ : list of estimators
The elements of the estimators parameter, having been fitted on the
training data. If an estimator has been set to `'drop'`, it will not
appear in `estimators_`.
"""
@property
def named_estimators(self):
"""Dictionary to access any fitted sub-estimators by name.
Returns
-------
:class:`~sklearn.utils.Bunch`
"""
return Bunch(**dict(self.estimators))
@abstractmethod
def __init__(self, estimators):
self.estimators = estimators
def _validate_estimators(self):
if len(self.estimators) == 0:
raise ValueError(
"Invalid 'estimators' attribute, 'estimators' should be a "
"non-empty list of (string, estimator) tuples."
)
names, estimators = zip(*self.estimators)
# defined by MetaEstimatorMixin
self._validate_names(names)
has_estimator = any(est != "drop" for est in estimators)
if not has_estimator:
raise ValueError(
"All estimators are dropped. At least one is required "
"to be an estimator."
)
is_estimator_type = is_classifier if is_classifier(self) else is_regressor
for est in estimators:
if est != "drop" and not is_estimator_type(est):
raise ValueError(
"The estimator {} should be a {}.".format(
est.__class__.__name__, is_estimator_type.__name__[3:]
)
)
return names, estimators
def set_params(self, **params):
"""
Set the parameters of an estimator from the ensemble.
Valid parameter keys can be listed with `get_params()`. Note that you
can directly set the parameters of the estimators contained in
`estimators`.
Parameters
----------
**params : keyword arguments
Specific parameters using e.g.
`set_params(parameter_name=new_value)`. In addition, to setting the
parameters of the estimator, the individual estimator of the
estimators can also be set, or can be removed by setting them to
'drop'.
Returns
-------
self : object
Estimator instance.
"""
super()._set_params("estimators", **params)
return self
def get_params(self, deep=True):
"""
Get the parameters of an estimator from the ensemble.
Returns the parameters given in the constructor as well as the
estimators contained within the `estimators` parameter.
Parameters
----------
deep : bool, default=True
Setting it to True gets the various estimators and the parameters
of the estimators as well.
Returns
-------
params : dict
Parameter and estimator names mapped to their values or parameter
names mapped to their values.
"""
return super()._get_params("estimators", deep=deep)
def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
try:
tags.input_tags.allow_nan = all(
get_tags(est[1]).input_tags.allow_nan if est[1] != "drop" else True
for est in self.estimators
)
tags.input_tags.sparse = all(
get_tags(est[1]).input_tags.sparse if est[1] != "drop" else True
for est in self.estimators
)
except Exception:
# If `estimators` does not comply with our API (list of tuples) then it will
# fail. In this case, we assume that `allow_nan` and `sparse` are False but
# the parameter validation will raise an error during `fit`.
pass # pragma: no cover
return tags
|