File size: 3,134 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
"""
Feature agglomeration. Base classes and functions for performing feature
agglomeration.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from scipy.sparse import issparse
from ..base import TransformerMixin
from ..utils import metadata_routing
from ..utils.deprecation import _deprecate_Xt_in_inverse_transform
from ..utils.validation import check_is_fitted, validate_data
###############################################################################
# Mixin class for feature agglomeration.
class AgglomerationTransform(TransformerMixin):
"""
A class for feature agglomeration via the transform interface.
"""
# This prevents ``set_split_inverse_transform`` to be generated for the
# non-standard ``Xt`` arg on ``inverse_transform``.
# TODO(1.7): remove when Xt is removed for inverse_transform.
__metadata_request__inverse_transform = {"Xt": metadata_routing.UNUSED}
def transform(self, X):
"""
Transform a new matrix using the built clustering.
Parameters
----------
X : array-like of shape (n_samples, n_features) or \
(n_samples, n_samples)
A M by N array of M observations in N dimensions or a length
M array of M one-dimensional observations.
Returns
-------
Y : ndarray of shape (n_samples, n_clusters) or (n_clusters,)
The pooled values for each feature cluster.
"""
check_is_fitted(self)
X = validate_data(self, X, reset=False)
if self.pooling_func == np.mean and not issparse(X):
size = np.bincount(self.labels_)
n_samples = X.shape[0]
# a fast way to compute the mean of grouped features
nX = np.array(
[np.bincount(self.labels_, X[i, :]) / size for i in range(n_samples)]
)
else:
nX = [
self.pooling_func(X[:, self.labels_ == l], axis=1)
for l in np.unique(self.labels_)
]
nX = np.array(nX).T
return nX
def inverse_transform(self, X=None, *, Xt=None):
"""
Inverse the transformation and return a vector of size `n_features`.
Parameters
----------
X : array-like of shape (n_samples, n_clusters) or (n_clusters,)
The values to be assigned to each cluster of samples.
Xt : array-like of shape (n_samples, n_clusters) or (n_clusters,)
The values to be assigned to each cluster of samples.
.. deprecated:: 1.5
`Xt` was deprecated in 1.5 and will be removed in 1.7. Use `X` instead.
Returns
-------
X : ndarray of shape (n_samples, n_features) or (n_features,)
A vector of size `n_samples` with the values of `Xred` assigned to
each of the cluster of samples.
"""
X = _deprecate_Xt_in_inverse_transform(X, Xt)
check_is_fitted(self)
unil, inverse = np.unique(self.labels_, return_inverse=True)
return X[..., inverse]
|