|
"""Approximate kernel feature maps based on Fourier transforms and count sketches.""" |
|
|
|
|
|
|
|
|
|
import warnings |
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
import scipy.sparse as sp |
|
from scipy.fft import fft, ifft |
|
from scipy.linalg import svd |
|
|
|
from .base import ( |
|
BaseEstimator, |
|
ClassNamePrefixFeaturesOutMixin, |
|
TransformerMixin, |
|
_fit_context, |
|
) |
|
from .metrics.pairwise import KERNEL_PARAMS, PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels |
|
from .utils import check_random_state |
|
from .utils._param_validation import Interval, StrOptions |
|
from .utils.extmath import safe_sparse_dot |
|
from .utils.validation import ( |
|
_check_feature_names_in, |
|
check_is_fitted, |
|
validate_data, |
|
) |
|
|
|
|
|
class PolynomialCountSketch( |
|
ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator |
|
): |
|
"""Polynomial kernel approximation via Tensor Sketch. |
|
|
|
Implements Tensor Sketch, which approximates the feature map |
|
of the polynomial kernel:: |
|
|
|
K(X, Y) = (gamma * <X, Y> + coef0)^degree |
|
|
|
by efficiently computing a Count Sketch of the outer product of a |
|
vector with itself using Fast Fourier Transforms (FFT). Read more in the |
|
:ref:`User Guide <polynomial_kernel_approx>`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
Parameters |
|
---------- |
|
gamma : float, default=1.0 |
|
Parameter of the polynomial kernel whose feature map |
|
will be approximated. |
|
|
|
degree : int, default=2 |
|
Degree of the polynomial kernel whose feature map |
|
will be approximated. |
|
|
|
coef0 : int, default=0 |
|
Constant term of the polynomial kernel whose feature map |
|
will be approximated. |
|
|
|
n_components : int, default=100 |
|
Dimensionality of the output feature space. Usually, `n_components` |
|
should be greater than the number of features in input samples in |
|
order to achieve good performance. The optimal score / run time |
|
balance is typically achieved around `n_components` = 10 * `n_features`, |
|
but this depends on the specific dataset being used. |
|
|
|
random_state : int, RandomState instance, default=None |
|
Determines random number generation for indexHash and bitHash |
|
initialization. Pass an int for reproducible results across multiple |
|
function calls. See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
indexHash_ : ndarray of shape (degree, n_features), dtype=int64 |
|
Array of indexes in range [0, n_components) used to represent |
|
the 2-wise independent hash functions for Count Sketch computation. |
|
|
|
bitHash_ : ndarray of shape (degree, n_features), dtype=float32 |
|
Array with random entries in {+1, -1}, used to represent |
|
the 2-wise independent hash functions for Count Sketch computation. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. |
|
Nystroem : Approximate a kernel map using a subset of the training data. |
|
RBFSampler : Approximate a RBF kernel feature map using random Fourier |
|
features. |
|
SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel. |
|
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.kernel_approximation import PolynomialCountSketch |
|
>>> from sklearn.linear_model import SGDClassifier |
|
>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]] |
|
>>> y = [0, 0, 1, 1] |
|
>>> ps = PolynomialCountSketch(degree=3, random_state=1) |
|
>>> X_features = ps.fit_transform(X) |
|
>>> clf = SGDClassifier(max_iter=10, tol=1e-3) |
|
>>> clf.fit(X_features, y) |
|
SGDClassifier(max_iter=10) |
|
>>> clf.score(X_features, y) |
|
1.0 |
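
    An illustrative sanity check (a sketch, not a guarantee) compares the
    inner products of the sketched features with the exact polynomial
    kernel; ``Z @ Z.T`` approaches the kernel matrix as `n_components`
    grows::

        import numpy as np
        from sklearn.metrics.pairwise import polynomial_kernel

        rng = np.random.RandomState(0)
        X_dense = rng.normal(size=(20, 5))
        ps = PolynomialCountSketch(degree=2, n_components=500, random_state=0)
        Z = ps.fit_transform(X_dense)
        # gamma and coef0 below match the PolynomialCountSketch defaults.
        K_exact = polynomial_kernel(X_dense, degree=2, gamma=1.0, coef0=0)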
|
|
|
For a more detailed example of usage, see |
|
:ref:`sphx_glr_auto_examples_kernel_approximation_plot_scalable_poly_kernels.py` |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"gamma": [Interval(Real, 0, None, closed="left")], |
|
"degree": [Interval(Integral, 1, None, closed="left")], |
|
"coef0": [Interval(Real, None, None, closed="neither")], |
|
"n_components": [Interval(Integral, 1, None, closed="left")], |
|
"random_state": ["random_state"], |
|
} |
|
|
|
def __init__( |
|
self, *, gamma=1.0, degree=2, coef0=0, n_components=100, random_state=None |
|
): |
|
self.gamma = gamma |
|
self.degree = degree |
|
self.coef0 = coef0 |
|
self.n_components = n_components |
|
self.random_state = random_state |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y=None): |
|
"""Fit the model with X. |
|
|
|
Initializes the internal variables. The method needs no information |
|
about the distribution of data, so we only care about n_features in X. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) or (n_samples, n_outputs), \ |
|
default=None |
|
Target values (None for unsupervised transformations). |
|
|
|
Returns |
|
------- |
|
self : object |
|
Returns the instance itself. |
|
""" |
|
X = validate_data(self, X, accept_sparse="csc") |
|
random_state = check_random_state(self.random_state) |
|
|
|
n_features = X.shape[1] |
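
        # A nonzero coef0 is absorbed by appending a constant sqrt(coef0)
        # feature in `transform`, so the hash functions need one extra column.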
|
if self.coef0 != 0: |
|
n_features += 1 |
|
|
|
self.indexHash_ = random_state.randint( |
|
0, high=self.n_components, size=(self.degree, n_features) |
|
) |
|
|
|
self.bitHash_ = random_state.choice(a=[-1, 1], size=(self.degree, n_features)) |
|
self._n_features_out = self.n_components |
|
return self |
|
|
|
def transform(self, X): |
|
"""Generate the feature map approximation for X. |
|
|
|
Parameters |
|
---------- |
|
        X : {array-like, sparse matrix}, shape (n_samples, n_features)
|
New data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
Returns |
|
------- |
|
        X_new : array-like, shape (n_samples, n_components)
            The approximate polynomial kernel feature map of X.
|
""" |
|
|
|
check_is_fitted(self) |
|
X = validate_data(self, X, accept_sparse="csc", reset=False) |
|
|
|
X_gamma = np.sqrt(self.gamma) * X |
|
|
|
if sp.issparse(X_gamma) and self.coef0 != 0: |
|
X_gamma = sp.hstack( |
|
[X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))], |
|
format="csc", |
|
) |
|
|
|
elif not sp.issparse(X_gamma) and self.coef0 != 0: |
|
X_gamma = np.hstack( |
|
[X_gamma, np.sqrt(self.coef0) * np.ones((X_gamma.shape[0], 1))] |
|
) |
|
|
|
if X_gamma.shape[1] != self.indexHash_.shape[1]: |
|
raise ValueError( |
|
"Number of features of test samples does not" |
|
" match that of training samples." |
|
) |
|
|
|
count_sketches = np.zeros((X_gamma.shape[0], self.degree, self.n_components)) |
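
        # Build one count sketch of X per degree: feature j is hashed to
        # bucket indexHash_[d, j] with sign bitHash_[d, j].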
|
|
|
if sp.issparse(X_gamma): |
|
for j in range(X_gamma.shape[1]): |
|
for d in range(self.degree): |
|
iHashIndex = self.indexHash_[d, j] |
|
iHashBit = self.bitHash_[d, j] |
|
count_sketches[:, d, iHashIndex] += ( |
|
(iHashBit * X_gamma[:, [j]]).toarray().ravel() |
|
) |
|
|
|
else: |
|
for j in range(X_gamma.shape[1]): |
|
for d in range(self.degree): |
|
iHashIndex = self.indexHash_[d, j] |
|
iHashBit = self.bitHash_[d, j] |
|
count_sketches[:, d, iHashIndex] += iHashBit * X_gamma[:, j] |
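
        # By the convolution theorem, multiplying the FFTs of the `degree`
        # individual count sketches and transforming back yields the count
        # sketch of their implicit tensor product, i.e. the TensorSketch of X.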
|
|
|
|
|
|
|
count_sketches_fft = fft(count_sketches, axis=2, overwrite_x=True) |
|
count_sketches_fft_prod = np.prod(count_sketches_fft, axis=1) |
|
data_sketch = np.real(ifft(count_sketches_fft_prod, overwrite_x=True)) |
|
|
|
return data_sketch |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
class RBFSampler(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): |
|
"""Approximate a RBF kernel feature map using random Fourier features. |
|
|
|
    It implements a variant of Random Kitchen Sinks [1].
|
|
|
Read more in the :ref:`User Guide <rbf_kernel_approx>`. |
|
|
|
Parameters |
|
---------- |
|
    gamma : 'scale' or float, default=1.0
        Parameter of the RBF kernel: exp(-gamma * ||x - y||^2).
        If ``gamma='scale'`` is passed then it uses
        1 / (n_features * X.var()) as the value of gamma.
|
|
|
.. versionadded:: 1.2 |
|
The option `"scale"` was added in 1.2. |
|
|
|
n_components : int, default=100 |
|
        Number of Monte Carlo samples used to approximate the kernel.
        Equals the dimensionality of the computed feature space.
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Pseudo-random number generator to control the generation of the random |
|
weights and random offset when fitting the training data. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
random_offset_ : ndarray of shape (n_components,), dtype={np.float64, np.float32} |
|
Random offset used to compute the projection in the `n_components` |
|
dimensions of the feature space. |
|
|
|
random_weights_ : ndarray of shape (n_features, n_components),\ |
|
dtype={np.float64, np.float32} |
|
Random projection directions drawn from the Fourier transform |
|
of the RBF kernel. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. |
|
Nystroem : Approximate a kernel map using a subset of the training data. |
|
PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch. |
|
SkewedChi2Sampler : Approximate feature map for |
|
"skewed chi-squared" kernel. |
|
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels. |
|
|
|
Notes |
|
----- |
|
See "Random Features for Large-Scale Kernel Machines" by A. Rahimi and |
|
Benjamin Recht. |
|
|
|
[1] "Weighted Sums of Random Kitchen Sinks: Replacing |
|
minimization with randomization in learning" by A. Rahimi and |
|
Benjamin Recht. |
|
(https://people.eecs.berkeley.edu/~brecht/papers/08.rah.rec.nips.pdf) |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.kernel_approximation import RBFSampler |
|
>>> from sklearn.linear_model import SGDClassifier |
|
>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]] |
|
>>> y = [0, 0, 1, 1] |
|
>>> rbf_feature = RBFSampler(gamma=1, random_state=1) |
|
>>> X_features = rbf_feature.fit_transform(X) |
|
>>> clf = SGDClassifier(max_iter=5, tol=1e-3) |
|
>>> clf.fit(X_features, y) |
|
SGDClassifier(max_iter=5) |
|
>>> clf.score(X_features, y) |
|
1.0 |
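
    An illustrative check (a sketch, not a guarantee) against the exact RBF
    kernel; ``Z @ Z.T`` approaches ``rbf_kernel(X_dense, gamma=0.5)`` as
    `n_components` grows::

        import numpy as np
        from sklearn.metrics.pairwise import rbf_kernel

        rng = np.random.RandomState(0)
        X_dense = rng.normal(size=(20, 5))
        rbf = RBFSampler(gamma=0.5, n_components=2000, random_state=0)
        Z = rbf.fit_transform(X_dense)
        K_exact = rbf_kernel(X_dense, gamma=0.5)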
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"gamma": [ |
|
StrOptions({"scale"}), |
|
Interval(Real, 0.0, None, closed="left"), |
|
], |
|
"n_components": [Interval(Integral, 1, None, closed="left")], |
|
"random_state": ["random_state"], |
|
} |
|
|
|
def __init__(self, *, gamma=1.0, n_components=100, random_state=None): |
|
self.gamma = gamma |
|
self.n_components = n_components |
|
self.random_state = random_state |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y=None): |
|
"""Fit the model with X. |
|
|
|
Samples random projection according to n_features. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix}, shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
y : array-like, shape (n_samples,) or (n_samples, n_outputs), \ |
|
default=None |
|
Target values (None for unsupervised transformations). |
|
|
|
Returns |
|
------- |
|
self : object |
|
Returns the instance itself. |
|
""" |
|
X = validate_data(self, X, accept_sparse="csr") |
|
random_state = check_random_state(self.random_state) |
|
n_features = X.shape[1] |
|
sparse = sp.issparse(X) |
|
if self.gamma == "scale": |
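            # var = E[X^2] - E[X]^2 for sparse X; fall back to gamma = 1.0
            # when the variance is zero to avoid division by zero.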
|
|
|
X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var() |
|
self._gamma = 1.0 / (n_features * X_var) if X_var != 0 else 1.0 |
|
else: |
|
self._gamma = self.gamma |
|
self.random_weights_ = (2.0 * self._gamma) ** 0.5 * random_state.normal( |
|
size=(n_features, self.n_components) |
|
) |
|
|
|
self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components) |
|
|
|
if X.dtype == np.float32: |
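            # Cast the fitted attributes to float32 so that `transform`
            # preserves the input dtype.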
|
|
|
|
|
self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False) |
|
self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False) |
|
|
|
self._n_features_out = self.n_components |
|
return self |
|
|
|
def transform(self, X): |
|
"""Apply the approximate feature map to X. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix}, shape (n_samples, n_features) |
|
New data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
Returns |
|
------- |
|
        X_new : array-like, shape (n_samples, n_components)
            The approximate RBF kernel feature map of X.
|
""" |
|
check_is_fitted(self) |
|
|
|
X = validate_data(self, X, accept_sparse="csr", reset=False) |
|
projection = safe_sparse_dot(X, self.random_weights_) |
|
projection += self.random_offset_ |
|
np.cos(projection, projection) |
|
projection *= (2.0 / self.n_components) ** 0.5 |
|
return projection |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
tags.transformer_tags.preserves_dtype = ["float64", "float32"] |
|
return tags |
|
|
|
|
|
class SkewedChi2Sampler( |
|
ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator |
|
): |
|
"""Approximate feature map for "skewed chi-squared" kernel. |
|
|
|
Read more in the :ref:`User Guide <skewed_chi_kernel_approx>`. |
|
|
|
Parameters |
|
---------- |
|
skewedness : float, default=1.0 |
|
"skewedness" parameter of the kernel. Needs to be cross-validated. |
|
|
|
n_components : int, default=100 |
|
        Number of Monte Carlo samples used to approximate the kernel.
        Equals the dimensionality of the computed feature space.
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Pseudo-random number generator to control the generation of the random |
|
weights and random offset when fitting the training data. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
random_weights_ : ndarray of shape (n_features, n_components) |
|
Weight array, sampled from a secant hyperbolic distribution, which will |
|
be used to linearly transform the log of the data. |
|
|
|
    random_offset_ : ndarray of shape (n_components,)
        Bias term, which will be added to the projected data. It is
        uniformly distributed between 0 and 2*pi.
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. |
|
Nystroem : Approximate a kernel map using a subset of the training data. |
|
RBFSampler : Approximate a RBF kernel feature map using random Fourier |
|
features. |
|
sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel. |
|
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels. |
|
|
|
References |
|
---------- |
|
See "Random Fourier Approximations for Skewed Multiplicative Histogram |
|
Kernels" by Fuxin Li, Catalin Ionescu and Cristian Sminchisescu. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.kernel_approximation import SkewedChi2Sampler |
|
>>> from sklearn.linear_model import SGDClassifier |
|
>>> X = [[0, 0], [1, 1], [1, 0], [0, 1]] |
|
>>> y = [0, 0, 1, 1] |
|
>>> chi2_feature = SkewedChi2Sampler(skewedness=.01, |
|
... n_components=10, |
|
... random_state=0) |
|
>>> X_features = chi2_feature.fit_transform(X, y) |
|
>>> clf = SGDClassifier(max_iter=10, tol=1e-3) |
|
>>> clf.fit(X_features, y) |
|
SGDClassifier(max_iter=10) |
|
>>> clf.score(X_features, y) |
|
1.0 |
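
    An illustrative check (a sketch) against the exact skewed chi-squared
    kernel, which, following the reference above, can be written with ``c``
    denoting the skewedness as
    ``k(x, y) = prod_i 2 * sqrt((x_i + c) * (y_i + c)) / (x_i + y_i + 2 * c)``;
    ``Z @ Z.T`` approaches it as ``n_components`` grows::

        import numpy as np

        rng = np.random.RandomState(0)
        X_pos = rng.rand(10, 3)
        c = 1.0
        sampler = SkewedChi2Sampler(skewedness=c, n_components=5000,
                                    random_state=0)
        Z = sampler.fit_transform(X_pos)
        K_exact = np.prod(
            2 * np.sqrt((X_pos[:, None, :] + c) * (X_pos[None, :, :] + c))
            / (X_pos[:, None, :] + X_pos[None, :, :] + 2 * c),
            axis=-1,
        )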
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"skewedness": [Interval(Real, None, None, closed="neither")], |
|
"n_components": [Interval(Integral, 1, None, closed="left")], |
|
"random_state": ["random_state"], |
|
} |
|
|
|
def __init__(self, *, skewedness=1.0, n_components=100, random_state=None): |
|
self.skewedness = skewedness |
|
self.n_components = n_components |
|
self.random_state = random_state |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y=None): |
|
"""Fit the model with X. |
|
|
|
Samples random projection according to n_features. |
|
|
|
Parameters |
|
---------- |
|
X : array-like, shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
y : array-like, shape (n_samples,) or (n_samples, n_outputs), \ |
|
default=None |
|
Target values (None for unsupervised transformations). |
|
|
|
Returns |
|
------- |
|
self : object |
|
Returns the instance itself. |
|
""" |
|
X = validate_data(self, X) |
|
random_state = check_random_state(self.random_state) |
|
n_features = X.shape[1] |
|
uniform = random_state.uniform(size=(n_features, self.n_components)) |
|
|
|
self.random_weights_ = 1.0 / np.pi * np.log(np.tan(np.pi / 2.0 * uniform)) |
|
self.random_offset_ = random_state.uniform(0, 2 * np.pi, size=self.n_components) |
|
|
|
if X.dtype == np.float32: |
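            # Cast the fitted attributes to float32 so that `transform`
            # preserves the input dtype.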
|
|
|
|
|
self.random_weights_ = self.random_weights_.astype(X.dtype, copy=False) |
|
self.random_offset_ = self.random_offset_.astype(X.dtype, copy=False) |
|
|
|
self._n_features_out = self.n_components |
|
return self |
|
|
|
def transform(self, X): |
|
"""Apply the approximate feature map to X. |
|
|
|
Parameters |
|
---------- |
|
X : array-like, shape (n_samples, n_features) |
|
New data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. All values of X must be |
|
strictly greater than "-skewedness". |
|
|
|
Returns |
|
------- |
|
        X_new : array-like, shape (n_samples, n_components)
            The approximate skewed chi-squared kernel feature map of X.
|
""" |
|
check_is_fitted(self) |
|
X = validate_data( |
|
self, X, copy=True, dtype=[np.float64, np.float32], reset=False |
|
) |
|
if (X <= -self.skewedness).any(): |
|
raise ValueError("X may not contain entries smaller than -skewedness.") |
|
|
|
X += self.skewedness |
|
np.log(X, X) |
|
projection = safe_sparse_dot(X, self.random_weights_) |
|
projection += self.random_offset_ |
|
np.cos(projection, projection) |
|
projection *= np.sqrt(2.0) / np.sqrt(self.n_components) |
|
return projection |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.transformer_tags.preserves_dtype = ["float64", "float32"] |
|
return tags |
|
|
|
|
|
class AdditiveChi2Sampler(TransformerMixin, BaseEstimator): |
|
"""Approximate feature map for additive chi2 kernel. |
|
|
|
    Uses samples of the Fourier transform of the kernel characteristic
    function, taken at regular intervals.
|
|
|
Since the kernel that is to be approximated is additive, the components of |
|
the input vectors can be treated separately. Each entry in the original |
|
space is transformed into 2*sample_steps-1 features, where sample_steps is |
|
a parameter of the method. Typical values of sample_steps include 1, 2 and |
|
3. |
|
|
|
Optimal choices for the sampling interval for certain data ranges can be |
|
computed (see the reference). The default values should be reasonable. |
|
|
|
Read more in the :ref:`User Guide <additive_chi_kernel_approx>`. |
|
|
|
Parameters |
|
---------- |
|
sample_steps : int, default=2 |
|
Gives the number of (complex) sampling points. |
|
|
|
sample_interval : float, default=None |
|
        Sampling interval. Must be specified when `sample_steps` is not in
        {1, 2, 3}.
|
|
|
Attributes |
|
---------- |
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
SkewedChi2Sampler : A Fourier-approximation to a non-additive variant of |
|
the chi squared kernel. |
|
|
|
sklearn.metrics.pairwise.chi2_kernel : The exact chi squared kernel. |
|
|
|
sklearn.metrics.pairwise.additive_chi2_kernel : The exact additive chi |
|
squared kernel. |
|
|
|
Notes |
|
----- |
|
    This estimator approximates a slightly different version of the additive
    chi squared kernel than
    :func:`sklearn.metrics.pairwise.additive_chi2_kernel` computes.
|
|
|
    This estimator is stateless and does not need to be fitted. However, we
    recommend calling :meth:`fit_transform` instead of :meth:`transform`, as
    parameter validation is only performed in :meth:`fit`.
|
|
|
References |
|
---------- |
|
See `"Efficient additive kernels via explicit feature maps" |
|
<http://www.robots.ox.ac.uk/~vedaldi/assets/pubs/vedaldi11efficient.pdf>`_ |
|
A. Vedaldi and A. Zisserman, Pattern Analysis and Machine Intelligence, |
|
2011 |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.datasets import load_digits |
|
>>> from sklearn.linear_model import SGDClassifier |
|
>>> from sklearn.kernel_approximation import AdditiveChi2Sampler |
|
>>> X, y = load_digits(return_X_y=True) |
|
>>> chi2sampler = AdditiveChi2Sampler(sample_steps=2) |
|
>>> X_transformed = chi2sampler.fit_transform(X, y) |
|
>>> clf = SGDClassifier(max_iter=5, random_state=0, tol=1e-3) |
|
>>> clf.fit(X_transformed, y) |
|
SGDClassifier(max_iter=5, random_state=0) |
|
>>> clf.score(X_transformed, y) |
|
0.9499... |
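
    Each input feature yields ``2 * sample_steps - 1`` output features; a
    minimal illustrative shape check::

        import numpy as np
        sampler = AdditiveChi2Sampler(sample_steps=2)
        Xt = sampler.fit_transform(np.random.RandomState(0).rand(4, 3))
        # Xt.shape == (4, 3 * (2 * 2 - 1)) == (4, 9)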
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"sample_steps": [Interval(Integral, 1, None, closed="left")], |
|
"sample_interval": [Interval(Real, 0, None, closed="left"), None], |
|
} |
|
|
|
def __init__(self, *, sample_steps=2, sample_interval=None): |
|
self.sample_steps = sample_steps |
|
self.sample_interval = sample_interval |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y=None): |
|
"""Only validates estimator's parameters. |
|
|
|
        This method (i) validates the estimator's parameters and
        (ii) keeps the estimator consistent with the scikit-learn
        transformer API.
|
|
|
Parameters |
|
---------- |
|
X : array-like, shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
y : array-like, shape (n_samples,) or (n_samples, n_outputs), \ |
|
default=None |
|
Target values (None for unsupervised transformations). |
|
|
|
Returns |
|
------- |
|
self : object |
|
Returns the transformer. |
|
""" |
|
X = validate_data(self, X, accept_sparse="csr", ensure_non_negative=True) |
|
|
|
if self.sample_interval is None and self.sample_steps not in (1, 2, 3): |
|
raise ValueError( |
|
"If sample_steps is not in [1, 2, 3]," |
|
" you need to provide sample_interval" |
|
) |
|
|
|
return self |
|
|
|
def transform(self, X): |
|
"""Apply approximate feature map to X. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix}, shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
Returns |
|
------- |
|
X_new : {ndarray, sparse matrix}, \ |
|
shape = (n_samples, n_features * (2*sample_steps - 1)) |
|
Whether the return value is an array or sparse matrix depends on |
|
the type of the input X. |
|
""" |
|
X = validate_data( |
|
self, X, accept_sparse="csr", reset=False, ensure_non_negative=True |
|
) |
|
sparse = sp.issparse(X) |
|
|
|
if self.sample_interval is None: |
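            # Use the heuristic defaults from figure 2 c) of "Efficient
            # additive kernels via explicit feature maps" (Vedaldi &
            # Zisserman, PAMI 2011).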
|
|
|
|
|
|
|
|
|
if self.sample_steps == 1: |
|
sample_interval = 0.8 |
|
elif self.sample_steps == 2: |
|
sample_interval = 0.5 |
|
elif self.sample_steps == 3: |
|
sample_interval = 0.4 |
|
else: |
|
raise ValueError( |
|
"If sample_steps is not in [1, 2, 3]," |
|
" you need to provide sample_interval" |
|
) |
|
else: |
|
sample_interval = self.sample_interval |
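
        # Dispatch on sparsity; both paths emit
        # n_features * (2 * sample_steps - 1) output columns.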
|
|
|
|
|
|
|
|
|
transf = self._transform_sparse if sparse else self._transform_dense |
|
return transf(X, self.sample_steps, sample_interval) |
|
|
|
def get_feature_names_out(self, input_features=None): |
|
"""Get output feature names for transformation. |
|
|
|
Parameters |
|
---------- |
|
input_features : array-like of str or None, default=None |
|
Only used to validate feature names with the names seen in :meth:`fit`. |
|
|
|
Returns |
|
------- |
|
feature_names_out : ndarray of str objects |
|
Transformed feature names. |
|
""" |
|
|
|
|
|
|
|
check_is_fitted(self, attributes="n_features_in_") |
|
input_features = _check_feature_names_in( |
|
self, input_features, generate_names=True |
|
) |
|
est_name = self.__class__.__name__.lower() |
|
|
|
names_list = [f"{est_name}_{name}_sqrt" for name in input_features] |
|
|
|
for j in range(1, self.sample_steps): |
|
cos_names = [f"{est_name}_{name}_cos{j}" for name in input_features] |
|
sin_names = [f"{est_name}_{name}_sin{j}" for name in input_features] |
|
names_list.extend(cos_names + sin_names) |
|
|
|
return np.asarray(names_list, dtype=object) |
|
|
|
@staticmethod |
|
def _transform_dense(X, sample_steps, sample_interval): |
|
non_zero = X != 0.0 |
|
X_nz = X[non_zero] |
|
|
|
X_step = np.zeros_like(X) |
|
X_step[non_zero] = np.sqrt(X_nz * sample_interval) |
|
|
|
X_new = [X_step] |
|
|
|
log_step_nz = sample_interval * np.log(X_nz) |
|
step_nz = 2 * X_nz * sample_interval |
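
        # Each sampled frequency j >= 1 contributes a cosine and a sine
        # feature weighted by sqrt(2 * x * L / cosh(pi * j * L)), with L the
        # sample interval, following Vedaldi & Zisserman (2011).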
|
|
|
for j in range(1, sample_steps): |
|
factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval)) |
|
|
|
X_step = np.zeros_like(X) |
|
X_step[non_zero] = factor_nz * np.cos(j * log_step_nz) |
|
X_new.append(X_step) |
|
|
|
X_step = np.zeros_like(X) |
|
X_step[non_zero] = factor_nz * np.sin(j * log_step_nz) |
|
X_new.append(X_step) |
|
|
|
return np.hstack(X_new) |
|
|
|
@staticmethod |
|
def _transform_sparse(X, sample_steps, sample_interval): |
|
indices = X.indices.copy() |
|
indptr = X.indptr.copy() |
|
|
|
data_step = np.sqrt(X.data * sample_interval) |
|
X_step = sp.csr_matrix( |
|
(data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False |
|
) |
|
X_new = [X_step] |
|
|
|
log_step_nz = sample_interval * np.log(X.data) |
|
step_nz = 2 * X.data * sample_interval |
|
|
|
for j in range(1, sample_steps): |
|
factor_nz = np.sqrt(step_nz / np.cosh(np.pi * j * sample_interval)) |
|
|
|
data_step = factor_nz * np.cos(j * log_step_nz) |
|
X_step = sp.csr_matrix( |
|
(data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False |
|
) |
|
X_new.append(X_step) |
|
|
|
data_step = factor_nz * np.sin(j * log_step_nz) |
|
X_step = sp.csr_matrix( |
|
(data_step, indices, indptr), shape=X.shape, dtype=X.dtype, copy=False |
|
) |
|
X_new.append(X_step) |
|
|
|
return sp.hstack(X_new) |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.requires_fit = False |
|
tags.input_tags.positive_only = True |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
class Nystroem(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): |
|
"""Approximate a kernel map using a subset of the training data. |
|
|
|
Constructs an approximate feature map for an arbitrary kernel |
|
using a subset of the data as basis. |
|
|
|
Read more in the :ref:`User Guide <nystroem_kernel_approx>`. |
|
|
|
.. versionadded:: 0.13 |
|
|
|
Parameters |
|
---------- |
|
kernel : str or callable, default='rbf' |
|
Kernel map to be approximated. A callable should accept two arguments |
|
and the keyword arguments passed to this object as `kernel_params`, and |
|
should return a floating point number. |
|
|
|
gamma : float, default=None |
|
Gamma parameter for the RBF, laplacian, polynomial, exponential chi2 |
|
and sigmoid kernels. Interpretation of the default value is left to |
|
the kernel; see the documentation for sklearn.metrics.pairwise. |
|
Ignored by other kernels. |
|
|
|
coef0 : float, default=None |
|
Zero coefficient for polynomial and sigmoid kernels. |
|
Ignored by other kernels. |
|
|
|
degree : float, default=None |
|
Degree of the polynomial kernel. Ignored by other kernels. |
|
|
|
kernel_params : dict, default=None |
|
Additional parameters (keyword arguments) for kernel function passed |
|
as callable object. |
|
|
|
n_components : int, default=100 |
|
Number of features to construct. |
|
How many data points will be used to construct the mapping. |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Pseudo-random number generator to control the uniform sampling without |
|
replacement of `n_components` of the training data to construct the |
|
basis kernel. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
n_jobs : int, default=None |
|
The number of jobs to use for the computation. This works by breaking |
|
down the kernel matrix into `n_jobs` even slices and computing them in |
|
parallel. |
|
|
|
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. |
|
``-1`` means using all processors. See :term:`Glossary <n_jobs>` |
|
for more details. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
Attributes |
|
---------- |
|
components_ : ndarray of shape (n_components, n_features) |
|
Subset of training points used to construct the feature map. |
|
|
|
component_indices_ : ndarray of shape (n_components) |
|
Indices of ``components_`` in the training set. |
|
|
|
normalization_ : ndarray of shape (n_components, n_components) |
|
Normalization matrix needed for embedding. |
|
        Pseudo-inverse of the square root of the kernel matrix on
        ``components_``.
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
AdditiveChi2Sampler : Approximate feature map for additive chi2 kernel. |
|
PolynomialCountSketch : Polynomial kernel approximation via Tensor Sketch. |
|
RBFSampler : Approximate a RBF kernel feature map using random Fourier |
|
features. |
|
SkewedChi2Sampler : Approximate feature map for "skewed chi-squared" kernel. |
|
sklearn.metrics.pairwise.kernel_metrics : List of built-in kernels. |
|
|
|
References |
|
---------- |
|
* Williams, C.K.I. and Seeger, M. |
|
"Using the Nystroem method to speed up kernel machines", |
|
Advances in neural information processing systems 2001 |
|
|
|
* T. Yang, Y. Li, M. Mahdavi, R. Jin and Z. Zhou |
|
"Nystroem Method vs Random Fourier Features: A Theoretical and Empirical |
|
Comparison", |
|
Advances in Neural Information Processing Systems 2012 |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn import datasets, svm |
|
>>> from sklearn.kernel_approximation import Nystroem |
|
>>> X, y = datasets.load_digits(n_class=9, return_X_y=True) |
|
>>> data = X / 16. |
|
>>> clf = svm.LinearSVC() |
|
>>> feature_map_nystroem = Nystroem(gamma=.2, |
|
... random_state=1, |
|
... n_components=300) |
|
>>> data_transformed = feature_map_nystroem.fit_transform(data) |
|
>>> clf.fit(data_transformed, y) |
|
LinearSVC() |
|
>>> clf.score(data_transformed, y) |
|
0.9987... |
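
    An illustrative property (a sketch, not a guarantee): when
    ``n_components`` equals the number of samples, the Nystroem map
    reproduces the kernel up to numerical error, i.e. ``Z @ Z.T`` matches
    the exact kernel matrix::

        import numpy as np
        from sklearn.metrics.pairwise import rbf_kernel

        rng = np.random.RandomState(0)
        X_dense = rng.normal(size=(30, 4))
        ny = Nystroem(gamma=0.5, n_components=30, random_state=0)
        Z = ny.fit_transform(X_dense)
        K_exact = rbf_kernel(X_dense, gamma=0.5)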
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"kernel": [ |
|
StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) | {"precomputed"}), |
|
callable, |
|
], |
|
"gamma": [Interval(Real, 0, None, closed="left"), None], |
|
"coef0": [Interval(Real, None, None, closed="neither"), None], |
|
"degree": [Interval(Real, 1, None, closed="left"), None], |
|
"kernel_params": [dict, None], |
|
"n_components": [Interval(Integral, 1, None, closed="left")], |
|
"random_state": ["random_state"], |
|
"n_jobs": [Integral, None], |
|
} |
|
|
|
def __init__( |
|
self, |
|
kernel="rbf", |
|
*, |
|
gamma=None, |
|
coef0=None, |
|
degree=None, |
|
kernel_params=None, |
|
n_components=100, |
|
random_state=None, |
|
n_jobs=None, |
|
): |
|
self.kernel = kernel |
|
self.gamma = gamma |
|
self.coef0 = coef0 |
|
self.degree = degree |
|
self.kernel_params = kernel_params |
|
self.n_components = n_components |
|
self.random_state = random_state |
|
self.n_jobs = n_jobs |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y=None): |
|
"""Fit estimator to data. |
|
|
|
Samples a subset of training points, computes kernel |
|
on these and computes normalization matrix. |
|
|
|
Parameters |
|
---------- |
|
X : array-like, shape (n_samples, n_features) |
|
Training data, where `n_samples` is the number of samples |
|
and `n_features` is the number of features. |
|
|
|
y : array-like, shape (n_samples,) or (n_samples, n_outputs), \ |
|
default=None |
|
Target values (None for unsupervised transformations). |
|
|
|
Returns |
|
------- |
|
self : object |
|
Returns the instance itself. |
|
""" |
|
X = validate_data(self, X, accept_sparse="csr") |
|
rnd = check_random_state(self.random_state) |
|
n_samples = X.shape[0] |
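
        # Subsample the training data without replacement to obtain the
        # basis vectors.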
|
|
|
|
|
if self.n_components > n_samples: |
|
|
|
n_components = n_samples |
|
warnings.warn( |
|
"n_components > n_samples. This is not possible.\n" |
|
"n_components was set to n_samples, which results" |
|
" in inefficient evaluation of the full kernel." |
|
) |
|
|
|
else: |
|
n_components = self.n_components |
|
n_components = min(n_samples, n_components) |
|
inds = rnd.permutation(n_samples) |
|
basis_inds = inds[:n_components] |
|
basis = X[basis_inds] |
|
|
|
basis_kernel = pairwise_kernels( |
|
basis, |
|
metric=self.kernel, |
|
filter_params=True, |
|
n_jobs=self.n_jobs, |
|
**self._get_kernel_params(), |
|
) |
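
        # Normalization matrix: the pseudo-inverse square root of the basis
        # kernel. Clipping the singular values guards against a singular
        # basis kernel matrix.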
|
|
|
|
|
U, S, V = svd(basis_kernel) |
|
S = np.maximum(S, 1e-12) |
|
self.normalization_ = np.dot(U / np.sqrt(S), V) |
|
self.components_ = basis |
|
self.component_indices_ = basis_inds |
|
self._n_features_out = n_components |
|
return self |
|
|
|
def transform(self, X): |
|
"""Apply feature map to X. |
|
|
|
Computes an approximate feature map using the kernel |
|
between some training points and X. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
Data to transform. |
|
|
|
Returns |
|
------- |
|
X_transformed : ndarray of shape (n_samples, n_components) |
|
Transformed data. |
|
""" |
|
check_is_fitted(self) |
|
X = validate_data(self, X, accept_sparse="csr", reset=False) |
|
|
|
kernel_params = self._get_kernel_params() |
|
embedded = pairwise_kernels( |
|
X, |
|
self.components_, |
|
metric=self.kernel, |
|
filter_params=True, |
|
n_jobs=self.n_jobs, |
|
**kernel_params, |
|
) |
|
return np.dot(embedded, self.normalization_.T) |
|
|
|
def _get_kernel_params(self): |
|
params = self.kernel_params |
|
if params is None: |
|
params = {} |
|
if not callable(self.kernel) and self.kernel != "precomputed": |
|
for param in KERNEL_PARAMS[self.kernel]: |
|
if getattr(self, param) is not None: |
|
params[param] = getattr(self, param) |
|
else: |
|
if ( |
|
self.gamma is not None |
|
or self.coef0 is not None |
|
or self.degree is not None |
|
): |
|
raise ValueError( |
|
"Don't pass gamma, coef0 or degree to " |
|
"Nystroem if using a callable " |
|
"or precomputed kernel" |
|
) |
|
|
|
return params |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
tags.transformer_tags.preserves_dtype = ["float64", "float32"] |
|
return tags |
|
|