|
"""Orthogonal matching pursuit algorithms""" |
|
|
|
|
|
|
|
|
|
import warnings |
|
from math import sqrt |
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
from scipy import linalg |
|
from scipy.linalg.lapack import get_lapack_funcs |
|
|
|
from ..base import MultiOutputMixin, RegressorMixin, _fit_context |
|
from ..model_selection import check_cv |
|
from ..utils import Bunch, as_float_array, check_array |
|
from ..utils._param_validation import Interval, StrOptions, validate_params |
|
from ..utils.metadata_routing import ( |
|
MetadataRouter, |
|
MethodMapping, |
|
_raise_for_params, |
|
_routing_enabled, |
|
process_routing, |
|
) |
|
from ..utils.parallel import Parallel, delayed |
|
from ..utils.validation import validate_data |
|
from ._base import LinearModel, _pre_fit |
|
|
|
# Warning message shared by the OMP solvers below.  It is emitted when the
# greedy atom selection has to stop early because the chosen atoms became
# (numerically) linearly dependent, i.e. the Cholesky update of the active
# set's Gram matrix would no longer be positive definite.
premature = (
    "Orthogonal matching pursuit ended prematurely due to linear"
    " dependence in the dictionary. The requested precision might"
    " not have been met."
)
|
|
|
|
|
def _cholesky_omp(X, y, n_nonzero_coefs, tol=None, copy_X=True, return_path=False):
    """Orthogonal Matching Pursuit step using the Cholesky decomposition.

    Greedily selects the dictionary atom (column of `X`) most correlated with
    the current residual, maintains a Cholesky factorization of the active
    atoms' Gram matrix that is grown by one row/column per iteration, and
    re-solves the least-squares coefficients of the active set at every step.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Input dictionary. Columns are assumed to have unit norm.

    y : ndarray of shape (n_samples,)
        Input targets.

    n_nonzero_coefs : int
        Targeted number of non-zero elements.

    tol : float, default=None
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_X : bool, default=True
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway. Note that this function swaps columns of `X`
        in place, so a non-copied `X` is modified by the caller's reference.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    Returns
    -------
    gamma : ndarray of shape (n_nonzero_coefs,)
        Non-zero elements of the solution.

    idx : ndarray of shape (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector.

    coef : ndarray of shape (n_features, n_nonzero_coefs)
        The first k values of column k correspond to the coefficient value
        for the active features at that step. The lower left triangle contains
        garbage. Only returned if ``return_path=True``.

    n_active : int
        Number of active features at convergence.
    """
    if copy_X:
        X = X.copy("F")
    else:  # even without a copy, BLAS routines below need Fortran order
        X = np.asfortranarray(X)

    # Machine epsilon of X's dtype: threshold below which correlations and
    # Cholesky pivots are considered numerically zero.
    min_float = np.finfo(X.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (X,))
    (potrs,) = get_lapack_funcs(("potrs",), (X,))

    # alpha holds X.T @ y; its leading n_active entries (after the swaps
    # below) form the right-hand side of the active-set normal equations.
    alpha = np.dot(X.T, y)
    residual = y
    gamma = np.empty(0)
    n_active = 0
    # Tracks the original column index of each (possibly swapped) position.
    indices = np.arange(X.shape[1])

    # With a tolerance-based stop, up to all features may become active.
    max_features = X.shape[1] if tol is not None else n_nonzero_coefs

    # Lower-triangular Cholesky factor of the active atoms' Gram matrix,
    # grown by one row per iteration; only the leading block is valid.
    L = np.empty((max_features, max_features), dtype=X.dtype)

    if return_path:
        coefs = np.empty_like(L)

    while True:
        # Greedy step: atom most correlated with the current residual.
        lam = np.argmax(np.abs(np.dot(X.T, residual)))
        if lam < n_active or alpha[lam] ** 2 < min_float:
            # lam < n_active means the best atom is already in the active set
            # (active columns are swapped to the front); either condition
            # indicates linear dependence, so stop early with a warning.
            warnings.warn(premature, RuntimeWarning, stacklevel=2)
            break

        if n_active > 0:
            # Extend the Cholesky factor with the new atom: solve
            # L[:k, :k] w = X[:, :k].T @ X[:, lam] in place, then compute the
            # new diagonal entry from the atom's squared norm minus ||w||^2.
            L[n_active, :n_active] = np.dot(X[:, :n_active].T, X[:, lam])
            linalg.solve_triangular(
                L[:n_active, :n_active],
                L[n_active, :n_active],
                trans=0,
                lower=1,
                overwrite_b=True,
                check_finite=False,
            )
            v = nrm2(L[n_active, :n_active]) ** 2
            Lkk = linalg.norm(X[:, lam]) ** 2 - v
            if Lkk <= min_float:
                # Non-positive pivot: the new atom is (numerically) a linear
                # combination of the active ones.
                warnings.warn(premature, RuntimeWarning, stacklevel=2)
                break
            L[n_active, n_active] = sqrt(Lkk)
        else:
            L[0, 0] = linalg.norm(X[:, lam])

        # Swap the selected atom into the leading (active) block, keeping
        # alpha and the original-index bookkeeping in sync.
        X.T[n_active], X.T[lam] = swap(X.T[n_active], X.T[lam])
        alpha[n_active], alpha[lam] = alpha[lam], alpha[n_active]
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        n_active += 1

        # Solve (L L^T) gamma = X'y for the active set via the Cholesky
        # factor (LAPACK potrs).
        gamma, _ = potrs(
            L[:n_active, :n_active], alpha[:n_active], lower=True, overwrite_b=False
        )

        if return_path:
            coefs[:n_active, n_active - 1] = gamma
        residual = y - np.dot(X[:, :n_active], gamma)
        if tol is not None and nrm2(residual) ** 2 <= tol:
            break
        elif n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    else:
        return gamma, indices[:n_active], n_active
|
|
|
|
|
def _gram_omp(
    Gram,
    Xy,
    n_nonzero_coefs,
    tol_0=None,
    tol=None,
    copy_Gram=True,
    copy_Xy=True,
    return_path=False,
):
    """Orthogonal Matching Pursuit step on a precomputed Gram matrix.

    This function uses the Cholesky decomposition method.  It mirrors
    :func:`_cholesky_omp` but works entirely from `Gram = X.T @ X` and
    `Xy = X.T @ y`, tracking the residual error incrementally instead of
    recomputing it from the data.

    Parameters
    ----------
    Gram : ndarray of shape (n_features, n_features)
        Gram matrix of the input data matrix.

    Xy : ndarray of shape (n_features,)
        Input targets.

    n_nonzero_coefs : int
        Targeted number of non-zero elements.

    tol_0 : float, default=None
        Squared norm of y, required if tol is not None.

    tol : float, default=None
        Targeted squared error, if not None overrides n_nonzero_coefs.

    copy_Gram : bool, default=True
        Whether the gram matrix must be copied by the algorithm. A false
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway. Note that this function swaps rows and columns
        of `Gram` in place, so a non-copied `Gram` is modified by reference.

    copy_Xy : bool, default=True
        Whether the covariance vector Xy must be copied by the algorithm.
        If False, it may be overwritten.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    Returns
    -------
    gamma : ndarray of shape (n_nonzero_coefs,)
        Non-zero elements of the solution.

    idx : ndarray of shape (n_nonzero_coefs,)
        Indices of the positions of the elements in gamma within the solution
        vector.

    coefs : ndarray of shape (n_features, n_nonzero_coefs)
        The first k values of column k correspond to the coefficient value
        for the active features at that step. The lower left triangle contains
        garbage. Only returned if ``return_path=True``.

    n_active : int
        Number of active features at convergence.
    """
    Gram = Gram.copy("F") if copy_Gram else np.asfortranarray(Gram)

    # Copy Xy whenever requested, and also when it is read-only, since it is
    # swapped in place below.
    if copy_Xy or not Xy.flags.writeable:
        Xy = Xy.copy()

    # Machine epsilon of Gram's dtype: numerical-zero threshold for
    # correlations and Cholesky pivots.
    min_float = np.finfo(Gram.dtype).eps
    nrm2, swap = linalg.get_blas_funcs(("nrm2", "swap"), (Gram,))
    (potrs,) = get_lapack_funcs(("potrs",), (Gram,))

    # Tracks the original feature index of each (possibly swapped) position.
    indices = np.arange(len(Gram))
    # alpha is the correlation of each atom with the current residual;
    # initially equal to Xy since the residual starts at y.
    alpha = Xy
    # tol_curr / delta implement the incremental squared-error update of
    # Rubinstein et al.'s batch OMP; tol_0 is ||y||^2.
    tol_curr = tol_0
    delta = 0
    gamma = np.empty(0)
    n_active = 0

    # With a tolerance-based stop, up to all features may become active.
    max_features = len(Gram) if tol is not None else n_nonzero_coefs

    # Lower-triangular Cholesky factor of the active atoms' Gram matrix,
    # grown by one row per iteration; only the leading block is valid.
    L = np.empty((max_features, max_features), dtype=Gram.dtype)

    # Placeholder; overwritten with sqrt(Gram[lam, lam]) on first selection.
    L[0, 0] = 1.0
    if return_path:
        coefs = np.empty_like(L)

    while True:
        # Greedy step: atom most correlated with the current residual.
        lam = np.argmax(np.abs(alpha))
        if lam < n_active or alpha[lam] ** 2 < min_float:
            # lam < n_active means the best atom is already active (active
            # rows/columns are swapped to the front); either condition
            # indicates linear dependence, so stop early with a warning.
            warnings.warn(premature, RuntimeWarning, stacklevel=3)
            break
        if n_active > 0:
            # Extend the Cholesky factor with the new atom (see
            # _cholesky_omp; here the inner products come from Gram).
            L[n_active, :n_active] = Gram[lam, :n_active]
            linalg.solve_triangular(
                L[:n_active, :n_active],
                L[n_active, :n_active],
                trans=0,
                lower=1,
                overwrite_b=True,
                check_finite=False,
            )
            v = nrm2(L[n_active, :n_active]) ** 2
            Lkk = Gram[lam, lam] - v
            if Lkk <= min_float:
                # Non-positive pivot: the new atom is (numerically) linearly
                # dependent on the active ones.
                warnings.warn(premature, RuntimeWarning, stacklevel=3)
                break
            L[n_active, n_active] = sqrt(Lkk)
        else:
            L[0, 0] = sqrt(Gram[lam, lam])

        # Swap the selected atom into the leading block of Gram (rows and
        # columns) and keep indices and Xy in sync.
        Gram[n_active], Gram[lam] = swap(Gram[n_active], Gram[lam])
        Gram.T[n_active], Gram.T[lam] = swap(Gram.T[n_active], Gram.T[lam])
        indices[n_active], indices[lam] = indices[lam], indices[n_active]
        Xy[n_active], Xy[lam] = Xy[lam], Xy[n_active]
        n_active += 1

        # Solve (L L^T) gamma = Xy for the active set (LAPACK potrs).
        gamma, _ = potrs(
            L[:n_active, :n_active], Xy[:n_active], lower=True, overwrite_b=False
        )
        if return_path:
            coefs[:n_active, n_active - 1] = gamma
        # beta = Gram @ gamma over the active set; the new residual
        # correlations are Xy - beta.
        beta = np.dot(Gram[:, :n_active], gamma)
        alpha = Xy - beta
        if tol is not None:
            # Incremental update of the squared residual norm:
            # err_k = err_{k-1} + delta_{k-1} - delta_k with
            # delta_k = <gamma, beta_active>.
            tol_curr += delta
            delta = np.inner(gamma, beta[:n_active])
            tol_curr -= delta
            if abs(tol_curr) <= tol:
                break
        elif n_active == max_features:
            break

    if return_path:
        return gamma, indices[:n_active], coefs[:, :n_active], n_active
    else:
        return gamma, indices[:n_active], n_active
|
|
|
|
|
@validate_params(
    {
        "X": ["array-like"],
        "y": [np.ndarray],
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "precompute": ["boolean", StrOptions({"auto"})],
        "copy_X": ["boolean"],
        "return_path": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def orthogonal_mp(
    X,
    y,
    *,
    n_nonzero_coefs=None,
    tol=None,
    precompute=False,
    copy_X=True,
    return_path=False,
    return_n_iter=False,
):
    r"""Orthogonal Matching Pursuit (OMP).

    Solves n_targets Orthogonal Matching Pursuit problems.
    An instance of the problem has the form:

    When parametrized by the number of non-zero coefficients using
    `n_nonzero_coefs`:
    argmin ||y - X\gamma||^2 subject to ||\gamma||_0 <= n_{nonzero coefs}

    When parametrized by error using the parameter `tol`:
    argmin ||\gamma||_0 subject to ||y - X\gamma||^2 <= tol

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Input data. Columns are assumed to have unit norm.

    y : ndarray of shape (n_samples,) or (n_samples, n_targets)
        Input targets.

    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. If None (by
        default) this value is set to 10% of n_features, but at least 1.

    tol : float, default=None
        Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs.

    precompute : 'auto' or bool, default=False
        Whether to perform precomputations. Improves performance when n_targets
        or n_samples is very large.

    copy_X : bool, default=True
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    coef : ndarray of shape (n_features,) or (n_features, n_targets)
        Coefficients of the OMP solution. If `return_path=True`, this contains
        the whole coefficient path. In this case its shape is
        (n_features, n_features) or (n_features, n_targets, n_features) and
        iterating over the last axis generates coefficients in increasing order
        of active features.

    n_iters : array-like or int
        Number of active features across every target. Returned only if
        `return_n_iter` is set to True.

    See Also
    --------
    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model.
    orthogonal_mp_gram : Solve OMP problems using Gram matrix and the product X.T * y.
    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
    sklearn.decomposition.sparse_encode : Sparse coding.

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import orthogonal_mp
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> coef = orthogonal_mp(X, y)
    >>> coef.shape
    (100,)
    >>> X[:1,] @ coef
    array([-78.68...])
    """
    # check_array already honors the caller's copy request, so no further
    # copying is needed below unless there are multiple targets.
    X = check_array(X, order="F", copy=copy_X)
    copy_X = False
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    y = check_array(y)
    if y.shape[1] > 1:
        # _cholesky_omp swaps columns of X in place, so with several targets
        # each per-target call must work on its own copy.
        copy_X = True
    if n_nonzero_coefs is None and tol is None:
        # Default: 10% of n_features, but at least one atom.
        n_nonzero_coefs = max(int(0.1 * X.shape[1]), 1)
    if tol is None and n_nonzero_coefs > X.shape[1]:
        raise ValueError(
            "The number of atoms cannot be more than the number of features"
        )
    if precompute == "auto":
        # Precomputing the Gram matrix pays off when there are more samples
        # than features.
        precompute = X.shape[0] > X.shape[1]
    if precompute:
        # Delegate to the Gram-based solver with G = X'X and Xy = X'y.
        G = np.dot(X.T, X)
        G = np.asfortranarray(G)
        Xy = np.dot(X.T, y)
        if tol is not None:
            norms_squared = np.sum((y**2), axis=0)
        else:
            norms_squared = None
        return orthogonal_mp_gram(
            G,
            Xy,
            n_nonzero_coefs=n_nonzero_coefs,
            tol=tol,
            norms_squared=norms_squared,
            copy_Gram=copy_X,
            copy_Xy=False,
            return_path=return_path,
        )

    if return_path:
        coef = np.zeros((X.shape[1], y.shape[1], X.shape[1]))
    else:
        coef = np.zeros((X.shape[1], y.shape[1]))
    n_iters = []

    # Solve one OMP problem per target column.
    for k in range(y.shape[1]):
        out = _cholesky_omp(
            X, y[:, k], n_nonzero_coefs, tol, copy_X=copy_X, return_path=return_path
        )
        if return_path:
            _, idx, coefs, n_iter = out
            # Trim the path axis to the number of steps actually taken.
            coef = coef[:, :, : len(idx)]
            # Column n_active of the returned path holds the coefficients of
            # the first n_active + 1 selected atoms.
            for n_active, x in enumerate(coefs.T):
                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
        else:
            x, idx, n_iter = out
            coef[idx, k] = x
        n_iters.append(n_iter)

    # For a single target, report a scalar iteration count.
    if y.shape[1] == 1:
        n_iters = n_iters[0]

    if return_n_iter:
        return np.squeeze(coef), n_iters
    else:
        return np.squeeze(coef)
|
|
|
|
|
@validate_params(
    {
        "Gram": ["array-like"],
        "Xy": ["array-like"],
        "n_nonzero_coefs": [Interval(Integral, 0, None, closed="neither"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "norms_squared": ["array-like", None],
        "copy_Gram": ["boolean"],
        "copy_Xy": ["boolean"],
        "return_path": ["boolean"],
        "return_n_iter": ["boolean"],
    },
    prefer_skip_nested_validation=True,
)
def orthogonal_mp_gram(
    Gram,
    Xy,
    *,
    n_nonzero_coefs=None,
    tol=None,
    norms_squared=None,
    copy_Gram=True,
    copy_Xy=True,
    return_path=False,
    return_n_iter=False,
):
    """Gram Orthogonal Matching Pursuit (OMP).

    Solves n_targets Orthogonal Matching Pursuit problems using only
    the Gram matrix X.T * X and the product X.T * y.

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    Gram : array-like of shape (n_features, n_features)
        Gram matrix of the input data: `X.T * X`.

    Xy : array-like of shape (n_features,) or (n_features, n_targets)
        Input targets multiplied by `X`: `X.T * y`.

    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. If `None` (by
        default) this value is set to 10% of n_features, but at least 1.

    tol : float, default=None
        Maximum squared norm of the residual. If not `None`,
        overrides `n_nonzero_coefs`.

    norms_squared : array-like of shape (n_targets,), default=None
        Squared L2 norms of the lines of `y`. Required if `tol` is not None.

    copy_Gram : bool, default=True
        Whether the gram matrix must be copied by the algorithm. A `False`
        value is only helpful if it is already Fortran-ordered, otherwise a
        copy is made anyway.

    copy_Xy : bool, default=True
        Whether the covariance vector `Xy` must be copied by the algorithm.
        If `False`, it may be overwritten.

    return_path : bool, default=False
        Whether to return every value of the nonzero coefficients along the
        forward path. Useful for cross-validation.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    coef : ndarray of shape (n_features,) or (n_features, n_targets)
        Coefficients of the OMP solution. If `return_path=True`, this contains
        the whole coefficient path. In this case its shape is
        `(n_features, n_features)` or `(n_features, n_targets, n_features)` and
        iterating over the last axis yields coefficients in increasing order
        of active features.

    n_iters : list or int
        Number of active features across every target. Returned only if
        `return_n_iter` is set to True.

    See Also
    --------
    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
    lars_path : Compute Least Angle Regression or Lasso path using
        LARS algorithm.
    sklearn.decomposition.sparse_encode : Generic sparse coding.
        Each column of the result is the solution to a Lasso problem.

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.datasets import make_regression
    >>> from sklearn.linear_model import orthogonal_mp_gram
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> coef = orthogonal_mp_gram(X.T @ X, X.T @ y)
    >>> coef.shape
    (100,)
    >>> X[:1,] @ coef
    array([-78.68...])
    """
    Gram = check_array(Gram, order="F", copy=copy_Gram)
    Xy = np.asarray(Xy)
    if Xy.ndim > 1 and Xy.shape[1] > 1:
        # _gram_omp swaps rows/columns of Gram in place, so with several
        # targets each per-target call must work on its own copy.
        copy_Gram = True
    if Xy.ndim == 1:
        Xy = Xy[:, np.newaxis]
        if tol is not None:
            # With a single 1-D target, norms_squared is expected to be a
            # scalar; wrap it so it can be indexed per target below.
            norms_squared = [norms_squared]
    if copy_Xy or not Xy.flags.writeable:
        # Make the copy once here instead of inside each _gram_omp call.
        Xy = Xy.copy()

    if n_nonzero_coefs is None and tol is None:
        # Default: 10% of n_features, but at least one atom.  The lower bound
        # keeps this consistent with `orthogonal_mp` and avoids a spurious
        # "number of atoms must be positive" error for fewer than 10 features.
        n_nonzero_coefs = max(int(0.1 * len(Gram)), 1)
    if tol is not None and norms_squared is None:
        raise ValueError(
            "Gram OMP needs the precomputed norms in order "
            "to evaluate the error sum of squares."
        )
    if tol is not None and tol < 0:
        raise ValueError("Epsilon cannot be negative")
    if tol is None and n_nonzero_coefs <= 0:
        raise ValueError("The number of atoms must be positive")
    if tol is None and n_nonzero_coefs > len(Gram):
        raise ValueError(
            "The number of atoms cannot be more than the number of features"
        )

    if return_path:
        coef = np.zeros((len(Gram), Xy.shape[1], len(Gram)), dtype=Gram.dtype)
    else:
        coef = np.zeros((len(Gram), Xy.shape[1]), dtype=Gram.dtype)

    n_iters = []
    # Solve one OMP problem per target column.
    for k in range(Xy.shape[1]):
        out = _gram_omp(
            Gram,
            Xy[:, k],
            n_nonzero_coefs,
            norms_squared[k] if tol is not None else None,
            tol,
            copy_Gram=copy_Gram,
            copy_Xy=False,
            return_path=return_path,
        )
        if return_path:
            _, idx, coefs, n_iter = out
            # Trim the path axis to the number of steps actually taken.
            coef = coef[:, :, : len(idx)]
            # Column n_active of the returned path holds the coefficients of
            # the first n_active + 1 selected atoms.
            for n_active, x in enumerate(coefs.T):
                coef[idx[: n_active + 1], k, n_active] = x[: n_active + 1]
        else:
            x, idx, n_iter = out
            coef[idx, k] = x
        n_iters.append(n_iter)

    # For a single target, report a scalar iteration count.
    if Xy.shape[1] == 1:
        n_iters = n_iters[0]

    if return_n_iter:
        return np.squeeze(coef), n_iters
    else:
        return np.squeeze(coef)
|
|
|
|
|
class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel):
    """Orthogonal Matching Pursuit model (OMP).

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    n_nonzero_coefs : int, default=None
        Desired number of non-zero entries in the solution. Ignored if `tol` is set.
        When `None` and `tol` is also `None`, this value is either set to 10% of
        `n_features` or 1, whichever is greater.

    tol : float, default=None
        Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    precompute : 'auto' or bool, default='auto'
        Whether to use a precomputed Gram and Xy matrix to speed up
        calculations. Improves performance when :term:`n_targets` or
        :term:`n_samples` is very large. Note that if you already have such
        matrices, you can pass them directly to the fit method.

    Attributes
    ----------
    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
        Parameter vector (w in the formula).

    intercept_ : float or ndarray of shape (n_targets,)
        Independent term in decision function.

    n_iter_ : int or array-like
        Number of active features across every target.

    n_nonzero_coefs_ : int or None
        The number of non-zero coefficients in the solution or `None` when `tol` is
        set. If `n_nonzero_coefs` is None and `tol` is None this value is either set
        to 10% of `n_features` or 1, whichever is greater.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
    orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
        problems using only the Gram matrix X.T * X and the product X.T * y.
    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
    Lars : Least Angle Regression model a.k.a. LAR.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    sklearn.decomposition.sparse_encode : Generic sparse coding.
        Each column of the result is the solution to a Lasso problem.
    OrthogonalMatchingPursuitCV : Cross-validated
        Orthogonal Matching Pursuit model (OMP).

    Notes
    -----
    Orthogonal matching pursuit was introduced in S. Mallat, Z. Zhang,
    Matching pursuits with time-frequency dictionaries, IEEE Transactions on
    Signal Processing, Vol. 41, No. 12. (December 1993), pp. 3397-3415.
    (https://www.di.ens.fr/~mallat/papiers/MallatPursuit93.pdf)

    This implementation is based on Rubinstein, R., Zibulevsky, M. and Elad,
    M., Efficient Implementation of the K-SVD Algorithm using Batch Orthogonal
    Matching Pursuit Technical Report - CS Technion, April 2008.
    https://www.cs.technion.ac.il/~ronrubin/Publications/KSVD-OMP-v2.pdf

    Examples
    --------
    >>> from sklearn.linear_model import OrthogonalMatchingPursuit
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(noise=4, random_state=0)
    >>> reg = OrthogonalMatchingPursuit().fit(X, y)
    >>> reg.score(X, y)
    0.9991...
    >>> reg.predict(X[:1,])
    array([-78.3854...])
    """

    # Declarative constraints checked by the @_fit_context machinery before
    # fitting.
    _parameter_constraints: dict = {
        "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left"), None],
        "tol": [Interval(Real, 0, None, closed="left"), None],
        "fit_intercept": ["boolean"],
        "precompute": [StrOptions({"auto"}), "boolean"],
    }

    def __init__(
        self,
        *,
        n_nonzero_coefs=None,
        tol=None,
        fit_intercept=True,
        precompute="auto",
    ):
        self.n_nonzero_coefs = n_nonzero_coefs
        self.tol = tol
        self.fit_intercept = fit_intercept
        self.precompute = precompute

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values. Will be cast to X's dtype if necessary.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        X, y = validate_data(self, X, y, multi_output=True, y_numeric=True)
        n_features = X.shape[1]

        # Center the data (per fit_intercept) and optionally precompute
        # Gram/Xy; Gram is False when precomputation was not performed.
        X, y, X_offset, y_offset, X_scale, Gram, Xy = _pre_fit(
            X, y, None, self.precompute, self.fit_intercept, copy=True
        )

        if y.ndim == 1:
            y = y[:, np.newaxis]

        if self.n_nonzero_coefs is None and self.tol is None:
            # Default: 10% of n_features, but at least one atom.
            self.n_nonzero_coefs_ = max(int(0.1 * n_features), 1)
        elif self.tol is not None:
            # tol takes precedence; the atom count is then data-dependent.
            self.n_nonzero_coefs_ = None
        else:
            self.n_nonzero_coefs_ = self.n_nonzero_coefs

        if Gram is False:
            # No precomputed Gram available: run OMP on the raw data.
            coef_, self.n_iter_ = orthogonal_mp(
                X,
                y,
                n_nonzero_coefs=self.n_nonzero_coefs_,
                tol=self.tol,
                precompute=False,
                copy_X=True,
                return_n_iter=True,
            )
        else:
            # Gram-based path needs ||y||^2 per target when stopping on tol.
            norms_sq = np.sum(y**2, axis=0) if self.tol is not None else None

            coef_, self.n_iter_ = orthogonal_mp_gram(
                Gram,
                Xy=Xy,
                n_nonzero_coefs=self.n_nonzero_coefs_,
                tol=self.tol,
                norms_squared=norms_sq,
                copy_Gram=True,
                copy_Xy=True,
                return_n_iter=True,
            )
        # orthogonal_mp returns (n_features, n_targets); estimators expose
        # (n_targets, n_features).
        self.coef_ = coef_.T
        self._set_intercept(X_offset, y_offset, X_scale)
        return self
|
|
|
|
|
def _omp_path_residues(
    X_train,
    y_train,
    X_test,
    y_test,
    copy=True,
    fit_intercept=True,
    max_iter=100,
):
    """Compute the residues on left-out data for a full LARS path.

    Fits an OMP coefficient path of up to `max_iter` steps on the training
    split and returns, for each step of the path, the prediction residues on
    the test split.

    Parameters
    ----------
    X_train : ndarray of shape (n_samples, n_features)
        The data to fit the LARS on.

    y_train : ndarray of shape (n_samples)
        The target variable to fit LARS on.

    X_test : ndarray of shape (n_samples, n_features)
        The data to compute the residues on.

    y_test : ndarray of shape (n_samples)
        The target variable to compute the residues on.

    copy : bool, default=True
        Whether X_train, X_test, y_train and y_test should be copied. If
        False, they may be overwritten (centering happens in place).

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    max_iter : int, default=100
        Maximum numbers of iterations to perform, therefore maximum features
        to include. 100 by default.

    Returns
    -------
    residues : ndarray of shape (n_samples, max_features)
        Residues of the prediction on the test data.
    """
    if copy:
        # Work on private copies so the in-place centering below does not
        # clobber the caller's folds.
        X_train, y_train = X_train.copy(), y_train.copy()
        X_test, y_test = X_test.copy(), y_test.copy()

    if fit_intercept:
        # Center both splits with the *training* statistics only.
        feature_means = X_train.mean(axis=0)
        X_train -= feature_means
        X_test -= feature_means
        target_mean = y_train.mean(axis=0)
        y_train = as_float_array(y_train, copy=False)
        y_train -= target_mean
        y_test = as_float_array(y_test, copy=False)
        y_test -= target_mean

    # Full forward path: one coefficient vector per number of active atoms.
    path = orthogonal_mp(
        X_train,
        y_train,
        n_nonzero_coefs=max_iter,
        tol=None,
        precompute=False,
        copy_X=False,
        return_path=True,
    )
    if path.ndim == 1:
        path = path[:, np.newaxis]

    # Residues of each path step's prediction on the held-out split.
    return np.dot(path.T, X_test.T) - y_test
|
|
|
|
|
class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel):
    """Cross-validated Orthogonal Matching Pursuit model (OMP).

    See glossary entry for :term:`cross-validation estimator`.

    Read more in the :ref:`User Guide <omp>`.

    Parameters
    ----------
    copy : bool, default=True
        Whether the design matrix X must be copied by the algorithm. A false
        value is only helpful if X is already Fortran-ordered, otherwise a
        copy is made anyway.

    fit_intercept : bool, default=True
        Whether to calculate the intercept for this model. If set
        to false, no intercept will be used in calculations
        (i.e. data is expected to be centered).

    max_iter : int, default=None
        Maximum numbers of iterations to perform, therefore maximum features
        to include. 10% of ``n_features`` but at least 5 if available.

    cv : int, cross-validation generator or iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds.
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used.

        Refer :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value if None changed from 3-fold to 5-fold.

    n_jobs : int, default=None
        Number of CPUs to use during the cross validation.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : bool or int, default=False
        Sets the verbosity amount.

    Attributes
    ----------
    intercept_ : float or ndarray of shape (n_targets,)
        Independent term in decision function.

    coef_ : ndarray of shape (n_features,) or (n_targets, n_features)
        Parameter vector (w in the problem formulation).

    n_nonzero_coefs_ : int
        Estimated number of non-zero coefficients giving the best mean squared
        error over the cross-validation folds.

    n_iter_ : int or array-like
        Number of active features across every target for the model refit with
        the best hyperparameters got by cross-validating across all folds.

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    orthogonal_mp : Solves n_targets Orthogonal Matching Pursuit problems.
    orthogonal_mp_gram : Solves n_targets Orthogonal Matching Pursuit
        problems using only the Gram matrix X.T * X and the product X.T * y.
    lars_path : Compute Least Angle Regression or Lasso path using LARS algorithm.
    Lars : Least Angle Regression model a.k.a. LAR.
    LassoLars : Lasso model fit with Least Angle Regression a.k.a. Lars.
    OrthogonalMatchingPursuit : Orthogonal Matching Pursuit model (OMP).
    LarsCV : Cross-validated Least Angle Regression model.
    LassoLarsCV : Cross-validated Lasso model fit with Least Angle Regression.
    sklearn.decomposition.sparse_encode : Generic sparse coding.
        Each column of the result is the solution to a Lasso problem.

    Notes
    -----
    In `fit`, once the optimal number of non-zero coefficients is found through
    cross-validation, the model is fit again using the entire training set.

    Examples
    --------
    >>> from sklearn.linear_model import OrthogonalMatchingPursuitCV
    >>> from sklearn.datasets import make_regression
    >>> X, y = make_regression(n_features=100, n_informative=10,
    ...                        noise=4, random_state=0)
    >>> reg = OrthogonalMatchingPursuitCV(cv=5).fit(X, y)
    >>> reg.score(X, y)
    0.9991...
    >>> reg.n_nonzero_coefs_
    np.int64(10)
    >>> reg.predict(X[:1,])
    array([-78.3854...])
    """

    # Declarative constraints checked by the @_fit_context machinery before
    # fitting.
    _parameter_constraints: dict = {
        "copy": ["boolean"],
        "fit_intercept": ["boolean"],
        "max_iter": [Interval(Integral, 0, None, closed="left"), None],
        "cv": ["cv_object"],
        "n_jobs": [Integral, None],
        "verbose": ["verbose"],
    }

    def __init__(
        self,
        *,
        copy=True,
        fit_intercept=True,
        max_iter=None,
        cv=None,
        n_jobs=None,
        verbose=False,
    ):
        self.copy = copy
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.cv = cv
        self.n_jobs = n_jobs
        self.verbose = verbose

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, **fit_params):
        """Fit the model using X, y as training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values. Will be cast to X's dtype if necessary.

        **fit_params : dict
            Parameters to pass to the underlying splitter.

            .. versionadded:: 1.4
                Only available if `enable_metadata_routing=True`,
                which can be set by using
                ``sklearn.set_config(enable_metadata_routing=True)``.
                See :ref:`Metadata Routing User Guide <metadata_routing>` for
                more details.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        # Reject metadata kwargs when routing is disabled.
        _raise_for_params(fit_params, self, "fit")

        X, y = validate_data(self, X, y, y_numeric=True, ensure_min_features=2)
        X = as_float_array(X, copy=False, ensure_all_finite=False)
        cv = check_cv(self.cv, classifier=False)
        if _routing_enabled():
            routed_params = process_routing(self, "fit", **fit_params)
        else:
            # Routing disabled: pass no extra arguments to the splitter.
            routed_params = Bunch()
            routed_params.splitter = Bunch(split={})
        # Default path length: 10% of n_features, but at least 5, capped at
        # n_features.
        max_iter = (
            min(max(int(0.1 * X.shape[1]), 5), X.shape[1])
            if not self.max_iter
            else self.max_iter
        )
        # One residue path per fold, each of shape (n_steps, n_test_samples).
        cv_paths = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)(
            delayed(_omp_path_residues)(
                X[train],
                y[train],
                X[test],
                y[test],
                self.copy,
                self.fit_intercept,
                max_iter,
            )
            for train, test in cv.split(X, **routed_params.splitter.split)
        )

        # Folds may stop at different path lengths; compare them only up to
        # the shortest path.
        min_early_stop = min(fold.shape[0] for fold in cv_paths)
        mse_folds = np.array(
            [(fold[:min_early_stop] ** 2).mean(axis=1) for fold in cv_paths]
        )
        # Path step k corresponds to k + 1 active atoms, hence the + 1.
        best_n_nonzero_coefs = np.argmin(mse_folds.mean(axis=0)) + 1
        self.n_nonzero_coefs_ = best_n_nonzero_coefs
        # Refit on the full training set with the selected sparsity level.
        omp = OrthogonalMatchingPursuit(
            n_nonzero_coefs=best_n_nonzero_coefs,
            fit_intercept=self.fit_intercept,
        ).fit(X, y)

        self.coef_ = omp.coef_
        self.intercept_ = omp.intercept_
        self.n_iter_ = omp.n_iter_
        return self

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        .. versionadded:: 1.4

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        # Only the CV splitter consumes routed metadata (fit -> split).
        router = MetadataRouter(owner=self.__class__.__name__).add(
            splitter=self.cv,
            method_mapping=MethodMapping().add(caller="fit", callee="split"),
        )
        return router
|
|