|
"""A set of kernels that can be combined by operators and used in Gaussian processes.""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import math |
|
import warnings |
|
from abc import ABCMeta, abstractmethod |
|
from collections import namedtuple |
|
from inspect import signature |
|
|
|
import numpy as np |
|
from scipy.spatial.distance import cdist, pdist, squareform |
|
from scipy.special import gamma, kv |
|
|
|
from ..base import clone |
|
from ..exceptions import ConvergenceWarning |
|
from ..metrics.pairwise import pairwise_kernels |
|
from ..utils.validation import _num_samples |
|
|
|
|
|
def _check_length_scale(X, length_scale): |
|
length_scale = np.squeeze(length_scale).astype(float) |
|
if np.ndim(length_scale) > 1: |
|
raise ValueError("length_scale cannot be of dimension greater than 1") |
|
if np.ndim(length_scale) == 1 and X.shape[1] != length_scale.shape[0]: |
|
raise ValueError( |
|
"Anisotropic kernel must have the same number of " |
|
"dimensions as data (%d!=%d)" % (length_scale.shape[0], X.shape[1]) |
|
) |
|
return length_scale |
|
|
|
|
|
class Hyperparameter(
    namedtuple(
        "Hyperparameter", ("name", "value_type", "bounds", "n_elements", "fixed")
    )
):
    """A kernel hyperparameter's specification in form of a namedtuple.

    .. versionadded:: 0.18

    Attributes
    ----------
    name : str
        The name of the hyperparameter. Note that a kernel using a
        hyperparameter with name "x" must have the attributes self.x and
        self.x_bounds

    value_type : str
        The type of the hyperparameter. Currently, only "numeric"
        hyperparameters are supported.

    bounds : pair of floats >= 0 or "fixed"
        The lower and upper bound on the parameter. If n_elements>1, a pair
        of 1d array with n_elements each may be given alternatively. If
        the string "fixed" is passed as bounds, the hyperparameter's value
        cannot be changed. Any other value is stored as a 2d ndarray with
        one row per element.

    n_elements : int, default=1
        The number of elements of the hyperparameter value. Defaults to 1,
        which corresponds to a scalar hyperparameter. n_elements > 1
        corresponds to a hyperparameter which is vector-valued,
        such as, e.g., anisotropic length-scales.

    fixed : bool, default=None
        Whether the value of this hyperparameter is fixed, i.e., cannot be
        changed during hyperparameter tuning. If None is passed, the "fixed" is
        derived based on the given bounds.

    Examples
    --------
    >>> from sklearn.gaussian_process.kernels import ConstantKernel
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import Hyperparameter
    >>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0)
    >>> kernel = ConstantKernel(constant_value=1.0,
    ...    constant_value_bounds=(0.0, 10.0))

    We can access each hyperparameter:

    >>> for hyperparameter in kernel.hyperparameters:
    ...    print(hyperparameter)
    Hyperparameter(name='constant_value', value_type='numeric',
    bounds=array([[ 0., 10.]]), n_elements=1, fixed=False)

    >>> params = kernel.get_params()
    >>> for key in sorted(params): print(f"{key} : {params[key]}")
    constant_value : 1.0
    constant_value_bounds : (0.0, 10.0)
    """

    # An empty ``__slots__`` keeps instances as small as a plain namedtuple:
    # no per-instance ``__dict__`` is created for this subclass.
    __slots__ = ()

    def __new__(cls, name, value_type, bounds, n_elements=1, fixed=None):
        """Build the specification, normalising ``bounds`` and ``fixed``.

        Raises
        ------
        ValueError
            If ``n_elements > 1`` and ``bounds`` has neither 1 nor
            ``n_elements`` rows.
        """
        if not isinstance(bounds, str) or bounds != "fixed":
            bounds = np.atleast_2d(bounds)
            if n_elements > 1:  # vector-valued parameter
                if bounds.shape[0] == 1:
                    # A single (lower, upper) pair is shared by all elements.
                    bounds = np.repeat(bounds, n_elements, 0)
                elif bounds.shape[0] != n_elements:
                    raise ValueError(
                        "Bounds on %s should have either 1 or "
                        "%d dimensions. Given are %d"
                        % (name, n_elements, bounds.shape[0])
                    )

        if fixed is None:
            fixed = isinstance(bounds, str) and bounds == "fixed"
        return super(Hyperparameter, cls).__new__(
            cls, name, value_type, bounds, n_elements, fixed
        )

    def __eq__(self, other):
        """Field-wise equality; ``bounds`` arrays are compared with np.all.

        The tuple comparison inherited from namedtuple cannot be used because
        ``bounds`` is usually an ndarray, whose ``==`` yields an array.
        """
        if not isinstance(other, Hyperparameter):
            return False
        # Compare the cheap scalar fields first so that ``bounds`` arrays are
        # only compared when the number of elements already agrees.
        return bool(
            self.name == other.name
            and self.value_type == other.value_type
            and self.n_elements == other.n_elements
            and self.fixed == other.fixed
            and np.all(self.bounds == other.bounds)
        )

    def __ne__(self, other):
        """Negation of ``__eq__``.

        Defined explicitly because ``tuple.__ne__`` would otherwise be
        inherited and would try to evaluate the truth value of the
        element-wise ``bounds`` comparison, raising ``ValueError``.
        """
        return not self.__eq__(other)
|
|
|
|
|
class Kernel(metaclass=ABCMeta):
    """Base class for all kernels.

    Subclasses must implement ``__call__``, ``diag`` and ``is_stationary``.
    Hyperparameters are declared as attributes whose names start with
    ``hyperparameter_``; constructor arguments are the kernel's parameters.

    .. versionadded:: 0.18

    Examples
    --------
    >>> from sklearn.gaussian_process.kernels import Kernel, RBF
    >>> import numpy as np
    >>> class CustomKernel(Kernel):
    ...     def __init__(self, length_scale=1.0):
    ...         self.length_scale = length_scale
    ...     def __call__(self, X, Y=None):
    ...         if Y is None:
    ...             Y = X
    ...         return np.inner(X, Y) ** 2
    ...     def diag(self, X):
    ...         return np.ones(X.shape[0])
    ...     def is_stationary(self):
    ...         return True
    >>> kernel = CustomKernel(length_scale=2.0)
    >>> X = np.array([[1, 2], [3, 4]])
    >>> print(kernel(X))
    [[ 25 121]
     [121 625]]
    """

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators. This base
            implementation does not use the flag.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.

        Raises
        ------
        RuntimeError
            If ``__init__`` declares a ``*args`` parameter.
        """
        params = dict()

        # Introspect the constructor arguments to find the kernel parameters;
        # a deprecation wrapper may expose the real constructor as
        # ``deprecated_original``.
        cls = self.__class__
        init = getattr(cls.__init__, "deprecated_original", cls.__init__)
        init_sign = signature(init)
        args, varargs = [], []
        for parameter in init_sign.parameters.values():
            if parameter.kind != parameter.VAR_KEYWORD and parameter.name != "self":
                args.append(parameter.name)
            if parameter.kind == parameter.VAR_POSITIONAL:
                varargs.append(parameter.name)

        if len(varargs) != 0:
            raise RuntimeError(
                "scikit-learn kernels should always "
                "specify their parameters in the signature"
                " of their __init__ (no varargs)."
                " %s doesn't follow this convention." % (cls,)
            )
        for arg in args:
            params[arg] = getattr(self, arg)

        return params

    def set_params(self, **params):
        """Set the parameters of this kernel.

        The method works on simple kernels as well as on nested kernels.
        The latter have parameters of the form ``<component>__<parameter>``
        so that it's possible to update each component of a nested object.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If a key (or the component part of a nested key) is not among
            ``get_params(deep=True)``.
        """
        if not params:
            # Simple optimisation to gain speed (inspect is slow).
            return self
        valid_params = self.get_params(deep=True)
        for key, value in params.items():
            split = key.split("__", 1)
            if len(split) > 1:
                # Nested parameter: delegate to the named sub-object.
                name, sub_name = split
                if name not in valid_params:
                    raise ValueError(
                        "Invalid parameter %s for kernel %s. "
                        "Check the list of available parameters "
                        "with `kernel.get_params().keys()`." % (name, self)
                    )
                sub_object = valid_params[name]
                sub_object.set_params(**{sub_name: value})
            else:
                # Simple parameter: set it directly on this kernel.
                if key not in valid_params:
                    raise ValueError(
                        "Invalid parameter %s for kernel %s. "
                        "Check the list of available parameters "
                        "with `kernel.get_params().keys()`."
                        % (key, self.__class__.__name__)
                    )
                setattr(self, key, value)
        return self

    def clone_with_theta(self, theta):
        """Returns a clone of self with given hyperparameters theta.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The hyperparameters
        """
        cloned = clone(self)
        cloned.theta = theta
        return cloned

    @property
    def n_dims(self):
        """Returns the number of non-fixed hyperparameters of the kernel."""
        return self.theta.shape[0]

    @property
    def hyperparameters(self):
        """Returns a list of all hyperparameter specifications.

        The list holds every attribute whose name starts with
        ``hyperparameter_``, in the order given by ``dir(self)``.
        """
        r = [
            getattr(self, attr)
            for attr in dir(self)
            if attr.startswith("hyperparameter_")
        ]
        return r

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel
        """
        theta = []
        params = self.get_params()
        for hyperparameter in self.hyperparameters:
            if not hyperparameter.fixed:
                theta.append(params[hyperparameter.name])
        if len(theta) > 0:
            return np.log(np.hstack(theta))
        else:
            return np.array([])

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel

        Raises
        ------
        ValueError
            If ``theta`` does not hold exactly one entry per element of the
            non-fixed hyperparameters. Nothing is modified in that case.
        """
        params = self.get_params()
        free = [hp for hp in self.hyperparameters if not hp.fixed]

        # Validate the length before touching theta so that a too-short
        # vector yields the documented ValueError rather than an IndexError.
        # A scalar hyperparameter consumes one entry, a vector-valued one
        # consumes ``n_elements`` entries.
        n_expected = sum(hp.n_elements if hp.n_elements > 1 else 1 for hp in free)
        if n_expected != len(theta):
            raise ValueError(
                "theta has not the correct number of entries."
                " Should be %d; given are %d" % (n_expected, len(theta))
            )

        i = 0
        for hyperparameter in free:
            if hyperparameter.n_elements > 1:
                # vector-valued parameter
                params[hyperparameter.name] = np.exp(
                    theta[i : i + hyperparameter.n_elements]
                )
                i += hyperparameter.n_elements
            else:
                params[hyperparameter.name] = np.exp(theta[i])
                i += 1

        self.set_params(**params)

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : ndarray of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta.
            An empty array is returned when every hyperparameter is fixed.
        """
        bounds = [
            hyperparameter.bounds
            for hyperparameter in self.hyperparameters
            if not hyperparameter.fixed
        ]
        if len(bounds) > 0:
            return np.log(np.vstack(bounds))
        else:
            return np.array([])

    def __add__(self, b):
        """Return ``Sum(self, b)``; a non-kernel ``b`` becomes a ConstantKernel."""
        if not isinstance(b, Kernel):
            return Sum(self, ConstantKernel(b))
        return Sum(self, b)

    def __radd__(self, b):
        """Return ``Sum(b, self)``; a non-kernel ``b`` becomes a ConstantKernel."""
        if not isinstance(b, Kernel):
            return Sum(ConstantKernel(b), self)
        return Sum(b, self)

    def __mul__(self, b):
        """Return ``Product(self, b)``; a non-kernel ``b`` becomes a ConstantKernel."""
        if not isinstance(b, Kernel):
            return Product(self, ConstantKernel(b))
        return Product(self, b)

    def __rmul__(self, b):
        """Return ``Product(b, self)``; a non-kernel ``b`` becomes a ConstantKernel."""
        if not isinstance(b, Kernel):
            return Product(ConstantKernel(b), self)
        return Product(b, self)

    def __pow__(self, b):
        """Return ``Exponentiation(self, b)``."""
        return Exponentiation(self, b)

    def __eq__(self, b):
        """Two kernels are equal if they share type and all parameters."""
        if type(self) != type(b):
            return False
        params_a = self.get_params()
        params_b = b.get_params()
        for key in set(list(params_a.keys()) + list(params_b.keys())):
            # np.any copes with array-valued parameters.
            if np.any(params_a.get(key, None) != params_b.get(key, None)):
                return False
        return True

    def __repr__(self):
        return "{0}({1})".format(
            self.__class__.__name__, ", ".join(map("{0:.3g}".format, self.theta))
        )

    @abstractmethod
    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the kernel."""

    @abstractmethod
    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples,)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """

    @abstractmethod
    def is_stationary(self):
        """Returns whether the kernel is stationary."""

    @property
    def requires_vector_input(self):
        """Returns whether the kernel is defined on fixed-length feature
        vectors or generic objects. Defaults to True for backward
        compatibility."""
        return True

    def _check_bounds_params(self):
        """Called after fitting to warn if bounds may have been too tight.

        Emits a ``ConvergenceWarning`` for every non-fixed hyperparameter
        element whose log-value is (numerically) equal to one of its bounds.
        """
        # Row ``idx`` compares theta[idx] with its (lower, upper) log-bounds.
        list_close = np.isclose(self.bounds, np.atleast_2d(self.theta).T)
        idx = 0
        for hyp in self.hyperparameters:
            if hyp.fixed:
                continue
            for dim in range(hyp.n_elements):
                if list_close[idx, 0]:
                    warnings.warn(
                        "The optimal value found for "
                        "dimension %s of parameter %s is "
                        "close to the specified lower "
                        "bound %s. Decreasing the bound and"
                        " calling fit again may find a "
                        "better value." % (dim, hyp.name, hyp.bounds[dim][0]),
                        ConvergenceWarning,
                    )
                elif list_close[idx, 1]:
                    warnings.warn(
                        "The optimal value found for "
                        "dimension %s of parameter %s is "
                        "close to the specified upper "
                        "bound %s. Increasing the bound and"
                        " calling fit again may find a "
                        "better value." % (dim, hyp.name, hyp.bounds[dim][1]),
                        ConvergenceWarning,
                    )
                idx += 1
|
|
|
|
|
class NormalizedKernelMixin:
    """Mixin for kernels which are normalized: k(X, X)=1.

    .. versionadded:: 0.18
    """

    def diag(self, X):
        """Return the diagonal of k(X, X), which is all ones.

        Equivalent to ``np.diag(self(X))`` for a normalized kernel, but
        obtained without evaluating the full kernel matrix.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Argument of the kernel; only its first dimension is used.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        n_samples = X.shape[0]
        return np.ones(n_samples)
|
|
|
|
|
class StationaryKernelMixin:
    """Mixin for kernels which are stationary: k(X, Y)= f(X-Y).

    .. versionadded:: 0.18
    """

    def is_stationary(self):
        """Report stationarity; always True for kernels using this mixin."""
        stationary = True
        return stationary
|
|
|
|
|
class GenericKernelMixin:
    """Mixin for kernels which operate on generic objects such as variable-
    length sequences, trees, and graphs.

    .. versionadded:: 0.22
    """

    @property
    def requires_vector_input(self):
        """Whether the kernel works only on fixed-length feature vectors.

        Always False for kernels using this mixin.
        """
        vector_only = False
        return vector_only
|
|
|
|
|
class CompoundKernel(Kernel):
    """Kernel which is composed of a set of other kernels.

    .. versionadded:: 0.18

    Parameters
    ----------
    kernels : list of Kernels
        The other kernels

    Examples
    --------
    >>> from sklearn.gaussian_process.kernels import WhiteKernel
    >>> from sklearn.gaussian_process.kernels import RBF
    >>> from sklearn.gaussian_process.kernels import CompoundKernel
    >>> kernel = CompoundKernel(
    ...     [WhiteKernel(noise_level=3.0), RBF(length_scale=2.0)])
    >>> print(kernel.bounds)
    [[-11.51292546  11.51292546]
     [-11.51292546  11.51292546]]
    >>> print(kernel.n_dims)
    2
    >>> print(kernel.theta)
    [1.09861229 0.69314718]
    """

    def __init__(self, kernels):
        self.kernels = kernels

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators. This implementation
            does not use the flag.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        return dict(kernels=self.kernels)

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel,
            i.e. the thetas of all sub-kernels concatenated in order.
        """
        return np.hstack([kernel.theta for kernel in self.kernels])

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : array of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel.
            It is split between the sub-kernels in order, each one receiving
            as many entries as its own ``n_dims``.
        """
        # Walk through theta with a running offset: sub-kernels may have
        # different numbers of free hyperparameters, and this class has no
        # ``k1`` attribute to take a common width from.
        start = 0
        for kernel in self.kernels:
            stop = start + kernel.n_dims
            kernel.theta = theta[start:stop]
            start = stop

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : array of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        return np.vstack([kernel.bounds for kernel in self.kernels])

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Note that this compound kernel returns the results of all simple kernel
        stacked along an additional axis.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object, \
            default=None
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object, \
            default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of the
            kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y, n_kernels)
            Kernel k(X, Y)

        K_gradient : ndarray of shape \
                (n_samples_X, n_samples_X, n_dims, n_kernels), optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if eval_gradient:
            K = []
            K_grad = []
            for kernel in self.kernels:
                K_single, K_grad_single = kernel(X, Y, eval_gradient)
                K.append(K_single)
                # Add a trailing axis so gradients can be joined per kernel.
                K_grad.append(K_grad_single[..., np.newaxis])
            return np.dstack(K), np.concatenate(K_grad, 3)
        else:
            return np.dstack([kernel(X, Y, eval_gradient) for kernel in self.kernels])

    def __eq__(self, b):
        """Equal if ``b`` has the same type and pairwise-equal sub-kernels."""
        if type(self) != type(b) or len(self.kernels) != len(b.kernels):
            return False
        return np.all(
            [self.kernels[i] == b.kernels[i] for i in range(len(self.kernels))]
        )

    def is_stationary(self):
        """Returns whether the kernel is stationary.

        True only if every sub-kernel is stationary.
        """
        return np.all([kernel.is_stationary() for kernel in self.kernels])

    @property
    def requires_vector_input(self):
        """Returns whether the kernel requires fixed-length vector input.

        True as soon as at least one sub-kernel requires vector input.
        """
        return np.any([kernel.requires_vector_input for kernel in self.kernels])

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to `np.diag(self(X))`; however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X, n_kernels)
            Diagonal of kernel k(X, X)
        """
        return np.vstack([kernel.diag(X) for kernel in self.kernels]).T
|
|
|
|
|
class KernelOperator(Kernel):
    """Base class for all kernel operators.

    A kernel operator combines two kernels, stored as ``k1`` and ``k2``.
    Hyperparameters and parameters of the operands are exposed with the
    prefixes ``k1__`` and ``k2__``.

    .. versionadded:: 0.18
    """

    def __init__(self, k1, k2):
        self.k1 = k1
        self.k2 = k2

    def get_params(self, deep=True):
        """Get parameters of this kernel.

        Parameters
        ----------
        deep : bool, default=True
            If True, will return the parameters for this estimator and
            contained subobjects that are estimators.

        Returns
        -------
        params : dict
            Parameter names mapped to their values.
        """
        params = dict(k1=self.k1, k2=self.k2)
        if deep:
            deep_items = self.k1.get_params().items()
            params.update(("k1__" + k, val) for k, val in deep_items)
            deep_items = self.k2.get_params().items()
            params.update(("k2__" + k, val) for k, val in deep_items)

        return params

    @property
    def hyperparameters(self):
        """Returns a list of all hyperparameter.

        The hyperparameters of ``k1`` come first, then those of ``k2``; names
        are prefixed with ``k1__`` / ``k2__``.
        """
        # ``fixed`` is forwarded explicitly so that a hyperparameter marked
        # as fixed by the operand stays fixed here, in agreement with the
        # operand's own ``theta`` and ``bounds``.
        r = [
            Hyperparameter(
                "k1__" + hyperparameter.name,
                hyperparameter.value_type,
                hyperparameter.bounds,
                hyperparameter.n_elements,
                hyperparameter.fixed,
            )
            for hyperparameter in self.k1.hyperparameters
        ]

        for hyperparameter in self.k2.hyperparameters:
            r.append(
                Hyperparameter(
                    "k2__" + hyperparameter.name,
                    hyperparameter.value_type,
                    hyperparameter.bounds,
                    hyperparameter.n_elements,
                    hyperparameter.fixed,
                )
            )
        return r

    @property
    def theta(self):
        """Returns the (flattened, log-transformed) non-fixed hyperparameters.

        Note that theta are typically the log-transformed values of the
        kernel's hyperparameters as this representation of the search space
        is more amenable for hyperparameter search, as hyperparameters like
        length-scales naturally live on a log-scale.

        Returns
        -------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel:
            the theta of ``k1`` followed by the theta of ``k2``.
        """
        return np.append(self.k1.theta, self.k2.theta)

    @theta.setter
    def theta(self, theta):
        """Sets the (flattened, log-transformed) non-fixed hyperparameters.

        Parameters
        ----------
        theta : ndarray of shape (n_dims,)
            The non-fixed, log-transformed hyperparameters of the kernel.
            The first ``k1.n_dims`` entries go to ``k1``, the rest to ``k2``.
        """
        k1_dims = self.k1.n_dims
        self.k1.theta = theta[:k1_dims]
        self.k2.theta = theta[k1_dims:]

    @property
    def bounds(self):
        """Returns the log-transformed bounds on the theta.

        Returns
        -------
        bounds : ndarray of shape (n_dims, 2)
            The log-transformed bounds on the kernel's hyperparameters theta
        """
        # An operand without free hyperparameters has empty bounds, which
        # cannot be stacked with a (n, 2) array.
        if self.k1.bounds.size == 0:
            return self.k2.bounds
        if self.k2.bounds.size == 0:
            return self.k1.bounds
        return np.vstack((self.k1.bounds, self.k2.bounds))

    def __eq__(self, b):
        """Equality up to the order of the two operands."""
        if type(self) != type(b):
            return False
        return (self.k1 == b.k1 and self.k2 == b.k2) or (
            self.k1 == b.k2 and self.k2 == b.k1
        )

    def is_stationary(self):
        """Returns whether the kernel is stationary.

        True only if both operands are stationary.
        """
        return self.k1.is_stationary() and self.k2.is_stationary()

    @property
    def requires_vector_input(self):
        """Returns whether the kernel requires fixed-length vector input.

        True if at least one of the two operands requires vector input.
        """
        return self.k1.requires_vector_input or self.k2.requires_vector_input
|
|
|
|
|
class Sum(KernelOperator):
    """The `Sum` kernel takes two kernels :math:`k_1` and :math:`k_2`
    and combines them via

    .. math::
        k_{sum}(X, Y) = k_1(X, Y) + k_2(X, Y)

    Note that the `__add__` magic method is overridden, so
    `Sum(RBF(), RBF())` is equivalent to using the + operator
    with `RBF() + RBF()`.


    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    k1 : Kernel
        The first base-kernel of the sum-kernel

    k2 : Kernel
        The second base-kernel of the sum-kernel

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import RBF, Sum, ConstantKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = Sum(ConstantKernel(2), RBF())
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    1.0
    >>> kernel
    1.41**2 + RBF(length_scale=1)
    """

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the sum kernel and, on request, its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
                default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True. The gradient slices of ``k1`` precede those of ``k2``.
        """
        if not eval_gradient:
            return self.k1(X, Y) + self.k2(X, Y)

        first, first_grad = self.k1(X, Y, eval_gradient=True)
        second, second_grad = self.k2(X, Y, eval_gradient=True)
        # The derivative of a sum is the collection of both operands'
        # derivatives, stacked along the hyperparameter axis.
        stacked_grad = np.dstack((first_grad, second_grad))
        return first + second, stacked_grad

    def diag(self, X):
        """Return the diagonal of k(X, X) as the sum of both diagonals.

        The result of this method is identical to `np.diag(self(X))`; however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        first_diag = self.k1.diag(X)
        second_diag = self.k2.diag(X)
        return first_diag + second_diag

    def __repr__(self):
        left, right = self.k1, self.k2
        return "{0} + {1}".format(left, right)
|
|
|
|
|
class Product(KernelOperator):
    """The `Product` kernel takes two kernels :math:`k_1` and :math:`k_2`
    and combines them via

    .. math::
        k_{prod}(X, Y) = k_1(X, Y) * k_2(X, Y)

    Note that the `__mul__` magic method is overridden, so
    `Product(RBF(), RBF())` is equivalent to using the * operator
    with `RBF() * RBF()`.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    k1 : Kernel
        The first base-kernel of the product-kernel

    k2 : Kernel
        The second base-kernel of the product-kernel


    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import (RBF, Product,
    ...            ConstantKernel)
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = Product(ConstantKernel(2), RBF())
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    1.0
    >>> kernel
    1.41**2 * RBF(length_scale=1)
    """

    def __call__(self, X, Y=None, eval_gradient=False):
        """Evaluate the product kernel and, on request, its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
            default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True. The gradient slices of ``k1`` precede those of ``k2``.
        """
        if not eval_gradient:
            return self.k1(X, Y) * self.k2(X, Y)

        first, first_grad = self.k1(X, Y, eval_gradient=True)
        second, second_grad = self.k2(X, Y, eval_gradient=True)
        # Product rule: each operand's gradient is scaled by the value of
        # the other operand, broadcast over the hyperparameter axis.
        scaled_first = first_grad * second[:, :, np.newaxis]
        scaled_second = second_grad * first[:, :, np.newaxis]
        return first * second, np.dstack((scaled_first, scaled_second))

    def diag(self, X):
        """Return the diagonal of k(X, X) as the product of both diagonals.

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        first_diag = self.k1.diag(X)
        second_diag = self.k2.diag(X)
        return first_diag * second_diag

    def __repr__(self):
        left, right = self.k1, self.k2
        return "{0} * {1}".format(left, right)
|
|
|
|
|
class Exponentiation(Kernel): |
|
"""The Exponentiation kernel takes one base kernel and a scalar parameter |
|
:math:`p` and combines them via |
|
|
|
.. math:: |
|
k_{exp}(X, Y) = k(X, Y) ^p |
|
|
|
Note that the `__pow__` magic method is overridden, so |
|
`Exponentiation(RBF(), 2)` is equivalent to using the ** operator |
|
with `RBF() ** 2`. |
|
|
|
|
|
Read more in the :ref:`User Guide <gp_kernels>`. |
|
|
|
.. versionadded:: 0.18 |
|
|
|
Parameters |
|
---------- |
|
kernel : Kernel |
|
The base kernel |
|
|
|
exponent : float |
|
The exponent for the base kernel |
|
|
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.datasets import make_friedman2 |
|
>>> from sklearn.gaussian_process import GaussianProcessRegressor |
|
>>> from sklearn.gaussian_process.kernels import (RationalQuadratic, |
|
... Exponentiation) |
|
>>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0) |
|
>>> kernel = Exponentiation(RationalQuadratic(), exponent=2) |
|
>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5, |
|
... random_state=0).fit(X, y) |
|
>>> gpr.score(X, y) |
|
0.419... |
|
>>> gpr.predict(X[:1,:], return_std=True) |
|
(array([635.5...]), array([0.559...])) |
|
""" |
|
|
|
def __init__(self, kernel, exponent): |
|
self.kernel = kernel |
|
self.exponent = exponent |
|
|
|
def get_params(self, deep=True):
    """Get parameters of this kernel.

    Parameters
    ----------
    deep : bool, default=True
        If True, the parameters of the base kernel are included as well,
        each key prefixed with ``kernel__``.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    collected = {"kernel": self.kernel, "exponent": self.exponent}
    if not deep:
        return collected
    for key, value in self.kernel.get_params().items():
        collected["kernel__" + key] = value
    return collected
|
|
|
@property |
|
def hyperparameters(self):
    """Returns a list of all hyperparameter.

    These are the hyperparameters of the base kernel with their names
    prefixed by ``kernel__``.
    """
    # ``fixed`` is forwarded explicitly so that a hyperparameter marked as
    # fixed by the base kernel stays fixed here, in agreement with the
    # base kernel's own ``theta`` and ``bounds``.
    return [
        Hyperparameter(
            "kernel__" + hyperparameter.name,
            hyperparameter.value_type,
            hyperparameter.bounds,
            hyperparameter.n_elements,
            hyperparameter.fixed,
        )
        for hyperparameter in self.kernel.hyperparameters
    ]
|
|
|
@property |
|
def theta(self):
    """Returns the (flattened, log-transformed) non-fixed hyperparameters.

    The exponent itself is not a hyperparameter, so this is simply the
    theta of the base kernel.

    Returns
    -------
    theta : ndarray of shape (n_dims,)
        The non-fixed, log-transformed hyperparameters of the kernel
    """
    base_kernel = self.kernel
    return base_kernel.theta
|
|
|
@theta.setter |
|
def theta(self, theta):
    """Sets the (flattened, log-transformed) non-fixed hyperparameters.

    The values are handed unchanged to the base kernel.

    Parameters
    ----------
    theta : ndarray of shape (n_dims,)
        The non-fixed, log-transformed hyperparameters of the kernel
    """
    base_kernel = self.kernel
    base_kernel.theta = theta
|
|
|
@property |
|
def bounds(self):
    """Returns the log-transformed bounds on the theta.

    These are exactly the bounds reported by the base kernel.

    Returns
    -------
    bounds : ndarray of shape (n_dims, 2)
        The log-transformed bounds on the kernel's hyperparameters theta
    """
    base_kernel = self.kernel
    return base_kernel.bounds
|
|
|
def __eq__(self, b): |
|
if type(self) != type(b): |
|
return False |
|
return self.kernel == b.kernel and self.exponent == b.exponent |
|
|
|
def __call__(self, X, Y=None, eval_gradient=False):
    """Return the kernel k(X, Y) and optionally its gradient.

    The wrapped kernel is evaluated and the result is raised element-wise
    to the power ``self.exponent``.

    Parameters
    ----------
    X : array-like of shape (n_samples_X, n_features) or list of object
        Left argument of the returned kernel k(X, Y)

    Y : array-like of shape (n_samples_Y, n_features) or list of object,\
        default=None
        Right argument of the returned kernel k(X, Y). If None, k(X, X)
        is evaluated instead.

    eval_gradient : bool, default=False
        Determines whether the gradient with respect to the log of
        the kernel hyperparameter is computed.

    Returns
    -------
    K : ndarray of shape (n_samples_X, n_samples_Y)
        Kernel k(X, Y)

    K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
            optional
        The gradient of the kernel k(X, X) with respect to the log of the
        hyperparameter of the kernel. Only returned when `eval_gradient`
        is True.
    """
    if not eval_gradient:
        base_K = self.kernel(X, Y, eval_gradient=False)
        return base_K**self.exponent

    base_K, base_gradient = self.kernel(X, Y, eval_gradient=True)
    # Chain rule, applied in place on the wrapped kernel's gradient:
    # d(K**p) = p * K**(p - 1) * dK.
    base_gradient *= self.exponent * base_K[:, :, np.newaxis] ** (self.exponent - 1)
    return base_K**self.exponent, base_gradient
|
|
|
def diag(self, X):
    """Return the diagonal of the kernel k(X, X).

    The wrapped kernel's diagonal is raised element-wise to the power
    ``self.exponent``. The result is identical to np.diag(self(X)) but
    only the diagonal is evaluated, which is more efficient.

    Parameters
    ----------
    X : array-like of shape (n_samples_X, n_features) or list of object
        Argument to the kernel.

    Returns
    -------
    K_diag : ndarray of shape (n_samples_X,)
        Diagonal of kernel k(X, X)
    """
    base_diag = self.kernel.diag(X)
    return base_diag**self.exponent
|
|
|
def __repr__(self):
    """Render as ``<kernel> ** <exponent>``."""
    template = "{0} ** {1}"
    return template.format(self.kernel, self.exponent)
|
|
|
def is_stationary(self):
    """Return whether the kernel is stationary, as reported by the wrapped kernel."""
    wrapped = self.kernel
    return wrapped.is_stationary()
|
|
|
@property
def requires_vector_input(self):
    """Return the ``requires_vector_input`` flag of the wrapped kernel."""
    wrapped = self.kernel
    return wrapped.requires_vector_input
|
|
|
|
|
class ConstantKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
    """Constant kernel.

    As a factor of a product-kernel it scales the magnitude of the other
    factor (kernel); as a term of a sum-kernel it modifies the mean of the
    Gaussian process.

    .. math::
        k(x_1, x_2) = constant\\_value \\;\\forall\\; x_1, x_2

    Adding a constant kernel is equivalent to adding a constant::

            kernel = RBF() + ConstantKernel(constant_value=2)

    is the same as::

            kernel = RBF() + 2


    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    constant_value : float, default=1.0
        The constant value which defines the covariance:
        k(x_1, x_2) = constant_value

    constant_value_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on `constant_value`.
        If set to "fixed", `constant_value` cannot be changed during
        hyperparameter tuning.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import RBF, ConstantKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = RBF() + ConstantKernel(constant_value=2)
    >>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.3696...
    >>> gpr.predict(X[:1,:], return_std=True)
    (array([606.1...]), array([0.24...]))
    """

    def __init__(self, constant_value=1.0, constant_value_bounds=(1e-5, 1e5)):
        self.constant_value = constant_value
        self.constant_value_bounds = constant_value_bounds

    @property
    def hyperparameter_constant_value(self):
        """Specification of the ``constant_value`` hyperparameter."""
        return Hyperparameter("constant_value", "numeric", self.constant_value_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object, \
            default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
            optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.

        Raises
        ------
        ValueError
            If `eval_gradient` is True while `Y` is given.
        """
        if Y is not None and eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")

        n_rows = _num_samples(X)
        n_cols = n_rows if Y is None else _num_samples(Y)
        # The output dtype follows the dtype numpy infers for the constant.
        value_dtype = np.array(self.constant_value).dtype
        K = np.full((n_rows, n_cols), self.constant_value, dtype=value_dtype)

        if not eval_gradient:
            return K
        if self.hyperparameter_constant_value.fixed:
            # Fixed hyperparameter: the gradient has no entries along its last axis.
            return K, np.empty((n_rows, n_rows, 0))
        gradient = np.full(
            (n_rows, n_rows, 1), self.constant_value, dtype=value_dtype
        )
        return K, gradient

    def diag(self, X):
        """Return the diagonal of the kernel k(X, X).

        The result is identical to np.diag(self(X)) but is obtained without
        building the full matrix.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        value_dtype = np.array(self.constant_value).dtype
        return np.full(_num_samples(X), self.constant_value, dtype=value_dtype)

    def __repr__(self):
        """Render the constant as the square of its square root."""
        root = np.sqrt(self.constant_value)
        return "{0:.3g}**2".format(root)
|
|
|
|
|
class WhiteKernel(StationaryKernelMixin, GenericKernelMixin, Kernel):
    """White kernel.

    This kernel is mainly used as one term of a sum-kernel, where it
    accounts for the noise of the signal as independently and identically
    normally-distributed. The parameter noise_level equals the variance of
    this noise.

    .. math::
        k(x_i, x_j) = noise\\_level \\text{ if } x_i == x_j \\text{ else } 0


    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    noise_level : float, default=1.0
        Parameter controlling the noise level (variance)

    noise_level_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'noise_level'.
        If set to "fixed", 'noise_level' cannot be changed during
        hyperparameter tuning.

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = DotProduct() + WhiteKernel(noise_level=0.5)
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.3680...
    >>> gpr.predict(X[:2,:], return_std=True)
    (array([653.0..., 592.1... ]), array([316.6..., 316.6...]))
    """

    def __init__(self, noise_level=1.0, noise_level_bounds=(1e-5, 1e5)):
        self.noise_level = noise_level
        self.noise_level_bounds = noise_level_bounds

    @property
    def hyperparameter_noise_level(self):
        """Specification of the ``noise_level`` hyperparameter."""
        return Hyperparameter("noise_level", "numeric", self.noise_level_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Left argument of the returned kernel k(X, Y)

        Y : array-like of shape (n_samples_Y, n_features) or list of object,\
            default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead. Whenever Y is given, an all-zero matrix
            is returned.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
            optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True.

        Raises
        ------
        ValueError
            If `eval_gradient` is True while `Y` is given.
        """
        if Y is not None:
            if eval_gradient:
                raise ValueError("Gradient can only be evaluated when Y is None.")
            # An explicit Y always yields a zero matrix.
            return np.zeros((_num_samples(X), _num_samples(Y)))

        n_samples = _num_samples(X)
        K = self.noise_level * np.eye(n_samples)
        if not eval_gradient:
            return K
        if self.hyperparameter_noise_level.fixed:
            # Fixed hyperparameter: the gradient has no entries along its last axis.
            return K, np.empty((n_samples, n_samples, 0))
        # Built from a fresh identity so the gradient does not share memory with K.
        gradient = self.noise_level * np.eye(n_samples)[:, :, np.newaxis]
        return K, gradient

    def diag(self, X):
        """Return the diagonal of the kernel k(X, X).

        The result is identical to np.diag(self(X)) but is obtained without
        building the full matrix.

        Parameters
        ----------
        X : array-like of shape (n_samples_X, n_features) or list of object
            Argument to the kernel.

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        level_dtype = np.array(self.noise_level).dtype
        return np.full(_num_samples(X), self.noise_level, dtype=level_dtype)

    def __repr__(self):
        """Render as ``ClassName(noise_level=<value>)``."""
        template = "{0}(noise_level={1:.3g})"
        return template.format(self.__class__.__name__, self.noise_level)
|
|
|
|
|
class RBF(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    """Radial basis function kernel (aka squared-exponential kernel).

    The RBF kernel is a stationary kernel, also known as the
    "squared exponential" kernel. Its single parameter is a length scale
    :math:`l>0`, given either as a scalar (isotropic variant of the kernel)
    or as a vector with the same number of dimensions as the inputs X
    (anisotropic variant of the kernel). The kernel is given by:

    .. math::
        k(x_i, x_j) = \\exp\\left(- \\frac{d(x_i, x_j)^2}{2l^2} \\right)

    where :math:`l` is the length scale of the kernel and
    :math:`d(\\cdot,\\cdot)` is the Euclidean distance.
    For advice on how to set the length scale parameter, see e.g. [1]_.

    This kernel is infinitely differentiable, which implies that GPs with this
    kernel as covariance function have mean square derivatives of all orders,
    and are thus very smooth.
    See [2]_, Chapter 4, Section 4.2, for further details of the RBF kernel.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    length_scale : float or ndarray of shape (n_features,), default=1.0
        The length scale of the kernel. If a float, an isotropic kernel is
        used. If an array, an anisotropic kernel is used where each dimension
        of l defines the length-scale of the respective feature dimension.

    length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'length_scale'.
        If set to "fixed", 'length_scale' cannot be changed during
        hyperparameter tuning.

    References
    ----------
    .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
        Advice on Covariance functions".
        <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_

    .. [2] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
        "Gaussian Processes for Machine Learning". The MIT Press.
        <http://www.gaussianprocess.org/gpml/>`_

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.gaussian_process import GaussianProcessClassifier
    >>> from sklearn.gaussian_process.kernels import RBF
    >>> X, y = load_iris(return_X_y=True)
    >>> kernel = 1.0 * RBF(1.0)
    >>> gpc = GaussianProcessClassifier(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpc.score(X, y)
    0.9866...
    >>> gpc.predict_proba(X[:2,:])
    array([[0.8354..., 0.03228..., 0.1322...],
           [0.7906..., 0.0652..., 0.1441...]])
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5)):
        self.length_scale = length_scale
        self.length_scale_bounds = length_scale_bounds

    @property
    def anisotropic(self):
        """Whether ``length_scale`` is an iterable holding more than one entry."""
        scale = self.length_scale
        return np.iterable(scale) and len(scale) > 1

    @property
    def hyperparameter_length_scale(self):
        """Specification of the ``length_scale`` hyperparameter.

        For an anisotropic kernel the specification also carries the number
        of length-scale entries.
        """
        if not self.anisotropic:
            return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)
        return Hyperparameter(
            "length_scale",
            "numeric",
            self.length_scale_bounds,
            len(self.length_scale),
        )

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.

        Raises
        ------
        ValueError
            If `eval_gradient` is True while `Y` is given.
        """
        X = np.atleast_2d(X)
        length_scale = _check_length_scale(X, self.length_scale)

        if Y is not None:
            if eval_gradient:
                raise ValueError("Gradient can only be evaluated when Y is None.")
            sq_dists = cdist(X / length_scale, Y / length_scale, metric="sqeuclidean")
            return np.exp(-0.5 * sq_dists)

        # pdist yields the condensed (upper-triangular) distances; squareform
        # expands them to a full matrix whose diagonal is then set to 1.
        sq_dists = pdist(X / length_scale, metric="sqeuclidean")
        K = squareform(np.exp(-0.5 * sq_dists))
        np.fill_diagonal(K, 1)

        if not eval_gradient:
            return K

        if self.hyperparameter_length_scale.fixed:
            # Fixed length scale: the gradient has no entries along its last axis.
            return K, np.empty((X.shape[0], X.shape[0], 0))

        if not self.anisotropic or length_scale.shape[0] == 1:
            # Single length scale: one gradient slice.
            return K, (K * squareform(sq_dists))[:, :, np.newaxis]

        # Anisotropic case: per-feature squared differences scaled by the
        # squared length scales give one gradient slice per feature.
        K_gradient = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (
            length_scale**2
        )
        K_gradient *= K[..., np.newaxis]
        return K, K_gradient

    def __repr__(self):
        """Render the kernel with its length scale(s) to three significant digits."""
        kernel_name = self.__class__.__name__
        if not self.anisotropic:
            return "{0}(length_scale={1:.3g})".format(
                kernel_name, np.ravel(self.length_scale)[0]
            )
        return "{0}(length_scale=[{1}])".format(
            kernel_name,
            ", ".join(map("{0:.3g}".format, self.length_scale)),
        )
|
|
|
|
|
class Matern(RBF):
    """Matern kernel.

    The class of Matern kernels is a generalization of the :class:`RBF`.
    It has an additional parameter :math:`\\nu` which controls the
    smoothness of the resulting function. The smaller :math:`\\nu`,
    the less smooth the approximated function is.
    As :math:`\\nu\\rightarrow\\infty`, the kernel becomes equivalent to
    the :class:`RBF` kernel. When :math:`\\nu = 1/2`, the Matérn kernel
    becomes identical to the absolute exponential kernel.
    Important intermediate values are
    :math:`\\nu=1.5` (once differentiable functions)
    and :math:`\\nu=2.5` (twice differentiable functions).

    The kernel is given by:

    .. math::
         k(x_i, x_j) =  \\frac{1}{\\Gamma(\\nu)2^{\\nu-1}}\\Bigg(
         \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )
         \\Bigg)^\\nu K_\\nu\\Bigg(
         \\frac{\\sqrt{2\\nu}}{l} d(x_i , x_j )\\Bigg)



    where :math:`d(\\cdot,\\cdot)` is the Euclidean distance,
    :math:`K_{\\nu}(\\cdot)` is a modified Bessel function and
    :math:`\\Gamma(\\cdot)` is the gamma function.
    See [1]_, Chapter 4, Section 4.2, for details regarding the different
    variants of the Matern kernel.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    length_scale : float or ndarray of shape (n_features,), default=1.0
        The length scale of the kernel. If a float, an isotropic kernel is
        used. If an array, an anisotropic kernel is used where each dimension
        of l defines the length-scale of the respective feature dimension.

    length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'length_scale'.
        If set to "fixed", 'length_scale' cannot be changed during
        hyperparameter tuning.

    nu : float, default=1.5
        The parameter nu controlling the smoothness of the learned function.
        The smaller nu, the less smooth the approximated function is.
        For nu=inf, the kernel becomes equivalent to the RBF kernel and for
        nu=0.5 to the absolute exponential kernel. Important intermediate
        values are nu=1.5 (once differentiable functions) and nu=2.5
        (twice differentiable functions). Note that values of nu not in
        [0.5, 1.5, 2.5, inf] incur a considerably higher computational cost
        (appr. 10 times higher) since they require to evaluate the modified
        Bessel function. Furthermore, in contrast to l, nu is kept fixed to
        its initial value and not optimized.

    References
    ----------
    .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
        "Gaussian Processes for Machine Learning". The MIT Press.
        <http://www.gaussianprocess.org/gpml/>`_

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.gaussian_process import GaussianProcessClassifier
    >>> from sklearn.gaussian_process.kernels import Matern
    >>> X, y = load_iris(return_X_y=True)
    >>> kernel = 1.0 * Matern(length_scale=1.0, nu=1.5)
    >>> gpc = GaussianProcessClassifier(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpc.score(X, y)
    0.9866...
    >>> gpc.predict_proba(X[:2,:])
    array([[0.8513..., 0.0368..., 0.1117...],
            [0.8086..., 0.0693..., 0.1220...]])
    """

    def __init__(self, length_scale=1.0, length_scale_bounds=(1e-5, 1e5), nu=1.5):
        # The length scale and its bounds are stored by RBF.__init__; nu is
        # kept as a plain attribute (no hyperparameter_nu property is
        # defined in this class).
        super().__init__(length_scale, length_scale_bounds)
        self.nu = nu

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Closed-form expressions are used for nu in {0.5, 1.5, 2.5, inf}; any
        other nu is evaluated through the gamma function and the modified
        Bessel function ``kv``, and its gradient is approximated numerically.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.

        Raises
        ------
        ValueError
            If `eval_gradient` is True while `Y` is given.
        """
        X = np.atleast_2d(X)
        length_scale = _check_length_scale(X, self.length_scale)
        if Y is None:
            # Condensed (upper-triangular) pairwise distances; expanded to a
            # square matrix further below.
            dists = pdist(X / length_scale, metric="euclidean")
        else:
            if eval_gradient:
                raise ValueError("Gradient can only be evaluated when Y is None.")
            dists = cdist(X / length_scale, Y / length_scale, metric="euclidean")

        if self.nu == 0.5:
            K = np.exp(-dists)
        elif self.nu == 1.5:
            K = dists * math.sqrt(3)
            K = (1.0 + K) * np.exp(-K)
        elif self.nu == 2.5:
            K = dists * math.sqrt(5)
            K = (1.0 + K + K**2 / 3.0) * np.exp(-K)
        elif self.nu == np.inf:
            K = np.exp(-(dists**2) / 2.0)
        else:
            # General nu, evaluated through gamma and the Bessel function kv.
            # NOTE: K is the same array object as dists, so the in-place
            # updates below overwrite dists. Do not reorder these statements.
            K = dists
            # Shift exact zeros by machine epsilon; kv diverges at 0, which
            # would otherwise produce nan.
            K[K == 0.0] += np.finfo(float).eps
            tmp = math.sqrt(2 * self.nu) * K
            K.fill((2 ** (1.0 - self.nu)) / gamma(self.nu))
            K *= tmp**self.nu
            K *= kv(self.nu, tmp)

        if Y is None:
            # Convert the condensed form into a square matrix and set its
            # diagonal to 1.
            K = squareform(K)
            np.fill_diagonal(K, 1)

        if eval_gradient:
            if self.hyperparameter_length_scale.fixed:
                # Fixed length scale: the gradient has no entries along its
                # last axis.
                K_gradient = np.empty((X.shape[0], X.shape[0], 0))
                return K, K_gradient

            # Squared, length-scale-normalized distances: per feature in the
            # anisotropic case, a single slice otherwise. In the general-nu
            # branch dists was overwritten above, but D is not used on that
            # path (see the numerical gradient below).
            if self.anisotropic:
                D = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2 / (length_scale**2)
            else:
                D = squareform(dists**2)[:, :, np.newaxis]

            if self.nu == 0.5:
                denominator = np.sqrt(D.sum(axis=2))[:, :, np.newaxis]
                # Divide only where the distance is non-zero; the remaining
                # entries keep the 0 they were initialized with.
                divide_result = np.zeros_like(D)
                np.divide(
                    D,
                    denominator,
                    out=divide_result,
                    where=denominator != 0,
                )
                K_gradient = K[..., np.newaxis] * divide_result
            elif self.nu == 1.5:
                K_gradient = 3 * D * np.exp(-np.sqrt(3 * D.sum(-1)))[..., np.newaxis]
            elif self.nu == 2.5:
                tmp = np.sqrt(5 * D.sum(-1))[..., np.newaxis]
                K_gradient = 5.0 / 3.0 * D * (tmp + 1) * np.exp(-tmp)
            elif self.nu == np.inf:
                K_gradient = D * K[..., np.newaxis]
            else:
                # General nu: approximate the gradient numerically with
                # _approx_fprime (a helper defined outside this block).

                def f(theta):
                    # Kernel matrix of a copy of this kernel evaluated at theta.
                    return self.clone_with_theta(theta)(X, Y)

                return K, _approx_fprime(self.theta, f, 1e-10)

            if not self.anisotropic:
                # Collapse the last axis into a single gradient slice.
                return K, K_gradient[:, :].sum(-1)[:, :, np.newaxis]
            else:
                return K, K_gradient
        else:
            return K

    def __repr__(self):
        # Length scale(s) and nu are rendered with three significant digits.
        if self.anisotropic:
            return "{0}(length_scale=[{1}], nu={2:.3g})".format(
                self.__class__.__name__,
                ", ".join(map("{0:.3g}".format, self.length_scale)),
                self.nu,
            )
        else:
            return "{0}(length_scale={1:.3g}, nu={2:.3g})".format(
                self.__class__.__name__, np.ravel(self.length_scale)[0], self.nu
            )
|
|
|
|
|
class RationalQuadratic(StationaryKernelMixin, NormalizedKernelMixin, Kernel):
    """Rational Quadratic kernel.

    The RationalQuadratic kernel can be seen as a scale mixture (an infinite
    sum) of RBF kernels with different characteristic length scales. Its
    parameters are a length scale :math:`l>0` and a scale mixture parameter
    :math:`\\alpha>0`. Only the isotropic variant, where length_scale
    :math:`l` is a scalar, is supported at the moment.
    The kernel is given by:

    .. math::
        k(x_i, x_j) = \\left(
        1 + \\frac{d(x_i, x_j)^2 }{ 2\\alpha  l^2}\\right)^{-\\alpha}

    where :math:`\\alpha` is the scale mixture parameter, :math:`l` is
    the length scale of the kernel and :math:`d(\\cdot,\\cdot)` is the
    Euclidean distance.
    For advice on how to set the parameters, see e.g. [1]_.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    length_scale : float > 0, default=1.0
        The length scale of the kernel.

    alpha : float > 0, default=1.0
        Scale mixture parameter

    length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'length_scale'.
        If set to "fixed", 'length_scale' cannot be changed during
        hyperparameter tuning.

    alpha_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'alpha'.
        If set to "fixed", 'alpha' cannot be changed during
        hyperparameter tuning.

    References
    ----------
    .. [1] `David Duvenaud (2014). "The Kernel Cookbook:
        Advice on Covariance functions".
        <https://www.cs.toronto.edu/~duvenaud/cookbook/>`_

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.gaussian_process import GaussianProcessClassifier
    >>> from sklearn.gaussian_process.kernels import RationalQuadratic
    >>> X, y = load_iris(return_X_y=True)
    >>> kernel = RationalQuadratic(length_scale=1.0, alpha=1.5)
    >>> gpc = GaussianProcessClassifier(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpc.score(X, y)
    0.9733...
    >>> gpc.predict_proba(X[:2,:])
    array([[0.8881..., 0.0566..., 0.05518...],
            [0.8678..., 0.0707... , 0.0614...]])
    """

    def __init__(
        self,
        length_scale=1.0,
        alpha=1.0,
        length_scale_bounds=(1e-5, 1e5),
        alpha_bounds=(1e-5, 1e5),
    ):
        self.length_scale = length_scale
        self.alpha = alpha
        self.length_scale_bounds = length_scale_bounds
        self.alpha_bounds = alpha_bounds

    @property
    def hyperparameter_length_scale(self):
        """Specification of the ``length_scale`` hyperparameter."""
        return Hyperparameter("length_scale", "numeric", self.length_scale_bounds)

    @property
    def hyperparameter_alpha(self):
        """Specification of the ``alpha`` hyperparameter."""
        return Hyperparameter("alpha", "numeric", self.alpha_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims)
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when eval_gradient
            is True. The alpha slice (if any) precedes the length_scale
            slice (if any).

        Raises
        ------
        AttributeError
            If `length_scale` holds more than one entry.
        ValueError
            If `eval_gradient` is True while `Y` is given.
        """
        if len(np.atleast_1d(self.length_scale)) > 1:
            raise AttributeError(
                "RationalQuadratic kernel only supports isotropic version, "
                "please use a single scalar for length_scale"
            )
        X = np.atleast_2d(X)

        if Y is not None:
            if eval_gradient:
                raise ValueError("Gradient can only be evaluated when Y is None.")
            dists = cdist(X, Y, metric="sqeuclidean")
            return (
                1 + dists / (2 * self.alpha * self.length_scale**2)
            ) ** -self.alpha

        dists = squareform(pdist(X, metric="sqeuclidean"))
        base = 1 + dists / (2 * self.alpha * self.length_scale**2)
        K = base**-self.alpha
        np.fill_diagonal(K, 1)

        if not eval_gradient:
            return K

        # Gradient with respect to length_scale; it has no entries along the
        # last axis when that hyperparameter is fixed.
        if self.hyperparameter_length_scale.fixed:
            length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0))
        else:
            length_scale_gradient = dists * K / (self.length_scale**2 * base)
            length_scale_gradient = length_scale_gradient[:, :, np.newaxis]

        # Gradient with respect to alpha; it has no entries along the last
        # axis when that hyperparameter is fixed.
        if self.hyperparameter_alpha.fixed:
            alpha_gradient = np.empty((K.shape[0], K.shape[1], 0))
        else:
            alpha_gradient = K * (
                -self.alpha * np.log(base)
                + dists / (2 * self.length_scale**2 * base)
            )
            alpha_gradient = alpha_gradient[:, :, np.newaxis]

        return K, np.dstack((alpha_gradient, length_scale_gradient))

    def __repr__(self):
        """Render as ``ClassName(alpha=<value>, length_scale=<value>)``."""
        template = "{0}(alpha={1:.3g}, length_scale={2:.3g})"
        return template.format(
            self.__class__.__name__, self.alpha, self.length_scale
        )
|
|
|
|
|
class ExpSineSquared(StationaryKernelMixin, NormalizedKernelMixin, Kernel): |
|
r"""Exp-Sine-Squared kernel (aka periodic kernel). |
|
|
|
The ExpSineSquared kernel allows one to model functions which repeat |
|
themselves exactly. It is parameterized by a length scale |
|
parameter :math:`l>0` and a periodicity parameter :math:`p>0`. |
|
Only the isotropic variant where :math:`l` is a scalar is |
|
supported at the moment. The kernel is given by: |
|
|
|
.. math:: |
|
k(x_i, x_j) = \text{exp}\left(- |
|
\frac{ 2\sin^2(\pi d(x_i, x_j)/p) }{ l^ 2} \right) |
|
|
|
where :math:`l` is the length scale of the kernel, :math:`p` the |
|
periodicity of the kernel and :math:`d(\cdot,\cdot)` is the |
|
Euclidean distance. |
|
|
|
Read more in the :ref:`User Guide <gp_kernels>`. |
|
|
|
.. versionadded:: 0.18 |
|
|
|
Parameters |
|
---------- |
|
|
|
length_scale : float > 0, default=1.0 |
|
The length scale of the kernel. |
|
|
|
periodicity : float > 0, default=1.0 |
|
The periodicity of the kernel. |
|
|
|
length_scale_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5) |
|
The lower and upper bound on 'length_scale'. |
|
If set to "fixed", 'length_scale' cannot be changed during |
|
hyperparameter tuning. |
|
|
|
periodicity_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5) |
|
The lower and upper bound on 'periodicity'. |
|
If set to "fixed", 'periodicity' cannot be changed during |
|
hyperparameter tuning. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.datasets import make_friedman2 |
|
>>> from sklearn.gaussian_process import GaussianProcessRegressor |
|
>>> from sklearn.gaussian_process.kernels import ExpSineSquared |
|
>>> X, y = make_friedman2(n_samples=50, noise=0, random_state=0) |
|
>>> kernel = ExpSineSquared(length_scale=1, periodicity=1) |
|
>>> gpr = GaussianProcessRegressor(kernel=kernel, alpha=5, |
|
... random_state=0).fit(X, y) |
|
>>> gpr.score(X, y) |
|
0.0144... |
|
>>> gpr.predict(X[:2,:], return_std=True) |
|
(array([425.6..., 457.5...]), array([0.3894..., 0.3467...])) |
|
""" |
|
|
|
def __init__( |
|
self, |
|
length_scale=1.0, |
|
periodicity=1.0, |
|
length_scale_bounds=(1e-5, 1e5), |
|
periodicity_bounds=(1e-5, 1e5), |
|
): |
|
self.length_scale = length_scale |
|
self.periodicity = periodicity |
|
self.length_scale_bounds = length_scale_bounds |
|
self.periodicity_bounds = periodicity_bounds |
|
|
|
@property |
|
def hyperparameter_length_scale(self): |
|
"""Returns the length scale""" |
|
return Hyperparameter("length_scale", "numeric", self.length_scale_bounds) |
|
|
|
@property |
|
def hyperparameter_periodicity(self): |
|
return Hyperparameter("periodicity", "numeric", self.periodicity_bounds) |
|
|
|
def __call__(self, X, Y=None, eval_gradient=False): |
|
"""Return the kernel k(X, Y) and optionally its gradient. |
|
|
|
Parameters |
|
---------- |
|
X : ndarray of shape (n_samples_X, n_features) |
|
Left argument of the returned kernel k(X, Y) |
|
|
|
Y : ndarray of shape (n_samples_Y, n_features), default=None |
|
Right argument of the returned kernel k(X, Y). If None, k(X, X) |
|
if evaluated instead. |
|
|
|
eval_gradient : bool, default=False |
|
Determines whether the gradient with respect to the log of |
|
the kernel hyperparameter is computed. |
|
Only supported when Y is None. |
|
|
|
Returns |
|
------- |
|
K : ndarray of shape (n_samples_X, n_samples_Y) |
|
Kernel k(X, Y) |
|
|
|
K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims), \ |
|
optional |
|
The gradient of the kernel k(X, X) with respect to the log of the |
|
hyperparameter of the kernel. Only returned when `eval_gradient` |
|
is True. |
|
""" |
|
X = np.atleast_2d(X) |
|
if Y is None: |
|
dists = squareform(pdist(X, metric="euclidean")) |
|
arg = np.pi * dists / self.periodicity |
|
sin_of_arg = np.sin(arg) |
|
K = np.exp(-2 * (sin_of_arg / self.length_scale) ** 2) |
|
else: |
|
if eval_gradient: |
|
raise ValueError("Gradient can only be evaluated when Y is None.") |
|
dists = cdist(X, Y, metric="euclidean") |
|
K = np.exp( |
|
-2 * (np.sin(np.pi / self.periodicity * dists) / self.length_scale) ** 2 |
|
) |
|
|
|
if eval_gradient: |
|
cos_of_arg = np.cos(arg) |
|
|
|
if not self.hyperparameter_length_scale.fixed: |
|
length_scale_gradient = 4 / self.length_scale**2 * sin_of_arg**2 * K |
|
length_scale_gradient = length_scale_gradient[:, :, np.newaxis] |
|
else: |
|
length_scale_gradient = np.empty((K.shape[0], K.shape[1], 0)) |
|
|
|
if not self.hyperparameter_periodicity.fixed: |
|
periodicity_gradient = ( |
|
4 * arg / self.length_scale**2 * cos_of_arg * sin_of_arg * K |
|
) |
|
periodicity_gradient = periodicity_gradient[:, :, np.newaxis] |
|
else: |
|
periodicity_gradient = np.empty((K.shape[0], K.shape[1], 0)) |
|
|
|
return K, np.dstack((length_scale_gradient, periodicity_gradient)) |
|
else: |
|
return K |
|
|
|
def __repr__(self):
    """Return ``ClassName(length_scale=..., periodicity=...)``.

    Both hyperparameters are rendered with three significant digits.
    """
    template = "{0}(length_scale={1:.3g}, periodicity={2:.3g})"
    kernel_name = self.__class__.__name__
    return template.format(kernel_name, self.length_scale, self.periodicity)
|
|
|
|
|
class DotProduct(Kernel):
    r"""Dot-Product kernel.

    The DotProduct kernel is non-stationary and can be obtained from linear
    regression by putting :math:`N(0, 1)` priors on the coefficients
    of :math:`x_d (d = 1, . . . , D)` and a prior of :math:`N(0, \sigma_0^2)`
    on the bias. The DotProduct kernel is invariant to a rotation of
    the coordinates about the origin, but not translations.
    It is parameterized by a parameter sigma_0 :math:`\sigma_0`
    which controls the inhomogeneity of the kernel. For :math:`\sigma_0^2 = 0`,
    the kernel is called the homogeneous linear kernel, otherwise
    it is inhomogeneous. The kernel is given by

    .. math::
        k(x_i, x_j) = \sigma_0 ^ 2 + x_i \cdot x_j

    The DotProduct kernel is commonly combined with exponentiation.

    See [1]_, Chapter 4, Section 4.2, for further details regarding the
    DotProduct kernel.

    Read more in the :ref:`User Guide <gp_kernels>`.

    .. versionadded:: 0.18

    Parameters
    ----------
    sigma_0 : float >= 0, default=1.0
        Parameter controlling the inhomogeneity of the kernel. If sigma_0=0,
        the kernel is homogeneous.

    sigma_0_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'sigma_0'.
        If set to "fixed", 'sigma_0' cannot be changed during
        hyperparameter tuning.

    References
    ----------
    .. [1] `Carl Edward Rasmussen, Christopher K. I. Williams (2006).
        "Gaussian Processes for Machine Learning". The MIT Press.
        <http://www.gaussianprocess.org/gpml/>`_

    Examples
    --------
    >>> from sklearn.datasets import make_friedman2
    >>> from sklearn.gaussian_process import GaussianProcessRegressor
    >>> from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
    >>> X, y = make_friedman2(n_samples=500, noise=0, random_state=0)
    >>> kernel = DotProduct() + WhiteKernel()
    >>> gpr = GaussianProcessRegressor(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpr.score(X, y)
    0.3680...
    >>> gpr.predict(X[:2,:], return_std=True)
    (array([653.0..., 592.1...]), array([316.6..., 316.6...]))
    """

    def __init__(self, sigma_0=1.0, sigma_0_bounds=(1e-5, 1e5)):
        self.sigma_0 = sigma_0
        self.sigma_0_bounds = sigma_0_bounds

    @property
    def hyperparameter_sigma_0(self):
        return Hyperparameter("sigma_0", "numeric", self.sigma_0_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        X = np.atleast_2d(X)

        # Pick the right-hand operand; a gradient request is only valid for
        # the symmetric k(X, X) case.
        if Y is None:
            right = X
        elif eval_gradient:
            raise ValueError("Gradient can only be evaluated when Y is None.")
        else:
            right = Y

        K = np.inner(X, right) + self.sigma_0**2

        if not eval_gradient:
            return K

        if self.hyperparameter_sigma_0.fixed:
            # A fixed hyperparameter contributes no gradient slice.
            return K, np.empty((X.shape[0], X.shape[0], 0))

        # Every entry of the single gradient slice is the constant
        # 2 * sigma_0 ** 2.
        K_gradient = np.empty((K.shape[0], K.shape[1], 1))
        K_gradient[..., 0] = 2 * self.sigma_0**2
        return K, K_gradient

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y).

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X).
        """
        # Row-wise dot product of X with itself.
        row_sq_norms = np.einsum("ij,ij->i", X, X)
        return row_sq_norms + self.sigma_0**2

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return False

    def __repr__(self):
        template = "{0}(sigma_0={1:.3g})"
        return template.format(self.__class__.__name__, self.sigma_0)
|
|
|
|
|
|
|
def _approx_fprime(xk, f, epsilon, args=()):
    """Forward-difference gradient of a function returning a 2d array.

    Parameters
    ----------
    xk : ndarray of shape (n_params,)
        Point at which the gradient is approximated.

    f : callable
        Called as ``f(xk, *args)``; the result must have a two-dimensional
        ``shape``.

    epsilon : float
        Step added to one coordinate of `xk` at a time.

    args : tuple, default=()
        Extra positional arguments appended after `xk` when calling `f`.

    Returns
    -------
    grad : ndarray of shape (f0.shape[0], f0.shape[1], len(xk))
        ``grad[:, :, k]`` holds ``(f(xk + epsilon * e_k) - f(xk)) / epsilon``
        where ``e_k`` is the k-th unit vector and ``f0 = f(xk)``.
    """
    baseline = f(*((xk,) + args))
    n_params = len(xk)
    n_rows, n_cols = baseline.shape[0], baseline.shape[1]
    grad = np.zeros((n_rows, n_cols, n_params), float)

    # Each row of the identity matrix is the unit vector for one coordinate.
    for k, unit in enumerate(np.eye(n_params)):
        step = epsilon * unit
        shifted = f(*((xk + step,) + args))
        grad[:, :, k] = (shifted - baseline) / step[k]
    return grad
|
|
|
|
|
class PairwiseKernel(Kernel):
    """Wrapper for kernels in sklearn.metrics.pairwise.

    A thin wrapper around the functionality of the kernels in
    sklearn.metrics.pairwise.

    Note: Evaluation of eval_gradient is not analytic but numeric and all
          kernels support only isotropic distances. The parameter gamma is
          considered to be a hyperparameter and may be optimized. The other
          kernel parameters are set directly at initialization and are kept
          fixed.

    .. versionadded:: 0.18

    Parameters
    ----------
    gamma : float, default=1.0
        Parameter gamma of the pairwise kernel specified by metric. It should
        be positive.

    gamma_bounds : pair of floats >= 0 or "fixed", default=(1e-5, 1e5)
        The lower and upper bound on 'gamma'.
        If set to "fixed", 'gamma' cannot be changed during
        hyperparameter tuning.

    metric : {"linear", "additive_chi2", "chi2", "poly", "polynomial", \
              "rbf", "laplacian", "sigmoid", "cosine"} or callable, \
              default="linear"
        The metric to use when calculating kernel between instances in a
        feature array. If metric is a string, it must be one of the metrics
        in pairwise.PAIRWISE_KERNEL_FUNCTIONS.
        If metric is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if metric is a callable function, it is called on each
        pair of instances (rows) and the resulting value recorded. The callable
        should take two arrays from X as input and return a value indicating
        the distance between them.

    pairwise_kernels_kwargs : dict, default=None
        All entries of this dict (if any) are passed as keyword arguments to
        the pairwise kernel function.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.gaussian_process import GaussianProcessClassifier
    >>> from sklearn.gaussian_process.kernels import PairwiseKernel
    >>> X, y = load_iris(return_X_y=True)
    >>> kernel = PairwiseKernel(metric='rbf')
    >>> gpc = GaussianProcessClassifier(kernel=kernel,
    ...         random_state=0).fit(X, y)
    >>> gpc.score(X, y)
    0.9733...
    >>> gpc.predict_proba(X[:2,:])
    array([[0.8880..., 0.05663..., 0.05532...],
           [0.8676..., 0.07073..., 0.06165...]])
    """

    def __init__(
        self,
        gamma=1.0,
        gamma_bounds=(1e-5, 1e5),
        metric="linear",
        pairwise_kernels_kwargs=None,
    ):
        self.gamma = gamma
        self.gamma_bounds = gamma_bounds
        self.metric = metric
        self.pairwise_kernels_kwargs = pairwise_kernels_kwargs

    @property
    def hyperparameter_gamma(self):
        return Hyperparameter("gamma", "numeric", self.gamma_bounds)

    def __call__(self, X, Y=None, eval_gradient=False):
        """Return the kernel k(X, Y) and optionally its gradient.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Y : ndarray of shape (n_samples_Y, n_features), default=None
            Right argument of the returned kernel k(X, Y). If None, k(X, X)
            is evaluated instead.

        eval_gradient : bool, default=False
            Determines whether the gradient with respect to the log of
            the kernel hyperparameter is computed.
            Only supported when Y is None.

        Returns
        -------
        K : ndarray of shape (n_samples_X, n_samples_Y)
            Kernel k(X, Y)

        K_gradient : ndarray of shape (n_samples_X, n_samples_X, n_dims),\
                optional
            The gradient of the kernel k(X, X) with respect to the log of the
            hyperparameter of the kernel. Only returned when `eval_gradient`
            is True.
        """
        if self.pairwise_kernels_kwargs is None:
            extra_kwargs = {}
        else:
            extra_kwargs = self.pairwise_kernels_kwargs

        X = np.atleast_2d(X)

        def kernel_for(gamma_value):
            # Single place that evaluates the wrapped pairwise kernel; used
            # for K itself and for the finite-difference probes below.
            return pairwise_kernels(
                X,
                Y,
                metric=self.metric,
                gamma=gamma_value,
                filter_params=True,
                **extra_kwargs,
            )

        K = kernel_for(self.gamma)

        if not eval_gradient:
            return K

        if self.hyperparameter_gamma.fixed:
            # A fixed hyperparameter contributes no gradient slice.
            return K, np.empty((X.shape[0], X.shape[0], 0))

        # The gradient is approximated numerically: the callback receives
        # perturbed copies of self.theta and exponentiates them before they
        # are used as gamma.
        def f(log_gamma):
            return kernel_for(np.exp(log_gamma))

        return K, _approx_fprime(self.theta, f, 1e-10)

    def diag(self, X):
        """Returns the diagonal of the kernel k(X, X).

        The result of this method is identical to np.diag(self(X)); however,
        it can be evaluated more efficiently since only the diagonal is
        evaluated.

        Parameters
        ----------
        X : ndarray of shape (n_samples_X, n_features)
            Left argument of the returned kernel k(X, Y)

        Returns
        -------
        K_diag : ndarray of shape (n_samples_X,)
            Diagonal of kernel k(X, X)
        """
        # Evaluate the kernel on each row of X separately, then flatten.
        per_row = np.apply_along_axis(self, 1, X)
        return per_row.ravel()

    def is_stationary(self):
        """Returns whether the kernel is stationary."""
        return self.metric in ("rbf",)

    def __repr__(self):
        template = "{0}(gamma={1}, metric={2})"
        return template.format(self.__class__.__name__, self.gamma, self.metric)
|
|