Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

9.21 kB

	"""Kernel ridge regression."""

	# Authors: The scikit-learn developers
	# SPDX-License-Identifier: BSD-3-Clause

	from numbers import Real

	import numpy as np

	from .base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context
	from .linear_model._ridge import _solve_cholesky_kernel
	from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
	from .utils._param_validation import Interval, StrOptions
	from .utils.validation import _check_sample_weight, check_is_fitted, validate_data


	class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):
	"""Kernel ridge regression.

	Kernel ridge regression (KRR) combines ridge regression (linear least
	squares with l2-norm regularization) with the kernel trick. It thus
	learns a linear function in the space induced by the respective kernel and
	the data. For non-linear kernels, this corresponds to a non-linear
	function in the original space.

	The form of the model learned by KRR is identical to support vector
	regression (SVR). However, different loss functions are used: KRR uses
	squared error loss while support vector regression uses epsilon-insensitive
	loss, both combined with l2 regularization. In contrast to SVR, fitting a
	KRR model can be done in closed-form and is typically faster for
	medium-sized datasets. On the other hand, the learned model is non-sparse
	and thus slower than SVR, which learns a sparse model for epsilon > 0, at
	prediction-time.

	This estimator has built-in support for multi-variate regression
	(i.e., when y is a 2d-array of shape [n_samples, n_targets]).

	Read more in the :ref:`User Guide <kernel_ridge>`.

	Parameters
	----------
	alpha : float or array-like of shape (n_targets,), default=1.0
	Regularization strength; must be a positive float. Regularization
	improves the conditioning of the problem and reduces the variance of
	the estimates. Larger values specify stronger regularization.
	Alpha corresponds to ``1 / (2C)`` in other linear models such as
	:class:`~sklearn.linear_model.LogisticRegression` or
	:class:`~sklearn.svm.LinearSVC`. If an array is passed, penalties are
	assumed to be specific to the targets. Hence they must correspond in
	number. See :ref:`ridge_regression` for formula.

	kernel : str or callable, default="linear"
	Kernel mapping used internally. This parameter is directly passed to
	:class:`~sklearn.metrics.pairwise.pairwise_kernels`.
	If `kernel` is a string, it must be one of the metrics
	in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
	If `kernel` is "precomputed", X is assumed to be a kernel matrix.
	Alternatively, if `kernel` is a callable function, it is called on
	each pair of instances (rows) and the resulting value recorded. The
	callable should take two rows from X as input and return the
	corresponding kernel value as a single number. This means that
	callables from :mod:`sklearn.metrics.pairwise` are not allowed, as
	they operate on matrices, not single samples. Use the string
	identifying the kernel instead.

	gamma : float, default=None
	Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
	and sigmoid kernels. Interpretation of the default value is left to
	the kernel; see the documentation for sklearn.metrics.pairwise.
	Ignored by other kernels.

	degree : float, default=3
	Degree of the polynomial kernel. Ignored by other kernels.

	coef0 : float, default=1
	Zero coefficient for polynomial and sigmoid kernels.
	Ignored by other kernels.

	kernel_params : dict, default=None
	Additional parameters (keyword arguments) for kernel function passed
	as callable object.

	Attributes
	----------
	dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)
	Representation of weight vector(s) in kernel space

	X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)
	Training data, which is also required for prediction. If
	kernel == "precomputed" this is instead the precomputed
	training matrix, of shape (n_samples, n_samples).

	n_features_in_ : int
	Number of features seen during :term:`fit`.

	.. versionadded:: 0.24

	feature_names_in_ : ndarray of shape (`n_features_in_`,)
	Names of features seen during :term:`fit`. Defined only when `X`
	has feature names that are all strings.

	.. versionadded:: 1.0

	See Also
	--------
	sklearn.gaussian_process.GaussianProcessRegressor : Gaussian
	Process regressor providing automatic kernel hyperparameters
	tuning and predictions uncertainty.
	sklearn.linear_model.Ridge : Linear ridge regression.
	sklearn.linear_model.RidgeCV : Ridge regression with built-in
	cross-validation.
	sklearn.svm.SVR : Support Vector Regression accepting a large variety
	of kernels.

	References
	----------
	* Kevin P. Murphy
	"Machine Learning: A Probabilistic Perspective", The MIT Press
	chapter 14.4.3, pp. 492-493

	Examples
	--------
	>>> from sklearn.kernel_ridge import KernelRidge
	>>> import numpy as np
	>>> n_samples, n_features = 10, 5
	>>> rng = np.random.RandomState(0)
	>>> y = rng.randn(n_samples)
	>>> X = rng.randn(n_samples, n_features)
	>>> krr = KernelRidge(alpha=1.0)
	>>> krr.fit(X, y)
	KernelRidge(alpha=1.0)
	"""

	_parameter_constraints: dict = {
	"alpha": [Interval(Real, 0, None, closed="left"), "array-like"],
	"kernel": [
	StrOptions(set(PAIRWISE_KERNEL_FUNCTIONS.keys()) \| {"precomputed"}),
	callable,
	],
	"gamma": [Interval(Real, 0, None, closed="left"), None],
	"degree": [Interval(Real, 0, None, closed="left")],
	"coef0": [Interval(Real, None, None, closed="neither")],
	"kernel_params": [dict, None],
	}

	def __init__(
	self,
	alpha=1,
	*,
	kernel="linear",
	gamma=None,
	degree=3,
	coef0=1,
	kernel_params=None,
	):
	self.alpha = alpha
	self.kernel = kernel
	self.gamma = gamma
	self.degree = degree
	self.coef0 = coef0
	self.kernel_params = kernel_params

	def _get_kernel(self, X, Y=None):
	if callable(self.kernel):
	params = self.kernel_params or {}
	else:
	params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
	return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, **params)

	def __sklearn_tags__(self):
	tags = super().__sklearn_tags__()
	tags.input_tags.sparse = True
	tags.input_tags.pairwise = self.kernel == "precomputed"
	return tags

	@_fit_context(prefer_skip_nested_validation=True)
	def fit(self, X, y, sample_weight=None):
	"""Fit Kernel Ridge regression model.

	Parameters
	----------
	X : {array-like, sparse matrix} of shape (n_samples, n_features)
	Training data. If kernel == "precomputed" this is instead
	a precomputed kernel matrix, of shape (n_samples, n_samples).

	y : array-like of shape (n_samples,) or (n_samples, n_targets)
	Target values.

	sample_weight : float or array-like of shape (n_samples,), default=None
	Individual weights for each sample, ignored if None is passed.

	Returns
	-------
	self : object
	Returns the instance itself.
	"""
	# Convert data
	X, y = validate_data(
	self, X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
	)
	if sample_weight is not None and not isinstance(sample_weight, float):
	sample_weight = _check_sample_weight(sample_weight, X)

	K = self._get_kernel(X)
	alpha = np.atleast_1d(self.alpha)

	ravel = False
	if len(y.shape) == 1:
	y = y.reshape(-1, 1)
	ravel = True

	copy = self.kernel == "precomputed"
	self.dual_coef_ = _solve_cholesky_kernel(K, y, alpha, sample_weight, copy)
	if ravel:
	self.dual_coef_ = self.dual_coef_.ravel()

	self.X_fit_ = X

	return self

	def predict(self, X):
	"""Predict using the kernel ridge model.

	Parameters
	----------
	X : {array-like, sparse matrix} of shape (n_samples, n_features)
	Samples. If kernel == "precomputed" this is instead a
	precomputed kernel matrix, shape = [n_samples,
	n_samples_fitted], where n_samples_fitted is the number of
	samples used in the fitting for this estimator.

	Returns
	-------
	C : ndarray of shape (n_samples,) or (n_samples, n_targets)
	Returns predicted values.
	"""
	check_is_fitted(self)
	X = validate_data(self, X, accept_sparse=("csr", "csc"), reset=False)
	K = self._get_kernel(X, self.X_fit_)
	return np.dot(K, self.dual_coef_)