|
|
|
|
|
|
|
from numbers import Integral, Real |
|
|
|
import numpy as np |
|
|
|
from ..base import BaseEstimator, OutlierMixin, RegressorMixin, _fit_context |
|
from ..linear_model._base import LinearClassifierMixin, LinearModel, SparseCoefMixin |
|
from ..utils._param_validation import Interval, StrOptions |
|
from ..utils.multiclass import check_classification_targets |
|
from ..utils.validation import _num_samples, validate_data |
|
from ._base import BaseLibSVM, BaseSVC, _fit_liblinear, _get_liblinear_solver_type |
|
|
|
|
|
def _validate_dual_parameter(dual, loss, penalty, multi_class, X): |
|
"""Helper function to assign the value of dual parameter.""" |
|
if dual == "auto": |
|
if X.shape[0] < X.shape[1]: |
|
try: |
|
_get_liblinear_solver_type(multi_class, penalty, loss, True) |
|
return True |
|
except ValueError: |
|
return False |
|
else: |
|
try: |
|
_get_liblinear_solver_type(multi_class, penalty, loss, False) |
|
return False |
|
except ValueError: |
|
return True |
|
else: |
|
return dual |
|
|
|
|
|
class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): |
|
"""Linear Support Vector Classification. |
|
|
|
Similar to SVC with parameter kernel='linear', but implemented in terms of |
|
liblinear rather than libsvm, so it has more flexibility in the choice of |
|
penalties and loss functions and should scale better to large numbers of |
|
samples. |
|
|
|
The main differences between :class:`~sklearn.svm.LinearSVC` and |
|
:class:`~sklearn.svm.SVC` lie in the loss function used by default, and in |
|
the handling of intercept regularization between those two implementations. |
|
|
|
This class supports both dense and sparse input and the multiclass support |
|
is handled according to a one-vs-the-rest scheme. |
|
|
|
Read more in the :ref:`User Guide <svm_classification>`. |
|
|
|
Parameters |
|
---------- |
|
penalty : {'l1', 'l2'}, default='l2' |
|
Specifies the norm used in the penalization. The 'l2' |
|
penalty is the standard used in SVC. The 'l1' leads to ``coef_`` |
|
vectors that are sparse. |
|
|
|
loss : {'hinge', 'squared_hinge'}, default='squared_hinge' |
|
Specifies the loss function. 'hinge' is the standard SVM loss |
|
(used e.g. by the SVC class) while 'squared_hinge' is the |
|
square of the hinge loss. The combination of ``penalty='l1'`` |
|
and ``loss='hinge'`` is not supported. |
|
|
|
dual : "auto" or bool, default="auto" |
|
Select the algorithm to either solve the dual or primal |
|
optimization problem. Prefer dual=False when n_samples > n_features. |
|
`dual="auto"` will choose the value of the parameter automatically, |
|
based on the values of `n_samples`, `n_features`, `loss`, `multi_class` |
|
        and `penalty`. If `n_samples` < `n_features` and the optimizer supports
        the chosen `loss`, `multi_class` and `penalty`, then `dual` is set to
        True; otherwise it is set to False.
|
|
|
.. versionchanged:: 1.3 |
|
The `"auto"` option is added in version 1.3 and will be the default |
|
in version 1.5. |
|
|
|
tol : float, default=1e-4 |
|
Tolerance for stopping criteria. |
|
|
|
C : float, default=1.0 |
|
Regularization parameter. The strength of the regularization is |
|
inversely proportional to C. Must be strictly positive. |
|
For an intuitive visualization of the effects of scaling |
|
the regularization parameter C, see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. |
|
|
|
multi_class : {'ovr', 'crammer_singer'}, default='ovr' |
|
Determines the multi-class strategy if `y` contains more than |
|
two classes. |
|
``"ovr"`` trains n_classes one-vs-rest classifiers, while |
|
``"crammer_singer"`` optimizes a joint objective over all classes. |
|
While `crammer_singer` is interesting from a theoretical perspective |
|
as it is consistent, it is seldom used in practice as it rarely leads |
|
to better accuracy and is more expensive to compute. |
|
If ``"crammer_singer"`` is chosen, the options loss, penalty and dual |
|
will be ignored. |
|
|
|
fit_intercept : bool, default=True |
|
Whether or not to fit an intercept. If set to True, the feature vector |
|
is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where |
|
1 corresponds to the intercept. If set to False, no intercept will be |
|
used in calculations (i.e. data is expected to be already centered). |
|
|
|
intercept_scaling : float, default=1.0 |
|
When `fit_intercept` is True, the instance vector x becomes ``[x_1, |
|
..., x_n, intercept_scaling]``, i.e. a "synthetic" feature with a |
|
constant value equal to `intercept_scaling` is appended to the instance |
|
vector. The intercept becomes intercept_scaling * synthetic feature |
|
weight. Note that liblinear internally penalizes the intercept, |
|
treating it like any other term in the feature vector. To reduce the |
|
impact of the regularization on the intercept, the `intercept_scaling` |
|
parameter can be set to a value greater than 1; the higher the value of |
|
`intercept_scaling`, the lower the impact of regularization on it. |
|
Then, the weights become `[w_x_1, ..., w_x_n, |
|
w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent |
|
the feature weights and the intercept weight is scaled by |
|
`intercept_scaling`. This scaling allows the intercept term to have a |
|
different regularization behavior compared to the other features. |
|
|
|
class_weight : dict or 'balanced', default=None |
|
Set the parameter C of class i to ``class_weight[i]*C`` for |
|
        LinearSVC. If not given, all classes are supposed to have
|
weight one. |
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
verbose : int, default=0 |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in liblinear that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Controls the pseudo random number generation for shuffling the data for |
|
the dual coordinate descent (if ``dual=True``). When ``dual=False`` the |
|
underlying implementation of :class:`LinearSVC` is not random and |
|
``random_state`` has no effect on the results. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
max_iter : int, default=1000 |
|
The maximum number of iterations to be run. |
|
|
|
Attributes |
|
---------- |
|
coef_ : ndarray of shape (1, n_features) if n_classes == 2 \ |
|
else (n_classes, n_features) |
|
Weights assigned to the features (coefficients in the primal |
|
problem). |
|
|
|
``coef_`` is a readonly property derived from ``raw_coef_`` that |
|
follows the internal memory layout of liblinear. |
|
|
|
intercept_ : ndarray of shape (1,) if n_classes == 2 else (n_classes,) |
|
Constants in decision function. |
|
|
|
classes_ : ndarray of shape (n_classes,) |
|
The unique classes labels. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : int |
|
Maximum number of iterations run across all classes. |
|
|
|
See Also |
|
-------- |
|
SVC : Implementation of Support Vector Machine classifier using libsvm: |
|
        the kernel can be non-linear but its SMO algorithm does not
        scale to large numbers of samples as LinearSVC does.
|
|
|
Furthermore SVC multi-class mode is implemented using one |
|
vs one scheme while LinearSVC uses one vs the rest. It is |
|
possible to implement one vs the rest with SVC by using the |
|
:class:`~sklearn.multiclass.OneVsRestClassifier` wrapper. |
|
|
|
Finally SVC can fit dense data without memory copy if the input |
|
is C-contiguous. Sparse data will still incur memory copy though. |
|
|
|
sklearn.linear_model.SGDClassifier : SGDClassifier can optimize the same |
|
cost function as LinearSVC |
|
by adjusting the penalty and loss parameters. In addition it requires |
|
less memory, allows incremental (online) learning, and implements |
|
various loss functions and regularization regimes. |
|
|
|
Notes |
|
----- |
|
The underlying C implementation uses a random number generator to |
|
select features when fitting the model. It is thus not uncommon |
|
to have slightly different results for the same input data. If |
|
that happens, try with a smaller ``tol`` parameter. |
|
|
|
The underlying implementation, liblinear, uses a sparse internal |
|
representation for the data that will incur a memory copy. |
|
|
|
Predict output may not match that of standalone liblinear in certain |
|
cases. See :ref:`differences from liblinear <liblinear_differences>` |
|
in the narrative documentation. |
|
|
|
References |
|
---------- |
|
`LIBLINEAR: A Library for Large Linear Classification |
|
<https://www.csie.ntu.edu.tw/~cjlin/liblinear/>`__ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.svm import LinearSVC |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> from sklearn.datasets import make_classification |
|
>>> X, y = make_classification(n_features=4, random_state=0) |
|
>>> clf = make_pipeline(StandardScaler(), |
|
... LinearSVC(random_state=0, tol=1e-5)) |
|
>>> clf.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), |
|
('linearsvc', LinearSVC(random_state=0, tol=1e-05))]) |
|
|
|
>>> print(clf.named_steps['linearsvc'].coef_) |
|
[[0.141... 0.526... 0.679... 0.493...]] |
|
|
|
>>> print(clf.named_steps['linearsvc'].intercept_) |
|
[0.1693...] |
|
>>> print(clf.predict([[0, 0, 0, 0]])) |
|
[1] |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"penalty": [StrOptions({"l1", "l2"})], |
|
"loss": [StrOptions({"hinge", "squared_hinge"})], |
|
"dual": ["boolean", StrOptions({"auto"})], |
|
"tol": [Interval(Real, 0.0, None, closed="neither")], |
|
"C": [Interval(Real, 0.0, None, closed="neither")], |
|
"multi_class": [StrOptions({"ovr", "crammer_singer"})], |
|
"fit_intercept": ["boolean"], |
|
"intercept_scaling": [Interval(Real, 0, None, closed="neither")], |
|
"class_weight": [None, dict, StrOptions({"balanced"})], |
|
"verbose": ["verbose"], |
|
"random_state": ["random_state"], |
|
"max_iter": [Interval(Integral, 0, None, closed="left")], |
|
} |
|
|
|
def __init__( |
|
self, |
|
penalty="l2", |
|
loss="squared_hinge", |
|
*, |
|
dual="auto", |
|
tol=1e-4, |
|
C=1.0, |
|
multi_class="ovr", |
|
fit_intercept=True, |
|
intercept_scaling=1, |
|
class_weight=None, |
|
verbose=0, |
|
random_state=None, |
|
max_iter=1000, |
|
): |
|
self.dual = dual |
|
self.tol = tol |
|
self.C = C |
|
self.multi_class = multi_class |
|
self.fit_intercept = fit_intercept |
|
self.intercept_scaling = intercept_scaling |
|
self.class_weight = class_weight |
|
self.verbose = verbose |
|
self.random_state = random_state |
|
self.max_iter = max_iter |
|
self.penalty = penalty |
|
self.loss = loss |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y, sample_weight=None): |
|
"""Fit the model according to the given training data. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) |
|
Target vector relative to X. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Array of weights that are assigned to individual |
|
samples. If not provided, |
|
then each sample is given unit weight. |
|
|
|
.. versionadded:: 0.18 |
|
|
|
Returns |
|
------- |
|
self : object |
|
An instance of the estimator. |
|
""" |
|
X, y = validate_data( |
|
self, |
|
X, |
|
y, |
|
accept_sparse="csr", |
|
dtype=np.float64, |
|
order="C", |
|
accept_large_sparse=False, |
|
) |
|
check_classification_targets(y) |
|
self.classes_ = np.unique(y) |
|
|
|
_dual = _validate_dual_parameter( |
|
self.dual, self.loss, self.penalty, self.multi_class, X |
|
) |
|
|
|
self.coef_, self.intercept_, n_iter_ = _fit_liblinear( |
|
X, |
|
y, |
|
self.C, |
|
self.fit_intercept, |
|
self.intercept_scaling, |
|
self.class_weight, |
|
self.penalty, |
|
_dual, |
|
self.verbose, |
|
self.max_iter, |
|
self.tol, |
|
self.random_state, |
|
self.multi_class, |
|
self.loss, |
|
sample_weight=sample_weight, |
|
) |
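        # `_fit_liblinear` returns an array with one iteration count per
        # fitted model; only its maximum is exposed as `n_iter_` below.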
|
|
|
|
|
|
|
|
|
self.n_iter_ = n_iter_.max().item() |
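
        # With two classes, the Crammer-Singer solver returns one row of
        # coefficients per class; collapse them into the single-row layout
        # used by the binary one-vs-rest case below.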
|
|
|
if self.multi_class == "crammer_singer" and len(self.classes_) == 2: |
|
self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1) |
|
if self.fit_intercept: |
|
intercept = self.intercept_[1] - self.intercept_[0] |
|
self.intercept_ = np.array([intercept]) |
|
|
|
return self |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
class LinearSVR(RegressorMixin, LinearModel): |
|
"""Linear Support Vector Regression. |
|
|
|
Similar to SVR with parameter kernel='linear', but implemented in terms of |
|
liblinear rather than libsvm, so it has more flexibility in the choice of |
|
penalties and loss functions and should scale better to large numbers of |
|
samples. |
|
|
|
The main differences between :class:`~sklearn.svm.LinearSVR` and |
|
:class:`~sklearn.svm.SVR` lie in the loss function used by default, and in |
|
the handling of intercept regularization between those two implementations. |
|
|
|
This class supports both dense and sparse input. |
|
|
|
Read more in the :ref:`User Guide <svm_regression>`. |
|
|
|
.. versionadded:: 0.16 |
|
|
|
Parameters |
|
---------- |
|
epsilon : float, default=0.0 |
|
Epsilon parameter in the epsilon-insensitive loss function. Note |
|
that the value of this parameter depends on the scale of the target |
|
variable y. If unsure, set ``epsilon=0``. |
|
|
|
tol : float, default=1e-4 |
|
Tolerance for stopping criteria. |
|
|
|
C : float, default=1.0 |
|
Regularization parameter. The strength of the regularization is |
|
inversely proportional to C. Must be strictly positive. |
|
|
|
loss : {'epsilon_insensitive', 'squared_epsilon_insensitive'}, \ |
|
default='epsilon_insensitive' |
|
Specifies the loss function. The epsilon-insensitive loss |
|
(standard SVR) is the L1 loss, while the squared epsilon-insensitive |
|
loss ('squared_epsilon_insensitive') is the L2 loss. |
|
|
|
fit_intercept : bool, default=True |
|
Whether or not to fit an intercept. If set to True, the feature vector |
|
is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where |
|
1 corresponds to the intercept. If set to False, no intercept will be |
|
used in calculations (i.e. data is expected to be already centered). |
|
|
|
intercept_scaling : float, default=1.0 |
|
When `fit_intercept` is True, the instance vector x becomes `[x_1, ..., |
|
x_n, intercept_scaling]`, i.e. a "synthetic" feature with a constant |
|
value equal to `intercept_scaling` is appended to the instance vector. |
|
The intercept becomes intercept_scaling * synthetic feature weight. |
|
Note that liblinear internally penalizes the intercept, treating it |
|
like any other term in the feature vector. To reduce the impact of the |
|
regularization on the intercept, the `intercept_scaling` parameter can |
|
be set to a value greater than 1; the higher the value of |
|
`intercept_scaling`, the lower the impact of regularization on it. |
|
Then, the weights become `[w_x_1, ..., w_x_n, |
|
w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent |
|
the feature weights and the intercept weight is scaled by |
|
`intercept_scaling`. This scaling allows the intercept term to have a |
|
different regularization behavior compared to the other features. |
|
|
|
dual : "auto" or bool, default="auto" |
|
Select the algorithm to either solve the dual or primal |
|
optimization problem. Prefer dual=False when n_samples > n_features. |
|
`dual="auto"` will choose the value of the parameter automatically, |
|
based on the values of `n_samples`, `n_features` and `loss`. If |
|
        `n_samples` < `n_features` and the optimizer supports the chosen `loss`,
        then `dual` is set to True; otherwise it is set to False.
|
|
|
.. versionchanged:: 1.3 |
|
The `"auto"` option is added in version 1.3 and will be the default |
|
in version 1.5. |
|
|
|
verbose : int, default=0 |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in liblinear that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Controls the pseudo random number generation for shuffling the data. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
max_iter : int, default=1000 |
|
The maximum number of iterations to be run. |
|
|
|
Attributes |
|
---------- |
|
    coef_ : ndarray of shape (n_features,)
        Weights assigned to the features (coefficients in the primal
        problem).
|
|
|
`coef_` is a readonly property derived from `raw_coef_` that |
|
follows the internal memory layout of liblinear. |
|
|
|
    intercept_ : ndarray of shape (1,)
|
Constants in decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : int |
|
        Maximum number of iterations run by the optimization routine to fit
        the model.
|
|
|
See Also |
|
-------- |
|
LinearSVC : Implementation of Support Vector Machine classifier using the |
|
same library as this class (liblinear). |
|
|
|
SVR : Implementation of Support Vector Machine regression using libsvm: |
|
        the kernel can be non-linear but its SMO algorithm does not scale to
        large numbers of samples as :class:`~sklearn.svm.LinearSVR` does.
|
|
|
sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost |
|
function as LinearSVR |
|
by adjusting the penalty and loss parameters. In addition it requires |
|
less memory, allows incremental (online) learning, and implements |
|
various loss functions and regularization regimes. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.svm import LinearSVR |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> from sklearn.datasets import make_regression |
|
>>> X, y = make_regression(n_features=4, random_state=0) |
|
>>> regr = make_pipeline(StandardScaler(), |
|
... LinearSVR(random_state=0, tol=1e-5)) |
|
>>> regr.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), |
|
('linearsvr', LinearSVR(random_state=0, tol=1e-05))]) |
|
|
|
>>> print(regr.named_steps['linearsvr'].coef_) |
|
[18.582... 27.023... 44.357... 64.522...] |
|
>>> print(regr.named_steps['linearsvr'].intercept_) |
|
[-4...] |
|
>>> print(regr.predict([[0, 0, 0, 0]])) |
|
[-2.384...] |
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
"epsilon": [Real], |
|
"tol": [Interval(Real, 0.0, None, closed="neither")], |
|
"C": [Interval(Real, 0.0, None, closed="neither")], |
|
"loss": [StrOptions({"epsilon_insensitive", "squared_epsilon_insensitive"})], |
|
"fit_intercept": ["boolean"], |
|
"intercept_scaling": [Interval(Real, 0, None, closed="neither")], |
|
"dual": ["boolean", StrOptions({"auto"})], |
|
"verbose": ["verbose"], |
|
"random_state": ["random_state"], |
|
"max_iter": [Interval(Integral, 0, None, closed="left")], |
|
} |
|
|
|
def __init__( |
|
self, |
|
*, |
|
epsilon=0.0, |
|
tol=1e-4, |
|
C=1.0, |
|
loss="epsilon_insensitive", |
|
fit_intercept=True, |
|
intercept_scaling=1.0, |
|
dual="auto", |
|
verbose=0, |
|
random_state=None, |
|
max_iter=1000, |
|
): |
|
self.tol = tol |
|
self.C = C |
|
self.epsilon = epsilon |
|
self.fit_intercept = fit_intercept |
|
self.intercept_scaling = intercept_scaling |
|
self.verbose = verbose |
|
self.random_state = random_state |
|
self.max_iter = max_iter |
|
self.dual = dual |
|
self.loss = loss |
|
|
|
@_fit_context(prefer_skip_nested_validation=True) |
|
def fit(self, X, y, sample_weight=None): |
|
"""Fit the model according to the given training data. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Training vector, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : array-like of shape (n_samples,) |
|
Target vector relative to X. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Array of weights that are assigned to individual |
|
samples. If not provided, |
|
then each sample is given unit weight. |
|
|
|
.. versionadded:: 0.18 |
|
|
|
Returns |
|
------- |
|
self : object |
|
An instance of the estimator. |
|
""" |
|
X, y = validate_data( |
|
self, |
|
X, |
|
y, |
|
accept_sparse="csr", |
|
dtype=np.float64, |
|
order="C", |
|
accept_large_sparse=False, |
|
) |
|
penalty = "l2" |
|
|
|
_dual = _validate_dual_parameter(self.dual, self.loss, penalty, "ovr", X) |
|
|
|
self.coef_, self.intercept_, n_iter_ = _fit_liblinear( |
|
X, |
|
y, |
|
self.C, |
|
self.fit_intercept, |
|
self.intercept_scaling, |
|
None, |
|
penalty, |
|
_dual, |
|
self.verbose, |
|
self.max_iter, |
|
self.tol, |
|
self.random_state, |
|
loss=self.loss, |
|
epsilon=self.epsilon, |
|
sample_weight=sample_weight, |
|
) |
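        # liblinear returns `coef_` with shape (1, n_features); flatten it to
        # the 1-D shape documented for this regressor.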
|
self.coef_ = self.coef_.ravel() |
|
|
|
|
|
|
|
|
|
self.n_iter_ = n_iter_.max().item() |
|
|
|
return self |
|
|
|
def __sklearn_tags__(self): |
|
tags = super().__sklearn_tags__() |
|
tags.input_tags.sparse = True |
|
return tags |
|
|
|
|
|
class SVC(BaseSVC): |
|
"""C-Support Vector Classification. |
|
|
|
The implementation is based on libsvm. The fit time scales at least |
|
quadratically with the number of samples and may be impractical |
|
beyond tens of thousands of samples. For large datasets |
|
consider using :class:`~sklearn.svm.LinearSVC` or |
|
:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a |
|
:class:`~sklearn.kernel_approximation.Nystroem` transformer or |
|
other :ref:`kernel_approximation`. |
|
|
|
The multiclass support is handled according to a one-vs-one scheme. |
|
|
|
For details on the precise mathematical formulation of the provided |
|
kernel functions and how `gamma`, `coef0` and `degree` affect each |
|
other, see the corresponding section in the narrative documentation: |
|
:ref:`svm_kernels`. |
|
|
|
To learn how to tune SVC's hyperparameters, see the following example: |
|
:ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` |
|
|
|
Read more in the :ref:`User Guide <svm_classification>`. |
|
|
|
Parameters |
|
---------- |
|
C : float, default=1.0 |
|
Regularization parameter. The strength of the regularization is |
|
inversely proportional to C. Must be strictly positive. The penalty |
|
is a squared l2 penalty. For an intuitive visualization of the effects |
|
of scaling the regularization parameter C, see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. |
|
|
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ |
|
default='rbf' |
|
Specifies the kernel type to be used in the algorithm. If |
|
none is given, 'rbf' will be used. If a callable is given it is used to |
|
pre-compute the kernel matrix from data matrices; that matrix should be |
|
an array of shape ``(n_samples, n_samples)``. For an intuitive |
|
visualization of different kernel types see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. |
|
|
|
degree : int, default=3 |
|
Degree of the polynomial kernel function ('poly'). |
|
Must be non-negative. Ignored by all other kernels. |
|
|
|
gamma : {'scale', 'auto'} or float, default='scale' |
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. |
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses |
|
          1 / (n_features * X.var()) as the value of gamma,
|
- if 'auto', uses 1 / n_features |
|
- if float, must be non-negative. |
|
|
|
.. versionchanged:: 0.22 |
|
The default value of ``gamma`` changed from 'auto' to 'scale'. |
|
|
|
coef0 : float, default=0.0 |
|
Independent term in kernel function. |
|
It is only significant in 'poly' and 'sigmoid'. |
|
|
|
shrinking : bool, default=True |
|
Whether to use the shrinking heuristic. |
|
See the :ref:`User Guide <shrinking_svm>`. |
|
|
|
probability : bool, default=False |
|
Whether to enable probability estimates. This must be enabled prior |
|
        to calling `fit`; it will slow down that method as it internally uses
|
5-fold cross-validation, and `predict_proba` may be inconsistent with |
|
`predict`. Read more in the :ref:`User Guide <scores_probabilities>`. |
|
|
|
tol : float, default=1e-3 |
|
Tolerance for stopping criterion. |
|
|
|
cache_size : float, default=200 |
|
Specify the size of the kernel cache (in MB). |
|
|
|
class_weight : dict or 'balanced', default=None |
|
Set the parameter C of class i to class_weight[i]*C for |
|
SVC. If not given, all classes are supposed to have |
|
weight one. |
|
The "balanced" mode uses the values of y to automatically adjust |
|
weights inversely proportional to class frequencies in the input data |
|
as ``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
verbose : bool, default=False |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in libsvm that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
max_iter : int, default=-1 |
|
Hard limit on iterations within solver, or -1 for no limit. |
|
|
|
decision_function_shape : {'ovo', 'ovr'}, default='ovr' |
|
Whether to return a one-vs-rest ('ovr') decision function of shape |
|
(n_samples, n_classes) as all other classifiers, or the original |
|
one-vs-one ('ovo') decision function of libsvm which has shape |
|
(n_samples, n_classes * (n_classes - 1) / 2). However, note that |
|
internally, one-vs-one ('ovo') is always used as a multi-class strategy |
|
to train models; an ovr matrix is only constructed from the ovo matrix. |
|
The parameter is ignored for binary classification. |
|
|
|
.. versionchanged:: 0.19 |
|
decision_function_shape is 'ovr' by default. |
|
|
|
.. versionadded:: 0.17 |
|
*decision_function_shape='ovr'* is recommended. |
|
|
|
.. versionchanged:: 0.17 |
|
Deprecated *decision_function_shape='ovo' and None*. |
|
|
|
break_ties : bool, default=False |
|
If true, ``decision_function_shape='ovr'``, and number of classes > 2, |
|
:term:`predict` will break ties according to the confidence values of |
|
:term:`decision_function`; otherwise the first class among the tied |
|
classes is returned. Please note that breaking ties comes at a |
|
relatively high computational cost compared to a simple predict. See |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_tie_breaking.py` for an |
|
example of its usage with ``decision_function_shape='ovr'``. |
|
|
|
.. versionadded:: 0.22 |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Controls the pseudo random number generation for shuffling the data for |
|
probability estimates. Ignored when `probability` is False. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
class_weight_ : ndarray of shape (n_classes,) |
|
Multipliers of parameter C for each class. |
|
Computed based on the ``class_weight`` parameter. |
|
|
|
classes_ : ndarray of shape (n_classes,) |
|
The classes labels. |
|
|
|
coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features) |
|
Weights assigned to the features (coefficients in the primal |
|
problem). This is only available in the case of a linear kernel. |
|
|
|
`coef_` is a readonly property derived from `dual_coef_` and |
|
`support_vectors_`. |
|
|
|
    dual_coef_ : ndarray of shape (n_classes - 1, n_SV)
|
Dual coefficients of the support vector in the decision |
|
function (see :ref:`sgd_mathematical_formulation`), multiplied by |
|
their targets. |
|
For multiclass, coefficient for all 1-vs-1 classifiers. |
|
The layout of the coefficients in the multiclass case is somewhat |
|
non-trivial. See the :ref:`multi-class section of the User Guide |
|
<svm_multi_class>` for details. |
|
|
|
fit_status_ : int |
|
        0 if correctly fitted, 1 otherwise (in which case a warning is raised).
|
|
|
intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) |
|
Constants in decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,) |
|
Number of iterations run by the optimization routine to fit the model. |
|
The shape of this attribute depends on the number of models optimized |
|
which in turn depends on the number of classes. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
    support_ : ndarray of shape (n_SV,)
|
Indices of support vectors. |
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features) |
|
Support vectors. An empty array if kernel is precomputed. |
|
|
|
n_support_ : ndarray of shape (n_classes,), dtype=int32 |
|
Number of support vectors for each class. |
|
|
|
    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
|
If `probability=True`, it corresponds to the parameters learned in |
|
Platt scaling to produce probability estimates from decision values. |
|
If `probability=False`, it's an empty array. Platt scaling uses the |
|
logistic function |
|
``1 / (1 + exp(decision_value * probA_ + probB_))`` |
|
where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For |
|
more information on the multiclass case and training procedure see |
|
section 8 of [1]_. |
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,) |
|
Array dimensions of training vector ``X``. |
|
|
|
See Also |
|
-------- |
|
SVR : Support Vector Machine for Regression implemented using libsvm. |
|
|
|
LinearSVC : Scalable Linear Support Vector Machine for classification |
|
implemented using liblinear. Check the See Also section of |
|
        LinearSVC for more comparison elements.
|
|
|
References |
|
---------- |
|
.. [1] `LIBSVM: A Library for Support Vector Machines |
|
<http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_ |
|
|
|
.. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector |
|
Machines and Comparisons to Regularized Likelihood Methods" |
|
<https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_ |
|
|
|
Examples |
|
-------- |
|
>>> import numpy as np |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) |
|
>>> y = np.array([1, 1, 2, 2]) |
|
>>> from sklearn.svm import SVC |
|
>>> clf = make_pipeline(StandardScaler(), SVC(gamma='auto')) |
|
>>> clf.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), |
|
('svc', SVC(gamma='auto'))]) |
|
|
|
>>> print(clf.predict([[-0.8, -1]])) |
|
[1] |
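
    For a binary problem, the decision function has shape ``(n_samples,)``
    regardless of ``decision_function_shape``:

    >>> clf.decision_function([[-0.8, -1]]).shape
    (1,)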
|
|
|
    For a comparison of SVC with other classifiers, see:
|
:ref:`sphx_glr_auto_examples_classification_plot_classification_probability.py`. |
|
""" |
|
|
|
_impl = "c_svc" |
|
|
|
def __init__( |
|
self, |
|
*, |
|
C=1.0, |
|
kernel="rbf", |
|
degree=3, |
|
gamma="scale", |
|
coef0=0.0, |
|
shrinking=True, |
|
probability=False, |
|
tol=1e-3, |
|
cache_size=200, |
|
class_weight=None, |
|
verbose=False, |
|
max_iter=-1, |
|
decision_function_shape="ovr", |
|
break_ties=False, |
|
random_state=None, |
|
): |
|
super().__init__( |
|
kernel=kernel, |
|
degree=degree, |
|
gamma=gamma, |
|
coef0=coef0, |
|
tol=tol, |
|
C=C, |
|
nu=0.0, |
|
shrinking=shrinking, |
|
probability=probability, |
|
cache_size=cache_size, |
|
class_weight=class_weight, |
|
verbose=verbose, |
|
max_iter=max_iter, |
|
decision_function_shape=decision_function_shape, |
|
break_ties=break_ties, |
|
random_state=random_state, |
|
) |
|
|
|
|
|
class NuSVC(BaseSVC): |
|
"""Nu-Support Vector Classification. |
|
|
|
Similar to SVC but uses a parameter to control the number of support |
|
vectors. |
|
|
|
The implementation is based on libsvm. |
|
|
|
Read more in the :ref:`User Guide <svm_classification>`. |
|
|
|
Parameters |
|
---------- |
|
nu : float, default=0.5 |
|
An upper bound on the fraction of margin errors (see :ref:`User Guide |
|
<nu_svc>`) and a lower bound of the fraction of support vectors. |
|
Should be in the interval (0, 1]. |
|
|
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ |
|
default='rbf' |
|
Specifies the kernel type to be used in the algorithm. |
|
If none is given, 'rbf' will be used. If a callable is given it is |
|
used to precompute the kernel matrix. For an intuitive |
|
visualization of different kernel types see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. |
|
|
|
degree : int, default=3 |
|
Degree of the polynomial kernel function ('poly'). |
|
Must be non-negative. Ignored by all other kernels. |
|
|
|
gamma : {'scale', 'auto'} or float, default='scale' |
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. |
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses |
|
          1 / (n_features * X.var()) as the value of gamma,
|
- if 'auto', uses 1 / n_features |
|
- if float, must be non-negative. |
|
|
|
.. versionchanged:: 0.22 |
|
The default value of ``gamma`` changed from 'auto' to 'scale'. |
|
|
|
coef0 : float, default=0.0 |
|
Independent term in kernel function. |
|
It is only significant in 'poly' and 'sigmoid'. |
|
|
|
shrinking : bool, default=True |
|
Whether to use the shrinking heuristic. |
|
See the :ref:`User Guide <shrinking_svm>`. |
|
|
|
probability : bool, default=False |
|
Whether to enable probability estimates. This must be enabled prior |
|
        to calling `fit`; it will slow down that method as it internally uses
|
5-fold cross-validation, and `predict_proba` may be inconsistent with |
|
`predict`. Read more in the :ref:`User Guide <scores_probabilities>`. |
|
|
|
tol : float, default=1e-3 |
|
Tolerance for stopping criterion. |
|
|
|
cache_size : float, default=200 |
|
Specify the size of the kernel cache (in MB). |
|
|
|
class_weight : {dict, 'balanced'}, default=None |
|
Set the parameter C of class i to class_weight[i]*C for |
|
SVC. If not given, all classes are supposed to have |
|
weight one. The "balanced" mode uses the values of y to automatically |
|
adjust weights inversely proportional to class frequencies as |
|
``n_samples / (n_classes * np.bincount(y))``. |
|
|
|
verbose : bool, default=False |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in libsvm that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
max_iter : int, default=-1 |
|
Hard limit on iterations within solver, or -1 for no limit. |
|
|
|
decision_function_shape : {'ovo', 'ovr'}, default='ovr' |
|
Whether to return a one-vs-rest ('ovr') decision function of shape |
|
(n_samples, n_classes) as all other classifiers, or the original |
|
one-vs-one ('ovo') decision function of libsvm which has shape |
|
(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one |
|
('ovo') is always used as multi-class strategy. The parameter is |
|
ignored for binary classification. |
|
|
|
.. versionchanged:: 0.19 |
|
decision_function_shape is 'ovr' by default. |
|
|
|
.. versionadded:: 0.17 |
|
*decision_function_shape='ovr'* is recommended. |
|
|
|
.. versionchanged:: 0.17 |
|
Deprecated *decision_function_shape='ovo' and None*. |
|
|
|
break_ties : bool, default=False |
|
If true, ``decision_function_shape='ovr'``, and number of classes > 2, |
|
:term:`predict` will break ties according to the confidence values of |
|
:term:`decision_function`; otherwise the first class among the tied |
|
classes is returned. Please note that breaking ties comes at a |
|
relatively high computational cost compared to a simple predict. |
|
See :ref:`sphx_glr_auto_examples_svm_plot_svm_tie_breaking.py` for an |
|
example of its usage with ``decision_function_shape='ovr'``. |
|
|
|
.. versionadded:: 0.22 |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Controls the pseudo random number generation for shuffling the data for |
|
probability estimates. Ignored when `probability` is False. |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
Attributes |
|
---------- |
|
class_weight_ : ndarray of shape (n_classes,) |
|
Multipliers of parameter C of each class. |
|
Computed based on the ``class_weight`` parameter. |
|
|
|
classes_ : ndarray of shape (n_classes,) |
|
The unique classes labels. |
|
|
|
    coef_ : ndarray of shape (n_classes * (n_classes - 1) / 2, n_features)
|
Weights assigned to the features (coefficients in the primal |
|
problem). This is only available in the case of a linear kernel. |
|
|
|
        `coef_` is a readonly property derived from `dual_coef_` and
|
`support_vectors_`. |
|
|
|
dual_coef_ : ndarray of shape (n_classes - 1, n_SV) |
|
Dual coefficients of the support vector in the decision |
|
function (see :ref:`sgd_mathematical_formulation`), multiplied by |
|
their targets. |
|
For multiclass, coefficient for all 1-vs-1 classifiers. |
|
The layout of the coefficients in the multiclass case is somewhat |
|
non-trivial. See the :ref:`multi-class section of the User Guide |
|
<svm_multi_class>` for details. |
|
|
|
fit_status_ : int |
|
0 if correctly fitted, 1 if the algorithm did not converge. |
|
|
|
intercept_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) |
|
Constants in decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : ndarray of shape (n_classes * (n_classes - 1) // 2,) |
|
Number of iterations run by the optimization routine to fit the model. |
|
The shape of this attribute depends on the number of models optimized |
|
which in turn depends on the number of classes. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
support_ : ndarray of shape (n_SV,) |
|
Indices of support vectors. |
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features) |
|
Support vectors. |
|
|
|
n_support_ : ndarray of shape (n_classes,), dtype=int32 |
|
Number of support vectors for each class. |
|
|
|
|
|
    probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
    probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,)
|
If `probability=True`, it corresponds to the parameters learned in |
|
Platt scaling to produce probability estimates from decision values. |
|
If `probability=False`, it's an empty array. Platt scaling uses the |
|
logistic function |
|
``1 / (1 + exp(decision_value * probA_ + probB_))`` |
|
where ``probA_`` and ``probB_`` are learned from the dataset [2]_. For |
|
more information on the multiclass case and training procedure see |
|
section 8 of [1]_. |
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,) |
|
Array dimensions of training vector ``X``. |
|
|
|
See Also |
|
-------- |
|
SVC : Support Vector Machine for classification using libsvm. |
|
|
|
LinearSVC : Scalable linear Support Vector Machine for classification using |
|
liblinear. |
|
|
|
References |
|
---------- |
|
.. [1] `LIBSVM: A Library for Support Vector Machines |
|
<http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_ |
|
|
|
.. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector |
|
Machines and Comparisons to Regularized Likelihood Methods" |
|
<https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_ |
|
|
|
Examples |
|
-------- |
|
>>> import numpy as np |
|
>>> X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]]) |
|
>>> y = np.array([1, 1, 2, 2]) |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> from sklearn.svm import NuSVC |
|
>>> clf = make_pipeline(StandardScaler(), NuSVC()) |
|
>>> clf.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), ('nusvc', NuSVC())]) |
|
>>> print(clf.predict([[-0.8, -1]])) |
|
[1] |
|
""" |
|
|
|
_impl = "nu_svc" |
|
|
|
_parameter_constraints: dict = { |
|
**BaseSVC._parameter_constraints, |
|
"nu": [Interval(Real, 0.0, 1.0, closed="right")], |
|
} |
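    # `C` is not a free parameter in the Nu formulation; `nu` takes its place.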
|
_parameter_constraints.pop("C") |
|
|
|
def __init__( |
|
self, |
|
*, |
|
nu=0.5, |
|
kernel="rbf", |
|
degree=3, |
|
gamma="scale", |
|
coef0=0.0, |
|
shrinking=True, |
|
probability=False, |
|
tol=1e-3, |
|
cache_size=200, |
|
class_weight=None, |
|
verbose=False, |
|
max_iter=-1, |
|
decision_function_shape="ovr", |
|
break_ties=False, |
|
random_state=None, |
|
): |
|
super().__init__( |
|
kernel=kernel, |
|
degree=degree, |
|
gamma=gamma, |
|
coef0=coef0, |
|
tol=tol, |
|
C=0.0, |
|
nu=nu, |
|
shrinking=shrinking, |
|
probability=probability, |
|
cache_size=cache_size, |
|
class_weight=class_weight, |
|
verbose=verbose, |
|
max_iter=max_iter, |
|
decision_function_shape=decision_function_shape, |
|
break_ties=break_ties, |
|
random_state=random_state, |
|
) |
|
|
|
|
|
class SVR(RegressorMixin, BaseLibSVM): |
|
"""Epsilon-Support Vector Regression. |
|
|
|
The free parameters in the model are C and epsilon. |
|
|
|
The implementation is based on libsvm. The fit time complexity |
|
    is more than quadratic in the number of samples, which makes it hard
    to scale to datasets with more than a few tens of thousands of samples.
    For large
|
datasets consider using :class:`~sklearn.svm.LinearSVR` or |
|
:class:`~sklearn.linear_model.SGDRegressor` instead, possibly after a |
|
:class:`~sklearn.kernel_approximation.Nystroem` transformer or |
|
other :ref:`kernel_approximation`. |
|
|
|
Read more in the :ref:`User Guide <svm_regression>`. |
|
|
|
Parameters |
|
---------- |
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ |
|
default='rbf' |
|
Specifies the kernel type to be used in the algorithm. |
|
If none is given, 'rbf' will be used. If a callable is given it is |
|
used to precompute the kernel matrix. |
|
For an intuitive visualization of different kernel types |
|
    see :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py`.
|
|
|
degree : int, default=3 |
|
Degree of the polynomial kernel function ('poly'). |
|
Must be non-negative. Ignored by all other kernels. |
|
|
|
gamma : {'scale', 'auto'} or float, default='scale' |
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. |
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses |
|
          1 / (n_features * X.var()) as the value of gamma,
|
- if 'auto', uses 1 / n_features |
|
- if float, must be non-negative. |
|
|
|
.. versionchanged:: 0.22 |
|
The default value of ``gamma`` changed from 'auto' to 'scale'. |
|
|
|
coef0 : float, default=0.0 |
|
Independent term in kernel function. |
|
It is only significant in 'poly' and 'sigmoid'. |
|
|
|
tol : float, default=1e-3 |
|
Tolerance for stopping criterion. |
|
|
|
C : float, default=1.0 |
|
Regularization parameter. The strength of the regularization is |
|
inversely proportional to C. Must be strictly positive. |
|
        The penalty is a squared l2 penalty. For an intuitive visualization of the
|
effects of scaling the regularization parameter C, see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. |
|
|
|
epsilon : float, default=0.1 |
|
Epsilon in the epsilon-SVR model. It specifies the epsilon-tube |
|
        within which no penalty is incurred in the training loss function
        for points predicted within a distance epsilon from the actual
|
value. Must be non-negative. |
|
|
|
shrinking : bool, default=True |
|
Whether to use the shrinking heuristic. |
|
See the :ref:`User Guide <shrinking_svm>`. |
|
|
|
cache_size : float, default=200 |
|
Specify the size of the kernel cache (in MB). |
|
|
|
verbose : bool, default=False |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in libsvm that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
max_iter : int, default=-1 |
|
Hard limit on iterations within solver, or -1 for no limit. |
|
|
|
Attributes |
|
---------- |
|
coef_ : ndarray of shape (1, n_features) |
|
Weights assigned to the features (coefficients in the primal |
|
problem). This is only available in the case of a linear kernel. |
|
|
|
        `coef_` is a readonly property derived from `dual_coef_` and
|
`support_vectors_`. |
|
|
|
dual_coef_ : ndarray of shape (1, n_SV) |
|
Coefficients of the support vector in the decision function. |
|
|
|
fit_status_ : int |
|
        0 if correctly fitted, 1 otherwise (in which case a warning is raised).
|
|
|
intercept_ : ndarray of shape (1,) |
|
Constants in decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : int |
|
Number of iterations run by the optimization routine to fit the model. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
n_support_ : ndarray of shape (1,), dtype=int32 |
|
Number of support vectors. |
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,) |
|
Array dimensions of training vector ``X``. |
|
|
|
support_ : ndarray of shape (n_SV,) |
|
Indices of support vectors. |
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features) |
|
Support vectors. |
|
|
|
See Also |
|
-------- |
|
NuSVR : Support Vector Machine for regression implemented using libsvm |
|
using a parameter to control the number of support vectors. |
|
|
|
LinearSVR : Scalable Linear Support Vector Machine for regression |
|
implemented using liblinear. |
|
|
|
References |
|
---------- |
|
.. [1] `LIBSVM: A Library for Support Vector Machines |
|
<http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_ |
|
|
|
.. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector |
|
Machines and Comparisons to Regularized Likelihood Methods" |
|
<https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.svm import SVR |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> import numpy as np |
|
>>> n_samples, n_features = 10, 5 |
|
>>> rng = np.random.RandomState(0) |
|
>>> y = rng.randn(n_samples) |
|
>>> X = rng.randn(n_samples, n_features) |
|
>>> regr = make_pipeline(StandardScaler(), SVR(C=1.0, epsilon=0.2)) |
|
>>> regr.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), |
|
('svr', SVR(epsilon=0.2))]) |
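
    The fitted pipeline can then be used for prediction (exact values
    elided here):

    >>> regr.predict(X[:1])
    array([...])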
|
""" |
|
|
|
_impl = "epsilon_svr" |
|
|
|
_parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} |
|
for unused_param in ["class_weight", "nu", "probability", "random_state"]: |
|
_parameter_constraints.pop(unused_param) |
|
|
|
def __init__( |
|
self, |
|
*, |
|
kernel="rbf", |
|
degree=3, |
|
gamma="scale", |
|
coef0=0.0, |
|
tol=1e-3, |
|
C=1.0, |
|
epsilon=0.1, |
|
shrinking=True, |
|
cache_size=200, |
|
verbose=False, |
|
max_iter=-1, |
|
): |
|
super().__init__( |
|
kernel=kernel, |
|
degree=degree, |
|
gamma=gamma, |
|
coef0=coef0, |
|
tol=tol, |
|
C=C, |
|
nu=0.0, |
|
epsilon=epsilon, |
|
verbose=verbose, |
|
shrinking=shrinking, |
|
probability=False, |
|
cache_size=cache_size, |
|
class_weight=None, |
|
max_iter=max_iter, |
|
random_state=None, |
|
) |
|
|
|
|
|
class NuSVR(RegressorMixin, BaseLibSVM): |
|
"""Nu Support Vector Regression. |
|
|
|
    Similar to NuSVC, but for regression: it uses a parameter nu to control
|
the number of support vectors. However, unlike NuSVC, where nu |
|
replaces C, here nu replaces the parameter epsilon of epsilon-SVR. |
|
|
|
The implementation is based on libsvm. |
|
|
|
Read more in the :ref:`User Guide <svm_regression>`. |
|
|
|
Parameters |
|
---------- |
|
nu : float, default=0.5 |
|
An upper bound on the fraction of training errors and a lower bound of |
|
the fraction of support vectors. Should be in the interval (0, 1]. By |
|
default 0.5 will be taken. |
|
|
|
C : float, default=1.0 |
|
Penalty parameter C of the error term. For an intuitive visualization |
|
of the effects of scaling the regularization parameter C, see |
|
:ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`. |
|
|
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ |
|
default='rbf' |
|
Specifies the kernel type to be used in the algorithm. |
|
If none is given, 'rbf' will be used. If a callable is given it is |
|
used to precompute the kernel matrix. |
|
        For an intuitive visualization of different kernel types, see
        :ref:`sphx_glr_auto_examples_svm_plot_svm_regression.py`.
|
|
|
degree : int, default=3 |
|
Degree of the polynomial kernel function ('poly'). |
|
Must be non-negative. Ignored by all other kernels. |
|
|
|
gamma : {'scale', 'auto'} or float, default='scale' |
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. |
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses |
|
          1 / (n_features * X.var()) as the value of gamma,
|
- if 'auto', uses 1 / n_features |
|
- if float, must be non-negative. |
|
|
|
.. versionchanged:: 0.22 |
|
The default value of ``gamma`` changed from 'auto' to 'scale'. |
|
|
|
coef0 : float, default=0.0 |
|
Independent term in kernel function. |
|
It is only significant in 'poly' and 'sigmoid'. |
|
|
|
shrinking : bool, default=True |
|
Whether to use the shrinking heuristic. |
|
See the :ref:`User Guide <shrinking_svm>`. |
|
|
|
tol : float, default=1e-3 |
|
Tolerance for stopping criterion. |
|
|
|
cache_size : float, default=200 |
|
Specify the size of the kernel cache (in MB). |
|
|
|
verbose : bool, default=False |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in libsvm that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
max_iter : int, default=-1 |
|
Hard limit on iterations within solver, or -1 for no limit. |
|
|
|
Attributes |
|
---------- |
|
coef_ : ndarray of shape (1, n_features) |
|
Weights assigned to the features (coefficients in the primal |
|
problem). This is only available in the case of a linear kernel. |
|
|
|
        `coef_` is a readonly property derived from `dual_coef_` and
|
`support_vectors_`. |
|
|
|
dual_coef_ : ndarray of shape (1, n_SV) |
|
Coefficients of the support vector in the decision function. |
|
|
|
fit_status_ : int |
|
        0 if correctly fitted, 1 otherwise (in which case a warning is raised).
|
|
|
intercept_ : ndarray of shape (1,) |
|
Constants in decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : int |
|
Number of iterations run by the optimization routine to fit the model. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
n_support_ : ndarray of shape (1,), dtype=int32 |
|
Number of support vectors. |
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,) |
|
Array dimensions of training vector ``X``. |
|
|
|
support_ : ndarray of shape (n_SV,) |
|
Indices of support vectors. |
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features) |
|
Support vectors. |
|
|
|
See Also |
|
-------- |
|
NuSVC : Support Vector Machine for classification implemented with libsvm |
|
with a parameter to control the number of support vectors. |
|
|
|
SVR : Epsilon Support Vector Machine for regression implemented with |
|
libsvm. |
|
|
|
References |
|
---------- |
|
.. [1] `LIBSVM: A Library for Support Vector Machines |
|
<http://www.csie.ntu.edu.tw/~cjlin/papers/libsvm.pdf>`_ |
|
|
|
.. [2] `Platt, John (1999). "Probabilistic Outputs for Support Vector |
|
Machines and Comparisons to Regularized Likelihood Methods" |
|
<https://citeseerx.ist.psu.edu/doc_view/pid/42e5ed832d4310ce4378c44d05570439df28a393>`_ |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.svm import NuSVR |
|
>>> from sklearn.pipeline import make_pipeline |
|
>>> from sklearn.preprocessing import StandardScaler |
|
>>> import numpy as np |
|
>>> n_samples, n_features = 10, 5 |
|
>>> np.random.seed(0) |
|
>>> y = np.random.randn(n_samples) |
|
>>> X = np.random.randn(n_samples, n_features) |
|
>>> regr = make_pipeline(StandardScaler(), NuSVR(C=1.0, nu=0.1)) |
|
>>> regr.fit(X, y) |
|
Pipeline(steps=[('standardscaler', StandardScaler()), |
|
('nusvr', NuSVR(nu=0.1))]) |
|
""" |
|
|
|
_impl = "nu_svr" |
|
|
|
_parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} |
|
for unused_param in ["class_weight", "epsilon", "probability", "random_state"]: |
|
_parameter_constraints.pop(unused_param) |
|
|
|
def __init__( |
|
self, |
|
*, |
|
nu=0.5, |
|
C=1.0, |
|
kernel="rbf", |
|
degree=3, |
|
gamma="scale", |
|
coef0=0.0, |
|
shrinking=True, |
|
tol=1e-3, |
|
cache_size=200, |
|
verbose=False, |
|
max_iter=-1, |
|
): |
|
super().__init__( |
|
kernel=kernel, |
|
degree=degree, |
|
gamma=gamma, |
|
coef0=coef0, |
|
tol=tol, |
|
C=C, |
|
nu=nu, |
|
epsilon=0.0, |
|
shrinking=shrinking, |
|
probability=False, |
|
cache_size=cache_size, |
|
class_weight=None, |
|
verbose=verbose, |
|
max_iter=max_iter, |
|
random_state=None, |
|
) |
|
|
|
|
|
class OneClassSVM(OutlierMixin, BaseLibSVM): |
|
"""Unsupervised Outlier Detection. |
|
|
|
Estimate the support of a high-dimensional distribution. |
|
|
|
The implementation is based on libsvm. |
|
|
|
Read more in the :ref:`User Guide <outlier_detection>`. |
|
|
|
Parameters |
|
---------- |
|
kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ |
|
default='rbf' |
|
Specifies the kernel type to be used in the algorithm. |
|
If none is given, 'rbf' will be used. If a callable is given it is |
|
used to precompute the kernel matrix. |
|
|
|
degree : int, default=3 |
|
Degree of the polynomial kernel function ('poly'). |
|
Must be non-negative. Ignored by all other kernels. |
|
|
|
gamma : {'scale', 'auto'} or float, default='scale' |
|
Kernel coefficient for 'rbf', 'poly' and 'sigmoid'. |
|
|
|
- if ``gamma='scale'`` (default) is passed then it uses |
|
          1 / (n_features * X.var()) as the value of gamma,
|
- if 'auto', uses 1 / n_features |
|
- if float, must be non-negative. |
|
|
|
.. versionchanged:: 0.22 |
|
The default value of ``gamma`` changed from 'auto' to 'scale'. |
|
|
|
coef0 : float, default=0.0 |
|
Independent term in kernel function. |
|
It is only significant in 'poly' and 'sigmoid'. |
|
|
|
tol : float, default=1e-3 |
|
Tolerance for stopping criterion. |
|
|
|
nu : float, default=0.5 |
|
An upper bound on the fraction of training |
|
errors and a lower bound of the fraction of support |
|
vectors. Should be in the interval (0, 1]. By default 0.5 |
|
will be taken. |
|
|
|
shrinking : bool, default=True |
|
Whether to use the shrinking heuristic. |
|
See the :ref:`User Guide <shrinking_svm>`. |
|
|
|
cache_size : float, default=200 |
|
Specify the size of the kernel cache (in MB). |
|
|
|
verbose : bool, default=False |
|
Enable verbose output. Note that this setting takes advantage of a |
|
per-process runtime setting in libsvm that, if enabled, may not work |
|
properly in a multithreaded context. |
|
|
|
max_iter : int, default=-1 |
|
Hard limit on iterations within solver, or -1 for no limit. |
|
|
|
Attributes |
|
---------- |
|
coef_ : ndarray of shape (1, n_features) |
|
Weights assigned to the features (coefficients in the primal |
|
problem). This is only available in the case of a linear kernel. |
|
|
|
        `coef_` is a readonly property derived from `dual_coef_` and
|
`support_vectors_`. |
|
|
|
dual_coef_ : ndarray of shape (1, n_SV) |
|
Coefficients of the support vectors in the decision function. |
|
|
|
fit_status_ : int |
|
        0 if correctly fitted, 1 otherwise (in which case a warning is raised).
|
|
|
intercept_ : ndarray of shape (1,) |
|
Constant in the decision function. |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
n_iter_ : int |
|
Number of iterations run by the optimization routine to fit the model. |
|
|
|
.. versionadded:: 1.1 |
|
|
|
n_support_ : ndarray of shape (n_classes,), dtype=int32 |
|
Number of support vectors for each class. |
|
|
|
offset_ : float |
|
Offset used to define the decision function from the raw scores. |
|
We have the relation: decision_function = score_samples - `offset_`. |
|
The offset is the opposite of `intercept_` and is provided for |
|
consistency with other outlier detection algorithms. |
|
|
|
.. versionadded:: 0.20 |
|
|
|
shape_fit_ : tuple of int of shape (n_dimensions_of_X,) |
|
Array dimensions of training vector ``X``. |
|
|
|
support_ : ndarray of shape (n_SV,) |
|
Indices of support vectors. |
|
|
|
support_vectors_ : ndarray of shape (n_SV, n_features) |
|
Support vectors. |
|
|
|
See Also |
|
-------- |
|
sklearn.linear_model.SGDOneClassSVM : Solves linear One-Class SVM using |
|
Stochastic Gradient Descent. |
|
sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection using |
|
Local Outlier Factor (LOF). |
|
sklearn.ensemble.IsolationForest : Isolation Forest Algorithm. |
|
|
|
Examples |
|
-------- |
|
>>> from sklearn.svm import OneClassSVM |
|
>>> X = [[0], [0.44], [0.45], [0.46], [1]] |
|
>>> clf = OneClassSVM(gamma='auto').fit(X) |
|
>>> clf.predict(X) |
|
array([-1, 1, 1, 1, -1]) |
|
>>> clf.score_samples(X) |
|
array([1.7798..., 2.0547..., 2.0556..., 2.0561..., 1.7332...]) |
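
    The decision function is the scoring function shifted by ``offset_``,
    as described above:

    >>> import numpy as np
    >>> np.allclose(clf.decision_function(X),
    ...             clf.score_samples(X) - clf.offset_)
    True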
|
|
|
For a more extended example, |
|
see :ref:`sphx_glr_auto_examples_applications_plot_species_distribution_modeling.py` |
|
""" |
|
|
|
_impl = "one_class" |
|
|
|
_parameter_constraints: dict = {**BaseLibSVM._parameter_constraints} |
|
for unused_param in ["C", "class_weight", "epsilon", "probability", "random_state"]: |
|
_parameter_constraints.pop(unused_param) |
|
|
|
def __init__( |
|
self, |
|
*, |
|
kernel="rbf", |
|
degree=3, |
|
gamma="scale", |
|
coef0=0.0, |
|
tol=1e-3, |
|
nu=0.5, |
|
shrinking=True, |
|
cache_size=200, |
|
verbose=False, |
|
max_iter=-1, |
|
): |
|
        super().__init__(
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            tol=tol,
            C=0.0,
            nu=nu,
            epsilon=0.0,
            shrinking=shrinking,
            probability=False,
            cache_size=cache_size,
            class_weight=None,
            verbose=verbose,
            max_iter=max_iter,
            random_state=None,
        )
|
|
|
def fit(self, X, y=None, sample_weight=None): |
|
"""Detect the soft boundary of the set of samples X. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) |
|
Set of samples, where `n_samples` is the number of samples and |
|
`n_features` is the number of features. |
|
|
|
y : Ignored |
|
Not used, present for API consistency by convention. |
|
|
|
sample_weight : array-like of shape (n_samples,), default=None |
|
Per-sample weights. Rescale C per sample. Higher weights |
|
force the classifier to put more emphasis on these points. |
|
|
|
Returns |
|
------- |
|
self : object |
|
Fitted estimator. |
|
|
|
Notes |
|
----- |
|
If X is not a C-ordered contiguous array it is copied. |
|
""" |
|
super().fit(X, np.ones(_num_samples(X)), sample_weight=sample_weight) |
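        # `offset_` is the negated intercept so that
        # decision_function(X) == score_samples(X) - offset_, matching the
        # convention of other outlier detection estimators.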
|
self.offset_ = -self._intercept_ |
|
return self |
|
|
|
def decision_function(self, X): |
|
"""Signed distance to the separating hyperplane. |
|
|
|
Signed distance is positive for an inlier and negative for an outlier. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The data matrix. |
|
|
|
Returns |
|
------- |
|
dec : ndarray of shape (n_samples,) |
|
Returns the decision function of the samples. |
|
""" |
|
dec = self._decision_function(X).ravel() |
|
return dec |
|
|
|
def score_samples(self, X): |
|
"""Raw scoring function of the samples. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
The data matrix. |
|
|
|
Returns |
|
------- |
|
score_samples : ndarray of shape (n_samples,) |
|
Returns the (unshifted) scoring function of the samples. |
|
""" |
|
return self.decision_function(X) + self.offset_ |
|
|
|
def predict(self, X): |
|
"""Perform classification on samples in X. |
|
|
|
For a one-class model, +1 or -1 is returned. |
|
|
|
Parameters |
|
---------- |
|
X : {array-like, sparse matrix} of shape (n_samples, n_features) or \ |
|
(n_samples_test, n_samples_train) |
|
For kernel="precomputed", the expected shape of X is |
|
(n_samples_test, n_samples_train). |
|
|
|
Returns |
|
------- |
|
y_pred : ndarray of shape (n_samples,) |
|
Class labels for samples in X. |
|
""" |
|
y = super().predict(X) |
|
return np.asarray(y, dtype=np.intp) |
|
|