"""Determination of parameter bounds"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from numbers import Real
import numpy as np
from ..preprocessing import LabelBinarizer
from ..utils._param_validation import Interval, StrOptions, validate_params
from ..utils.extmath import safe_sparse_dot
from ..utils.validation import check_array, check_consistent_length


@validate_params(
    {
        "X": ["array-like", "sparse matrix"],
        "y": ["array-like"],
        "loss": [StrOptions({"squared_hinge", "log"})],
        "fit_intercept": ["boolean"],
        "intercept_scaling": [Interval(Real, 0, None, closed="neither")],
    },
    prefer_skip_nested_validation=True,
)
def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0):
"""Return the lowest bound for C.
The lower bound for C is computed such that for C in (l1_min_C, infinity)
the model is guaranteed not to be empty. This applies to l1 penalized
classifiers, such as LinearSVC with penalty='l1' and
linear_model.LogisticRegression with penalty='l1'.
This value is valid if class_weight parameter in fit() is not set.
Parameters
----------
X : {array-like, sparse matrix} of shape (n_samples, n_features)
Training vector, where `n_samples` is the number of samples and
`n_features` is the number of features.
y : array-like of shape (n_samples,)
Target vector relative to X.
loss : {'squared_hinge', 'log'}, default='squared_hinge'
Specifies the loss function.
With 'squared_hinge' it is the squared hinge loss (a.k.a. L2 loss).
With 'log' it is the loss of logistic regression models.
fit_intercept : bool, default=True
Specifies if the intercept should be fitted by the model.
It must match the fit() method parameter.
intercept_scaling : float, default=1.0
When fit_intercept is True, instance vector x becomes
[x, intercept_scaling],
i.e. a "synthetic" feature with constant value equals to
intercept_scaling is appended to the instance vector.
It must match the fit() method parameter.
Returns
-------
l1_min_c : float
Minimum value for C.
Examples
--------
>>> from sklearn.svm import l1_min_c
>>> from sklearn.datasets import make_classification
>>> X, y = make_classification(n_samples=100, n_features=20, random_state=42)
>>> print(f"{l1_min_c(X, y, loss='squared_hinge', fit_intercept=True):.4f}")
0.0044
"""
    X = check_array(X, accept_sparse="csc")
    check_consistent_length(X, y)

    Y = LabelBinarizer(neg_label=-1).fit_transform(y).T
    # maximum absolute value over classes and features
    den = np.max(np.abs(safe_sparse_dot(Y, X)))
    if fit_intercept:
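        # When the intercept is fitted, a constant "synthetic" column of value
        # intercept_scaling is implicitly appended to X (see the docstring),
        # so it competes with the data columns for the largest absolute
        # correlation with the labels and can raise the denominator.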
        bias = np.full(
            (np.size(y), 1), intercept_scaling, dtype=np.array(intercept_scaling).dtype
        )
        den = max(den, abs(np.dot(Y, bias)).max())

    if den == 0.0:
        raise ValueError(
            "Ill-posed l1_min_c calculation: l1 will always "
            "select zero coefficients for this data"
        )
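    # Rationale for the constants below: the all-zero coefficient vector stays
    # optimal as long as C times the largest absolute gradient of the data
    # term at w = 0 is below 1; that gradient magnitude is 2 * den for the
    # squared hinge loss and den / 2 for the log loss.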
if loss == "squared_hinge":
return 0.5 / den
else: # loss == 'log':
return 2.0 / den
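

# A minimal usage sketch, assuming a synthetic dataset from
# make_classification: the common use of l1_min_c is to anchor a grid of C
# values for an l1 regularization path, as in the scikit-learn documentation
# examples. The three-decade logspace grid is an illustrative choice. Because
# this module uses relative imports, run it as
# `python -m sklearn.svm._bounds` rather than as a plain script.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression

    X_demo, y_demo = make_classification(n_samples=100, n_features=20, random_state=42)

    # At or below l1_min_c every coefficient is shrunk to zero, so start the
    # grid at the bound and sweep three decades upward.
    cs = l1_min_c(X_demo, y_demo, loss="log") * np.logspace(0, 3, num=8)

    clf = LogisticRegression(penalty="l1", solver="liblinear", tol=1e-6)
    for c in cs:
        clf.set_params(C=c)
        clf.fit(X_demo, y_demo)
        print(f"C={c:.5f}  nonzero coefficients={np.count_nonzero(clf.coef_)}")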