|
"""Bayesian Gaussian Mixture Model.""" |
|
|
|
|
|
|
|
|
|
import math |
|
from numbers import Real |
|
|
|
import numpy as np |
|
from scipy.special import betaln, digamma, gammaln |
|
|
|
from ..utils import check_array |
|
from ..utils._param_validation import Interval, StrOptions |
|
from ._base import BaseMixture, _check_shape |
|
from ._gaussian_mixture import ( |
|
_check_precision_matrix, |
|
_check_precision_positivity, |
|
_compute_log_det_cholesky, |
|
_compute_precision_cholesky, |
|
_estimate_gaussian_parameters, |
|
_estimate_log_gaussian_prob, |
|
) |
|
|
|
|
|
def _log_dirichlet_norm(dirichlet_concentration): |
|
"""Compute the log of the Dirichlet distribution normalization term. |
|
|
|
Parameters |
|
---------- |
|
    dirichlet_concentration : array-like of shape (n_components,)
        The concentration parameters of the Dirichlet distribution.
|
|
|
Returns |
|
------- |
|
log_dirichlet_norm : float |
|
The log normalization of the Dirichlet distribution. |
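
    Examples
    --------
    As a quick sanity check: for a symmetric Dirichlet with all
    concentrations equal to 1, the distribution is uniform over the
    simplex, so the log normalization reduces to ``gammaln(n_components)``:

    >>> import numpy as np
    >>> from scipy.special import gammaln
    >>> bool(np.isclose(_log_dirichlet_norm(np.ones(3)), gammaln(3)))
    True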
|
""" |
|
return gammaln(np.sum(dirichlet_concentration)) - np.sum( |
|
gammaln(dirichlet_concentration) |
|
) |
|
|
|
|
|
def _log_wishart_norm(degrees_of_freedom, log_det_precisions_chol, n_features): |
|
"""Compute the log of the Wishart distribution normalization term. |
|
|
|
Parameters |
|
---------- |
|
degrees_of_freedom : array-like of shape (n_components,) |
|
The number of degrees of freedom on the covariance Wishart |
|
distributions. |
|
|
|
    log_det_precisions_chol : array-like of shape (n_components,)
        The log-determinant of the Cholesky factor of the precision matrix
        for each component.
|
|
|
n_features : int |
|
The number of features. |
|
|
|
    Returns
    -------
|
log_wishart_norm : array-like of shape (n_components,) |
|
The log normalization of the Wishart distribution. |
|
""" |
|
|
|
return -( |
|
degrees_of_freedom * log_det_precisions_chol |
|
+ degrees_of_freedom * n_features * 0.5 * math.log(2.0) |
|
+ np.sum( |
|
gammaln(0.5 * (degrees_of_freedom - np.arange(n_features)[:, np.newaxis])), |
|
0, |
|
) |
|
) |
|
|
|
|
|
class BayesianGaussianMixture(BaseMixture): |
|
"""Variational Bayesian estimation of a Gaussian mixture. |
|
|
|
    This class allows inference of an approximate posterior distribution over
    the parameters of a Gaussian mixture distribution. The effective number of
|
components can be inferred from the data. |
|
|
|
This class implements two types of prior for the weights distribution: a |
|
finite mixture model with Dirichlet distribution and an infinite mixture |
|
    model with the Dirichlet Process. In practice the Dirichlet Process
    inference algorithm is approximated and uses a truncated distribution with
    a fixed maximum number of components (called the stick-breaking
    representation).
|
The number of components actually used almost always depends on the data. |
|
|
|
.. versionadded:: 0.18 |
|
|
|
Read more in the :ref:`User Guide <bgmm>`. |
|
|
|
Parameters |
|
---------- |
|
n_components : int, default=1 |
|
The number of mixture components. Depending on the data and the value |
|
of the `weight_concentration_prior` the model can decide to not use |
|
all the components by setting some component `weights_` to values very |
|
        close to zero. The number of effective components can therefore be
        smaller than `n_components`.
|
|
|
covariance_type : {'full', 'tied', 'diag', 'spherical'}, default='full' |
|
String describing the type of covariance parameters to use. |
|
Must be one of: |
|
|
|
- 'full' (each component has its own general covariance matrix), |
|
- 'tied' (all components share the same general covariance matrix), |
|
- 'diag' (each component has its own diagonal covariance matrix), |
|
- 'spherical' (each component has its own single variance). |
|
|
|
tol : float, default=1e-3 |
|
        The convergence threshold. EM iterations will stop when the average
        gain in the lower bound on the likelihood (of the training data with
        respect to the model) is below this threshold.
|
|
|
reg_covar : float, default=1e-6 |
|
        Non-negative regularization added to the diagonal of the covariance
        matrices. Ensures that they are all positive definite.
|
|
|
max_iter : int, default=100 |
|
The number of EM iterations to perform. |
|
|
|
n_init : int, default=1 |
|
The number of initializations to perform. The result with the highest |
|
lower bound value on the likelihood is kept. |
|
|
|
init_params : {'kmeans', 'k-means++', 'random', 'random_from_data'}, \ |
|
default='kmeans' |
|
The method used to initialize the weights, the means and the |
|
covariances. String must be one of: |
|
|
|
- 'kmeans': responsibilities are initialized using kmeans. |
|
- 'k-means++': use the k-means++ method to initialize. |
|
- 'random': responsibilities are initialized randomly. |
|
- 'random_from_data': initial means are randomly selected data points. |
|
|
|
.. versionchanged:: v1.1 |
|
`init_params` now accepts 'random_from_data' and 'k-means++' as |
|
initialization methods. |
|
|
|
weight_concentration_prior_type : {'dirichlet_process', 'dirichlet_distribution'}, \ |
|
default='dirichlet_process' |
|
String describing the type of the weight concentration prior. |
|
|
|
weight_concentration_prior : float or None, default=None |
|
        The Dirichlet concentration of each component on the weight
        distribution (Dirichlet). This is commonly called gamma in the
        literature. A higher concentration puts more mass in the center of
        the simplex and leads to more components being active, while a lower
        concentration puts more mass at the edges of the mixture weights
        simplex. The value of the parameter must be greater than 0. If it is
        None, it is set to ``1. / n_components``.
|
|
|
mean_precision_prior : float or None, default=None |
|
The precision prior on the mean distribution (Gaussian). |
|
        Controls the extent to which means can be placed away from
        `mean_prior`. Larger values concentrate the cluster means around
        `mean_prior`.
|
The value of the parameter must be greater than 0. |
|
If it is None, it is set to 1. |
|
|
|
mean_prior : array-like, shape (n_features,), default=None |
|
The prior on the mean distribution (Gaussian). |
|
If it is None, it is set to the mean of X. |
|
|
|
degrees_of_freedom_prior : float or None, default=None |
|
The prior of the number of degrees of freedom on the covariance |
|
distributions (Wishart). If it is None, it's set to `n_features`. |
|
|
|
covariance_prior : float or array-like, default=None |
|
The prior on the covariance distribution (Wishart). |
|
        If it is None, the empirical covariance prior is initialized using the
        covariance of X. The shape depends on `covariance_type`::
|
|
|
(n_features, n_features) if 'full', |
|
(n_features, n_features) if 'tied', |
|
            (n_features,) if 'diag',
|
float if 'spherical' |
|
|
|
random_state : int, RandomState instance or None, default=None |
|
Controls the random seed given to the method chosen to initialize the |
|
parameters (see `init_params`). |
|
In addition, it controls the generation of random samples from the |
|
fitted distribution (see the method `sample`). |
|
Pass an int for reproducible output across multiple function calls. |
|
See :term:`Glossary <random_state>`. |
|
|
|
warm_start : bool, default=False |
|
        If `warm_start` is True, the solution of the last fitting is used as
|
initialization for the next call of fit(). This can speed up |
|
convergence when fit is called several times on similar problems. |
|
See :term:`the Glossary <warm_start>`. |
|
|
|
verbose : int, default=0 |
|
Enable verbose output. If 1 then it prints the current |
|
initialization and each iteration step. If greater than 1 then |
|
it prints also the log probability and the time needed |
|
for each step. |
|
|
|
verbose_interval : int, default=10 |
|
        Number of iterations done before the next print.
|
|
|
Attributes |
|
---------- |
|
weights_ : array-like of shape (n_components,) |
|
        The weights of each mixture component.
|
|
|
means_ : array-like of shape (n_components, n_features) |
|
The mean of each mixture component. |
|
|
|
covariances_ : array-like |
|
The covariance of each mixture component. |
|
The shape depends on `covariance_type`:: |
|
|
|
(n_components,) if 'spherical', |
|
(n_features, n_features) if 'tied', |
|
(n_components, n_features) if 'diag', |
|
(n_components, n_features, n_features) if 'full' |
|
|
|
precisions_ : array-like |
|
The precision matrices for each component in the mixture. A precision |
|
matrix is the inverse of a covariance matrix. A covariance matrix is |
|
        symmetric positive definite, so the mixture of Gaussians can be
|
equivalently parameterized by the precision matrices. Storing the |
|
precision matrices instead of the covariance matrices makes it more |
|
efficient to compute the log-likelihood of new samples at test time. |
|
The shape depends on ``covariance_type``:: |
|
|
|
(n_components,) if 'spherical', |
|
(n_features, n_features) if 'tied', |
|
(n_components, n_features) if 'diag', |
|
(n_components, n_features, n_features) if 'full' |
|
|
|
precisions_cholesky_ : array-like |
|
        The Cholesky decomposition of the precision matrices of each mixture
        component. A precision matrix is the inverse of a covariance matrix.
        A covariance matrix is symmetric positive definite, so the mixture of
        Gaussians can be equivalently parameterized by the precision matrices.
|
Storing the precision matrices instead of the covariance matrices makes |
|
it more efficient to compute the log-likelihood of new samples at test |
|
time. The shape depends on ``covariance_type``:: |
|
|
|
(n_components,) if 'spherical', |
|
(n_features, n_features) if 'tied', |
|
(n_components, n_features) if 'diag', |
|
(n_components, n_features, n_features) if 'full' |
|
|
|
converged_ : bool |
|
True when convergence of the best fit of inference was reached, False otherwise. |
|
|
|
n_iter_ : int |
|
        Number of steps used by the best fit of inference to reach
        convergence.
|
|
|
lower_bound_ : float |
|
Lower bound value on the model evidence (of the training data) of the |
|
best fit of inference. |
|
|
|
weight_concentration_prior_ : tuple or float |
|
        The Dirichlet concentration of each component on the weight
|
distribution (Dirichlet). The type depends on |
|
``weight_concentration_prior_type``:: |
|
|
|
(float, float) if 'dirichlet_process' (Beta parameters), |
|
float if 'dirichlet_distribution' (Dirichlet parameters). |
|
|
|
        A higher concentration puts more mass in the center of the simplex
        and leads to more components being active, while a lower
        concentration puts more mass at the edges of the simplex.
|
|
|
weight_concentration_ : array-like of shape (n_components,) |
|
        The Dirichlet concentration of each component on the weight
|
distribution (Dirichlet). |
|
|
|
mean_precision_prior_ : float |
|
The precision prior on the mean distribution (Gaussian). |
|
        Controls the extent to which means can be placed away from
        `mean_prior`.
|
Larger values concentrate the cluster means around `mean_prior`. |
|
If mean_precision_prior is set to None, `mean_precision_prior_` is set |
|
to 1. |
|
|
|
mean_precision_ : array-like of shape (n_components,) |
|
        The precision of each component on the mean distribution (Gaussian).
|
|
|
mean_prior_ : array-like of shape (n_features,) |
|
The prior on the mean distribution (Gaussian). |
|
|
|
degrees_of_freedom_prior_ : float |
|
The prior of the number of degrees of freedom on the covariance |
|
distributions (Wishart). |
|
|
|
degrees_of_freedom_ : array-like of shape (n_components,) |
|
        The number of degrees of freedom of each component in the model.
|
|
|
covariance_prior_ : float or array-like |
|
The prior on the covariance distribution (Wishart). |
|
The shape depends on `covariance_type`:: |
|
|
|
(n_features, n_features) if 'full', |
|
(n_features, n_features) if 'tied', |
|
            (n_features,) if 'diag',
|
float if 'spherical' |
|
|
|
n_features_in_ : int |
|
Number of features seen during :term:`fit`. |
|
|
|
.. versionadded:: 0.24 |
|
|
|
feature_names_in_ : ndarray of shape (`n_features_in_`,) |
|
Names of features seen during :term:`fit`. Defined only when `X` |
|
has feature names that are all strings. |
|
|
|
.. versionadded:: 1.0 |
|
|
|
See Also |
|
-------- |
|
GaussianMixture : Finite Gaussian mixture fit with EM. |
|
|
|
References |
|
---------- |
|
|
|
.. [1] `Bishop, Christopher M. (2006). "Pattern recognition and machine |
|
learning". Vol. 4 No. 4. New York: Springer. |
|
<https://www.springer.com/kr/book/9780387310732>`_ |
|
|
|
.. [2] `Hagai Attias. (2000). "A Variational Bayesian Framework for |
|
Graphical Models". In Advances in Neural Information Processing |
|
Systems 12. |
|
<https://citeseerx.ist.psu.edu/doc_view/pid/ee844fd96db7041a9681b5a18bff008912052c7e>`_ |
|
|
|
.. [3] `Blei, David M. and Michael I. Jordan. (2006). "Variational |
|
inference for Dirichlet process mixtures". Bayesian analysis 1.1 |
|
<https://www.cs.princeton.edu/courses/archive/fall11/cos597C/reading/BleiJordan2005.pdf>`_ |
|
|
|
Examples |
|
-------- |
|
>>> import numpy as np |
|
>>> from sklearn.mixture import BayesianGaussianMixture |
|
>>> X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [12, 4], [10, 7]]) |
|
>>> bgm = BayesianGaussianMixture(n_components=2, random_state=42).fit(X) |
|
>>> bgm.means_ |
|
array([[2.49... , 2.29...], |
|
[8.45..., 4.52... ]]) |
|
>>> bgm.predict([[0, 0], [9, 3]]) |
|
array([0, 1]) |
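
    With a Dirichlet-distribution prior and a large concentration (the
    values here are purely illustrative), all components typically stay
    active:

    >>> bgm2 = BayesianGaussianMixture(
    ...     n_components=2,
    ...     weight_concentration_prior_type="dirichlet_distribution",
    ...     weight_concentration_prior=100,
    ...     random_state=42,
    ... ).fit(X)
    >>> bgm2.weights_.shape
    (2,)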
|
""" |
|
|
|
_parameter_constraints: dict = { |
|
**BaseMixture._parameter_constraints, |
|
"covariance_type": [StrOptions({"spherical", "tied", "diag", "full"})], |
|
"weight_concentration_prior_type": [ |
|
StrOptions({"dirichlet_process", "dirichlet_distribution"}) |
|
], |
|
"weight_concentration_prior": [ |
|
None, |
|
Interval(Real, 0.0, None, closed="neither"), |
|
], |
|
"mean_precision_prior": [None, Interval(Real, 0.0, None, closed="neither")], |
|
"mean_prior": [None, "array-like"], |
|
"degrees_of_freedom_prior": [None, Interval(Real, 0.0, None, closed="neither")], |
|
"covariance_prior": [ |
|
None, |
|
"array-like", |
|
Interval(Real, 0.0, None, closed="neither"), |
|
], |
|
} |
|
|
|
def __init__( |
|
self, |
|
*, |
|
n_components=1, |
|
covariance_type="full", |
|
tol=1e-3, |
|
reg_covar=1e-6, |
|
max_iter=100, |
|
n_init=1, |
|
init_params="kmeans", |
|
weight_concentration_prior_type="dirichlet_process", |
|
weight_concentration_prior=None, |
|
mean_precision_prior=None, |
|
mean_prior=None, |
|
degrees_of_freedom_prior=None, |
|
covariance_prior=None, |
|
random_state=None, |
|
warm_start=False, |
|
verbose=0, |
|
verbose_interval=10, |
|
): |
|
super().__init__( |
|
n_components=n_components, |
|
tol=tol, |
|
reg_covar=reg_covar, |
|
max_iter=max_iter, |
|
n_init=n_init, |
|
init_params=init_params, |
|
random_state=random_state, |
|
warm_start=warm_start, |
|
verbose=verbose, |
|
verbose_interval=verbose_interval, |
|
) |
|
|
|
self.covariance_type = covariance_type |
|
self.weight_concentration_prior_type = weight_concentration_prior_type |
|
self.weight_concentration_prior = weight_concentration_prior |
|
self.mean_precision_prior = mean_precision_prior |
|
self.mean_prior = mean_prior |
|
self.degrees_of_freedom_prior = degrees_of_freedom_prior |
|
self.covariance_prior = covariance_prior |
|
|
|
def _check_parameters(self, X): |
|
"""Check that the parameters are well defined. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
""" |
|
self._check_weights_parameters() |
|
self._check_means_parameters(X) |
|
self._check_precision_parameters(X) |
|
        self._check_covariance_prior_parameter(X)
|
|
|
def _check_weights_parameters(self): |
|
"""Check the parameter of the Dirichlet distribution.""" |
|
if self.weight_concentration_prior is None: |
|
self.weight_concentration_prior_ = 1.0 / self.n_components |
|
else: |
|
self.weight_concentration_prior_ = self.weight_concentration_prior |
|
|
|
def _check_means_parameters(self, X): |
|
"""Check the parameters of the Gaussian distribution. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
""" |
|
_, n_features = X.shape |
|
|
|
if self.mean_precision_prior is None: |
|
self.mean_precision_prior_ = 1.0 |
|
else: |
|
self.mean_precision_prior_ = self.mean_precision_prior |
|
|
|
if self.mean_prior is None: |
|
self.mean_prior_ = X.mean(axis=0) |
|
else: |
|
self.mean_prior_ = check_array( |
|
self.mean_prior, dtype=[np.float64, np.float32], ensure_2d=False |
|
) |
|
_check_shape(self.mean_prior_, (n_features,), "means") |
|
|
|
def _check_precision_parameters(self, X): |
|
"""Check the prior parameters of the precision distribution. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
""" |
|
_, n_features = X.shape |
|
|
|
if self.degrees_of_freedom_prior is None: |
|
self.degrees_of_freedom_prior_ = n_features |
|
elif self.degrees_of_freedom_prior > n_features - 1.0: |
|
self.degrees_of_freedom_prior_ = self.degrees_of_freedom_prior |
|
else: |
|
raise ValueError( |
|
"The parameter 'degrees_of_freedom_prior' " |
|
"should be greater than %d, but got %.3f." |
|
% (n_features - 1, self.degrees_of_freedom_prior) |
|
) |
|
|
|
    def _check_covariance_prior_parameter(self, X):
|
"""Check the `covariance_prior_`. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
""" |
|
_, n_features = X.shape |
|
|
|
if self.covariance_prior is None: |
|
self.covariance_prior_ = { |
|
"full": np.atleast_2d(np.cov(X.T)), |
|
"tied": np.atleast_2d(np.cov(X.T)), |
|
"diag": np.var(X, axis=0, ddof=1), |
|
"spherical": np.var(X, axis=0, ddof=1).mean(), |
|
}[self.covariance_type] |
|
|
|
elif self.covariance_type in ["full", "tied"]: |
|
self.covariance_prior_ = check_array( |
|
self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False |
|
) |
|
_check_shape( |
|
self.covariance_prior_, |
|
(n_features, n_features), |
|
"%s covariance_prior" % self.covariance_type, |
|
) |
|
_check_precision_matrix(self.covariance_prior_, self.covariance_type) |
|
elif self.covariance_type == "diag": |
|
self.covariance_prior_ = check_array( |
|
self.covariance_prior, dtype=[np.float64, np.float32], ensure_2d=False |
|
) |
|
_check_shape( |
|
self.covariance_prior_, |
|
(n_features,), |
|
"%s covariance_prior" % self.covariance_type, |
|
) |
|
_check_precision_positivity(self.covariance_prior_, self.covariance_type) |
|
|
|
else: |
|
self.covariance_prior_ = self.covariance_prior |
|
|
|
def _initialize(self, X, resp): |
|
"""Initialization of the mixture parameters. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
|
|
resp : array-like of shape (n_samples, n_components) |
|
""" |
|
nk, xk, sk = _estimate_gaussian_parameters( |
|
X, resp, self.reg_covar, self.covariance_type |
|
) |
|
|
|
self._estimate_weights(nk) |
|
self._estimate_means(nk, xk) |
|
self._estimate_precisions(nk, xk, sk) |
|
|
|
def _estimate_weights(self, nk): |
|
"""Estimate the parameters of the Dirichlet distribution. |
|
|
|
Parameters |
|
---------- |
|
nk : array-like of shape (n_components,) |
|
""" |
|
if self.weight_concentration_prior_type == "dirichlet_process": |
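            # For the Dirichlet process, `weight_concentration_` is a tuple
            # holding the parameters of the Beta distributions of the
            # stick-breaking representation: (1 + N_k, alpha + sum_{l>k} N_l).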
|
|
|
|
|
self.weight_concentration_ = ( |
|
1.0 + nk, |
|
( |
|
self.weight_concentration_prior_ |
|
+ np.hstack((np.cumsum(nk[::-1])[-2::-1], 0)) |
|
), |
|
) |
|
else: |
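            # Variational Gaussian mixture with a Dirichlet-distribution
            # prior: standard conjugate update of the concentrations.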
|
|
|
self.weight_concentration_ = self.weight_concentration_prior_ + nk |
|
|
|
def _estimate_means(self, nk, xk): |
|
"""Estimate the parameters of the Gaussian distribution. |
|
|
|
Parameters |
|
---------- |
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
""" |
|
self.mean_precision_ = self.mean_precision_prior_ + nk |
|
self.means_ = ( |
|
self.mean_precision_prior_ * self.mean_prior_ + nk[:, np.newaxis] * xk |
|
) / self.mean_precision_[:, np.newaxis] |
|
|
|
def _estimate_precisions(self, nk, xk, sk): |
|
"""Estimate the precisions parameters of the precision distribution. |
|
|
|
Parameters |
|
---------- |
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
|
|
sk : array-like |
|
            The shape depends on `covariance_type`:
|
'full' : (n_components, n_features, n_features) |
|
'tied' : (n_features, n_features) |
|
'diag' : (n_components, n_features) |
|
'spherical' : (n_components,) |
|
""" |
|
{ |
|
"full": self._estimate_wishart_full, |
|
"tied": self._estimate_wishart_tied, |
|
"diag": self._estimate_wishart_diag, |
|
"spherical": self._estimate_wishart_spherical, |
|
}[self.covariance_type](nk, xk, sk) |
|
|
|
self.precisions_cholesky_ = _compute_precision_cholesky( |
|
self.covariances_, self.covariance_type |
|
) |
|
|
|
def _estimate_wishart_full(self, nk, xk, sk): |
|
"""Estimate the full Wishart distribution parameters. |
|
|
|
Parameters |
|
---------- |
|
|
|
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
|
|
sk : array-like of shape (n_components, n_features, n_features) |
|
""" |
|
_, n_features = xk.shape |
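        # Warning: in some editions of the Bishop book there is a typo in
        # formula 10.63; `degrees_of_freedom_k = degrees_of_freedom_0 + N_k`
        # is the correct update.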
|
|
|
|
|
|
|
|
|
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk |
|
|
|
self.covariances_ = np.empty((self.n_components, n_features, n_features)) |
|
|
|
for k in range(self.n_components): |
|
diff = xk[k] - self.mean_prior_ |
|
self.covariances_[k] = ( |
|
self.covariance_prior_ |
|
+ nk[k] * sk[k] |
|
+ nk[k] |
|
* self.mean_precision_prior_ |
|
/ self.mean_precision_[k] |
|
* np.outer(diff, diff) |
|
) |
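        # Contrary to the original Bishop formulation, the covariances are
        # normalized by the degrees of freedom; `_estimate_log_prob`
        # compensates for this normalization.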
|
|
|
|
|
self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis, np.newaxis] |
|
|
|
def _estimate_wishart_tied(self, nk, xk, sk): |
|
"""Estimate the tied Wishart distribution parameters. |
|
|
|
Parameters |
|
---------- |
|
|
|
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
|
|
sk : array-like of shape (n_features, n_features) |
|
""" |
|
_, n_features = xk.shape |
|
|
|
|
|
|
|
|
|
self.degrees_of_freedom_ = ( |
|
self.degrees_of_freedom_prior_ + nk.sum() / self.n_components |
|
) |
|
|
|
diff = xk - self.mean_prior_ |
|
self.covariances_ = ( |
|
self.covariance_prior_ |
|
+ sk * nk.sum() / self.n_components |
|
+ self.mean_precision_prior_ |
|
/ self.n_components |
|
* np.dot((nk / self.mean_precision_) * diff.T, diff) |
|
) |
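        # Same normalization by the degrees of freedom as in the full case.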
|
|
|
|
|
self.covariances_ /= self.degrees_of_freedom_ |
|
|
|
def _estimate_wishart_diag(self, nk, xk, sk): |
|
"""Estimate the diag Wishart distribution parameters. |
|
|
|
Parameters |
|
---------- |
|
|
|
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
|
|
sk : array-like of shape (n_components, n_features) |
|
""" |
|
_, n_features = xk.shape |
|
|
|
|
|
|
|
|
|
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk |
|
|
|
diff = xk - self.mean_prior_ |
|
self.covariances_ = self.covariance_prior_ + nk[:, np.newaxis] * ( |
|
sk |
|
+ (self.mean_precision_prior_ / self.mean_precision_)[:, np.newaxis] |
|
* np.square(diff) |
|
) |
|
|
|
|
|
self.covariances_ /= self.degrees_of_freedom_[:, np.newaxis] |
|
|
|
def _estimate_wishart_spherical(self, nk, xk, sk): |
|
"""Estimate the spherical Wishart distribution parameters. |
|
|
|
Parameters |
|
---------- |
|
|
|
|
nk : array-like of shape (n_components,) |
|
|
|
xk : array-like of shape (n_components, n_features) |
|
|
|
sk : array-like of shape (n_components,) |
|
""" |
|
_, n_features = xk.shape |
|
|
|
|
|
|
|
|
|
self.degrees_of_freedom_ = self.degrees_of_freedom_prior_ + nk |
|
|
|
diff = xk - self.mean_prior_ |
|
self.covariances_ = self.covariance_prior_ + nk * ( |
|
sk |
|
+ self.mean_precision_prior_ |
|
/ self.mean_precision_ |
|
* np.mean(np.square(diff), 1) |
|
) |
|
|
|
|
|
self.covariances_ /= self.degrees_of_freedom_ |
|
|
|
def _m_step(self, X, log_resp): |
|
"""M step. |
|
|
|
Parameters |
|
---------- |
|
X : array-like of shape (n_samples, n_features) |
|
|
|
log_resp : array-like of shape (n_samples, n_components) |
|
            Logarithm of the posterior probabilities (or responsibilities)
            of each sample in X.
|
""" |
|
n_samples, _ = X.shape |
|
|
|
nk, xk, sk = _estimate_gaussian_parameters( |
|
X, np.exp(log_resp), self.reg_covar, self.covariance_type |
|
) |
|
self._estimate_weights(nk) |
|
self._estimate_means(nk, xk) |
|
self._estimate_precisions(nk, xk, sk) |
|
|
|
def _estimate_log_weights(self): |
|
if self.weight_concentration_prior_type == "dirichlet_process": |
|
digamma_sum = digamma( |
|
self.weight_concentration_[0] + self.weight_concentration_[1] |
|
) |
|
digamma_a = digamma(self.weight_concentration_[0]) |
|
digamma_b = digamma(self.weight_concentration_[1]) |
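            # Stick-breaking representation:
            # E[log pi_k] = E[log v_k] + sum_{l<k} E[log(1 - v_l)],
            # where E[log v_k] = digamma(a_k) - digamma(a_k + b_k).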
|
return ( |
|
digamma_a |
|
- digamma_sum |
|
+ np.hstack((0, np.cumsum(digamma_b - digamma_sum)[:-1])) |
|
) |
|
else: |
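            # Dirichlet-distribution prior:
            # E[log pi_k] = digamma(alpha_k) - digamma(sum(alpha)).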
|
|
|
return digamma(self.weight_concentration_) - digamma( |
|
np.sum(self.weight_concentration_) |
|
) |
|
|
|
def _estimate_log_prob(self, X): |
|
_, n_features = X.shape |
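        # `covariances_` were normalized by the degrees of freedom, so the
        # precision Cholesky factors carry an extra `degrees_of_freedom_**0.5`
        # scale; `0.5 * n_features * log(degrees_of_freedom_)` removes it.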
|
|
|
|
|
log_gauss = _estimate_log_gaussian_prob( |
|
X, self.means_, self.precisions_cholesky_, self.covariance_type |
|
) - 0.5 * n_features * np.log(self.degrees_of_freedom_) |
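        # E_q[log det(Lambda_k)], up to the log-determinant term that is
        # already included in `log_gauss` above.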
|
|
|
log_lambda = n_features * np.log(2.0) + np.sum( |
|
digamma( |
|
0.5 |
|
* (self.degrees_of_freedom_ - np.arange(0, n_features)[:, np.newaxis]) |
|
), |
|
0, |
|
) |
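        # The `n_features / mean_precision_` term accounts for the posterior
        # uncertainty of the component means.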
|
|
|
return log_gauss + 0.5 * (log_lambda - n_features / self.mean_precision_) |
|
|
|
def _compute_lower_bound(self, log_resp, log_prob_norm): |
|
"""Estimate the lower bound of the model. |
|
|
|
The lower bound on the likelihood (of the training data with respect to |
|
        the model) is used to detect convergence and has to increase at
|
each iteration. |
|
|
|
Parameters |
|
---------- |
|
|
|
|
        log_resp : array-like of shape (n_samples, n_components)
            Logarithm of the posterior probabilities (or responsibilities)
            of each sample in X.
|
|
|
log_prob_norm : float |
|
            Mean of the logarithms of the probabilities of each sample in X.
|
|
|
Returns |
|
------- |
|
lower_bound : float |
|
""" |
|
|
|
|
|
(n_features,) = self.mean_prior_.shape |
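        # We remove `0.5 * n_features * np.log(self.degrees_of_freedom_)`
        # because the precision matrices are normalized.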
|
|
|
|
|
|
|
log_det_precisions_chol = _compute_log_det_cholesky( |
|
self.precisions_cholesky_, self.covariance_type, n_features |
|
) - 0.5 * n_features * np.log(self.degrees_of_freedom_) |
|
|
|
if self.covariance_type == "tied": |
|
log_wishart = self.n_components * np.float64( |
|
_log_wishart_norm( |
|
self.degrees_of_freedom_, log_det_precisions_chol, n_features |
|
) |
|
) |
|
else: |
|
log_wishart = np.sum( |
|
_log_wishart_norm( |
|
self.degrees_of_freedom_, log_det_precisions_chol, n_features |
|
) |
|
) |
|
|
|
if self.weight_concentration_prior_type == "dirichlet_process": |
|
log_norm_weight = -np.sum( |
|
betaln(self.weight_concentration_[0], self.weight_concentration_[1]) |
|
) |
|
else: |
|
log_norm_weight = _log_dirichlet_norm(self.weight_concentration_) |
|
|
|
return ( |
|
-np.sum(np.exp(log_resp) * log_resp) |
|
- log_wishart |
|
- log_norm_weight |
|
- 0.5 * n_features * np.sum(np.log(self.mean_precision_)) |
|
) |
|
|
|
def _get_parameters(self): |
|
return ( |
|
self.weight_concentration_, |
|
self.mean_precision_, |
|
self.means_, |
|
self.degrees_of_freedom_, |
|
self.covariances_, |
|
self.precisions_cholesky_, |
|
) |
|
|
|
def _set_parameters(self, params): |
|
( |
|
self.weight_concentration_, |
|
self.mean_precision_, |
|
self.means_, |
|
self.degrees_of_freedom_, |
|
self.covariances_, |
|
self.precisions_cholesky_, |
|
) = params |
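        # Weights computation: for the Dirichlet process, convert the Beta
        # parameters of the stick-breaking representation into mixing weights.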
|
|
|
|
|
if self.weight_concentration_prior_type == "dirichlet_process": |
|
weight_dirichlet_sum = ( |
|
self.weight_concentration_[0] + self.weight_concentration_[1] |
|
) |
|
tmp = self.weight_concentration_[1] / weight_dirichlet_sum |
|
self.weights_ = ( |
|
self.weight_concentration_[0] |
|
/ weight_dirichlet_sum |
|
* np.hstack((1, np.cumprod(tmp[:-1]))) |
|
) |
|
self.weights_ /= np.sum(self.weights_) |
|
else: |
|
self.weights_ = self.weight_concentration_ / np.sum( |
|
self.weight_concentration_ |
|
) |
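        # Precisions computation: recover the precision matrices from their
        # Cholesky factors.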
|
|
|
|
|
if self.covariance_type == "full": |
|
self.precisions_ = np.array( |
|
[ |
|
np.dot(prec_chol, prec_chol.T) |
|
for prec_chol in self.precisions_cholesky_ |
|
] |
|
) |
|
|
|
elif self.covariance_type == "tied": |
|
self.precisions_ = np.dot( |
|
self.precisions_cholesky_, self.precisions_cholesky_.T |
|
) |
|
else: |
|
self.precisions_ = self.precisions_cholesky_**2 |
|
|