# Temporary file separated from _distribution_infrastructure.py # to simplify the diff during PR review. from abc import ABC, abstractmethod class _ProbabilityDistribution(ABC): @abstractmethod def support(self): r"""Support of the random variable The support of a random variable is set of all possible outcomes; i.e., the subset of the domain of argument :math:`x` for which the probability density function :math:`f(x)` is nonzero. This function returns lower and upper bounds of the support. Returns ------- out : tuple of Array The lower and upper bounds of the support. See Also -------- pdf References ---------- .. [1] Support (mathematics), *Wikipedia*, https://en.wikipedia.org/wiki/Support_(mathematics) Notes ----- Suppose a continuous probability distribution has support ``(l, r)``. The following table summarizes the value returned by methods of ``ContinuousDistribution`` for arguments outside the support. +----------------+---------------------+---------------------+ | Method | Value for ``x < l`` | Value for ``x > r`` | +================+=====================+=====================+ | ``pdf(x)`` | 0 | 0 | +----------------+---------------------+---------------------+ | ``logpdf(x)`` | -inf | -inf | +----------------+---------------------+---------------------+ | ``cdf(x)`` | 0 | 1 | +----------------+---------------------+---------------------+ | ``logcdf(x)`` | -inf | 0 | +----------------+---------------------+---------------------+ | ``ccdf(x)`` | 1 | 0 | +----------------+---------------------+---------------------+ | ``logccdf(x)`` | 0 | -inf | +----------------+---------------------+---------------------+ For the ``cdf`` and related methods, the inequality need not be strict; i.e. the tabulated value is returned when the method is evaluated *at* the corresponding boundary. The following table summarizes the value returned by the inverse methods of ``ContinuousDistribution`` for arguments at the boundaries of the domain ``0`` to ``1``. +-------------+-----------+-----------+ | Method | ``x = 0`` | ``x = 1`` | +=============+===========+===========+ | ``icdf(x)`` | ``l`` | ``r`` | +-------------+-----------+-----------+ | ``icdf(x)`` | ``r`` | ``l`` | +-------------+-----------+-----------+ For the inverse log-functions, the same values are returned for for ``x = log(0)`` and ``x = log(1)``. All inverse functions return ``nan`` when evaluated at an argument outside the domain ``0`` to ``1``. Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Retrieve the support of the distribution: >>> X.support() (-0.5, 0.5) For a distribution with infinite support, >>> X = stats.Normal() >>> X.support() (-inf, inf) Due to underflow, the numerical value returned by the PDF may be zero even for arguments within the support, even if the true value is nonzero. In such cases, the log-PDF may be useful. >>> X.pdf([-100., 100.]) array([0., 0.]) >>> X.logpdf([-100., 100.]) array([-5000.91893853, -5000.91893853]) Use cases for the log-CDF and related methods are analogous. """ raise NotImplementedError() @abstractmethod def sample(self, shape, *, method, rng): r"""Random sample from the distribution. Parameters ---------- shape : tuple of ints, default: () The shape of the sample to draw. If the parameters of the distribution underlying the random variable are arrays of shape ``param_shape``, the output array will be of shape ``shape + param_shape``. method : {None, 'formula', 'inverse_transform'} The strategy used to produce the sample. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: an implementation specific to the distribution - ``'inverse_transform'``: generate a uniformly distributed sample and return the inverse CDF at these arguments. Not all `method` options are available for all distributions. If the selected `method` is not available, a `NotImplementedError`` will be raised. rng : `numpy.random.Generator` or `scipy.stats.QMCEngine`, optional Pseudo- or quasi-random number generator state. When `rng` is None, a new `numpy.random.Generator` is created using entropy from the operating system. Types other than `numpy.random.Generator` and `scipy.stats.QMCEngine` are passed to `numpy.random.default_rng` to instantiate a ``Generator``. If `rng` is an instance of `scipy.stats.QMCEngine` configured to use scrambling and `shape` is not empty, then each slice along the zeroth axis of the result is a "quasi-independent", low-discrepancy sequence; that is, they are distinct sequences that can be treated as statistically independent for most practical purposes. Separate calls to `sample` produce new quasi-independent, low-discrepancy sequences. References ---------- .. [1] Sampling (statistics), *Wikipedia*, https://en.wikipedia.org/wiki/Sampling_(statistics) Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=0., b=1.) Generate a pseudorandom sample: >>> x = X.sample((1000, 1)) >>> octiles = (np.arange(8) + 1) / 8 >>> np.count_nonzero(x <= octiles, axis=0) array([ 148, 263, 387, 516, 636, 751, 865, 1000]) # may vary >>> X = stats.Uniform(a=np.zeros((3, 1)), b=np.ones(2)) >>> X.a.shape, (3, 2) >>> x = X.sample(shape=(5, 4)) >>> x.shape (5, 4, 3, 2) """ raise NotImplementedError() @abstractmethod def moment(self, order, kind, *, method): r"""Raw, central, or standard moment of positive integer order. In terms of probability density function :math:`f(x)` and support :math:`\chi`, the "raw" moment (about the origin) of order :math:`n` of a random variable :math:`X` is: .. math:: \mu'_n(X) = \int_{\chi} x^n f(x) dx The "central" moment is the raw moment taken about the mean, :math:`\mu = \mu'_1`: .. math:: \mu_n(X) = \int_{\chi} (x - \mu) ^n f(x) dx The "standardized" moment is the central moment normalized by the :math:`n^\text{th}` power of the standard deviation :math:`\sigma = \sqrt{\mu_2}` to produce a scale invariant quantity: .. math:: \tilde{\mu}_n(X) = \frac{\mu_n(X)} {\sigma^n} Parameters ---------- order : int The integer order of the moment; i.e. :math:`n` in the formulae above. kind : {'raw', 'central', 'standardized'} Whether to return the raw (default), central, or standardized moment defined above. method : {None, 'formula', 'general', 'transform', 'normalize', 'quadrature', 'cache'} The strategy used to evaluate the moment. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'cache'``: use the value of the moment most recently calculated via another method - ``'formula'``: use a formula for the moment itself - ``'general'``: use a general result that is true for all distributions with finite moments; for instance, the zeroth raw moment is identically 1 - ``'transform'``: transform a raw moment to a central moment or vice versa (see Notes) - ``'normalize'``: normalize a central moment to get a standardized or vice versa - ``'quadrature'``: numerically integrate according to the definition Not all `method` options are available for all orders, kinds, and distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The moment of the random variable of the specified order and kind. See Also -------- pdf mean variance standard_deviation skewness kurtosis Notes ----- Not all distributions have finite moments of all orders; moments of some orders may be undefined or infinite. If a formula for the moment is not specifically implemented for the chosen distribution, SciPy will attempt to compute the moment via a generic method, which may yield a finite result where none exists. This is not a critical bug, but an opportunity for an enhancement. The definition of a raw moment in the summary is specific to the raw moment about the origin. The raw moment about any point :math:`a` is: .. math:: E[(X-a)^n] = \int_{\chi} (x-a)^n f(x) dx In this notation, a raw moment about the origin is :math:`\mu'_n = E[x^n]`, and a central moment is :math:`\mu_n = E[(x-\mu)^n]`, where :math:`\mu` is the first raw moment; i.e. the mean. The ``'transform'`` method takes advantage of the following relationships between moments taken about different points :math:`a` and :math:`b`. .. math:: E[(X-b)^n] = \sum_{i=0}^n E[(X-a)^i] {n \choose i} (a - b)^{n-i} For instance, to transform the raw moment to the central moment, we let :math:`b = \mu` and :math:`a = 0`. The distribution infrastructure provides flexibility for distribution authors to implement separate formulas for raw moments, central moments, and standardized moments of any order. By default, the moment of the desired order and kind is evaluated from the formula if such a formula is available; if not, the infrastructure uses any formulas that are available rather than resorting directly to numerical integration. For instance, if formulas for the first three raw moments are available and the third standardized moments is desired, the infrastructure will evaluate the raw moments and perform the transforms and standardization required. The decision tree is somewhat complex, but the strategy for obtaining a moment of a given order and kind (possibly as an intermediate step due to the recursive nature of the transform formula above) roughly follows this order of priority: #. Use cache (if order of same moment and kind has been calculated) #. Use formula (if available) #. Transform between raw and central moment and/or normalize to convert between central and standardized moments (if efficient) #. Use a generic result true for most distributions (if available) #. Use quadrature References ---------- .. [1] Moment, *Wikipedia*, https://en.wikipedia.org/wiki/Moment_(mathematics) Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the first raw moment: >>> X.moment(order=1, kind='raw') 1.0 >>> X.moment(order=1, kind='raw') == X.mean() == X.mu True Evaluate the second central moment: >>> X.moment(order=2, kind='central') 4.0 >>> X.moment(order=2, kind='central') == X.variance() == X.sigma**2 True Evaluate the fourth standardized moment: >>> X.moment(order=4, kind='standardized') 3.0 >>> X.moment(order=4, kind='standardized') == X.kurtosis(convention='non-excess') True """ # noqa:E501 raise NotImplementedError() @abstractmethod def mean(self, *, method): r"""Mean (raw first moment about the origin) Parameters ---------- method : {None, 'formula', 'transform', 'quadrature', 'cache'} Method used to calculate the raw first moment. Not all methods are available for all distributions. See `moment` for details. See Also -------- moment median mode References ---------- .. [1] Mean, *Wikipedia*, https://en.wikipedia.org/wiki/Mean#Mean_of_a_probability_distribution Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the variance: >>> X.mean() 1.0 >>> X.mean() == X.moment(order=1, kind='raw') == X.mu True """ raise NotImplementedError() @abstractmethod def median(self, *, method): r"""Median (50th percentil) If a continuous random variable :math:`X` has probability :math:`0.5` of taking on a value less than :math:`m`, then :math:`m` is the median. That is, the median is the value :math:`m` for which: .. math:: P(X ≤ m) = 0.5 = P(X ≥ m) Parameters ---------- method : {None, 'formula', 'icdf'} The strategy used to evaluate the median. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the median - ``'icdf'``: evaluate the inverse CDF of 0.5 Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The median See Also -------- mean mode icdf References ---------- .. [1] Median, *Wikipedia*, https://en.wikipedia.org/wiki/Median#Probability_distributions Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Uniform(a=0., b=10.) Compute the median: >>> X.median() np.float64(5.0) >>> X.median() == X.icdf(0.5) == X.iccdf(0.5) True """ raise NotImplementedError() @abstractmethod def mode(self, *, method): r"""Mode (most likely value) Informally, the mode is a value that a random variable has the highest probability (density) of assuming. That is, the mode is the element of the support :math:`\chi` that maximizes the probability density function :math:`f(x)`: .. math:: \text{mode} = \arg\max_{x \in \chi} f(x) Parameters ---------- method : {None, 'formula', 'optimization'} The strategy used to evaluate the mode. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the median - ``'optimization'``: numerically maximize the PDF Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The mode See Also -------- mean median pdf Notes ----- For some distributions #. the mode is not unique (e.g. the uniform distribution); #. the PDF has one or more singularities, and it is debateable whether a singularity is considered to be in the domain and called the mode (e.g. the gamma distribution with shape parameter less than 1); and/or #. the probability density function may have one or more local maxima that are not a global maximum (e.g. mixture distributions). In such cases, `mode` will #. return a single value, #. consider the mode to occur at a singularity, and/or #. return a local maximum which may or may not be a global maximum. If a formula for the mode is not specifically implemented for the chosen distribution, SciPy will attempt to compute the mode numerically, which may not meet the user's preferred definition of a mode. In such cases, the user is encouraged to subclass the distribution and override ``mode``. References ---------- .. [1] Mode (statistics), *Wikipedia*, https://en.wikipedia.org/wiki/Mode_(statistics) Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the mode: >>> X.mode() 1.0 If the mode is not uniquely defined, ``mode`` nonetheless returns a single value. >>> X = stats.Uniform(a=0., b=1.) >>> X.mode() 0.5 If this choice does not satisfy your requirements, subclass the distribution and override ``mode``: >>> class BetterUniform(stats.Uniform): ... def mode(self): ... return self.b >>> X = BetterUniform(a=0., b=1.) >>> X.mode() 1.0 """ raise NotImplementedError() @abstractmethod def variance(self, *, method): r"""Variance (central second moment) Parameters ---------- method : {None, 'formula', 'transform', 'normalize', 'quadrature', 'cache'} Method used to calculate the central second moment. Not all methods are available for all distributions. See `moment` for details. See Also -------- moment standard_deviation mean References ---------- .. [1] Variance, *Wikipedia*, https://en.wikipedia.org/wiki/Variance#Absolutely_continuous_random_variable Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the variance: >>> X.variance() 4.0 >>> X.variance() == X.moment(order=2, kind='central') == X.sigma**2 True """ raise NotImplementedError() @abstractmethod def standard_deviation(self, *, method): r"""Standard deviation (square root of the second central moment) Parameters ---------- method : {None, 'formula', 'transform', 'normalize', 'quadrature', 'cache'} Method used to calculate the central second moment. Not all methods are available for all distributions. See `moment` for details. See Also -------- variance mean moment References ---------- .. [1] Standard deviation, *Wikipedia*, https://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the standard deviation: >>> X.standard_deviation() 2.0 >>> X.standard_deviation() == X.moment(order=2, kind='central')**0.5 == X.sigma True """ raise NotImplementedError() @abstractmethod def skewness(self, *, method): r"""Skewness (standardized third moment) Parameters ---------- method : {None, 'formula', 'general', 'transform', 'normalize', 'cache'} Method used to calculate the standardized third moment. Not all methods are available for all distributions. See `moment` for details. See Also -------- moment mean variance References ---------- .. [1] Skewness, *Wikipedia*, https://en.wikipedia.org/wiki/Skewness Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the skewness: >>> X.skewness() 0.0 >>> X.skewness() == X.moment(order=3, kind='standardized') True """ raise NotImplementedError() @abstractmethod def kurtosis(self, *, method): r"""Kurtosis (standardized fourth moment) By default, this is the standardized fourth moment, also known as the "non-excess" or "Pearson" kurtosis (e.g. the kurtosis of the normal distribution is 3). The "excess" or "Fisher" kurtosis (the standardized fourth moment minus 3) is available via the `convention` parameter. Parameters ---------- method : {None, 'formula', 'general', 'transform', 'normalize', 'cache'} Method used to calculate the standardized fourth moment. Not all methods are available for all distributions. See `moment` for details. convention : {'non-excess', 'excess'} Two distinct conventions are available: - ``'non-excess'``: the standardized fourth moment (Pearson's kurtosis) - ``'excess'``: the standardized fourth moment minus 3 (Fisher's kurtosis) The default is ``'non-excess'``. See Also -------- moment mean variance References ---------- .. [1] Kurtosis, *Wikipedia*, https://en.wikipedia.org/wiki/Kurtosis Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Normal(mu=1., sigma=2.) Evaluate the kurtosis: >>> X.kurtosis() 3.0 >>> (X.kurtosis() ... == X.kurtosis(convention='excess') + 3. ... == X.moment(order=4, kind='standardized')) True """ raise NotImplementedError() @abstractmethod def pdf(self, x, /, *, method): r"""Probability density function The probability density function ("PDF"), denoted :math:`f(x)`, is the probability *per unit length* that the random variable will assume the value :math:`x`. Mathematically, it can be defined as the derivative of the cumulative distribution function :math:`F(x)`: .. math:: f(x) = \frac{d}{dx} F(x) `pdf` accepts `x` for :math:`x`. Parameters ---------- x : array_like The argument of the PDF. method : {None, 'formula', 'logexp'} The strategy used to evaluate the PDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the PDF itself - ``'logexp'``: evaluate the log-PDF and exponentiate Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The PDF evaluated at the argument `x`. See Also -------- cdf logpdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. By definition of the support, the PDF evaluates to its minimum value of :math:`0` outside the support; i.e. for :math:`x < l` or :math:`x > r`. The maximum of the PDF may be less than or greater than :math:`1`; since the valus is a probability *density*, only its integral over the support must equal :math:`1`. References ---------- .. [1] Probability density function, *Wikipedia*, https://en.wikipedia.org/wiki/Probability_density_function Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Uniform(a=-1., b=1.) Evaluate the PDF at the desired argument: >>> X.pdf(0.25) 0.5 """ raise NotImplementedError() @abstractmethod def logpdf(self, x, /, *, method): r"""Log of the probability density function The probability density function ("PDF"), denoted :math:`f(x)`, is the probability *per unit length* that the random variable will assume the value :math:`x`. Mathematically, it can be defined as the derivative of the cumulative distribution function :math:`F(x)`: .. math:: f(x) = \frac{d}{dx} F(x) `logpdf` computes the logarithm of the probability density function ("log-PDF"), :math:`\log(f(x))`, but it may be numerically favorable compared to the naive implementation (computing :math:`f(x)` and taking the logarithm). `logpdf` accepts `x` for :math:`x`. Parameters ---------- x : array_like The argument of the log-PDF. method : {None, 'formula', 'logexp'} The strategy used to evaluate the log-PDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the log-PDF itself - ``'logexp'``: evaluate the PDF and takes its logarithm Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The log-PDF evaluated at the argument `x`. See Also -------- pdf logcdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. By definition of the support, the log-PDF evaluates to its minimum value of :math:`-\infty` (i.e. :math:`\log(0)`) outside the support; i.e. for :math:`x < l` or :math:`x > r`. The maximum of the log-PDF may be less than or greater than :math:`\log(1) = 0` because the maximum of the PDF can be any positive real. For distributions with infinite support, it is common for `pdf` to return a value of ``0`` when the argument is theoretically within the support; this can occur because the true value of the PDF is too small to be represented by the chosen dtype. The log-PDF, however, will often be finite (not ``-inf``) over a much larger domain. Consequently, it may be preferred to work with the logarithms of probabilities and probability densities to avoid underflow. References ---------- .. [1] Probability density function, *Wikipedia*, https://en.wikipedia.org/wiki/Probability_density_function Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-1.0, b=1.0) Evaluate the log-PDF at the desired argument: >>> X.logpdf(0.5) -0.6931471805599453 >>> np.allclose(X.logpdf(0.5), np.log(X.pdf(0.5))) True """ raise NotImplementedError() @abstractmethod def cdf(self, x, y, /, *, method): r"""Cumulative distribution function The cumulative distribution function ("CDF"), denoted :math:`F(x)`, is the probability the random variable :math:`X` will assume a value less than or equal to :math:`x`: .. math:: F(x) = P(X ≤ x) A two-argument variant of this function is also defined as the probability the random variable :math:`X` will assume a value between :math:`x` and :math:`y`. .. math:: F(x, y) = P(x ≤ X ≤ y) `cdf` accepts `x` for :math:`x` and `y` for :math:`y`. Parameters ---------- x, y : array_like The arguments of the CDF. `x` is required; `y` is optional. method : {None, 'formula', 'logexp', 'complement', 'quadrature', 'subtraction'} The strategy used to evaluate the CDF. By default (``None``), the one-argument form of the function chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the CDF itself - ``'logexp'``: evaluate the log-CDF and exponentiate - ``'complement'``: evaluate the CCDF and take the complement - ``'quadrature'``: numerically integrate the PDF In place of ``'complement'``, the two-argument form accepts: - ``'subtraction'``: compute the CDF at each argument and take the difference. Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The CDF evaluated at the provided argument(s). See Also -------- logcdf ccdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The CDF :math:`F(x)` is related to the probability density function :math:`f(x)` by: .. math:: F(x) = \int_l^x f(u) du The two argument version is: .. math:: F(x, y) = \int_x^y f(u) du = F(y) - F(x) The CDF evaluates to its minimum value of :math:`0` for :math:`x ≤ l` and its maximum value of :math:`1` for :math:`x ≥ r`. The CDF is also known simply as the "distribution function". References ---------- .. [1] Cumulative distribution function, *Wikipedia*, https://en.wikipedia.org/wiki/Cumulative_distribution_function Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the CDF at the desired argument: >>> X.cdf(0.25) 0.75 Evaluate the cumulative probability between two arguments: >>> X.cdf(-0.25, 0.25) == X.cdf(0.25) - X.cdf(-0.25) True """ # noqa: E501 raise NotImplementedError() @abstractmethod def icdf(self, p, /, *, method): r"""Inverse of the cumulative distribution function. The inverse of the cumulative distribution function ("inverse CDF"), denoted :math:`F^{-1}(p)`, is the argument :math:`x` for which the cumulative distribution function :math:`F(x)` evaluates to :math:`p`. .. math:: F^{-1}(p) = x \quad \text{s.t.} \quad F(x) = p `icdf` accepts `p` for :math:`p \in [0, 1]`. Parameters ---------- p : array_like The argument of the inverse CDF. method : {None, 'formula', 'complement', 'inversion'} The strategy used to evaluate the inverse CDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the inverse CDF itself - ``'complement'``: evaluate the inverse CCDF at the complement of `p` - ``'inversion'``: solve numerically for the argument at which the CDF is equal to `p` Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The inverse CDF evaluated at the provided argument. See Also -------- cdf ilogcdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The inverse CDF returns its minimum value of :math:`l` at :math:`p = 0` and its maximum value of :math:`r` at :math:`p = 1`. Because the CDF has range :math:`[0, 1]`, the inverse CDF is only defined on the domain :math:`[0, 1]`; for :math:`p < 0` and :math:`p > 1`, `icdf` returns ``nan``. The inverse CDF is also known as the quantile function, percentile function, and percent-point function. References ---------- .. [1] Quantile function, *Wikipedia*, https://en.wikipedia.org/wiki/Quantile_function Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the inverse CDF at the desired argument: >>> X.icdf(0.25) -0.25 >>> np.allclose(X.cdf(X.icdf(0.25)), 0.25) True This function returns NaN when the argument is outside the domain. >>> X.icdf([-0.1, 0, 1, 1.1]) array([ nan, -0.5, 0.5, nan]) """ raise NotImplementedError() @abstractmethod def ccdf(self, x, y, /, *, method): r"""Complementary cumulative distribution function The complementary cumulative distribution function ("CCDF"), denoted :math:`G(x)`, is the complement of the cumulative distribution function :math:`F(x)`; i.e., probability the random variable :math:`X` will assume a value greater than :math:`x`: .. math:: G(x) = 1 - F(x) = P(X > x) A two-argument variant of this function is: .. math:: G(x, y) = 1 - F(x, y) = P(X < x \text{ or } X > y) `ccdf` accepts `x` for :math:`x` and `y` for :math:`y`. Parameters ---------- x, y : array_like The arguments of the CCDF. `x` is required; `y` is optional. method : {None, 'formula', 'logexp', 'complement', 'quadrature', 'addition'} The strategy used to evaluate the CCDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the CCDF itself - ``'logexp'``: evaluate the log-CCDF and exponentiate - ``'complement'``: evaluate the CDF and take the complement - ``'quadrature'``: numerically integrate the PDF The two-argument form chooses between: - ``'formula'``: use a formula for the CCDF itself - ``'addition'``: compute the CDF at `x` and the CCDF at `y`, then add Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The CCDF evaluated at the provided argument(s). See Also -------- cdf logccdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The CCDF :math:`G(x)` is related to the probability density function :math:`f(x)` by: .. math:: G(x) = \int_x^r f(u) du The two argument version is: .. math:: G(x, y) = \int_l^x f(u) du + \int_y^r f(u) du The CCDF returns its minimum value of :math:`0` for :math:`x ≥ r` and its maximum value of :math:`1` for :math:`x ≤ l`. The CCDF is also known as the "survival function". References ---------- .. [1] Cumulative distribution function, *Wikipedia*, https://en.wikipedia.org/wiki/Cumulative_distribution_function#Derived_functions Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the CCDF at the desired argument: >>> X.ccdf(0.25) 0.25 >>> np.allclose(X.ccdf(0.25), 1-X.cdf(0.25)) True Evaluate the complement of the cumulative probability between two arguments: >>> X.ccdf(-0.25, 0.25) == X.cdf(-0.25) + X.ccdf(0.25) True """ # noqa: E501 raise NotImplementedError() @abstractmethod def iccdf(self, p, /, *, method): r"""Inverse complementary cumulative distribution function. The inverse complementary cumulative distribution function ("inverse CCDF"), denoted :math:`G^{-1}(p)`, is the argument :math:`x` for which the complementary cumulative distribution function :math:`G(x)` evaluates to :math:`p`. .. math:: G^{-1}(p) = x \quad \text{s.t.} \quad G(x) = p `iccdf` accepts `p` for :math:`p \in [0, 1]`. Parameters ---------- p : array_like The argument of the inverse CCDF. method : {None, 'formula', 'complement', 'inversion'} The strategy used to evaluate the inverse CCDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the inverse CCDF itself - ``'complement'``: evaluate the inverse CDF at the complement of `p` - ``'inversion'``: solve numerically for the argument at which the CCDF is equal to `p` Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The inverse CCDF evaluated at the provided argument. Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The inverse CCDF returns its minimum value of :math:`l` at :math:`p = 1` and its maximum value of :math:`r` at :math:`p = 0`. Because the CCDF has range :math:`[0, 1]`, the inverse CCDF is only defined on the domain :math:`[0, 1]`; for :math:`p < 0` and :math:`p > 1`, ``iccdf`` returns ``nan``. See Also -------- icdf ilogccdf Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the inverse CCDF at the desired argument: >>> X.iccdf(0.25) 0.25 >>> np.allclose(X.iccdf(0.25), X.icdf(1-0.25)) True This function returns NaN when the argument is outside the domain. >>> X.iccdf([-0.1, 0, 1, 1.1]) array([ nan, 0.5, -0.5, nan]) """ raise NotImplementedError() @abstractmethod def logcdf(self, x, y, /, *, method): r"""Log of the cumulative distribution function The cumulative distribution function ("CDF"), denoted :math:`F(x)`, is the probability the random variable :math:`X` will assume a value less than or equal to :math:`x`: .. math:: F(x) = P(X ≤ x) A two-argument variant of this function is also defined as the probability the random variable :math:`X` will assume a value between :math:`x` and :math:`y`. .. math:: F(x, y) = P(x ≤ X ≤ y) `logcdf` computes the logarithm of the cumulative distribution function ("log-CDF"), :math:`\log(F(x))`/:math:`\log(F(x, y))`, but it may be numerically favorable compared to the naive implementation (computing the CDF and taking the logarithm). `logcdf` accepts `x` for :math:`x` and `y` for :math:`y`. Parameters ---------- x, y : array_like The arguments of the log-CDF. `x` is required; `y` is optional. method : {None, 'formula', 'logexp', 'complement', 'quadrature', 'subtraction'} The strategy used to evaluate the log-CDF. By default (``None``), the one-argument form of the function chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the log-CDF itself - ``'logexp'``: evaluate the CDF and take the logarithm - ``'complement'``: evaluate the log-CCDF and take the logarithmic complement (see Notes) - ``'quadrature'``: numerically log-integrate the log-PDF In place of ``'complement'``, the two-argument form accepts: - ``'subtraction'``: compute the log-CDF at each argument and take the logarithmic difference (see Notes) Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The log-CDF evaluated at the provided argument(s). See Also -------- cdf logccdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The log-CDF evaluates to its minimum value of :math:`\log(0) = -\infty` for :math:`x ≤ l` and its maximum value of :math:`\log(1) = 0` for :math:`x ≥ r`. For distributions with infinite support, it is common for `cdf` to return a value of ``0`` when the argument is theoretically within the support; this can occur because the true value of the CDF is too small to be represented by the chosen dtype. `logcdf`, however, will often return a finite (not ``-inf``) result over a much larger domain. Similarly, `logcdf` may provided a strictly negative result with arguments for which `cdf` would return ``1.0``. Consequently, it may be preferred to work with the logarithms of probabilities to avoid underflow and related limitations of floating point numbers. The "logarithmic complement" of a number :math:`z` is mathematically equivalent to :math:`\log(1-\exp(z))`, but it is computed to avoid loss of precision when :math:`\exp(z)` is nearly :math:`0` or :math:`1`. Similarly, the term "logarithmic difference" of :math:`w` and :math:`z` is used here to mean :math:`\log(\exp(w)-\exp(z))`. If ``y < x``, the CDF is negative, and therefore the log-CCDF is complex with imaginary part :math:`\pi`. For consistency, the result of this function always has complex dtype when `y` is provided, regardless of the value of the imaginary part. References ---------- .. [1] Cumulative distribution function, *Wikipedia*, https://en.wikipedia.org/wiki/Cumulative_distribution_function Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the log-CDF at the desired argument: >>> X.logcdf(0.25) -0.287682072451781 >>> np.allclose(X.logcdf(0.), np.log(X.cdf(0.))) True """ # noqa: E501 raise NotImplementedError() @abstractmethod def ilogcdf(self, logp, /, *, method): r"""Inverse of the logarithm of the cumulative distribution function. The inverse of the logarithm of the cumulative distribution function ("inverse log-CDF") is the argument :math:`x` for which the logarithm of the cumulative distribution function :math:`\log(F(x))` evaluates to :math:`\log(p)`. Mathematically, it is equivalent to :math:`F^{-1}(\exp(y))`, where :math:`y = \log(p)`, but it may be numerically favorable compared to the naive implementation (computing :math:`p = \exp(y)`, then :math:`F^{-1}(p)`). `ilogcdf` accepts `logp` for :math:`\log(p) ≤ 0`. Parameters ---------- logp : array_like The argument of the inverse log-CDF. method : {None, 'formula', 'complement', 'inversion'} The strategy used to evaluate the inverse log-CDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the inverse log-CDF itself - ``'complement'``: evaluate the inverse log-CCDF at the logarithmic complement of `logp` (see Notes) - ``'inversion'``: solve numerically for the argument at which the log-CDF is equal to `logp` Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The inverse log-CDF evaluated at the provided argument. See Also -------- icdf logcdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The inverse log-CDF returns its minimum value of :math:`l` at :math:`\log(p) = \log(0) = -\infty` and its maximum value of :math:`r` at :math:`\log(p) = \log(1) = 0`. Because the log-CDF has range :math:`[-\infty, 0]`, the inverse log-CDF is only defined on the negative reals; for :math:`\log(p) > 0`, `ilogcdf` returns ``nan``. Occasionally, it is needed to find the argument of the CDF for which the resulting probability is very close to ``0`` or ``1`` - too close to represent accurately with floating point arithmetic. In many cases, however, the *logarithm* of this resulting probability may be represented in floating point arithmetic, in which case this function may be used to find the argument of the CDF for which the *logarithm* of the resulting probability is :math:`y = \log(p)`. The "logarithmic complement" of a number :math:`z` is mathematically equivalent to :math:`\log(1-\exp(z))`, but it is computed to avoid loss of precision when :math:`\exp(z)` is nearly :math:`0` or :math:`1`. Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the inverse log-CDF at the desired argument: >>> X.ilogcdf(-0.25) 0.2788007830714034 >>> np.allclose(X.ilogcdf(-0.25), X.icdf(np.exp(-0.25))) True """ raise NotImplementedError() @abstractmethod def logccdf(self, x, y, /, *, method): r"""Log of the complementary cumulative distribution function The complementary cumulative distribution function ("CCDF"), denoted :math:`G(x)` is the complement of the cumulative distribution function :math:`F(x)`; i.e., probability the random variable :math:`X` will assume a value greater than :math:`x`: .. math:: G(x) = 1 - F(x) = P(X > x) A two-argument variant of this function is: .. math:: G(x, y) = 1 - F(x, y) = P(X < x \quad \text{or} \quad X > y) `logccdf` computes the logarithm of the complementary cumulative distribution function ("log-CCDF"), :math:`\log(G(x))`/:math:`\log(G(x, y))`, but it may be numerically favorable compared to the naive implementation (computing the CDF and taking the logarithm). `logccdf` accepts `x` for :math:`x` and `y` for :math:`y`. Parameters ---------- x, y : array_like The arguments of the log-CCDF. `x` is required; `y` is optional. method : {None, 'formula', 'logexp', 'complement', 'quadrature', 'addition'} The strategy used to evaluate the log-CCDF. By default (``None``), the one-argument form of the function chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the log CCDF itself - ``'logexp'``: evaluate the CCDF and take the logarithm - ``'complement'``: evaluate the log-CDF and take the logarithmic complement (see Notes) - ``'quadrature'``: numerically log-integrate the log-PDF The two-argument form chooses between: - ``'formula'``: use a formula for the log CCDF itself - ``'addition'``: compute the log-CDF at `x` and the log-CCDF at `y`, then take the logarithmic sum (see Notes) Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The log-CCDF evaluated at the provided argument(s). See Also -------- ccdf logcdf Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The log-CCDF returns its minimum value of :math:`\log(0)=-\infty` for :math:`x ≥ r` and its maximum value of :math:`\log(1) = 0` for :math:`x ≤ l`. For distributions with infinite support, it is common for `ccdf` to return a value of ``0`` when the argument is theoretically within the support; this can occur because the true value of the CCDF is too small to be represented by the chosen dtype. The log of the CCDF, however, will often be finite (not ``-inf``) over a much larger domain. Similarly, `logccdf` may provided a strictly negative result with arguments for which `ccdf` would return ``1.0``. Consequently, it may be preferred to work with the logarithms of probabilities to avoid underflow and related limitations of floating point numbers. The "logarithmic complement" of a number :math:`z` is mathematically equivalent to :math:`\log(1-\exp(z))`, but it is computed to avoid loss of precision when :math:`\exp(z)` is nearly :math:`0` or :math:`1`. Similarly, the term "logarithmic sum" of :math:`w` and :math:`z` is used here to mean the :math:`\log(\exp(w)+\exp(z))`, AKA :math:`\text{LogSumExp}(w, z)`. References ---------- .. [1] Cumulative distribution function, *Wikipedia*, https://en.wikipedia.org/wiki/Cumulative_distribution_function#Derived_functions Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the log-CCDF at the desired argument: >>> X.logccdf(0.25) -1.3862943611198906 >>> np.allclose(X.logccdf(0.), np.log(X.ccdf(0.))) True """ # noqa: E501 raise NotImplementedError() @abstractmethod def ilogccdf(self, logp, /, *, method): r"""Inverse of the log of the complementary cumulative distribution function. The inverse of the logarithm of the complementary cumulative distribution function ("inverse log-CCDF") is the argument :math:`x` for which the logarithm of the complementary cumulative distribution function :math:`\log(G(x))` evaluates to :math:`\log(p)`. Mathematically, it is equivalent to :math:`G^{-1}(\exp(y))`, where :math:`y = \log(p)`, but it may be numerically favorable compared to the naive implementation (computing :math:`p = \exp(y)`, then :math:`G^{-1}(p)`). `ilogccdf` accepts `logp` for :math:`\log(p) ≤ 0`. Parameters ---------- x : array_like The argument of the inverse log-CCDF. method : {None, 'formula', 'complement', 'inversion'} The strategy used to evaluate the inverse log-CCDF. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the inverse log-CCDF itself - ``'complement'``: evaluate the inverse log-CDF at the logarithmic complement of `x` (see Notes) - ``'inversion'``: solve numerically for the argument at which the log-CCDF is equal to `x` Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The inverse log-CCDF evaluated at the provided argument. Notes ----- Suppose a continuous probability distribution has support :math:`[l, r]`. The inverse log-CCDF returns its minimum value of :math:`l` at :math:`\log(p) = \log(1) = 0` and its maximum value of :math:`r` at :math:`\log(p) = \log(0) = -\infty`. Because the log-CCDF has range :math:`[-\infty, 0]`, the inverse log-CDF is only defined on the negative reals; for :math:`\log(p) > 0`, `ilogccdf` returns ``nan``. Occasionally, it is needed to find the argument of the CCDF for which the resulting probability is very close to ``0`` or ``1`` - too close to represent accurately with floating point arithmetic. In many cases, however, the *logarithm* of this resulting probability may be represented in floating point arithmetic, in which case this function may be used to find the argument of the CCDF for which the *logarithm* of the resulting probability is `y = \log(p)`. The "logarithmic complement" of a number :math:`z` is mathematically equivalent to :math:`\log(1-\exp(z))`, but it is computed to avoid loss of precision when :math:`\exp(z)` is nearly :math:`0` or :math:`1`. See Also -------- iccdf ilogccdf Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-0.5, b=0.5) Evaluate the inverse log-CCDF at the desired argument: >>> X.ilogccdf(-0.25) -0.2788007830714034 >>> np.allclose(X.ilogccdf(-0.25), X.iccdf(np.exp(-0.25))) True """ raise NotImplementedError() @abstractmethod def logentropy(self, *, method): r"""Logarithm of the differential entropy In terms of probability density function :math:`f(x)` and support :math:`\chi`, the differential entropy (or simply "entropy") of a random variable :math:`X` is: .. math:: h(X) = - \int_{\chi} f(x) \log f(x) dx `logentropy` computes the logarithm of the differential entropy ("log-entropy"), :math:`log(h(X))`, but it may be numerically favorable compared to the naive implementation (computing :math:`h(X)` then taking the logarithm). Parameters ---------- method : {None, 'formula', 'logexp', 'quadrature} The strategy used to evaluate the log-entropy. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the log-entropy itself - ``'logexp'``: evaluate the entropy and take the logarithm - ``'quadrature'``: numerically log-integrate the logarithm of the entropy integrand Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The log-entropy. See Also -------- entropy logpdf Notes ----- If the entropy of a distribution is negative, then the log-entropy is complex with imaginary part :math:`\pi`. For consistency, the result of this function always has complex dtype, regardless of the value of the imaginary part. References ---------- .. [1] Differential entropy, *Wikipedia*, https://en.wikipedia.org/wiki/Differential_entropy Examples -------- Instantiate a distribution with the desired parameters: >>> import numpy as np >>> from scipy import stats >>> X = stats.Uniform(a=-1., b=1.) Evaluate the log-entropy: >>> X.logentropy() (-0.3665129205816642+0j) >>> np.allclose(np.exp(X.logentropy()), X.entropy()) True For a random variable with negative entropy, the log-entropy has an imaginary part equal to `np.pi`. >>> X = stats.Uniform(a=-.1, b=.1) >>> X.entropy(), X.logentropy() (-1.6094379124341007, (0.4758849953271105+3.141592653589793j)) """ raise NotImplementedError() @abstractmethod def entropy(self, *, method): r"""Differential entropy In terms of probability density function :math:`f(x)` and support :math:`\chi`, the differential entropy (or simply "entropy") of a continuous random variable :math:`X` is: .. math:: h(X) = - \int_{\chi} f(x) \log f(x) dx Parameters ---------- method : {None, 'formula', 'logexp', 'quadrature'} The strategy used to evaluate the entropy. By default (``None``), the infrastructure chooses between the following options, listed in order of precedence. - ``'formula'``: use a formula for the entropy itself - ``'logexp'``: evaluate the log-entropy and exponentiate - ``'quadrature'``: use numerical integration Not all `method` options are available for all distributions. If the selected `method` is not available, a ``NotImplementedError`` will be raised. Returns ------- out : array The entropy of the random variable. See Also -------- logentropy pdf Notes ----- This function calculates the entropy using the natural logarithm; i.e. the logarithm with base :math:`e`. Consequently, the value is expressed in (dimensionless) "units" of nats. To convert the entropy to different units (i.e. corresponding with a different base), divide the result by the natural logarithm of the desired base. References ---------- .. [1] Differential entropy, *Wikipedia*, https://en.wikipedia.org/wiki/Differential_entropy Examples -------- Instantiate a distribution with the desired parameters: >>> from scipy import stats >>> X = stats.Uniform(a=-1., b=1.) Evaluate the entropy: >>> X.entropy() 0.6931471805599454 """ raise NotImplementedError()