|
import warnings |
|
import numpy as np |
|
from itertools import combinations, permutations, product |
|
from collections.abc import Sequence |
|
from dataclasses import dataclass, field |
|
import inspect |
|
|
|
from scipy._lib._util import (check_random_state, _rename_parameter, rng_integers, |
|
_transition_to_rng) |
|
from scipy._lib._array_api import array_namespace, is_numpy, xp_moveaxis_to_end |
|
from scipy.special import ndtr, ndtri, comb, factorial |
|
|
|
from ._common import ConfidenceInterval |
|
from ._axis_nan_policy import _broadcast_concatenate, _broadcast_arrays |
|
from ._warnings_errors import DegenerateDataWarning |
|
|
|
__all__ = ['bootstrap', 'monte_carlo_test', 'permutation_test'] |
|
|
|
|
|
def _vectorize_statistic(statistic): |
|
"""Vectorize an n-sample statistic""" |
|
|
|
|
|
def stat_nd(*data, axis=0): |
|
lengths = [sample.shape[axis] for sample in data] |
|
split_indices = np.cumsum(lengths)[:-1] |
|
z = _broadcast_concatenate(data, axis) |
|
|
|
|
|
|
|
|
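        # move the working axis to position 0 so that new dimensions in the
        # output of `statistic` are _prepended_ by `np.apply_along_axis`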
|
z = np.moveaxis(z, axis, 0) |
|
|
|
def stat_1d(z): |
|
data = np.split(z, split_indices) |
|
return statistic(*data) |
|
|
|
return np.apply_along_axis(stat_1d, 0, z)[()] |
|
return stat_nd |
|
|
|
|
|
def _jackknife_resample(sample, batch=None): |
|
"""Jackknife resample the sample. Only one-sample stats for now.""" |
|
n = sample.shape[-1] |
|
batch_nominal = batch or n |
|
|
|
for k in range(0, n, batch_nominal): |
|
|
|
batch_actual = min(batch_nominal, n-k) |
|
|
|
|
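        # each row of the boolean mask `j` excludes one observation, so each
        # row of `i` holds the indices of the `n - 1` observations that remain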
|
j = np.ones((batch_actual, n), dtype=bool) |
|
np.fill_diagonal(j[:, k:k+batch_actual], False) |
|
i = np.arange(n) |
|
i = np.broadcast_to(i, (batch_actual, n)) |
|
i = i[j].reshape((batch_actual, n-1)) |
|
|
|
resamples = sample[..., i] |
|
yield resamples |
|
|
|
|
|
def _bootstrap_resample(sample, n_resamples=None, rng=None): |
|
"""Bootstrap resample the sample.""" |
|
n = sample.shape[-1] |
|
|
|
|
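    # each row of `i` is the set of indices for one resample, drawn from the
    # original observations uniformly at random with replacement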
|
i = rng_integers(rng, 0, n, (n_resamples, n)) |
|
|
|
resamples = sample[..., i] |
|
return resamples |
|
|
|
|
|
def _percentile_of_score(a, score, axis): |
|
"""Vectorized, simplified `scipy.stats.percentileofscore`. |
|
Uses logic of the 'mean' value of percentileofscore's kind parameter. |
|
|
|
Unlike `stats.percentileofscore`, the percentile returned is a fraction |
|
in [0, 1]. |
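
    For example, with ``a = [0, 1, 2, 3]``, ``score = 2``, and ``axis=-1``,
    two of the values are strictly less than the score and three are less
    than or equal to it, so the result is ``(2 + 3) / (2 * 4) = 0.625``.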
|
""" |
|
B = a.shape[axis] |
|
return ((a < score).sum(axis=axis) + (a <= score).sum(axis=axis)) / (2 * B) |
|
|
|
|
|
def _percentile_along_axis(theta_hat_b, alpha): |
|
"""`np.percentile` with different percentile for each slice.""" |
|
|
|
|
|
|
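    # `np.percentile` computes *all* the requested percentiles for each axis
    # slice; here, each slice of `theta_hat_b` gets only the single
    # percentile in `alpha` that corresponds with it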
|
shape = theta_hat_b.shape[:-1] |
|
alpha = np.broadcast_to(alpha, shape) |
|
percentiles = np.zeros_like(alpha, dtype=np.float64) |
|
for indices, alpha_i in np.ndenumerate(alpha): |
|
if np.isnan(alpha_i): |
|
|
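            # e.g. when the bootstrap distribution has only one unique element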
|
msg = ( |
|
"The BCa confidence interval cannot be calculated." |
|
" This problem is known to occur when the distribution" |
|
" is degenerate or the statistic is np.min." |
|
) |
|
warnings.warn(DegenerateDataWarning(msg), stacklevel=3) |
|
percentiles[indices] = np.nan |
|
else: |
|
theta_hat_b_i = theta_hat_b[indices] |
|
percentiles[indices] = np.percentile(theta_hat_b_i, alpha_i) |
|
return percentiles[()] |
|
|
|
|
|
def _bca_interval(data, statistic, axis, alpha, theta_hat_b, batch): |
|
"""Bias-corrected and accelerated interval.""" |
|
|
|
|
|
|
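    # compute z0_hat, the bias correction: the standard normal quantile of
    # the fraction of the bootstrap distribution below the observed statistic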
|
theta_hat = np.asarray(statistic(*data, axis=axis))[..., None] |
|
percentile = _percentile_of_score(theta_hat_b, theta_hat, axis=-1) |
|
z0_hat = ndtri(percentile) |
|
|
|
|
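    # compute a_hat, the acceleration, from jackknife resamples of the data;
    # `j` indexes the sample and `i` indexes the jackknife resample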
|
theta_hat_ji = [] |
|
for j, sample in enumerate(data): |
|
|
|
|
|
|
|
|
|
|
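        # `_jackknife_resample` adds an axis prior to the last axis that
        # corresponds with the different jackknife resamples. Add the same
        # axis to each of the other samples so they remain broadcastable.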
|
samples = [np.expand_dims(sample, -2) for sample in data] |
|
theta_hat_i = [] |
|
for jackknife_sample in _jackknife_resample(sample, batch): |
|
samples[j] = jackknife_sample |
|
broadcasted = _broadcast_arrays(samples, axis=-1) |
|
theta_hat_i.append(statistic(*broadcasted, axis=-1)) |
|
theta_hat_ji.append(theta_hat_i) |
|
|
|
theta_hat_ji = [np.concatenate(theta_hat_i, axis=-1) |
|
for theta_hat_i in theta_hat_ji] |
|
|
|
n_j = [theta_hat_i.shape[-1] for theta_hat_i in theta_hat_ji] |
|
|
|
theta_hat_j_dot = [theta_hat_i.mean(axis=-1, keepdims=True) |
|
for theta_hat_i in theta_hat_ji] |
|
|
|
U_ji = [(n - 1) * (theta_hat_dot - theta_hat_i) |
|
for theta_hat_dot, theta_hat_i, n |
|
in zip(theta_hat_j_dot, theta_hat_ji, n_j)] |
|
|
|
nums = [(U_i**3).sum(axis=-1)/n**3 for U_i, n in zip(U_ji, n_j)] |
|
dens = [(U_i**2).sum(axis=-1)/n**2 for U_i, n in zip(U_ji, n_j)] |
|
a_hat = 1/6 * sum(nums) / sum(dens)**(3/2) |
|
|
|
|
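    # compute alpha_1 and alpha_2, the BCa-adjusted percentiles at which to
    # evaluate the bootstrap distribution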
|
z_alpha = ndtri(alpha) |
|
z_1alpha = -z_alpha |
|
num1 = z0_hat + z_alpha |
|
alpha_1 = ndtr(z0_hat + num1/(1 - a_hat*num1)) |
|
num2 = z0_hat + z_1alpha |
|
alpha_2 = ndtr(z0_hat + num2/(1 - a_hat*num2)) |
|
return alpha_1, alpha_2, a_hat |
|
|
|
|
|
def _bootstrap_iv(data, statistic, vectorized, paired, axis, confidence_level, |
|
alternative, n_resamples, batch, method, bootstrap_result, |
|
rng): |
|
"""Input validation and standardization for `bootstrap`.""" |
|
|
|
if vectorized not in {True, False, None}: |
|
raise ValueError("`vectorized` must be `True`, `False`, or `None`.") |
|
|
|
if vectorized is None: |
|
vectorized = 'axis' in inspect.signature(statistic).parameters |
|
|
|
if not vectorized: |
|
statistic = _vectorize_statistic(statistic) |
|
|
|
axis_int = int(axis) |
|
if axis != axis_int: |
|
raise ValueError("`axis` must be an integer.") |
|
|
|
n_samples = 0 |
|
try: |
|
n_samples = len(data) |
|
except TypeError: |
|
raise ValueError("`data` must be a sequence of samples.") |
|
|
|
if n_samples == 0: |
|
raise ValueError("`data` must contain at least one sample.") |
|
|
|
message = ("Ignoring the dimension specified by `axis`, arrays in `data` do not " |
|
"have the same shape. Beginning in SciPy 1.16.0, `bootstrap` will " |
|
"explicitly broadcast elements of `data` to the same shape (ignoring " |
|
"`axis`) before performing the calculation. To avoid this warning in " |
|
"the meantime, ensure that all samples have the same shape (except " |
|
"potentially along `axis`).") |
|
data = [np.atleast_1d(sample) for sample in data] |
|
reduced_shapes = set() |
|
for sample in data: |
|
reduced_shape = list(sample.shape) |
|
reduced_shape.pop(axis) |
|
reduced_shapes.add(tuple(reduced_shape)) |
|
if len(reduced_shapes) != 1: |
|
warnings.warn(message, FutureWarning, stacklevel=3) |
|
|
|
data_iv = [] |
|
for sample in data: |
|
if sample.shape[axis_int] <= 1: |
|
raise ValueError("each sample in `data` must contain two or more " |
|
"observations along `axis`.") |
|
sample = np.moveaxis(sample, axis_int, -1) |
|
data_iv.append(sample) |
|
|
|
if paired not in {True, False}: |
|
raise ValueError("`paired` must be `True` or `False`.") |
|
|
|
if paired: |
|
n = data_iv[0].shape[-1] |
|
for sample in data_iv[1:]: |
|
if sample.shape[-1] != n: |
|
message = ("When `paired is True`, all samples must have the " |
|
"same length along `axis`") |
|
raise ValueError(message) |
|
|
|
|
|
|
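        # to bootstrap a paired-sample statistic, resample the *indices* of
        # the observations and apply the same indices to every sample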
|
def statistic(i, axis=-1, data=data_iv, unpaired_statistic=statistic): |
|
data = [sample[..., i] for sample in data] |
|
return unpaired_statistic(*data, axis=axis) |
|
|
|
data_iv = [np.arange(n)] |
|
|
|
confidence_level_float = float(confidence_level) |
|
|
|
alternative = alternative.lower() |
|
alternatives = {'two-sided', 'less', 'greater'} |
|
if alternative not in alternatives: |
|
raise ValueError(f"`alternative` must be one of {alternatives}") |
|
|
|
n_resamples_int = int(n_resamples) |
|
if n_resamples != n_resamples_int or n_resamples_int < 0: |
|
raise ValueError("`n_resamples` must be a non-negative integer.") |
|
|
|
if batch is None: |
|
batch_iv = batch |
|
else: |
|
batch_iv = int(batch) |
|
if batch != batch_iv or batch_iv <= 0: |
|
raise ValueError("`batch` must be a positive integer or None.") |
|
|
|
methods = {'percentile', 'basic', 'bca'} |
|
method = method.lower() |
|
if method not in methods: |
|
raise ValueError(f"`method` must be in {methods}") |
|
|
|
    message = "`bootstrap_result` must have attribute `bootstrap_distribution`"
|
if (bootstrap_result is not None |
|
and not hasattr(bootstrap_result, "bootstrap_distribution")): |
|
raise ValueError(message) |
|
|
|
message = ("Either `bootstrap_result.bootstrap_distribution.size` or " |
|
"`n_resamples` must be positive.") |
|
if ((not bootstrap_result or |
|
not bootstrap_result.bootstrap_distribution.size) |
|
and n_resamples_int == 0): |
|
raise ValueError(message) |
|
|
|
rng = check_random_state(rng) |
|
|
|
return (data_iv, statistic, vectorized, paired, axis_int, |
|
confidence_level_float, alternative, n_resamples_int, batch_iv, |
|
method, bootstrap_result, rng) |
|
|
|
|
|
@dataclass |
|
class BootstrapResult: |
|
"""Result object returned by `scipy.stats.bootstrap`. |
|
|
|
Attributes |
|
---------- |
|
confidence_interval : ConfidenceInterval |
|
The bootstrap confidence interval as an instance of |
|
`collections.namedtuple` with attributes `low` and `high`. |
|
bootstrap_distribution : ndarray |
|
The bootstrap distribution, that is, the value of `statistic` for |
|
each resample. The last dimension corresponds with the resamples |
|
(e.g. ``res.bootstrap_distribution.shape[-1] == n_resamples``). |
|
standard_error : float or ndarray |
|
The bootstrap standard error, that is, the sample standard |
|
deviation of the bootstrap distribution. |
|
|
|
""" |
|
confidence_interval: ConfidenceInterval |
|
bootstrap_distribution: np.ndarray |
|
standard_error: float | np.ndarray |
|
|
|
|
|
@_transition_to_rng('random_state') |
|
def bootstrap(data, statistic, *, n_resamples=9999, batch=None, |
|
vectorized=None, paired=False, axis=0, confidence_level=0.95, |
|
alternative='two-sided', method='BCa', bootstrap_result=None, |
|
rng=None): |
|
r""" |
|
Compute a two-sided bootstrap confidence interval of a statistic. |
|
|
|
When `method` is ``'percentile'`` and `alternative` is ``'two-sided'``, |
|
a bootstrap confidence interval is computed according to the following |
|
procedure. |
|
|
|
1. Resample the data: for each sample in `data` and for each of |
|
`n_resamples`, take a random sample of the original sample |
|
(with replacement) of the same size as the original sample. |
|
|
|
2. Compute the bootstrap distribution of the statistic: for each set of |
|
resamples, compute the test statistic. |
|
|
|
3. Determine the confidence interval: find the interval of the bootstrap |
|
distribution that is |
|
|
|
- symmetric about the median and |
|
- contains `confidence_level` of the resampled statistic values. |
|
|
|
While the ``'percentile'`` method is the most intuitive, it is rarely |
|
used in practice. Two more common methods are available, ``'basic'`` |
|
('reverse percentile') and ``'BCa'`` ('bias-corrected and accelerated'); |
|
they differ in how step 3 is performed. |
|
|
|
If the samples in `data` are taken at random from their respective |
|
distributions :math:`n` times, the confidence interval returned by |
|
`bootstrap` will contain the true value of the statistic for those |
|
distributions approximately `confidence_level`:math:`\, \times \, n` times. |
|
|
|
Parameters |
|
---------- |
|
data : sequence of array-like |
|
Each element of `data` is a sample containing scalar observations from an |
|
underlying distribution. Elements of `data` must be broadcastable to the |
|
same shape (with the possible exception of the dimension specified by `axis`). |
|
|
|
.. versionchanged:: 1.14.0 |
|
`bootstrap` will now emit a ``FutureWarning`` if the shapes of the |
|
elements of `data` are not the same (with the exception of the dimension |
|
specified by `axis`). |
|
Beginning in SciPy 1.16.0, `bootstrap` will explicitly broadcast the |
|
elements to the same shape (except along `axis`) before performing |
|
the calculation. |
|
|
|
statistic : callable |
|
Statistic for which the confidence interval is to be calculated. |
|
`statistic` must be a callable that accepts ``len(data)`` samples |
|
as separate arguments and returns the resulting statistic. |
|
If `vectorized` is set ``True``, |
|
`statistic` must also accept a keyword argument `axis` and be |
|
vectorized to compute the statistic along the provided `axis`. |
|
n_resamples : int, default: ``9999`` |
|
The number of resamples performed to form the bootstrap distribution |
|
of the statistic. |
|
batch : int, optional |
|
The number of resamples to process in each vectorized call to |
|
`statistic`. Memory usage is O( `batch` * ``n`` ), where ``n`` is the |
|
sample size. Default is ``None``, in which case ``batch = n_resamples`` |
|
(or ``batch = max(n_resamples, n)`` for ``method='BCa'``). |
|
vectorized : bool, optional |
|
If `vectorized` is set ``False``, `statistic` will not be passed |
|
keyword argument `axis` and is expected to calculate the statistic |
|
only for 1D samples. If ``True``, `statistic` will be passed keyword |
|
argument `axis` and is expected to calculate the statistic along `axis` |
|
when passed an ND sample array. If ``None`` (default), `vectorized` |
|
will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of |
|
a vectorized statistic typically reduces computation time. |
|
paired : bool, default: ``False`` |
|
Whether the statistic treats corresponding elements of the samples |
|
in `data` as paired. If True, `bootstrap` resamples an array of |
|
*indices* and uses the same indices for all arrays in `data`; otherwise, |
|
`bootstrap` independently resamples the elements of each array. |
|
axis : int, default: ``0`` |
|
The axis of the samples in `data` along which the `statistic` is |
|
calculated. |
|
confidence_level : float, default: ``0.95`` |
|
The confidence level of the confidence interval. |
|
alternative : {'two-sided', 'less', 'greater'}, default: ``'two-sided'`` |
|
Choose ``'two-sided'`` (default) for a two-sided confidence interval, |
|
``'less'`` for a one-sided confidence interval with the lower bound |
|
at ``-np.inf``, and ``'greater'`` for a one-sided confidence interval |
|
with the upper bound at ``np.inf``. The other bound of the one-sided |
|
confidence intervals is the same as that of a two-sided confidence |
|
interval with `confidence_level` twice as far from 1.0; e.g. the upper |
|
bound of a 95% ``'less'`` confidence interval is the same as the upper |
|
bound of a 90% ``'two-sided'`` confidence interval. |
|
method : {'percentile', 'basic', 'bca'}, default: ``'BCa'`` |
|
Whether to return the 'percentile' bootstrap confidence interval |
|
(``'percentile'``), the 'basic' (AKA 'reverse') bootstrap confidence |
|
interval (``'basic'``), or the bias-corrected and accelerated bootstrap |
|
confidence interval (``'BCa'``). |
|
bootstrap_result : BootstrapResult, optional |
|
Provide the result object returned by a previous call to `bootstrap` |
|
to include the previous bootstrap distribution in the new bootstrap |
|
distribution. This can be used, for example, to change |
|
`confidence_level`, change `method`, or see the effect of performing |
|
additional resampling without repeating computations. |
|
rng : `numpy.random.Generator`, optional |
|
Pseudorandom number generator state. When `rng` is None, a new |
|
`numpy.random.Generator` is created using entropy from the |
|
operating system. Types other than `numpy.random.Generator` are |
|
passed to `numpy.random.default_rng` to instantiate a ``Generator``. |
|
|
|
Returns |
|
------- |
|
res : BootstrapResult |
|
An object with attributes: |
|
|
|
confidence_interval : ConfidenceInterval |
|
The bootstrap confidence interval as an instance of |
|
`collections.namedtuple` with attributes `low` and `high`. |
|
bootstrap_distribution : ndarray |
|
The bootstrap distribution, that is, the value of `statistic` for |
|
each resample. The last dimension corresponds with the resamples |
|
(e.g. ``res.bootstrap_distribution.shape[-1] == n_resamples``). |
|
standard_error : float or ndarray |
|
The bootstrap standard error, that is, the sample standard |
|
deviation of the bootstrap distribution. |
|
|
|
Warns |
|
----- |
|
`~scipy.stats.DegenerateDataWarning` |
|
Generated when ``method='BCa'`` and the bootstrap distribution is |
|
degenerate (e.g. all elements are identical). |
|
|
|
Notes |
|
----- |
|
Elements of the confidence interval may be NaN for ``method='BCa'`` if |
|
the bootstrap distribution is degenerate (e.g. all elements are identical). |
|
In this case, consider using another `method` or inspecting `data` for |
|
indications that other analysis may be more appropriate (e.g. all |
|
observations are identical). |
|
|
|
References |
|
---------- |
|
.. [1] B. Efron and R. J. Tibshirani, An Introduction to the Bootstrap, |
|
Chapman & Hall/CRC, Boca Raton, FL, USA (1993) |
|
.. [2] Nathaniel E. Helwig, "Bootstrap Confidence Intervals", |
|
http://users.stat.umn.edu/~helwig/notes/bootci-Notes.pdf |
|
.. [3] Bootstrapping (statistics), Wikipedia, |
|
https://en.wikipedia.org/wiki/Bootstrapping_%28statistics%29 |
|
|
|
Examples |
|
-------- |
|
Suppose we have sampled data from an unknown distribution. |
|
|
|
>>> import numpy as np |
|
>>> rng = np.random.default_rng() |
|
>>> from scipy.stats import norm |
|
>>> dist = norm(loc=2, scale=4) # our "unknown" distribution |
|
>>> data = dist.rvs(size=100, random_state=rng) |
|
|
|
We are interested in the standard deviation of the distribution. |
|
|
|
>>> std_true = dist.std() # the true value of the statistic |
|
>>> print(std_true) |
|
4.0 |
|
>>> std_sample = np.std(data) # the sample statistic |
|
>>> print(std_sample) |
|
3.9460644295563863 |
|
|
|
The bootstrap is used to approximate the variability we would expect if we |
|
were to repeatedly sample from the unknown distribution and calculate the |
|
statistic of the sample each time. It does this by repeatedly resampling |
|
values *from the original sample* with replacement and calculating the |
|
statistic of each resample. This results in a "bootstrap distribution" of |
|
the statistic. |
|
|
|
>>> import matplotlib.pyplot as plt |
|
>>> from scipy.stats import bootstrap |
|
>>> data = (data,) # samples must be in a sequence |
|
>>> res = bootstrap(data, np.std, confidence_level=0.9, rng=rng) |
|
>>> fig, ax = plt.subplots() |
|
>>> ax.hist(res.bootstrap_distribution, bins=25) |
|
>>> ax.set_title('Bootstrap Distribution') |
|
>>> ax.set_xlabel('statistic value') |
|
>>> ax.set_ylabel('frequency') |
|
>>> plt.show() |
|
|
|
The standard error quantifies this variability. It is calculated as the |
|
standard deviation of the bootstrap distribution. |
|
|
|
>>> res.standard_error |
|
0.24427002125829136 |
|
>>> res.standard_error == np.std(res.bootstrap_distribution, ddof=1) |
|
True |
|
|
|
The bootstrap distribution of the statistic is often approximately normal |
|
with scale equal to the standard error. |
|
|
|
>>> x = np.linspace(3, 5) |
|
>>> pdf = norm.pdf(x, loc=std_sample, scale=res.standard_error) |
|
>>> fig, ax = plt.subplots() |
|
>>> ax.hist(res.bootstrap_distribution, bins=25, density=True) |
|
>>> ax.plot(x, pdf) |
|
>>> ax.set_title('Normal Approximation of the Bootstrap Distribution') |
|
>>> ax.set_xlabel('statistic value') |
|
>>> ax.set_ylabel('pdf') |
|
>>> plt.show() |
|
|
|
This suggests that we could construct a 90% confidence interval on the |
|
statistic based on quantiles of this normal distribution. |
|
|
|
>>> norm.interval(0.9, loc=std_sample, scale=res.standard_error) |
|
(3.5442759991341726, 4.3478528599786) |
|
|
|
    Due to the central limit theorem, this normal approximation is accurate for a
|
variety of statistics and distributions underlying the samples; however, |
|
the approximation is not reliable in all cases. Because `bootstrap` is |
|
designed to work with arbitrary underlying distributions and statistics, |
|
it uses more advanced techniques to generate an accurate confidence |
|
interval. |
|
|
|
>>> print(res.confidence_interval) |
|
ConfidenceInterval(low=3.57655333533867, high=4.382043696342881) |
|
|
|
If we sample from the original distribution 100 times and form a bootstrap |
|
confidence interval for each sample, the confidence interval |
|
contains the true value of the statistic approximately 90% of the time. |
|
|
|
>>> n_trials = 100 |
|
>>> ci_contains_true_std = 0 |
|
>>> for i in range(n_trials): |
|
... data = (dist.rvs(size=100, random_state=rng),) |
|
... res = bootstrap(data, np.std, confidence_level=0.9, |
|
... n_resamples=999, rng=rng) |
|
... ci = res.confidence_interval |
|
... if ci[0] < std_true < ci[1]: |
|
... ci_contains_true_std += 1 |
|
>>> print(ci_contains_true_std) |
|
88 |
|
|
|
Rather than writing a loop, we can also determine the confidence intervals |
|
for all 100 samples at once. |
|
|
|
>>> data = (dist.rvs(size=(n_trials, 100), random_state=rng),) |
|
>>> res = bootstrap(data, np.std, axis=-1, confidence_level=0.9, |
|
... n_resamples=999, rng=rng) |
|
>>> ci_l, ci_u = res.confidence_interval |
|
|
|
Here, `ci_l` and `ci_u` contain the confidence interval for each of the |
|
``n_trials = 100`` samples. |
|
|
|
>>> print(ci_l[:5]) |
|
[3.86401283 3.33304394 3.52474647 3.54160981 3.80569252] |
|
>>> print(ci_u[:5]) |
|
[4.80217409 4.18143252 4.39734707 4.37549713 4.72843584] |
|
|
|
And again, approximately 90% contain the true value, ``std_true = 4``. |
|
|
|
>>> print(np.sum((ci_l < std_true) & (std_true < ci_u))) |
|
93 |
|
|
|
`bootstrap` can also be used to estimate confidence intervals of |
|
multi-sample statistics. For example, to get a confidence interval |
|
for the difference between means, we write a function that accepts |
|
two sample arguments and returns only the statistic. The use of the |
|
    ``axis`` argument ensures that all mean calculations are performed in
|
a single vectorized call, which is faster than looping over pairs |
|
of resamples in Python. |
|
|
|
>>> def my_statistic(sample1, sample2, axis=-1): |
|
... mean1 = np.mean(sample1, axis=axis) |
|
... mean2 = np.mean(sample2, axis=axis) |
|
... return mean1 - mean2 |
|
|
|
    Here, we use the 'basic' ('reverse percentile') method with the default 95%
    confidence level.
|
|
|
>>> sample1 = norm.rvs(scale=1, size=100, random_state=rng) |
|
>>> sample2 = norm.rvs(scale=2, size=100, random_state=rng) |
|
>>> data = (sample1, sample2) |
|
>>> res = bootstrap(data, my_statistic, method='basic', rng=rng) |
|
>>> print(my_statistic(sample1, sample2)) |
|
0.16661030792089523 |
|
>>> print(res.confidence_interval) |
|
ConfidenceInterval(low=-0.29087973240818693, high=0.6371338699912273) |
|
|
|
The bootstrap estimate of the standard error is also available. |
|
|
|
>>> print(res.standard_error) |
|
0.238323948262459 |
|
|
|
Paired-sample statistics work, too. For example, consider the Pearson |
|
correlation coefficient. |
|
|
|
>>> from scipy.stats import pearsonr |
|
>>> n = 100 |
|
>>> x = np.linspace(0, 10, n) |
|
>>> y = x + rng.uniform(size=n) |
|
>>> print(pearsonr(x, y)[0]) # element 0 is the statistic |
|
0.9954306665125647 |
|
|
|
    We wrap `pearsonr` so that it returns only the statistic, making sure
    to use the `axis` argument so that the calculation is vectorized.
|
|
|
>>> def my_statistic(x, y, axis=-1): |
|
... return pearsonr(x, y, axis=axis)[0] |
|
|
|
We call `bootstrap` using ``paired=True``. |
|
|
|
>>> res = bootstrap((x, y), my_statistic, paired=True, rng=rng) |
|
>>> print(res.confidence_interval) |
|
ConfidenceInterval(low=0.9941504301315878, high=0.996377412215445) |
|
|
|
The result object can be passed back into `bootstrap` to perform additional |
|
resampling: |
|
|
|
>>> len(res.bootstrap_distribution) |
|
9999 |
|
>>> res = bootstrap((x, y), my_statistic, paired=True, |
|
... n_resamples=1000, rng=rng, |
|
... bootstrap_result=res) |
|
>>> len(res.bootstrap_distribution) |
|
10999 |
|
|
|
or to change the confidence interval options: |
|
|
|
>>> res2 = bootstrap((x, y), my_statistic, paired=True, |
|
... n_resamples=0, rng=rng, bootstrap_result=res, |
|
... method='percentile', confidence_level=0.9) |
|
>>> np.testing.assert_equal(res2.bootstrap_distribution, |
|
... res.bootstrap_distribution) |
|
    >>> res2.confidence_interval
|
ConfidenceInterval(low=0.9941574828235082, high=0.9963781698210212) |
|
|
|
without repeating computation of the original bootstrap distribution. |
|
|
|
""" |
|
|
|
args = _bootstrap_iv(data, statistic, vectorized, paired, axis, |
|
confidence_level, alternative, n_resamples, batch, |
|
method, bootstrap_result, rng) |
|
(data, statistic, vectorized, paired, axis, confidence_level, |
|
alternative, n_resamples, batch, method, bootstrap_result, |
|
rng) = args |
|
|
|
theta_hat_b = ([] if bootstrap_result is None |
|
else [bootstrap_result.bootstrap_distribution]) |
|
|
|
batch_nominal = batch or n_resamples or 1 |
|
|
|
for k in range(0, n_resamples, batch_nominal): |
|
batch_actual = min(batch_nominal, n_resamples-k) |
|
|
|
resampled_data = [] |
|
for sample in data: |
|
resample = _bootstrap_resample(sample, n_resamples=batch_actual, |
|
rng=rng) |
|
resampled_data.append(resample) |
|
|
|
|
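        # compute the statistic for this batch of resamples; the bootstrap
        # distribution is accumulated along the last axis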
|
theta_hat_b.append(statistic(*resampled_data, axis=-1)) |
|
theta_hat_b = np.concatenate(theta_hat_b, axis=-1) |
|
|
|
|
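    # for a two-sided interval, split 1 - confidence_level between the two
    # tails; for a one-sided interval, put it all in one tail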
|
alpha = ((1 - confidence_level)/2 if alternative == 'two-sided' |
|
else (1 - confidence_level)) |
|
if method == 'bca': |
|
interval = _bca_interval(data, statistic, axis=-1, alpha=alpha, |
|
theta_hat_b=theta_hat_b, batch=batch)[:2] |
|
percentile_fun = _percentile_along_axis |
|
else: |
|
interval = alpha, 1-alpha |
|
|
|
def percentile_fun(a, q): |
|
return np.percentile(a=a, q=q, axis=-1) |
|
|
|
|
|
ci_l = percentile_fun(theta_hat_b, interval[0]*100) |
|
ci_u = percentile_fun(theta_hat_b, interval[1]*100) |
|
if method == 'basic': |
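        # the 'basic' (reverse percentile) interval reflects the percentile
        # interval about the observed value of the statistic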
|
theta_hat = statistic(*data, axis=-1) |
|
ci_l, ci_u = 2*theta_hat - ci_u, 2*theta_hat - ci_l |
|
|
|
if alternative == 'less': |
|
ci_l = np.full_like(ci_l, -np.inf) |
|
elif alternative == 'greater': |
|
ci_u = np.full_like(ci_u, np.inf) |
|
|
|
return BootstrapResult(confidence_interval=ConfidenceInterval(ci_l, ci_u), |
|
bootstrap_distribution=theta_hat_b, |
|
standard_error=np.std(theta_hat_b, ddof=1, axis=-1)) |
|
|
|
|
|
def _monte_carlo_test_iv(data, rvs, statistic, vectorized, n_resamples, |
|
batch, alternative, axis): |
|
"""Input validation for `monte_carlo_test`.""" |
|
axis_int = int(axis) |
|
if axis != axis_int: |
|
raise ValueError("`axis` must be an integer.") |
|
|
|
if vectorized not in {True, False, None}: |
|
raise ValueError("`vectorized` must be `True`, `False`, or `None`.") |
|
|
|
if not isinstance(rvs, Sequence): |
|
rvs = (rvs,) |
|
data = (data,) |
|
for rvs_i in rvs: |
|
if not callable(rvs_i): |
|
raise TypeError("`rvs` must be callable or sequence of callables.") |
|
|
|
|
|
|
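    # at this point, `data` should be a sequence of samples and `rvs` a
    # sequence of callables of equal length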
|
message = "If `rvs` is a sequence, `len(rvs)` must equal `len(data)`." |
|
try: |
|
len(data) |
|
except TypeError as e: |
|
raise ValueError(message) from e |
|
if not len(rvs) == len(data): |
|
raise ValueError(message) |
|
|
|
if not callable(statistic): |
|
raise TypeError("`statistic` must be callable.") |
|
|
|
if vectorized is None: |
|
try: |
|
signature = inspect.signature(statistic).parameters |
|
except ValueError as e: |
|
message = (f"Signature inspection of {statistic=} failed; " |
|
"pass `vectorize` explicitly.") |
|
raise ValueError(message) from e |
|
vectorized = 'axis' in signature |
|
|
|
xp = array_namespace(*data) |
|
|
|
if not vectorized: |
|
if is_numpy(xp): |
|
statistic_vectorized = _vectorize_statistic(statistic) |
|
else: |
|
message = ("`statistic` must be vectorized (i.e. support an `axis` " |
|
f"argument) when `data` contains {xp.__name__} arrays.") |
|
raise ValueError(message) |
|
else: |
|
statistic_vectorized = statistic |
|
|
|
data = _broadcast_arrays(data, axis, xp=xp) |
|
data_iv = [] |
|
for sample in data: |
|
sample = xp.broadcast_to(sample, (1,)) if sample.ndim == 0 else sample |
|
sample = xp_moveaxis_to_end(sample, axis_int, xp=xp) |
|
data_iv.append(sample) |
|
|
|
n_resamples_int = int(n_resamples) |
|
if n_resamples != n_resamples_int or n_resamples_int <= 0: |
|
raise ValueError("`n_resamples` must be a positive integer.") |
|
|
|
if batch is None: |
|
batch_iv = batch |
|
else: |
|
batch_iv = int(batch) |
|
if batch != batch_iv or batch_iv <= 0: |
|
raise ValueError("`batch` must be a positive integer or None.") |
|
|
|
alternatives = {'two-sided', 'greater', 'less'} |
|
alternative = alternative.lower() |
|
if alternative not in alternatives: |
|
raise ValueError(f"`alternative` must be in {alternatives}") |
|
|
|
|
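    # infer the dtype of the p-values from the input arrays and the smallest
    # floating dtype available in the namespace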
|
min_float = getattr(xp, 'float16', xp.float32) |
|
dtype = xp.result_type(*data_iv, min_float) |
|
|
|
return (data_iv, rvs, statistic_vectorized, vectorized, n_resamples_int, |
|
batch_iv, alternative, axis_int, dtype, xp) |
|
|
|
|
|
@dataclass |
|
class MonteCarloTestResult: |
|
"""Result object returned by `scipy.stats.monte_carlo_test`. |
|
|
|
Attributes |
|
---------- |
|
statistic : float or ndarray |
|
The observed test statistic of the sample. |
|
pvalue : float or ndarray |
|
The p-value for the given alternative. |
|
null_distribution : ndarray |
|
The values of the test statistic generated under the null |
|
hypothesis. |
|
""" |
|
statistic: float | np.ndarray |
|
pvalue: float | np.ndarray |
|
null_distribution: np.ndarray |
|
|
|
|
|
@_rename_parameter('sample', 'data') |
|
def monte_carlo_test(data, rvs, statistic, *, vectorized=None, |
|
n_resamples=9999, batch=None, alternative="two-sided", |
|
axis=0): |
|
r"""Perform a Monte Carlo hypothesis test. |
|
|
|
`data` contains a sample or a sequence of one or more samples. `rvs` |
|
specifies the distribution(s) of the sample(s) in `data` under the null |
|
hypothesis. The value of `statistic` for the given `data` is compared |
|
against a Monte Carlo null distribution: the value of the statistic for |
|
each of `n_resamples` sets of samples generated using `rvs`. This gives |
|
the p-value, the probability of observing such an extreme value of the |
|
test statistic under the null hypothesis. |
|
|
|
Parameters |
|
---------- |
|
data : array-like or sequence of array-like |
|
An array or sequence of arrays of observations. |
|
rvs : callable or tuple of callables |
|
A callable or sequence of callables that generates random variates |
|
under the null hypothesis. Each element of `rvs` must be a callable |
|
that accepts keyword argument ``size`` (e.g. ``rvs(size=(m, n))``) and |
|
returns an N-d array sample of that shape. If `rvs` is a sequence, the |
|
number of callables in `rvs` must match the number of samples in |
|
`data`, i.e. ``len(rvs) == len(data)``. If `rvs` is a single callable, |
|
`data` is treated as a single sample. |
|
statistic : callable |
|
Statistic for which the p-value of the hypothesis test is to be |
|
calculated. `statistic` must be a callable that accepts a sample |
|
(e.g. ``statistic(sample)``) or ``len(rvs)`` separate samples (e.g. |
|
        ``statistic(sample1, sample2)`` if `rvs` contains two callables and
|
`data` contains two samples) and returns the resulting statistic. |
|
If `vectorized` is set ``True``, `statistic` must also accept a keyword |
|
argument `axis` and be vectorized to compute the statistic along the |
|
provided `axis` of the samples in `data`. |
|
vectorized : bool, optional |
|
If `vectorized` is set ``False``, `statistic` will not be passed |
|
keyword argument `axis` and is expected to calculate the statistic |
|
only for 1D samples. If ``True``, `statistic` will be passed keyword |
|
argument `axis` and is expected to calculate the statistic along `axis` |
|
when passed ND sample arrays. If ``None`` (default), `vectorized` |
|
will be set ``True`` if ``axis`` is a parameter of `statistic`. Use of |
|
a vectorized statistic typically reduces computation time. |
|
n_resamples : int, default: 9999 |
|
Number of samples drawn from each of the callables of `rvs`. |
|
        Equivalently, the number of statistic values under the null hypothesis
|
used as the Monte Carlo null distribution. |
|
batch : int, optional |
|
The number of Monte Carlo samples to process in each call to |
|
        `statistic`. Memory usage is O( `batch` * ``sample.shape[axis]`` ). Default
|
is ``None``, in which case `batch` equals `n_resamples`. |
|
alternative : {'two-sided', 'less', 'greater'} |
|
The alternative hypothesis for which the p-value is calculated. |
|
For each alternative, the p-value is defined as follows. |
|
|
|
- ``'greater'`` : the percentage of the null distribution that is |
|
greater than or equal to the observed value of the test statistic. |
|
- ``'less'`` : the percentage of the null distribution that is |
|
less than or equal to the observed value of the test statistic. |
|
- ``'two-sided'`` : twice the smaller of the p-values above. |
|
|
|
axis : int, default: 0 |
|
The axis of `data` (or each sample within `data`) over which to |
|
calculate the statistic. |
|
|
|
Returns |
|
------- |
|
res : MonteCarloTestResult |
|
An object with attributes: |
|
|
|
statistic : float or ndarray |
|
The test statistic of the observed `data`. |
|
pvalue : float or ndarray |
|
The p-value for the given alternative. |
|
null_distribution : ndarray |
|
The values of the test statistic generated under the null |
|
hypothesis. |
|
|
|
.. warning:: |
|
The p-value is calculated by counting the elements of the null |
|
distribution that are as extreme or more extreme than the observed |
|
value of the statistic. Due to the use of finite precision arithmetic, |
|
some statistic functions return numerically distinct values when the |
|
theoretical values would be exactly equal. In some cases, this could |
|
lead to a large error in the calculated p-value. `monte_carlo_test` |
|
guards against this by considering elements in the null distribution |
|
that are "close" (within a relative tolerance of 100 times the |
|
floating point epsilon of inexact dtypes) to the observed |
|
value of the test statistic as equal to the observed value of the |
|
test statistic. However, the user is advised to inspect the null |
|
distribution to assess whether this method of comparison is |
|
appropriate, and if not, calculate the p-value manually. |
|
|
|
References |
|
---------- |
|
|
|
.. [1] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be |
|
Zero: Calculating Exact P-values When Permutations Are Randomly Drawn." |
|
Statistical Applications in Genetics and Molecular Biology 9.1 (2010). |
|
|
|
Examples |
|
-------- |
|
|
|
Suppose we wish to test whether a small sample has been drawn from a normal |
|
distribution. We decide that we will use the skew of the sample as a |
|
test statistic, and we will consider a p-value of 0.05 to be statistically |
|
significant. |
|
|
|
>>> import numpy as np |
|
>>> from scipy import stats |
|
>>> def statistic(x, axis): |
|
... return stats.skew(x, axis) |
|
|
|
After collecting our data, we calculate the observed value of the test |
|
statistic. |
|
|
|
>>> rng = np.random.default_rng() |
|
>>> x = stats.skewnorm.rvs(a=1, size=50, random_state=rng) |
|
>>> statistic(x, axis=0) |
|
0.12457412450240658 |
|
|
|
To determine the probability of observing such an extreme value of the |
|
skewness by chance if the sample were drawn from the normal distribution, |
|
we can perform a Monte Carlo hypothesis test. The test will draw many |
|
samples at random from their normal distribution, calculate the skewness |
|
of each sample, and compare our original skewness against this |
|
distribution to determine an approximate p-value. |
|
|
|
>>> from scipy.stats import monte_carlo_test |
|
>>> # because our statistic is vectorized, we pass `vectorized=True` |
|
>>> rvs = lambda size: stats.norm.rvs(size=size, random_state=rng) |
|
>>> res = monte_carlo_test(x, rvs, statistic, vectorized=True) |
|
>>> print(res.statistic) |
|
0.12457412450240658 |
|
>>> print(res.pvalue) |
|
0.7012 |
|
|
|
    The probability of observing such an extreme value of the test statistic
    under the null hypothesis is ~70%. This is greater than
|
our chosen threshold of 5%, so we cannot consider this to be significant |
|
evidence against the null hypothesis. |
|
|
|
Note that this p-value essentially matches that of |
|
`scipy.stats.skewtest`, which relies on an asymptotic distribution of a |
|
test statistic based on the sample skewness. |
|
|
|
>>> stats.skewtest(x).pvalue |
|
0.6892046027110614 |
|
|
|
This asymptotic approximation is not valid for small sample sizes, but |
|
`monte_carlo_test` can be used with samples of any size. |
|
|
|
>>> x = stats.skewnorm.rvs(a=1, size=7, random_state=rng) |
|
>>> # stats.skewtest(x) would produce an error due to small sample |
|
>>> res = monte_carlo_test(x, rvs, statistic, vectorized=True) |
|
|
|
The Monte Carlo distribution of the test statistic is provided for |
|
further investigation. |
|
|
|
>>> import matplotlib.pyplot as plt |
|
>>> fig, ax = plt.subplots() |
|
>>> ax.hist(res.null_distribution, bins=50) |
|
>>> ax.set_title("Monte Carlo distribution of test statistic") |
|
>>> ax.set_xlabel("Value of Statistic") |
|
>>> ax.set_ylabel("Frequency") |
|
>>> plt.show() |
|
|
|
""" |
|
args = _monte_carlo_test_iv(data, rvs, statistic, vectorized, |
|
n_resamples, batch, alternative, axis) |
|
(data, rvs, statistic, vectorized, n_resamples, |
|
batch, alternative, axis, dtype, xp) = args |
|
|
|
|
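    # compute the observed value of the statistic; indexing with `()`
    # unwraps a 0-d array to a scalar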
|
observed = xp.asarray(statistic(*data, axis=-1)) |
|
observed = observed[()] if observed.ndim == 0 else observed |
|
|
|
n_observations = [sample.shape[-1] for sample in data] |
|
batch_nominal = batch or n_resamples |
|
null_distribution = [] |
|
for k in range(0, n_resamples, batch_nominal): |
|
batch_actual = min(batch_nominal, n_resamples - k) |
|
resamples = [rvs_i(size=(batch_actual, n_observations_i)) |
|
for rvs_i, n_observations_i in zip(rvs, n_observations)] |
|
null_distribution.append(statistic(*resamples, axis=-1)) |
|
null_distribution = xp.concat(null_distribution) |
|
null_distribution = xp.reshape(null_distribution, [-1] + [1]*observed.ndim) |
|
|
|
|
|
|
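    # relative tolerance for detecting numerically distinct but theoretically
    # equal values in the null distribution (see the warning in the docstring)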
|
    eps = (0 if not xp.isdtype(observed.dtype, 'real floating')
           else xp.finfo(observed.dtype).eps*100)
|
gamma = xp.abs(eps * observed) |
|
|
|
def less(null_distribution, observed): |
|
cmps = null_distribution <= observed + gamma |
|
cmps = xp.asarray(cmps, dtype=dtype) |
|
pvalues = (xp.sum(cmps, axis=0, dtype=dtype) + 1.) / (n_resamples + 1.) |
|
return pvalues |
|
|
|
def greater(null_distribution, observed): |
|
cmps = null_distribution >= observed - gamma |
|
cmps = xp.asarray(cmps, dtype=dtype) |
|
pvalues = (xp.sum(cmps, axis=0, dtype=dtype) + 1.) / (n_resamples + 1.) |
|
return pvalues |
|
|
|
def two_sided(null_distribution, observed): |
|
pvalues_less = less(null_distribution, observed) |
|
pvalues_greater = greater(null_distribution, observed) |
|
pvalues = xp.minimum(pvalues_less, pvalues_greater) * 2 |
|
return pvalues |
|
|
|
compare = {"less": less, |
|
"greater": greater, |
|
"two-sided": two_sided} |
|
|
|
pvalues = compare[alternative](null_distribution, observed) |
|
pvalues = xp.clip(pvalues, 0., 1.) |
|
|
|
return MonteCarloTestResult(observed, pvalues, null_distribution) |
|
|
|
|
|
@dataclass |
|
class PowerResult: |
|
"""Result object returned by `scipy.stats.power`. |
|
|
|
Attributes |
|
---------- |
|
power : float or ndarray |
|
The estimated power. |
|
pvalues : float or ndarray |
|
The simulated p-values. |
|
""" |
|
power: float | np.ndarray |
|
pvalues: float | np.ndarray |
|
|
|
|
|
def _wrap_kwargs(fun): |
|
"""Wrap callable to accept arbitrary kwargs and ignore unused ones""" |
|
|
|
try: |
|
keys = set(inspect.signature(fun).parameters.keys()) |
|
except ValueError: |
|
|
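        # some callables (e.g. compiled methods of NumPy `Generator` objects)
        # cannot be inspected; fall back to passing only `size`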
|
keys = {'size'} |
|
|
|
|
|
def wrapped_rvs_i(*args, keys=keys, fun=fun, **all_kwargs): |
|
kwargs = {key: val for key, val in all_kwargs.items() |
|
if key in keys} |
|
return fun(*args, **kwargs) |
|
return wrapped_rvs_i |
|
|
|
|
|
def _power_iv(rvs, test, n_observations, significance, vectorized, |
|
n_resamples, batch, kwargs): |
|
"""Input validation for `monte_carlo_test`.""" |
|
|
|
if vectorized not in {True, False, None}: |
|
raise ValueError("`vectorized` must be `True`, `False`, or `None`.") |
|
|
|
if not isinstance(rvs, Sequence): |
|
rvs = (rvs,) |
|
n_observations = (n_observations,) |
|
for rvs_i in rvs: |
|
if not callable(rvs_i): |
|
raise TypeError("`rvs` must be callable or sequence of callables.") |
|
|
|
if not len(rvs) == len(n_observations): |
|
message = ("If `rvs` is a sequence, `len(rvs)` " |
|
"must equal `len(n_observations)`.") |
|
raise ValueError(message) |
|
|
|
significance = np.asarray(significance)[()] |
|
if (not np.issubdtype(significance.dtype, np.floating) |
|
or np.min(significance) < 0 or np.max(significance) > 1): |
|
raise ValueError("`significance` must contain floats between 0 and 1.") |
|
|
|
kwargs = dict() if kwargs is None else kwargs |
|
if not isinstance(kwargs, dict): |
|
raise TypeError("`kwargs` must be a dictionary that maps keywords to arrays.") |
|
|
|
vals = kwargs.values() |
|
keys = kwargs.keys() |
|
|
|
|
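    # wrap the `rvs` callables so they accept and ignore unused keyword
    # arguments from `kwargs`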
|
wrapped_rvs = [_wrap_kwargs(rvs_i) for rvs_i in rvs] |
|
|
|
|
|
|
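    # broadcast the sample sizes against the `kwargs` arrays, then ravel so
    # that each row of `tmp` is one set of sample sizes and argument values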
|
tmp = np.asarray(np.broadcast_arrays(*n_observations, *vals)) |
|
shape = tmp.shape |
|
if tmp.ndim == 1: |
|
tmp = tmp[np.newaxis, :] |
|
else: |
|
tmp = tmp.reshape((shape[0], -1)).T |
|
nobs, vals = tmp[:, :len(rvs)], tmp[:, len(rvs):] |
|
nobs = nobs.astype(int) |
|
|
|
if not callable(test): |
|
raise TypeError("`test` must be callable.") |
|
|
|
if vectorized is None: |
|
vectorized = 'axis' in inspect.signature(test).parameters |
|
|
|
if not vectorized: |
|
test_vectorized = _vectorize_statistic(test) |
|
else: |
|
test_vectorized = test |
|
|
|
test_vectorized = _wrap_kwargs(test_vectorized) |
|
|
|
n_resamples_int = int(n_resamples) |
|
if n_resamples != n_resamples_int or n_resamples_int <= 0: |
|
raise ValueError("`n_resamples` must be a positive integer.") |
|
|
|
if batch is None: |
|
batch_iv = batch |
|
else: |
|
batch_iv = int(batch) |
|
if batch != batch_iv or batch_iv <= 0: |
|
raise ValueError("`batch` must be a positive integer or None.") |
|
|
|
return (wrapped_rvs, test_vectorized, nobs, significance, vectorized, |
|
n_resamples_int, batch_iv, vals, keys, shape[1:]) |
|
|
|
|
|
def power(test, rvs, n_observations, *, significance=0.01, vectorized=None, |
|
n_resamples=10000, batch=None, kwargs=None): |
|
r"""Simulate the power of a hypothesis test under an alternative hypothesis. |
|
|
|
Parameters |
|
---------- |
|
test : callable |
|
Hypothesis test for which the power is to be simulated. |
|
`test` must be a callable that accepts a sample (e.g. ``test(sample)``) |
|
        or ``len(rvs)`` separate samples (e.g. ``test(sample1, sample2)`` if
|
`rvs` contains two callables and `n_observations` contains two values) |
|
and returns the p-value of the test. |
|
If `vectorized` is set to ``True``, `test` must also accept a keyword |
|
argument `axis` and be vectorized to perform the test along the |
|
provided `axis` of the samples. |
|
Any callable from `scipy.stats` with an `axis` argument that returns an |
|
object with a `pvalue` attribute is also acceptable. |
|
rvs : callable or tuple of callables |
|
A callable or sequence of callables that generate(s) random variates |
|
under the alternative hypothesis. Each element of `rvs` must accept |
|
keyword argument ``size`` (e.g. ``rvs(size=(m, n))``) and return an |
|
N-d array of that shape. If `rvs` is a sequence, the number of callables |
|
in `rvs` must match the number of elements of `n_observations`, i.e. |
|
``len(rvs) == len(n_observations)``. If `rvs` is a single callable, |
|
`n_observations` is treated as a single element. |
|
n_observations : tuple of ints or tuple of integer arrays |
|
        If a sequence of ints, each is the size of a sample to be passed to `test`.
|
If a sequence of integer arrays, the power is simulated for each |
|
set of corresponding sample sizes. See Examples. |
|
significance : float or array_like of floats, default: 0.01 |
|
The threshold for significance; i.e., the p-value below which the |
|
hypothesis test results will be considered as evidence against the null |
|
hypothesis. Equivalently, the acceptable rate of Type I error under |
|
the null hypothesis. If an array, the power is simulated for each |
|
significance threshold. |
|
kwargs : dict, optional |
|
Keyword arguments to be passed to `rvs` and/or `test` callables. |
|
Introspection is used to determine which keyword arguments may be |
|
passed to each callable. |
|
The value corresponding with each keyword must be an array. |
|
Arrays must be broadcastable with one another and with each array in |
|
`n_observations`. The power is simulated for each set of corresponding |
|
sample sizes and arguments. See Examples. |
|
vectorized : bool, optional |
|
If `vectorized` is set to ``False``, `test` will not be passed keyword |
|
argument `axis` and is expected to perform the test only for 1D samples. |
|
If ``True``, `test` will be passed keyword argument `axis` and is |
|
expected to perform the test along `axis` when passed N-D sample arrays. |
|
If ``None`` (default), `vectorized` will be set ``True`` if ``axis`` is |
|
a parameter of `test`. Use of a vectorized test typically reduces |
|
computation time. |
|
n_resamples : int, default: 10000 |
|
Number of samples drawn from each of the callables of `rvs`. |
|
        Equivalently, the number of tests performed under the alternative
|
hypothesis to approximate the power. |
|
batch : int, optional |
|
The number of samples to process in each call to `test`. Memory usage is |
|
proportional to the product of `batch` and the largest sample size. Default |
|
is ``None``, in which case `batch` equals `n_resamples`. |
|
|
|
Returns |
|
------- |
|
res : PowerResult |
|
An object with attributes: |
|
|
|
power : float or ndarray |
|
The estimated power against the alternative. |
|
pvalues : ndarray |
|
The p-values observed under the alternative hypothesis. |
|
|
|
Notes |
|
----- |
|
The power is simulated as follows: |
|
|
|
- Draw many random samples (or sets of samples), each of the size(s) |
|
specified by `n_observations`, under the alternative specified by |
|
`rvs`. |
|
- For each sample (or set of samples), compute the p-value according to |
|
`test`. These p-values are recorded in the ``pvalues`` attribute of |
|
the result object. |
|
- Compute the proportion of p-values that are less than the `significance` |
|
level. This is the power recorded in the ``power`` attribute of the |
|
result object. |
|
|
|
Suppose that `significance` is an array with shape ``shape1``, the elements |
|
of `kwargs` and `n_observations` are mutually broadcastable to shape ``shape2``, |
|
and `test` returns an array of p-values of shape ``shape3``. Then the result |
|
object ``power`` attribute will be of shape ``shape1 + shape2 + shape3``, and |
|
the ``pvalues`` attribute will be of shape ``shape2 + shape3 + (n_resamples,)``. |
|
|
|
Examples |
|
-------- |
|
Suppose we wish to simulate the power of the independent sample t-test |
|
under the following conditions: |
|
|
|
- The first sample has 10 observations drawn from a normal distribution |
|
with mean 0. |
|
- The second sample has 12 observations drawn from a normal distribution |
|
with mean 1.0. |
|
- The threshold on p-values for significance is 0.05. |
|
|
|
>>> import numpy as np |
|
>>> from scipy import stats |
|
>>> rng = np.random.default_rng(2549598345528) |
|
>>> |
|
>>> test = stats.ttest_ind |
|
>>> n_observations = (10, 12) |
|
>>> rvs1 = rng.normal |
|
>>> rvs2 = lambda size: rng.normal(loc=1, size=size) |
|
>>> rvs = (rvs1, rvs2) |
|
>>> res = stats.power(test, rvs, n_observations, significance=0.05) |
|
>>> res.power |
|
0.6116 |
|
|
|
With samples of size 10 and 12, respectively, the power of the t-test |
|
with a significance threshold of 0.05 is approximately 60% under the chosen |
|
alternative. We can investigate the effect of sample size on the power |
|
by passing sample size arrays. |
|
|
|
>>> import matplotlib.pyplot as plt |
|
>>> nobs_x = np.arange(5, 21) |
|
>>> nobs_y = nobs_x |
|
>>> n_observations = (nobs_x, nobs_y) |
|
>>> res = stats.power(test, rvs, n_observations, significance=0.05) |
|
>>> ax = plt.subplot() |
|
>>> ax.plot(nobs_x, res.power) |
|
>>> ax.set_xlabel('Sample Size') |
|
>>> ax.set_ylabel('Simulated Power') |
|
>>> ax.set_title('Simulated Power of `ttest_ind` with Equal Sample Sizes') |
|
>>> plt.show() |
|
|
|
Alternatively, we can investigate the impact that effect size has on the power. |
|
In this case, the effect size is the location of the distribution underlying |
|
the second sample. |
|
|
|
>>> n_observations = (10, 12) |
|
>>> loc = np.linspace(0, 1, 20) |
|
>>> rvs2 = lambda size, loc: rng.normal(loc=loc, size=size) |
|
>>> rvs = (rvs1, rvs2) |
|
>>> res = stats.power(test, rvs, n_observations, significance=0.05, |
|
... kwargs={'loc': loc}) |
|
>>> ax = plt.subplot() |
|
>>> ax.plot(loc, res.power) |
|
>>> ax.set_xlabel('Effect Size') |
|
>>> ax.set_ylabel('Simulated Power') |
|
>>> ax.set_title('Simulated Power of `ttest_ind`, Varying Effect Size') |
|
>>> plt.show() |
|
|
|
We can also use `power` to estimate the Type I error rate (also referred to by the |
|
ambiguous term "size") of a test and assess whether it matches the nominal level. |
|
For example, the null hypothesis of `jarque_bera` is that the sample was drawn from |
|
a distribution with the same skewness and kurtosis as the normal distribution. To |
|
estimate the Type I error rate, we can consider the null hypothesis to be a true |
|
*alternative* hypothesis and calculate the power. |
|
|
|
>>> test = stats.jarque_bera |
|
>>> n_observations = 10 |
|
>>> rvs = rng.normal |
|
>>> significance = np.linspace(0.0001, 0.1, 1000) |
|
>>> res = stats.power(test, rvs, n_observations, significance=significance) |
|
>>> size = res.power |
|
|
|
As shown below, the Type I error rate of the test is far below the nominal level |
|
for such a small sample, as mentioned in its documentation. |
|
|
|
>>> ax = plt.subplot() |
|
>>> ax.plot(significance, size) |
|
>>> ax.plot([0, 0.1], [0, 0.1], '--') |
|
>>> ax.set_xlabel('nominal significance level') |
|
>>> ax.set_ylabel('estimated test size (Type I error rate)') |
|
>>> ax.set_title('Estimated test size vs nominal significance level') |
|
>>> ax.set_aspect('equal', 'box') |
|
    >>> ax.legend(('`jarque_bera`', 'ideal test'))
|
>>> plt.show() |
|
|
|
As one might expect from such a conservative test, the power is quite low with |
|
respect to some alternatives. For example, the power of the test under the |
|
alternative that the sample was drawn from the Laplace distribution may not |
|
be much greater than the Type I error rate. |
|
|
|
>>> rvs = rng.laplace |
|
|
>>> res = stats.power(test, rvs, n_observations, significance=0.05) |
|
>>> print(res.power) |
|
0.0587 |
|
|
|
This is not a mistake in SciPy's implementation; it is simply due to the fact |
|
that the null distribution of the test statistic is derived under the assumption |
|
that the sample size is large (i.e. approaches infinity), and this asymptotic |
|
approximation is not accurate for small samples. In such cases, resampling |
|
and Monte Carlo methods (e.g. `permutation_test`, `goodness_of_fit`, |
|
`monte_carlo_test`) may be more appropriate. |
|
|
|
""" |
|
tmp = _power_iv(rvs, test, n_observations, significance, |
|
vectorized, n_resamples, batch, kwargs) |
|
(rvs, test, nobs, significance, |
|
     vectorized, n_resamples, batch, args, kwds, shape) = tmp
|
|
|
batch_nominal = batch or n_resamples |
|
pvalues = [] |
|
for nobs_i, args_i in zip(nobs, args): |
|
kwargs_i = dict(zip(kwds, args_i)) |
|
pvalues_i = [] |
|
for k in range(0, n_resamples, batch_nominal): |
|
batch_actual = min(batch_nominal, n_resamples - k) |
|
resamples = [rvs_j(size=(batch_actual, nobs_ij), **kwargs_i) |
|
for rvs_j, nobs_ij in zip(rvs, nobs_i)] |
|
res = test(*resamples, **kwargs_i, axis=-1) |
|
p = getattr(res, 'pvalue', res) |
|
pvalues_i.append(p) |
|
|
|
pvalues_i = np.concatenate(pvalues_i, axis=-1) |
|
pvalues.append(pvalues_i) |
|
|
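    # `test` may be vectorized over dimensions beyond the resamples; append
    # them to the output shape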
|
shape += pvalues_i.shape[:-1] |
|
|
|
pvalues = np.concatenate(pvalues, axis=0) |
|
|
|
pvalues = pvalues.reshape(shape + (-1,)) |
|
if significance.ndim > 0: |
|
newdims = tuple(range(significance.ndim, pvalues.ndim + significance.ndim)) |
|
significance = np.expand_dims(significance, newdims) |
|
powers = np.mean(pvalues < significance, axis=-1) |
|
|
|
return PowerResult(power=powers, pvalues=pvalues) |
|
|
|
|
|
@dataclass |
|
class PermutationTestResult: |
|
"""Result object returned by `scipy.stats.permutation_test`. |
|
|
|
Attributes |
|
---------- |
|
statistic : float or ndarray |
|
The observed test statistic of the data. |
|
pvalue : float or ndarray |
|
The p-value for the given alternative. |
|
null_distribution : ndarray |
|
The values of the test statistic generated under the null |
|
hypothesis. |
|
""" |
|
statistic: float | np.ndarray |
|
pvalue: float | np.ndarray |
|
null_distribution: np.ndarray |
|
|
|
|
|
def _all_partitions_concatenated(ns): |
|
""" |
|
Generate all partitions of indices of groups of given sizes, concatenated |
|
|
|
`ns` is an iterable of ints. |
|
""" |
|
def all_partitions(z, n): |
|
for c in combinations(z, n): |
|
x0 = set(c) |
|
x1 = z - x0 |
|
yield [x0, x1] |
|
|
|
def all_partitions_n(z, ns): |
|
if len(ns) == 0: |
|
yield [z] |
|
return |
|
for c in all_partitions(z, ns[0]): |
|
for d in all_partitions_n(c[1], ns[1:]): |
|
yield c[0:1] + d |
|
|
|
z = set(range(np.sum(ns))) |
|
for partitioning in all_partitions_n(z, ns[:]): |
|
x = np.concatenate([list(partition) |
|
for partition in partitioning]).astype(int) |
|
yield x |
|
|
|
|
|
def _batch_generator(iterable, batch): |
|
"""A generator that yields batches of elements from an iterable""" |
|
iterator = iter(iterable) |
|
if batch <= 0: |
|
raise ValueError("`batch` must be positive.") |
|
z = [item for i, item in zip(range(batch), iterator)] |
|
while z: |
|
yield z |
|
z = [item for i, item in zip(range(batch), iterator)] |
|
|
|
|
|
def _pairings_permutations_gen(n_permutations, n_samples, n_obs_sample, batch, |
|
rng): |
|
|
|
|
|
|
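    # yields arrays of shape `(batch, n_samples, n_obs_sample)`; each element
    # along the zeroth axis holds an independent permutation of the
    # observation indices for each sample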
|
batch = min(batch, n_permutations) |
|
|
|
if hasattr(rng, 'permuted'): |
|
def batched_perm_generator(): |
|
indices = np.arange(n_obs_sample) |
|
indices = np.tile(indices, (batch, n_samples, 1)) |
|
for k in range(0, n_permutations, batch): |
|
batch_actual = min(batch, n_permutations-k) |
|
|
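                # don't permute in place; otherwise results depend on `batch`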
|
permuted_indices = rng.permuted(indices, axis=-1) |
|
yield permuted_indices[:batch_actual] |
|
else: |
|
def batched_perm_generator(): |
|
for k in range(0, n_permutations, batch): |
|
batch_actual = min(batch, n_permutations-k) |
|
size = (batch_actual, n_samples, n_obs_sample) |
|
x = rng.random(size=size) |
|
yield np.argsort(x, axis=-1)[:batch_actual] |
|
|
|
return batched_perm_generator() |
|
|
|
|
|
def _calculate_null_both(data, statistic, n_permutations, batch, |
|
rng=None): |
|
""" |
|
Calculate null distribution for independent sample tests. |
|
""" |
|
n_samples = len(data) |
|
|
|
|
|
|
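    # compute the number of permutations: the number of distinct ways the
    # pooled observations can be partitioned into samples of these sizes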
|
n_obs_i = [sample.shape[-1] for sample in data] |
|
n_obs_ic = np.cumsum(n_obs_i) |
|
n_obs = n_obs_ic[-1] |
|
n_max = np.prod([comb(n_obs_ic[i], n_obs_ic[i-1]) |
|
for i in range(n_samples-1, 0, -1)]) |
|
|
|
|
|
|
|
|
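    # `perm_generator` yields permutations of the indices 0 to `n_obs`; the
    # pooled data will be permuted with these indices and then split back
    # into samples of the original sizes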
|
if n_permutations >= n_max: |
|
exact_test = True |
|
n_permutations = n_max |
|
perm_generator = _all_partitions_concatenated(n_obs_i) |
|
else: |
|
exact_test = False |
|
|
|
|
|
|
|
|
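        # generate random permutations of the pooled indices one at a time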
|
perm_generator = (rng.permutation(n_obs) |
|
for i in range(n_permutations)) |
|
|
|
batch = batch or int(n_permutations) |
|
null_distribution = [] |
|
|
|
|
|
|
|
|
|
|
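    # concatenate all the samples, then, in batches: permute the pooled
    # observations, split them into samples of the original sizes, and
    # compute the statistic to build up the null distribution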
|
data = np.concatenate(data, axis=-1) |
|
for indices in _batch_generator(perm_generator, batch=batch): |
|
indices = np.array(indices) |
|
|
|
|
|
|
|
|
|
|
|
|
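        # `indices` is 2D: each row is one permutation of the pooled
        # observation indices. After the fancy indexing below, the
        # second-to-last axis of `data_batch` corresponds with permutations
        # and the last axis with observations.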
|
data_batch = data[..., indices] |
|
|
|
|
|
|
|
|
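        # move the permutation axis to position 0 so that the batched
        # statistic values can be concatenated along axis 0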
|
data_batch = np.moveaxis(data_batch, -2, 0) |
|
data_batch = np.split(data_batch, n_obs_ic[:-1], axis=-1) |
|
null_distribution.append(statistic(*data_batch, axis=-1)) |
|
null_distribution = np.concatenate(null_distribution, axis=0) |
|
|
|
return null_distribution, n_permutations, exact_test |
|
|
|
|
|
def _calculate_null_pairings(data, statistic, n_permutations, batch, |
|
rng=None): |
|
""" |
|
Calculate null distribution for association tests. |
|
""" |
|
n_samples = len(data) |
|
|
|
|
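    # compute the number of permutations: each sample's observations can be
    # permuted independently, giving factorial(n)**n_samples possibilities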
|
n_obs_sample = data[0].shape[-1] |
|
n_max = factorial(n_obs_sample)**n_samples |
|
|
|
|
|
|
|
if n_permutations >= n_max: |
|
exact_test = True |
|
n_permutations = n_max |
|
batch = batch or int(n_permutations) |
|
|
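        # Cartesian product of the sets of all permutations of indices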
|
perm_generator = product(*(permutations(range(n_obs_sample)) |
|
for i in range(n_samples))) |
|
batched_perm_generator = _batch_generator(perm_generator, batch=batch) |
|
else: |
|
exact_test = False |
|
batch = batch or int(n_permutations) |
|
|
|
|
|
|
|
args = n_permutations, n_samples, n_obs_sample, batch, rng |
|
batched_perm_generator = _pairings_permutations_gen(*args) |
|
|
|
null_distribution = [] |
|
|
|
for indices in batched_perm_generator: |
|
indices = np.array(indices) |
|
|
|
|
|
|
|
|
|
|
|
indices = np.swapaxes(indices, 0, 1) |
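        # Now `indices[i]` is the batch of permutations for sample `i`.
        # Apply them and move the permutation axis to the front, as in
        # `_calculate_null_both`.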
|
|
|
|
|
|
|
|
|
|
|
|
|
data_batch = [None]*n_samples |
|
for i in range(n_samples): |
|
data_batch[i] = data[i][..., indices[i]] |
|
data_batch[i] = np.moveaxis(data_batch[i], -2, 0) |
|
|
|
null_distribution.append(statistic(*data_batch, axis=-1)) |
|
null_distribution = np.concatenate(null_distribution, axis=0) |
|
|
|
return null_distribution, n_permutations, exact_test |
|
|
|
|
|
def _calculate_null_samples(data, statistic, n_permutations, batch, |
|
rng=None): |
|
""" |
|
Calculate null distribution for paired-sample tests. |
|
""" |
|
n_samples = len(data) |
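    # By convention, a one-sample 'samples' test flips the sign of each
    # observation; achieve this by adding a second sample that is the
    # negative of the original.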
|
|
|
|
|
|
|
|
|
if n_samples == 1: |
|
data = [data[0], -data[0]] |
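    # The 'samples' strategy is the 'pairings' strategy with the roles of
    # samples and observations exchanged, so swap those axes and delegate
    # to `_calculate_null_pairings`.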
|
|
|
|
|
|
|
|
|
|
|
data = np.swapaxes(data, 0, -1) |
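    # The user's statistic expects the original layout, so the wrapper
    # undoes the swap before calling it.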
|
|
|
|
|
|
|
def statistic_wrapped(*data, axis): |
|
data = np.swapaxes(data, 0, -1) |
|
if n_samples == 1: |
|
data = data[0:1] |
|
return statistic(*data, axis=axis) |
|
|
|
return _calculate_null_pairings(data, statistic_wrapped, n_permutations, |
|
batch, rng) |
|
|
|
|
|
def _permutation_test_iv(data, statistic, permutation_type, vectorized, |
|
n_resamples, batch, alternative, axis, rng): |
|
"""Input validation for `permutation_test`.""" |
|
|
|
axis_int = int(axis) |
|
if axis != axis_int: |
|
raise ValueError("`axis` must be an integer.") |
|
|
|
permutation_types = {'samples', 'pairings', 'independent'} |
|
permutation_type = permutation_type.lower() |
|
if permutation_type not in permutation_types: |
|
raise ValueError(f"`permutation_type` must be in {permutation_types}.") |
|
|
|
if vectorized not in {True, False, None}: |
|
raise ValueError("`vectorized` must be `True`, `False`, or `None`.") |
|
|
|
if vectorized is None: |
|
vectorized = 'axis' in inspect.signature(statistic).parameters |
|
|
|
if not vectorized: |
|
statistic = _vectorize_statistic(statistic) |
|
|
|
message = "`data` must be a tuple containing at least two samples" |
|
try: |
|
if len(data) < 2 and permutation_type == 'independent': |
|
raise ValueError(message) |
|
except TypeError: |
|
raise TypeError(message) |
|
|
|
data = _broadcast_arrays(data, axis) |
|
data_iv = [] |
|
for sample in data: |
|
sample = np.atleast_1d(sample) |
|
if sample.shape[axis] <= 1: |
|
raise ValueError("each sample in `data` must contain two or more " |
|
"observations along `axis`.") |
|
sample = np.moveaxis(sample, axis_int, -1) |
|
data_iv.append(sample) |
|
|
|
n_resamples_int = (int(n_resamples) if not np.isinf(n_resamples) |
|
else np.inf) |
|
if n_resamples != n_resamples_int or n_resamples_int <= 0: |
|
raise ValueError("`n_resamples` must be a positive integer.") |
|
|
|
if batch is None: |
|
batch_iv = batch |
|
else: |
|
batch_iv = int(batch) |
|
if batch != batch_iv or batch_iv <= 0: |
|
raise ValueError("`batch` must be a positive integer or None.") |
|
|
|
alternatives = {'two-sided', 'greater', 'less'} |
|
alternative = alternative.lower() |
|
if alternative not in alternatives: |
|
raise ValueError(f"`alternative` must be in {alternatives}") |
|
|
|
rng = check_random_state(rng) |
|
|
|
return (data_iv, statistic, permutation_type, vectorized, n_resamples_int, |
|
batch_iv, alternative, axis_int, rng) |
|
|
|
|
|
@_transition_to_rng('random_state') |
|
def permutation_test(data, statistic, *, permutation_type='independent', |
|
vectorized=None, n_resamples=9999, batch=None, |
|
alternative="two-sided", axis=0, rng=None): |
|
r""" |
|
Performs a permutation test of a given statistic on provided data. |
|
|
|
For independent sample statistics, the null hypothesis is that the data are |
|
randomly sampled from the same distribution. |
|
    For paired sample statistics, two null hypotheses can be tested:
|
that the data are paired at random or that the data are assigned to samples |
|
at random. |
|
|
|
Parameters |
|
---------- |
|
data : iterable of array-like |
|
Contains the samples, each of which is an array of observations. |
|
Dimensions of sample arrays must be compatible for broadcasting except |
|
along `axis`. |
|
statistic : callable |
|
Statistic for which the p-value of the hypothesis test is to be |
|
calculated. `statistic` must be a callable that accepts samples |
|
as separate arguments (e.g. ``statistic(*data)``) and returns the |
|
resulting statistic. |
|
If `vectorized` is set ``True``, `statistic` must also accept a keyword |
|
argument `axis` and be vectorized to compute the statistic along the |
|
provided `axis` of the sample arrays. |
|
permutation_type : {'independent', 'samples', 'pairings'}, optional |
|
The type of permutations to be performed, in accordance with the |
|
        null hypothesis. The first two permutation types below are for paired
|
statistics, in which all samples contain the same number of |
|
observations and observations with corresponding indices along `axis` |
|
are considered to be paired; the third is for independent sample |
|
statistics. |
|
|
|
- ``'samples'`` : observations are assigned to different samples |
|
but remain paired with the same observations from other samples. |
|
This permutation type is appropriate for paired sample hypothesis |
|
tests such as the Wilcoxon signed-rank test and the paired t-test. |
|
- ``'pairings'`` : observations are paired with different observations, |
|
but they remain within the same sample. This permutation type is |
|
appropriate for association/correlation tests with statistics such |
|
as Spearman's :math:`\rho`, Kendall's :math:`\tau`, and Pearson's |
|
:math:`r`. |
|
- ``'independent'`` (default) : observations are assigned to different |
|
samples. Samples may contain different numbers of observations. This |
|
permutation type is appropriate for independent sample hypothesis |
|
tests such as the Mann-Whitney :math:`U` test and the independent |
|
sample t-test. |
|
|
|
Please see the Notes section below for more detailed descriptions |
|
of the permutation types. |
|
|
|
vectorized : bool, optional |
|
If `vectorized` is set ``False``, `statistic` will not be passed |
|
keyword argument `axis` and is expected to calculate the statistic |
|
only for 1D samples. If ``True``, `statistic` will be passed keyword |
|
argument `axis` and is expected to calculate the statistic along `axis` |
|
when passed an ND sample array. If ``None`` (default), `vectorized` |
|
will be set ``True`` if ``axis`` is a parameter of `statistic`. Use |
|
of a vectorized statistic typically reduces computation time. |
|
n_resamples : int or np.inf, default: 9999 |
|
Number of random permutations (resamples) used to approximate the null |
|
distribution. If greater than or equal to the number of distinct |
|
permutations, the exact null distribution will be computed. |
|
Note that the number of distinct permutations grows very rapidly with |
|
the sizes of samples, so exact tests are feasible only for very small |
|
data sets. |
|
batch : int, optional |
|
The number of permutations to process in each call to `statistic`. |
|
        Memory usage is O(`batch` * ``n``), where ``n`` is the total size
|
of all samples, regardless of the value of `vectorized`. Default is |
|
``None``, in which case ``batch`` is the number of permutations. |
|
alternative : {'two-sided', 'less', 'greater'}, optional |
|
The alternative hypothesis for which the p-value is calculated. |
|
For each alternative, the p-value is defined for exact tests as |
|
follows. |
|
|
|
- ``'greater'`` : the percentage of the null distribution that is |
|
greater than or equal to the observed value of the test statistic. |
|
- ``'less'`` : the percentage of the null distribution that is |
|
less than or equal to the observed value of the test statistic. |
|
- ``'two-sided'`` (default) : twice the smaller of the p-values above. |
|
|
|
Note that p-values for randomized tests are calculated according to the |
|
conservative (over-estimated) approximation suggested in [2]_ and [3]_ |
|
rather than the unbiased estimator suggested in [4]_. That is, when |
|
calculating the proportion of the randomized null distribution that is |
|
as extreme as the observed value of the test statistic, the values in |
|
the numerator and denominator are both increased by one. An |
|
interpretation of this adjustment is that the observed value of the |
|
test statistic is always included as an element of the randomized |
|
null distribution. |
|
The convention used for two-sided p-values is not universal; |
|
the observed test statistic and null distribution are returned in |
|
case a different definition is preferred. |
|
|
|
axis : int, default: 0 |
|
The axis of the (broadcasted) samples over which to calculate the |
|
statistic. If samples have a different number of dimensions, |
|
singleton dimensions are prepended to samples with fewer dimensions |
|
before `axis` is considered. |
|
rng : `numpy.random.Generator`, optional |
|
Pseudorandom number generator state. When `rng` is None, a new |
|
`numpy.random.Generator` is created using entropy from the |
|
operating system. Types other than `numpy.random.Generator` are |
|
passed to `numpy.random.default_rng` to instantiate a ``Generator``. |
|
|
|
Returns |
|
------- |
|
res : PermutationTestResult |
|
An object with attributes: |
|
|
|
statistic : float or ndarray |
|
The observed test statistic of the data. |
|
pvalue : float or ndarray |
|
The p-value for the given alternative. |
|
null_distribution : ndarray |
|
The values of the test statistic generated under the null |
|
hypothesis. |
|
|
|
Notes |
|
----- |
|
|
|
The three types of permutation tests supported by this function are |
|
described below. |
|
|
|
**Unpaired statistics** (``permutation_type='independent'``): |
|
|
|
The null hypothesis associated with this permutation type is that all |
|
observations are sampled from the same underlying distribution and that |
|
they have been assigned to one of the samples at random. |
|
|
|
Suppose ``data`` contains two samples; e.g. ``a, b = data``. |
|
When ``1 < n_resamples < binom(n, k)``, where |
|
|
|
* ``k`` is the number of observations in ``a``, |
|
* ``n`` is the total number of observations in ``a`` and ``b``, and |
|
* ``binom(n, k)`` is the binomial coefficient (``n`` choose ``k``), |
|
|
|
the data are pooled (concatenated), randomly assigned to either the first |
|
or second sample, and the statistic is calculated. This process is |
|
    performed repeatedly, `n_resamples` times, generating a distribution of the
|
statistic under the null hypothesis. The statistic of the original |
|
data is compared to this distribution to determine the p-value. |
|
|
|
When ``n_resamples >= binom(n, k)``, an exact test is performed: the data |
|
are *partitioned* between the samples in each distinct way exactly once, |
|
and the exact null distribution is formed. |
|
Note that for a given partitioning of the data between the samples, |
|
only one ordering/permutation of the data *within* each sample is |
|
considered. For statistics that do not depend on the order of the data |
|
within samples, this dramatically reduces computational cost without |
|
affecting the shape of the null distribution (because the frequency/count |
|
of each value is affected by the same factor). |
|
|
|
For ``a = [a1, a2, a3, a4]`` and ``b = [b1, b2, b3]``, an example of this |
|
permutation type is ``x = [b3, a1, a2, b2]`` and ``y = [a4, b1, a3]``. |
|
Because only one ordering/permutation of the data *within* each sample |
|
is considered in an exact test, a resampling like ``x = [b3, a1, b2, a2]`` |
|
and ``y = [a4, a3, b1]`` would *not* be considered distinct from the |
|
example above. |
|
|
|
``permutation_type='independent'`` does not support one-sample statistics, |
|
but it can be applied to statistics with more than two samples. In this |
|
case, if ``n`` is an array of the number of observations within each |
|
sample, the number of distinct partitions is:: |
|
|
|
np.prod([binom(sum(n[i:]), sum(n[i+1:])) for i in range(len(n)-1)]) |
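
    For instance, with two samples of sizes ``n = [3, 2]``, the formula
    gives ``binom(5, 2) = 10`` distinct partitions:

    >>> import numpy as np
    >>> from scipy.special import comb  # ``binom`` above
    >>> n = [3, 2]
    >>> terms = [comb(sum(n[i:]), sum(n[i+1:])) for i in range(len(n)-1)]
    >>> print(np.prod(terms))
    10.0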
|
|
|
**Paired statistics, permute pairings** (``permutation_type='pairings'``): |
|
|
|
The null hypothesis associated with this permutation type is that |
|
observations within each sample are drawn from the same underlying |
|
distribution and that pairings with elements of other samples are |
|
assigned at random. |
|
|
|
Suppose ``data`` contains only one sample; e.g. ``a, = data``, and we |
|
wish to consider all possible pairings of elements of ``a`` with elements |
|
of a second sample, ``b``. Let ``n`` be the number of observations in |
|
``a``, which must also equal the number of observations in ``b``. |
|
|
|
When ``1 < n_resamples < factorial(n)``, the elements of ``a`` are |
|
randomly permuted. The user-supplied statistic accepts one data argument, |
|
say ``a_perm``, and calculates the statistic considering ``a_perm`` and |
|
    ``b``. This process is performed repeatedly, `n_resamples` times,
|
generating a distribution of the statistic under the null hypothesis. |
|
The statistic of the original data is compared to this distribution to |
|
determine the p-value. |
|
|
|
When ``n_resamples >= factorial(n)``, an exact test is performed: |
|
``a`` is permuted in each distinct way exactly once. Therefore, the |
|
`statistic` is computed for each unique pairing of samples between ``a`` |
|
and ``b`` exactly once. |
|
|
|
For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this |
|
permutation type is ``a_perm = [a3, a1, a2]`` while ``b`` is left |
|
in its original order. |
|
|
|
``permutation_type='pairings'`` supports ``data`` containing any number |
|
of samples, each of which must contain the same number of observations. |
|
All samples provided in ``data`` are permuted *independently*. Therefore, |
|
if ``m`` is the number of samples and ``n`` is the number of observations |
|
within each sample, then the number of permutations in an exact test is:: |
|
|
|
factorial(n)**m |
|
|
|
Note that if a two-sample statistic, for example, does not inherently |
|
depend on the order in which observations are provided - only on the |
|
*pairings* of observations - then only one of the two samples should be |
|
provided in ``data``. This dramatically reduces computational cost without |
|
affecting the shape of the null distribution (because the frequency/count |
|
of each value is affected by the same factor). |
|
|
|
**Paired statistics, permute samples** (``permutation_type='samples'``): |
|
|
|
The null hypothesis associated with this permutation type is that |
|
observations within each pair are drawn from the same underlying |
|
distribution and that the sample to which they are assigned is random. |
|
|
|
Suppose ``data`` contains two samples; e.g. ``a, b = data``. |
|
Let ``n`` be the number of observations in ``a``, which must also equal |
|
the number of observations in ``b``. |
|
|
|
    When ``1 < n_resamples < 2**n``, the elements of ``a`` and ``b`` are
|
randomly swapped between samples (maintaining their pairings) and the |
|
statistic is calculated. This process is performed repeatedly, |
|
    `n_resamples` times, generating a distribution of the statistic under the
|
null hypothesis. The statistic of the original data is compared to this |
|
distribution to determine the p-value. |
|
|
|
When ``n_resamples >= 2**n``, an exact test is performed: the observations |
|
are assigned to the two samples in each distinct way (while maintaining |
|
pairings) exactly once. |
|
|
|
For ``a = [a1, a2, a3]`` and ``b = [b1, b2, b3]``, an example of this |
|
permutation type is ``x = [b1, a2, b3]`` and ``y = [a1, b2, a3]``. |
|
|
|
``permutation_type='samples'`` supports ``data`` containing any number |
|
of samples, each of which must contain the same number of observations. |
|
If ``data`` contains more than one sample, paired observations within |
|
``data`` are exchanged between samples *independently*. Therefore, if ``m`` |
|
is the number of samples and ``n`` is the number of observations within |
|
each sample, then the number of permutations in an exact test is:: |
|
|
|
factorial(m)**n |
|
|
|
Several paired-sample statistical tests, such as the Wilcoxon signed rank |
|
test and paired-sample t-test, can be performed considering only the |
|
*difference* between two paired elements. Accordingly, if ``data`` contains |
|
only one sample, then the null distribution is formed by independently |
|
changing the *sign* of each observation. |
|
|
|
.. warning:: |
|
The p-value is calculated by counting the elements of the null |
|
distribution that are as extreme or more extreme than the observed |
|
value of the statistic. Due to the use of finite precision arithmetic, |
|
some statistic functions return numerically distinct values when the |
|
theoretical values would be exactly equal. In some cases, this could |
|
lead to a large error in the calculated p-value. `permutation_test` |
|
guards against this by considering elements in the null distribution |
|
that are "close" (within a relative tolerance of 100 times the |
|
floating point epsilon of inexact dtypes) to the observed |
|
value of the test statistic as equal to the observed value of the |
|
test statistic. However, the user is advised to inspect the null |
|
distribution to assess whether this method of comparison is |
|
appropriate, and if not, calculate the p-value manually. See example |
|
below. |
|
|
|
References |
|
---------- |
|
|
|
.. [1] R. A. Fisher. The Design of Experiments, 6th Ed (1951). |
|
.. [2] B. Phipson and G. K. Smyth. "Permutation P-values Should Never Be |
|
Zero: Calculating Exact P-values When Permutations Are Randomly Drawn." |
|
Statistical Applications in Genetics and Molecular Biology 9.1 (2010). |
|
.. [3] M. D. Ernst. "Permutation Methods: A Basis for Exact Inference". |
|
Statistical Science (2004). |
|
.. [4] B. Efron and R. J. Tibshirani. An Introduction to the Bootstrap |
|
(1993). |
|
|
|
Examples |
|
-------- |
|
|
|
Suppose we wish to test whether two samples are drawn from the same |
|
distribution. Assume that the underlying distributions are unknown to us, |
|
and that before observing the data, we hypothesized that the mean of the |
|
first sample would be less than that of the second sample. We decide that |
|
we will use the difference between the sample means as a test statistic, |
|
and we will consider a p-value of 0.05 to be statistically significant. |
|
|
|
For efficiency, we write the function defining the test statistic in a |
|
vectorized fashion: the samples ``x`` and ``y`` can be ND arrays, and the |
|
statistic will be calculated for each axis-slice along `axis`. |
|
|
|
>>> import numpy as np |
|
>>> def statistic(x, y, axis): |
|
... return np.mean(x, axis=axis) - np.mean(y, axis=axis) |
|
|
|
After collecting our data, we calculate the observed value of the test |
|
statistic. |
|
|
|
>>> from scipy.stats import norm |
|
>>> rng = np.random.default_rng() |
|
>>> x = norm.rvs(size=5, random_state=rng) |
|
    >>> y = norm.rvs(size=6, loc=3, random_state=rng)
|
>>> statistic(x, y, 0) |
|
-3.5411688580987266 |
|
|
|
Indeed, the test statistic is negative, suggesting that the true mean of |
|
the distribution underlying ``x`` is less than that of the distribution |
|
underlying ``y``. To determine the probability of this occurring by chance |
|
if the two samples were drawn from the same distribution, we perform |
|
a permutation test. |
|
|
|
>>> from scipy.stats import permutation_test |
|
>>> # because our statistic is vectorized, we pass `vectorized=True` |
|
>>> # `n_resamples=np.inf` indicates that an exact test is to be performed |
|
>>> res = permutation_test((x, y), statistic, vectorized=True, |
|
... n_resamples=np.inf, alternative='less') |
|
>>> print(res.statistic) |
|
-3.5411688580987266 |
|
>>> print(res.pvalue) |
|
0.004329004329004329 |
|
|
|
The probability of obtaining a test statistic less than or equal to the |
|
observed value under the null hypothesis is 0.4329%. This is less than our |
|
chosen threshold of 5%, so we consider this to be significant evidence |
|
against the null hypothesis in favor of the alternative. |
|
|
|
Because the size of the samples above was small, `permutation_test` could |
|
perform an exact test. For larger samples, we resort to a randomized |
|
permutation test. |
|
|
|
>>> x = norm.rvs(size=100, random_state=rng) |
|
>>> y = norm.rvs(size=120, loc=0.2, random_state=rng) |
|
>>> res = permutation_test((x, y), statistic, n_resamples=9999, |
|
... vectorized=True, alternative='less', |
|
... rng=rng) |
|
>>> print(res.statistic) |
|
-0.4230459671240913 |
|
>>> print(res.pvalue) |
|
0.0015 |
|
|
|
The approximate probability of obtaining a test statistic less than or |
|
    equal to the observed value under the null hypothesis is 0.15%. This is
|
again less than our chosen threshold of 5%, so again we have significant |
|
evidence to reject the null hypothesis in favor of the alternative. |
|
|
|
For large samples and number of permutations, the result is comparable to |
|
that of the corresponding asymptotic test, the independent sample t-test. |
|
|
|
>>> from scipy.stats import ttest_ind |
|
>>> res_asymptotic = ttest_ind(x, y, alternative='less') |
|
>>> print(res_asymptotic.pvalue) |
|
0.0014669545224902675 |
|
|
|
The permutation distribution of the test statistic is provided for |
|
further investigation. |
|
|
|
>>> import matplotlib.pyplot as plt |
|
>>> plt.hist(res.null_distribution, bins=50) |
|
>>> plt.title("Permutation distribution of test statistic") |
|
>>> plt.xlabel("Value of Statistic") |
|
>>> plt.ylabel("Frequency") |
|
>>> plt.show() |
|
|
|
Inspection of the null distribution is essential if the statistic suffers |
|
from inaccuracy due to limited machine precision. Consider the following |
|
case: |
|
|
|
>>> from scipy.stats import pearsonr |
|
>>> x = [1, 2, 4, 3] |
|
>>> y = [2, 4, 6, 8] |
|
>>> def statistic(x, y, axis=-1): |
|
... return pearsonr(x, y, axis=axis).statistic |
|
>>> res = permutation_test((x, y), statistic, vectorized=True, |
|
... permutation_type='pairings', |
|
... alternative='greater') |
|
>>> r, pvalue, null = res.statistic, res.pvalue, res.null_distribution |
|
|
|
In this case, some elements of the null distribution differ from the |
|
observed value of the correlation coefficient ``r`` due to numerical noise. |
|
We manually inspect the elements of the null distribution that are nearly |
|
the same as the observed value of the test statistic. |
|
|
|
>>> r |
|
0.7999999999999999 |
|
>>> unique = np.unique(null) |
|
>>> unique |
|
array([-1. , -1. , -0.8, -0.8, -0.8, -0.6, -0.4, -0.4, -0.2, -0.2, -0.2, |
|
0. , 0.2, 0.2, 0.2, 0.4, 0.4, 0.6, 0.8, 0.8, 0.8, 1. , |
|
1. ]) # may vary |
|
>>> unique[np.isclose(r, unique)].tolist() |
|
[0.7999999999999998, 0.7999999999999999, 0.8] # may vary |
|
|
|
If `permutation_test` were to perform the comparison naively, the |
|
elements of the null distribution with value ``0.7999999999999998`` would |
|
not be considered as extreme or more extreme as the observed value of the |
|
statistic, so the calculated p-value would be too small. |
|
|
|
>>> incorrect_pvalue = np.count_nonzero(null >= r) / len(null) |
|
>>> incorrect_pvalue |
|
0.14583333333333334 # may vary |
|
|
|
Instead, `permutation_test` treats elements of the null distribution that |
|
are within ``max(1e-14, abs(r)*1e-14)`` of the observed value of the |
|
statistic ``r`` to be equal to ``r``. |
|
|
|
>>> correct_pvalue = np.count_nonzero(null >= r - 1e-14) / len(null) |
|
>>> correct_pvalue |
|
0.16666666666666666 |
|
>>> res.pvalue == correct_pvalue |
|
True |
|
|
|
This method of comparison is expected to be accurate in most practical |
|
situations, but the user is advised to assess this by inspecting the |
|
elements of the null distribution that are close to the observed value |
|
of the statistic. Also, consider the use of statistics that can be |
|
calculated using exact arithmetic (e.g. integer statistics). |
|
|
|
""" |
|
args = _permutation_test_iv(data, statistic, permutation_type, vectorized, |
|
n_resamples, batch, alternative, axis, |
|
rng) |
|
(data, statistic, permutation_type, vectorized, n_resamples, batch, |
|
alternative, axis, rng) = args |
|
|
|
observed = statistic(*data, axis=-1) |
|
|
|
null_calculators = {"pairings": _calculate_null_pairings, |
|
"samples": _calculate_null_samples, |
|
"independent": _calculate_null_both} |
|
null_calculator_args = (data, statistic, n_resamples, |
|
batch, rng) |
|
calculate_null = null_calculators[permutation_type] |
|
null_distribution, n_resamples, exact_test = ( |
|
calculate_null(*null_calculator_args)) |
|
|
|
|
|
    # Randomized tests count the observed statistic as one element of the
    # null distribution, so the numerator and denominator of the p-value
    # are each increased by one (references [2] and [3]); exact tests
    # need no adjustment.
    adjustment = 0 if exact_test else 1
|
|
|
|
|
|
|
    # Treat elements of the null distribution within a relative tolerance
    # of 100 times floating-point epsilon of the observed value as equal
    # to the observed value (see the docstring warning about
    # finite-precision comparison).
    eps = (0 if not np.issubdtype(observed.dtype, np.inexact)
           else np.finfo(observed.dtype).eps*100)
    gamma = np.abs(eps * observed)
|
|
|
def less(null_distribution, observed): |
|
cmps = null_distribution <= observed + gamma |
|
pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment) |
|
return pvalues |
|
|
|
def greater(null_distribution, observed): |
|
cmps = null_distribution >= observed - gamma |
|
pvalues = (cmps.sum(axis=0) + adjustment) / (n_resamples + adjustment) |
|
return pvalues |
|
|
|
def two_sided(null_distribution, observed): |
|
pvalues_less = less(null_distribution, observed) |
|
pvalues_greater = greater(null_distribution, observed) |
|
pvalues = np.minimum(pvalues_less, pvalues_greater) * 2 |
|
return pvalues |
|
|
|
compare = {"less": less, |
|
"greater": greater, |
|
"two-sided": two_sided} |
|
|
|
pvalues = compare[alternative](null_distribution, observed) |
|
pvalues = np.clip(pvalues, 0, 1) |
|
|
|
return PermutationTestResult(observed, pvalues, null_distribution) |
|
|
|
|
|
@dataclass |
|
class ResamplingMethod: |
|
"""Configuration information for a statistical resampling method. |
|
|
|
Instances of this class can be passed into the `method` parameter of some |
|
hypothesis test functions to perform a resampling or Monte Carlo version |
|
of the hypothesis test. |
|
|
|
Attributes |
|
---------- |
|
n_resamples : int |
|
The number of resamples to perform or Monte Carlo samples to draw. |
|
batch : int, optional |
|
The number of resamples to process in each vectorized call to |
|
the statistic. Batch sizes >>1 tend to be faster when the statistic |
|
is vectorized, but memory usage scales linearly with the batch size. |
|
Default is ``None``, which processes all resamples in a single batch. |
|
|
|
""" |
|
n_resamples: int = 9999 |
|
batch: int = None |
|
|
|
|
|
@dataclass |
|
class MonteCarloMethod(ResamplingMethod): |
|
"""Configuration information for a Monte Carlo hypothesis test. |
|
|
|
Instances of this class can be passed into the `method` parameter of some |
|
hypothesis test functions to perform a Monte Carlo version of the |
|
hypothesis tests. |
|
|
|
Attributes |
|
---------- |
|
n_resamples : int, optional |
|
The number of Monte Carlo samples to draw. Default is 9999. |
|
batch : int, optional |
|
The number of Monte Carlo samples to process in each vectorized call to |
|
the statistic. Batch sizes >>1 tend to be faster when the statistic |
|
is vectorized, but memory usage scales linearly with the batch size. |
|
Default is ``None``, which processes all samples in a single batch. |
|
rvs : callable or tuple of callables, optional |
|
A callable or sequence of callables that generates random variates |
|
under the null hypothesis. Each element of `rvs` must be a callable |
|
that accepts keyword argument ``size`` (e.g. ``rvs(size=(m, n))``) and |
|
returns an N-d array sample of that shape. If `rvs` is a sequence, the |
|
number of callables in `rvs` must match the number of samples passed |
|
to the hypothesis test in which the `MonteCarloMethod` is used. Default |
|
is ``None``, in which case the hypothesis test function chooses values |
|
to match the standard version of the hypothesis test. For example, |
|
the null hypothesis of `scipy.stats.pearsonr` is typically that the |
|
samples are drawn from the standard normal distribution, so |
|
``rvs = (rng.normal, rng.normal)`` where |
|
``rng = np.random.default_rng()``. |
|
rng : `numpy.random.Generator`, optional |
|
Pseudorandom number generator state. When `rng` is None, a new |
|
`numpy.random.Generator` is created using entropy from the |
|
operating system. Types other than `numpy.random.Generator` are |
|
passed to `numpy.random.default_rng` to instantiate a ``Generator``. |
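
    Examples
    --------
    One way to use this class is to pass an instance as the `method`
    argument of a hypothesis test that supports Monte Carlo versions;
    for example, `scipy.stats.pearsonr`:

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng()
    >>> x = rng.random(10)
    >>> y = x + rng.random(10)
    >>> method = stats.MonteCarloMethod(rvs=(rng.normal, rng.normal))
    >>> res = stats.pearsonr(x, y, method=method)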
|
|
|
""" |
|
rvs: object = None |
|
rng: object = None |
|
|
|
def __init__(self, n_resamples=9999, batch=None, rvs=None, rng=None): |
|
if (rvs is not None) and (rng is not None): |
|
            message = 'Use of `rvs` and `rng` is mutually exclusive.'
|
raise ValueError(message) |
|
|
|
self.n_resamples = n_resamples |
|
self.batch = batch |
|
self.rvs = rvs |
|
self.rng = rng |
|
|
|
def _asdict(self): |
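        # build the kwargs dict directly rather than via
        # `dataclasses.asdict`, which deep-copies field values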
|
|
|
return dict(n_resamples=self.n_resamples, batch=self.batch, |
|
rvs=self.rvs, rng=self.rng) |
|
|
|
|
|
_rs_deprecation = ("Use of attribute `random_state` is deprecated and replaced by " |
|
"`rng`. Support for `random_state` will be removed in SciPy 1.19.0. " |
|
"To silence this warning and ensure consistent behavior in SciPy " |
|
"1.19.0, control the RNG using attribute `rng`. Values set using " |
|
"attribute `rng` will be validated by `np.random.default_rng`, so " |
|
"the behavior corresponding with a given value may change compared " |
|
"to use of `random_state`. For example, 1) `None` will result in " |
|
"unpredictable random numbers, 2) an integer will result in a " |
|
"different stream of random numbers, (with the same distribution), " |
|
"and 3) `np.random` or `RandomState` instances will result in an " |
|
"error. See the documentation of `default_rng` for more " |
|
"information.") |
|
|
|
|
|
@dataclass |
|
class PermutationMethod(ResamplingMethod): |
|
"""Configuration information for a permutation hypothesis test. |
|
|
|
Instances of this class can be passed into the `method` parameter of some |
|
hypothesis test functions to perform a permutation version of the |
|
hypothesis tests. |
|
|
|
Attributes |
|
---------- |
|
n_resamples : int, optional |
|
The number of resamples to perform. Default is 9999. |
|
batch : int, optional |
|
The number of resamples to process in each vectorized call to |
|
the statistic. Batch sizes >>1 tend to be faster when the statistic |
|
is vectorized, but memory usage scales linearly with the batch size. |
|
Default is ``None``, which processes all resamples in a single batch. |
|
rng : `numpy.random.Generator`, optional |
|
Pseudorandom number generator used to perform resampling. |
|
|
|
If `rng` is passed by keyword to the initializer or the `rng` attribute is used |
|
directly, types other than `numpy.random.Generator` are passed to |
|
`numpy.random.default_rng` to instantiate a ``Generator`` before use. |
|
If `rng` is already a ``Generator`` instance, then the provided instance is |
|
used. Specify `rng` for repeatable behavior. |
|
|
|
If this argument is passed by position, if `random_state` is passed by keyword |
|
into the initializer, or if the `random_state` attribute is used directly, |
|
legacy behavior for `random_state` applies: |
|
|
|
- If `random_state` is None (or `numpy.random`), the `numpy.random.RandomState` |
|
singleton is used. |
|
- If `random_state` is an int, a new ``RandomState`` instance is used, |
|
seeded with `random_state`. |
|
- If `random_state` is already a ``Generator`` or ``RandomState`` instance then |
|
that instance is used. |
|
|
|
.. versionchanged:: 1.15.0 |
|
|
|
As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_ |
|
transition from use of `numpy.random.RandomState` to |
|
`numpy.random.Generator`, this attribute name was changed from |
|
`random_state` to `rng`. For an interim period, both names will continue to |
|
work, although only one may be specified at a time. After the interim |
|
period, uses of `random_state` will emit warnings. The behavior of both |
|
`random_state` and `rng` are outlined above, but only `rng` should be used |
|
in new code. |
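
    Examples
    --------
    One way to use this class is to pass an instance as the `method`
    argument of a hypothesis test that supports permutation versions;
    for example, `scipy.stats.pearsonr`:

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng()
    >>> x = rng.random(7)
    >>> y = x + rng.random(7)
    >>> method = stats.PermutationMethod(n_resamples=999, rng=rng)
    >>> res = stats.pearsonr(x, y, method=method)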
|
|
|
""" |
|
rng: object |
|
_rng: object = field(init=False, repr=False, default=None) |
|
|
|
@property |
|
def random_state(self): |
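        # legacy alias for `rng`; see `_rs_deprecation` above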
|
|
|
|
|
return self._random_state |
|
|
|
@random_state.setter |
|
def random_state(self, val): |
|
|
|
|
|
self._random_state = val |
|
|
|
@property |
|
def rng(self): |
|
return self._rng |
|
|
|
def __init__(self, n_resamples=9999, batch=None, random_state=None, *, rng=None): |
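        # Store the private attributes directly (`rng` has no public
        # setter); the consuming function decides which value to use.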
|
|
|
|
|
|
|
self._rng = rng |
|
self._random_state = random_state |
|
super().__init__(n_resamples=n_resamples, batch=batch) |
|
|
|
def _asdict(self): |
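        # Like `dataclasses.asdict`, but include only the RNG argument
        # that was provided, so `rng` and `random_state` are never passed
        # to the consuming function together.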
|
|
|
d = dict(n_resamples=self.n_resamples, batch=self.batch) |
|
if self.rng is not None: |
|
d['rng'] = self.rng |
|
if self.random_state is not None: |
|
d['random_state'] = self.random_state |
|
return d |
|
|
|
|
|
@dataclass |
|
class BootstrapMethod(ResamplingMethod): |
|
"""Configuration information for a bootstrap confidence interval. |
|
|
|
Instances of this class can be passed into the `method` parameter of some |
|
confidence interval methods to generate a bootstrap confidence interval. |
|
|
|
Attributes |
|
---------- |
|
n_resamples : int, optional |
|
The number of resamples to perform. Default is 9999. |
|
batch : int, optional |
|
The number of resamples to process in each vectorized call to |
|
the statistic. Batch sizes >>1 tend to be faster when the statistic |
|
is vectorized, but memory usage scales linearly with the batch size. |
|
Default is ``None``, which processes all resamples in a single batch. |
|
rng : `numpy.random.Generator`, optional |
|
Pseudorandom number generator used to perform resampling. |
|
|
|
If `rng` is passed by keyword to the initializer or the `rng` attribute is used |
|
directly, types other than `numpy.random.Generator` are passed to |
|
`numpy.random.default_rng` to instantiate a ``Generator`` before use. |
|
If `rng` is already a ``Generator`` instance, then the provided instance is |
|
used. Specify `rng` for repeatable behavior. |
|
|
|
If this argument is passed by position, if `random_state` is passed by keyword |
|
into the initializer, or if the `random_state` attribute is used directly, |
|
legacy behavior for `random_state` applies: |
|
|
|
- If `random_state` is None (or `numpy.random`), the `numpy.random.RandomState` |
|
singleton is used. |
|
- If `random_state` is an int, a new ``RandomState`` instance is used, |
|
seeded with `random_state`. |
|
- If `random_state` is already a ``Generator`` or ``RandomState`` instance then |
|
that instance is used. |
|
|
|
.. versionchanged:: 1.15.0 |
|
|
|
As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_ |
|
transition from use of `numpy.random.RandomState` to |
|
`numpy.random.Generator`, this attribute name was changed from |
|
`random_state` to `rng`. For an interim period, both names will continue to |
|
work, although only one may be specified at a time. After the interim |
|
period, uses of `random_state` will emit warnings. The behavior of both |
|
`random_state` and `rng` are outlined above, but only `rng` should be used |
|
in new code. |
|
|
|
method : {'BCa', 'percentile', 'basic'} |
|
Whether to use the 'percentile' bootstrap ('percentile'), the 'basic' |
|
(AKA 'reverse') bootstrap ('basic'), or the bias-corrected and |
|
accelerated bootstrap ('BCa', default). |
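
    Examples
    --------
    One way to use this class is to pass an instance to the
    ``confidence_interval`` method of a compatible result object;
    for example, the result of `scipy.stats.pearsonr`:

    >>> import numpy as np
    >>> from scipy import stats
    >>> rng = np.random.default_rng()
    >>> x = rng.random(10)
    >>> y = x + rng.random(10)
    >>> res = stats.pearsonr(x, y)
    >>> method = stats.BootstrapMethod(n_resamples=999, rng=rng)
    >>> ci = res.confidence_interval(method=method)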
|
|
|
""" |
|
rng: object |
|
_rng: object = field(init=False, repr=False, default=None) |
|
method: str = 'BCa' |
|
|
|
@property |
|
def random_state(self): |
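        # legacy alias for `rng`; see `_rs_deprecation` above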
|
|
|
|
|
return self._random_state |
|
|
|
@random_state.setter |
|
def random_state(self, val): |
|
|
|
|
|
self._random_state = val |
|
|
|
@property |
|
def rng(self): |
|
return self._rng |
|
|
|
def __init__(self, n_resamples=9999, batch=None, random_state=None, |
|
method='BCa', *, rng=None): |
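        # Store the private attributes directly (`rng` has no public
        # setter); the consuming function decides which value to use.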
|
|
|
|
|
|
|
self._rng = rng |
|
self._random_state = random_state |
|
self.method = method |
|
super().__init__(n_resamples=n_resamples, batch=batch) |
|
|
|
def _asdict(self): |
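        # Like `dataclasses.asdict`, but include only the RNG argument
        # that was provided, so `rng` and `random_state` are never passed
        # to the consuming function together.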
|
|
|
d = dict(n_resamples=self.n_resamples, batch=self.batch, |
|
method=self.method) |
|
if self.rng is not None: |
|
d['rng'] = self.rng |
|
if self.random_state is not None: |
|
d['random_state'] = self.random_state |
|
return d |
|
|