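"""Tests for `scipy.stats.multiscale_graphcorr` (MGC).

The tests below unpack each result as a 3-tuple, e.g.::

    stat, pvalue, mgc_dict = stats.multiscale_graphcorr(x, y)

where ``x`` and ``y`` are ``(n, p)`` and ``(n, q)`` sample arrays, or
``(n, n)`` distance matrices when ``compute_distance=None`` is passed.
"""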
import pytest
from pytest import raises as assert_raises, warns as assert_warns
import numpy as np
from numpy.testing import assert_approx_equal, assert_allclose, assert_equal
from scipy.spatial.distance import cdist
from scipy import stats


class TestMGCErrorWarnings:
""" Tests errors and warnings derived from MGC.
"""
def test_error_notndarray(self):
        # raises error if x or y is not an ndarray
x = np.arange(20)
y = [5] * 20
assert_raises(ValueError, stats.multiscale_graphcorr, x, y)
        assert_raises(ValueError, stats.multiscale_graphcorr, y, x)

    def test_error_shape(self):
# raises error if number of samples different (n)
x = np.arange(100).reshape(25, 4)
y = x.reshape(10, 10)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_lowsamples(self):
        # raises error if the number of samples is too low (< 5)
x = np.arange(3)
y = np.arange(3)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_nans(self):
# raises error if inputs contain NaNs
x = np.arange(20, dtype=float)
x[0] = np.nan
assert_raises(ValueError, stats.multiscale_graphcorr, x, x)
y = np.arange(20)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)

    def test_error_wrongdisttype(self):
# raises error if metric is not a function
x = np.arange(20)
compute_distance = 0
assert_raises(ValueError, stats.multiscale_graphcorr, x, x,
                      compute_distance=compute_distance)

    @pytest.mark.parametrize("reps", [
-1, # reps is negative
'1', # reps is not integer
])
def test_error_reps(self, reps):
        # raises error if reps is negative or not an integer
x = np.arange(20)
        assert_raises(ValueError, stats.multiscale_graphcorr, x, x, reps=reps)

    def test_warns_reps(self):
# raises warning when reps is less than 1000
x = np.arange(20)
reps = 100
        assert_warns(RuntimeWarning, stats.multiscale_graphcorr, x, x,
                     reps=reps)

    def test_error_infty(self):
# raises error if input contains infinities
x = np.arange(20)
y = np.ones(20) * np.inf
        assert_raises(ValueError, stats.multiscale_graphcorr, x, y)


class TestMGCStat:
""" Test validity of MGC test statistic
"""
def _simulations(self, samps=100, dims=1, sim_type=""):
# linear simulation
if sim_type == "linear":
x = np.random.uniform(-1, 1, size=(samps, 1))
y = x + 0.3 * np.random.random_sample(size=(x.size, 1))
# spiral simulation
elif sim_type == "nonlinear":
unif = np.array(np.random.uniform(0, 5, size=(samps, 1)))
x = unif * np.cos(np.pi * unif)
y = (unif * np.sin(np.pi * unif) +
0.4*np.random.random_sample(size=(x.size, 1)))
# independence (tests type I simulation)
elif sim_type == "independence":
u = np.random.normal(0, 1, size=(samps, 1))
v = np.random.normal(0, 1, size=(samps, 1))
u_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
v_2 = np.random.binomial(1, p=0.5, size=(samps, 1))
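            # x and y are each a mixture of a scaled Gaussian and a Bernoulli
            # shift, generated independently of one another (no true
            # dependence)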
x = u/3 + 2*u_2 - 1
y = v/3 + 2*v_2 - 1
# raises error if not approved sim_type
else:
raise ValueError("sim_type must be linear, nonlinear, or "
"independence")
# add dimensions of noise for higher dimensions
if dims > 1:
dims_noise = np.random.normal(0, 1, size=(samps, dims-1))
x = np.concatenate((x, dims_noise), axis=1)
        return x, y

    @pytest.mark.xslow
@pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
("linear", 0.97, 1/1000), # test linear simulation
("nonlinear", 0.163, 1/1000), # test spiral simulation
("independence", -0.0094, 0.78) # test independence simulation
])
def test_oned(self, sim_type, obs_stat, obs_pvalue):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type=sim_type)
# test stat and pvalue
stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
@pytest.mark.parametrize("sim_type, obs_stat, obs_pvalue", [
("linear", 0.184, 1/1000), # test linear simulation
("nonlinear", 0.0190, 0.117), # test spiral simulation
])
def test_fived(self, sim_type, obs_stat, obs_pvalue):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=5, sim_type=sim_type)
# test stat and pvalue
stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
assert_approx_equal(stat, obs_stat, significant=1)
        assert_approx_equal(pvalue, obs_pvalue, significant=1)

    @pytest.mark.xslow
def test_twosamp(self):
np.random.seed(12345678)
# generate x and y
x = np.random.binomial(100, 0.5, size=(100, 5))
y = np.random.normal(0, 1, size=(80, 5))
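        # the sample sizes differ (100 vs. 80), which should make MGC switch
        # to its two-sample test automatically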
# test stat and pvalue
stat, pvalue, _ = stats.multiscale_graphcorr(x, y)
assert_approx_equal(stat, 1.0, significant=1)
assert_approx_equal(pvalue, 0.001, significant=1)
# generate x and y
y = np.random.normal(0, 1, size=(100, 5))
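        # with equal sample sizes the two-sample test must be requested
        # explicitly via is_twosamp=True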
# test stat and pvalue
stat, pvalue, _ = stats.multiscale_graphcorr(x, y, is_twosamp=True)
assert_approx_equal(stat, 1.0, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
def test_workers(self):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type="linear")
# test stat and pvalue
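        # workers=2 should run the permutation replications in parallel
        # across two processes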
stat, pvalue, _ = stats.multiscale_graphcorr(x, y, workers=2)
assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
def test_random_state(self):
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type="linear")
# test stat and pvalue
stat, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
assert_approx_equal(stat, 0.97, significant=1)
        assert_approx_equal(pvalue, 0.001, significant=1)

    @pytest.mark.xslow
def test_dist_perm(self):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type="nonlinear")
distx = cdist(x, x, metric="euclidean")
disty = cdist(y, y, metric="euclidean")
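        # compute_distance=None signals that distx and disty are already
        # distance matrices, so no metric is computed internally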
stat_dist, pvalue_dist, _ = stats.multiscale_graphcorr(distx, disty,
compute_distance=None,
random_state=1)
assert_approx_equal(stat_dist, 0.163, significant=1)
        assert_approx_equal(pvalue_dist, 0.001, significant=1)

    @pytest.mark.fail_slow(20)  # all other tests are XSLOW; we need at least one to run
@pytest.mark.slow
def test_pvalue_literature(self):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type="linear")
# test stat and pvalue
_, pvalue, _ = stats.multiscale_graphcorr(x, y, random_state=1)
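        # with the default 1000 permutations the smallest achievable p-value
        # is (0 + 1) / (1000 + 1) = 1/1001, hence the exact value checked here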
        assert_allclose(pvalue, 1/1001)

    @pytest.mark.xslow
def test_alias(self):
np.random.seed(12345678)
# generate x and y
x, y = self._simulations(samps=100, dims=1, sim_type="linear")
res = stats.multiscale_graphcorr(x, y, random_state=1)
assert_equal(res.stat, res.statistic)