|
""" |
|
test_indexing tests the following Index methods:
    __getitem__
    get_loc
    get_value
    __contains__
    take
    where
    get_indexer
    get_indexer_for
    slice_locs
    asof_locs

The corresponding tests.indexes.[index_type].test_indexing files
contain tests for the corresponding methods specific to those Index subclasses.
|
""" |
|
import numpy as np |
|
import pytest |
|
|
|
from pandas.errors import InvalidIndexError |
|
|
|
from pandas.core.dtypes.common import ( |
|
is_float_dtype, |
|
is_scalar, |
|
) |
|
|
|
from pandas import ( |
|
NA, |
|
DatetimeIndex, |
|
Index, |
|
IntervalIndex, |
|
MultiIndex, |
|
NaT, |
|
PeriodIndex, |
|
TimedeltaIndex, |
|
) |
|
import pandas._testing as tm |
|
|
|
|
|
class TestTake:
    """Checks of ``Index.take`` run against the ``index`` argument of each test."""

    def test_take_invalid_kwargs(self, index):
        """``take`` rejects an unknown keyword and the ``out``/``mode`` keywords."""
        positions = [1, 2]

        # (expected exception, message pattern, offending keyword arguments)
        cases = [
            (
                TypeError,
                r"take\(\) got an unexpected keyword argument 'foo'",
                {"foo": 2},
            ),
            (ValueError, "the 'out' parameter is not supported", {"out": positions}),
            (ValueError, "the 'mode' parameter is not supported", {"mode": "clip"}),
        ]
        for exc_type, pattern, bad_kwargs in cases:
            with pytest.raises(exc_type, match=pattern):
                index.take(positions, **bad_kwargs)

    def test_take(self, index):
        """``take`` with a list of positions agrees with list-based ``__getitem__``."""
        if len(index) < 5:
            pytest.skip("Test doesn't make sense since not enough elements")

        positions = [4, 3, 0, 2]
        taken = index.take(positions)
        assert taken.equals(index[positions])

        if isinstance(index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
            # These index types are excluded from the ``freq`` attribute check.
            return

        pattern = r"'(.*Index)' object has no attribute 'freq'"
        with pytest.raises(AttributeError, match=pattern):
            index.freq

    def test_take_indexer_type(self):
        """Passing a scalar as the indices raises ``TypeError``."""
        int_index = Index([0, 1, 2, 3])
        not_array_like = 1
        with pytest.raises(TypeError, match="Expected indices to be array-like"):
            int_index.take(not_array_like)

    def test_take_minus1_without_fill(self, index):
        """Position ``-1`` gives the same result as the last valid position."""
        if not len(index):
            pytest.skip("Test doesn't make sense for empty index")

        last = len(index) - 1
        with_negative = index.take([0, 0, -1])
        with_explicit = index.take([0, 0, last])
        tm.assert_index_equal(with_negative, with_explicit)
|
|
|
|
|
class TestContains:
    """Checks of the ``in`` operator (``__contains__``) on ``Index`` objects."""

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), 2),
            (Index([0, 1, "2"]), "2"),
            (Index([0, 1, 2, np.inf, 4]), 4),
            (Index([0, 1, 2, np.nan, 4]), 4),
            (Index([0, 1, 2, np.inf]), np.inf),
            (Index([0, 1, 2, np.nan]), np.nan),
        ],
    )
    def test_index_contains(self, index, val):
        """Each parametrized value is reported as present in its index."""
        assert val in index

    @pytest.mark.parametrize(
        "index,val",
        [
            (Index([0, 1, 2]), "2"),
            (Index([0, 1, "2"]), 2),
            (Index([0, 1, 2, np.inf]), 4),
            (Index([0, 1, 2, np.nan]), 4),
            (Index([0, 1, 2, np.inf]), np.nan),
            (Index([0, 1, 2, np.nan]), np.inf),
            # Explicit integer dtypes probed with non-finite floats.
            (Index([0, 1, 2], dtype=np.int64), np.inf),
            (Index([0, 1, 2], dtype=np.int64), np.nan),
            (Index([0, 1, 2], dtype=np.uint64), np.inf),
            (Index([0, 1, 2], dtype=np.uint64), np.nan),
        ],
    )
    def test_index_not_contains(self, index, val):
        """Each parametrized value is reported as absent from its index."""
        assert val not in index

    @pytest.mark.parametrize(
        "index,val", [(Index([0, 1, "2"]), 0), (Index([0, 1, "2"]), "2")]
    )
    def test_mixed_index_contains(self, index, val):
        """An index mixing ints and strings contains its own elements."""
        assert val in index

    @pytest.mark.parametrize(
        "index,val", [(Index([0, 1, "2"]), "1"), (Index([0, 1, "2"]), 2)]
    )
    def test_mixed_index_not_contains(self, index, val):
        """A mixed index does not match a value of the other type ("1" vs 1)."""
        assert val not in index

    def test_contains_with_float_index(self, any_real_numpy_dtype):
        """Membership of ``1``, ``1.0`` and ``1.1`` for integer and float data."""
        dtype = any_real_numpy_dtype
        if is_float_dtype(dtype):
            values = [0.1, 1.1, 2.2, 3.3]
        else:
            values = [0, 1, 2, 3]
        index = Index(values, dtype=dtype)

        if is_float_dtype(index.dtype):
            # Float data holds 1.1 but no element equal to one.
            assert 1.1 in index
            assert 1.0 not in index
            assert 1 not in index
        else:
            # Integer data holds one, whether spelled as an int or a float.
            assert 1.1 not in index
            assert 1.0 in index
            assert 1 in index

    def test_contains_requires_hashable_raises(self, index):
        """Unhashable operands to ``in`` raise ``TypeError``."""
        if isinstance(index, MultiIndex):
            # MultiIndex is exempt from this check.
            return

        with pytest.raises(TypeError, match="unhashable type: 'list'"):
            [] in index

        # Any one of these messages is accepted for the engine-level check.
        engine_patterns = (
            r"unhashable type: 'dict'",
            r"must be real number, not dict",
            r"an integer is required",
            r"\{\}",
            r"pandas\._libs\.interval\.IntervalTree' is not iterable",
        )
        with pytest.raises(TypeError, match="|".join(engine_patterns)):
            {} in index._engine
|
|
|
|
|
class TestGetLoc:
    """Checks of ``Index.get_loc`` with keys that are not ordinary scalars."""

    def test_get_loc_non_hashable(self, index):
        """A list key raises ``InvalidIndexError`` whose message shows the key."""
        # The brackets must be escaped: unescaped, "[0, 1]" is a regex character
        # class that matches any message containing "0", "1", "," or a space,
        # so the assertion would not actually check for the key.
        with pytest.raises(InvalidIndexError, match=r"\[0, 1\]"):
            index.get_loc([0, 1])

    def test_get_loc_non_scalar_hashable(self, index):
        """A hashable key that ``is_scalar`` rejects still raises on lookup.

        The four index types listed below are expected to raise
        ``InvalidIndexError``; every other index is expected to raise
        ``KeyError``.
        """
        from enum import Enum

        class E(Enum):
            X1 = "x1"

        # Precondition: the enum member is hashable but not a pandas scalar.
        assert not is_scalar(E.X1)

        raises_invalid_index = (
            DatetimeIndex,
            TimedeltaIndex,
            PeriodIndex,
            IntervalIndex,
        )
        if isinstance(index, raises_invalid_index):
            exc, msg = InvalidIndexError, "E.X1"
        else:
            exc, msg = KeyError, "<E.X1: 'x1'>"

        with pytest.raises(exc, match=msg):
            index.get_loc(E.X1)

    def test_get_loc_generator(self, index):
        """A generator key raises, with "generator object" in the message.

        The five index types listed below are expected to raise
        ``InvalidIndexError``; every other index is expected to raise
        ``KeyError``.
        """
        raises_invalid_index = (
            DatetimeIndex,
            TimedeltaIndex,
            PeriodIndex,
            IntervalIndex,
            MultiIndex,
        )
        if isinstance(index, raises_invalid_index):
            exc = InvalidIndexError
        else:
            exc = KeyError

        with pytest.raises(exc, match="generator object"):
            index.get_loc(x for x in range(5))

    def test_get_loc_masked_duplicated_na(self):
        """``get_loc(NA)`` on an "Int64" index with two NAs gives a boolean mask."""
        idx = Index([1, 2, NA, NA], dtype="Int64")
        result = idx.get_loc(NA)
        expected = np.array([False, False, True, True])
        tm.assert_numpy_array_equal(result, expected)
|
|
|
|
|
class TestGetIndexer:
    """Checks of ``Index.get_indexer`` and its non-unique variants."""

    def test_get_indexer_base(self, index):
        """``get_indexer`` of an index against itself, plus bad ``method``.

        An index treated as unique maps to ``0..size-1``; otherwise
        ``InvalidIndexError`` is raised. An unrecognised ``method`` raises
        ``ValueError`` in both cases.
        """
        if index._index_as_unique:
            expected = np.arange(index.size, dtype=np.intp)
            actual = index.get_indexer(index)
            tm.assert_numpy_array_equal(expected, actual)
        else:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(index)

        with pytest.raises(ValueError, match="Invalid fill method"):
            index.get_indexer(index, method="invalid")

    def test_get_indexer_consistency(self, index):
        """Indexers come back as ``np.intp`` ndarrays for a two-element slice."""
        target = index[0:2]

        if index._index_as_unique:
            indexer = index.get_indexer(target)
            assert isinstance(indexer, np.ndarray)
            assert indexer.dtype == np.intp
        else:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                index.get_indexer(target)

        # The non-unique variant is checked whether or not the index is unique.
        indexer, _ = index.get_indexer_non_unique(target)
        assert isinstance(indexer, np.ndarray)
        assert indexer.dtype == np.intp

    def test_get_indexer_masked_duplicated_na(self):
        """``get_indexer_for`` returns every position of a duplicated NA."""
        idx = Index([1, 2, NA, NA], dtype="Int64")
        result = idx.get_indexer_for(Index([1, NA], dtype="Int64"))
        # Pin the dtype to np.intp instead of borrowing it from ``result``;
        # otherwise the dtype comparison below could never fail.
        expected = np.array([0, 2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)
|
|
|
|
|
class TestConvertSliceIndexer:
    """Checks of the private ``Index._convert_slice_indexer`` helper."""

    def test_convert_almost_null_slice(self, index):
        """A slice whose only non-None field is a string step is rejected."""
        bad_step_slice = slice(None, None, "foo")

        # IntervalIndex is expected to fail with its own ValueError; every
        # other index is expected to fail with a TypeError on the comparison.
        if isinstance(index, IntervalIndex):
            exc_type = ValueError
            pattern = (
                "label-based slicing with step!=1 is not supported for IntervalIndex"
            )
        else:
            exc_type = TypeError
            pattern = "'>=' not supported between instances of 'str' and 'int'"

        with pytest.raises(exc_type, match=pattern):
            index._convert_slice_indexer(bad_step_slice, "loc")
|
|
|
|
|
class TestPutmask:
    """Checks of ``Index.putmask`` argument validation."""

    def test_putmask_with_wrong_mask(self, index):
        """A mask that is too long, too short, or a string raises ``ValueError``."""
        if len(index) == 0:
            pytest.skip("Test doesn't make sense for empty index")

        fill = index[0]
        size = len(index)
        pattern = "putmask: mask and data must be the same size"

        bad_masks = [
            np.ones(size + 1, np.bool_),  # one element too many
            np.ones(size - 1, np.bool_),  # one element too few
            "foo",  # not a boolean array at all
        ]
        for bad_mask in bad_masks:
            with pytest.raises(ValueError, match=pattern):
                index.putmask(bad_mask, fill)
|
|
|
|
|
@pytest.mark.parametrize(
    "idx", [Index([1, 2, 3]), Index([0.1, 0.2, 0.3]), Index(["a", "b", "c"])]
)
def test_getitem_deprecated_float(idx):
    """Indexing with the float ``1.0`` raises ``IndexError`` for each index."""
    pattern = "Indexing with a float is no longer supported"
    with pytest.raises(IndexError, match=pattern):
        idx[1.0]
|
|
|
|
|
@pytest.mark.parametrize(
    "idx,target,expected",
    [
        ([np.nan, "var1", np.nan], [np.nan], np.array([0, 2], dtype=np.intp)),
        (
            [np.nan, "var1", np.nan],
            [np.nan, "var1"],
            np.array([0, 2, 1], dtype=np.intp),
        ),
        (
            np.array([np.nan, "var1", np.nan], dtype=object),
            [np.nan],
            np.array([0, 2], dtype=np.intp),
        ),
        (
            DatetimeIndex(["2020-08-05", NaT, NaT]),
            [NaT],
            np.array([1, 2], dtype=np.intp),
        ),
        (["a", "b", "a", np.nan], [np.nan], np.array([3], dtype=np.intp)),
        (
            np.array(["b", np.nan, float("NaN"), "b"], dtype=object),
            Index([np.nan], dtype=object),
            np.array([1, 2], dtype=np.intp),
        ),
    ],
)
def test_get_indexer_non_unique_multiple_nans(idx, target, expected):
    """``get_indexer_for`` finds every position of a missing value in ``idx``."""
    source_index = Index(idx)
    positions = source_index.get_indexer_for(target)
    tm.assert_numpy_array_equal(positions, expected)
|
|
|
|
|
def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):
    """A null in an object-dtype target is reported as missing.

    The index holds ``1.0`` and ``2.0``; the target holds ``1`` and the null
    value passed in as ``nulls_fixture``. The first target element is found
    at position 0, and the second is flagged as missing.
    """
    float_index = Index([1.0, 2.0])
    object_target = Index([1, nulls_fixture], dtype="object")

    found, missing = float_index.get_indexer_non_unique(object_target)

    expected_found = np.array([0, -1], dtype=np.intp)
    expected_missing = np.array([1], dtype=np.intp)
    tm.assert_numpy_array_equal(found, expected_found)
    tm.assert_numpy_array_equal(missing, expected_missing)
|
|