|
""" |
|
Collection of tests asserting things that should be true for |
|
any index subclass except for MultiIndex. Makes use of the `index_flat` |
|
fixture defined in pandas/conftest.py. |
|
""" |
|
from copy import ( |
|
copy, |
|
deepcopy, |
|
) |
|
import re |
|
|
|
import numpy as np |
|
import pytest |
|
|
|
from pandas.compat import IS64 |
|
from pandas.compat.numpy import np_version_gte1p25 |
|
|
|
from pandas.core.dtypes.common import ( |
|
is_integer_dtype, |
|
is_numeric_dtype, |
|
) |
|
|
|
import pandas as pd |
|
from pandas import ( |
|
CategoricalIndex, |
|
MultiIndex, |
|
PeriodIndex, |
|
RangeIndex, |
|
) |
|
import pandas._testing as tm |
|
|
|
|
|
class TestCommon: |
|
@pytest.mark.parametrize("name", [None, "new_name"])
def test_to_frame(self, name, index_flat, using_copy_on_write):
    """Check ``to_frame`` with and without keeping the index as row labels.

    The column label is the parametrized ``name`` when it is truthy,
    otherwise the index's own name, falling back to ``0``.
    """
    column_label = name if name else (index_flat.name or 0)

    frame = index_flat.to_frame(name=column_label)

    # Default call: the frame is indexed by the very same object and holds
    # exactly one column carrying the requested label.
    assert frame.index is index_flat
    assert len(frame.columns) == 1
    assert frame.columns[0] == column_label
    if not using_copy_on_write:
        assert frame[column_label].values is not index_flat.values

    # With index=False the original object must not be reused as the index.
    frame_without_index = index_flat.to_frame(index=False, name=column_label)
    assert frame_without_index.index is not index_flat
|
|
|
def test_droplevel(self, index_flat):
    """Check ``droplevel`` on a single-level index.

    Dropping no levels is a no-op, dropping the only level raises
    ``ValueError``, and an unknown level name raises ``KeyError``.
    """
    idx = index_flat

    assert idx.droplevel([]).equals(idx)

    too_few_levels = (
        "Cannot remove 1 levels from an index with 1 levels: at least one "
        "level must be left."
    )
    own_name = idx.name
    for candidate in (own_name, [own_name]):
        # A tuple name is only exercised in its list-wrapped form; the bare
        # tuple itself is skipped.
        if candidate is own_name and isinstance(own_name, tuple):
            continue
        with pytest.raises(ValueError, match=too_few_levels):
            idx.droplevel(candidate)

    unknown_level = (
        r"'Requested level \(wrong\) does not match index name \(None\)'"
    )
    for candidate in ("wrong", ["wrong"]):
        with pytest.raises(KeyError, match=unknown_level):
            idx.droplevel(candidate)
|
|
|
def test_constructor_non_hashable_name(self, index_flat):
    """An unhashable name is rejected by both ``rename`` and ``set_names``."""
    unhashable = [["1"]]
    expected_error = "Index.name must be a hashable type"

    # Both renaming entry points are driven through the same expectation,
    # in the order rename -> set_names.
    attempts = (
        lambda: index_flat.rename(name=unhashable),
        lambda: index_flat.set_names(names=unhashable),
    )
    for attempt in attempts:
        with pytest.raises(TypeError, match=expected_error):
            attempt()
|
|
|
def test_constructor_unwraps_index(self, index_flat):
    """Rebuilding an index from itself yields equal underlying ``_data``."""
    original = index_flat

    # The dtype is passed explicitly together with the index itself.
    rebuilt = type(original)(original, dtype=original.dtype)

    tm.assert_equal(original._data, rebuilt._data)
|
|
|
def test_to_flat_index(self, index_flat):
    """``to_flat_index`` on a flat index returns an index equal to the input."""
    tm.assert_index_equal(index_flat.to_flat_index(), index_flat)
|
|
|
def test_set_name_methods(self, index_flat):
    """Check ``set_names`` and in-place ``rename`` on a flat index.

    ``set_names`` must return a renamed object and leave the source
    untouched; ``rename(..., inplace=True)`` must return ``None`` and update
    the index; passing ``level`` to ``set_names`` must raise; a tuple is
    accepted as a single name.
    """
    idx = index_flat
    label = "This is the new name for this index"
    name_before = idx.name

    # Non-inplace: the result carries the new name, the source keeps its own.
    renamed = idx.set_names([label])
    assert renamed.name == label
    assert idx.name == name_before

    # In-place: nothing is returned and both ``name`` and ``names`` change.
    assert idx.rename(label, inplace=True) is None
    assert idx.name == label
    assert idx.names == [label]

    with pytest.raises(ValueError, match="Level must be None"):
        idx.set_names("a", level=0)

    # A tuple is stored as one name, not unpacked into several.
    tuple_label = ("A", "B")
    idx.rename(tuple_label, inplace=True)
    assert idx.name == tuple_label
    assert idx.names == [tuple_label]
|
|
|
@pytest.mark.xfail
def test_set_names_single_label_no_level(self, index_flat):
    """Expect ``set_names`` with a bare string to raise a "list-like" TypeError.

    The test is marked ``xfail``, so this expectation is currently not met.
    """
    idx = index_flat
    expects_type_error = pytest.raises(TypeError, match="list-like")
    with expects_type_error:
        idx.set_names("a")
|
|
|
def test_copy_and_deepcopy(self, index_flat):
    """``copy.copy`` / ``copy.deepcopy`` give new but equal index objects."""
    for make_copy in (copy, deepcopy):
        duplicate = make_copy(index_flat)
        assert duplicate is not index_flat
        assert duplicate.equals(index_flat)

    # Index.copy accepts a replacement name.
    renamed = index_flat.copy(deep=True, name="banana")
    assert renamed.name == "banana"
|
|
|
def test_copy_name(self, index_flat):
    """A name given at construction survives re-wrapping and arithmetic."""
    source = index_flat

    named = type(source)(source, copy=True, name="mario")
    rewrapped = type(named)(named, copy=False)

    # copy=False must still hand back a distinct object that compares equal.
    assert named is not rewrapped
    tm.assert_index_equal(named, rewrapped)

    # ``equals`` is used against the source instead of assert_index_equal.
    assert source.equals(named)

    assert named.name == "mario"
    assert rewrapped.name == "mario"

    # Multiplying two Series whose indexes share the name keeps that name on
    # the result's index, even though the second index is one element shorter.
    product = pd.Series(2, index=named) * pd.Series(3, index=rewrapped[:-1])
    assert product.index.name == "mario"
|
|
|
def test_copy_name2(self, index_flat):
    """``Index.copy(name=...)`` validates the replacement name."""
    idx = index_flat

    renamed = idx.copy(name="mario")
    assert renamed.name == "mario"

    # A two-element list is rejected as the wrong number of names.
    wrong_length = "Length of new names must be 1, got 2"
    with pytest.raises(ValueError, match=wrong_length):
        idx.copy(name=["mario", "luigi"])

    # A nested list is rejected as unhashable; the message names the class.
    not_hashable = f"{type(idx).__name__}.name must be a hashable type"
    with pytest.raises(TypeError, match=not_hashable):
        idx.copy(name=[["mario"]])
|
|
|
def test_unique_level(self, index_flat):
    """Check the ``level`` argument of ``Index.unique`` on a flat index."""
    idx = index_flat

    # Position 0, the index's own name and None must all give the same
    # result as drop_duplicates().
    deduplicated = idx.drop_duplicates()
    for level in (0, idx.name, None):
        tm.assert_index_equal(idx.unique(level=level), deduplicated)

    too_many_levels = "Too many levels: Index has only 1 level, not 4"
    with pytest.raises(IndexError, match=too_many_levels):
        idx.unique(level=3)

    # The index name appears in the message as its repr, so it is escaped
    # before being embedded in the regex.
    escaped_name = re.escape(repr(idx.name))
    unknown_level = (
        r"Requested level \(wrong\) does not match index name "
        rf"\({escaped_name}\)"
    )
    with pytest.raises(KeyError, match=unknown_level):
        idx.unique(level="wrong")
|
|
|
def test_unique(self, index_flat):
    """Check ``Index.unique`` on repeated values, with and without NaN.

    Skipped entirely for empty indexes; the NaN half is skipped when the
    index reports ``_can_hold_na`` as false.
    """
    source = index_flat
    if len(source) == 0:
        pytest.skip("Skip check for empty Index and MultiIndex")

    take_first_five_times = [0] * 5
    repeated = source[take_first_five_times]
    single = source[[0]]

    # Sanity-check the expected result before comparing against it.
    assert single.is_unique is True
    try:
        assert single.hasnans is False
    except NotImplementedError:
        # hasnans is allowed to be unsupported here.
        pass

    tm.assert_index_equal(repeated.unique(), single)

    if not source._can_hold_na:
        pytest.skip("Skip na-check if index cannot hold na")

    # Five copies of the first value, with the leading one replaced by NaN.
    raw = source._values[take_first_five_times]
    raw[0] = np.nan

    # The first two entries: the NaN followed by one copy of the value.
    raw_unique = raw[:2]
    with_nan = source._shallow_copy(raw)
    with_nan_unique = source._shallow_copy(raw_unique)
    assert with_nan_unique.is_unique is True

    assert with_nan.dtype == source.dtype
    assert with_nan_unique.dtype == source.dtype

    for candidate in (with_nan, with_nan_unique):
        tm.assert_index_equal(candidate.unique(), with_nan_unique)
|
|
|
@pytest.mark.filterwarnings("ignore:Period with BDay freq:FutureWarning")
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_searchsorted_monotonic(self, index_flat, request):
    """Check ``_searchsorted_monotonic`` (and ``searchsorted``) for ``index[0]``.

    Monotonic indexes are checked on both sides through
    ``_searchsorted_monotonic``; public ``searchsorted`` is additionally
    checked only for increasing indexes. Non-monotonic indexes must raise.
    """
    idx = index_flat

    # IntervalIndex is expected to fail with NotImplementedError.
    if isinstance(idx, pd.IntervalIndex):
        request.applymarker(
            pytest.mark.xfail(
                reason="IntervalIndex.searchsorted does not support Interval arg",
                raises=NotImplementedError,
            )
        )

    if idx.empty:
        pytest.skip("Skip check for empty Index")
    first = idx[0]

    # argmin locates the first False of the equality mask; a result of 0 is
    # treated as "every element equals the first value", in which case the
    # right-hand insertion point is the full length.
    expected_left = 0
    expected_right = (idx == first).argmin()
    if expected_right == 0:
        expected_right = len(idx)

    increasing = idx.is_monotonic_increasing
    if increasing or idx.is_monotonic_decreasing:
        ssm_left = idx._searchsorted_monotonic(first, side="left")
        assert expected_left == ssm_left

        ssm_right = idx._searchsorted_monotonic(first, side="right")
        assert expected_right == ssm_right

        if increasing:
            ss_left = idx.searchsorted(first, side="left")
            assert expected_left == ss_left

            ss_right = idx.searchsorted(first, side="right")
            assert expected_right == ss_right
    else:
        not_monotonic = "index must be monotonic increasing or decreasing"
        with pytest.raises(ValueError, match=not_monotonic):
            idx._searchsorted_monotonic(first, side="left")
|
|
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_drop_duplicates(self, index_flat, keep):
    """Compare ``duplicated`` / ``drop_duplicates`` against the Series results.

    An index with repeats is built by sampling positions (fixed seed) from
    the distinct values of the fixture. RangeIndex and empty indexes are
    skipped.
    """
    source = index_flat
    if isinstance(source, RangeIndex):
        pytest.skip(
            "RangeIndex is tested in test_drop_duplicates_no_duplicates "
            "as it cannot hold duplicates"
        )
    if len(source) == 0:
        pytest.skip(
            "empty index is tested in test_drop_duplicates_no_duplicates "
            "as it cannot hold duplicates"
        )

    # Distinct values first; the dtype is only forced for numeric indexes.
    index_type = type(source)
    distinct_values = list(set(source))
    if is_numeric_dtype(source):
        target_dtype = source.dtype
    else:
        target_dtype = None
    distinct = index_type(distinct_values, dtype=target_dtype)

    # Draw 1.5x as many positions as there are distinct values.
    size = len(distinct)
    rng = np.random.default_rng(2)
    positions = rng.choice(size, int(size * 1.5))
    with_repeats = index_type(distinct.values[positions])

    # The duplicated mask of the sampled positions is the reference.
    reference_mask = pd.Series(positions).duplicated(keep=keep).values
    tm.assert_numpy_array_equal(with_repeats.duplicated(keep=keep), reference_mask)

    # Series.drop_duplicates on the same data is the reference.
    reference_dropped = index_type(pd.Series(with_repeats).drop_duplicates(keep=keep))
    tm.assert_index_equal(with_repeats.drop_duplicates(keep=keep), reference_dropped)
|
|
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_drop_duplicates_no_duplicates(self, index_flat):
    """On an index without repeats, ``drop_duplicates`` returns an equal copy."""
    source = index_flat

    if isinstance(source, RangeIndex):
        # Used as-is, without rebuilding from its distinct values.
        distinct = source
    else:
        index_type = type(source)
        distinct_values = list(set(source))
        if is_numeric_dtype(source):
            target_dtype = source.dtype
        else:
            target_dtype = None
        distinct = index_type(distinct_values, dtype=target_dtype)

    # No element may be flagged as duplicated.
    nothing_duplicated = np.zeros(len(distinct), dtype="bool")
    tm.assert_numpy_array_equal(distinct.duplicated(), nothing_duplicated)

    deduplicated = distinct.drop_duplicates()
    tm.assert_index_equal(deduplicated, distinct)

    # Equal content, but it must be a new object.
    assert deduplicated is not distinct
|
|
|
def test_drop_duplicates_inplace(self, index):
    """``drop_duplicates`` does not accept an ``inplace`` keyword."""
    unexpected_keyword = r"drop_duplicates\(\) got an unexpected keyword argument"
    expects_type_error = pytest.raises(TypeError, match=unexpected_keyword)
    with expects_type_error:
        index.drop_duplicates(inplace=True)
|
|
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
def test_has_duplicates(self, index_flat):
    """An index built from five copies of one value reports duplicates."""
    source = index_flat
    index_type = type(source)

    # Empty indexes and RangeIndex are not exercised by this test.
    if len(source) == 0 or isinstance(source, RangeIndex):
        pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

    repeated = index_type([source[0]] * 5)
    assert repeated.is_unique is False
    assert repeated.has_duplicates is True
|
|
|
@pytest.mark.parametrize(
    "dtype",
    ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
)
def test_astype_preserves_name(self, index, dtype):
    """``astype`` keeps the index name (or level names for a MultiIndex).

    Conversions that raise ``ValueError``, ``TypeError``,
    ``NotImplementedError`` or ``SystemError`` end the test early without
    failing it.
    """
    is_multi = isinstance(index, MultiIndex)
    if is_multi:
        index.names = [f"idx{level}" for level in range(index.nlevels)]
    else:
        index.name = "idx"

    # Complex -> real/integer conversions are expected to emit a
    # ComplexWarning; where that class lives depends on the NumPy version flag.
    expected_warning = None
    if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
        expected_warning = (
            np.exceptions.ComplexWarning if np_version_gte1p25 else np.ComplexWarning
        )

    # Extra warnings are only treated as errors for string[pyarrow] -> category.
    strict_about_extras = (
        str(index.dtype) == "string[pyarrow]" and dtype == "category"
    )
    try:
        with tm.assert_produces_warning(
            expected_warning,
            raise_on_extra_warnings=strict_about_extras,
            check_stacklevel=False,
        ):
            converted = index.astype(dtype)
    except (ValueError, TypeError, NotImplementedError, SystemError):
        return

    if is_multi:
        assert converted.names == index.names
    else:
        assert converted.name == index.name
|
|
|
def test_hasnans_isnans(self, index_flat):
    """Check ``_isnan`` / ``hasnans`` before and after injecting one NaN."""
    source = index_flat

    # Baseline: the untouched copy must report no missing values at all.
    pristine = source.copy(deep=True)
    no_missing = np.zeros(len(pristine), dtype=bool)
    tm.assert_numpy_array_equal(pristine._isnan, no_missing)
    assert pristine.hasnans is False

    working = source.copy(deep=True)
    data = working._values

    # Empty, integer-dtype and bool-dtype indexes skip the NaN-injection half.
    if len(source) == 0 or is_integer_dtype(source.dtype) or source.dtype == bool:
        return

    data[1] = np.nan
    with_nan = type(source)(data)

    # Exactly the second position must now be flagged.
    one_missing = np.zeros(len(with_nan), dtype=bool)
    one_missing[1] = True
    tm.assert_numpy_array_equal(with_nan._isnan, one_missing)
    assert with_nan.hasnans is True
|
|
|
|
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", [None, "middle"])
def test_sort_values_invalid_na_position(index_with_missing, na_position):
    """``sort_values`` rejects an ``na_position`` of ``None`` or ``"middle"``."""
    expected_message = f"invalid na_position: {na_position}"
    expects_value_error = pytest.raises(ValueError, match=expected_message)
    with expects_value_error:
        index_with_missing.sort_values(na_position=na_position)
|
|
|
|
|
@pytest.mark.fails_arm_wheels
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
@pytest.mark.parametrize("na_position", ["first", "last"])
def test_sort_values_with_missing(index_with_missing, na_position, request):
    """``sort_values`` places missing entries according to ``na_position``.

    The expected index is the sorted non-missing values with one ``None``
    per missing entry placed before or after them.
    """
    idx = index_with_missing

    # CategoricalIndex is allowed (non-strictly) to fail this test.
    if isinstance(idx, CategoricalIndex):
        request.applymarker(
            pytest.mark.xfail(
                reason="missing value sorting order not well-defined", strict=False
            )
        )

    n_missing = np.sum(idx.isna())
    present_sorted = np.sort(idx[idx.notna()].values)
    na_block = [None] * n_missing

    if na_position == "first":
        pieces = [na_block, present_sorted]
    else:
        pieces = [present_sorted, na_block]

    # Rebuilt with the original dtype so the None placeholders are stored in
    # that dtype.
    expected = type(idx)(np.concatenate(pieces), dtype=idx.dtype)

    result = idx.sort_values(na_position=na_position)
    tm.assert_index_equal(result, expected)
|
|
|
|
|
def test_ndarray_compat_properties(index):
    """Array-like attributes of an index agree with those of ``index.values``."""
    if not IS64 and isinstance(index, PeriodIndex):
        pytest.skip("Overflow")

    # Transposing a one-dimensional index is a no-op.
    assert index.T.equals(index)
    assert index.transpose().equals(index)

    raw = index.values
    for attribute in ("shape", "ndim", "size"):
        assert getattr(index, attribute) == getattr(raw, attribute)

    # nbytes is only compared when the index is neither RangeIndex nor
    # MultiIndex.
    if not isinstance(index, (RangeIndex, MultiIndex)):
        assert index.nbytes == raw.nbytes

    # Results are discarded: these are accessed only to confirm that they
    # do not raise.
    index.nbytes
    index.values.nbytes
|
|
|
|
|
def test_compare_read_only_array(): |
|
|
|
arr = np.array([], dtype=object) |
|
arr.flags.writeable = False |
|
idx = pd.Index(arr) |
|
result = idx > 69 |
|
assert result.dtype == bool |
|
|