Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

17.9 kB

	"""
	Collection of tests asserting things that should be true for
	any index subclass except for MultiIndex. Makes use of the `index_flat`
	fixture defined in pandas/conftest.py.
	"""
	from copy import (
	copy,
	deepcopy,
	)
	import re

	import numpy as np
	import pytest

	from pandas.compat import IS64
	from pandas.compat.numpy import np_version_gte1p25

	from pandas.core.dtypes.common import (
	is_integer_dtype,
	is_numeric_dtype,
	)

	import pandas as pd
	from pandas import (
	CategoricalIndex,
	MultiIndex,
	PeriodIndex,
	RangeIndex,
	)
	import pandas._testing as tm


	class TestCommon:
	@pytest.mark.parametrize("name", [None, "new_name"])
	def test_to_frame(self, name, index_flat, using_copy_on_write):
	    """Check that ``to_frame`` builds a single, correctly labelled column.

	    See GH#15230 and GH#22580.
	    """
	    # A falsy ``name`` falls back to the index's own name, and then to 0.
	    column_label = name or index_flat.name or 0

	    frame = index_flat.to_frame(name=column_label)
	    assert frame.index is index_flat
	    assert len(frame.columns) == 1
	    assert frame.columns[0] == column_label
	    if not using_copy_on_write:
	        assert frame[column_label].values is not index_flat.values

	    # With index=False the resulting frame must not reuse the index object.
	    detached = index_flat.to_frame(index=False, name=column_label)
	    assert detached.index is not index_flat

	def test_droplevel(self, index_flat):
	    """Exercise ``droplevel`` on a flat index (GH 21115).

	    MultiIndex is tested separately in test_multi.py.
	    """
	    flat = index_flat

	    # Dropping an empty list of levels must give back an equal index.
	    assert flat.droplevel([]).equals(flat)

	    cannot_drop = (
	        "Cannot remove 1 levels from an index with 1 levels: at least one "
	        "level must be left."
	    )
	    for candidate in [flat.name, [flat.name]]:
	        if candidate is flat.name and isinstance(flat.name, tuple):
	            # GH 21121 : droplevel with tuple name
	            continue
	        with pytest.raises(ValueError, match=cannot_drop):
	            flat.droplevel(candidate)

	    unknown_level = (
	        r"'Requested level \(wrong\) does not match index name \(None\)'"
	    )
	    for candidate in ("wrong", ["wrong"]):
	        with pytest.raises(KeyError, match=unknown_level):
	            flat.droplevel(candidate)

	def test_constructor_non_hashable_name(self, index_flat):
	    """Both renaming entry points reject an unhashable name (GH 20527)."""
	    expected_error = "Index.name must be a hashable type"
	    unhashable = [["1"]]

	    # With .rename()
	    with pytest.raises(TypeError, match=expected_error):
	        index_flat.rename(name=unhashable)

	    # With .set_names()
	    with pytest.raises(TypeError, match=expected_error):
	        index_flat.set_names(names=unhashable)

	def test_constructor_unwraps_index(self, index_flat):
	a = index_flat
	# Passing dtype is necessary for Index([True, False], dtype=object)
	# case.
	b = type(a)(a, dtype=a.dtype)
	tm.assert_equal(a._data, b._data)

	def test_to_flat_index(self, index_flat):
	# 22866
	index = index_flat

	result = index.to_flat_index()
	tm.assert_index_equal(result, index)

	def test_set_name_methods(self, index_flat):
	    """Cover ``set_names`` and in-place ``rename`` on a flat index.

	    MultiIndex is tested separately.
	    """
	    target = index_flat
	    replacement = "This is the new name for this index"
	    name_before = target.name

	    # set_names hands back a renamed index and leaves the original alone.
	    renamed = target.set_names([replacement])
	    assert renamed.name == replacement
	    assert target.name == name_before

	    # An in-place rename should return None and mutate the index itself.
	    outcome = target.rename(replacement, inplace=True)
	    assert outcome is None
	    assert target.name == replacement
	    assert target.names == [replacement]

	    with pytest.raises(ValueError, match="Level must be None"):
	        target.set_names("a", level=0)

	    # rename in place just leaves tuples and other containers alone
	    tuple_name = ("A", "B")
	    target.rename(tuple_name, inplace=True)
	    assert target.name == tuple_name
	    assert target.names == [tuple_name]

	@pytest.mark.xfail
	def test_set_names_single_label_no_level(self, index_flat):
	    """Expect ``set_names`` with a bare label to raise; marked as xfail."""
	    expects_list_like = pytest.raises(TypeError, match="list-like")
	    with expects_list_like:
	        # should still fail even if it would be the right length
	        index_flat.set_names("a")

	def test_copy_and_deepcopy(self, index_flat):
	index = index_flat

	for func in (copy, deepcopy):
	idx_copy = func(index)
	assert idx_copy is not index
	assert idx_copy.equals(index)

	new_copy = index.copy(deep=True, name="banana")
	assert new_copy.name == "banana"

	def test_copy_name(self, index_flat):
	# GH#12309: Check that the "name" argument
	# passed at initialization is honored.
	index = index_flat

	first = type(index)(index, copy=True, name="mario")
	second = type(first)(first, copy=False)

	# Even though "copy=False", we want a new object.
	assert first is not second
	tm.assert_index_equal(first, second)

	# Not using tm.assert_index_equal() since names differ.
	assert index.equals(first)

	assert first.name == "mario"
	assert second.name == "mario"

	# TODO: belongs in series arithmetic tests?
	s1 = pd.Series(2, index=first)
	s2 = pd.Series(3, index=second[:-1])
	# See GH#13365
	s3 = s1 * s2
	assert s3.index.name == "mario"

	def test_copy_name2(self, index_flat):
	    """``copy(name=...)`` applies a valid name and rejects bad ones (GH#35592)."""
	    source = index_flat

	    renamed = source.copy(name="mario")
	    assert renamed.name == "mario"

	    with pytest.raises(ValueError, match="Length of new names must be 1, got 2"):
	        source.copy(name=["mario", "luigi"])

	    unhashable_msg = f"{type(source).__name__}.name must be a hashable type"
	    with pytest.raises(TypeError, match=unhashable_msg):
	        source.copy(name=[["mario"]])

	def test_unique_level(self, index_flat):
	    """Check ``unique(level=...)`` on a flat index (GH 17896).

	    MultiIndex is not covered here because it is tested separately.
	    """
	    flat = index_flat

	    # Every valid way of naming the single level gives the same result.
	    deduplicated = flat.drop_duplicates()
	    for level in (0, flat.name, None):
	        tm.assert_index_equal(flat.unique(level=level), deduplicated)

	    too_many = "Too many levels: Index has only 1 level, not 4"
	    with pytest.raises(IndexError, match=too_many):
	        flat.unique(level=3)

	    mismatch = (
	        rf"Requested level \(wrong\) does not match index name "
	        rf"\({re.escape(repr(flat.name))}\)"
	    )
	    with pytest.raises(KeyError, match=mismatch):
	        flat.unique(level="wrong")

	def test_unique(self, index_flat):
	# MultiIndex tested separately
	index = index_flat
	if not len(index):
	pytest.skip("Skip check for empty Index and MultiIndex")

	idx = index[[0] * 5]
	idx_unique = index[[0]]

	# We test against `idx_unique`, so first we make sure it's unique
	# and doesn't contain nans.
	assert idx_unique.is_unique is True
	try:
	assert idx_unique.hasnans is False
	except NotImplementedError:
	pass

	result = idx.unique()
	tm.assert_index_equal(result, idx_unique)

	# nans:
	if not index._can_hold_na:
	pytest.skip("Skip na-check if index cannot hold na")

	vals = index._values[[0] * 5]
	vals[0] = np.nan

	vals_unique = vals[:2]
	idx_nan = index._shallow_copy(vals)
	idx_unique_nan = index._shallow_copy(vals_unique)
	assert idx_unique_nan.is_unique is True

	assert idx_nan.dtype == index.dtype
	assert idx_unique_nan.dtype == index.dtype

	expected = idx_unique_nan
	for pos, i in enumerate([idx_nan, idx_unique_nan]):
	result = i.unique()
	tm.assert_index_equal(result, expected)

	@pytest.mark.filterwarnings("ignore:Period with BDay freq:FutureWarning")
	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	def test_searchsorted_monotonic(self, index_flat, request):
	    """Check ``_searchsorted_monotonic`` and ``searchsorted`` (GH17271).

	    The first element is searched for on both sides. Non-monotonic indexes
	    must raise from ``_searchsorted_monotonic``.
	    """
	    index = index_flat
	    # not implemented for tuple searches in MultiIndex
	    # or Intervals searches in IntervalIndex
	    if isinstance(index, pd.IntervalIndex):
	        request.applymarker(
	            pytest.mark.xfail(
	                reason="IntervalIndex.searchsorted does not support Interval arg",
	                raises=NotImplementedError,
	            )
	        )

	    # nothing to test if the index is empty
	    if index.empty:
	        pytest.skip("Skip check for empty Index")
	    needle = index[0]

	    # determine the expected results (handle dupes for 'right')
	    want_left = 0
	    want_right = (index == needle).argmin()
	    if want_right == 0:
	        # all values are the same, so the right-side position is the length
	        want_right = len(index)

	    ascending = index.is_monotonic_increasing
	    if not (ascending or index.is_monotonic_decreasing):
	        # non-monotonic should raise.
	        msg = "index must be monotonic increasing or decreasing"
	        with pytest.raises(ValueError, match=msg):
	            index._searchsorted_monotonic(needle, side="left")
	        return

	    # _searchsorted_monotonic is tested for both monotonic directions
	    found_left = index._searchsorted_monotonic(needle, side="left")
	    assert want_left == found_left
	    found_right = index._searchsorted_monotonic(needle, side="right")
	    assert want_right == found_right

	    # searchsorted itself is only tested for increasing indexes
	    if ascending:
	        assert want_left == index.searchsorted(needle, side="left")
	        assert want_right == index.searchsorted(needle, side="right")

	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	def test_drop_duplicates(self, index_flat, keep):
	    """Compare ``duplicated``/``drop_duplicates`` with the Series versions.

	    MultiIndex is tested separately.
	    """
	    index = index_flat
	    if isinstance(index, RangeIndex):
	        pytest.skip(
	            "RangeIndex is tested in test_drop_duplicates_no_duplicates "
	            "as it cannot hold duplicates"
	        )
	    if len(index) == 0:
	        pytest.skip(
	            "empty index is tested in test_drop_duplicates_no_duplicates "
	            "as it cannot hold duplicates"
	        )

	    # make unique index
	    klass = type(index)
	    distinct = list(set(index))
	    distinct_dtype = index.dtype if is_numeric_dtype(index) else None
	    unique_idx = klass(distinct, dtype=distinct_dtype)

	    # make duplicated index by sampling positions with a fixed seed
	    size = len(unique_idx)
	    rng = np.random.default_rng(2)
	    picks = rng.choice(size, int(size * 1.5))
	    dup_idx = klass(unique_idx.values[picks])

	    # Series.duplicated is tested separately
	    dup_mask = pd.Series(picks).duplicated(keep=keep).values
	    tm.assert_numpy_array_equal(dup_idx.duplicated(keep=keep), dup_mask)

	    # Series.drop_duplicates is tested separately
	    deduped = klass(pd.Series(dup_idx).drop_duplicates(keep=keep))
	    tm.assert_index_equal(dup_idx.drop_duplicates(keep=keep), deduped)

	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	def test_drop_duplicates_no_duplicates(self, index_flat):
	    """On a unique index nothing is flagged and nothing is dropped.

	    MultiIndex is tested separately.
	    """
	    index = index_flat

	    # make unique index
	    if not isinstance(index, RangeIndex):
	        distinct = list(set(index))
	        distinct_dtype = index.dtype if is_numeric_dtype(index) else None
	        unique_idx = type(index)(distinct, dtype=distinct_dtype)
	    else:
	        # RangeIndex cannot have duplicates
	        unique_idx = index

	    # check on unique index
	    none_flagged = np.array([False] * len(unique_idx), dtype="bool")
	    tm.assert_numpy_array_equal(unique_idx.duplicated(), none_flagged)
	    dropped = unique_idx.drop_duplicates()
	    tm.assert_index_equal(dropped, unique_idx)
	    # validate shallow copy
	    assert dropped is not unique_idx

	def test_drop_duplicates_inplace(self, index):
	    """``drop_duplicates`` rejects an ``inplace`` keyword with a TypeError."""
	    unexpected_kwarg = r"drop_duplicates\(\) got an unexpected keyword argument"
	    with pytest.raises(TypeError, match=unexpected_kwarg):
	        index.drop_duplicates(inplace=True)

	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	def test_has_duplicates(self, index_flat):
	    """An index made of one repeated element reports duplicates.

	    MultiIndex tested separately in:
	    tests/indexes/multi/test_unique_and_duplicates.
	    """
	    index = index_flat
	    if isinstance(index, RangeIndex) or not len(index):
	        # RangeIndex is unique by definition, and an empty index has no
	        # first element to repeat.
	        pytest.skip("Skip check for empty Index, MultiIndex, and RangeIndex")

	    repeated = type(index)([index[0]] * 5)
	    assert repeated.is_unique is False
	    assert repeated.has_duplicates is True

	@pytest.mark.parametrize(
	    "dtype",
	    ["int64", "uint64", "float64", "category", "datetime64[ns]", "timedelta64[ns]"],
	)
	def test_astype_preserves_name(self, index, dtype):
	    """``astype`` keeps the index name(s) whenever the cast succeeds.

	    https://github.com/pandas-dev/pandas/issues/32013
	    """
	    is_multi = isinstance(index, MultiIndex)
	    if is_multi:
	        index.names = [f"idx{i}" for i in range(index.nlevels)]
	    else:
	        index.name = "idx"

	    expected_warning = None
	    if index.dtype.kind == "c" and dtype in ["float64", "int64", "uint64"]:
	        # imaginary components discarded
	        expected_warning = (
	            np.exceptions.ComplexWarning
	            if np_version_gte1p25
	            else np.ComplexWarning
	        )

	    is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category"
	    try:
	        # Some of these conversions cannot succeed so we use a try / except
	        with tm.assert_produces_warning(
	            expected_warning,
	            raise_on_extra_warnings=is_pyarrow_str,
	            check_stacklevel=False,
	        ):
	            converted = index.astype(dtype)
	    except (ValueError, TypeError, NotImplementedError, SystemError):
	        # The cast is not possible for this index/dtype pair: nothing to check.
	        return

	    if is_multi:
	        assert converted.names == index.names
	    else:
	        assert converted.name == index.name

	def test_hasnans_isnans(self, index_flat):
	# GH#11343, added tests for hasnans / isnans
	index = index_flat

	# cases in indices doesn't include NaN
	idx = index.copy(deep=True)
	expected = np.array([False] * len(idx), dtype=bool)
	tm.assert_numpy_array_equal(idx._isnan, expected)
	assert idx.hasnans is False

	idx = index.copy(deep=True)
	values = idx._values

	if len(index) == 0:
	return
	elif is_integer_dtype(index.dtype):
	return
	elif index.dtype == bool:
	# values[1] = np.nan below casts to True!
	return

	values[1] = np.nan

	idx = type(index)(values)

	expected = np.array([False] * len(idx), dtype=bool)
	expected[1] = True
	tm.assert_numpy_array_equal(idx._isnan, expected)
	assert idx.hasnans is True


	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	@pytest.mark.parametrize("na_position", [None, "middle"])
	def test_sort_values_invalid_na_position(index_with_missing, na_position):
	    """An unsupported ``na_position`` makes ``sort_values`` raise ValueError."""
	    expected_msg = f"invalid na_position: {na_position}"
	    with pytest.raises(ValueError, match=expected_msg):
	        index_with_missing.sort_values(na_position=na_position)


	@pytest.mark.fails_arm_wheels
	@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
	@pytest.mark.parametrize("na_position", ["first", "last"])
	def test_sort_values_with_missing(index_with_missing, na_position, request):
	    """``sort_values`` orders present values and places NAs per ``na_position``.

	    GH 35584.
	    """
	    if isinstance(index_with_missing, CategoricalIndex):
	        request.applymarker(
	            pytest.mark.xfail(
	                reason="missing value sorting order not well-defined", strict=False
	            )
	        )

	    n_missing = np.sum(index_with_missing.isna())
	    present = index_with_missing[index_with_missing.notna()].values
	    ordered = np.sort(present)

	    # Build the expected values: the NA block goes before or after the
	    # sorted non-missing values.
	    na_block = [None] * n_missing
	    pieces = [na_block, ordered] if na_position == "first" else [ordered, na_block]
	    sorted_values = np.concatenate(pieces)

	    # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray
	    expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype)

	    result = index_with_missing.sort_values(na_position=na_position)
	    tm.assert_index_equal(result, expected)


	def test_ndarray_compat_properties(index):
	if isinstance(index, PeriodIndex) and not IS64:
	pytest.skip("Overflow")
	idx = index
	assert idx.T.equals(idx)
	assert idx.transpose().equals(idx)

	values = idx.values

	assert idx.shape == values.shape
	assert idx.ndim == values.ndim
	assert idx.size == values.size

	if not isinstance(index, (RangeIndex, MultiIndex)):
	# These two are not backed by an ndarray
	assert idx.nbytes == values.nbytes

	# test for validity
	idx.nbytes
	idx.values.nbytes


	def test_compare_read_only_array():
	# GH#57130
	arr = np.array([], dtype=object)
	arr.flags.writeable = False
	idx = pd.Index(arr)
	result = idx > 69
	assert result.dtype == bool