File size: 5,968 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
from typing import final
import pytest
import pandas as pd
import pandas._testing as tm
from pandas.api.types import is_numeric_dtype
class BaseReduceTests:
    """
    Reduction specific tests. Generally these only
    make sense for numeric/boolean operations.

    Subclasses declare which reductions they expect to work by overriding
    ``_supports_reduction``. For reductions reported as unsupported, the
    Series-level tests assert that calling the reduction raises ``TypeError``.
    """

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """
        Specify if we expect the reduction ``op_name`` on ``ser`` to succeed.

        The base implementation answers ``False`` for every reduction.
        """
        return False

    def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
        """
        Check a Series reduction against the same reduction on cast data.

        The reference data is ``ser`` cast to float64; when that cast raises
        ``TypeError`` or ``ValueError`` the reference is ``ser`` cast to
        object instead. Override if you need to cast to something other
        than float64.
        """
        reduce_actual = getattr(ser, op_name)

        try:
            reference = ser.astype("float64")
        except (TypeError, ValueError):
            # e.g. Interval can't cast (TypeError), StringArray can't cast
            # (ValueError), so cast to object and do the reduction pointwise
            reference = ser.astype(object)
        reduce_expected = getattr(reference, op_name)

        # "count" is called without arguments; every other reduction is
        # called with the requested skipna.
        call_kwargs = {} if op_name == "count" else {"skipna": skipna}
        result = reduce_actual(**call_kwargs)
        expected = reduce_expected(**call_kwargs)
        tm.assert_almost_equal(result, expected)

    def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool):
        """
        Return the dtype expected when the given reduction is done on a
        DataFrame column holding ``arr``.

        The default assumes float64-like behavior, i.e. retains the dtype.
        """
        return arr.dtype

    # We anticipate that authors should not need to override check_reduce_frame,
    # but should be able to do any necessary overriding in
    # _get_expected_reduction_dtype. If you have a use case where this
    # does not hold, please let us know at github.com/pandas-dev/pandas/issues.
    @final
    def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
        """
        Check that the 2D reduction done in a DataFrame reduction "looks like"
        a wrapped version of the 1D reduction done by Series.
        """
        values = ser.array
        frame = pd.DataFrame({"a": values})

        # var/std are called with an explicit ddof=1; nothing extra otherwise.
        extra_kwargs = {"ddof": 1} if op_name in ("var", "std") else {}
        expected_dtype = self._get_expected_reduction_dtype(values, op_name, skipna)

        # The DataFrame method just calls arr._reduce with keepdims=True,
        # so this first check is perfunctory.
        from_array = values._reduce(
            op_name, skipna=skipna, keepdims=True, **extra_kwargs
        )
        frame_reduction = getattr(frame, op_name)
        from_frame = frame_reduction(skipna=skipna, **extra_kwargs).array
        tm.assert_extension_array_equal(from_array, from_frame)

        # Check that the 2D reduction looks like a wrapped version of the
        # 1D reduction: the Series reduction of the non-missing values, or
        # pd.NA when missing values are present and must not be skipped.
        if skipna or not ser.isna().any():
            scalar = getattr(ser.dropna(), op_name)()
        else:
            scalar = pd.NA
        expected = pd.array([scalar], dtype=expected_dtype)
        tm.assert_extension_array_equal(from_array, expected)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
        """
        Run a boolean reduction on a Series built from ``data``.

        Supported reductions go through ``check_reduce``; unsupported ones
        must raise ``TypeError``.
        """
        op_name = all_boolean_reductions
        ser = pd.Series(data)

        if self._supports_reduction(ser, op_name):
            self.check_reduce(ser, op_name, skipna)
            return

        # TODO: the message being checked here isn't actually checking anything
        pattern = (
            "[Cc]annot perform|Categorical is not ordered for operation|"
            "does not support reduction|"
        )
        with pytest.raises(TypeError, match=pattern):
            getattr(ser, op_name)(skipna=skipna)

    @pytest.mark.filterwarnings("ignore::RuntimeWarning")
    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
        """
        Run a numeric reduction on a Series built from ``data``.

        Supported reductions go through ``check_reduce``; unsupported ones
        must raise ``TypeError``.
        """
        op_name = all_numeric_reductions
        ser = pd.Series(data)

        if self._supports_reduction(ser, op_name):
            # min/max with empty produce numpy warnings
            self.check_reduce(ser, op_name, skipna)
            return

        # TODO: the message being checked here isn't actually checking anything
        pattern = (
            "[Cc]annot perform|Categorical is not ordered for operation|"
            "does not support reduction|"
        )
        with pytest.raises(TypeError, match=pattern):
            getattr(ser, op_name)(skipna=skipna)

    @pytest.mark.parametrize("skipna", [True, False])
    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
        """
        Run ``check_reduce_frame`` for a numeric reduction on ``data``.

        The test is skipped when the Series dtype is not numeric, when the
        reduction is "count", "kurt" or "sem", or when the reduction is
        reported as unsupported by ``_supports_reduction``.
        """
        op_name = all_numeric_reductions
        ser = pd.Series(data)

        # The checks run in this order; the first one that applies skips.
        if not is_numeric_dtype(ser.dtype):
            pytest.skip(f"{ser.dtype} is not numeric dtype")
        elif op_name in ("count", "kurt", "sem"):
            pytest.skip(f"{op_name} not an array method")
        elif not self._supports_reduction(ser, op_name):
            pytest.skip(f"Reduction {op_name} not supported for this dtype")

        self.check_reduce_frame(ser, op_name, skipna)
# TODO(3.0): remove BaseNoReduceTests, BaseNumericReduceTests,
# BaseBooleanReduceTests
class BaseNoReduceTests(BaseReduceTests):
    """
    Reduction tests for arrays where we don't define any reductions.

    Adds nothing of its own: every test and helper is inherited unchanged
    from BaseReduceTests. Slated for removal per the TODO(3.0) note in this
    module.
    """
class BaseNumericReduceTests(BaseReduceTests):
    """
    For backward compatibility only; this only runs the numeric reductions.
    """

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """
        Skip the test for "any"/"all"; report every other reduction as
        supported.
        """
        is_boolean_op = op_name in ("any", "all")
        if is_boolean_op:
            pytest.skip("These are tested in BaseBooleanReduceTests")
        return True
class BaseBooleanReduceTests(BaseReduceTests):
    """
    For backward compatibility only; this only runs the boolean reductions
    ("any" and "all").
    """

    def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
        """
        Report "any"/"all" as supported; skip the test for every other
        reduction.
        """
        is_boolean_op = op_name in ("any", "all")
        if not is_boolean_op:
            pytest.skip("These are tested in BaseNumericReduceTests")
        return True
|