|
from datetime import datetime |
|
|
|
import numpy as np |
|
import pytest |
|
|
|
from pandas.core.dtypes.common import is_extension_array_dtype |
|
|
|
import pandas as pd |
|
from pandas import ( |
|
DataFrame, |
|
DatetimeIndex, |
|
Index, |
|
MultiIndex, |
|
NaT, |
|
PeriodIndex, |
|
Series, |
|
TimedeltaIndex, |
|
) |
|
import pandas._testing as tm |
|
from pandas.core.groupby.groupby import DataError |
|
from pandas.core.groupby.grouper import Grouper |
|
from pandas.core.indexes.datetimes import date_range |
|
from pandas.core.indexes.period import period_range |
|
from pandas.core.indexes.timedeltas import timedelta_range |
|
from pandas.core.resample import _asfreq_compat |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
DATE_RANGE = (date_range, "dti", datetime(2005, 1, 1), datetime(2005, 1, 10)) |
|
PERIOD_RANGE = (period_range, "pi", datetime(2005, 1, 1), datetime(2005, 1, 10)) |
|
TIMEDELTA_RANGE = (timedelta_range, "tdi", "1 day", "10 day") |
|
|
|
all_ts = pytest.mark.parametrize( |
|
"_index_factory,_series_name,_index_start,_index_end", |
|
[DATE_RANGE, PERIOD_RANGE, TIMEDELTA_RANGE], |
|
) |
|
|
|
|
|
@pytest.fixture |
|
def create_index(_index_factory): |
|
def _create_index(*args, **kwargs): |
|
"""return the _index_factory created using the args, kwargs""" |
|
return _index_factory(*args, **kwargs) |
|
|
|
return _create_index |
|
|
|
|
|
@pytest.mark.parametrize("freq", ["2D", "1h"]) |
|
@pytest.mark.parametrize( |
|
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] |
|
) |
|
def test_asfreq(series_and_frame, freq, create_index): |
|
obj = series_and_frame |
|
|
|
result = obj.resample(freq).asfreq() |
|
new_index = create_index(obj.index[0], obj.index[-1], freq=freq) |
|
expected = obj.reindex(new_index) |
|
tm.assert_almost_equal(result, expected) |
|
|
|
|
|
@pytest.mark.parametrize( |
|
"_index_factory,_series_name,_index_start,_index_end", [DATE_RANGE, TIMEDELTA_RANGE] |
|
) |
|
def test_asfreq_fill_value(series, create_index): |
|
|
|
|
|
ser = series |
|
|
|
result = ser.resample("1h").asfreq() |
|
new_index = create_index(ser.index[0], ser.index[-1], freq="1h") |
|
expected = ser.reindex(new_index) |
|
tm.assert_series_equal(result, expected) |
|
|
|
|
|
frame = ser.astype("float").to_frame("value") |
|
frame.iloc[1] = None |
|
result = frame.resample("1h").asfreq(fill_value=4.0) |
|
new_index = create_index(frame.index[0], frame.index[-1], freq="1h") |
|
expected = frame.reindex(new_index, fill_value=4.0) |
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
@all_ts |
|
def test_resample_interpolate(frame): |
|
|
|
df = frame |
|
warn = None |
|
if isinstance(df.index, PeriodIndex): |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(warn, match=msg): |
|
result = df.resample("1min").asfreq().interpolate() |
|
expected = df.resample("1min").interpolate() |
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
def test_raises_on_non_datetimelike_index(): |
|
|
|
xp = DataFrame() |
|
msg = ( |
|
"Only valid with DatetimeIndex, TimedeltaIndex or PeriodIndex, " |
|
"but got an instance of 'RangeIndex'" |
|
) |
|
with pytest.raises(TypeError, match=msg): |
|
xp.resample("YE") |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
def test_resample_empty_series(freq, empty_series_dti, resample_method): |
|
|
|
|
|
ser = empty_series_dti |
|
if freq == "ME" and isinstance(ser.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
ser.resample(freq) |
|
return |
|
elif freq == "ME" and isinstance(ser.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
warn = None |
|
if isinstance(ser.index, PeriodIndex): |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = ser.resample(freq) |
|
result = getattr(rs, resample_method)() |
|
|
|
if resample_method == "ohlc": |
|
expected = DataFrame( |
|
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] |
|
) |
|
expected.index = _asfreq_compat(ser.index, freq) |
|
tm.assert_frame_equal(result, expected, check_dtype=False) |
|
else: |
|
expected = ser.copy() |
|
expected.index = _asfreq_compat(ser.index, freq) |
|
tm.assert_series_equal(result, expected, check_dtype=False) |
|
|
|
tm.assert_index_equal(result.index, expected.index) |
|
assert result.index.freq == expected.index.freq |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize( |
|
"freq", |
|
[ |
|
pytest.param("ME", marks=pytest.mark.xfail(reason="Don't know why this fails")), |
|
"D", |
|
"h", |
|
], |
|
) |
|
def test_resample_nat_index_series(freq, series, resample_method): |
|
|
|
|
|
ser = series.copy() |
|
ser.index = PeriodIndex([NaT] * len(ser), freq=freq) |
|
|
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(FutureWarning, match=msg): |
|
rs = ser.resample(freq) |
|
result = getattr(rs, resample_method)() |
|
|
|
if resample_method == "ohlc": |
|
expected = DataFrame( |
|
[], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"] |
|
) |
|
tm.assert_frame_equal(result, expected, check_dtype=False) |
|
else: |
|
expected = ser[:0].copy() |
|
tm.assert_series_equal(result, expected, check_dtype=False) |
|
tm.assert_index_equal(result.index, expected.index) |
|
assert result.index.freq == expected.index.freq |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
@pytest.mark.parametrize("resample_method", ["count", "size"]) |
|
def test_resample_count_empty_series(freq, empty_series_dti, resample_method): |
|
|
|
ser = empty_series_dti |
|
if freq == "ME" and isinstance(ser.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
ser.resample(freq) |
|
return |
|
elif freq == "ME" and isinstance(ser.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
warn = None |
|
if isinstance(ser.index, PeriodIndex): |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = ser.resample(freq) |
|
|
|
result = getattr(rs, resample_method)() |
|
|
|
index = _asfreq_compat(ser.index, freq) |
|
|
|
expected = Series([], dtype="int64", index=index, name=ser.name) |
|
|
|
tm.assert_series_equal(result, expected) |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
def test_resample_empty_dataframe(empty_frame_dti, freq, resample_method): |
|
|
|
df = empty_frame_dti |
|
|
|
if freq == "ME" and isinstance(df.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
df.resample(freq, group_keys=False) |
|
return |
|
elif freq == "ME" and isinstance(df.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
warn = None |
|
if isinstance(df.index, PeriodIndex): |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = df.resample(freq, group_keys=False) |
|
result = getattr(rs, resample_method)() |
|
if resample_method == "ohlc": |
|
|
|
mi = MultiIndex.from_product([df.columns, ["open", "high", "low", "close"]]) |
|
expected = DataFrame( |
|
[], index=df.index[:0].copy(), columns=mi, dtype=np.float64 |
|
) |
|
expected.index = _asfreq_compat(df.index, freq) |
|
|
|
elif resample_method != "size": |
|
expected = df.copy() |
|
else: |
|
|
|
expected = Series([], dtype=np.int64) |
|
|
|
expected.index = _asfreq_compat(df.index, freq) |
|
|
|
tm.assert_index_equal(result.index, expected.index) |
|
assert result.index.freq == expected.index.freq |
|
tm.assert_almost_equal(result, expected) |
|
|
|
|
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
def test_resample_count_empty_dataframe(freq, empty_frame_dti): |
|
|
|
|
|
empty_frame_dti["a"] = [] |
|
|
|
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
empty_frame_dti.resample(freq) |
|
return |
|
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
warn = None |
|
if isinstance(empty_frame_dti.index, PeriodIndex): |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = empty_frame_dti.resample(freq) |
|
result = rs.count() |
|
|
|
index = _asfreq_compat(empty_frame_dti.index, freq) |
|
|
|
expected = DataFrame(dtype="int64", index=index, columns=Index(["a"], dtype=object)) |
|
|
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
def test_resample_size_empty_dataframe(freq, empty_frame_dti): |
|
|
|
|
|
empty_frame_dti["a"] = [] |
|
|
|
if freq == "ME" and isinstance(empty_frame_dti.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
empty_frame_dti.resample(freq) |
|
return |
|
elif freq == "ME" and isinstance(empty_frame_dti.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
msg = "Resampling with a PeriodIndex" |
|
warn = None |
|
if isinstance(empty_frame_dti.index, PeriodIndex): |
|
warn = FutureWarning |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = empty_frame_dti.resample(freq) |
|
result = rs.size() |
|
|
|
index = _asfreq_compat(empty_frame_dti.index, freq) |
|
|
|
expected = Series([], dtype="int64", index=index) |
|
|
|
tm.assert_series_equal(result, expected) |
|
|
|
|
|
@pytest.mark.parametrize( |
|
"index", |
|
[ |
|
PeriodIndex([], freq="M", name="a"), |
|
DatetimeIndex([], name="a"), |
|
TimedeltaIndex([], name="a"), |
|
], |
|
) |
|
@pytest.mark.parametrize("dtype", [float, int, object, "datetime64[ns]"]) |
|
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") |
|
def test_resample_empty_dtypes(index, dtype, resample_method): |
|
|
|
|
|
|
|
warn = None |
|
if isinstance(index, PeriodIndex): |
|
|
|
index = PeriodIndex([], freq="B", name=index.name) |
|
warn = FutureWarning |
|
msg = "Resampling with a PeriodIndex is deprecated" |
|
|
|
empty_series_dti = Series([], index, dtype) |
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = empty_series_dti.resample("d", group_keys=False) |
|
try: |
|
getattr(rs, resample_method)() |
|
except DataError: |
|
|
|
|
|
pass |
|
|
|
|
|
@all_ts |
|
@pytest.mark.parametrize("freq", ["ME", "D", "h"]) |
|
def test_apply_to_empty_series(empty_series_dti, freq): |
|
|
|
ser = empty_series_dti |
|
|
|
if freq == "ME" and isinstance(empty_series_dti.index, TimedeltaIndex): |
|
msg = ( |
|
"Resampling on a TimedeltaIndex requires fixed-duration `freq`, " |
|
"e.g. '24h' or '3D', not <MonthEnd>" |
|
) |
|
with pytest.raises(ValueError, match=msg): |
|
empty_series_dti.resample(freq) |
|
return |
|
elif freq == "ME" and isinstance(empty_series_dti.index, PeriodIndex): |
|
|
|
freq = "M" |
|
|
|
msg = "Resampling with a PeriodIndex" |
|
warn = None |
|
if isinstance(empty_series_dti.index, PeriodIndex): |
|
warn = FutureWarning |
|
|
|
with tm.assert_produces_warning(warn, match=msg): |
|
rs = ser.resample(freq, group_keys=False) |
|
|
|
result = rs.apply(lambda x: 1) |
|
with tm.assert_produces_warning(warn, match=msg): |
|
expected = ser.resample(freq).apply("sum") |
|
|
|
tm.assert_series_equal(result, expected, check_dtype=False) |
|
|
|
|
|
@all_ts |
|
def test_resampler_is_iterable(series): |
|
|
|
freq = "h" |
|
tg = Grouper(freq=freq, convention="start") |
|
msg = "Resampling with a PeriodIndex" |
|
warn = None |
|
if isinstance(series.index, PeriodIndex): |
|
warn = FutureWarning |
|
|
|
with tm.assert_produces_warning(warn, match=msg): |
|
grouped = series.groupby(tg) |
|
|
|
with tm.assert_produces_warning(warn, match=msg): |
|
resampled = series.resample(freq) |
|
for (rk, rv), (gk, gv) in zip(resampled, grouped): |
|
assert rk == gk |
|
tm.assert_series_equal(rv, gv) |
|
|
|
|
|
@all_ts |
|
def test_resample_quantile(series): |
|
|
|
ser = series |
|
q = 0.75 |
|
freq = "h" |
|
|
|
msg = "Resampling with a PeriodIndex" |
|
warn = None |
|
if isinstance(series.index, PeriodIndex): |
|
warn = FutureWarning |
|
with tm.assert_produces_warning(warn, match=msg): |
|
result = ser.resample(freq).quantile(q) |
|
expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name) |
|
tm.assert_series_equal(result, expected) |
|
|
|
|
|
@pytest.mark.parametrize("how", ["first", "last"]) |
|
def test_first_last_skipna(any_real_nullable_dtype, skipna, how): |
|
|
|
if is_extension_array_dtype(any_real_nullable_dtype): |
|
na_value = Series(dtype=any_real_nullable_dtype).dtype.na_value |
|
else: |
|
na_value = np.nan |
|
df = DataFrame( |
|
{ |
|
"a": [2, 1, 1, 2], |
|
"b": [na_value, 3.0, na_value, 4.0], |
|
"c": [na_value, 3.0, na_value, 4.0], |
|
}, |
|
index=date_range("2020-01-01", periods=4, freq="D"), |
|
dtype=any_real_nullable_dtype, |
|
) |
|
rs = df.resample("ME") |
|
method = getattr(rs, how) |
|
result = method(skipna=skipna) |
|
|
|
gb = df.groupby(df.shape[0] * [pd.to_datetime("2020-01-31")]) |
|
expected = getattr(gb, how)(skipna=skipna) |
|
expected.index.freq = "ME" |
|
tm.assert_frame_equal(result, expected) |
|
|