|
from copy import deepcopy |
|
import inspect |
|
import pydoc |
|
|
|
import numpy as np |
|
import pytest |
|
|
|
from pandas._config import using_pyarrow_string_dtype |
|
from pandas._config.config import option_context |
|
|
|
import pandas as pd |
|
from pandas import ( |
|
DataFrame, |
|
Series, |
|
date_range, |
|
timedelta_range, |
|
) |
|
import pandas._testing as tm |
|
|
|
|
|
class TestDataFrameMisc: |
|
def test_getitem_pop_assign_name(self, float_frame): |
|
s = float_frame["A"] |
|
assert s.name == "A" |
|
|
|
s = float_frame.pop("A") |
|
assert s.name == "A" |
|
|
|
s = float_frame.loc[:, "B"] |
|
assert s.name == "B" |
|
|
|
s2 = s.loc[:] |
|
assert s2.name == "B" |
|
|
|
def test_get_axis(self, float_frame): |
|
f = float_frame |
|
assert f._get_axis_number(0) == 0 |
|
assert f._get_axis_number(1) == 1 |
|
assert f._get_axis_number("index") == 0 |
|
assert f._get_axis_number("rows") == 0 |
|
assert f._get_axis_number("columns") == 1 |
|
|
|
assert f._get_axis_name(0) == "index" |
|
assert f._get_axis_name(1) == "columns" |
|
assert f._get_axis_name("index") == "index" |
|
assert f._get_axis_name("rows") == "index" |
|
assert f._get_axis_name("columns") == "columns" |
|
|
|
assert f._get_axis(0) is f.index |
|
assert f._get_axis(1) is f.columns |
|
|
|
with pytest.raises(ValueError, match="No axis named"): |
|
f._get_axis_number(2) |
|
|
|
with pytest.raises(ValueError, match="No axis.*foo"): |
|
f._get_axis_name("foo") |
|
|
|
with pytest.raises(ValueError, match="No axis.*None"): |
|
f._get_axis_name(None) |
|
|
|
with pytest.raises(ValueError, match="No axis named"): |
|
f._get_axis_number(None) |
|
|
|
def test_column_contains_raises(self, float_frame): |
|
with pytest.raises(TypeError, match="unhashable type: 'Index'"): |
|
float_frame.columns in float_frame |
|
|
|
def test_tab_completion(self): |
|
|
|
df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD")) |
|
for key in list("ABCD"): |
|
assert key in dir(df) |
|
assert isinstance(df.__getitem__("A"), Series) |
|
|
|
|
|
|
|
df = DataFrame( |
|
[list("abcd"), list("efgh")], |
|
columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))), |
|
) |
|
for key in list("ABCD"): |
|
assert key in dir(df) |
|
for key in list("EFGH"): |
|
assert key not in dir(df) |
|
assert isinstance(df.__getitem__("A"), DataFrame) |
|
|
|
def test_display_max_dir_items(self): |
|
|
|
columns = ["a" + str(i) for i in range(420)] |
|
values = [range(420), range(420)] |
|
df = DataFrame(values, columns=columns) |
|
|
|
|
|
assert "a99" in dir(df) |
|
assert "a100" not in dir(df) |
|
|
|
with option_context("display.max_dir_items", 300): |
|
df = DataFrame(values, columns=columns) |
|
assert "a299" in dir(df) |
|
assert "a300" not in dir(df) |
|
|
|
with option_context("display.max_dir_items", None): |
|
df = DataFrame(values, columns=columns) |
|
assert "a419" in dir(df) |
|
|
|
def test_not_hashable(self): |
|
empty_frame = DataFrame() |
|
|
|
df = DataFrame([1]) |
|
msg = "unhashable type: 'DataFrame'" |
|
with pytest.raises(TypeError, match=msg): |
|
hash(df) |
|
with pytest.raises(TypeError, match=msg): |
|
hash(empty_frame) |
|
|
|
@pytest.mark.xfail(using_pyarrow_string_dtype(), reason="surrogates not allowed") |
|
def test_column_name_contains_unicode_surrogate(self): |
|
|
|
colname = "\ud83d" |
|
df = DataFrame({colname: []}) |
|
|
|
assert colname not in dir(df) |
|
assert df.columns[0] == colname |
|
|
|
def test_new_empty_index(self): |
|
df1 = DataFrame(np.random.default_rng(2).standard_normal((0, 3))) |
|
df2 = DataFrame(np.random.default_rng(2).standard_normal((0, 3))) |
|
df1.index.name = "foo" |
|
assert df2.index.name is None |
|
|
|
def test_get_agg_axis(self, float_frame): |
|
cols = float_frame._get_agg_axis(0) |
|
assert cols is float_frame.columns |
|
|
|
idx = float_frame._get_agg_axis(1) |
|
assert idx is float_frame.index |
|
|
|
msg = r"Axis must be 0 or 1 \(got 2\)" |
|
with pytest.raises(ValueError, match=msg): |
|
float_frame._get_agg_axis(2) |
|
|
|
def test_empty(self, float_frame, float_string_frame): |
|
empty_frame = DataFrame() |
|
assert empty_frame.empty |
|
|
|
assert not float_frame.empty |
|
assert not float_string_frame.empty |
|
|
|
|
|
df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3)) |
|
del df["A"] |
|
assert not df.empty |
|
|
|
def test_len(self, float_frame): |
|
assert len(float_frame) == len(float_frame.index) |
|
|
|
|
|
arr = float_frame[["A", "B"]].values |
|
expected = float_frame.reindex(columns=["A", "B"]).values |
|
tm.assert_almost_equal(arr, expected) |
|
|
|
def test_axis_aliases(self, float_frame): |
|
f = float_frame |
|
|
|
|
|
expected = f.sum(axis=0) |
|
result = f.sum(axis="index") |
|
tm.assert_series_equal(result, expected) |
|
|
|
expected = f.sum(axis=1) |
|
result = f.sum(axis="columns") |
|
tm.assert_series_equal(result, expected) |
|
|
|
def test_class_axis(self): |
|
|
|
|
|
assert pydoc.getdoc(DataFrame.index) |
|
assert pydoc.getdoc(DataFrame.columns) |
|
|
|
def test_series_put_names(self, float_string_frame): |
|
series = float_string_frame._series |
|
for k, v in series.items(): |
|
assert v.name == k |
|
|
|
def test_empty_nonzero(self): |
|
df = DataFrame([1, 2, 3]) |
|
assert not df.empty |
|
df = DataFrame(index=[1], columns=[1]) |
|
assert not df.empty |
|
df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna() |
|
assert df.empty |
|
assert df.T.empty |
|
|
|
@pytest.mark.parametrize( |
|
"df", |
|
[ |
|
DataFrame(), |
|
DataFrame(index=[1]), |
|
DataFrame(columns=[1]), |
|
DataFrame({1: []}), |
|
], |
|
) |
|
def test_empty_like(self, df): |
|
assert df.empty |
|
assert df.T.empty |
|
|
|
def test_with_datetimelikes(self): |
|
df = DataFrame( |
|
{ |
|
"A": date_range("20130101", periods=10), |
|
"B": timedelta_range("1 day", periods=10), |
|
} |
|
) |
|
t = df.T |
|
|
|
result = t.dtypes.value_counts() |
|
expected = Series({np.dtype("object"): 10}, name="count") |
|
tm.assert_series_equal(result, expected) |
|
|
|
def test_deepcopy(self, float_frame): |
|
cp = deepcopy(float_frame) |
|
cp.loc[0, "A"] = 10 |
|
assert not float_frame.equals(cp) |
|
|
|
def test_inplace_return_self(self): |
|
|
|
|
|
data = DataFrame( |
|
{"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]} |
|
) |
|
|
|
def _check_f(base, f): |
|
result = f(base) |
|
assert result is None |
|
|
|
|
|
|
|
|
|
f = lambda x: x.set_index("a", inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.reset_index(inplace=True) |
|
_check_f(data.set_index("a"), f) |
|
|
|
|
|
f = lambda x: x.drop_duplicates(inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.sort_values("b", inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.sort_index(inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.fillna(0, inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.replace(1, 0, inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
f = lambda x: x.rename({1: "foo"}, inplace=True) |
|
_check_f(data.copy(), f) |
|
|
|
|
|
d = data.copy()["c"] |
|
|
|
|
|
f = lambda x: x.reset_index(inplace=True, drop=True) |
|
_check_f(data.set_index("a")["c"], f) |
|
|
|
|
|
f = lambda x: x.fillna(0, inplace=True) |
|
_check_f(d.copy(), f) |
|
|
|
|
|
f = lambda x: x.replace(1, 0, inplace=True) |
|
_check_f(d.copy(), f) |
|
|
|
|
|
f = lambda x: x.rename({1: "foo"}, inplace=True) |
|
_check_f(d.copy(), f) |
|
|
|
def test_tab_complete_warning(self, ip, frame_or_series): |
|
|
|
pytest.importorskip("IPython", minversion="6.0.0") |
|
from IPython.core.completer import provisionalcompleter |
|
|
|
if frame_or_series is DataFrame: |
|
code = "from pandas import DataFrame; obj = DataFrame()" |
|
else: |
|
code = "from pandas import Series; obj = Series(dtype=object)" |
|
|
|
ip.run_cell(code) |
|
|
|
|
|
with tm.assert_produces_warning(None, raise_on_extra_warnings=False): |
|
with provisionalcompleter("ignore"): |
|
list(ip.Completer.completions("obj.", 1)) |
|
|
|
def test_attrs(self): |
|
df = DataFrame({"A": [2, 3]}) |
|
assert df.attrs == {} |
|
df.attrs["version"] = 1 |
|
|
|
result = df.rename(columns=str) |
|
assert result.attrs == {"version": 1} |
|
|
|
def test_attrs_deepcopy(self): |
|
df = DataFrame({"A": [2, 3]}) |
|
assert df.attrs == {} |
|
df.attrs["tags"] = {"spam", "ham"} |
|
|
|
result = df.rename(columns=str) |
|
assert result.attrs == df.attrs |
|
assert result.attrs["tags"] is not df.attrs["tags"] |
|
|
|
@pytest.mark.parametrize("allows_duplicate_labels", [True, False, None]) |
|
def test_set_flags( |
|
self, |
|
allows_duplicate_labels, |
|
frame_or_series, |
|
using_copy_on_write, |
|
warn_copy_on_write, |
|
): |
|
obj = DataFrame({"A": [1, 2]}) |
|
key = (0, 0) |
|
if frame_or_series is Series: |
|
obj = obj["A"] |
|
key = 0 |
|
|
|
result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels) |
|
|
|
if allows_duplicate_labels is None: |
|
|
|
assert result.flags.allows_duplicate_labels is True |
|
else: |
|
assert result.flags.allows_duplicate_labels is allows_duplicate_labels |
|
|
|
|
|
assert obj is not result |
|
|
|
|
|
assert obj.flags.allows_duplicate_labels is True |
|
|
|
|
|
if frame_or_series is Series: |
|
assert np.may_share_memory(obj.values, result.values) |
|
else: |
|
assert np.may_share_memory(obj["A"].values, result["A"].values) |
|
|
|
with tm.assert_cow_warning(warn_copy_on_write): |
|
result.iloc[key] = 0 |
|
if using_copy_on_write: |
|
assert obj.iloc[key] == 1 |
|
else: |
|
assert obj.iloc[key] == 0 |
|
|
|
with tm.assert_cow_warning(warn_copy_on_write): |
|
result.iloc[key] = 1 |
|
|
|
|
|
result = obj.set_flags( |
|
copy=True, allows_duplicate_labels=allows_duplicate_labels |
|
) |
|
result.iloc[key] = 10 |
|
assert obj.iloc[key] == 1 |
|
|
|
def test_constructor_expanddim(self): |
|
|
|
|
|
|
|
df = DataFrame() |
|
|
|
msg = "'DataFrame' object has no attribute '_constructor_expanddim'" |
|
with pytest.raises(AttributeError, match=msg): |
|
df._constructor_expanddim(np.arange(27).reshape(3, 3, 3)) |
|
|
|
def test_inspect_getmembers(self): |
|
|
|
pytest.importorskip("jinja2") |
|
df = DataFrame() |
|
msg = "DataFrame._data is deprecated" |
|
with tm.assert_produces_warning( |
|
DeprecationWarning, match=msg, check_stacklevel=False |
|
): |
|
inspect.getmembers(df) |
|
|