|
import numpy as np |
|
import pytest |
|
|
|
import pandas.util._test_decorators as td |
|
|
|
from pandas import ( |
|
DataFrame, |
|
Index, |
|
) |
|
import pandas._testing as tm |
|
|
|
# Marks applied to every test in this module: ``td.skip_if_no("numba")`` guards
# on the optional numba dependency, and ``single_cpu`` is the pandas test mark
# of that name.
pytestmark = [
    td.skip_if_no("numba"),
    pytest.mark.single_cpu,
]
|
|
|
|
|
@pytest.fixture(params=[0, 1])
def apply_axis(request):
    """Parametrized fixture supplying the ``axis`` argument: 0, then 1."""
    axis = request.param
    return axis
|
|
|
|
|
def test_numba_vs_python_noop(float_frame, apply_axis):
    """An identity function gives the same frame under both engines."""
    identity = lambda x: x

    def apply_with(engine):
        # Same function and axis each time; only the engine varies.
        return float_frame.apply(identity, engine=engine, axis=apply_axis)

    numba_out = apply_with("numba")
    python_out = apply_with("python")
    tm.assert_frame_equal(numba_out, python_out)
|
|
|
|
|
def test_numba_vs_python_string_index():
    """Identity apply agrees across engines for pyarrow-backed string labels."""
    pytest.importorskip("pyarrow")

    label_dtype = "string[pyarrow_numpy]"
    row_labels = Index(["a", "b"], dtype=label_dtype)
    col_labels = Index(["x", "y"], dtype=label_dtype)
    frame = DataFrame(1, index=row_labels, columns=col_labels)

    identity = lambda x: x
    numba_out = frame.apply(identity, engine="numba", axis=0)
    python_out = frame.apply(identity, engine="python", axis=0)

    # Only the values and labels are compared; the index/column types are
    # deliberately not checked.
    tm.assert_frame_equal(
        numba_out,
        python_out,
        check_column_type=False,
        check_index_type=False,
    )
|
|
|
|
|
def test_numba_vs_python_indexing():
    """Label lookups inside the applied function agree across engines."""
    data = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7.0, 8.0, 9.0]}
    frame = DataFrame(data, index=Index(["A", "B", "C"]))

    # With axis=1 the function is handed rows, so it looks up a column label;
    # with axis=0 it is handed columns, so it looks up a row label.
    cases = (
        (lambda x: x["c"], 1),
        (lambda x: x["A"], 0),
    )
    for lookup, axis in cases:
        numba_out = frame.apply(lookup, engine="numba", axis=axis)
        python_out = frame.apply(lookup, engine="python", axis=axis)
        tm.assert_series_equal(numba_out, python_out)
|
|
|
|
|
@pytest.mark.parametrize(
    "reduction",
    [
        lambda x: x.mean(),
        lambda x: x.min(),
        lambda x: x.max(),
        lambda x: x.sum(),
    ],
)
def test_numba_vs_python_reductions(reduction, apply_axis):
    """Each reduction over a 4x4 float64 frame of ones agrees across engines."""
    ones = np.ones((4, 4), dtype=np.float64)
    frame = DataFrame(ones)

    numba_out = frame.apply(reduction, engine="numba", axis=apply_axis)
    python_out = frame.apply(reduction, engine="python", axis=apply_axis)
    tm.assert_series_equal(numba_out, python_out)
|
|
|
|
|
@pytest.mark.parametrize("colnames", [[1, 2, 3], [1.0, 2.0, 3.0]])
def test_numba_numeric_colnames(colnames):
    """Integer and float column labels can be looked up inside the function."""
    values = np.array(
        [
            [1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
        ],
        dtype=np.int64,
    )
    frame = DataFrame(values, columns=colnames)

    # The label is captured by closure rather than written as a literal.
    label = colnames[0]
    pick_first = lambda x: x[label]

    numba_out = frame.apply(pick_first, engine="numba", axis=1)
    python_out = frame.apply(pick_first, engine="python", axis=1)
    tm.assert_series_equal(numba_out, python_out)
|
|
|
|
|
def test_numba_parallel_unsupported(float_frame):
    """Passing ``parallel=True`` to the numba engine raises NotImplementedError."""
    identity = lambda x: x
    parallel_kwargs = {"parallel": True}
    msg = "Parallel apply is not supported when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=msg):
        float_frame.apply(identity, engine="numba", engine_kwargs=parallel_kwargs)
|
|
|
|
|
def test_numba_nonunique_unsupported(apply_axis):
    """A frame with duplicate index labels is rejected by the numba engine."""
    duplicated_index = Index(["a", "a"])
    frame = DataFrame({"a": [1, 2]}, index=duplicated_index)
    identity = lambda x: x

    msg = "The index/columns must be unique when raw=False and engine='numba'"
    with pytest.raises(NotImplementedError, match=msg):
        frame.apply(identity, engine="numba", axis=apply_axis)
|
|
|
|
|
def test_numba_unsupported_dtypes(apply_axis):
    """Non-numeric and extension-array columns are rejected by the numba engine.

    The test is skipped when pyarrow is not installed, because column ``c`` is
    cast to the ``double[pyarrow]`` dtype during setup.
    """
    # Without this guard the ``astype`` call below fails during setup on
    # environments lacking pyarrow, instead of the test being skipped.
    pytest.importorskip("pyarrow")

    f = lambda x: x
    df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
    df["c"] = df["c"].astype("double[pyarrow]")

    # ``match`` is a regular expression: the alternation must be grouped,
    # otherwise ``|`` splits the pattern into two unrelated halves and only
    # one half of the message is ever checked.
    msg = r"Column b must have a numeric dtype\. Found '(object|string)' instead"
    with pytest.raises(ValueError, match=msg):
        df.apply(f, engine="numba", axis=apply_axis)

    msg = (
        "Column c is backed by an extension array, "
        "which is not supported by the numba engine."
    )
    with pytest.raises(ValueError, match=msg):
        df["c"].to_frame().apply(f, engine="numba", axis=apply_axis)
|
|