|
from __future__ import annotations |
|
|
|
from functools import reduce |
|
from itertools import product |
|
import operator |
|
|
|
import numpy as np |
|
import pytest |
|
|
|
from pandas.compat import PY312 |
|
from pandas.errors import ( |
|
NumExprClobberingError, |
|
PerformanceWarning, |
|
UndefinedVariableError, |
|
) |
|
import pandas.util._test_decorators as td |
|
|
|
from pandas.core.dtypes.common import ( |
|
is_bool, |
|
is_float, |
|
is_list_like, |
|
is_scalar, |
|
) |
|
|
|
import pandas as pd |
|
from pandas import ( |
|
DataFrame, |
|
Index, |
|
Series, |
|
date_range, |
|
period_range, |
|
timedelta_range, |
|
) |
|
import pandas._testing as tm |
|
from pandas.core.computation import ( |
|
expr, |
|
pytables, |
|
) |
|
from pandas.core.computation.engines import ENGINES |
|
from pandas.core.computation.expr import ( |
|
BaseExprVisitor, |
|
PandasExprVisitor, |
|
PythonExprVisitor, |
|
) |
|
from pandas.core.computation.expressions import ( |
|
NUMEXPR_INSTALLED, |
|
USE_NUMEXPR, |
|
) |
|
from pandas.core.computation.ops import ( |
|
ARITH_OPS_SYMS, |
|
SPECIAL_CASE_ARITH_OPS_SYMS, |
|
_binary_math_ops, |
|
_binary_ops_dict, |
|
_unary_math_ops, |
|
) |
|
from pandas.core.computation.scope import DEFAULT_GLOBALS |
|
|
|
|
|
@pytest.fixture( |
|
params=( |
|
pytest.param( |
|
engine, |
|
marks=[ |
|
pytest.mark.skipif( |
|
engine == "numexpr" and not USE_NUMEXPR, |
|
reason=f"numexpr enabled->{USE_NUMEXPR}, " |
|
f"installed->{NUMEXPR_INSTALLED}", |
|
), |
|
td.skip_if_no("numexpr"), |
|
], |
|
) |
|
for engine in ENGINES |
|
) |
|
) |
|
def engine(request): |
|
return request.param |
|
|
|
|
|
@pytest.fixture(params=expr.PARSERS) |
|
def parser(request): |
|
return request.param |
|
|
|
|
|
def _eval_single_bin(lhs, cmp1, rhs, engine): |
|
c = _binary_ops_dict[cmp1] |
|
if ENGINES[engine].has_neg_frac: |
|
try: |
|
return c(lhs, rhs) |
|
except ValueError as e: |
|
if str(e).startswith( |
|
"negative number cannot be raised to a fractional power" |
|
): |
|
return np.nan |
|
raise |
|
return c(lhs, rhs) |
|
|
|
|
|
|
|
@pytest.fixture( |
|
params=list(range(5)), |
|
ids=["DataFrame", "Series", "SeriesNaN", "DataFrameNaN", "float"], |
|
) |
|
def lhs(request): |
|
nan_df1 = DataFrame(np.random.default_rng(2).standard_normal((10, 5))) |
|
nan_df1[nan_df1 > 0.5] = np.nan |
|
|
|
opts = ( |
|
DataFrame(np.random.default_rng(2).standard_normal((10, 5))), |
|
Series(np.random.default_rng(2).standard_normal(5)), |
|
Series([1, 2, np.nan, np.nan, 5]), |
|
nan_df1, |
|
np.random.default_rng(2).standard_normal(), |
|
) |
|
return opts[request.param] |
|
|
|
|
|
rhs = lhs |
|
midhs = lhs |
|
|
|
|
|
@pytest.fixture |
|
def idx_func_dict(): |
|
return { |
|
"i": lambda n: Index(np.arange(n), dtype=np.int64), |
|
"f": lambda n: Index(np.arange(n), dtype=np.float64), |
|
"s": lambda n: Index([f"{i}_{chr(i)}" for i in range(97, 97 + n)]), |
|
"dt": lambda n: date_range("2020-01-01", periods=n), |
|
"td": lambda n: timedelta_range("1 day", periods=n), |
|
"p": lambda n: period_range("2020-01-01", periods=n, freq="D"), |
|
} |
|
|
|
|
|
class TestEval: |
|
@pytest.mark.parametrize( |
|
"cmp1", |
|
["!=", "==", "<=", ">=", "<", ">"], |
|
ids=["ne", "eq", "le", "ge", "lt", "gt"], |
|
) |
|
@pytest.mark.parametrize("cmp2", [">", "<"], ids=["gt", "lt"]) |
|
@pytest.mark.parametrize("binop", expr.BOOL_OPS_SYMS) |
|
def test_complex_cmp_ops(self, cmp1, cmp2, binop, lhs, rhs, engine, parser): |
|
if parser == "python" and binop in ["and", "or"]: |
|
msg = "'BoolOp' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" |
|
pd.eval(ex, engine=engine, parser=parser) |
|
return |
|
|
|
lhs_new = _eval_single_bin(lhs, cmp1, rhs, engine) |
|
rhs_new = _eval_single_bin(lhs, cmp2, rhs, engine) |
|
expected = _eval_single_bin(lhs_new, binop, rhs_new, engine) |
|
|
|
ex = f"(lhs {cmp1} rhs) {binop} (lhs {cmp2} rhs)" |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
tm.assert_equal(result, expected) |
|
|
|
@pytest.mark.parametrize("cmp_op", expr.CMP_OPS_SYMS) |
|
def test_simple_cmp_ops(self, cmp_op, lhs, rhs, engine, parser): |
|
lhs = lhs < 0 |
|
rhs = rhs < 0 |
|
|
|
if parser == "python" and cmp_op in ["in", "not in"]: |
|
msg = "'(In|NotIn)' nodes are not implemented" |
|
|
|
with pytest.raises(NotImplementedError, match=msg): |
|
ex = f"lhs {cmp_op} rhs" |
|
pd.eval(ex, engine=engine, parser=parser) |
|
return |
|
|
|
ex = f"lhs {cmp_op} rhs" |
|
msg = "|".join( |
|
[ |
|
r"only list-like( or dict-like)? objects are allowed to be " |
|
r"passed to (DataFrame\.)?isin\(\), you passed a " |
|
r"(`|')bool(`|')", |
|
"argument of type 'bool' is not iterable", |
|
] |
|
) |
|
if cmp_op in ("in", "not in") and not is_list_like(rhs): |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval( |
|
ex, |
|
engine=engine, |
|
parser=parser, |
|
local_dict={"lhs": lhs, "rhs": rhs}, |
|
) |
|
else: |
|
expected = _eval_single_bin(lhs, cmp_op, rhs, engine) |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
tm.assert_equal(result, expected) |
|
|
|
@pytest.mark.parametrize("op", expr.CMP_OPS_SYMS) |
|
def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser): |
|
if parser == "python" and op in ["in", "not in"]: |
|
msg = "'(In|NotIn)' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
ex = f"~(lhs {op} rhs)" |
|
pd.eval(ex, engine=engine, parser=parser) |
|
return |
|
|
|
if ( |
|
is_float(lhs) |
|
and not is_float(rhs) |
|
and op in ["in", "not in"] |
|
and engine == "python" |
|
and parser == "pandas" |
|
): |
|
mark = pytest.mark.xfail( |
|
reason="Looks like expected is negative, unclear whether " |
|
"expected is incorrect or result is incorrect" |
|
) |
|
request.applymarker(mark) |
|
skip_these = ["in", "not in"] |
|
ex = f"~(lhs {op} rhs)" |
|
|
|
msg = "|".join( |
|
[ |
|
r"only list-like( or dict-like)? objects are allowed to be " |
|
r"passed to (DataFrame\.)?isin\(\), you passed a " |
|
r"(`|')float(`|')", |
|
"argument of type 'float' is not iterable", |
|
] |
|
) |
|
if is_scalar(rhs) and op in skip_these: |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval( |
|
ex, |
|
engine=engine, |
|
parser=parser, |
|
local_dict={"lhs": lhs, "rhs": rhs}, |
|
) |
|
else: |
|
|
|
if is_scalar(lhs) and is_scalar(rhs): |
|
lhs, rhs = (np.array([x]) for x in (lhs, rhs)) |
|
expected = _eval_single_bin(lhs, op, rhs, engine) |
|
if is_scalar(expected): |
|
expected = not expected |
|
else: |
|
expected = ~expected |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
tm.assert_almost_equal(expected, result) |
|
|
|
@pytest.mark.parametrize("cmp1", ["<", ">"]) |
|
@pytest.mark.parametrize("cmp2", ["<", ">"]) |
|
def test_chained_cmp_op(self, cmp1, cmp2, lhs, midhs, rhs, engine, parser): |
|
mid = midhs |
|
if parser == "python": |
|
ex1 = f"lhs {cmp1} mid {cmp2} rhs" |
|
msg = "'BoolOp' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(ex1, engine=engine, parser=parser) |
|
return |
|
|
|
lhs_new = _eval_single_bin(lhs, cmp1, mid, engine) |
|
rhs_new = _eval_single_bin(mid, cmp2, rhs, engine) |
|
|
|
if lhs_new is not None and rhs_new is not None: |
|
ex1 = f"lhs {cmp1} mid {cmp2} rhs" |
|
ex2 = f"lhs {cmp1} mid and mid {cmp2} rhs" |
|
ex3 = f"(lhs {cmp1} mid) & (mid {cmp2} rhs)" |
|
expected = _eval_single_bin(lhs_new, "&", rhs_new, engine) |
|
|
|
for ex in (ex1, ex2, ex3): |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
|
|
tm.assert_almost_equal(result, expected) |
|
|
|
@pytest.mark.parametrize( |
|
"arith1", sorted(set(ARITH_OPS_SYMS).difference(SPECIAL_CASE_ARITH_OPS_SYMS)) |
|
) |
|
def test_binary_arith_ops(self, arith1, lhs, rhs, engine, parser): |
|
ex = f"lhs {arith1} rhs" |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
expected = _eval_single_bin(lhs, arith1, rhs, engine) |
|
|
|
tm.assert_almost_equal(result, expected) |
|
ex = f"lhs {arith1} rhs {arith1} rhs" |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
nlhs = _eval_single_bin(lhs, arith1, rhs, engine) |
|
try: |
|
nlhs, ghs = nlhs.align(rhs) |
|
except (ValueError, TypeError, AttributeError): |
|
|
|
|
|
return |
|
else: |
|
if engine == "numexpr": |
|
import numexpr as ne |
|
|
|
|
|
expected = ne.evaluate(f"nlhs {arith1} ghs") |
|
|
|
|
|
|
|
|
|
tm.assert_almost_equal(result.values, expected) |
|
else: |
|
expected = eval(f"nlhs {arith1} ghs") |
|
tm.assert_almost_equal(result, expected) |
|
|
|
|
|
|
|
def test_modulus(self, lhs, rhs, engine, parser): |
|
ex = r"lhs % rhs" |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
expected = lhs % rhs |
|
tm.assert_almost_equal(result, expected) |
|
|
|
if engine == "numexpr": |
|
import numexpr as ne |
|
|
|
expected = ne.evaluate(r"expected % rhs") |
|
if isinstance(result, (DataFrame, Series)): |
|
tm.assert_almost_equal(result.values, expected) |
|
else: |
|
tm.assert_almost_equal(result, expected.item()) |
|
else: |
|
expected = _eval_single_bin(expected, "%", rhs, engine) |
|
tm.assert_almost_equal(result, expected) |
|
|
|
def test_floor_division(self, lhs, rhs, engine, parser): |
|
ex = "lhs // rhs" |
|
|
|
if engine == "python": |
|
res = pd.eval(ex, engine=engine, parser=parser) |
|
expected = lhs // rhs |
|
tm.assert_equal(res, expected) |
|
else: |
|
msg = ( |
|
r"unsupported operand type\(s\) for //: 'VariableNode' and " |
|
"'VariableNode'" |
|
) |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval( |
|
ex, |
|
local_dict={"lhs": lhs, "rhs": rhs}, |
|
engine=engine, |
|
parser=parser, |
|
) |
|
|
|
@td.skip_if_windows |
|
def test_pow(self, lhs, rhs, engine, parser): |
|
|
|
ex = "lhs ** rhs" |
|
expected = _eval_single_bin(lhs, "**", rhs, engine) |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
|
|
if ( |
|
is_scalar(lhs) |
|
and is_scalar(rhs) |
|
and isinstance(expected, (complex, np.complexfloating)) |
|
and np.isnan(result) |
|
): |
|
msg = "(DataFrame.columns|numpy array) are different" |
|
with pytest.raises(AssertionError, match=msg): |
|
tm.assert_numpy_array_equal(result, expected) |
|
else: |
|
tm.assert_almost_equal(result, expected) |
|
|
|
ex = "(lhs ** rhs) ** rhs" |
|
result = pd.eval(ex, engine=engine, parser=parser) |
|
|
|
middle = _eval_single_bin(lhs, "**", rhs, engine) |
|
expected = _eval_single_bin(middle, "**", rhs, engine) |
|
tm.assert_almost_equal(result, expected) |
|
|
|
def test_check_single_invert_op(self, lhs, engine, parser): |
|
|
|
try: |
|
elb = lhs.astype(bool) |
|
except AttributeError: |
|
elb = np.array([bool(lhs)]) |
|
expected = ~elb |
|
result = pd.eval("~elb", engine=engine, parser=parser) |
|
tm.assert_almost_equal(expected, result) |
|
|
|
def test_frame_invert(self, engine, parser): |
|
expr = "~lhs" |
|
|
|
|
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2))) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'invert_dd'" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
msg = "ufunc 'invert' not supported for the input types" |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'invert" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
expect = ~lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5) |
|
expect = ~lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
|
|
lhs = DataFrame( |
|
{"b": ["a", 1, 2.0], "c": np.random.default_rng(2).standard_normal(3) > 0.5} |
|
) |
|
if engine == "numexpr": |
|
with pytest.raises(ValueError, match="unknown type object"): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
msg = "bad operand type for unary ~: 'str'" |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
|
|
def test_series_invert(self, engine, parser): |
|
|
|
expr = "~lhs" |
|
|
|
|
|
|
|
lhs = Series(np.random.default_rng(2).standard_normal(5)) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'invert_dd'" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
msg = "ufunc 'invert' not supported for the input types" |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
|
|
|
|
lhs = Series(np.random.default_rng(2).integers(5, size=5)) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'invert" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
expect = ~lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
|
|
lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5) |
|
expect = ~lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
|
|
|
|
|
|
|
|
|
|
lhs = Series(["a", 1, 2.0]) |
|
if engine == "numexpr": |
|
with pytest.raises(ValueError, match="unknown type object"): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
msg = "bad operand type for unary ~: 'str'" |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
|
|
def test_frame_negate(self, engine, parser): |
|
expr = "-lhs" |
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2))) |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))) |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
|
|
lhs = DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'neg_bb'" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
def test_series_negate(self, engine, parser): |
|
expr = "-lhs" |
|
|
|
|
|
lhs = Series(np.random.default_rng(2).standard_normal(5)) |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
|
|
lhs = Series(np.random.default_rng(2).integers(5, size=5)) |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
|
|
lhs = Series(np.random.default_rng(2).standard_normal(5) > 0.5) |
|
if engine == "numexpr": |
|
msg = "couldn't find matching opcode for 'neg_bb'" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(expr, engine=engine, parser=parser) |
|
else: |
|
expect = -lhs |
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
@pytest.mark.parametrize( |
|
"lhs", |
|
[ |
|
|
|
DataFrame(np.random.default_rng(2).standard_normal((5, 2))), |
|
|
|
DataFrame(np.random.default_rng(2).integers(5, size=(5, 2))), |
|
|
|
DataFrame(np.random.default_rng(2).standard_normal((5, 2)) > 0.5), |
|
], |
|
) |
|
def test_frame_pos(self, lhs, engine, parser): |
|
expr = "+lhs" |
|
expect = lhs |
|
|
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_frame_equal(expect, result) |
|
|
|
@pytest.mark.parametrize( |
|
"lhs", |
|
[ |
|
|
|
Series(np.random.default_rng(2).standard_normal(5)), |
|
|
|
Series(np.random.default_rng(2).integers(5, size=5)), |
|
|
|
Series(np.random.default_rng(2).standard_normal(5) > 0.5), |
|
], |
|
) |
|
def test_series_pos(self, lhs, engine, parser): |
|
expr = "+lhs" |
|
expect = lhs |
|
|
|
result = pd.eval(expr, engine=engine, parser=parser) |
|
tm.assert_series_equal(expect, result) |
|
|
|
def test_scalar_unary(self, engine, parser): |
|
msg = "bad operand type for unary ~: 'float'" |
|
warn = None |
|
if PY312 and not (engine == "numexpr" and parser == "pandas"): |
|
warn = DeprecationWarning |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval("~1.0", engine=engine, parser=parser) |
|
|
|
assert pd.eval("-1.0", parser=parser, engine=engine) == -1.0 |
|
assert pd.eval("+1.0", parser=parser, engine=engine) == +1.0 |
|
assert pd.eval("~1", parser=parser, engine=engine) == ~1 |
|
assert pd.eval("-1", parser=parser, engine=engine) == -1 |
|
assert pd.eval("+1", parser=parser, engine=engine) == +1 |
|
with tm.assert_produces_warning( |
|
warn, match="Bitwise inversion", check_stacklevel=False |
|
): |
|
assert pd.eval("~True", parser=parser, engine=engine) == ~True |
|
with tm.assert_produces_warning( |
|
warn, match="Bitwise inversion", check_stacklevel=False |
|
): |
|
assert pd.eval("~False", parser=parser, engine=engine) == ~False |
|
assert pd.eval("-True", parser=parser, engine=engine) == -True |
|
assert pd.eval("-False", parser=parser, engine=engine) == -False |
|
assert pd.eval("+True", parser=parser, engine=engine) == +True |
|
assert pd.eval("+False", parser=parser, engine=engine) == +False |
|
|
|
def test_unary_in_array(self): |
|
|
|
|
|
|
|
result = np.array( |
|
pd.eval("[-True, True, +True, -False, False, +False, -37, 37, ~37, +37]"), |
|
dtype=np.object_, |
|
) |
|
expected = np.array( |
|
[ |
|
-True, |
|
True, |
|
+True, |
|
-False, |
|
False, |
|
+False, |
|
-37, |
|
37, |
|
~37, |
|
+37, |
|
], |
|
dtype=np.object_, |
|
) |
|
tm.assert_numpy_array_equal(result, expected) |
|
|
|
@pytest.mark.parametrize("dtype", [np.float32, np.float64]) |
|
@pytest.mark.parametrize("expr", ["x < -0.1", "-5 > x"]) |
|
def test_float_comparison_bin_op(self, dtype, expr): |
|
|
|
df = DataFrame({"x": np.array([0], dtype=dtype)}) |
|
res = df.eval(expr) |
|
assert res.values == np.array([False]) |
|
|
|
def test_unary_in_function(self): |
|
|
|
df = DataFrame({"x": [0, 1, np.nan]}) |
|
|
|
result = df.eval("x.fillna(-1)") |
|
expected = df.x.fillna(-1) |
|
|
|
|
|
tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR) |
|
|
|
result = df.eval("x.shift(1, fill_value=-1)") |
|
expected = df.x.shift(1, fill_value=-1) |
|
tm.assert_series_equal(result, expected, check_names=not USE_NUMEXPR) |
|
|
|
@pytest.mark.parametrize( |
|
"ex", |
|
( |
|
"1 or 2", |
|
"1 and 2", |
|
"a and b", |
|
"a or b", |
|
"1 or 2 and (3 + 2) > 3", |
|
"2 * x > 2 or 1 and 2", |
|
"2 * df > 3 and 1 or a", |
|
), |
|
) |
|
def test_disallow_scalar_bool_ops(self, ex, engine, parser): |
|
x, a, b = np.random.default_rng(2).standard_normal(3), 1, 2 |
|
df = DataFrame(np.random.default_rng(2).standard_normal((3, 2))) |
|
|
|
msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(ex, engine=engine, parser=parser) |
|
|
|
def test_identical(self, engine, parser): |
|
|
|
x = 1 |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
assert result == 1 |
|
assert is_scalar(result) |
|
|
|
x = 1.5 |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
assert result == 1.5 |
|
assert is_scalar(result) |
|
|
|
x = False |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
assert not result |
|
assert is_bool(result) |
|
assert is_scalar(result) |
|
|
|
x = np.array([1]) |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
tm.assert_numpy_array_equal(result, np.array([1])) |
|
assert result.shape == (1,) |
|
|
|
x = np.array([1.5]) |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
tm.assert_numpy_array_equal(result, np.array([1.5])) |
|
assert result.shape == (1,) |
|
|
|
x = np.array([False]) |
|
result = pd.eval("x", engine=engine, parser=parser) |
|
tm.assert_numpy_array_equal(result, np.array([False])) |
|
assert result.shape == (1,) |
|
|
|
def test_line_continuation(self, engine, parser): |
|
|
|
exp = """1 + 2 * \ |
|
5 - 1 + 2 """ |
|
result = pd.eval(exp, engine=engine, parser=parser) |
|
assert result == 12 |
|
|
|
def test_float_truncation(self, engine, parser): |
|
|
|
exp = "1000000000.006" |
|
result = pd.eval(exp, engine=engine, parser=parser) |
|
expected = np.float64(exp) |
|
assert result == expected |
|
|
|
df = DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]}) |
|
cutoff = 1000000000.0006 |
|
result = df.query(f"A < {cutoff:.4f}") |
|
assert result.empty |
|
|
|
cutoff = 1000000000.0010 |
|
result = df.query(f"A > {cutoff:.4f}") |
|
expected = df.loc[[1, 2], :] |
|
tm.assert_frame_equal(expected, result) |
|
|
|
exact = 1000000000.0011 |
|
result = df.query(f"A == {exact:.4f}") |
|
expected = df.loc[[1], :] |
|
tm.assert_frame_equal(expected, result) |
|
|
|
def test_disallow_python_keywords(self): |
|
|
|
df = DataFrame([[0, 0, 0]], columns=["foo", "bar", "class"]) |
|
msg = "Python keyword not valid identifier in numexpr query" |
|
with pytest.raises(SyntaxError, match=msg): |
|
df.query("class == 0") |
|
|
|
df = DataFrame() |
|
df.index.name = "lambda" |
|
with pytest.raises(SyntaxError, match=msg): |
|
df.query("lambda == 0") |
|
|
|
def test_true_false_logic(self): |
|
|
|
|
|
with tm.maybe_produces_warning( |
|
DeprecationWarning, PY312, check_stacklevel=False |
|
): |
|
assert pd.eval("not True") == -2 |
|
assert pd.eval("not False") == -1 |
|
assert pd.eval("True and not True") == 0 |
|
|
|
def test_and_logic_string_match(self): |
|
|
|
event = Series({"a": "hello"}) |
|
assert pd.eval(f"{event.str.match('hello').a}") |
|
assert pd.eval(f"{event.str.match('hello').a and event.str.match('hello').a}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestTypeCasting: |
|
@pytest.mark.parametrize("op", ["+", "-", "*", "**", "/"]) |
|
|
|
|
|
@pytest.mark.parametrize("left_right", [("df", "3"), ("3", "df")]) |
|
def test_binop_typecasting( |
|
self, engine, parser, op, complex_or_float_dtype, left_right, request |
|
): |
|
|
|
dtype = complex_or_float_dtype |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3)), dtype=dtype) |
|
left, right = left_right |
|
s = f"{left} {op} {right}" |
|
res = pd.eval(s, engine=engine, parser=parser) |
|
if dtype == "complex64" and engine == "numexpr": |
|
mark = pytest.mark.xfail( |
|
reason="numexpr issue with complex that are upcast " |
|
"to complex 128 " |
|
"https://github.com/pydata/numexpr/issues/492" |
|
) |
|
request.applymarker(mark) |
|
assert df.values.dtype == dtype |
|
assert res.values.dtype == dtype |
|
tm.assert_frame_equal(res, eval(s), check_exact=False) |
|
|
|
|
|
|
|
|
|
|
|
|
|
def should_warn(*args): |
|
not_mono = not any(map(operator.attrgetter("is_monotonic_increasing"), args)) |
|
only_one_dt = reduce( |
|
operator.xor, (issubclass(x.dtype.type, np.datetime64) for x in args) |
|
) |
|
return not_mono and only_one_dt |
|
|
|
|
|
class TestAlignment: |
|
index_types = ["i", "s", "dt"] |
|
lhs_index_types = index_types + ["s"] |
|
|
|
def test_align_nested_unary_op(self, engine, parser): |
|
s = "df * ~2" |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
res = pd.eval(s, engine=engine, parser=parser) |
|
tm.assert_frame_equal(res, df * ~2) |
|
|
|
@pytest.mark.filterwarnings("always::RuntimeWarning") |
|
@pytest.mark.parametrize("lr_idx_type", lhs_index_types) |
|
@pytest.mark.parametrize("rr_idx_type", index_types) |
|
@pytest.mark.parametrize("c_idx_type", index_types) |
|
def test_basic_frame_alignment( |
|
self, engine, parser, lr_idx_type, rr_idx_type, c_idx_type, idx_func_dict |
|
): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((10, 10)), |
|
index=idx_func_dict[lr_idx_type](10), |
|
columns=idx_func_dict[c_idx_type](10), |
|
) |
|
df2 = DataFrame( |
|
np.random.default_rng(2).standard_normal((20, 10)), |
|
index=idx_func_dict[rr_idx_type](20), |
|
columns=idx_func_dict[c_idx_type](10), |
|
) |
|
|
|
if should_warn(df.index, df2.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
res = pd.eval("df + df2", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("df + df2", engine=engine, parser=parser) |
|
tm.assert_frame_equal(res, df + df2) |
|
|
|
@pytest.mark.parametrize("r_idx_type", lhs_index_types) |
|
@pytest.mark.parametrize("c_idx_type", lhs_index_types) |
|
def test_frame_comparison( |
|
self, engine, parser, r_idx_type, c_idx_type, idx_func_dict |
|
): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((10, 10)), |
|
index=idx_func_dict[r_idx_type](10), |
|
columns=idx_func_dict[c_idx_type](10), |
|
) |
|
res = pd.eval("df < 2", engine=engine, parser=parser) |
|
tm.assert_frame_equal(res, df < 2) |
|
|
|
df3 = DataFrame( |
|
np.random.default_rng(2).standard_normal(df.shape), |
|
index=df.index, |
|
columns=df.columns, |
|
) |
|
res = pd.eval("df < df3", engine=engine, parser=parser) |
|
tm.assert_frame_equal(res, df < df3) |
|
|
|
@pytest.mark.filterwarnings("ignore::RuntimeWarning") |
|
@pytest.mark.parametrize("r1", lhs_index_types) |
|
@pytest.mark.parametrize("c1", index_types) |
|
@pytest.mark.parametrize("r2", index_types) |
|
@pytest.mark.parametrize("c2", index_types) |
|
def test_medium_complex_frame_alignment( |
|
self, engine, parser, r1, c1, r2, c2, idx_func_dict |
|
): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((3, 2)), |
|
index=idx_func_dict[r1](3), |
|
columns=idx_func_dict[c1](2), |
|
) |
|
df2 = DataFrame( |
|
np.random.default_rng(2).standard_normal((4, 2)), |
|
index=idx_func_dict[r2](4), |
|
columns=idx_func_dict[c2](2), |
|
) |
|
df3 = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), |
|
index=idx_func_dict[r2](5), |
|
columns=idx_func_dict[c2](2), |
|
) |
|
if should_warn(df.index, df2.index, df3.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
res = pd.eval("df + df2 + df3", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("df + df2 + df3", engine=engine, parser=parser) |
|
tm.assert_frame_equal(res, df + df2 + df3) |
|
|
|
@pytest.mark.filterwarnings("ignore::RuntimeWarning") |
|
@pytest.mark.parametrize("index_name", ["index", "columns"]) |
|
@pytest.mark.parametrize("c_idx_type", index_types) |
|
@pytest.mark.parametrize("r_idx_type", lhs_index_types) |
|
def test_basic_frame_series_alignment( |
|
self, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict |
|
): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((10, 10)), |
|
index=idx_func_dict[r_idx_type](10), |
|
columns=idx_func_dict[c_idx_type](10), |
|
) |
|
index = getattr(df, index_name) |
|
s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) |
|
|
|
if should_warn(df.index, s.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
res = pd.eval("df + s", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("df + s", engine=engine, parser=parser) |
|
|
|
if r_idx_type == "dt" or c_idx_type == "dt": |
|
expected = df.add(s) if engine == "numexpr" else df + s |
|
else: |
|
expected = df + s |
|
tm.assert_frame_equal(res, expected) |
|
|
|
@pytest.mark.parametrize("index_name", ["index", "columns"]) |
|
@pytest.mark.parametrize( |
|
"r_idx_type, c_idx_type", |
|
list(product(["i", "s"], ["i", "s"])) + [("dt", "dt")], |
|
) |
|
@pytest.mark.filterwarnings("ignore::RuntimeWarning") |
|
def test_basic_series_frame_alignment( |
|
self, request, engine, parser, index_name, r_idx_type, c_idx_type, idx_func_dict |
|
): |
|
if ( |
|
engine == "numexpr" |
|
and parser in ("pandas", "python") |
|
and index_name == "index" |
|
and r_idx_type == "i" |
|
and c_idx_type == "s" |
|
): |
|
reason = ( |
|
f"Flaky column ordering when engine={engine}, " |
|
f"parser={parser}, index_name={index_name}, " |
|
f"r_idx_type={r_idx_type}, c_idx_type={c_idx_type}" |
|
) |
|
request.applymarker(pytest.mark.xfail(reason=reason, strict=False)) |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((10, 7)), |
|
index=idx_func_dict[r_idx_type](10), |
|
columns=idx_func_dict[c_idx_type](7), |
|
) |
|
index = getattr(df, index_name) |
|
s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) |
|
if should_warn(s.index, df.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
res = pd.eval("s + df", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("s + df", engine=engine, parser=parser) |
|
|
|
if r_idx_type == "dt" or c_idx_type == "dt": |
|
expected = df.add(s) if engine == "numexpr" else s + df |
|
else: |
|
expected = s + df |
|
tm.assert_frame_equal(res, expected) |
|
|
|
@pytest.mark.filterwarnings("ignore::RuntimeWarning") |
|
@pytest.mark.parametrize("c_idx_type", index_types) |
|
@pytest.mark.parametrize("r_idx_type", lhs_index_types) |
|
@pytest.mark.parametrize("index_name", ["index", "columns"]) |
|
@pytest.mark.parametrize("op", ["+", "*"]) |
|
def test_series_frame_commutativity( |
|
self, engine, parser, index_name, op, r_idx_type, c_idx_type, idx_func_dict |
|
): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((10, 10)), |
|
index=idx_func_dict[r_idx_type](10), |
|
columns=idx_func_dict[c_idx_type](10), |
|
) |
|
index = getattr(df, index_name) |
|
s = Series(np.random.default_rng(2).standard_normal(5), index[:5]) |
|
|
|
lhs = f"s {op} df" |
|
rhs = f"df {op} s" |
|
if should_warn(df.index, s.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
a = pd.eval(lhs, engine=engine, parser=parser) |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
b = pd.eval(rhs, engine=engine, parser=parser) |
|
else: |
|
a = pd.eval(lhs, engine=engine, parser=parser) |
|
b = pd.eval(rhs, engine=engine, parser=parser) |
|
|
|
if r_idx_type != "dt" and c_idx_type != "dt": |
|
if engine == "numexpr": |
|
tm.assert_frame_equal(a, b) |
|
|
|
@pytest.mark.filterwarnings("always::RuntimeWarning") |
|
@pytest.mark.parametrize("r1", lhs_index_types) |
|
@pytest.mark.parametrize("c1", index_types) |
|
@pytest.mark.parametrize("r2", index_types) |
|
@pytest.mark.parametrize("c2", index_types) |
|
def test_complex_series_frame_alignment( |
|
self, engine, parser, r1, c1, r2, c2, idx_func_dict |
|
): |
|
n = 3 |
|
m1 = 5 |
|
m2 = 2 * m1 |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((m1, n)), |
|
index=idx_func_dict[r1](m1), |
|
columns=idx_func_dict[c1](n), |
|
) |
|
df2 = DataFrame( |
|
np.random.default_rng(2).standard_normal((m2, n)), |
|
index=idx_func_dict[r2](m2), |
|
columns=idx_func_dict[c2](n), |
|
) |
|
index = df2.columns |
|
ser = Series(np.random.default_rng(2).standard_normal(n), index[:n]) |
|
|
|
if r2 == "dt" or c2 == "dt": |
|
if engine == "numexpr": |
|
expected2 = df2.add(ser) |
|
else: |
|
expected2 = df2 + ser |
|
else: |
|
expected2 = df2 + ser |
|
|
|
if r1 == "dt" or c1 == "dt": |
|
if engine == "numexpr": |
|
expected = expected2.add(df) |
|
else: |
|
expected = expected2 + df |
|
else: |
|
expected = expected2 + df |
|
|
|
if should_warn(df2.index, ser.index, df.index): |
|
with tm.assert_produces_warning(RuntimeWarning): |
|
res = pd.eval("df2 + ser + df", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("df2 + ser + df", engine=engine, parser=parser) |
|
assert res.shape == expected.shape |
|
tm.assert_frame_equal(res, expected) |
|
|
|
def test_performance_warning_for_poor_alignment(self, engine, parser): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((1000, 10))) |
|
s = Series(np.random.default_rng(2).standard_normal(10000)) |
|
if engine == "numexpr": |
|
seen = PerformanceWarning |
|
else: |
|
seen = False |
|
|
|
with tm.assert_produces_warning(seen): |
|
pd.eval("df + s", engine=engine, parser=parser) |
|
|
|
s = Series(np.random.default_rng(2).standard_normal(1000)) |
|
with tm.assert_produces_warning(False): |
|
pd.eval("df + s", engine=engine, parser=parser) |
|
|
|
df = DataFrame(np.random.default_rng(2).standard_normal((10, 10000))) |
|
s = Series(np.random.default_rng(2).standard_normal(10000)) |
|
with tm.assert_produces_warning(False): |
|
pd.eval("df + s", engine=engine, parser=parser) |
|
|
|
df = DataFrame(np.random.default_rng(2).standard_normal((10, 10))) |
|
s = Series(np.random.default_rng(2).standard_normal(10000)) |
|
|
|
is_python_engine = engine == "python" |
|
|
|
if not is_python_engine: |
|
wrn = PerformanceWarning |
|
else: |
|
wrn = False |
|
|
|
with tm.assert_produces_warning(wrn) as w: |
|
pd.eval("df + s", engine=engine, parser=parser) |
|
|
|
if not is_python_engine: |
|
assert len(w) == 1 |
|
msg = str(w[0].message) |
|
logged = np.log10(s.size - df.shape[1]) |
|
expected = ( |
|
f"Alignment difference on axis 1 is larger " |
|
f"than an order of magnitude on term 'df', " |
|
f"by more than {logged:.4g}; performance may suffer." |
|
) |
|
assert msg == expected |
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestOperations: |
|
def eval(self, *args, **kwargs): |
|
kwargs["level"] = kwargs.pop("level", 0) + 1 |
|
return pd.eval(*args, **kwargs) |
|
|
|
def test_simple_arith_ops(self, engine, parser): |
|
exclude_arith = [] |
|
if parser == "python": |
|
exclude_arith = ["in", "not in"] |
|
|
|
arith_ops = [ |
|
op |
|
for op in expr.ARITH_OPS_SYMS + expr.CMP_OPS_SYMS |
|
if op not in exclude_arith |
|
] |
|
|
|
ops = (op for op in arith_ops if op != "//") |
|
|
|
for op in ops: |
|
ex = f"1 {op} 1" |
|
ex2 = f"x {op} 1" |
|
ex3 = f"1 {op} (x + 1)" |
|
|
|
if op in ("in", "not in"): |
|
msg = "argument of type 'int' is not iterable" |
|
with pytest.raises(TypeError, match=msg): |
|
pd.eval(ex, engine=engine, parser=parser) |
|
else: |
|
expec = _eval_single_bin(1, op, 1, engine) |
|
x = self.eval(ex, engine=engine, parser=parser) |
|
assert x == expec |
|
|
|
expec = _eval_single_bin(x, op, 1, engine) |
|
y = self.eval(ex2, local_dict={"x": x}, engine=engine, parser=parser) |
|
assert y == expec |
|
|
|
expec = _eval_single_bin(1, op, x + 1, engine) |
|
y = self.eval(ex3, local_dict={"x": x}, engine=engine, parser=parser) |
|
assert y == expec |
|
|
|
@pytest.mark.parametrize("rhs", [True, False]) |
|
@pytest.mark.parametrize("lhs", [True, False]) |
|
@pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) |
|
def test_simple_bool_ops(self, rhs, lhs, op): |
|
ex = f"{lhs} {op} {rhs}" |
|
|
|
if parser == "python" and op in ["and", "or"]: |
|
msg = "'BoolOp' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
self.eval(ex) |
|
return |
|
|
|
res = self.eval(ex) |
|
exp = eval(ex) |
|
assert res == exp |
|
|
|
@pytest.mark.parametrize("rhs", [True, False]) |
|
@pytest.mark.parametrize("lhs", [True, False]) |
|
@pytest.mark.parametrize("op", expr.BOOL_OPS_SYMS) |
|
def test_bool_ops_with_constants(self, rhs, lhs, op): |
|
ex = f"{lhs} {op} {rhs}" |
|
|
|
if parser == "python" and op in ["and", "or"]: |
|
msg = "'BoolOp' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
self.eval(ex) |
|
return |
|
|
|
res = self.eval(ex) |
|
exp = eval(ex) |
|
assert res == exp |
|
|
|
def test_4d_ndarray_fails(self): |
|
x = np.random.default_rng(2).standard_normal((3, 4, 5, 6)) |
|
y = Series(np.random.default_rng(2).standard_normal(10)) |
|
msg = "N-dimensional objects, where N > 2, are not supported with eval" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
self.eval("x + y", local_dict={"x": x, "y": y}) |
|
|
|
def test_constant(self): |
|
x = self.eval("1") |
|
assert x == 1 |
|
|
|
def test_single_variable(self): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((10, 2))) |
|
df2 = self.eval("df", local_dict={"df": df}) |
|
tm.assert_frame_equal(df, df2) |
|
|
|
def test_failing_subscript_with_name_error(self): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
with pytest.raises(NameError, match="name 'x' is not defined"): |
|
self.eval("df[x > 2] > 2") |
|
|
|
def test_lhs_expression_subscript(self): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
result = self.eval("(df + 1)[df > 2]", local_dict={"df": df}) |
|
expected = (df + 1)[df > 2] |
|
tm.assert_frame_equal(result, expected) |
|
|
|
def test_attr_expression(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc") |
|
) |
|
expr1 = "df.a < df.b" |
|
expec1 = df.a < df.b |
|
expr2 = "df.a + df.b + df.c" |
|
expec2 = df.a + df.b + df.c |
|
expr3 = "df.a + df.b + df.c[df.b < 0]" |
|
expec3 = df.a + df.b + df.c[df.b < 0] |
|
exprs = expr1, expr2, expr3 |
|
expecs = expec1, expec2, expec3 |
|
for e, expec in zip(exprs, expecs): |
|
tm.assert_series_equal(expec, self.eval(e, local_dict={"df": df})) |
|
|
|
def test_assignment_fails(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 3)), columns=list("abc") |
|
) |
|
df2 = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
expr1 = "df = df2" |
|
msg = "cannot assign without a target object" |
|
with pytest.raises(ValueError, match=msg): |
|
self.eval(expr1, local_dict={"df": df, "df2": df2}) |
|
|
|
def test_assignment_column_multiple_raise(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
with pytest.raises(SyntaxError, match="invalid syntax"): |
|
df.eval("d c = a + b") |
|
|
|
def test_assignment_column_invalid_assign(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
msg = "left hand side of an assignment must be a single name" |
|
with pytest.raises(SyntaxError, match=msg): |
|
df.eval("d,c = a + b") |
|
|
|
def test_assignment_column_invalid_assign_function_call(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
msg = "cannot assign to function call" |
|
with pytest.raises(SyntaxError, match=msg): |
|
df.eval('Timestamp("20131001") = a + b') |
|
|
|
def test_assignment_single_assign_existing(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
expected = df.copy() |
|
expected["a"] = expected["a"] + expected["b"] |
|
df.eval("a = a + b", inplace=True) |
|
tm.assert_frame_equal(df, expected) |
|
|
|
def test_assignment_single_assign_new(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
expected = df.copy() |
|
expected["c"] = expected["a"] + expected["b"] |
|
df.eval("c = a + b", inplace=True) |
|
tm.assert_frame_equal(df, expected) |
|
|
|
def test_assignment_single_assign_local_overlap(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
df = df.copy() |
|
a = 1 |
|
df.eval("a = 1 + b", inplace=True) |
|
|
|
expected = df.copy() |
|
expected["a"] = 1 + expected["b"] |
|
tm.assert_frame_equal(df, expected) |
|
|
|
def test_assignment_single_assign_name(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
a = 1 |
|
old_a = df.a.copy() |
|
df.eval("a = a + b", inplace=True) |
|
result = old_a + df.b |
|
tm.assert_series_equal(result, df.a, check_names=False) |
|
assert result.name is None |
|
|
|
def test_assignment_multiple_raises(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
df.eval("c = a + b", inplace=True) |
|
msg = "can only assign a single expression" |
|
with pytest.raises(SyntaxError, match=msg): |
|
df.eval("c = a = b") |
|
|
|
def test_assignment_explicit(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
self.eval("c = df.a + df.b", local_dict={"df": df}, target=df, inplace=True) |
|
expected = df.copy() |
|
expected["c"] = expected["a"] + expected["b"] |
|
tm.assert_frame_equal(df, expected) |
|
|
|
def test_column_in(self): |
|
|
|
df = DataFrame({"a": [11], "b": [-32]}) |
|
result = df.eval("a in [11, -32]") |
|
expected = Series([True]) |
|
|
|
|
|
tm.assert_series_equal(result, expected, check_names=False) |
|
|
|
@pytest.mark.xfail(reason="Unknown: Omitted test_ in name prior.") |
|
def test_assignment_not_inplace(self): |
|
|
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=list("ab") |
|
) |
|
|
|
actual = df.eval("c = a + b", inplace=False) |
|
assert actual is not None |
|
|
|
expected = df.copy() |
|
expected["c"] = expected["a"] + expected["b"] |
|
tm.assert_frame_equal(df, expected) |
|
|
|
def test_multi_line_expression(self, warn_copy_on_write): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
expected = df.copy() |
|
|
|
expected["c"] = expected["a"] + expected["b"] |
|
expected["d"] = expected["c"] + expected["b"] |
|
answer = df.eval( |
|
""" |
|
c = a + b |
|
d = c + b""", |
|
inplace=True, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
assert answer is None |
|
|
|
expected["a"] = expected["a"] - 1 |
|
expected["e"] = expected["a"] + 2 |
|
answer = df.eval( |
|
""" |
|
a = a - 1 |
|
e = a + 2""", |
|
inplace=True, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
assert answer is None |
|
|
|
|
|
msg = "Multi-line expressions are only valid if all expressions contain" |
|
with pytest.raises(ValueError, match=msg): |
|
df.eval( |
|
""" |
|
a = b + 2 |
|
b - 2""", |
|
inplace=False, |
|
) |
|
|
|
def test_multi_line_expression_not_inplace(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
expected = df.copy() |
|
|
|
expected["c"] = expected["a"] + expected["b"] |
|
expected["d"] = expected["c"] + expected["b"] |
|
df = df.eval( |
|
""" |
|
c = a + b |
|
d = c + b""", |
|
inplace=False, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
|
|
expected["a"] = expected["a"] - 1 |
|
expected["e"] = expected["a"] + 2 |
|
df = df.eval( |
|
""" |
|
a = a - 1 |
|
e = a + 2""", |
|
inplace=False, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
|
|
def test_multi_line_expression_local_variable(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
expected = df.copy() |
|
|
|
local_var = 7 |
|
expected["c"] = expected["a"] * local_var |
|
expected["d"] = expected["c"] + local_var |
|
answer = df.eval( |
|
""" |
|
c = a * @local_var |
|
d = c + @local_var |
|
""", |
|
inplace=True, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
assert answer is None |
|
|
|
def test_multi_line_expression_callable_local_variable(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
|
|
def local_func(a, b): |
|
return b |
|
|
|
expected = df.copy() |
|
expected["c"] = expected["a"] * local_func(1, 7) |
|
expected["d"] = expected["c"] + local_func(1, 7) |
|
answer = df.eval( |
|
""" |
|
c = a * @local_func(1, 7) |
|
d = c + @local_func(1, 7) |
|
""", |
|
inplace=True, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
assert answer is None |
|
|
|
def test_multi_line_expression_callable_local_variable_with_kwargs(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
|
|
def local_func(a, b): |
|
return b |
|
|
|
expected = df.copy() |
|
expected["c"] = expected["a"] * local_func(b=7, a=1) |
|
expected["d"] = expected["c"] + local_func(b=7, a=1) |
|
answer = df.eval( |
|
""" |
|
c = a * @local_func(b=7, a=1) |
|
d = c + @local_func(b=7, a=1) |
|
""", |
|
inplace=True, |
|
) |
|
tm.assert_frame_equal(expected, df) |
|
assert answer is None |
|
|
|
def test_assignment_in_query(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
df_orig = df.copy() |
|
msg = "cannot assign without a target object" |
|
with pytest.raises(ValueError, match=msg): |
|
df.query("a = 1") |
|
tm.assert_frame_equal(df, df_orig) |
|
|
|
def test_query_inplace(self): |
|
|
|
df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) |
|
expected = df.copy() |
|
expected = expected[expected["a"] == 2] |
|
df.query("a == 2", inplace=True) |
|
tm.assert_frame_equal(expected, df) |
|
|
|
df = {} |
|
expected = {"a": 3} |
|
|
|
self.eval("a = 1 + 2", target=df, inplace=True) |
|
tm.assert_dict_equal(df, expected) |
|
|
|
@pytest.mark.parametrize("invalid_target", [1, "cat", [1, 2], np.array([]), (1, 3)]) |
|
def test_cannot_item_assign(self, invalid_target): |
|
msg = "Cannot assign expression output to target" |
|
expression = "a = 1 + 2" |
|
|
|
with pytest.raises(ValueError, match=msg): |
|
self.eval(expression, target=invalid_target, inplace=True) |
|
|
|
if hasattr(invalid_target, "copy"): |
|
with pytest.raises(ValueError, match=msg): |
|
self.eval(expression, target=invalid_target, inplace=False) |
|
|
|
@pytest.mark.parametrize("invalid_target", [1, "cat", (1, 3)]) |
|
def test_cannot_copy_item(self, invalid_target): |
|
msg = "Cannot return a copy of the target" |
|
expression = "a = 1 + 2" |
|
|
|
with pytest.raises(ValueError, match=msg): |
|
self.eval(expression, target=invalid_target, inplace=False) |
|
|
|
@pytest.mark.parametrize("target", [1, "cat", [1, 2], np.array([]), (1, 3), {1: 2}]) |
|
def test_inplace_no_assignment(self, target): |
|
expression = "1 + 2" |
|
|
|
assert self.eval(expression, target=target, inplace=False) == 3 |
|
|
|
msg = "Cannot operate inplace if there is no assignment" |
|
with pytest.raises(ValueError, match=msg): |
|
self.eval(expression, target=target, inplace=True) |
|
|
|
def test_basic_period_index_boolean_expression(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((2, 2)), |
|
columns=period_range("2020-01-01", freq="D", periods=2), |
|
) |
|
e = df < 2 |
|
r = self.eval("df < 2", local_dict={"df": df}) |
|
x = df < 2 |
|
|
|
tm.assert_frame_equal(r, e) |
|
tm.assert_frame_equal(x, e) |
|
|
|
def test_basic_period_index_subscript_expression(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((2, 2)), |
|
columns=period_range("2020-01-01", freq="D", periods=2), |
|
) |
|
r = self.eval("df[df < 2 + 3]", local_dict={"df": df}) |
|
e = df[df < 2 + 3] |
|
tm.assert_frame_equal(r, e) |
|
|
|
def test_nested_period_index_subscript_expression(self): |
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((2, 2)), |
|
columns=period_range("2020-01-01", freq="D", periods=2), |
|
) |
|
r = self.eval("df[df[df < 2] < 2] + df * 2", local_dict={"df": df}) |
|
e = df[df[df < 2] < 2] + df * 2 |
|
tm.assert_frame_equal(r, e) |
|
|
|
def test_date_boolean(self, engine, parser): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
df["dates1"] = date_range("1/1/2012", periods=5) |
|
res = self.eval( |
|
"df.dates1 < 20130101", |
|
local_dict={"df": df}, |
|
engine=engine, |
|
parser=parser, |
|
) |
|
expec = df.dates1 < "20130101" |
|
tm.assert_series_equal(res, expec, check_names=False) |
|
|
|
def test_simple_in_ops(self, engine, parser): |
|
if parser != "python": |
|
res = pd.eval("1 in [1, 2]", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("2 in (1, 2)", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("3 in (1, 2)", engine=engine, parser=parser) |
|
assert not res |
|
|
|
res = pd.eval("3 not in (1, 2)", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("[3] not in (1, 2)", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("[3] in ([3], 2)", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("[[3]] in [[[3]], 2]", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("(3,) in [(3,), 2]", engine=engine, parser=parser) |
|
assert res |
|
|
|
res = pd.eval("(3,) not in [(3,), 2]", engine=engine, parser=parser) |
|
assert not res |
|
|
|
res = pd.eval("[(3,)] in [[(3,)], 2]", engine=engine, parser=parser) |
|
assert res |
|
else: |
|
msg = "'In' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("1 in [1, 2]", engine=engine, parser=parser) |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("2 in (1, 2)", engine=engine, parser=parser) |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("3 in (1, 2)", engine=engine, parser=parser) |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("[(3,)] in (1, 2, [(3,)])", engine=engine, parser=parser) |
|
msg = "'NotIn' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("3 not in (1, 2)", engine=engine, parser=parser) |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval("[3] not in (1, 2, [[3]])", engine=engine, parser=parser) |
|
|
|
def test_check_many_exprs(self, engine, parser): |
|
a = 1 |
|
expr = " * ".join("a" * 33) |
|
expected = 1 |
|
res = pd.eval(expr, engine=engine, parser=parser) |
|
assert res == expected |
|
|
|
@pytest.mark.parametrize( |
|
"expr", |
|
[ |
|
"df > 2 and df > 3", |
|
"df > 2 or df > 3", |
|
"not df > 2", |
|
], |
|
) |
|
def test_fails_and_or_not(self, expr, engine, parser): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
if parser == "python": |
|
msg = "'BoolOp' nodes are not implemented" |
|
if "not" in expr: |
|
msg = "'Not' nodes are not implemented" |
|
|
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval( |
|
expr, |
|
local_dict={"df": df}, |
|
parser=parser, |
|
engine=engine, |
|
) |
|
else: |
|
|
|
pd.eval( |
|
expr, |
|
local_dict={"df": df}, |
|
parser=parser, |
|
engine=engine, |
|
) |
|
|
|
@pytest.mark.parametrize("char", ["|", "&"]) |
|
def test_fails_ampersand_pipe(self, char, engine, parser): |
|
df = DataFrame(np.random.default_rng(2).standard_normal((5, 3))) |
|
ex = f"(df + 2)[df > 1] > 0 {char} (df > 0)" |
|
if parser == "python": |
|
msg = "cannot evaluate scalar only bool ops" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(ex, parser=parser, engine=engine) |
|
else: |
|
|
|
pd.eval(ex, parser=parser, engine=engine) |
|
|
|
|
|
class TestMath: |
|
def eval(self, *args, **kwargs): |
|
kwargs["level"] = kwargs.pop("level", 0) + 1 |
|
return pd.eval(*args, **kwargs) |
|
|
|
@pytest.mark.skipif( |
|
not NUMEXPR_INSTALLED, reason="Unary ops only implemented for numexpr" |
|
) |
|
@pytest.mark.parametrize("fn", _unary_math_ops) |
|
def test_unary_functions(self, fn): |
|
df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) |
|
a = df.a |
|
|
|
expr = f"{fn}(a)" |
|
got = self.eval(expr) |
|
with np.errstate(all="ignore"): |
|
expect = getattr(np, fn)(a) |
|
tm.assert_series_equal(got, expect, check_names=False) |
|
|
|
@pytest.mark.parametrize("fn", _binary_math_ops) |
|
def test_binary_functions(self, fn): |
|
df = DataFrame( |
|
{ |
|
"a": np.random.default_rng(2).standard_normal(10), |
|
"b": np.random.default_rng(2).standard_normal(10), |
|
} |
|
) |
|
a = df.a |
|
b = df.b |
|
|
|
expr = f"{fn}(a, b)" |
|
got = self.eval(expr) |
|
with np.errstate(all="ignore"): |
|
expect = getattr(np, fn)(a, b) |
|
tm.assert_almost_equal(got, expect, check_names=False) |
|
|
|
def test_df_use_case(self, engine, parser): |
|
df = DataFrame( |
|
{ |
|
"a": np.random.default_rng(2).standard_normal(10), |
|
"b": np.random.default_rng(2).standard_normal(10), |
|
} |
|
) |
|
df.eval( |
|
"e = arctan2(sin(a), b)", |
|
engine=engine, |
|
parser=parser, |
|
inplace=True, |
|
) |
|
got = df.e |
|
expect = np.arctan2(np.sin(df.a), df.b) |
|
tm.assert_series_equal(got, expect, check_names=False) |
|
|
|
def test_df_arithmetic_subexpression(self, engine, parser): |
|
df = DataFrame( |
|
{ |
|
"a": np.random.default_rng(2).standard_normal(10), |
|
"b": np.random.default_rng(2).standard_normal(10), |
|
} |
|
) |
|
df.eval("e = sin(a + b)", engine=engine, parser=parser, inplace=True) |
|
got = df.e |
|
expect = np.sin(df.a + df.b) |
|
tm.assert_series_equal(got, expect, check_names=False) |
|
|
|
@pytest.mark.parametrize( |
|
"dtype, expect_dtype", |
|
[ |
|
(np.int32, np.float64), |
|
(np.int64, np.float64), |
|
(np.float32, np.float32), |
|
(np.float64, np.float64), |
|
pytest.param(np.complex128, np.complex128, marks=td.skip_if_windows), |
|
], |
|
) |
|
def test_result_types(self, dtype, expect_dtype, engine, parser): |
|
|
|
|
|
|
|
|
|
|
|
df = DataFrame( |
|
{"a": np.random.default_rng(2).standard_normal(10).astype(dtype)} |
|
) |
|
assert df.a.dtype == dtype |
|
df.eval("b = sin(a)", engine=engine, parser=parser, inplace=True) |
|
got = df.b |
|
expect = np.sin(df.a) |
|
assert expect.dtype == got.dtype |
|
assert expect_dtype == got.dtype |
|
tm.assert_series_equal(got, expect, check_names=False) |
|
|
|
def test_undefined_func(self, engine, parser): |
|
df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) |
|
msg = '"mysin" is not a supported function' |
|
|
|
with pytest.raises(ValueError, match=msg): |
|
df.eval("mysin(a)", engine=engine, parser=parser) |
|
|
|
def test_keyword_arg(self, engine, parser): |
|
df = DataFrame({"a": np.random.default_rng(2).standard_normal(10)}) |
|
msg = 'Function "sin" does not support keyword arguments' |
|
|
|
with pytest.raises(TypeError, match=msg): |
|
df.eval("sin(x=a)", engine=engine, parser=parser) |
|
|
|
|
|
_var_s = np.random.default_rng(2).standard_normal(10) |
|
|
|
|
|
class TestScope: |
|
def test_global_scope(self, engine, parser): |
|
e = "_var_s * 2" |
|
tm.assert_numpy_array_equal( |
|
_var_s * 2, pd.eval(e, engine=engine, parser=parser) |
|
) |
|
|
|
def test_no_new_locals(self, engine, parser): |
|
x = 1 |
|
lcls = locals().copy() |
|
pd.eval("x + 1", local_dict=lcls, engine=engine, parser=parser) |
|
lcls2 = locals().copy() |
|
lcls2.pop("lcls") |
|
assert lcls == lcls2 |
|
|
|
def test_no_new_globals(self, engine, parser): |
|
x = 1 |
|
gbls = globals().copy() |
|
pd.eval("x + 1", engine=engine, parser=parser) |
|
gbls2 = globals().copy() |
|
assert gbls == gbls2 |
|
|
|
def test_empty_locals(self, engine, parser): |
|
|
|
x = 1 |
|
msg = "name 'x' is not defined" |
|
with pytest.raises(UndefinedVariableError, match=msg): |
|
pd.eval("x + 1", engine=engine, parser=parser, local_dict={}) |
|
|
|
def test_empty_globals(self, engine, parser): |
|
|
|
msg = "name '_var_s' is not defined" |
|
e = "_var_s * 2" |
|
with pytest.raises(UndefinedVariableError, match=msg): |
|
pd.eval(e, engine=engine, parser=parser, global_dict={}) |
|
|
|
|
|
@td.skip_if_no("numexpr") |
|
def test_invalid_engine(): |
|
msg = "Invalid engine 'asdf' passed" |
|
with pytest.raises(KeyError, match=msg): |
|
pd.eval("x + y", local_dict={"x": 1, "y": 2}, engine="asdf") |
|
|
|
|
|
@td.skip_if_no("numexpr") |
|
@pytest.mark.parametrize( |
|
("use_numexpr", "expected"), |
|
( |
|
(True, "numexpr"), |
|
(False, "python"), |
|
), |
|
) |
|
def test_numexpr_option_respected(use_numexpr, expected): |
|
|
|
from pandas.core.computation.eval import _check_engine |
|
|
|
with pd.option_context("compute.use_numexpr", use_numexpr): |
|
result = _check_engine(None) |
|
assert result == expected |
|
|
|
|
|
@td.skip_if_no("numexpr") |
|
def test_numexpr_option_incompatible_op(): |
|
|
|
with pd.option_context("compute.use_numexpr", False): |
|
df = DataFrame( |
|
{"A": [True, False, True, False, None, None], "B": [1, 2, 3, 4, 5, 6]} |
|
) |
|
result = df.query("A.isnull()") |
|
expected = DataFrame({"A": [None, None], "B": [5, 6]}, index=[4, 5]) |
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
@td.skip_if_no("numexpr") |
|
def test_invalid_parser(): |
|
msg = "Invalid parser 'asdf' passed" |
|
with pytest.raises(KeyError, match=msg): |
|
pd.eval("x + y", local_dict={"x": 1, "y": 2}, parser="asdf") |
|
|
|
|
|
_parsers: dict[str, type[BaseExprVisitor]] = { |
|
"python": PythonExprVisitor, |
|
"pytables": pytables.PyTablesExprVisitor, |
|
"pandas": PandasExprVisitor, |
|
} |
|
|
|
|
|
@pytest.mark.parametrize("engine", ENGINES) |
|
@pytest.mark.parametrize("parser", _parsers) |
|
def test_disallowed_nodes(engine, parser): |
|
VisitorClass = _parsers[parser] |
|
inst = VisitorClass("x + 1", engine, parser) |
|
|
|
for ops in VisitorClass.unsupported_nodes: |
|
msg = "nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
getattr(inst, ops)() |
|
|
|
|
|
def test_syntax_error_exprs(engine, parser): |
|
e = "s +" |
|
with pytest.raises(SyntaxError, match="invalid syntax"): |
|
pd.eval(e, engine=engine, parser=parser) |
|
|
|
|
|
def test_name_error_exprs(engine, parser): |
|
e = "s + t" |
|
msg = "name 's' is not defined" |
|
with pytest.raises(NameError, match=msg): |
|
pd.eval(e, engine=engine, parser=parser) |
|
|
|
|
|
@pytest.mark.parametrize("express", ["a + @b", "@a + b", "@a + @b"]) |
|
def test_invalid_local_variable_reference(engine, parser, express): |
|
a, b = 1, 2 |
|
|
|
if parser != "pandas": |
|
with pytest.raises(SyntaxError, match="The '@' prefix is only"): |
|
pd.eval(express, engine=engine, parser=parser) |
|
else: |
|
with pytest.raises(SyntaxError, match="The '@' prefix is not"): |
|
pd.eval(express, engine=engine, parser=parser) |
|
|
|
|
|
def test_numexpr_builtin_raises(engine, parser): |
|
sin, dotted_line = 1, 2 |
|
if engine == "numexpr": |
|
msg = "Variables in expression .+" |
|
with pytest.raises(NumExprClobberingError, match=msg): |
|
pd.eval("sin + dotted_line", engine=engine, parser=parser) |
|
else: |
|
res = pd.eval("sin + dotted_line", engine=engine, parser=parser) |
|
assert res == sin + dotted_line |
|
|
|
|
|
def test_bad_resolver_raises(engine, parser): |
|
cannot_resolve = 42, 3.0 |
|
with pytest.raises(TypeError, match="Resolver of type .+"): |
|
pd.eval("1 + 2", resolvers=cannot_resolve, engine=engine, parser=parser) |
|
|
|
|
|
def test_empty_string_raises(engine, parser): |
|
|
|
with pytest.raises(ValueError, match="expr cannot be an empty string"): |
|
pd.eval("", engine=engine, parser=parser) |
|
|
|
|
|
def test_more_than_one_expression_raises(engine, parser): |
|
with pytest.raises(SyntaxError, match="only a single expression is allowed"): |
|
pd.eval("1 + 1; 2 + 2", engine=engine, parser=parser) |
|
|
|
|
|
@pytest.mark.parametrize("cmp", ("and", "or")) |
|
@pytest.mark.parametrize("lhs", (int, float)) |
|
@pytest.mark.parametrize("rhs", (int, float)) |
|
def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): |
|
gen = { |
|
int: lambda: np.random.default_rng(2).integers(10), |
|
float: np.random.default_rng(2).standard_normal, |
|
} |
|
|
|
mid = gen[lhs]() |
|
lhs = gen[lhs]() |
|
rhs = gen[rhs]() |
|
|
|
ex1 = f"lhs {cmp} mid {cmp} rhs" |
|
ex2 = f"lhs {cmp} mid and mid {cmp} rhs" |
|
ex3 = f"(lhs {cmp} mid) & (mid {cmp} rhs)" |
|
for ex in (ex1, ex2, ex3): |
|
msg = "cannot evaluate scalar only bool ops|'BoolOp' nodes are not" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
pd.eval(ex, engine=engine, parser=parser) |
|
|
|
|
|
@pytest.mark.parametrize( |
|
"other", |
|
[ |
|
"'x'", |
|
"...", |
|
], |
|
) |
|
def test_equals_various(other): |
|
df = DataFrame({"A": ["a", "b", "c"]}, dtype=object) |
|
result = df.eval(f"A == {other}") |
|
expected = Series([False, False, False], name="A") |
|
if USE_NUMEXPR: |
|
|
|
|
|
expected.name = None |
|
tm.assert_series_equal(result, expected) |
|
|
|
|
|
def test_inf(engine, parser): |
|
s = "inf + 1" |
|
expected = np.inf |
|
result = pd.eval(s, engine=engine, parser=parser) |
|
assert result == expected |
|
|
|
|
|
@pytest.mark.parametrize("column", ["Temp(°C)", "Capacitance(μF)"]) |
|
def test_query_token(engine, column): |
|
|
|
df = DataFrame( |
|
np.random.default_rng(2).standard_normal((5, 2)), columns=[column, "b"] |
|
) |
|
expected = df[df[column] > 5] |
|
query_string = f"`{column}` > 5" |
|
result = df.query(query_string, engine=engine) |
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
def test_negate_lt_eq_le(engine, parser): |
|
df = DataFrame([[0, 10], [1, 20]], columns=["cat", "count"]) |
|
expected = df[~(df.cat > 0)] |
|
|
|
result = df.query("~(cat > 0)", engine=engine, parser=parser) |
|
tm.assert_frame_equal(result, expected) |
|
|
|
if parser == "python": |
|
msg = "'Not' nodes are not implemented" |
|
with pytest.raises(NotImplementedError, match=msg): |
|
df.query("not (cat > 0)", engine=engine, parser=parser) |
|
else: |
|
result = df.query("not (cat > 0)", engine=engine, parser=parser) |
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
@pytest.mark.parametrize( |
|
"column", |
|
DEFAULT_GLOBALS.keys(), |
|
) |
|
def test_eval_no_support_column_name(request, column): |
|
|
|
if column in ["True", "False", "inf", "Inf"]: |
|
request.applymarker( |
|
pytest.mark.xfail( |
|
raises=KeyError, |
|
reason=f"GH 47859 DataFrame eval not supported with {column}", |
|
) |
|
) |
|
|
|
df = DataFrame( |
|
np.random.default_rng(2).integers(0, 100, size=(10, 2)), |
|
columns=[column, "col1"], |
|
) |
|
expected = df[df[column] > 6] |
|
result = df.query(f"{column}>6") |
|
|
|
tm.assert_frame_equal(result, expected) |
|
|
|
|
|
def test_set_inplace(using_copy_on_write, warn_copy_on_write): |
|
|
|
|
|
|
|
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}) |
|
result_view = df[:] |
|
ser = df["A"] |
|
with tm.assert_cow_warning(warn_copy_on_write): |
|
df.eval("A = B + C", inplace=True) |
|
expected = DataFrame({"A": [11, 13, 15], "B": [4, 5, 6], "C": [7, 8, 9]}) |
|
tm.assert_frame_equal(df, expected) |
|
if not using_copy_on_write: |
|
tm.assert_series_equal(ser, expected["A"]) |
|
tm.assert_series_equal(result_view["A"], expected["A"]) |
|
else: |
|
expected = Series([1, 2, 3], name="A") |
|
tm.assert_series_equal(ser, expected) |
|
tm.assert_series_equal(result_view["A"], expected) |
|
|
|
|
|
class TestValidate: |
|
@pytest.mark.parametrize("value", [1, "True", [1, 2, 3], 5.0]) |
|
def test_validate_bool_args(self, value): |
|
msg = 'For argument "inplace" expected type bool, received type' |
|
with pytest.raises(ValueError, match=msg): |
|
pd.eval("2+2", inplace=value) |
|
|