File size: 14,300 Bytes
7885a28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 |
"""
Module that contains many useful utilities
for validating data or function arguments
"""
from __future__ import annotations
from collections.abc import (
Iterable,
Sequence,
)
from typing import (
TypeVar,
overload,
)
import numpy as np
from pandas._libs import lib
from pandas.core.dtypes.common import (
is_bool,
is_integer,
)
BoolishT = TypeVar("BoolishT", bool, int)
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
def _check_arg_length(fname, args, max_fname_arg_count, compat_args) -> None:
"""
Checks whether 'args' has length of at most 'compat_args'. Raises
a TypeError if that is not the case, similar to in Python when a
function is called with too many arguments.
"""
if max_fname_arg_count < 0:
raise ValueError("'max_fname_arg_count' must be non-negative")
if len(args) > len(compat_args):
max_arg_count = len(compat_args) + max_fname_arg_count
actual_arg_count = len(args) + max_fname_arg_count
argument = "argument" if max_arg_count == 1 else "arguments"
raise TypeError(
f"{fname}() takes at most {max_arg_count} {argument} "
f"({actual_arg_count} given)"
)
def _check_for_default_values(fname, arg_val_dict, compat_args) -> None:
"""
Check that the keys in `arg_val_dict` are mapped to their
default values as specified in `compat_args`.
Note that this function is to be called only when it has been
checked that arg_val_dict.keys() is a subset of compat_args
"""
for key in arg_val_dict:
# try checking equality directly with '=' operator,
# as comparison may have been overridden for the left
# hand object
try:
v1 = arg_val_dict[key]
v2 = compat_args[key]
# check for None-ness otherwise we could end up
# comparing a numpy array vs None
if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
match = False
else:
match = v1 == v2
if not is_bool(match):
raise ValueError("'match' is not a boolean")
# could not compare them directly, so try comparison
# using the 'is' operator
except ValueError:
match = arg_val_dict[key] is compat_args[key]
if not match:
raise ValueError(
f"the '{key}' parameter is not supported in "
f"the pandas implementation of {fname}()"
)
def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
"""
Checks whether the length of the `*args` argument passed into a function
has at most `len(compat_args)` arguments and whether or not all of these
elements in `args` are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `*args` parameter
args : tuple
The `*args` parameter passed into a function
max_fname_arg_count : int
The maximum number of arguments that the function `fname`
can accept, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args : dict
A dictionary of keys and their associated default values.
In order to accommodate buggy behaviour in some versions of `numpy`,
where a signature displayed keyword arguments but then passed those
arguments **positionally** internally when calling downstream
implementations, a dict ensures that the original
order of the keyword arguments is enforced.
Raises
------
TypeError
If `args` contains more values than there are `compat_args`
ValueError
If `args` contains values that do not correspond to those
of the default values specified in `compat_args`
"""
_check_arg_length(fname, args, max_fname_arg_count, compat_args)
# We do this so that we can provide a more informative
# error message about the parameters that we are not
# supporting in the pandas implementation of 'fname'
kwargs = dict(zip(compat_args, args))
_check_for_default_values(fname, kwargs, compat_args)
def _check_for_invalid_keys(fname, kwargs, compat_args) -> None:
"""
Checks whether 'kwargs' contains any keys that are not
in 'compat_args' and raises a TypeError if there is one.
"""
# set(dict) --> set of the dictionary's keys
diff = set(kwargs) - set(compat_args)
if diff:
bad_arg = next(iter(diff))
raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
def validate_kwargs(fname, kwargs, compat_args) -> None:
"""
Checks whether parameters passed to the **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname : str
The name of the function being passed the `**kwargs` parameter
kwargs : dict
The `**kwargs` parameter passed into `fname`
compat_args: dict
A dictionary of keys that `kwargs` is allowed to have and their
associated default values
Raises
------
TypeError if `kwargs` contains keys not in `compat_args`
ValueError if `kwargs` contains keys in `compat_args` that do not
map to the default values specified in `compat_args`
"""
kwds = kwargs.copy()
_check_for_invalid_keys(fname, kwargs, compat_args)
_check_for_default_values(fname, kwds, compat_args)
def validate_args_and_kwargs(
fname, args, kwargs, max_fname_arg_count, compat_args
) -> None:
"""
Checks whether parameters passed to the *args and **kwargs argument in a
function `fname` are valid parameters as specified in `*compat_args`
and whether or not they are set to their default values.
Parameters
----------
fname: str
The name of the function being passed the `**kwargs` parameter
args: tuple
The `*args` parameter passed into a function
kwargs: dict
The `**kwargs` parameter passed into `fname`
max_fname_arg_count: int
The minimum number of arguments that the function `fname`
requires, excluding those in `args`. Used for displaying
appropriate error messages. Must be non-negative.
compat_args: dict
A dictionary of keys that `kwargs` is allowed to
have and their associated default values.
Raises
------
TypeError if `args` contains more values than there are
`compat_args` OR `kwargs` contains keys not in `compat_args`
ValueError if `args` contains values not at the default value (`None`)
`kwargs` contains keys in `compat_args` that do not map to the default
value as specified in `compat_args`
See Also
--------
validate_args : Purely args validation.
validate_kwargs : Purely kwargs validation.
"""
# Check that the total number of arguments passed in (i.e.
# args and kwargs) does not exceed the length of compat_args
_check_arg_length(
fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
)
# Check there is no overlap with the positional and keyword
# arguments, similar to what is done in actual Python functions
args_dict = dict(zip(compat_args, args))
for key in args_dict:
if key in kwargs:
raise TypeError(
f"{fname}() got multiple values for keyword argument '{key}'"
)
kwargs.update(args_dict)
validate_kwargs(fname, kwargs, compat_args)
def validate_bool_kwarg(
value: BoolishNoneT,
arg_name: str,
none_allowed: bool = True,
int_allowed: bool = False,
) -> BoolishNoneT:
"""
Ensure that argument passed in arg_name can be interpreted as boolean.
Parameters
----------
value : bool
Value to be validated.
arg_name : str
Name of the argument. To be reflected in the error message.
none_allowed : bool, default True
Whether to consider None to be a valid boolean.
int_allowed : bool, default False
Whether to consider integer value to be a valid boolean.
Returns
-------
value
The same value as input.
Raises
------
ValueError
If the value is not a valid boolean.
"""
good_value = is_bool(value)
if none_allowed:
good_value = good_value or (value is None)
if int_allowed:
good_value = good_value or isinstance(value, int)
if not good_value:
raise ValueError(
f'For argument "{arg_name}" expected type bool, received '
f"type {type(value).__name__}."
)
return value # pyright: ignore[reportGeneralTypeIssues]
def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
"""
Validate the keyword arguments to 'fillna'.
This checks that exactly one of 'value' and 'method' is specified.
If 'method' is specified, this validates that it's a valid method.
Parameters
----------
value, method : object
The 'value' and 'method' keyword arguments for 'fillna'.
validate_scalar_dict_value : bool, default True
Whether to validate that 'value' is a scalar or dict. Specifically,
validate that it is not a list or tuple.
Returns
-------
value, method : object
"""
from pandas.core.missing import clean_fill_method
if value is None and method is None:
raise ValueError("Must specify a fill 'value' or 'method'.")
if value is None and method is not None:
method = clean_fill_method(method)
elif value is not None and method is None:
if validate_scalar_dict_value and isinstance(value, (list, tuple)):
raise TypeError(
'"value" parameter must be a scalar or dict, but '
f'you passed a "{type(value).__name__}"'
)
elif value is not None and method is not None:
raise ValueError("Cannot specify both 'value' and 'method'.")
return value, method
def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
"""
Validate percentiles (used by describe and quantile).
This function checks if the given float or iterable of floats is a valid percentile
otherwise raises a ValueError.
Parameters
----------
q: float or iterable of floats
A single percentile or an iterable of percentiles.
Returns
-------
ndarray
An ndarray of the percentiles if valid.
Raises
------
ValueError if percentiles are not in given interval([0, 1]).
"""
q_arr = np.asarray(q)
# Don't change this to an f-string. The string formatting
# is too expensive for cases where we don't need it.
msg = "percentiles should all be in the interval [0, 1]"
if q_arr.ndim == 0:
if not 0 <= q_arr <= 1:
raise ValueError(msg)
else:
if not all(0 <= qs <= 1 for qs in q_arr):
raise ValueError(msg)
return q_arr
@overload
def validate_ascending(ascending: BoolishT) -> BoolishT:
...
@overload
def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
...
def validate_ascending(
ascending: bool | int | Sequence[BoolishT],
) -> bool | int | list[BoolishT]:
"""Validate ``ascending`` kwargs for ``sort_index`` method."""
kwargs = {"none_allowed": False, "int_allowed": True}
if not isinstance(ascending, Sequence):
return validate_bool_kwarg(ascending, "ascending", **kwargs)
return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
"""
Check that the `closed` argument is among [None, "left", "right"]
Parameters
----------
closed : {None, "left", "right"}
Returns
-------
left_closed : bool
right_closed : bool
Raises
------
ValueError : if argument is not among valid values
"""
left_closed = False
right_closed = False
if closed is None:
left_closed = True
right_closed = True
elif closed == "left":
left_closed = True
elif closed == "right":
right_closed = True
else:
raise ValueError("Closed has to be either 'left', 'right' or None")
return left_closed, right_closed
def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
"""
Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
Parameters
----------
inclusive : {"both", "neither", "left", "right"}
Returns
-------
left_right_inclusive : tuple[bool, bool]
Raises
------
ValueError : if argument is not among valid values
"""
left_right_inclusive: tuple[bool, bool] | None = None
if isinstance(inclusive, str):
left_right_inclusive = {
"both": (True, True),
"left": (True, False),
"right": (False, True),
"neither": (False, False),
}.get(inclusive)
if left_right_inclusive is None:
raise ValueError(
"Inclusive has to be either 'both', 'neither', 'left' or 'right'"
)
return left_right_inclusive
def validate_insert_loc(loc: int, length: int) -> int:
"""
Check that we have an integer between -length and length, inclusive.
Standardize negative loc to within [0, length].
The exceptions we raise on failure match np.insert.
"""
if not is_integer(loc):
raise TypeError(f"loc must be an integer between -{length} and {length}")
if loc < 0:
loc += length
if not 0 <= loc <= length:
raise IndexError(f"loc must be an integer between -{length} and {length}")
return loc # pyright: ignore[reportGeneralTypeIssues]
def check_dtype_backend(dtype_backend) -> None:
if dtype_backend is not lib.no_default:
if dtype_backend not in ["numpy_nullable", "pyarrow"]:
raise ValueError(
f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
f"'pyarrow' are allowed.",
)
|