|
|
|
import warnings |
|
import numpy as np |
|
import scipy._lib._elementwise_iterative_method as eim |
|
from scipy._lib._util import _RichResult |
|
from scipy._lib._array_api import array_namespace, xp_sign, xp_copy, xp_take_along_axis |
|
|
|
_EERRORINCREASE = -1  # status code: iteration stopped because the error estimate increased
|
|
|
def _derivative_iv(f, x, args, tolerances, maxiter, order, initial_step, |
|
step_factor, step_direction, preserve_shape, callback): |
|
|
|
xp = array_namespace(x) |
|
|
|
if not callable(f): |
|
raise ValueError('`f` must be callable.') |
|
|
|
if not np.iterable(args): |
|
args = (args,) |
|
|
|
tolerances = {} if tolerances is None else tolerances |
|
atol = tolerances.get('atol', None) |
|
rtol = tolerances.get('rtol', None) |
|
|
|
|
|
message = 'Tolerances and step parameters must be non-negative scalars.' |
|
tols = np.asarray([atol if atol is not None else 1, |
|
rtol if rtol is not None else 1, |
|
step_factor]) |
|
if (not np.issubdtype(tols.dtype, np.number) or np.any(tols < 0) |
|
or np.any(np.isnan(tols)) or tols.shape != (3,)): |
|
raise ValueError(message) |
|
step_factor = float(tols[2]) |
|
|
|
maxiter_int = int(maxiter) |
|
if maxiter != maxiter_int or maxiter <= 0: |
|
raise ValueError('`maxiter` must be a positive integer.') |
|
|
|
order_int = int(order) |
|
if order_int != order or order <= 0: |
|
raise ValueError('`order` must be a positive integer.') |
|
|
|
step_direction = xp.asarray(step_direction) |
|
initial_step = xp.asarray(initial_step) |
|
temp = xp.broadcast_arrays(x, step_direction, initial_step) |
|
x, step_direction, initial_step = temp |
|
|
|
message = '`preserve_shape` must be True or False.' |
|
if preserve_shape not in {True, False}: |
|
raise ValueError(message) |
|
|
|
if callback is not None and not callable(callback): |
|
raise ValueError('`callback` must be callable.') |
|
|
|
return (f, x, args, atol, rtol, maxiter_int, order_int, initial_step, |
|
step_factor, step_direction, preserve_shape, callback) |
|
|
|
|
|
def derivative(f, x, *, args=(), tolerances=None, maxiter=10, |
|
order=8, initial_step=0.5, step_factor=2.0, |
|
step_direction=0, preserve_shape=False, callback=None): |
|
"""Evaluate the derivative of a elementwise, real scalar function numerically. |
|
|
|
For each element of the output of `f`, `derivative` approximates the first |
|
derivative of `f` at the corresponding element of `x` using finite difference |
|
differentiation. |
|
|
|
This function works elementwise when `x`, `step_direction`, and `args` contain |
|
(broadcastable) arrays. |
|
|
|
Parameters |
|
---------- |
|
f : callable |
|
The function whose derivative is desired. The signature must be:: |
|
|
|
f(xi: ndarray, *argsi) -> ndarray |
|
|
|
where each element of ``xi`` is a finite real number and ``argsi`` is a tuple, |
|
which may contain an arbitrary number of arrays that are broadcastable with |
|
``xi``. `f` must be an elementwise function: each scalar element ``f(xi)[j]`` |
|
must equal ``f(xi[j])`` for valid indices ``j``. It must not mutate the array |
|
``xi`` or the arrays in ``argsi``. |
|
x : float array_like |
|
Abscissae at which to evaluate the derivative. Must be broadcastable with |
|
`args` and `step_direction`. |
|
args : tuple of array_like, optional |
|
Additional positional array arguments to be passed to `f`. Arrays |
|
        must be broadcastable with one another and with `x`.

        If the callable whose derivative is desired requires arguments that are
|
not broadcastable with `x`, wrap that callable with `f` such that `f` |
|
accepts only `x` and broadcastable ``*args``. |
|
tolerances : dictionary of floats, optional |
|
Absolute and relative tolerances. Valid keys of the dictionary are: |
|
|
|
- ``atol`` - absolute tolerance on the derivative |
|
- ``rtol`` - relative tolerance on the derivative |
|
|
|
Iteration will stop when ``res.error < atol + rtol * abs(res.df)``. The default |
|
`atol` is the smallest normal number of the appropriate dtype, and |
|
the default `rtol` is the square root of the precision of the |
|
appropriate dtype. |
|
order : int, default: 8 |
|
The (positive integer) order of the finite difference formula to be |
|
used. Odd integers will be rounded up to the next even integer. |
|
initial_step : float array_like, default: 0.5 |
|
The (absolute) initial step size for the finite difference derivative |
|
approximation. |
|
step_factor : float, default: 2.0 |
|
The factor by which the step size is *reduced* in each iteration; i.e. |
|
the step size in iteration 1 is ``initial_step/step_factor``. If |
|
``step_factor < 1``, subsequent steps will be greater than the initial |
|
step; this may be useful if steps smaller than some threshold are |
|
undesirable (e.g. due to subtractive cancellation error). |
|
maxiter : int, default: 10 |
|
The maximum number of iterations of the algorithm to perform. See |
|
Notes. |
|
step_direction : integer array_like |
|
An array representing the direction of the finite difference steps (for |
|
        use when `x` lies near the boundary of the domain of the function).
|
Must be broadcastable with `x` and all `args`. |
|
Where 0 (default), central differences are used; where negative (e.g. |
|
-1), steps are non-positive; and where positive (e.g. 1), all steps are |
|
non-negative. |
|
preserve_shape : bool, default: False |
|
In the following, "arguments of `f`" refers to the array ``xi`` and |
|
any arrays within ``argsi``. Let ``shape`` be the broadcasted shape |
|
of `x` and all elements of `args` (which is conceptually |
|
        distinct from ``xi`` and ``argsi`` passed into `f`).
|
|
|
- When ``preserve_shape=False`` (default), `f` must accept arguments |
|
of *any* broadcastable shapes. |
|
|
|
- When ``preserve_shape=True``, `f` must accept arguments of shape |
|
          ``shape`` *or* ``shape + (n,)``, where ``n`` is the number of
|
abscissae at which the function is being evaluated. |
|
|
|
In either case, for each scalar element ``xi[j]`` within ``xi``, the array |
|
returned by `f` must include the scalar ``f(xi[j])`` at the same index. |
|
Consequently, the shape of the output is always the shape of the input |
|
``xi``. |
|
|
|
See Examples. |
|
callback : callable, optional |
|
An optional user-supplied function to be called before the first |
|
iteration and after each iteration. |
|
Called as ``callback(res)``, where ``res`` is a ``_RichResult`` |
|
similar to that returned by `derivative` (but containing the current |
|
iterate's values of all variables). If `callback` raises a |
|
``StopIteration``, the algorithm will terminate immediately and |
|
`derivative` will return a result. `callback` must not mutate |
|
`res` or its attributes. |
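
        For instance, a minimal sketch of a callback that stops the iteration
        early (the stopping condition here is only illustrative)::

            def callback(res):
                if res.nit == 3:
                    raise StopIteration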
|
|
|
Returns |
|
------- |
|
res : _RichResult |
|
An object similar to an instance of `scipy.optimize.OptimizeResult` with the |
|
following attributes. The descriptions are written as though the values will |
|
be scalars; however, if `f` returns an array, the outputs will be |
|
arrays of the same shape. |
|
|
|
success : bool array |
|
``True`` where the algorithm terminated successfully (status ``0``); |
|
``False`` otherwise. |
|
status : int array |
|
An integer representing the exit status of the algorithm. |
|
|
|
- ``0`` : The algorithm converged to the specified tolerances. |
|
- ``-1`` : The error estimate increased, so iteration was terminated. |
|
- ``-2`` : The maximum number of iterations was reached. |
|
- ``-3`` : A non-finite value was encountered. |
|
- ``-4`` : Iteration was terminated by `callback`. |
|
- ``1`` : The algorithm is proceeding normally (in `callback` only). |
|
|
|
df : float array |
|
The derivative of `f` at `x`, if the algorithm terminated |
|
successfully. |
|
error : float array |
|
An estimate of the error: the magnitude of the difference between |
|
the current estimate of the derivative and the estimate in the |
|
previous iteration. |
|
nit : int array |
|
The number of iterations of the algorithm that were performed. |
|
nfev : int array |
|
The number of points at which `f` was evaluated. |
|
x : float array |
|
The value at which the derivative of `f` was evaluated |
|
(after broadcasting with `args` and `step_direction`). |
|
|
|
See Also |
|
-------- |
|
jacobian, hessian |
|
|
|
Notes |
|
----- |
|
The implementation was inspired by jacobi [1]_, numdifftools [2]_, and |
|
    DERIVEST [3]_, but it follows the theory of Taylor series
|
more straightforwardly (and arguably naively so). |
|
In the first iteration, the derivative is estimated using a finite |
|
difference formula of order `order` with maximum step size `initial_step`. |
|
Each subsequent iteration, the maximum step size is reduced by |
|
`step_factor`, and the derivative is estimated again until a termination |
|
condition is reached. The error estimate is the magnitude of the difference |
|
between the current derivative approximation and that of the previous |
|
iteration. |
|
|
|
The stencils of the finite difference formulae are designed such that |
|
abscissae are "nested": after `f` is evaluated at ``order + 1`` |
|
points in the first iteration, `f` is evaluated at only two new points |
|
in each subsequent iteration; ``order - 1`` previously evaluated function |
|
values required by the finite difference formula are reused, and two |
|
function values (evaluations at the points furthest from `x`) are unused. |
|
|
|
Step sizes are absolute. When the step size is small relative to the |
|
magnitude of `x`, precision is lost; for example, if `x` is ``1e20``, the |
|
default initial step size of ``0.5`` cannot be resolved. Accordingly, |
|
consider using larger initial step sizes for large magnitudes of `x`. |
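
    For instance, a sketch of the idea (the function and step size here are
    only illustrative)::

        res = derivative(np.log, 1e20, initial_step=1e12)
        # res.df should be close to the true derivative, 1e-20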
|
|
|
The default tolerances are challenging to satisfy at points where the |
|
true derivative is exactly zero. If the derivative may be exactly zero, |
|
consider specifying an absolute tolerance (e.g. ``atol=1e-12``) to |
|
improve convergence. |
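
    For instance, a sketch of passing such a tolerance (``np.cos`` is used only
    because its derivative is exactly zero at ``x = 0``)::

        res = derivative(np.cos, 0., tolerances=dict(atol=1e-12))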
|
|
|
References |
|
---------- |
|
.. [1] Hans Dembinski (@HDembinski). jacobi. |
|
https://github.com/HDembinski/jacobi |
|
.. [2] Per A. Brodtkorb and John D'Errico. numdifftools. |
|
https://numdifftools.readthedocs.io/en/latest/ |
|
.. [3] John D'Errico. DERIVEST: Adaptive Robust Numerical Differentiation. |
|
https://www.mathworks.com/matlabcentral/fileexchange/13490-adaptive-robust-numerical-differentiation |
|
    .. [4] Numerical Differentiation. Wikipedia.
|
https://en.wikipedia.org/wiki/Numerical_differentiation |
|
|
|
Examples |
|
-------- |
|
Evaluate the derivative of ``np.exp`` at several points ``x``. |
|
|
|
>>> import numpy as np |
|
>>> from scipy.differentiate import derivative |
|
>>> f = np.exp |
|
>>> df = np.exp # true derivative |
|
>>> x = np.linspace(1, 2, 5) |
|
>>> res = derivative(f, x) |
|
>>> res.df # approximation of the derivative |
|
array([2.71828183, 3.49034296, 4.48168907, 5.75460268, 7.3890561 ]) |
|
>>> res.error # estimate of the error |
|
array([7.13740178e-12, 9.16600129e-12, 1.17594823e-11, 1.51061386e-11, |
|
1.94262384e-11]) |
|
>>> abs(res.df - df(x)) # true error |
|
array([2.53130850e-14, 3.55271368e-14, 5.77315973e-14, 5.59552404e-14, |
|
6.92779167e-14]) |
|
|
|
Show the convergence of the approximation as the step size is reduced. |
|
Each iteration, the step size is reduced by `step_factor`, so for |
|
sufficiently small initial step, each iteration reduces the error by a |
|
factor of ``1/step_factor**order`` until finite precision arithmetic |
|
inhibits further improvement. |
|
|
|
>>> import matplotlib.pyplot as plt |
|
>>> iter = list(range(1, 12)) # maximum iterations |
|
>>> hfac = 2 # step size reduction per iteration |
|
>>> hdir = [-1, 0, 1] # compare left-, central-, and right- steps |
|
>>> order = 4 # order of differentiation formula |
|
>>> x = 1 |
|
>>> ref = df(x) |
|
>>> errors = [] # true error |
|
>>> for i in iter: |
|
... res = derivative(f, x, maxiter=i, step_factor=hfac, |
|
... step_direction=hdir, order=order, |
|
... # prevent early termination |
|
... tolerances=dict(atol=0, rtol=0)) |
|
... errors.append(abs(res.df - ref)) |
|
>>> errors = np.array(errors) |
|
>>> plt.semilogy(iter, errors[:, 0], label='left differences') |
|
>>> plt.semilogy(iter, errors[:, 1], label='central differences') |
|
>>> plt.semilogy(iter, errors[:, 2], label='right differences') |
|
>>> plt.xlabel('iteration') |
|
>>> plt.ylabel('error') |
|
>>> plt.legend() |
|
>>> plt.show() |
|
>>> (errors[1, 1] / errors[0, 1], 1 / hfac**order) |
|
(0.06215223140159822, 0.0625) |
|
|
|
The implementation is vectorized over `x`, `step_direction`, and `args`. |
|
The function is evaluated once before the first iteration to perform input |
|
validation and standardization, and once per iteration thereafter. |
|
|
|
>>> def f(x, p): |
|
... f.nit += 1 |
|
... return x**p |
|
>>> f.nit = 0 |
|
>>> def df(x, p): |
|
... return p*x**(p-1) |
|
>>> x = np.arange(1, 5) |
|
>>> p = np.arange(1, 6).reshape((-1, 1)) |
|
>>> hdir = np.arange(-1, 2).reshape((-1, 1, 1)) |
|
>>> res = derivative(f, x, args=(p,), step_direction=hdir, maxiter=1) |
|
>>> np.allclose(res.df, df(x, p)) |
|
True |
|
>>> res.df.shape |
|
(3, 5, 4) |
|
>>> f.nit |
|
2 |
|
|
|
By default, `preserve_shape` is False, and therefore the callable |
|
`f` may be called with arrays of any broadcastable shapes. |
|
For example: |
|
|
|
>>> shapes = [] |
|
>>> def f(x, c): |
|
... shape = np.broadcast_shapes(x.shape, c.shape) |
|
... shapes.append(shape) |
|
... return np.sin(c*x) |
|
>>> |
|
>>> c = [1, 5, 10, 20] |
|
>>> res = derivative(f, 0, args=(c,)) |
|
>>> shapes |
|
[(4,), (4, 8), (4, 2), (3, 2), (2, 2), (1, 2)] |
|
|
|
To understand where these shapes are coming from - and to better |
|
understand how `derivative` computes accurate results - note that |
|
higher values of ``c`` correspond with higher frequency sinusoids. |
|
The higher frequency sinusoids make the function's derivative change |
|
faster, so more function evaluations are required to achieve the target |
|
accuracy: |
|
|
|
>>> res.nfev |
|
array([11, 13, 15, 17], dtype=int32) |
|
|
|
The initial ``shape``, ``(4,)``, corresponds with evaluating the |
|
function at a single abscissa and all four frequencies; this is used |
|
for input validation and to determine the size and dtype of the arrays |
|
that store results. The next shape corresponds with evaluating the |
|
function at an initial grid of abscissae and all four frequencies. |
|
Successive calls to the function evaluate the function at two more |
|
abscissae, increasing the effective order of the approximation by two. |
|
However, in later function evaluations, the function is evaluated at |
|
fewer frequencies because the corresponding derivative has already |
|
converged to the required tolerance. This saves function evaluations to |
|
improve performance, but it requires the function to accept arguments of |
|
any shape. |
|
|
|
"Vector-valued" functions are unlikely to satisfy this requirement. |
|
For example, consider |
|
|
|
>>> def f(x): |
|
... return [x, np.sin(3*x), x+np.sin(10*x), np.sin(20*x)*(x-1)**2] |
|
|
|
    This function is not compatible with `derivative` as written; for instance,
|
the shape of the output will not be the same as the shape of ``x``. Such a |
|
function *could* be converted to a compatible form with the introduction of |
|
additional parameters, but this would be inconvenient. In such cases, |
|
a simpler solution would be to use `preserve_shape`. |
|
|
|
>>> shapes = [] |
|
>>> def f(x): |
|
... shapes.append(x.shape) |
|
... x0, x1, x2, x3 = x |
|
... return [x0, np.sin(3*x1), x2+np.sin(10*x2), np.sin(20*x3)*(x3-1)**2] |
|
>>> |
|
>>> x = np.zeros(4) |
|
>>> res = derivative(f, x, preserve_shape=True) |
|
>>> shapes |
|
[(4,), (4, 8), (4, 2), (4, 2), (4, 2), (4, 2)] |
|
|
|
Here, the shape of ``x`` is ``(4,)``. With ``preserve_shape=True``, the |
|
function may be called with argument ``x`` of shape ``(4,)`` or ``(4, n)``, |
|
and this is what we observe. |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
res = _derivative_iv(f, x, args, tolerances, maxiter, order, initial_step, |
|
step_factor, step_direction, preserve_shape, callback) |
|
(func, x, args, atol, rtol, maxiter, order, |
|
h0, fac, hdir, preserve_shape, callback) = res |
|
|
|
|
|
|
|
|
|
|
|
|
|
    # Evaluate the function once to standardize the inputs and determine the
    # shape and dtype of the arrays that will store the results.
    temp = eim._initialize(func, (x,), args, preserve_shape=preserve_shape)
|
func, xs, fs, args, shape, dtype, xp = temp |
|
|
|
finfo = xp.finfo(dtype) |
|
atol = finfo.smallest_normal if atol is None else atol |
|
rtol = finfo.eps**0.5 if rtol is None else rtol |
|
|
|
x, f = xs[0], fs[0] |
|
df = xp.full_like(f, xp.nan) |
|
|
|
|
|
|
|
|
|
|
|
    # Broadcast the step direction and initial step to the shape of the
    # broadcasted inputs, flatten them to match the flattened `x`, and convert
    # them to the working dtype. Non-positive initial steps are invalid; they
    # are replaced with NaN so the corresponding results are NaN.
    hdir = xp.broadcast_to(hdir, shape)
|
hdir = xp.reshape(hdir, (-1,)) |
|
hdir = xp.astype(xp_sign(hdir), dtype) |
|
h0 = xp.broadcast_to(h0, shape) |
|
h0 = xp.reshape(h0, (-1,)) |
|
h0 = xp.astype(h0, dtype) |
|
h0[h0 <= 0] = xp.asarray(xp.nan, dtype=dtype) |
|
|
|
status = xp.full_like(x, eim._EINPROGRESS, dtype=xp.int32) |
|
nit, nfev = 0, 1 |
|
|
|
    # Masks selecting elements that take steps to the left (`il`), central
    # differences (`ic`), or steps to the right (`ir`); `io` marks any
    # one-sided step.
    il = hdir < 0
|
ic = hdir == 0 |
|
ir = hdir > 0 |
|
io = il | ir |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
work = _RichResult(x=x, df=df, fs=f[:, xp.newaxis], error=xp.nan, h=h0, |
|
df_last=xp.nan, error_last=xp.nan, fac=fac, |
|
atol=atol, rtol=rtol, nit=nit, nfev=nfev, |
|
status=status, dtype=dtype, terms=(order+1)//2, |
|
hdir=hdir, il=il, ic=ic, ir=ir, io=io, |
|
|
|
|
|
                       # Cache for the finite difference weights; recomputed by
                       # `_derivative_weights` only when the step factor changes.
                       diff_state=_RichResult(central=[], right=[], fac=None))
|
|
|
|
|
|
|
|
|
    # Correspondence between attributes of the result object and attributes of
    # the `work` object.
    res_work_pairs = [('status', 'status'), ('df', 'df'), ('error', 'error'),
|
('nit', 'nit'), ('nfev', 'nfev'), ('x', 'x')] |
|
|
|
def pre_func_eval(work): |
|
"""Determine the abscissae at which the function needs to be evaluated. |
|
|
|
See `_derivative_weights` for a description of the stencil (pattern |
|
of the abscissae). |
|
|
|
In the first iteration, there is only one stored function value in |
|
`work.fs`, `f(x)`, so we need to evaluate at `order` new points. In |
|
subsequent iterations, we evaluate at two new points. Note that |
|
`work.x` is always flattened into a 1D array after broadcasting with |
|
        all `args`, so we add a new axis at the end and evaluate all points
|
in one call to the function. |
|
|
|
For improvement: |
|
- Consider measuring the step size actually taken, since ``(x + h) - x`` |
|
is not identically equal to `h` with floating point arithmetic. |
|
- Adjust the step size automatically if `x` is too big to resolve the |
|
step. |
|
- We could probably save some work if there are no central difference |
|
steps or no one-sided steps. |
|
""" |
|
n = work.terms |
|
h = work.h[:, xp.newaxis] |
|
c = work.fac |
|
d = c**0.5 |
|
|
|
|
|
        # Central difference abscissae: in the first iteration, steps of size
        # h/c**0, ..., h/c**(n-1) on both sides of `x`; afterwards, only the two
        # new (smallest) steps of size h/c**(n-1).
        if work.nit == 0:
|
hc = h / c**xp.arange(n, dtype=work.dtype) |
|
hc = xp.concat((-xp.flip(hc, axis=-1), hc), axis=-1) |
|
else: |
|
hc = xp.concat((-h, h), axis=-1) / c**(n-1) |
|
|
|
        # One-sided difference abscissae: a geometric sequence with ratio
        # 1/sqrt(c), so each iteration appends the next two (smallest) points.
        if work.nit == 0:
|
hr = h / d**xp.arange(2*n, dtype=work.dtype) |
|
else: |
|
hr = xp.concat((h, h/d), axis=-1) / c**(n-1) |
|
|
|
n_new = 2*n if work.nit == 0 else 2 |
|
x_eval = xp.zeros((work.hdir.shape[0], n_new), dtype=work.dtype) |
|
il, ic, ir = work.il, work.ic, work.ir |
|
x_eval[ir] = work.x[ir][:, xp.newaxis] + hr[ir] |
|
x_eval[ic] = work.x[ic][:, xp.newaxis] + hc[ic] |
|
x_eval[il] = work.x[il][:, xp.newaxis] - hr[il] |
|
return x_eval |
|
|
|
def post_func_eval(x, f, work): |
|
""" Estimate the derivative and error from the function evaluations |
|
|
|
As in `pre_func_eval`: in the first iteration, there is only one stored |
|
function value in `work.fs`, `f(x)`, so we need to add the `order` new |
|
points. In subsequent iterations, we add two new points. The tricky |
|
part is getting the order to match that of the weights, which is |
|
described in `_derivative_weights`. |
|
|
|
For improvement: |
|
- Change the order of the weights (and steps in `pre_func_eval`) to |
|
simplify `work_fc` concatenation and eliminate `fc` concatenation. |
|
- It would be simple to do one-step Richardson extrapolation with `df` |
|
and `df_last` to increase the order of the estimate and/or improve |
|
the error estimate. |
|
- Process the function evaluations in a more numerically favorable |
|
way. For instance, combining the pairs of central difference evals |
|
into a second-order approximation and using Richardson extrapolation |
|
to produce a higher order approximation seemed to retain accuracy up |
|
to very high order. |
|
- Alternatively, we could use `polyfit` like Jacobi. An advantage of |
|
          fitting a polynomial to more points than necessary is improved noise
|
tolerance. |
|
""" |
|
n = work.terms |
|
n_new = n if work.nit == 0 else 1 |
|
il, ic, io = work.il, work.ic, work.io |
|
|
|
|
|
|
|
|
|
        # Central differences: combine the newly evaluated function values with
        # the stored ones so that their order continues to match the order of
        # the central difference weights.
        work_fc = (f[ic][:, :n_new], work.fs[ic], f[ic][:, -n_new:])
|
work_fc = xp.concat(work_fc, axis=-1) |
|
if work.nit == 0: |
|
fc = work_fc |
|
else: |
|
fc = (work_fc[:, :n], work_fc[:, n:n+1], work_fc[:, -n:]) |
|
fc = xp.concat(fc, axis=-1) |
|
|
|
|
|
        # One-sided differences: append the new function values to the stored
        # ones. The estimate uses f(x) plus the 2*n most recently evaluated
        # values, ordered to match the one-sided weights.
        work_fo = xp.concat((work.fs[io], f[io]), axis=-1)
|
if work.nit == 0: |
|
fo = work_fo |
|
else: |
|
fo = xp.concat((work_fo[:, 0:1], work_fo[:, -2*n:]), axis=-1) |
|
|
|
work.fs = xp.zeros((ic.shape[0], work.fs.shape[-1] + 2*n_new), dtype=work.dtype) |
|
work.fs[ic] = work_fc |
|
work.fs[io] = work_fo |
|
|
|
wc, wo = _derivative_weights(work, n, xp) |
|
work.df_last = xp.asarray(work.df, copy=True) |
|
work.df[ic] = fc @ wc / work.h[ic] |
|
work.df[io] = fo @ wo / work.h[io] |
|
work.df[il] *= -1 |
|
|
|
work.h /= work.fac |
|
work.error_last = work.error |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
        # The error estimate is the magnitude of the change in the derivative
        # estimate between consecutive iterations.
        work.error = xp.abs(work.df - work.df_last)
|
|
|
def check_termination(work): |
|
"""Terminate due to convergence, non-finite values, or error increase""" |
|
stop = xp.astype(xp.zeros_like(work.df), xp.bool) |
|
|
|
i = work.error < work.atol + work.rtol*abs(work.df) |
|
work.status[i] = eim._ECONVERGED |
|
stop[i] = True |
|
|
|
if work.nit > 0: |
|
i = ~((xp.isfinite(work.x) & xp.isfinite(work.df)) | stop) |
|
work.df[i], work.status[i] = xp.nan, eim._EVALUEERR |
|
stop[i] = True |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
        # Heuristic: if the error estimate has grown by more than a factor of 10
        # since the previous iteration (e.g. because the step has become so
        # small that roundoff dominates), stop rather than make the estimate
        # worse.
        i = (work.error > work.error_last*10) & ~stop
|
work.status[i] = _EERRORINCREASE |
|
stop[i] = True |
|
|
|
return stop |
|
|
|
def post_termination_check(work): |
|
return |
|
|
|
def customize_result(res, shape): |
|
return shape |
|
|
|
return eim._loop(work, callback, shape, maxiter, func, args, dtype, |
|
pre_func_eval, post_func_eval, check_termination, |
|
post_termination_check, customize_result, res_work_pairs, |
|
xp, preserve_shape) |
|
|
|
|
|
def _derivative_weights(work, n, xp): |
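    """Compute the finite difference weights for the current step factor.

    The derivative estimate is ``df ~= (f @ w) / h``: ``f`` holds function values
    at abscissae ``x + h*h_j`` (see `pre_func_eval`), and the weights ``w`` solve
    a Vandermonde system requiring ``sum_j w_j * h_j**k`` to equal ``1`` for
    ``k = 1`` and ``0`` for every other ``k`` up to ``2*n``. Expanding each
    ``f(x + h*h_j)`` in a Taylor series shows that this combination reproduces
    ``f'(x) * h`` to the order of the formula. Central and one-sided stencils get
    separate weights; both are cached in ``work.diff_state`` so they are not
    recomputed at every iteration.
    """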
|
|
fac = float(work.fac) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
diff_state = work.diff_state |
|
    # The cached weights are only valid for the step factor with which they were
    # computed; if `fac` has changed, discard them.
    if fac != diff_state.fac:
|
diff_state.central = [] |
|
diff_state.right = [] |
|
diff_state.fac = fac |
|
|
|
if len(diff_state.central) != 2*n + 1: |
|
|
|
|
|
|
|
        # Central difference stencil (relative to the largest step): 0 at the
        # center and +/- 1/fac**p for p = 0, ..., n-1.
        i = np.arange(-n, n + 1)
|
p = np.abs(i) - 1. |
|
s = np.sign(i) |
|
|
|
h = s / fac ** p |
|
A = np.vander(h, increasing=True).T |
|
b = np.zeros(2*n + 1) |
|
b[1] = 1 |
|
weights = np.linalg.solve(A, b) |
|
|
|
|
|
        # Enforce the antisymmetry of central difference weights for the first
        # derivative: zero weight at the center and opposite weights at
        # corresponding +/- steps.
        weights[n] = 0
|
for i in range(n): |
|
weights[-i-1] = -weights[i] |
|
|
|
|
|
|
|
diff_state.central = weights |
|
|
|
|
|
|
|
|
|
        # One-sided difference stencil: 0 (the point `x` itself) and
        # 1/sqrt(fac)**p for p = 0, ..., 2*n - 1.
        i = np.arange(2*n + 1)
|
p = i - 1. |
|
s = np.sign(i) |
|
|
|
h = s / np.sqrt(fac) ** p |
|
A = np.vander(h, increasing=True).T |
|
b = np.zeros(2 * n + 1) |
|
b[1] = 1 |
|
weights = np.linalg.solve(A, b) |
|
|
|
diff_state.right = weights |
|
|
|
return (xp.asarray(diff_state.central, dtype=work.dtype), |
|
xp.asarray(diff_state.right, dtype=work.dtype)) |
|
|
|
|
|
def jacobian(f, x, *, tolerances=None, maxiter=10, order=8, initial_step=0.5, |
|
step_factor=2.0, step_direction=0): |
|
r"""Evaluate the Jacobian of a function numerically. |
|
|
|
Parameters |
|
---------- |
|
f : callable |
|
The function whose Jacobian is desired. The signature must be:: |
|
|
|
f(xi: ndarray) -> ndarray |
|
|
|
where each element of ``xi`` is a finite real. If the function to be |
|
differentiated accepts additional arguments, wrap it (e.g. using |
|
`functools.partial` or ``lambda``) and pass the wrapped callable |
|
into `jacobian`. `f` must not mutate the array ``xi``. See Notes |
|
regarding vectorization and the dimensionality of the input and output. |
|
x : float array_like |
|
Points at which to evaluate the Jacobian. Must have at least one dimension. |
|
See Notes regarding the dimensionality and vectorization. |
|
tolerances : dictionary of floats, optional |
|
Absolute and relative tolerances. Valid keys of the dictionary are: |
|
|
|
- ``atol`` - absolute tolerance on the derivative |
|
- ``rtol`` - relative tolerance on the derivative |
|
|
|
Iteration will stop when ``res.error < atol + rtol * abs(res.df)``. The default |
|
`atol` is the smallest normal number of the appropriate dtype, and |
|
the default `rtol` is the square root of the precision of the |
|
appropriate dtype. |
|
maxiter : int, default: 10 |
|
The maximum number of iterations of the algorithm to perform. See |
|
Notes. |
|
order : int, default: 8 |
|
The (positive integer) order of the finite difference formula to be |
|
used. Odd integers will be rounded up to the next even integer. |
|
initial_step : float array_like, default: 0.5 |
|
The (absolute) initial step size for the finite difference derivative |
|
approximation. Must be broadcastable with `x` and `step_direction`. |
|
step_factor : float, default: 2.0 |
|
The factor by which the step size is *reduced* in each iteration; i.e. |
|
the step size in iteration 1 is ``initial_step/step_factor``. If |
|
``step_factor < 1``, subsequent steps will be greater than the initial |
|
step; this may be useful if steps smaller than some threshold are |
|
undesirable (e.g. due to subtractive cancellation error). |
|
step_direction : integer array_like |
|
An array representing the direction of the finite difference steps (e.g. |
|
        for use when `x` lies near the boundary of the domain of the function).
|
Must be broadcastable with `x` and `initial_step`. |
|
Where 0 (default), central differences are used; where negative (e.g. |
|
-1), steps are non-positive; and where positive (e.g. 1), all steps are |
|
non-negative. |
|
|
|
Returns |
|
------- |
|
res : _RichResult |
|
An object similar to an instance of `scipy.optimize.OptimizeResult` with the |
|
following attributes. The descriptions are written as though the values will |
|
be scalars; however, if `f` returns an array, the outputs will be |
|
arrays of the same shape. |
|
|
|
success : bool array |
|
``True`` where the algorithm terminated successfully (status ``0``); |
|
``False`` otherwise. |
|
status : int array |
|
An integer representing the exit status of the algorithm. |
|
|
|
- ``0`` : The algorithm converged to the specified tolerances. |
|
- ``-1`` : The error estimate increased, so iteration was terminated. |
|
- ``-2`` : The maximum number of iterations was reached. |
|
- ``-3`` : A non-finite value was encountered. |
|
|
|
df : float array |
|
The Jacobian of `f` at `x`, if the algorithm terminated |
|
successfully. |
|
error : float array |
|
An estimate of the error: the magnitude of the difference between |
|
the current estimate of the Jacobian and the estimate in the |
|
previous iteration. |
|
nit : int array |
|
The number of iterations of the algorithm that were performed. |
|
nfev : int array |
|
The number of points at which `f` was evaluated. |
|
|
|
Each element of an attribute is associated with the corresponding |
|
element of `df`. For instance, element ``i`` of `nfev` is the |
|
number of points at which `f` was evaluated for the sake of |
|
computing element ``i`` of `df`. |
|
|
|
See Also |
|
-------- |
|
derivative, hessian |
|
|
|
Notes |
|
----- |
|
Suppose we wish to evaluate the Jacobian of a function |
|
:math:`f: \mathbf{R}^m \rightarrow \mathbf{R}^n`. Assign to variables |
|
``m`` and ``n`` the positive integer values of :math:`m` and :math:`n`, |
|
respectively, and let ``...`` represent an arbitrary tuple of integers. |
|
If we wish to evaluate the Jacobian at a single point, then: |
|
|
|
- argument `x` must be an array of shape ``(m,)`` |
|
- argument `f` must be vectorized to accept an array of shape ``(m, ...)``. |
|
The first axis represents the :math:`m` inputs of :math:`f`; the remainder |
|
are for evaluating the function at multiple points in a single call. |
|
- argument `f` must return an array of shape ``(n, ...)``. The first |
|
axis represents the :math:`n` outputs of :math:`f`; the remainder |
|
are for the result of evaluating the function at multiple points. |
|
- attribute ``df`` of the result object will be an array of shape ``(n, m)``, |
|
the Jacobian. |
|
|
|
This function is also vectorized in the sense that the Jacobian can be |
|
evaluated at ``k`` points in a single call. In this case, `x` would be an |
|
array of shape ``(m, k)``, `f` would accept an array of shape |
|
``(m, k, ...)`` and return an array of shape ``(n, k, ...)``, and the ``df`` |
|
attribute of the result would have shape ``(n, m, k)``. |
|
|
|
Suppose the desired callable ``f_not_vectorized`` is not vectorized; it can |
|
only accept an array of shape ``(m,)``. A simple solution to satisfy the required |
|
interface is to wrap ``f_not_vectorized`` as follows:: |
|
|
|
def f(x): |
|
return np.apply_along_axis(f_not_vectorized, axis=0, arr=x) |
|
|
|
Alternatively, suppose the desired callable ``f_vec_q`` is vectorized, but |
|
only for 2-D arrays of shape ``(m, q)``. To satisfy the required interface, |
|
consider:: |
|
|
|
def f(x): |
|
m, batch = x.shape[0], x.shape[1:] # x.shape is (m, ...) |
|
x = np.reshape(x, (m, -1)) # `-1` is short for q = prod(batch) |
|
res = f_vec_q(x) # pass shape (m, q) to function |
|
n = res.shape[0] |
|
return np.reshape(res, (n,) + batch) # return shape (n, ...) |
|
|
|
Then pass the wrapped callable ``f`` as the first argument of `jacobian`. |
|
|
|
References |
|
---------- |
|
.. [1] Jacobian matrix and determinant, *Wikipedia*, |
|
https://en.wikipedia.org/wiki/Jacobian_matrix_and_determinant |
|
|
|
Examples |
|
-------- |
|
The Rosenbrock function maps from :math:`\mathbf{R}^m \rightarrow \mathbf{R}`; |
|
the SciPy implementation `scipy.optimize.rosen` is vectorized to accept an |
|
array of shape ``(m, p)`` and return an array of shape ``p``. Suppose we wish |
|
to evaluate the Jacobian (AKA the gradient because the function returns a scalar) |
|
at ``[0.5, 0.5, 0.5]``. |
|
|
|
>>> import numpy as np |
|
>>> from scipy.differentiate import jacobian |
|
>>> from scipy.optimize import rosen, rosen_der |
|
>>> m = 3 |
|
>>> x = np.full(m, 0.5) |
|
>>> res = jacobian(rosen, x) |
|
>>> ref = rosen_der(x) # reference value of the gradient |
|
>>> res.df, ref |
|
(array([-51., -1., 50.]), array([-51., -1., 50.])) |
|
|
|
As an example of a function with multiple outputs, consider Example 4 |
|
from [1]_. |
|
|
|
>>> def f(x): |
|
... x1, x2, x3 = x |
|
... return [x1, 5*x3, 4*x2**2 - 2*x3, x3*np.sin(x1)] |
|
|
|
The true Jacobian is given by: |
|
|
|
>>> def df(x): |
|
... x1, x2, x3 = x |
|
... one = np.ones_like(x1) |
|
... return [[one, 0*one, 0*one], |
|
... [0*one, 0*one, 5*one], |
|
... [0*one, 8*x2, -2*one], |
|
... [x3*np.cos(x1), 0*one, np.sin(x1)]] |
|
|
|
Evaluate the Jacobian at an arbitrary point. |
|
|
|
>>> rng = np.random.default_rng(389252938452) |
|
>>> x = rng.random(size=3) |
|
>>> res = jacobian(f, x) |
|
>>> ref = df(x) |
|
>>> res.df.shape == (4, 3) |
|
True |
|
>>> np.allclose(res.df, ref) |
|
True |
|
|
|
Evaluate the Jacobian at 10 arbitrary points in a single call. |
|
|
|
>>> x = rng.random(size=(3, 10)) |
|
>>> res = jacobian(f, x) |
|
>>> ref = df(x) |
|
>>> res.df.shape == (4, 3, 10) |
|
True |
|
>>> np.allclose(res.df, ref) |
|
True |
|
|
|
""" |
|
xp = array_namespace(x) |
|
x = xp.asarray(x) |
|
int_dtype = xp.isdtype(x.dtype, 'integral') |
|
x0 = xp.asarray(x, dtype=xp.asarray(1.0).dtype) if int_dtype else x |
|
|
|
if x0.ndim < 1: |
|
message = "Argument `x` must be at least 1-D." |
|
raise ValueError(message) |
|
|
|
m = x0.shape[0] |
|
i = xp.arange(m) |
|
|
|
def wrapped(x): |
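        # Differentiating with respect to component ``i`` of `x` requires
        # perturbing that component while the others are held at their values in
        # `x0`. Build an array with the perturbed abscissae on the "diagonal" of
        # the first two axes so `derivative` can handle all components at once.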
|
p = () if x.ndim == x0.ndim else (x.shape[-1],) |
|
|
|
new_shape = (m, m) + x0.shape[1:] + p |
|
xph = xp.expand_dims(x0, axis=1) |
|
if x.ndim != x0.ndim: |
|
xph = xp.expand_dims(xph, axis=-1) |
|
xph = xp_copy(xp.broadcast_to(xph, new_shape), xp=xp) |
|
xph[i, i] = x |
|
return f(xph) |
|
|
|
res = derivative(wrapped, x, tolerances=tolerances, |
|
maxiter=maxiter, order=order, initial_step=initial_step, |
|
step_factor=step_factor, preserve_shape=True, |
|
step_direction=step_direction) |
|
|
|
del res.x |
|
return res |
|
|
|
|
|
def hessian(f, x, *, tolerances=None, maxiter=10, |
|
order=8, initial_step=0.5, step_factor=2.0): |
|
r"""Evaluate the Hessian of a function numerically. |
|
|
|
Parameters |
|
---------- |
|
f : callable |
|
The function whose Hessian is desired. The signature must be:: |
|
|
|
f(xi: ndarray) -> ndarray |
|
|
|
where each element of ``xi`` is a finite real. If the function to be |
|
differentiated accepts additional arguments, wrap it (e.g. using |
|
`functools.partial` or ``lambda``) and pass the wrapped callable |
|
into `hessian`. `f` must not mutate the array ``xi``. See Notes |
|
regarding vectorization and the dimensionality of the input and output. |
|
x : float array_like |
|
Points at which to evaluate the Hessian. Must have at least one dimension. |
|
See Notes regarding the dimensionality and vectorization. |
|
tolerances : dictionary of floats, optional |
|
Absolute and relative tolerances. Valid keys of the dictionary are: |
|
|
|
- ``atol`` - absolute tolerance on the derivative |
|
- ``rtol`` - relative tolerance on the derivative |
|
|
|
Iteration will stop when ``res.error < atol + rtol * abs(res.df)``. The default |
|
`atol` is the smallest normal number of the appropriate dtype, and |
|
the default `rtol` is the square root of the precision of the |
|
appropriate dtype. |
|
order : int, default: 8 |
|
The (positive integer) order of the finite difference formula to be |
|
used. Odd integers will be rounded up to the next even integer. |
|
initial_step : float, default: 0.5 |
|
The (absolute) initial step size for the finite difference derivative |
|
approximation. |
|
step_factor : float, default: 2.0 |
|
The factor by which the step size is *reduced* in each iteration; i.e. |
|
the step size in iteration 1 is ``initial_step/step_factor``. If |
|
``step_factor < 1``, subsequent steps will be greater than the initial |
|
step; this may be useful if steps smaller than some threshold are |
|
undesirable (e.g. due to subtractive cancellation error). |
|
maxiter : int, default: 10 |
|
The maximum number of iterations of the algorithm to perform. See |
|
Notes. |
|
|
|
Returns |
|
------- |
|
res : _RichResult |
|
An object similar to an instance of `scipy.optimize.OptimizeResult` with the |
|
following attributes. The descriptions are written as though the values will |
|
be scalars; however, if `f` returns an array, the outputs will be |
|
arrays of the same shape. |
|
|
|
success : bool array |
|
``True`` where the algorithm terminated successfully (status ``0``); |
|
``False`` otherwise. |
|
status : int array |
|
An integer representing the exit status of the algorithm. |
|
|
|
- ``0`` : The algorithm converged to the specified tolerances. |
|
- ``-1`` : The error estimate increased, so iteration was terminated. |
|
- ``-2`` : The maximum number of iterations was reached. |
|
- ``-3`` : A non-finite value was encountered. |
|
|
|
ddf : float array |
|
The Hessian of `f` at `x`, if the algorithm terminated |
|
successfully. |
|
error : float array |
|
An estimate of the error: the magnitude of the difference between |
|
the current estimate of the Hessian and the estimate in the |
|
previous iteration. |
|
nfev : int array |
|
The number of points at which `f` was evaluated. |
|
|
|
Each element of an attribute is associated with the corresponding |
|
element of `ddf`. For instance, element ``[i, j]`` of `nfev` is the |
|
number of points at which `f` was evaluated for the sake of |
|
computing element ``[i, j]`` of `ddf`. |
|
|
|
See Also |
|
-------- |
|
derivative, jacobian |
|
|
|
Notes |
|
----- |
|
Suppose we wish to evaluate the Hessian of a function |
|
:math:`f: \mathbf{R}^m \rightarrow \mathbf{R}`, and we assign to variable |
|
``m`` the positive integer value of :math:`m`. If we wish to evaluate |
|
the Hessian at a single point, then: |
|
|
|
- argument `x` must be an array of shape ``(m,)`` |
|
- argument `f` must be vectorized to accept an array of shape |
|
``(m, ...)``. The first axis represents the :math:`m` inputs of |
|
:math:`f`; the remaining axes indicated by ellipses are for evaluating |
|
the function at several abscissae in a single call. |
|
- argument `f` must return an array of shape ``(...)``. |
|
    - attribute ``ddf`` of the result object will be an array of shape ``(m, m)``,
|
the Hessian. |
|
|
|
This function is also vectorized in the sense that the Hessian can be |
|
evaluated at ``k`` points in a single call. In this case, `x` would be an |
|
array of shape ``(m, k)``, `f` would accept an array of shape |
|
``(m, ...)`` and return an array of shape ``(...)``, and the ``ddf`` |
|
attribute of the result would have shape ``(m, m, k)``. Note that the |
|
axis associated with the ``k`` points is included within the axes |
|
denoted by ``(...)``. |
|
|
|
Currently, `hessian` is implemented by nesting calls to `jacobian`. |
|
All options passed to `hessian` are used for both the inner and outer |
|
calls with one exception: the `rtol` used in the inner `jacobian` call |
|
is tightened by a factor of 100 with the expectation that the inner |
|
error can be ignored. A consequence is that `rtol` should not be set |
|
less than 100 times the precision of the dtype of `x`; a warning is |
|
emitted otherwise. |
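
    For instance, a sketch of loosening the relative tolerance (the function
    ``f`` and the value are only illustrative)::

        res = hessian(f, x, tolerances=dict(rtol=1e-6))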
|
|
|
References |
|
---------- |
|
.. [1] Hessian matrix, *Wikipedia*, |
|
https://en.wikipedia.org/wiki/Hessian_matrix |
|
|
|
Examples |
|
-------- |
|
The Rosenbrock function maps from :math:`\mathbf{R}^m \rightarrow \mathbf{R}`; |
|
the SciPy implementation `scipy.optimize.rosen` is vectorized to accept an |
|
array of shape ``(m, ...)`` and return an array of shape ``...``. Suppose we |
|
wish to evaluate the Hessian at ``[0.5, 0.5, 0.5]``. |
|
|
|
>>> import numpy as np |
|
>>> from scipy.differentiate import hessian |
|
>>> from scipy.optimize import rosen, rosen_hess |
|
>>> m = 3 |
|
>>> x = np.full(m, 0.5) |
|
>>> res = hessian(rosen, x) |
|
>>> ref = rosen_hess(x) # reference value of the Hessian |
|
>>> np.allclose(res.ddf, ref) |
|
True |
|
|
|
`hessian` is vectorized to evaluate the Hessian at multiple points |
|
in a single call. |
|
|
|
>>> rng = np.random.default_rng(4589245925010) |
|
>>> x = rng.random((m, 10)) |
|
>>> res = hessian(rosen, x) |
|
>>> ref = [rosen_hess(xi) for xi in x.T] |
|
>>> ref = np.moveaxis(ref, 0, -1) |
|
>>> np.allclose(res.ddf, ref) |
|
True |
|
|
|
""" |
|
|
|
|
|
|
|
|
|
kwargs = dict(maxiter=maxiter, order=order, initial_step=initial_step, |
|
step_factor=step_factor) |
|
tolerances = {} if tolerances is None else tolerances |
|
atol = tolerances.get('atol', None) |
|
rtol = tolerances.get('rtol', None) |
|
|
|
xp = array_namespace(x) |
|
x = xp.asarray(x) |
|
dtype = x.dtype if not xp.isdtype(x.dtype, 'integral') else xp.asarray(1.).dtype |
|
finfo = xp.finfo(dtype) |
|
rtol = finfo.eps**0.5 if rtol is None else rtol |
|
|
|
|
|
    # The inner `jacobian` call uses `rtol/100`, so the requested `rtol` must not
    # be tighter than 100 times the dtype precision (see Notes).
    rtol_min = finfo.eps * 100
|
message = (f"The specified `{rtol=}`, but error estimates are likely to be " |
|
f"unreliable when `rtol < {rtol_min}`.") |
|
if 0 < rtol < rtol_min: |
|
warnings.warn(message, RuntimeWarning, stacklevel=2) |
|
rtol = rtol_min |
|
|
|
def df(x): |
|
tolerances = dict(rtol=rtol/100, atol=atol) |
|
temp = jacobian(f, x, tolerances=tolerances, **kwargs) |
|
nfev.append(temp.nfev if len(nfev) == 0 else temp.nfev.sum(axis=-1)) |
|
return temp.df |
|
|
|
nfev = [] |
|
res = jacobian(df, x, tolerances=tolerances, **kwargs) |
|
|
|
nfev = xp.cumulative_sum(xp.stack(nfev), axis=0) |
|
res_nit = xp.astype(res.nit[xp.newaxis, ...], xp.int64) |
|
res.nfev = xp_take_along_axis(nfev, res_nit, axis=0)[0] |
|
res.ddf = res.df |
|
del res.df |
|
del res.nit |
|
|
|
return res |
|
|