"""
Dogleg algorithm with rectangular trust regions for least-squares minimization.

The description of the algorithm can be found in [Voglis]_. The algorithm does
trust-region iterations, but the shape of trust regions is rectangular as
opposed to conventional elliptical. The intersection of a trust region and
the initial feasible region is again some rectangle. Thus, on each iteration a
bound-constrained quadratic optimization problem is solved.

The quadratic problem is solved by the well-known dogleg approach, where the
function is minimized along a piecewise-linear "dogleg" path [NumOpt]_,
Chapter 4. If the Jacobian is not rank-deficient, then the function decreases
along this path, and optimization amounts to simply following it as long as
the point stays within the bounds. A constrained Cauchy step (along the
anti-gradient) is used as a fallback in rank-deficient cases; in these
situations convergence might be slow.

If during the iterations some variable hits its initial bound and the
corresponding component of the anti-gradient points outside the feasible
region, then the next dogleg step won't make any progress. Such variables
already satisfy the first-order optimality conditions and are excluded before
the next dogleg step is computed.

The Gauss-Newton step can be computed exactly with `numpy.linalg.lstsq` (for
dense Jacobian matrices) or approximately with the iterative procedure
`scipy.sparse.linalg.lsmr` (for dense and sparse matrices, or for a Jacobian
given as a LinearOperator). The second option allows solving very large
problems (up to a couple of million residuals on a regular PC), provided the
Jacobian matrix is sufficiently sparse. Note, however, that dogbox is not well
suited for problems with a large number of constraints, because variables are
excluded and included again on each iteration (the required number of function
evaluations might be high, or the accuracy of the solution will be poor), so
its large-scale usage is probably limited to unconstrained problems.

References
----------
.. [Voglis] C. Voglis and I. E. Lagaris, "A Rectangular Trust Region Dogleg
            Approach for Unconstrained and Bound Constrained Nonlinear
            Optimization", WSEAS International Conference on Applied
            Mathematics, Corfu, Greece, 2004.
.. [NumOpt] J. Nocedal and S. J. Wright, "Numerical Optimization",
            2nd edition.
"""

import numpy as np
from numpy.linalg import lstsq, norm

from scipy.sparse.linalg import LinearOperator, aslinearoperator, lsmr
from scipy.optimize import OptimizeResult

from .common import (
    step_size_to_bound, in_bounds, update_tr_radius, evaluate_quadratic,
    build_quadratic_1d, minimize_quadratic_1d, compute_grad,
    compute_jac_scale, check_termination, scale_for_robust_loss_function,
    print_header_nonlinear, print_iteration_nonlinear)
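

# Illustrative sketch only, not part of this module's API: dogbox is normally
# reached through scipy.optimize.least_squares with method='dogbox'. The
# exponential model and data values below are hypothetical and chosen purely
# for demonstration; this helper is never called by the solver itself.
def _dogbox_usage_sketch():  # pragma: no cover
    # Imported locally to avoid a circular import at module load time.
    from scipy.optimize import least_squares

    def residuals(p, t, y):
        # Residuals of the model y = p[0] * exp(p[1] * t).
        return p[0] * np.exp(p[1] * t) - y

    t = np.array([0.0, 1.0, 2.0, 3.0])
    y = np.array([2.0, 2.8, 3.9, 5.5])
    res = least_squares(residuals, x0=[1.0, 0.1], args=(t, y),
                        bounds=([0.0, 0.0], [10.0, 1.0]), method='dogbox')
    return res.x, res.status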


def lsmr_operator(Jop, d, active_set):
    """Compute LinearOperator to use in LSMR by dogbox algorithm.

    `active_set` mask is used to exclude active variables from computations
    of matrix-vector products.
    """
    m, n = Jop.shape

    def matvec(x):
        # Zero the components corresponding to active variables before
        # applying J, so that active columns do not contribute.
        x_free = x.ravel().copy()
        x_free[active_set] = 0
        return Jop.matvec(x_free * d)

    def rmatvec(x):
        r = d * Jop.rmatvec(x)
        r[active_set] = 0
        return r

    return LinearOperator((m, n), matvec=matvec, rmatvec=rmatvec, dtype=float)
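

# A minimal sketch of what `lsmr_operator` produces, under the assumption that
# masking active variables makes the operator act like J with the active
# columns zeroed and the free columns scaled by ``d``. The matrix, scale, and
# mask below are hypothetical demo values; this helper is not used by the
# solver.
def _lsmr_operator_sketch():  # pragma: no cover
    J = np.array([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0]])
    d = np.array([1.0, 0.5, 2.0])
    active = np.array([False, True, False])

    op = lsmr_operator(aslinearoperator(J), d, active)

    # Dense equivalent: scale columns by d, then zero the active column.
    J_masked = J * d
    J_masked[:, active] = 0

    v = np.array([1.0, 1.0, 1.0])
    u = np.array([1.0, -1.0])
    assert np.allclose(op.matvec(v), J_masked @ v)
    assert np.allclose(op.rmatvec(u), J_masked.T @ u)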


def find_intersection(x, tr_bounds, lb, ub):
    """Find intersection of trust-region bounds and initial bounds.

    Returns
    -------
    lb_total, ub_total : ndarray with shape of x
        Lower and upper bounds of the intersection region.
    orig_l, orig_u : ndarray of bool with shape of x
        True means that an original bound is taken as a corresponding bound
        in the intersection region.
    tr_l, tr_u : ndarray of bool with shape of x
        True means that a trust-region bound is taken as a corresponding bound
        in the intersection region.
    """
    lb_centered = lb - x
    ub_centered = ub - x

    lb_total = np.maximum(lb_centered, -tr_bounds)
    ub_total = np.minimum(ub_centered, tr_bounds)

    orig_l = np.equal(lb_total, lb_centered)
    orig_u = np.equal(ub_total, ub_centered)

    tr_l = np.equal(lb_total, -tr_bounds)
    tr_u = np.equal(ub_total, tr_bounds)

    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u
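

# A small numeric sketch of `find_intersection` with hypothetical values: the
# trust-region half-widths are 1 for both variables, so both lower bounds and
# the first upper bound come from the trust region, while the second upper
# bound comes from the original bound. Not used by the solver.
def _find_intersection_sketch():  # pragma: no cover
    x = np.array([1.0, 1.0])
    tr_bounds = np.array([1.0, 1.0])
    lb = np.array([-0.5, -5.0])
    ub = np.array([3.0, 1.5])

    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub)

    # lb_total == [-1, -1], ub_total == [1, 0.5] (all relative to x).
    # tr_l == [True, True], tr_u == [True, False], orig_u == [False, True].
    return lb_total, ub_total, orig_l, orig_u, tr_l, tr_u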


def dogleg_step(x, newton_step, g, a, b, tr_bounds, lb, ub):
    """Find dogleg step in a rectangular region.

    Returns
    -------
    step : ndarray, shape (n,)
        Computed dogleg step.
    bound_hits : ndarray of int, shape (n,)
        Each component shows whether a corresponding variable hits the
        initial bound after the step is taken:
            * 0 - a variable doesn't hit the bound.
            * -1 - lower bound is hit.
            * 1 - upper bound is hit.
    tr_hit : bool
        Whether the step hit the boundary of the trust region.
    """
    lb_total, ub_total, orig_l, orig_u, tr_l, tr_u = find_intersection(
        x, tr_bounds, lb, ub
    )
    bound_hits = np.zeros_like(x, dtype=int)

    if in_bounds(newton_step, lb_total, ub_total):
        return newton_step, bound_hits, False

    to_bounds, _ = step_size_to_bound(np.zeros_like(x), -g, lb_total, ub_total)

    # The Gauss-Newton step does not fit, so take a constrained Cauchy step:
    # minimize the quadratic model along the anti-gradient, limited by the
    # distance to the nearest bound of the intersection region.
    cauchy_step = -minimize_quadratic_1d(a, b, 0, to_bounds)[0] * g

    step_diff = newton_step - cauchy_step
    step_size, hits = step_size_to_bound(cauchy_step, step_diff,
                                         lb_total, ub_total)
    bound_hits[(hits < 0) & orig_l] = -1
    bound_hits[(hits > 0) & orig_u] = 1
    tr_hit = np.any((hits < 0) & tr_l | (hits > 0) & tr_u)

    return cauchy_step + step_size * step_diff, bound_hits, tr_hit
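

# A toy sketch of `dogleg_step`, assuming the helpers from `.common` behave as
# they are used above: with J equal to the identity and a residual f = [-2, -3]
# at x = [0, 0], the Gauss-Newton step [2, 3] does not fit into a trust region
# of half-width 1, so the returned step stops on the trust-region boundary
# without touching the original bounds. All values are hypothetical demo data;
# the solver never calls this helper.
def _dogleg_step_sketch():  # pragma: no cover
    x = np.zeros(2)
    J = np.eye(2)
    f = np.array([-2.0, -3.0])

    g = compute_grad(J, f)                   # [-2, -3]
    newton_step = lstsq(J, -f, rcond=-1)[0]  # [2, 3]
    a, b = build_quadratic_1d(J, g, -g)

    tr_bounds = np.array([1.0, 1.0])
    lb = np.array([-10.0, -10.0])
    ub = np.array([10.0, 10.0])

    step, bound_hits, tr_hit = dogleg_step(
        x, newton_step, g, a, b, tr_bounds, lb, ub)
    # Expect tr_hit to be True and bound_hits == [0, 0] here.
    return step, bound_hits, tr_hit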


def dogbox(fun, jac, x0, f0, J0, lb, ub, ftol, xtol, gtol, max_nfev, x_scale,
           loss_function, tr_solver, tr_options, verbose):
    f = f0
    f_true = f.copy()
    nfev = 1

    J = J0
    njev = 1

    if loss_function is not None:
        rho = loss_function(f)
        cost = 0.5 * np.sum(rho[0])
        J, f = scale_for_robust_loss_function(J, f, rho)
    else:
        cost = 0.5 * np.dot(f, f)

    g = compute_grad(J, f)

    jac_scale = isinstance(x_scale, str) and x_scale == 'jac'
    if jac_scale:
        scale, scale_inv = compute_jac_scale(J)
    else:
        scale, scale_inv = x_scale, 1 / x_scale

    Delta = norm(x0 * scale_inv, ord=np.inf)
    if Delta == 0:
        Delta = 1.0

    on_bound = np.zeros_like(x0, dtype=int)
    on_bound[np.equal(x0, lb)] = -1
    on_bound[np.equal(x0, ub)] = 1

    x = x0
    step = np.empty_like(x0)

    if max_nfev is None:
        max_nfev = x0.size * 100

    termination_status = None
    iteration = 0
    step_norm = None
    actual_reduction = None

    if verbose == 2:
        print_header_nonlinear()

    while True:
        # Variables sitting on a bound with the anti-gradient pointing
        # outside the feasible region satisfy first-order optimality there;
        # they form the active set and are excluded from the subproblem.
        active_set = on_bound * g < 0
        free_set = ~active_set

        g_free = g[free_set]
        g_full = g.copy()
        g[active_set] = 0

        g_norm = norm(g, ord=np.inf)
        if g_norm < gtol:
            termination_status = 1

        if verbose == 2:
            print_iteration_nonlinear(iteration, nfev, cost, actual_reduction,
                                      step_norm, g_norm)

        if termination_status is not None or nfev == max_nfev:
            break

        x_free = x[free_set]
        lb_free = lb[free_set]
        ub_free = ub[free_set]
        scale_free = scale[free_set]

        # Compute the Gauss-Newton step for the free variables and the
        # 1-D quadratic model along the anti-gradient.
        if tr_solver == 'exact':
            J_free = J[:, free_set]
            newton_step = lstsq(J_free, -f, rcond=-1)[0]

            a, b = build_quadratic_1d(J_free, g_free, -g_free)
        elif tr_solver == 'lsmr':
            Jop = aslinearoperator(J)

            # The LSMR step is computed in scaled variables; active variables
            # are masked inside the operator, which avoids slicing columns of
            # J (expensive for sparse matrices and impossible for a
            # LinearOperator).
            lsmr_op = lsmr_operator(Jop, scale, active_set)
            newton_step = -lsmr(lsmr_op, f, **tr_options)[0][free_set]
            newton_step *= scale_free

            # Components of g for active variables were zeroed above, so this
            # is equivalent to using J_free and g_free.
            a, b = build_quadratic_1d(Jop, g, -g)

        actual_reduction = -1.0
        while actual_reduction <= 0 and nfev < max_nfev:
            tr_bounds = Delta * scale_free

            step_free, on_bound_free, tr_hit = dogleg_step(
                x_free, newton_step, g_free, a, b, tr_bounds, lb_free, ub_free)

            step.fill(0.0)
            step[free_set] = step_free

            if tr_solver == 'exact':
                predicted_reduction = -evaluate_quadratic(J_free, g_free,
                                                          step_free)
            elif tr_solver == 'lsmr':
                predicted_reduction = -evaluate_quadratic(Jop, g, step)

            # Clip to the bounds to guard against round-off pushing the
            # trial point slightly outside the feasible region.
            x_new = np.clip(x + step, lb, ub)

            f_new = fun(x_new)
            nfev += 1

            step_h_norm = norm(step * scale_inv, ord=np.inf)

            if not np.all(np.isfinite(f_new)):
                Delta = 0.25 * step_h_norm
                continue

            if loss_function is not None:
                cost_new = loss_function(f_new, cost_only=True)
            else:
                cost_new = 0.5 * np.dot(f_new, f_new)
            actual_reduction = cost - cost_new

            Delta, ratio = update_tr_radius(
                Delta, actual_reduction, predicted_reduction,
                step_h_norm, tr_hit
            )

            step_norm = norm(step)
            termination_status = check_termination(
                actual_reduction, cost, step_norm, norm(x), ratio, ftol, xtol)

            if termination_status is not None:
                break

        if actual_reduction > 0:
            on_bound[free_set] = on_bound_free

            x = x_new
            # Snap variables reported as hitting a bound exactly onto it.
            mask = on_bound == -1
            x[mask] = lb[mask]
            mask = on_bound == 1
            x[mask] = ub[mask]

            f = f_new
            f_true = f.copy()

            cost = cost_new

            J = jac(x, f)
            njev += 1

            if loss_function is not None:
                rho = loss_function(f)
                J, f = scale_for_robust_loss_function(J, f, rho)

            g = compute_grad(J, f)

            if jac_scale:
                scale, scale_inv = compute_jac_scale(J, scale_inv)
        else:
            step_norm = 0
            actual_reduction = 0

        iteration += 1

    if termination_status is None:
        termination_status = 0

    return OptimizeResult(
        x=x, cost=cost, fun=f_true, jac=J, grad=g_full, optimality=g_norm,
        active_mask=on_bound, nfev=nfev, njev=njev, status=termination_status)