"""
Functions
---------
.. autosummary::
   :toctree: generated/

   line_search_armijo
   line_search_wolfe1
   line_search_wolfe2
   scalar_search_wolfe1
   scalar_search_wolfe2

"""
from warnings import warn

import numpy as np

from ._dcsrch import DCSRCH

__all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2',
           'scalar_search_wolfe1', 'scalar_search_wolfe2',
           'line_search_armijo']


class LineSearchWarning(RuntimeWarning):
    pass


def _check_c1_c2(c1, c2):
    if not (0 < c1 < c2 < 1):
        raise ValueError("'c1' and 'c2' do not satisfy "
                         "'0 < c1 < c2 < 1'.")


def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """
    As `scalar_search_wolfe1`, but perform the line search along the
    direction `pk`.

    Parameters
    ----------
    f : callable
        Function `f(x)`.
    fprime : callable
        Gradient of `f`.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like, optional
        Gradient of `f` at point `xk`.
    old_fval : float, optional
        Value of `f` at point `xk`.
    old_old_fval : float, optional
        Value of `f` at the point preceding `xk`.

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp : float or None
        Step size, or None if no suitable step was found.
    f_count : int
        Number of function evaluations made.
    g_count : int
        Number of gradient evaluations made.
    fval : float
        Value of `f` at the final point.
    old_fval : float
        Value of `f` at the starting point `xk`.
    gval : array
        Gradient of `f` at the final point.

    Notes
    -----
    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.
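
    Examples
    --------
    A minimal sketch on a simple quadratic objective; the function,
    gradient, and starting point below are arbitrary choices made for
    illustration only:

    >>> import numpy as np
    >>> def f(x):
    ...     return float(np.dot(x, x))
    >>> def fprime(x):
    ...     return 2.0 * x
    >>> xk = np.array([1.0, 1.0])
    >>> pk = -fprime(xk)  # steepest-descent direction
    >>> stp, fc, gc, fval, old_fval, gval = line_search_wolfe1(f, fprime,
    ...                                                        xk, pk)
    >>> bool(fval <= old_fval + 1e-4 * stp * np.dot(fprime(xk), pk))
    True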

    """
    if gfk is None:
        gfk = fprime(xk, *args)

    gval = [gfk]
    gc = [0]
    fc = [0]

    def phi(s):
        fc[0] += 1
        return f(xk + s*pk, *args)

    def derphi(s):
        gval[0] = fprime(xk + s*pk, *args)
        gc[0] += 1
        return np.dot(gval[0], pk)

    derphi0 = np.dot(gfk, pk)

    stp, fval, old_fval = scalar_search_wolfe1(
            phi, derphi, old_fval, old_old_fval, derphi0,
            c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)

    return stp, fc[0], gc[0], fval, old_fval, gval[0]


def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions.

    The search direction is assumed to be a descent direction, i.e.
    ``derphi(0) < 0``, so that some ``alpha > 0`` decreases `phi`.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at the previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size.
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found.
    phi : float
        Value of `phi` at the new point `alpha`.
    phi0 : float
        Value of `phi` at `alpha=0`.

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described
    in [1]_.

    References
    ----------
    .. [1] Nocedal, J., & Wright, S. J. (2006). Numerical Optimization.
       Springer Series in Operations Research and Financial Engineering.
       Springer.
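
    Examples
    --------
    An illustrative sketch on a one-dimensional quadratic; the function
    below is an arbitrary choice made for demonstration:

    >>> def phi(s):
    ...     return (s - 2.0)**2
    >>> def derphi(s):
    ...     return 2.0 * (s - 2.0)
    >>> stp, phi1, phi0 = scalar_search_wolfe1(phi, derphi)
    >>> bool(abs(derphi(stp)) <= 0.9 * abs(derphi(0.)))  # curvature condition
    True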

    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)
    if derphi0 is None:
        derphi0 = derphi(0.)

    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
        if alpha1 < 0:
            alpha1 = 1.0
    else:
        alpha1 = 1.0

    maxiter = 100

    dcsrch = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
    stp, phi1, phi0, task = dcsrch(
        alpha1, phi0=phi0, derphi0=derphi0, maxiter=maxiter
    )

    return stp, phi1, phi0


line_search = line_search_wolfe1


def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction. The search direction must be a descent direction
        for the algorithm to converge.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line
        search accepts the value of ``alpha`` only if this callable
        returns ``True``. If the callable returns ``False`` for the step
        length, the algorithm will continue with new iterates. The
        callable is only called for iterates satisfying the strong Wolfe
        conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    The search direction `pk` must be a descent direction (e.g.
    ``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
    conditions. If the search direction is not a descent direction (e.g.
    ``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    An objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2 + (x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])
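
    The optional ``extra_condition`` callable imposes additional acceptance
    criteria on a step that already satisfies the strong Wolfe conditions.
    A sketch, reusing the objective above (the gradient-norm bound here is
    an arbitrary choice for illustration):

    >>> def extra_condition(alpha, x, f, g):
    ...     return np.linalg.norm(g) < 10.0
    >>> alpha, fc, gc, new_fval, old_fval, new_slope = line_search(
    ...     obj_func, obj_grad, start_point, search_gradient,
    ...     extra_condition=extra_condition)
    >>> alpha is not None
    True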

    """
    fc = [0]
    gc = [0]
    gval = [None]
    gval_alpha = [None]

    def phi(alpha):
        fc[0] += 1
        return f(xk + alpha * pk, *args)

    fprime = myfprime

    def derphi(alpha):
        gc[0] += 1
        gval[0] = fprime(xk + alpha * pk, *args)  # store for later use
        gval_alpha[0] = alpha
        return np.dot(gval[0], pk)

    if gfk is None:
        gfk = fprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is not None:
        # Pass the most recently computed gradient to the user callable,
        # to avoid needless re-evaluations.
        def extra_condition2(alpha, phi):
            if gval_alpha[0] != alpha:
                derphi(alpha)
            x = xk + alpha * pk
            return extra_condition(alpha, x, phi, gval[0])
    else:
        extra_condition2 = None

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
            phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
            extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)
    else:
        # derphi_star is a number (the slope); return instead the most
        # recently computed gradient from the line search.
        derphi_star = gval[0]

    return alpha_star, fc[0], gc[0], phi_star, old_fval, derphi_star


def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    The search direction is assumed to be a descent direction, i.e.
    ``derphi(0) < 0``, so that some ``alpha > 0`` decreases `phi`.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at the previous point.
    derphi0 : float, optional
        Value of derphi at 0.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.
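
    Examples
    --------
    An illustrative sketch on a one-dimensional quadratic; the function
    below is an arbitrary choice made for demonstration:

    >>> def phi(alpha):
    ...     return (alpha - 1.5)**2
    >>> def derphi(alpha):
    ...     return 2.0 * (alpha - 1.5)
    >>> alpha, phi_a, phi0, derphi_a = scalar_search_wolfe2(phi, derphi)
    >>> bool(phi_a <= phi0 + 1e-4 * alpha * derphi(0.))  # sufficient decrease
    True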

    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)
    # derphi_a1 is evaluated lazily below, only when needed

    phi_a0 = phi0
    derphi_a0 = derphi0

    if extra_condition is None:
        def extra_condition(alpha, phi):
            return True

    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 > amax):
            # alpha1 == 0: This shouldn't happen. Perhaps the increment has
            # slipped below machine precision?
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      f"less than or equal to amax: {amax}"

            warn(msg, LineSearchWarning, stacklevel=2)
            break

        not_first_iteration = i > 0
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and not_first_iteration):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        if (derphi_a1 >= 0):
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        alpha2 = 2 * alpha1  # increase by factor of two on each iteration
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # stopping test: maxiter reached
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)

    return alpha_star, phi_star, phi0, derphi_star


def _cubicmin(a, fa, fpa, b, fb, c, fc):
    """
    Finds the minimizer for a cubic polynomial that goes through the
    points (a, fa), (b, fb), and (c, fc) with derivative at a of fpa.

    If no minimizer can be found, return None.
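
    Examples
    --------
    A quick check against a cubic with a known local minimizer. The data
    below sample ``p(x) = x**3 - 3*x``, whose local minimum is at ``x = 1``:

    >>> float(_cubicmin(0.0, 0.0, -3.0, 2.0, 2.0, 3.0, 18.0))
    1.0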

    """
    # The fitted polynomial is f(x) = A*(x-a)**3 + B*(x-a)**2 + C*(x-a) + D,
    # with C = fpa and D = fa fixed by the data at `a`.
    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            C = fpa
            db = b - a
            dc = c - a
            denom = (db * dc) ** 2 * (db - dc)
            d1 = np.empty((2, 2))
            d1[0, 0] = dc ** 2
            d1[0, 1] = -db ** 2
            d1[1, 0] = -dc ** 3
            d1[1, 1] = db ** 3
            [A, B] = np.dot(d1, np.asarray([fb - fa - C * db,
                                            fc - fa - C * dc]).flatten())
            A /= denom
            B /= denom
            radical = B * B - 3 * A * C
            xmin = a + (-B + np.sqrt(radical)) / (3 * A)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin


def _quadmin(a, fa, fpa, b, fb):
    """
    Finds the minimizer for a quadratic polynomial that goes through
    the points (a, fa), (b, fb) with derivative at a of fpa.

    If no minimizer can be found, return None.
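
    Examples
    --------
    A quick check against a parabola with a known minimizer. The data
    below sample ``q(x) = (x - 3)**2``, whose minimum is at ``x = 3``:

    >>> float(_quadmin(0.0, 9.0, -6.0, 1.0, 4.0))
    3.0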

    """
    # The fitted polynomial is f(x) = B*(x-a)**2 + C*(x-a) + D,
    # with C = fpa and D = fa fixed by the data at `a`.
    with np.errstate(divide='raise', over='raise', invalid='raise'):
        try:
            D = fa
            C = fpa
            db = b - a * 1.0
            B = (fb - D - C * db) / (db * db)
            xmin = a - C / (2.0 * B)
        except ArithmeticError:
            return None
    if not np.isfinite(xmin):
        return None
    return xmin


def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, p. 61.

    """

    maxiter = 10
    i = 0
    delta1 = 0.2  # cubic interpolant check
    delta2 = 0.1  # quadratic interpolant check
    phi_rec = phi0
    a_rec = 0
    while True:
        # Interpolate to find a trial step length between a_lo and a_hi.
        # Use cubic interpolation first; if the result is within
        # delta1 * dalpha of the end points or outside the interval, fall
        # back to quadratic interpolation, and if that is still too close
        # to the end points, use bisection.

        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # Minimizer of the cubic interpolant (uses phi_lo, derphi_lo,
        # phi_hi, and the most recent value of phi). Not available on the
        # first iteration, which has no recorded previous value.
        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b - qchk) or (a_j < a + qchk):
                a_j = a_lo + 0.5*dalpha

        # Check the new trial value a_j
        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star


def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk + alpha*pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Parameter for the sufficient decrease (Armijo) stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at the start of the optimization.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    f_count : int
        Number of function evaluations made.
    f_val_at_alpha : float
        Value of `f` at the new point ``xk + alpha*pk``.

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.
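
    Examples
    --------
    A minimal sketch on a simple quadratic objective; the function and
    starting point below are arbitrary choices made for illustration:

    >>> import numpy as np
    >>> def f(x):
    ...     return float(np.dot(x, x))
    >>> xk = np.array([1.0, 1.0])
    >>> gfk = 2.0 * xk
    >>> pk = -gfk  # steepest-descent direction
    >>> alpha, fc, fval = line_search_armijo(f, xk, pk, gfk, old_fval=f(xk))
    >>> bool(fval <= f(xk) + 1e-4 * alpha * np.dot(gfk, pk))
    True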

    """
    xk = np.atleast_1d(xk)
    fc = [0]

    def phi(alpha1):
        fc[0] += 1
        return f(xk + alpha1*pk, *args)

    if old_fval is None:
        phi0 = phi(0.)
    else:
        phi0 = old_fval

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, fc[0], phi1


def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """
    Compatibility wrapper for `line_search_armijo`.
    """
    r = line_search_armijo(f, xk, pk, gfk, old_fval, args=args, c1=c1,
                           alpha0=alpha0)
    return r[0], r[1], 0, r[2]


def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57.

    The search direction is assumed to be a descent direction, i.e.
    ``derphi0 < 0``, so that some ``alpha > 0`` decreases `phi`.

    Returns
    -------
    alpha : float or None
        Step size, or None if no suitable step was found.
    phi1 : float
        Value of `phi` at the last trial step.
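
    Examples
    --------
    An illustrative sketch on a one-dimensional quadratic; the function
    below is an arbitrary choice made for demonstration:

    >>> def phi(alpha):
    ...     return (alpha - 0.5)**2
    >>> alpha, phi1 = scalar_search_armijo(phi, phi0=phi(0.), derphi0=-1.0)
    >>> bool(phi1 <= phi(0.) + 1e-4 * alpha * (-1.0))
    True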

    """
    phi_a0 = phi(alpha0)
    if phi_a0 <= phi0 + c1*alpha0*derphi0:
        return alpha0, phi_a0

    # Otherwise, compute the minimizer of a quadratic interpolant:

    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, phi_a1

    # Otherwise, loop with cubic interpolation until we find an alpha which
    # satisfies the sufficient decrease condition (since we are
    # backtracking, we assume that the value of alpha is not too small).

    while alpha1 > amin:
        factor = alpha0**2 * alpha1**2 * (alpha1 - alpha0)
        a = alpha0**2 * (phi_a1 - phi0 - derphi0*alpha1) - \
            alpha1**2 * (phi_a0 - phi0 - derphi0*alpha0)
        a = a / factor
        b = -alpha0**3 * (phi_a1 - phi0 - derphi0*alpha1) + \
            alpha1**3 * (phi_a0 - phi0 - derphi0*alpha0)
        b = b / factor

        alpha2 = (-b + np.sqrt(abs(b**2 - 3 * a * derphi0))) / (3.0*a)
        phi_a2 = phi(alpha2)

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, phi_a2

        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi_a2

    # Failed to find a suitable step length
    return None, phi_a1


def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta,
                                  gamma=1e-4, tau_min=0.1, tau_max=0.5):
    """
    Nonmonotone backtracking line search as described in [1]_.

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    prev_fs : sequence of float
        List of previous merit function values. Should have
        ``len(prev_fs) <= M`` where ``M`` is the nonmonotonicity window
        parameter.
    eta : float
        Allowed merit function increase, see [1]_
    gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position

    References
    ----------
    .. [1] "Spectral residual method without gradient information for solving
           large-scale nonlinear systems of equations." W. La Cruz,
           J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006).
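
    Examples
    --------
    A minimal sketch on a toy residual; the system ``F(x) = x`` and the
    squared-norm merit function below are arbitrary choices made for
    illustration:

    >>> import numpy as np
    >>> def f(x):
    ...     F = x  # residual of the linear system x = 0
    ...     return float(np.dot(F, F)), F
    >>> x_k = np.array([1.0, 1.0])
    >>> d = -x_k  # step towards the solution
    >>> f_k = f(x_k)[0]
    >>> alpha, xp, fp, Fp = _nonmonotone_line_search_cruz(f, x_k, d,
    ...                                                   [f_k], eta=1e-3)
    >>> bool(fp <= f_k + 1e-3)
    True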

    """
    f_k = prev_fs[-1]
    f_bar = max(prev_fs)

    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        # Try the step in the positive direction first
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        # Then the step in the negative direction
        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= f_bar + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        # Neither was accepted; shrink both step lengths and retry
        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    return alpha, xp, fp, Fp


def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]_.

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).
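
    Examples
    --------
    A minimal sketch on a toy residual; the system ``F(x) = x``, the
    squared-norm merit function, and the initial ``C``/``Q`` values below
    follow the convention described above and are otherwise arbitrary:

    >>> import numpy as np
    >>> def f(x):
    ...     F = x  # residual of the linear system x = 0
    ...     return float(np.dot(F, F)), F
    >>> x_k = np.array([1.0, 1.0])
    >>> d = -x_k  # step towards the solution
    >>> f_k = f(x_k)[0]
    >>> alpha, xp, fp, Fp, C, Q = _nonmonotone_line_search_cheng(
    ...     f, x_k, d, f_k, C=f_k, Q=1.0, eta=1e-3)
    >>> Q > 1.0  # the averaging weight grows monotonically
    True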

    """
    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        # Try the step in the positive direction first
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        # Then the step in the negative direction
        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        # Neither was accepted; shrink both step lengths and retry
        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    # Update the control parameters C and Q for the accepted step
    Q_next = nu * Q + 1
    C = (nu * Q * (C + eta) + fp) / Q_next
    Q = Q_next

    return alpha, xp, fp, Fp, C, Q
|