diff --git a/abopt/algs/tests/test_lbfgs.py b/abopt/algs/tests/test_lbfgs.py
index e209f30..bf4b618 100644
--- a/abopt/algs/tests/test_lbfgs.py
+++ b/abopt/algs/tests/test_lbfgs.py
@@ -11,7 +11,7 @@
 from abopt.algs.lbfgs import pre_scaled_direct_bfgs, pre_scaled_inverse_dfp
 from abopt.algs.lbfgs import post_scaled_direct_bfgs, post_scaled_inverse_dfp
 
-from abopt.testing import RosenProblem, ChiSquareProblem
+from abopt.testing import RosenProblem, ChiSquareProblem, ChiSquareProblem_dual, ChiSquareProblem_dual2
 
 import numpy
 from numpy.testing import assert_allclose
@@ -61,3 +61,22 @@ def test_abopt_lbfgs_quad(precond):
     r = lbfgs.minimize(problem, x0, monitor=print)
     assert r.converged
     assert_allclose(problem.f(r.x), 0.0, atol=1e-7)
+
+precond = Preconditioner(Pvp=diag_scaling, vPp=diag_scaling)
+@pytest.mark.parametrize("precond",
+[None, precond])
+def test_abopt_lbfgs_quad_dual(precond):
+    lbfgs = LBFGS(linesearch=backtrace)
+
+    J = numpy.array([ [0, 0, 2, 1],
+                      [0, 10, 2, 0],
+                      [40, 100, 0, 0],
+                      [400, 0, 0, 0]])
+
+    problem = ChiSquareProblem_dual(J=J, precond=precond)
+
+    x0 = numpy.zeros(4)
+    r = lbfgs.minimize(problem, x0, monitor=print)
+    assert r.converged
+    assert_allclose(problem.f(r.x), 0.0, atol=1e-7)
+
diff --git a/abopt/base.py b/abopt/base.py
index 024fdc4..69364a6 100644
--- a/abopt/base.py
+++ b/abopt/base.py
@@ -168,8 +168,8 @@ def complete(self, state):
         self.znorm = dot(self.z, self.z) ** 0.5
         self.xnorm = dot(self.x, self.x) ** 0.5
         self.Pxnorm = dot(self.Px, self.Px) ** 0.5
-        self.complete_y(state)
-        self.complete_g(state)
+
+        self.complete_y_g(state)
 
         self.dy = self.y - state.y
         dx = addmul(self.x, state.x, -1)
@@ -180,21 +180,19 @@ def complete(self, state):
         self.theta = dot(self.z, state.Pg) / (self.znorm * state.Pgnorm)
         return self
 
-    def complete_y(self, state):
+    def complete_y_g(self, state):
         problem = self.problem
+        f, g = problem.fg(self.x)
 
         if self.y is None:
-            self.y = problem.f(self.x)
+            self.y = f
             state.fev = state.fev + 1
-        return self
 
-    def complete_g(self, state):
         dot = self.problem.vs.dot
-        problem = self.problem
 
         # fill missing values in prop
         if self.g is None:
-            self.g = problem.g(self.x)
+            self.g = g
             state.gev = state.gev + 1
 
         if self.Pg is None:
@@ -205,14 +203,14 @@
 
         return self
 
+
 class InitialProposal(Proposal):
     def complete(self, state):
         dot = self.problem.vs.dot
         addmul = self.problem.vs.addmul
         self.xnorm = dot(self.x, self.x) ** 0.5
         self.Pxnorm = dot(self.Px, self.Px) ** 0.5
-        self.complete_y(state)
-        self.complete_g(state)
+        self.complete_y_g(state)
 
         self.dy = None
         self.dxnorm = None
@@ -256,7 +254,8 @@ class Problem(object):
     """ Defines a problem.
 
     """
-    def __init__(self, objective, gradient,
+    def __init__(self, objective=None, gradient=None,
+                 objective_gradient=None,
                  hessian_vector_product=None,
                  inverse_hessian_vector_product=None,
                  vs=None,
@@ -282,8 +281,15 @@ def __init__(self, objective, gradient,
         self._precond = precond
         self.vs = vs
 
+        self.problem_dual_eval = False
+
+        if objective_gradient is None:
+            if objective is None or gradient is None:
+                raise ValueError("if objective_gradient is None, both objective and gradient must be provided.")
+
         self._objective = objective
         self._gradient = gradient
+        self._objective_gradient = objective_gradient
         self._hessian_vector_product = hessian_vector_product
         self._inverse_hessian_vector_product = inverse_hessian_vector_product
         self.atol = atol
@@ -314,15 +320,30 @@ def check_preconditioner(self, x0):
         if vs.dot(d, d) > 1e-6 * vs.dot(x0, x0):
             raise ValueError("Preconditioner's vPp and Qvp are not inverses.")
 
-    def f(self, x):
-        return self._objective(x)
+    def f(self, x):
+        if self._objective_gradient is not None:
+            f, _ = self.fg(x)
+        else:
+            f = self._objective(x)
+        return f
 
     def g(self, x):
         """ This returns the gradient for the original variable"""
-        g = self._gradient(x)
+        if self._objective_gradient is not None:
+            _, g = self.fg(x)
+        else:
+            g = self._gradient(x)
         return g
 
+
+    def fg(self, x):
+        """ This returns the function value and the gradient """
+        if self._objective_gradient is not None:
+            f, g = self._objective_gradient(x)
+        else:
+            g = self.g(x)
+            f = self.f(x)
+        return f, g
+
     def Hvp(self, x, v):
         """ This returns the raw hessian product H_x v
             uppercase H means Hessian, not Hessian inverse.
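The base.py change above adds a third way to specify a problem: a single objective_gradient callable returning the pair (value, gradient), which f(), g() and the new fg() all route through. A minimal sketch of how that keyword could be used, assuming Problem is importable from abopt.base as elsewhere in this patch and that the remaining constructor arguments can be left at their defaults; the quadratic objective is made up purely for illustration:

    import numpy

    from abopt.base import Problem

    def quad_fg(x):
        # one pass returns both the value and the gradient of 0.5 * |x|^2
        return 0.5 * numpy.sum(x ** 2), x

    problem = Problem(objective_gradient=quad_fg)

    x = numpy.arange(4.0)
    y, g = problem.fg(x)                    # single combined evaluation
    assert y == problem.f(x)                # f() routes through fg() when only objective_gradient is given
    assert numpy.allclose(g, problem.g(x))  # and so does g()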
""" - def __init__(self, objective, gradient, + def __init__(self, objective=None, gradient=None, + objective_gradient=None, hessian_vector_product=None, inverse_hessian_vector_product=None, vs=None, @@ -282,8 +281,15 @@ def __init__(self, objective, gradient, self._precond = precond self.vs = vs + self.problem_dual_eval = False + + if objective_gradient is None: + if not (objective is not None and gradient is not None): + raise ValueError("if objective_gradient is None, gradient and objective cannot be None.") + self._objective = objective self._gradient = gradient + self._objective_gradient = objective_gradient self._hessian_vector_product = hessian_vector_product self._inverse_hessian_vector_product = inverse_hessian_vector_product self.atol = atol @@ -314,15 +320,30 @@ def check_preconditioner(self, x0): if vs.dot(d, d) > 1e-6 * vs.dot(x0, x0): raise ValueError("Preconditioner's vPp and Qvp are not inverses.") - def f(self, x): - return self._objective(x) + if self._objective_gradient is not None: + f, _ = self.fg(x) + else: + f = self._objective(x) + return f def g(self, x): """ This returns the gradient for the original variable""" - g = self._gradient(x) + if self._objective_gradient is not None: + _, g = self.fg(x) + else: + g = self._gradient(x) return g + def fg(self, x): + """ This return the gradient and the function """ + if self._objective_gradient is not None: + f, g = self._objective_gradient(x) + else: + g = self.g(x) + f = self.f(x) + return f, g + def Hvp(self, x, v): """ This returns the raw hessian product H_x v uppercase H means Hessian, not Hessian inverse. diff --git a/abopt/linesearch/__init__.py b/abopt/linesearch/__init__.py index 312ac70..fccb129 100644 --- a/abopt/linesearch/__init__.py +++ b/abopt/linesearch/__init__.py @@ -28,6 +28,6 @@ def nullsearch(problem, state, z, rate, maxiter): addmul = problem.vs.addmul Px1 = addmul(state.Px, z, -rate) - prop = Proposal(problem, Px=Px1, z=z).complete_y(state) + prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state) print(Px1, state.Px, z) return prop, rate diff --git a/abopt/linesearch/backtrace.py b/abopt/linesearch/backtrace.py index cd6f751..f0500e4 100644 --- a/abopt/linesearch/backtrace.py +++ b/abopt/linesearch/backtrace.py @@ -13,7 +13,7 @@ def backtrace(problem, state, z, rate, maxiter, c=1e-5, tau=0.5): return None, None Px1 = addmul(state.Px, z, -rate) - prop = Proposal(problem, Px=Px1, z=z).complete_y(state) + prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state) i = 0 propmin = prop @@ -35,6 +35,6 @@ def backtrace(problem, state, z, rate, maxiter, c=1e-5, tau=0.5): rate *= tau Px1 = addmul(state.Px, z, -rate) - prop = Proposal(problem, Px=Px1, z=z).complete_y(state) + prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state) i = i + 1 return None, None diff --git a/abopt/linesearch/exact.py b/abopt/linesearch/exact.py index 1502a86..c3255a1 100644 --- a/abopt/linesearch/exact.py +++ b/abopt/linesearch/exact.py @@ -10,7 +10,7 @@ def exact(problem, state, z, rate, maxiter, c=0.5): from scipy.optimize import minimize_scalar Px1 = addmul(state.Px, z, - rate) - prop = Proposal(problem, Px=Px1, z=z).complete_y(state) + prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state) best = [prop, 1.0] @@ -18,7 +18,7 @@ def func(tau): if tau == 0: return state.y Px1 = addmul(state.Px, z, -tau * rate) - prop = Proposal(problem, Px=Px1, z=z).complete_y(state) + prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state) if prop.y < best[0].y: best[0] = prop diff --git a/abopt/testing/__init__.py 
diff --git a/abopt/testing/__init__.py b/abopt/testing/__init__.py
index 2efe054..c01f6a8 100644
--- a/abopt/testing/__init__.py
+++ b/abopt/testing/__init__.py
@@ -2,6 +2,63 @@
 from scipy.optimize import rosen, rosen_der, rosen_hess_prod, rosen_hess
 
 import numpy
+class ChiSquareProblem_dual(Problem):
+    """
+    chisquare problem with
+
+    y = |J phi(x) - 1.0|^2
+
+    The objective and the gradient are evaluated together in a single pass (objective_gradient).
+    """
+    def __init__(self, J, phi=lambda x:x, phiprime=lambda x: 1, precond=None):
+        def f(x): return J.dot(phi(x))
+        def vjp(x, v): return v.dot(J) * phiprime(x)
+        def jvp(x, v): return J.dot(v * phiprime(x))
+
+        def objective_gradient(x):
+            y = f(x)
+            g = vjp(x, y - 1.0) * 2
+            return numpy.sum((y - 1.0) ** 2), g
+
+        def hessian(x, v):
+            v = numpy.array(v)
+            return vjp(x, jvp(x, v)) * 2
+
+        Problem.__init__(self, objective_gradient = objective_gradient,
+                        hessian_vector_product = hessian,
+                        precond = precond)
+
+class ChiSquareProblem_dual2(Problem):
+    """
+    chisquare problem with
+
+    y = |J phi(x) - 1.0|^2
+
+    Same as ChiSquareProblem_dual, but a standalone objective is provided as well.
+    """
+    def __init__(self, J, phi=lambda x:x, phiprime=lambda x: 1, precond=None):
+        def f(x): return J.dot(phi(x))
+        def vjp(x, v): return v.dot(J) * phiprime(x)
+        def jvp(x, v): return J.dot(v * phiprime(x))
+
+        def objective_gradient(x):
+            y = f(x)
+            g = vjp(x, y - 1.0) * 2
+            return numpy.sum((y - 1.0) ** 2), g
+
+        def objective(x):
+            y = f(x)
+            return numpy.sum((y - 1.0) ** 2)
+
+        def hessian(x, v):
+            v = numpy.array(v)
+            return vjp(x, jvp(x, v)) * 2
+
+        Problem.__init__(self, objective_gradient = objective_gradient, objective=objective,
+                        hessian_vector_product = hessian,
+                        precond = precond)
+
+
 class ChiSquareProblem(Problem):
     """
     chisquare problem with
@@ -26,10 +83,12 @@ def hessian(x, v):
             return vjp(x, jvp(x, v)) * 2
 
         Problem.__init__(self,
-                        objective=objective,
-                        gradient=gradient,
-                        hessian_vector_product=hessian,
-                        precond=precond)
+                        objective = objective,
+                        gradient = gradient,
+                        objective_gradient = None,
+                        hessian_vector_product = hessian,
+                        precond = precond)
+
 
 from scipy.linalg import inv
 
 def rosen_inverse_hess(x):
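For the default phi(x) = x, the objective_gradient closure above returns sum((J x - 1)^2) together with its gradient 2 (J x - 1)^T J. A quick standalone sanity check of that formula against a central finite difference, using the same J as in the new test; the probe point and step size are arbitrary and only plain numpy is used:

    import numpy

    J = numpy.array([[0, 0, 2, 1],
                     [0, 10, 2, 0],
                     [40, 100, 0, 0],
                     [400, 0, 0, 0]], dtype=float)

    def objective_gradient(x):
        # same formula as ChiSquareProblem_dual with phi(x) = x, phiprime(x) = 1
        y = J.dot(x)
        return numpy.sum((y - 1.0) ** 2), 2 * (y - 1.0).dot(J)

    x = numpy.array([0.1, -0.2, 0.3, 0.4])
    _, g = objective_gradient(x)

    eps = 1e-6
    g_fd = numpy.array([(objective_gradient(x + eps * e)[0]
                       - objective_gradient(x - eps * e)[0]) / (2 * eps)
                        for e in numpy.eye(4)])

    print(numpy.allclose(g, g_fd, rtol=1e-5))  # expected: True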