21 changes: 20 additions & 1 deletion abopt/algs/tests/test_lbfgs.py
@@ -11,7 +11,7 @@
from abopt.algs.lbfgs import pre_scaled_direct_bfgs, pre_scaled_inverse_dfp
from abopt.algs.lbfgs import post_scaled_direct_bfgs, post_scaled_inverse_dfp

from abopt.testing import RosenProblem, ChiSquareProblem
from abopt.testing import RosenProblem, ChiSquareProblem, ChiSquareProblem_dual, ChiSquareProblem_dual2
import numpy
from numpy.testing import assert_allclose

@@ -61,3 +61,22 @@ def test_abopt_lbfgs_quad(precond):
r = lbfgs.minimize(problem, x0, monitor=print)
assert r.converged
assert_allclose(problem.f(r.x), 0.0, atol=1e-7)

precond = Preconditioner(Pvp=diag_scaling, vPp=diag_scaling)
@pytest.mark.parametrize("precond",
[None, precond])
def test_abopt_lbfgs_quad_dual(precond):
lbfgs = LBFGS(linesearch=backtrace)

J = numpy.array([ [0, 0, 2, 1],
[0, 10, 2, 0],
[40, 100, 0, 0],
[400, 0, 0, 0]])

problem = ChiSquareProblem_dual(J=J, precond=precond)

x0 = numpy.zeros(4)
r = lbfgs.minimize(problem, x0, monitor=print)
assert r.converged
assert_allclose(problem.f(r.x), 0.0, atol=1e-7)
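To exercise just the new dual-evaluation test (command is illustrative; run it from the repository root):

    python -m pytest abopt/algs/tests/test_lbfgs.py -k quad_dual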

49 changes: 35 additions & 14 deletions abopt/base.py
@@ -168,8 +168,8 @@ def complete(self, state):
self.znorm = dot(self.z, self.z) ** 0.5
self.xnorm = dot(self.x, self.x) ** 0.5
self.Pxnorm = dot(self.Px, self.Px) ** 0.5
self.complete_y(state)
self.complete_g(state)

self.complete_y_g(state)

self.dy = self.y - state.y
dx = addmul(self.x, state.x, -1)
@@ -180,21 +180,19 @@
self.theta = dot(self.z, state.Pg) / (self.znorm * state.Pgnorm)
return self

def complete_y(self, state):
def complete_y_g(self, state):
problem = self.problem
f,g = problem.fg(self.x)

if self.y is None:
self.y = problem.f(self.x)
self.y = f
state.fev = state.fev + 1
return self

def complete_g(self, state):
dot = self.problem.vs.dot
problem = self.problem

# fill missing values in prop
if self.g is None:
self.g = problem.g(self.x)
self.g = g
state.gev = state.gev + 1

if self.Pg is None:
@@ -205,14 +203,14 @@ def complete_g(self, state):

return self


class InitialProposal(Proposal):
def complete(self, state):
dot = self.problem.vs.dot
addmul = self.problem.vs.addmul
self.xnorm = dot(self.x, self.x) ** 0.5
self.Pxnorm = dot(self.Px, self.Px) ** 0.5
self.complete_y(state)
self.complete_g(state)
self.complete_y_g(state)

self.dy = None
self.dxnorm = None
@@ -256,7 +254,8 @@ class Problem(object):
""" Defines a problem.

"""
def __init__(self, objective, gradient,
def __init__(self, objective=None, gradient=None,
objective_gradient=None,
hessian_vector_product=None,
inverse_hessian_vector_product=None,
vs=None,
@@ -282,8 +281,15 @@ def __init__(self, objective, gradient,
self._precond = precond
self.vs = vs

self.problem_dual_eval = False

if objective_gradient is None:
if objective is None or gradient is None:
raise ValueError("objective and gradient must both be given when objective_gradient is None.")

self._objective = objective
self._gradient = gradient
self._objective_gradient = objective_gradient
self._hessian_vector_product = hessian_vector_product
self._inverse_hessian_vector_product = inverse_hessian_vector_product
self.atol = atol
@@ -314,15 +320,30 @@ def check_preconditioner(self, x0):
if vs.dot(d, d) > 1e-6 * vs.dot(x0, x0):
raise ValueError("Preconditioner's vPp and Qvp are not inverses.")


def f(self, x):
return self._objective(x)
if self._objective_gradient is not None:
f, _ = self.fg(x)
else:
f = self._objective(x)
return f

def g(self, x):
""" This returns the gradient for the original variable"""
g = self._gradient(x)
if self._objective_gradient is not None:
_, g = self.fg(x)
else:
g = self._gradient(x)
return g

def fg(self, x):
""" This return the gradient and the function """
if self._objective_gradient is not None:
f, g = self._objective_gradient(x)
else:
g = self.g(x)
f = self.f(x)
return f, g

def Hvp(self, x, v):
""" This returns the raw hessian product H_x v
uppercase H means Hessian, not Hessian inverse.
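For context, a minimal sketch of how the new combined objective_gradient hook is intended to be used (the quadratic below is illustrative and not part of this change; it assumes the remaining Problem defaults for the vector space and preconditioner apply):

    import numpy
    from abopt.base import Problem

    # One callable returns both the objective value and its gradient,
    # so a shared forward pass is evaluated only once.
    def objective_gradient(x):
        r = x - 1.0
        return numpy.sum(r ** 2), 2 * r

    problem = Problem(objective_gradient=objective_gradient)

    x = numpy.zeros(3)
    f, g = problem.fg(x)   # combined evaluation
    f1 = problem.f(x)      # also routed through fg() when objective_gradient is given
    g1 = problem.g(x)      # likewise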
2 changes: 1 addition & 1 deletion abopt/linesearch/__init__.py
@@ -28,6 +28,6 @@ def nullsearch(problem, state, z, rate, maxiter):

addmul = problem.vs.addmul
Px1 = addmul(state.Px, z, -rate)
prop = Proposal(problem, Px=Px1, z=z).complete_y(state)
prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state)
print(Px1, state.Px, z)
return prop, rate
4 changes: 2 additions & 2 deletions abopt/linesearch/backtrace.py
@@ -13,7 +13,7 @@ def backtrace(problem, state, z, rate, maxiter, c=1e-5, tau=0.5):
return None, None

Px1 = addmul(state.Px, z, -rate)
prop = Proposal(problem, Px=Px1, z=z).complete_y(state)
prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state)

i = 0
propmin = prop
@@ -35,6 +35,6 @@

rate *= tau
Px1 = addmul(state.Px, z, -rate)
prop = Proposal(problem, Px=Px1, z=z).complete_y(state)
prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state)
i = i + 1
return None, None
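For reference, backtrace is an Armijo-style backtracking line search; a generic sketch of the idea (not abopt's Proposal-based API, all names are illustrative):

    import numpy

    def armijo_backtrack(fg, x, z, rate, c=1e-5, tau=0.5, maxiter=30):
        # z is a descent direction; shrink the step by tau until the
        # decrease satisfies the sufficient-decrease (Armijo) condition.
        f0, g0 = fg(x)
        slope = numpy.dot(g0, z)
        for _ in range(maxiter):
            x1 = x - rate * z
            f1, _ = fg(x1)
            if f1 <= f0 - c * rate * slope:
                return x1, rate
            rate *= tau
        return None, None   # line search failed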
4 changes: 2 additions & 2 deletions abopt/linesearch/exact.py
@@ -10,15 +10,15 @@ def exact(problem, state, z, rate, maxiter, c=0.5):
from scipy.optimize import minimize_scalar

Px1 = addmul(state.Px, z, - rate)
prop = Proposal(problem, Px=Px1, z=z).complete_y(state)
prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state)

best = [prop, 1.0]

def func(tau):
if tau == 0: return state.y

Px1 = addmul(state.Px, z, -tau * rate)
prop = Proposal(problem, Px=Px1, z=z).complete_y(state)
prop = Proposal(problem, Px=Px1, z=z).complete_y_g(state)

if prop.y < best[0].y:
best[0] = prop
67 changes: 63 additions & 4 deletions abopt/testing/__init__.py
@@ -2,6 +2,63 @@
from scipy.optimize import rosen, rosen_der, rosen_hess_prod, rosen_hess
import numpy

class ChiSquareProblem_dual(Problem):
"""
chisquare problem with

y = |J phi(x) - 1.0|^2

where the objective and gradient are evaluated together in a single combined pass (via objective_gradient)
"""
def __init__(self, J, phi=lambda x:x, phiprime=lambda x: 1, precond=None):
def f(x): return J.dot(phi(x))
def vjp(x, v): return v.dot(J) * phiprime(x)
def jvp(x, v): return J.dot(v * phiprime(x))

def objective_gradient(x):
y = f(x)
g = vjp(x, y - 1.0) * 2
return numpy.sum((y - 1.0) ** 2), g

def hessian(x, v):
v = numpy.array(v)
return vjp(x, jvp(x, v)) * 2

Problem.__init__(self, objective_gradient = objective_gradient,
hessian_vector_product = hessian,
precond = precond)

class ChiSquareProblem_dual2(Problem):
"""
chisquare problem with

y = |J phi(x) - 1.0|^2

where the objective and gradient are evaluated together in a single combined pass, and a standalone objective is also provided (via objective_gradient and objective)
"""
def __init__(self, J, phi=lambda x:x, phiprime=lambda x: 1, precond=None):
def f(x): return J.dot(phi(x))
def vjp(x, v): return v.dot(J) * phiprime(x)
def jvp(x, v): return J.dot(v * phiprime(x))

def objective_gradient(x):
y = f(x)
g = vjp(x, y - 1.0) * 2
return numpy.sum((y - 1.0) ** 2), g

def objective(x):
y = f(x)
return numpy.sum((y - 1.0) ** 2)

def hessian(x, v):
v = numpy.array(v)
return vjp(x, jvp(x, v)) * 2

Problem.__init__(self, objective_gradient = objective_gradient, objective=objective,
hessian_vector_product = hessian,
precond = precond)


class ChiSquareProblem(Problem):
"""
chisquare problem with
@@ -26,10 +83,12 @@ def hessian(x, v):
return vjp(x, jvp(x, v)) * 2

Problem.__init__(self,
objective=objective,
gradient=gradient,
hessian_vector_product=hessian,
precond=precond)
objective = objective,
gradient = gradient,
objective_gradient = None,
hessian_vector_product = hessian,
precond = precond)


from scipy.linalg import inv
def rosen_inverse_hess(x):
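As a quick sanity check of the new combined evaluation (illustrative only, not part of this change), the gradient returned by ChiSquareProblem_dual.fg can be compared against a central finite-difference gradient of f:

    import numpy
    from abopt.testing import ChiSquareProblem_dual

    J = numpy.array([[0, 0, 2, 1],
                     [0, 10, 2, 0],
                     [40, 100, 0, 0],
                     [400, 0, 0, 0]], dtype='f8')
    problem = ChiSquareProblem_dual(J=J)

    x = numpy.arange(4) * 0.1
    f, g = problem.fg(x)

    eps = 1e-6
    g_fd = numpy.array([(problem.f(x + eps * e) - problem.f(x - eps * e)) / (2 * eps)
                        for e in numpy.eye(4)])
    assert numpy.allclose(g, g_fd, rtol=1e-4)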