Bug fix to PD3O with stochastic gradient optimisers #2043

Open · wants to merge 4 commits into base: master
16 changes: 14 additions & 2 deletions Wrappers/Python/cil/optimisation/algorithms/PD3O.py
@@ -18,7 +18,7 @@


from cil.optimisation.algorithms import Algorithm
from cil.optimisation.functions import ZeroFunction
from cil.optimisation.functions import ZeroFunction, SGFunction, SVRGFunction, LSVRGFunction, SAGAFunction, SAGFunction, ApproximateGradientSumFunction
import logging
import warnings
class PD3O(Algorithm):
@@ -125,7 +125,19 @@ def update(self):
self.g.proximal(self.x_old, self.gamma, out = self.x)

# update step
self.f.gradient(self.x, out=self.x_old)

if isinstance(self.f, (SVRGFunction, LSVRGFunction)):
Contributor

It is strange that we need this only for SVRG/LSVRG. All the stochastic functions implement an approximate_gradient method, which is called by the gradient method of the parent class; at least that is how I implemented it. I do not think a separate gradient method is needed in LSVRG/SVRG.

Also, these lines can be moved into the parent gradient method.

In the end you would have an if/else statement dispatching between the ApproximateGradientSumFunction and Function classes, computing the approximate_gradient and the gradient respectively (see the sketch below).
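A minimal sketch of that suggestion (illustrative only; it reuses the names from the diff above and is not the merged implementation):

# Hypothetical dispatch for PD3O.update(): no SVRG/LSVRG special case, only a
# split between stochastic (ApproximateGradientSumFunction) and deterministic
# (plain Function) objectives.
if isinstance(self.f, ApproximateGradientSumFunction):
    # stochastic case: gradient estimate from a single sampled subset
    self.f.approximate_gradient(self.x, self.f.function_num, out=self.x_old)
else:
    # deterministic case: full gradient
    self.f.gradient(self.x, out=self.x_old)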

Member Author

Thanks Vaggelis - I think we wrote SVRG and LSVRG like that, overriding the gradient method of the parent class so that the sampler is not called when a full gradient snapshot is calculated. For example, if you take a full gradient every 2n iterations in SVRG and use a sequential sampler, then the 0th subset will be used half as often as any other subset. By overriding the parent class gradient method we ensure that the sampler is only ever called when an approximate gradient, not a snapshot, is needed.
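As a rough illustration of that override (a sketch, not the CIL source; the helper _snapshot_due and the sampler call are assumptions):

# Sketch of an SVRG-style gradient() override: the sampler is consulted only
# when an approximate gradient is needed, so a snapshot iteration does not
# consume a sample index.
def gradient(self, x, out=None):
    if self._snapshot_due():  # hypothetical helper encoding the snapshot schedule
        return self._update_full_gradient_and_return(x, out=out)
    self.function_num = self.sampler.next()  # assumes the sampler exposes next()
    return self.approximate_gradient(x, self.function_num, out=out)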

    if len(self.f.data_passes_indices[-1]) == self.f.sampler.num_indices:
        self.f._update_full_gradient_and_return(self.x, out=self.x_old)
    else:
        self.f.approximate_gradient(self.x, self.f.function_num, out=self.x_old)
        self.f._data_passes_indices.pop(-1)
elif isinstance(self.f, ApproximateGradientSumFunction):
    self.f.approximate_gradient(self.x, self.f.function_num, out=self.x_old)
else:
    self.f.gradient(self.x, out=self.x_old)


self.x_old *= self.gamma
self.grad_f += self.x_old
self.x.sapyb(2, self.grad_f, -1.0, out=self.x_old) # 2*x - x_old + gamma*(grad_f_x_old) - gamma*(grad_f_x)
5 changes: 4 additions & 1 deletion Wrappers/Python/cil/optimisation/functions/SVRGFunction.py
@@ -224,13 +224,16 @@ class LSVRGFunction(SVRGFunction):
snapshot_update_probability: positive float, default: 1/n
The probability of updating the full gradient (taking a snapshot) at each iteration. The default is :math:`1./n` so, in expectation, a snapshot will be taken every :math:`n` iterations.
store_gradients : bool, default: `False`
Flag indicating whether to store an update a list of gradients for each function :math:`f_i` or just to store the snapshot point :math:`\tilde{x}` and its gradient :math:`\nabla \sum_{i=0}^{n-1}f_i(\tilde{x})`.
Flag indicating whether to store and update a list of gradients for each function :math:`f_i` or just to store the snapshot point :math:`\tilde{x}` and its gradient :math:`\nabla \sum_{i=0}^{n-1}f_i(\tilde{x})`.
seed: int
Seed for the random number generator (numpy.random).


Reference
---------
Kovalev, D., Horváth, S. & Richtárik, P. (2020). Don’t Jump Through Hoops and Remove Those Loops: SVRG and Katyusha are Better Without the Outer Loop. Proceedings of the 31st International Conference on Algorithmic Learning Theory, in Proceedings of Machine Learning Research 117:451-467. Available from https://proceedings.mlr.press/v117/kovalev20a.html.


Note
----
In the case where `store_gradients=False` the memory requirement is 4 times the image size (1 stored full gradient at the "snapshot", one stored "snapshot" point and two lots of intermediary calculations). Alternatively, if `store_gradients=True` the memory requirement is `n+4` times the image size (`n` gradients at the snapshot, one for each function in the sum, one stored full gradient at the "snapshot", one stored "snapshot" point and two lots of intermediary calculations).
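A hedged usage sketch for `LSVRGFunction` (constructor arguments inferred from the docstring above and the tests added in this PR; the parameter values are arbitrary choices for illustration):

import numpy as np
from cil.framework import VectorData
from cil.optimisation.functions import LeastSquares, LSVRGFunction
from cil.optimisation.operators import MatrixOperator
from cil.optimisation.utilities import Sampler

# three quadratic subset functions f_i, mirroring the tests added below
b = VectorData(np.array(range(1, 22), dtype=np.float64))
functions = []
for i in range(3):
    diagonal = np.zeros(21)
    diagonal[7 * i:7 * (i + 1)] = 1
    A = MatrixOperator(np.diag(diagonal))
    functions.append(LeastSquares(A, A.direct(b)))

sampler = Sampler.sequential(3)
f = LSVRGFunction(functions, sampler, snapshot_update_probability=1./3, store_gradients=False)
grad = f.gradient(b)  # either a full snapshot gradient or a variance-reduced subset gradient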
38 changes: 34 additions & 4 deletions Wrappers/Python/test/test_algorithm_convergence.py
@@ -1,6 +1,6 @@
from cil.optimisation.algorithms import SPDHG, PDHG
from cil.optimisation.functions import L2NormSquared, IndicatorBox, BlockFunction, ZeroFunction
from cil.optimisation.operators import BlockOperator, IdentityOperator, MatrixOperator
from cil.optimisation.algorithms import SPDHG, PDHG, PD3O
from cil.optimisation.functions import L2NormSquared, IndicatorBox, BlockFunction, ZeroFunction, TotalVariation, MixedL21Norm
from cil.optimisation.operators import BlockOperator, IdentityOperator, MatrixOperator, GradientOperator
from cil.optimisation.utilities import Sampler
from cil.framework import AcquisitionGeometry, BlockDataContainer, BlockGeometry, VectorData

@@ -103,4 +103,34 @@ def test_SPDHG_toy_example(self):
self.assertNumpyArrayAlmostEqual(
alg_stochastic.x.as_array(), u_cvxpy.value)
self.assertNumpyArrayAlmostEqual(
alg_stochastic.x.as_array(), b.as_array(), decimal=6)
alg_stochastic.x.as_array(), b.as_array(), decimal=6)

def test_pd3o_convergence(self):
data = dataexample.CAMERA.get(size=(32, 32))
# PD3O convergence test using TV denoising

# regularisation parameter
alpha = 0.11

# use TotalVariation from CIL (with Fast Gradient Projection algorithm)
TV = TotalVariation(max_iteration=200)
tv_cil = TV.proximal(data, tau=alpha)

F = alpha * MixedL21Norm()
operator = GradientOperator(data.geometry)
norm_op = operator.norm()

# setup PD3O denoising (H proximable and G, F = 1/4 * L2NormSquared)
H = alpha * MixedL21Norm()
G = 0.25 * L2NormSquared(b=data)
F = 0.25 * L2NormSquared(b=data)
gamma = 2./F.L
delta = 1./(gamma*norm_op**2)

pd3O_with_f = PD3O(f=F, g=G, h=H, operator=operator, gamma=gamma, delta=delta,
update_objective_interval=100)
pd3O_with_f.run(1000)

# PD3O vs the TV proximal (FGP) solution
np.testing.assert_allclose(
tv_cil.array, pd3O_with_f.solution.array, atol=1e-2)
96 changes: 68 additions & 28 deletions Wrappers/Python/test/test_algorithms.py
@@ -35,7 +35,7 @@
from cil.optimisation.operators import GradientOperator, BlockOperator, MatrixOperator


from cil.optimisation.functions import MixedL21Norm, BlockFunction, L1Norm, KullbackLeibler, IndicatorBox, LeastSquares, ZeroFunction, L2NormSquared, OperatorCompositionFunction, TotalVariation
from cil.optimisation.functions import MixedL21Norm, BlockFunction, L1Norm, KullbackLeibler, IndicatorBox, LeastSquares, ZeroFunction, L2NormSquared, OperatorCompositionFunction, TotalVariation, SGFunction, SVRGFunction, SAGAFunction, SAGFunction, LSVRGFunction
from cil.optimisation.algorithms import Algorithm, GD, CGLS, SIRT, FISTA, ISTA, SPDHG, PDHG, LADMM, PD3O, PGD, APGD


@@ -1752,32 +1752,72 @@ def test_PD3O_PDHG_denoising_1_iteration(self):
np.testing.assert_allclose(
pdhg.objective[-1], pd3O.objective[-1], atol=1e-2)

def test_pd3o_convergence(self):

# pd30 convergence test using TV denoising

# regularisation parameter
alpha = 0.11

# use TotalVariation from CIL (with Fast Gradient Projection algorithm)
TV = TotalVariation(max_iteration=200)
tv_cil = TV.proximal(self.data, tau=alpha)

F = alpha * MixedL21Norm()
operator = GradientOperator(self.data.geometry)
norm_op = operator.norm()

# setup PD3O denoising (H proximalble and G,F = 1/4 * L2NormSquared)
H = alpha * MixedL21Norm()
G = 0.25 * L2NormSquared(b=self.data)
F = 0.25 * L2NormSquared(b=self.data)
gamma = 2./F.L
delta = 1./(gamma*norm_op**2)

def test_PD3O_stochastic_f(self):
sampler=Sampler.sequential(3)
initial = VectorData(np.zeros(21))
b = VectorData(np.array(range(1,22)))
functions=[]
for i in range(3):
    diagonal = np.zeros(21)
    diagonal[7*i:7*(i+1)] = 1
    A = MatrixOperator(np.diag(diagonal))
    functions.append(LeastSquares(A, A.direct(b)))

H1 = 0.1 * L1Norm()
operator = IdentityOperator(initial.geometry)
G1 = IndicatorBox(lower=0)

pd3O_with_f = PD3O(f=F, g=G, h=H, operator=operator, gamma=gamma, delta=delta,
update_objective_interval=100)
pd3O_with_f.run(1000)

# pd30 vs fista
np.testing.assert_allclose(
tv_cil.array, pd3O_with_f.solution.array, atol=1e-2)
f=SVRGFunction(functions, sampler, snapshot_update_interval=3)
algo_pd3o = PD3O(f=f, g=G1, h=H1, operator=operator)
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 1)
self.assertEqual(f.data_passes_indices, [[0,1,2]])
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 4/3)
self.assertEqual(f.data_passes_indices, [[0,1,2], [0]])
algo_pd3o.run(1)
self.assertAlmostEqual(f.data_passes[-1], 5/3)
self.assertEqual(f.data_passes_indices, [[0,1,2], [0], [1]])
algo_pd3o.run(1)
self.assertAlmostEqual(f.data_passes[-1], 8/3)
self.assertEqual(f.data_passes_indices, [[0,1,2], [0], [1], [0,1,2]])

sampler=Sampler.sequential(3)
f=LSVRGFunction(functions, sampler)
algo_pd3o = PD3O(f=f, g=G1, h=H1, operator=operator)
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 1)
self.assertEqual(f.data_passes_indices, [[0,1,2]])

sampler=Sampler.sequential(3)
f=SGFunction(functions, sampler)
algo_pd3o = PD3O(f=f, g=G1, h=H1, operator=operator)
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 1/3)
self.assertEqual(f.data_passes_indices, [[0]])
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 2/3)
self.assertEqual(f.data_passes_indices, [[0], [1]])

sampler=Sampler.sequential(3)
f=SAGFunction(functions, sampler)
algo_pd3o = PD3O(f=f, g=G1, h=H1, operator=operator)
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 1/3)
self.assertEqual(f.data_passes_indices, [[0]])
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 2/3)
self.assertEqual(f.data_passes_indices, [[0], [1]])

sampler=Sampler.sequential(3)
f=SAGAFunction(functions, sampler)
algo_pd3o = PD3O(f=f, g=G1, h=H1, operator=operator)
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 1/3)
self.assertEqual(f.data_passes_indices, [[0]])
algo_pd3o.run(1)
self.assertEqual(f.data_passes[-1], 2/3)
self.assertEqual(f.data_passes_indices, [[0], [1]])
