Commit: Merge only implemented solvers, ignore RMSProp, MomentumSGD and SAGE ... and a happy new year!
Showing 15 changed files with 1,033 additions and 1 deletion.
@@ -0,0 +1,17 @@
function J = ObjFun(x)
%OBJFUN Styblinski--Tang test function
%
% References:
% [1] : https://en.wikipedia.org/wiki/Test_functions_for_optimization
%
% Input:
% x : decision variables (column or row vector)
% Output:
% J : value of the objective function
%
% See also: TESTS.STYBLINSKITANG.STOCHGRAD
%

J = sum(x.^4 - 16.*x.^2 + 5.*x)./2;

end
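
The function is separable, so its global minimum sits at x_i ≈ -2.903534 in every coordinate, with a value of roughly -39.166 per dimension [1]. A quick sanity check (a hedged sketch; the package path Tests.StyblinskiTang is inferred from the See-also line, not shown in this diff):

% Sanity check (sketch): evaluate at the known minimiser of the
% Styblinski--Tang function; expect approx. -39.166 per dimension
xStar = -2.903534.*ones(2, 1);
J = Tests.StyblinskiTang.ObjFun(xStar)   % approx. -78.332 in 2-D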
@@ -0,0 +1,27 @@
function sg = StochGrad(idx, x)
%STOCHGRAD Stochastic gradient of the Styblinski--Tang test function
%
% References:
% [1] : https://en.wikipedia.org/wiki/Test_functions_for_optimization
%
% Input:
% idx : index of the gradient, 1..3
% x : current decision variables (column or row vector)
% Output:
% sg : stochastic gradient, column vector
%
% See also: TESTS.STYBLINSKITANG.OBJFUN
%

switch idx
    case 1
        sg = 2.*x(:).^3;
    case 2
        sg = -16.*x(:);
    case 3
        sg = ones(length(x), 1).*2.5;
    otherwise
        error('Stochastic gradient index out of bounds!');
end

end
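
By construction, the three partial gradients sum to the full gradient of ObjFun, elementwise 2x^3 - 16x + 2.5 (half of d/dx(x^4 - 16x^2 + 5x)). A hedged verification sketch, again assuming the Tests.StyblinskiTang package path:

% Check (sketch): the three stochastic gradients sum to the full one
x = [1; -2];
g = zeros(length(x), 1);
for idx = 1 : 1 : 3
    g = g + Tests.StyblinskiTang.StochGrad(idx, x);
end
gFull = 2.*x.^3 - 16.*x + 2.5;   % analytic full gradient
disp(norm(g - gFull));           % should be numerically zero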
Binary file not shown. (Presumably the +Tests/Qa500D MAT-file with the coefficient data loaded by the 500-D demo below.)
@@ -0,0 +1,174 @@
%% Initialise

clear
close all
clc
%% Objective function and its gradients

% Store coefficients (all Qa_i are symmetric)
Qa(:, :, 1) = [ ...
    1.610e+00, 6.334e-01, 1.027e+00; ...
    6.334e-01, 1.989e+00, 3.588e+00; ...
    1.027e+00, 3.588e+00, 6.119e-01];
Qa(:, :, 2) = [ ...
    1.326e+00, 1.426e+00, 1.722e+00; ...
    1.426e+00, 8.150e-01, 3.847e+00; ...
    1.722e+00, 3.847e+00, 1.572e-01];
Qa(:, :, 3) = [ ...
    1.944e+00, 2.213e+00, 9.709e-01; ...
    2.213e+00, 8.345e+00, 2.251e+00; ...
    9.709e-01, 2.251e+00, 2.143e+00];
Qa(:, :, 4) = [ ...
    4.476e+00, 1.311e+00, 1.070e+00; ...
    1.311e+00, 1.005e+00, 2.457e+00; ...
    1.070e+00, 2.457e+00, 1.288e+00];
Qa(:, :, 5) = [ ...
    4.817e+00, 8.572e-01, 3.927e+00; ...
    8.572e-01, 1.317e+00, 1.326e+00; ...
    3.927e+00, 1.326e+00, 1.518e+00];
Qa(:, :, 6) = [ ...
    5.076e-01, 2.797e+00, 5.638e-01; ...
    2.797e+00, 6.397e+00, 2.024e+00; ...
    5.638e-01, 2.024e+00, 2.789e+00];
Qa(:, :, 7) = [ ...
    3.269e-01, 2.386e-01, 3.345e+00; ...
    2.386e-01, 2.251e+00, 4.631e+00; ...
    3.345e+00, 4.631e+00, 2.869e+00];
Qa(:, :, 8) = [ ...
    3.377e-01, 1.830e+00, 1.292e+00; ...
    1.830e+00, 3.729e-01, 2.225e+00; ...
    1.292e+00, 2.225e+00, 2.145e+00];
Qa(:, :, 9) = [ ...
    7.065e-01, 4.301e+00, 1.533e+00; ...
    4.301e+00, 2.973e-01, 1.655e+00; ...
    1.533e+00, 1.655e+00, 4.824e+00];
Qa(:, :, 10) = [ ...
    6.107e+00, 3.592e-01, 9.282e-01; ...
    3.592e-01, 2.499e+00, 1.400e+00; ...
    9.282e-01, 1.400e+00, 1.414e+00];
Qa(:, :, 11) = [ ...
    1.174e+00, 1.761e+00, 1.028e+00; ...
    1.761e+00, 3.724e+00, 1.430e+00; ...
    1.028e+00, 1.430e+00, 9.004e-02];
Qa(:, :, 12) = [ ...
    5.300e+00, 6.501e-01, 2.003e+00; ...
    6.501e-01, 2.022e+00, 3.085e+00; ...
    2.003e+00, 3.085e+00, 5.210e-01];
Qa(:, :, 13) = [ ...
    3.421e+00, 1.030e+00, 2.393e-01; ...
    1.030e+00, 3.872e+00, 1.806e+00; ...
    2.393e-01, 1.806e+00, 2.305e-01];
Qa(:, :, 14) = [ ...
    9.676e-01, 1.394e+00, 6.407e-01; ...
    1.394e+00, 6.058e-01, 1.159e+00; ...
    6.407e-01, 1.159e+00, 4.025e+00];
Qa(:, :, 15) = [ ...
    3.178e+00, 4.478e+00, 1.695e+00; ...
    4.478e+00, 3.416e+00, 1.484e+00; ...
    1.695e+00, 1.484e+00, 1.688e-01];
Qa(:, :, 16) = [ ...
    2.482e+00, 3.579e+00, 8.461e-01; ...
    3.579e+00, 5.098e-01, 1.880e+00; ...
    8.461e-01, 1.880e+00, 1.194e+00];
Qa(:, :, 17) = [ ...
    8.783e-01, 9.422e-01, 1.467e-01; ...
    9.422e-01, 3.140e-01, 4.093e+00; ...
    1.467e-01, 4.093e+00, 9.792e-01];
Qa(:, :, 18) = [ ...
    3.957e+00, 1.058e+00, 1.099e+00; ...
    1.058e+00, 5.548e+00, 3.787e+00; ...
    1.099e+00, 3.787e+00, 1.741e-01];
Qa(:, :, 19) = [ ...
    1.187e+00, 1.090e+00, 4.036e+00; ...
    1.090e+00, 1.794e-01, 9.418e-01; ...
    4.036e+00, 9.418e-01, 4.685e-01];
Qa(:, :, 20) = [ ...
    3.290e+00, 1.562e+00, 5.638e-01; ...
    1.562e+00, 2.019e+00, 3.075e+00; ...
    5.638e-01, 3.075e+00, 7.198e-01];
% Store the number of addends in the stochastic objective
nQa = size(Qa, 3);

% Define the objective function
objFun = @(x) 0.5*([x', 1]*sum(Qa, 3)*[x; 1])./nQa;

% Define the full gradient and the stochastic gradient functions
grad = @(x) ([x; 1]'*sum(Qa, 3)*eye(3, 2))'./nQa;
gradStoch = @(i, x) ([x; 1]'*Qa(:, :, i)*eye(3, 2))';
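
Here z = [x; 1] turns the affine-quadratic objective into a pure quadratic form 0.5*z'*Q*z, and right-multiplying by eye(3, 2) keeps only the first two columns, i.e. the derivative with respect to x (the homogeneous coordinate is constant). Since objFun averages over the nQa addends, the mean of the stochastic gradients equals the full gradient; a brief check sketch using the definitions above:

% Check (sketch): averaging the per-addend gradients recovers grad(x)
xTest = randn(2, 1);
gAvg = zeros(2, 1);
for k = 1 : 1 : nQa
    gAvg = gAvg + gradStoch(k, xTest);
end
gAvg = gAvg./nQa;
disp(norm(gAvg - grad(xTest)));   % should be numerically zero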
%% Compute the objective function values (for plotting)

rangeX = linspace(-2, +4, 100);
rangeY = linspace(-6, +2, 100);

[X, Y] = meshgrid(rangeX, rangeY);

Z = zeros(size(X));

% Note the meshgrid layout: X varies along columns and Y along rows,
% hence the Z(j, i) indexing
for i = 1 : 1 : length(rangeX)
    for j = 1 : 1 : length(rangeY)
        Z(j, i) = objFun([rangeX(i); rangeY(j)]);
    end
end
%% Perform optimisation

x0 = [3; -4];
nIter = 500;
idxSG = randi(nQa, 1, nIter);

solvers = { ...
    'VanillaSGD', ...
    'AdaGrad', ...
    'AdaGradDecay', ...
    'Adadelta', ...
    'Adam', ...
    'Adamax', ...
    };

xMat.VanillaSGD = VanillaSGD(gradStoch, x0, nIter, idxSG, 0.01);
xMat.AdaGrad = AdaGrad(gradStoch, x0, nIter, idxSG, 0.1);
xMat.AdaGradDecay = AdaGradDecay(gradStoch, x0, nIter, idxSG, 0.1, 0.9);
xMat.Adadelta = Adadelta(gradStoch, x0, nIter, idxSG, 0.95);
xMat.Adam = Adam(gradStoch, x0, nIter, idxSG, 0.1, 0.8, 0.999);
xMat.Adamax = Adamax(gradStoch, x0, nIter, idxSG, 0.1, 0.9, 0.999);

for i = 1 : 1 : length(solvers)
    objFunMat.(solvers{i}) = ...
        cellfun(objFun, num2cell(xMat.(solvers{i}), 1));
end
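
All solvers share the calling convention (gradFun, x0, nIter, idxSG, stepSize, ...) and, judging by the cellfun/num2cell post-processing, return the full iterate history as a matrix with one column per iterate. The solver implementations themselves are not among the hunks shown here; a minimal VanillaSGD consistent with this interface might look like the following (a sketch under those assumptions, not the repository's actual code):

function xMat = VanillaSGD(sg, x0, nIter, idxSG, stepSize)
%VANILLASGD Sketch of plain SGD matching the calls above (assumed API)
xMat = zeros(length(x0), nIter + 1);
xMat(:, 1) = x0;
for k = 1 : 1 : nIter
    % Step along the negative stochastic gradient selected by the
    % k-th column of idxSG (a scalar index in the purely stochastic
    % case, a column of indices when gradients are averaged)
    xMat(:, k + 1) = xMat(:, k) - stepSize.*sg(idxSG(:, k), xMat(:, k));
end
end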
%% Plot results -- Convergence plot

figConvergence = figure( ...
    'Name', 'Convergence behaviour of different solvers');
for i = 1 : 1 : length(solvers)
    plot(objFunMat.(solvers{i}));
    hold on
end
hold off
legend(solvers);
xlim([1, nIter + 1]);
%% Plot results -- Contour plot of the objective function

figContour = figure('Name', 'Contour plot of the objective function');

% (Rendered as a surface via surf rather than a contour plot)
surf(X, Y, Z, 'EdgeAlpha', 0.1);
colormap bone
xlabel('x');
ylabel('y');
xlim(rangeX([1, end]));
ylim(rangeY([1, end]));

hold on
for i = 1 : 1 : length(solvers)
    plot3(xMat.(solvers{i})(1, :), xMat.(solvers{i})(2, :), ...
        objFunMat.(solvers{i}), ...
        'LineWidth', 1.4);
end
hold off
legend([{'Obj fun'}, solvers]);
@@ -0,0 +1,101 @@
%% Initialise

clear
close all
clc
%% Objective function and its gradients

% Load coefficients (all Qa_i are symmetric); the MAT-file evidently
% provides both the cell array Qa and its average QaAvg used below
load +Tests/Qa500D

% Store the number of decision variables
nDecVar = size(Qa{1}, 1) - 1;

% Store the number of addends in the stochastic objective
nQa = length(Qa);

% Define the objective function
objFun = @(x) 0.5*([x', 1]*QaAvg*[x; 1]);

% Define the full gradient and the stochastic gradient functions
grad = @(x) ([x; 1]'*QaAvg(:, 1 : 1 : end - 1))';
gradStoch = @(i, x) ([x; 1]'*Qa{i}(:, 1 : 1 : end - 1))';
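
As in the 2-D demo, z = [x; 1] makes each addend the quadratic form 0.5*z'*Qa{i}*z, and dropping the last column via (:, 1 : 1 : end - 1) extracts the derivative with respect to x only. A quick finite-difference spot check of the analytic gradient (a hedged sketch; the variable names are local to this snippet):

% Spot check (sketch): compare grad to a central finite difference
xTest = randn(nDecVar, 1);
e1 = [1; zeros(nDecVar - 1, 1)];   % first coordinate direction
h = 1e-6;
fd1 = (objFun(xTest + h.*e1) - objFun(xTest - h.*e1))./(2*h);
g = grad(xTest);
disp(abs(fd1 - g(1)));             % should be around 1e-8 or smaller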
%% Perform optimisation -- Purely stochastic gradient

x0 = ones(nDecVar, 1);
nIter = 1000;
idxSG = randi(nQa, 1, nIter);

solvers = { ...
    'VanillaSGD', ...
    'AdaGrad', ...
    'AdaGradDecay', ...
    'Adadelta', ...
    'Adam', ...
    'Adamax', ...
    };

xMat.VanillaSGD = VanillaSGD(gradStoch, x0, nIter, idxSG, 1e-1);
xMat.AdaGrad = AdaGrad(gradStoch, x0, nIter, idxSG, 1e-1);
xMat.AdaGradDecay = AdaGradDecay(gradStoch, x0, nIter, idxSG, 1e-1, 0.9);
xMat.Adadelta = Adadelta(gradStoch, x0, nIter, idxSG, 0.95);
xMat.Adam = Adam(gradStoch, x0, nIter, idxSG, 1e-1, 0.9, 0.999);
xMat.Adamax = Adamax(gradStoch, x0, nIter, idxSG, 1e-1, 0.9, 0.999);

for i = 1 : 1 : length(solvers)
    objFunMat.(solvers{i}) = ...
        cellfun(objFun, num2cell(xMat.(solvers{i}), 1));
end
%% Plot results -- Convergence plot -- Purely stochastic gradient

figure
ax(1) = subplot(1, 2, 1);
for i = 1 : 1 : length(solvers)
    plot(objFunMat.(solvers{i}));
    hold on
end
hold off
grid on
legend(solvers);
title(['Convergence behaviour of different solvers, ', ...
    'stochastic gradient only'])
%% Perform optimisation -- Averaged stochastic gradient

% Average over 10 random gradients
idxSG = randi(nQa, 10, nIter);

avgSG = @(idx, x) AvgGrad(gradStoch, idx, x);
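
AvgGrad is one of the files in this commit whose diff is not shown above; from its use here it evidently averages the stochastic gradients indexed by one column of idxSG. A minimal implementation consistent with that usage (a sketch, not necessarily the committed code):

function g = AvgGrad(sg, idx, x)
%AVGGRAD Sketch: mean of the stochastic gradients listed in idx
g = zeros(length(x), 1);
for k = 1 : 1 : length(idx)
    g = g + sg(idx(k), x);
end
g = g./length(idx);
end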
xMat.VanillaSGD = VanillaSGD(avgSG, x0, nIter, idxSG, 1e-1);
xMat.AdaGrad = AdaGrad(avgSG, x0, nIter, idxSG, 1e-1);
xMat.AdaGradDecay = AdaGradDecay(avgSG, x0, nIter, idxSG, 1e-1, 0.9);
xMat.Adadelta = Adadelta(avgSG, x0, nIter, idxSG, 0.95);
xMat.Adam = Adam(avgSG, x0, nIter, idxSG, 1e-1, 0.9, 0.999);
xMat.Adamax = Adamax(avgSG, x0, nIter, idxSG, 1e-1, 0.9, 0.999);

for i = 1 : 1 : length(solvers)
    objFunMat.(solvers{i}) = ...
        cellfun(objFun, num2cell(xMat.(solvers{i}), 1));
end
%% Plot results -- Convergence plot -- Averaged stochastic gradient

ax(2) = subplot(1, 2, 2);
for i = 1 : 1 : length(solvers)
    plot(objFunMat.(solvers{i}));
    hold on
end
hold off
grid on
legend(solvers);
title(['Convergence behaviour of different solvers, ', ...
    'averaged stochastic gradient']);

linkaxes(ax, 'xy');
xlim([1, nIter + 1]);