First commit for dropout-trained neural-nets
Philip-Bachman committed Dec 18, 2012
0 parents commit ec52a81
Showing 10 changed files with 907 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
*.mat
*.m~
*.fig
164 changes: 164 additions & 0 deletions ActFunc.m
@@ -0,0 +1,164 @@
classdef ActFunc < handle
% This is a class for managing activation functions to be used by the hidden
% and output layers of a neural-net.
%

properties
% func_type determines which activation function to use
func_type
end

methods
function [self] = ActFunc( func_type )
% Constructor for ActFunc class
if ~exist('func_type','var')
func_type = 1;
end
self.func_type = func_type;
return
end

function [ acts ] = feedforward(self, pre_values, pre_weights)
% Compute feed-forward activations using the selected activation function
switch self.func_type
case 1
acts = ActFunc.linear_ff(pre_values, pre_weights);
case 2
acts = ActFunc.sigmoid_ff(pre_values, pre_weights);
case 3
acts = ActFunc.tanh_ff(pre_values, pre_weights);
otherwise
error('No valid activation function type selected.');
end
return
end

function [ node_grads ] = backprop(...
self, post_grads, post_weights, pre_values, pre_weights)
% Backpropagate gradients through the selected activation function
switch self.func_type
case 1
node_grads = ActFunc.linear_bp(...
post_grads, post_weights, pre_values, pre_weights);
case 2
node_grads = ActFunc.sigmoid_bp(...
post_grads, post_weights, pre_values, pre_weights);
case 3
node_grads = ActFunc.tanh_bp(...
post_grads, post_weights, pre_values, pre_weights);
otherwise
error('No valid activation function type selected.');
end
return
end
end

methods (Static = true)
% The static methods for ActFunc implement the feed-forward and backprop
% computations for each activation function.
%

function [ cur_acts ] = linear_ff(pre_acts, pre_weights)
% Compute simple linear activation function.
%
% Parameters:
% pre_acts: previous layer activations (obs_count x pre_dim)
% pre_weights: weights from pre -> cur (pre_dim x cur_dim)
% Outputs:
% cur_acts: activations at current layer (obs_count x cur_dim)
%
cur_acts = pre_acts * pre_weights;
return
end

function [ cur_grads ] = ...
linear_bp(post_grads, post_weights, pre_acts, pre_weights)
% Compute the gradient for each node in the current layer, given the
% gradients in post_grads for nodes at the next layer.
%
% Parameters:
% post_grads: grads at next layer (obs_count x post_dim)
% post_weights: weights from current to post (cur_dim x post_dim)
% pre_acts: activations at previous layer (obs_count x pre_dim)
% pre_weights: weights from prev to current (pre_dim x cur_dim)
% Outputs:
% cur_grads: gradients at current layer (obs_count x cur_dim)
%
cur_grads = post_grads * post_weights';
return
end

function [ cur_acts ] = sigmoid_ff(pre_acts, pre_weights)
% Compute simple sigmoid activation function.
%
% Parameters:
% pre_acts: previous layer activations (obs_count x pre_dim)
% pre_weights: weights from pre -> cur (pre_dim x cur_dim)
% Outputs:
% cur_acts: activations at current layer (obs_count x cur_dim)
%
cur_acts = 1 ./ (1 + exp(-(pre_acts * pre_weights)));
return
end

function [ cur_grads ] = ...
sigmoid_bp(post_grads, post_weights, pre_acts, pre_weights)
% Compute the gradient for each node in the current layer, given the
% gradients in post_grads for nodes at the next layer.
%
% Parameters:
% post_grads: grads at next layer (obs_count x post_dim)
% post_weights: weights from current to post (cur_dim x post_dim)
% pre_acts: activations at previous layer (obs_count x pre_dim)
% pre_weights: weights from prev to current (pre_dim x cur_dim)
% Outputs:
% cur_grads: gradients at current layer (obs_count x cur_dim)
%
% sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)) = e^-x / (1 + e^-x)^2
e_mx = exp(-pre_acts * pre_weights);
sig_grads = e_mx ./ (1 + e_mx).^2;
cur_grads = sig_grads .* (post_grads * post_weights');
return
end

function [ cur_acts ] = tanh_ff(pre_acts, pre_weights)
% Compute simple hyperbolic tangent activation function.
%
% Parameters:
% pre_acts: previous layer activations (obs_count x pre_dim)
% pre_weights: weights from pre -> cur (pre_dim x cur_dim)
% Outputs:
% cur_acts: activations at current layer (obs_count x cur_dim)
%
cur_acts = tanh(pre_acts * pre_weights);
return
end

function [ cur_grads ] = ...
tanh_bp(post_grads, post_weights, pre_acts, pre_weights)
% Compute the gradient for each node in the current layer given
% the gradients in post_grads for nodes at the next layer.
%
% Parameters:
% post_grads: grads at next layer (obs_dim x post_dim)
% post_weights: weights from current to post (cur_dim x post_dim)
% pre_acts: activations at previous layer (obs_dim x pre_dim)
% pre_weights: weights from prev to current (pre_dim x cur_dim)
% Outputs:
% cur_grads: gradients at current layer (obs_dim x cur_dim)
%
% tanh'(x) = 1 - tanh(x)^2
tanh_grads = 1 - (tanh(pre_acts * pre_weights)).^2;
cur_grads = tanh_grads .* (post_grads * post_weights');
return
end

end

end





%%%%%%%%%%%%%%
% EYE BUFFER %
%%%%%%%%%%%%%%
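A minimal usage sketch for ActFunc (editorial illustration, not part of the commit): the matrices X, W_pre, W_post, and G_post below are made-up test data, and the check assumes an implied loss L(F) = sum(sum(G_post .* (sigmoid(F) * W_post))) with F = X * W_pre, so that backprop should return dL/dF.

% Sketch: run a sigmoid ActFunc forward/backward and spot-check one
% gradient entry against a central finite difference. All inputs here
% are made-up illustration data, not values from the commit.
act = ActFunc(2);                          % func_type 2 -> sigmoid
X = randn(5, 4);                           % obs_count x pre_dim
W_pre = randn(4, 3);                       % pre_dim x cur_dim
W_post = randn(3, 2);                      % cur_dim x post_dim
G_post = randn(5, 2);                      % pretend grads at the next layer
A_cur = act.feedforward(X, W_pre);         % obs_count x cur_dim activations
G_cur = act.backprop(G_post, W_post, X, W_pre);
F = X * W_pre;                             % current layer pre-activations
sig = @(Z) 1 ./ (1 + exp(-Z));
L_of = @(F_new) sum(sum(G_post .* (sig(F_new) * W_post)));
delta = 1e-6;
F_hi = F; F_hi(1,1) = F_hi(1,1) + delta;
F_lo = F; F_lo(1,1) = F_lo(1,1) - delta;
fd_grad = (L_of(F_hi) - L_of(F_lo)) / (2 * delta);
fprintf('backprop grad: %.6f, finite diff: %.6f\n', G_cur(1,1), fd_grad);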

88 changes: 88 additions & 0 deletions LossFunc.m
@@ -0,0 +1,88 @@
classdef LossFunc < handle
% This is a class for managing loss functions to be used at the output layer
% of a multi-layer neural-net.
%

properties
% func_type determines which loss function to use
func_type
end

methods
function [self] = LossFunc( func_type )
% Constructor for LossFunc class
if ~exist('func_type','var')
func_type = 1;
end
self.func_type = func_type;
return
end

function [ L dL ] = evaluate(self, Yh, Y)
% Compute loss and loss gradient using the selected loss function
if (~isequal(size(Yh), size(Y)))
error('LossFunc: estimate and target vector size mismatch.');
end
switch self.func_type
case 1
[L dL] = LossFunc.least_squares(Yh, Y);
case 2
[L dL] = LossFunc.cross_entropy(Yh, Y);
case 3
[L dL] = LossFunc.binomial_deviance(Yh, Y);
otherwise
error('No valid loss function type selected.');
end
return
end
end

methods (Static = true)
% The static methods for LossFunc compute loss values and gradients for
% various loss functions.
%
function [ L dL ] = least_squares(Yh, Y)
% Compute loss and loss gradient for least-squares error
L = (1 / 2) * sum((Yh(:) - Y(:)).^2);
dL = Yh - Y;
return
end

function [ L dL ] = cross_entropy(Yh, Y)
% Compute loss and loss gradient for cross-entropy error
Yf = Y(:);
Yhf = Yh(:);
if ((sum(Yf == 1) + sum(Yf == 0)) < numel(Yf))
error('LossFunc.cross_entropy: values in Y must be 0 or 1.');
end
if ((sum(Yhf < eps) + sum(Yhf > 1)) > 0)
error('LossFunc.cross_entropy: values in Yh must be in (0, 1].');
end
L = -Y .* log(Yh);
dL = -Y ./ Yh;
return
end

function [ L dL ] = binomial_deviance(Yh, Y)
% Compute loss and loss gradient for binomial-deviance error
Yf = Y(:);
if ((sum(Yf == 1) + sum(Yf == -1)) < numel(Yf))
error('LossFunc.binomial_deviance: Y values should be +/- 1.');
end
L = log(exp(-Y .* Yh) + 1);
dL = -(Y .* exp(-Y .* Yh)) ./ (exp(-Y .* Yh) + 1);
return
end
end

end






%%%%%%%%%%%%%%
% EYE BUFFER %
%%%%%%%%%%%%%%
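A similar sketch for LossFunc (editorial illustration, not part of the commit): Yh and Y below are made-up values, and the least-squares gradient dL is spot-checked against a central finite difference.

% Sketch: evaluate the least-squares loss and verify one entry of dL
% numerically. All inputs here are made-up illustration data.
lfun = LossFunc(1);                        % func_type 1 -> least-squares
Y = [1 0; 0 1; 1 0];                       % targets
Yh = [0.8 0.3; 0.1 0.7; 0.6 0.4];          % estimates
[L, dL] = lfun.evaluate(Yh, Y);
delta = 1e-6;
Yh_hi = Yh; Yh_hi(2,1) = Yh_hi(2,1) + delta;
Yh_lo = Yh; Yh_lo(2,1) = Yh_lo(2,1) - delta;
fd_grad = (lfun.evaluate(Yh_hi, Y) - lfun.evaluate(Yh_lo, Y)) / (2 * delta);
fprintf('dL(2,1): %.6f, finite diff: %.6f\n', dL(2,1), fd_grad);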
