#1. Fix a bug in error computation in the bi-directional RNN when there is more than one hidden layer

#2. Improve the training path of the bi-directional RNN: the forward pass is no longer re-run before weights are updated
#3. Fix bugs in the Dropout layer
#4. Change the hidden-layer settings format in the configuration file
#5. Refactor code
zhongkaifu committed Feb 5, 2017
1 parent 9e2ac3a commit 1d08c95
Showing 15 changed files with 757 additions and 546 deletions.
17 changes: 9 additions & 8 deletions README.md
@@ -129,18 +129,19 @@ MODEL_DIRECTION = BiDirectional
 #Model file path
 MODEL_FILEPATH = Data\Models\ParseORG_CHS\model.bin
 
-#Hidden layers settings. BPTT, LSTM, Dropout are supported. Here are examples of these layer types
-#BPTT: 200:BPTT:5 -- Layer size is 200, BPTT value is 5
-#Dropout: 200:Dropout:0.5 -- Layer size is 200, Drop out ratio is 0.5
+#Hidden layers settings. LSTM and Dropout are supported. Here are examples of these layer types.
+#Dropout: Dropout:0.5 -- Dropout ratio is 0.5 and the layer size is the same as that of the previous layer.
 #If the model has more than one hidden layer, each layer's settings are separated by a comma. For example:
-#"300:LSTM, 200:LSTM" means the model has two LSTM layers. The first layer size is 300, and the second layer size is 200
-HIDDEN_LAYER = 200:LSTM
+#"LSTM:300, LSTM:200" means the model has two LSTM layers. The first layer size is 300, and the second layer size is 200.
+HIDDEN_LAYER = LSTM:200
 
-#Output layer settings. Softmax ands NCESoftmax are supported. Here is an example of NCESoftmax:
-#"NCESoftmax:20" means the output layer is NCESoftmax layer and its negative sample size is 20
-OUTPUT_LAYER = Softmax
+#Output layer settings. Simple, Softmax and sampled softmax are supported. Here is an example of sampled softmax:
+#"SampledSoftmax:20" means the output layer is a sampled softmax layer and its negative sample size is 20.
+#"Simple" means the output is the raw result of the output layer. "Softmax" means the "Simple" result is run through a softmax.
+OUTPUT_LAYER = Simple
 
 #CRF layer settings
+#If this option is true, the output layer type must be "Simple".
 CRF_LAYER = True
 
 #The file name for template feature set
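Putting the new syntax together, a complete hidden/output layer block in the configuration file would now read as follows. This is an illustrative snippet assembled from the README examples above, not a file shipped in the repository:

#Two stacked hidden layers: a 300-unit LSTM followed by a dropout layer with ratio 0.5
HIDDEN_LAYER = LSTM:300, Dropout:0.5
#CRF decoding requires the raw "Simple" output; use Softmax or SampledSoftmax:20 when CRF_LAYER is false
OUTPUT_LAYER = Simple
CRF_LAYER = True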
386 changes: 217 additions & 169 deletions RNNSharp/BiRNN.cs

Large diffs are not rendered by default.

64 changes: 38 additions & 26 deletions RNNSharp/Config.cs
@@ -34,13 +34,13 @@ public DropoutLayerConfig()
     }
 }
 
-public class NCELayerConfig : LayerConfig
+public class SampledSoftmaxLayerConfig : SoftmaxLayerConfig
 {
     public int NegativeSampleSize;
 
-    public NCELayerConfig()
+    public SampledSoftmaxLayerConfig()
     {
-        LayerType = LayerType.NCESoftmax;
+        LayerType = LayerType.SampledSoftmax;
     }
 }
 
@@ -52,6 +52,14 @@ public SoftmaxLayerConfig()
     }
 }
 
+public class SimpleLayerConfig : LayerConfig
+{
+    public SimpleLayerConfig()
+    {
+        LayerType = LayerType.Simple;
+    }
+}
+
 public class Config
 {
     //Settings for model type
@@ -161,18 +169,18 @@ public void LoadFeatureConfigFromFile(string configFilePath)
 
     featureContext = new Dictionary<string, List<int>>();
 
-    SetHiddenLayers();
-    SetOutputLayers();
-    SetPretrainedModel();
-    SetTFeatures();
-
     var isCRFTraining = config.GetValueOptional(CRF_LAYER);
     IsCRFTraining = false;
     if (string.IsNullOrEmpty(isCRFTraining) == false)
     {
         IsCRFTraining = bool.Parse(isCRFTraining);
     }
 
+    SetHiddenLayers();
+    SetOutputLayers();
+    SetPretrainedModel();
+    SetTFeatures();
+
     //Load model type
     ModelType = config.GetValueRequired(MODEL_TYPE)
         .Equals(MODELTYPE.SeqLabel.ToString(), StringComparison.InvariantCultureIgnoreCase)
@@ -316,40 +324,44 @@ var type in
                 break;
         }
 
+        if (IsCRFTraining == true && outputLayerType != LayerType.Simple)
+        {
+            throw new ArgumentException($"For RNN-CRF model, its output layer type must be simple layer.");
+        }
+
         switch (outputLayerType)
         {
             case LayerType.Softmax:
-                var softmaxLayerConfig = new SoftmaxLayerConfig();
-                OutputLayerConfig = softmaxLayerConfig;
-
+                OutputLayerConfig = new SoftmaxLayerConfig();
                 Logger.WriteLine("Initialize configuration for softmax layer.");
                 break;
 
-            case LayerType.NCESoftmax:
-                var nceLayerConfig = new NCELayerConfig { NegativeSampleSize = int.Parse(items[1]) };
-                OutputLayerConfig = nceLayerConfig;
+            case LayerType.SampledSoftmax:
+                var sampledSoftmaxLayerConfig = new SampledSoftmaxLayerConfig { NegativeSampleSize = int.Parse(items[1]) };
+                OutputLayerConfig = sampledSoftmaxLayerConfig;
 
                 Logger.WriteLine(
-                    $"Initialize configuration for NCESoftmax layer. Negative sample size = '{nceLayerConfig.NegativeSampleSize}'");
+                    $"Initialize configuration for sampled Softmax layer. Negative sample size = '{sampledSoftmaxLayerConfig.NegativeSampleSize}'");
                 break;
+
+            case LayerType.Simple:
+                OutputLayerConfig = new SimpleLayerConfig();
+                Logger.WriteLine("Initialize configuration for simple layer.");
+                break;
         }
     }
 
     private void SetHiddenLayers()
     {
         //Get hidden layer settings
+        //Example: LSTM:200, Dropout:0.5
         HiddenLayersConfig = new List<LayerConfig>();
         var hiddenLayers = config.GetValueRequired(HIDDEN_LAYER);
         foreach (var layer in hiddenLayers.Split(','))
         {
             var items = layer.Split(':');
-            var sLayerSize = items[0].Trim();
-            var sLayerType = items[1].Trim();
-
-            //Parse layer size and type
-            var layerSize = int.Parse(sLayerSize);
+            var sLayerType = items[0].Trim();
             var layerType = LayerType.None;
 
             foreach (
                 var type in
                     Enum.GetValues(typeof(LayerType))
@@ -368,14 +380,17 @@ var type in
                 case LayerType.LSTM:
                 {
                     var layerConfig = new LSTMLayerConfig();
+                    layerConfig.LayerSize = int.Parse(items[1]);
+                    layerConfig.LayerType = layerType;
                     baseLayerConfig = layerConfig;
-                    Logger.WriteLine("Initialize configuration for LSTM layer.");
+                    Logger.WriteLine($"Initialize configuration for LSTM layer. Layer size = {layerConfig.LayerSize}");
                 }
                     break;
 
                 case LayerType.DropOut:
                 {
-                    var layerConfig = new DropoutLayerConfig { DropoutRatio = float.Parse(items[2])};
+                    var layerConfig = new DropoutLayerConfig { DropoutRatio = float.Parse(items[1])};
+                    layerConfig.LayerType = layerType;
                     baseLayerConfig = layerConfig;
                     Logger.WriteLine(
                         $"Initialize configuration for Dropout layer. Dropout ratio = '{layerConfig.DropoutRatio}'");
@@ -386,9 +401,6 @@ var type in
                     throw new ArgumentException($"Invalidated layer type: {sLayerType}");
             }
 
-            baseLayerConfig.LayerType = layerType;
-            baseLayerConfig.LayerSize = layerSize;
-
            HiddenLayersConfig.Add(baseLayerConfig);
        }
        Logger.WriteLine($"Hidden layer : {HiddenLayersConfig.Count}");
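The net effect of the SetHiddenLayers change above is that a hidden-layer entry is now written type-first ("LSTM:300", "Dropout:0.5") and each layer type parses its own parameter. A minimal standalone sketch of that parsing, assuming hypothetical names (LayerSpec, HiddenLayerParser) rather than the actual RNNSharp types:

using System;
using System.Collections.Generic;

// Hypothetical illustration of the new "Type:Value" hidden-layer syntax.
class LayerSpec
{
    public string Type;   // "LSTM" or "Dropout"
    public float Value;   // layer size for LSTM, dropout ratio for Dropout
}

static class HiddenLayerParser
{
    public static List<LayerSpec> Parse(string setting)
    {
        var specs = new List<LayerSpec>();
        foreach (var layer in setting.Split(','))
        {
            // After this commit the type comes first, so items[0] is the
            // layer type and items[1] is its single numeric parameter.
            var items = layer.Split(':');
            specs.Add(new LayerSpec
            {
                Type = items[0].Trim(),
                Value = float.Parse(items[1].Trim())
            });
        }
        return specs;
    }
}

For example, Parse("LSTM:300, LSTM:200") yields two LSTM specs with sizes 300 and 200, matching the README example.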
110 changes: 92 additions & 18 deletions RNNSharp/DropoutLayer.cs
@@ -1,8 +1,14 @@
 using AdvUtils;
 using System;
+using System.IO;
 
 namespace RNNSharp
 {
+    public class DropoutNeuron : Neuron
+    {
+        public bool[] mask;
+    }
+
     internal class DropoutLayer : SimpleLayer
     {
         private readonly float dropoutRatio;
@@ -12,44 +18,86 @@ internal class DropoutLayer : SimpleLayer
         public DropoutLayer(DropoutLayerConfig config) : base(config)
         {
             dropoutRatio = config.DropoutRatio;
-            mask = new bool[LayerSize];
             rnd = new Random();
         }
 
-        public DropoutLayer()
+        public override Neuron CopyNeuronTo()
         {
-            rnd = new Random();
+            DropoutNeuron neuron = new DropoutNeuron();
+            neuron.mask = new bool[mask.Length];
+            mask.CopyTo(neuron.mask, 0);
+
+            neuron.Cells = new float[Cells.Length];
+            neuron.PrevCellOutputs = new float[previousCellOutputs.Length];
+            Cells.CopyTo(neuron.Cells, 0);
+            previousCellOutputs.CopyTo(neuron.PrevCellOutputs, 0);
+
+            return neuron;
         }
 
+        public override void InitializeWeights(int sparseFeatureSize, int denseFeatureSize)
+        {
+            if (denseFeatureSize > 0)
+            {
+                Logger.WriteLine("Initializing dense feature matrix. layer size = {0}, feature size = {1}", LayerSize,
+                    denseFeatureSize);
+                DenseFeatureSize = denseFeatureSize;
+                DenseWeights = new Matrix<float>(LayerSize, denseFeatureSize);
+                for (var i = 0; i < DenseWeights.Height; i++)
+                {
+                    for (var j = 0; j < DenseWeights.Width; j++)
+                    {
+                        DenseWeights[i][j] = 1.0f;
+                    }
+                }
+            }
+
+            if (sparseFeatureSize > 0)
+            {
+                Logger.WriteLine("Initializing sparse feature matrix. layer size = {0}, feature size = {1}", LayerSize,
+                    sparseFeatureSize);
+                SparseFeatureSize = sparseFeatureSize;
+                SparseWeights = new Matrix<float>(LayerSize, SparseFeatureSize);
+                for (var i = 0; i < SparseWeights.Height; i++)
+                {
+                    for (var j = 0; j < SparseWeights.Width; j++)
+                    {
+                        SparseWeights[i][j] = 1.0f;
+                    }
+                }
+            }
+        }
+
         public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
         {
             if (LayerSize != denseFeature.Length)
             {
-                throw new Exception("The layer size of dropout layer must be equal to its denseFeature size.");
+                throw new Exception($"The layer size of dropout layer must be equal to its denseFeature size. Layer size = {LayerSize}, Dense feature size = {denseFeature.Length}");
             }
 
             if (runningMode == RunningMode.Training)
             {
+                mask = new bool[LayerSize];
                 for (var i = 0; i < LayerSize; i++)
                 {
                     var val = (float)rnd.NextDouble();
                     if (val < dropoutRatio)
                     {
                         mask[i] = true;
-                        Cell[i] = 0;
+                        Cells[i] = 0;
                     }
                     else
                     {
                         mask[i] = false;
-                        Cell[i] = denseFeature[i];
+                        Cells[i] = denseFeature[i];
                     }
                 }
             }
             else
             {
                 for (var i = 0; i < LayerSize; i++)
                 {
-                    Cell[i] = (float)(1.0 - dropoutRatio) * denseFeature[i];
+                    Cells[i] = (float)(1.0 - dropoutRatio) * denseFeature[i];
                 }
             }
         }
@@ -58,15 +106,13 @@ public override void BackwardPass()
         {
         }
 
-        public override void ComputeLayerErr(SimpleLayer nextLayer, float[] destErrLayer, float[] srcErrLayer)
+        public override void ComputeLayerErr(SimpleLayer nextLayer, float[] destErrLayer, float[] srcErrLayer, Neuron neuron)
         {
-            //error output->hidden for words from specific class
-            RNNHelper.matrixXvectorADDErr(destErrLayer, srcErrLayer, nextLayer.DenseWeights, LayerSize,
-                nextLayer.LayerSize);
-
+            DropoutNeuron dropoutNeuron = neuron as DropoutNeuron;
+            base.ComputeLayerErr(nextLayer, destErrLayer, srcErrLayer, dropoutNeuron);
             for (var i = 0; i < LayerSize; i++)
             {
-                if (mask[i])
+                if (dropoutNeuron.mask[i])
                 {
                     destErrLayer[i] = 0;
                 }
@@ -75,18 +121,46 @@ public override void ComputeLayerErr(SimpleLayer nextLayer, float[] destErrLayer
 
         public override void ComputeLayerErr(SimpleLayer nextLayer)
         {
-            //error output->hidden for words from specific class
-            Err = nextLayer.Err;
-            DenseWeights = nextLayer.DenseWeights;
-
+            base.ComputeLayerErr(nextLayer);
+            //Apply drop out on error in hidden layer
             for (var i = 0; i < LayerSize; i++)
             {
                 if (mask[i])
                 {
-                    Err[i] = 0;
+                    Errs[i] = 0;
                 }
             }
         }
 
+        public override void Save(BinaryWriter fo)
+        {
+            base.Save(fo);
+            fo.Write(dropoutRatio);
+        }
+
+        public static DropoutLayer Load(BinaryReader br, LayerType layerType)
+        {
+            DropoutLayer dropoutLayer;
+            DropoutLayerConfig config = new DropoutLayerConfig();
+            SimpleLayer simpleLayer = SimpleLayer.Load(br, layerType);
+            config.DropoutRatio = br.ReadSingle();
+            config.LayerSize = simpleLayer.LayerSize;
+
+            dropoutLayer = new DropoutLayer(config);
+            dropoutLayer.SparseFeatureSize = simpleLayer.SparseFeatureSize;
+            dropoutLayer.DenseFeatureSize = simpleLayer.DenseFeatureSize;
+
+            if (dropoutLayer.SparseFeatureSize > 0)
+            {
+                dropoutLayer.SparseWeights = simpleLayer.SparseWeights;
+            }
+
+            if (dropoutLayer.DenseFeatureSize > 0)
+            {
+                dropoutLayer.DenseWeights = simpleLayer.DenseWeights;
+            }
+
+            return dropoutLayer;
+        }
     }
 }
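Behaviourally, the fixed layer implements classic (non-inverted) dropout: during training a fresh Boolean mask zeroes each unit with probability dropoutRatio, and at inference all units are kept but scaled by (1 - dropoutRatio) so expected activations match training. A self-contained sketch of that contract, with hypothetical names rather than the RNNSharp API:

using System;

static class DropoutSketch
{
    static readonly Random rnd = new Random();

    // Fills output from input and returns the mask drawn for this pass.
    // training == true : zero each unit with probability ratio.
    // training == false: keep every unit, scaled by (1 - ratio).
    public static bool[] Forward(float[] input, float[] output, float ratio, bool training)
    {
        var mask = new bool[input.Length];
        for (var i = 0; i < input.Length; i++)
        {
            if (training)
            {
                mask[i] = rnd.NextDouble() < ratio;   // true means dropped
                output[i] = mask[i] ? 0f : input[i];
            }
            else
            {
                output[i] = (float)((1.0 - ratio) * input[i]);
            }
        }
        return mask;   // the backward pass zeroes errors where mask[i] is true
    }
}

Carrying the mask in a per-state DropoutNeuron, as the commit does, rather than in a single shared field lets the bi-directional training path replay the exact mask from the forward pass when propagating errors.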
3 changes: 2 additions & 1 deletion RNNSharp/Enums.cs
@@ -9,9 +9,10 @@ public enum TFEATURE_WEIGHT_TYPE_ENUM
 public enum LayerType
 {
     Softmax,
-    NCESoftmax,
+    SampledSoftmax,
     LSTM,
     DropOut,
+    Simple,
     None
 }
