Mnist example #217

Merged (6 commits, Dec 14, 2016)
4 changes: 2 additions & 2 deletions doc/doxygen/Doxyfile
@@ -371,7 +371,7 @@ INLINE_GROUPED_CLASSES = NO
# Man pages) or section (for LaTeX and RTF).
# The default value is: NO.

INLINE_SIMPLE_STRUCTS = NO
INLINE_SIMPLE_STRUCTS = YES

# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
# enum is documented as struct, union, or enum with the name of the typedef. So
@@ -758,7 +758,7 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.

INPUT =../../dynet/expr.h ../../dynet/training.h ../../dynet/rnn.h ../../examples/cpp/encdec/encdec.h ../../examples/cpp/rnnlm-batch/rnnlm-batch.h
INPUT =../../dynet/expr.h ../../dynet/training.h ../../dynet/rnn.h ../../examples/cpp/encdec/encdec.h ../../examples/cpp/rnnlm-batch/rnnlm-batch.h ../../examples/cpp/mnist/mlp.h

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
11 changes: 10 additions & 1 deletion doc/source/examples.rst
@@ -3,14 +3,23 @@ Examples

Here are some simple models coded in the examples of Dynet. Feel free to use and modify them.

Feed-forward models
-------------------

Although Dynet was primarily built for natural language processing purposes, it is still possible to code feed-forward nets. Here are some bricks and examples to do so.

.. doxygengroup:: ffbuilders
:members:
:content-only:
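
As a rough sketch of how these bricks are meant to be combined (using the `Layer` and `MLP` classes added in `examples/cpp/mnist/mlp.h`; the layer sizes below are illustrative assumptions, not values taken from the examples):

    #include "dynet/init.h"
    #include "mlp.h"  // Layer / MLP bricks from examples/cpp/mnist

    using namespace dynet;

    int main(int argc, char** argv) {
      dynet::initialize(argc, argv);
      Model model;
      MLP nn(model);                                  // start with an empty network...
      nn.append(model, Layer(784, 128, RELU, 0.0));   // ...and add layers one by one
      nn.append(model, Layer(128, 10, LINEAR, 0.0));  // LINEAR output: the loss applies the softmax
      return 0;
    }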

Language models
---------------

Language modelling is one of the cornerstones of natural language processing. Dynet allows great flexibility in the creation of neural language models. Here are some examples.

.. doxygengroup:: lmbuilders
:members:
:content-only:
:members:

Sequence to sequence models
---------------------------
2 changes: 1 addition & 1 deletion doc/source/tutorial.rst
@@ -28,4 +28,4 @@ Guided examples in Python can be found below :
A more comprehensive tutorial can be found here_ (EMNLP 2016 tutorial).


.. _here: https://github.com/clab/dynet_tutorial_examples
.. _here: https://github.com/clab/dynet_tutorial_examples
28 changes: 28 additions & 0 deletions examples/cpp/mnist/README.md
@@ -0,0 +1,28 @@
# MNIST example

Here's an example usage of dynet for the "Hello World" of deep learning: MNIST digit classification.

## Usage

First, download the MNIST dataset from the [official website](http://yann.lecun.com/exdb/mnist/) and decompress it.

wget -O - http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz | gunzip > train-images.idx3-ubyte
wget -O - http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz | gunzip > train-labels.idx1-ubyte
wget -O - http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz | gunzip > t10k-images.idx3-ubyte
wget -O - http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz | gunzip > t10k-labels.idx1-ubyte

Then, run the training (here with a batch size of 128 and 20 epochs):

./train_mnist \
--train train-images.idx3-ubyte \
--train_labels train-labels.idx1-ubyte \
--dev t10k-images.idx3-ubyte \
--dev_labels t10k-labels.idx1-ubyte \
--batch_size 128 \
--num_epochs 20

## Benchmark

System | Speed | Test accuracy (after 20 epochs)
------------ | ------------- | -------------
Intel® Core™ i5-4200H CPU @ 2.80GHz × 4 | ~7±0.5 s per epoch | 97.84 %
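
For reference, here is a minimal sketch of the batched training step that a binary like `train_mnist` presumably performs, built on the `MLP::get_nll` API from `mlp.h` below. The architecture, the `SimpleSGDTrainer` choice and the dummy batch are assumptions for illustration; the real program reads the IDX files passed on the command line.

    #include "dynet/init.h"
    #include "dynet/training.h"
    #include "mlp.h"

    #include <iostream>
    #include <vector>
    using namespace std;
    using namespace dynet;

    int main(int argc, char** argv) {
      dynet::initialize(argc, argv);
      Model model;
      SimpleSGDTrainer trainer(model);  // assumed trainer choice

      // Assumed architecture: 784 -> 512 -> 512 -> 10, ReLU + dropout.
      // The last layer is LINEAR because get_nll applies the softmax itself.
      MLP nn(model, vector<Layer>({Layer(784, 512, RELU, 0.2),
                                   Layer(512, 512, RELU, 0.2),
                                   Layer(512, 10, LINEAR, 0.0)}));

      const unsigned batch_size = 128;
      // Placeholder batch: real code fills these from the MNIST IDX files.
      vector<float> pixels(batch_size * 784, 0.f);
      vector<unsigned> labels(batch_size, 0);

      nn.enable_dropout();  // training mode
      ComputationGraph cg;
      Expression x = input(cg, Dim({784}, batch_size), pixels);  // batched input
      Expression loss = nn.get_nll(x, labels, cg);               // NLL summed over the batch
      float batch_loss = as_scalar(cg.forward(loss));
      cg.backward(loss);
      trainer.update();
      cerr << "loss on dummy batch: " << batch_loss << endl;
      return 0;
    }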
299 changes: 299 additions & 0 deletions examples/cpp/mnist/mlp.h
@@ -0,0 +1,299 @@
#ifndef MLP_H
#define MLP_H

/**
* \file mlp.h
* \defgroup ffbuilders ffbuilders
* \brief Feed-forward net builders
*
* An example implementation of a simple multilayer perceptron
*
*/

#include "dynet/nodes.h"
#include "dynet/dynet.h"
#include "dynet/training.h"
#include "dynet/timing.h"
#include "dynet/expr.h"
#include "dynet/io-macros.h"

#include <boost/serialization/utility.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/archive/binary_iarchive.hpp>
#include <boost/archive/binary_oarchive.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>

#include <iostream>
#include <fstream>
#include <sstream>
#include <algorithm>

using namespace std;
using namespace dynet;
using namespace dynet::expr;
/**
* \ingroup ffbuilders
* Common activation functions used in multilayer perceptrons
*/
enum Activation {
SIGMOID, /**< `SIGMOID` : Sigmoid function \f$x\longrightarrow \frac {1} {1+e^{-x}}\f$ */
TANH, /**< `TANH` : Tanh function \f$x\longrightarrow \frac {1-e^{-2x}} {1+e^{-2x}}\f$ */
RELU, /**< `RELU` : Rectified linear unit \f$x\longrightarrow \max(0,x)\f$ */
LINEAR, /**< `LINEAR` : Identity function \f$x\longrightarrow x\f$ */
SOFTMAX /**< `SOFTMAX` : Softmax function \f$\textbf{x}=(x_i)_{i=1,\dots,n}\longrightarrow \left(\frac {e^{x_i}}{\sum_{j=1}^n e^{x_j}}\right)_{i=1,\dots,n}\f$ */
};

/**
* \ingroup ffbuilders
* \struct Layer
* \brief Simple layer structure
* \details Contains all parameters defining a layer
*
*/
struct Layer {
public:
unsigned input_dim; /**< Input dimension */
unsigned output_dim; /**< Output dimension */
Activation activation = LINEAR; /**< Activation function */
float dropout_rate = 0; /**< Dropout rate */
/**
* \brief Build a feed forward layer
*
* \param input_dim Input dimension
* \param output_dim Output dimension
* \param activation Activation function
* \param dropout_rate Dropout rate
*/
Layer(unsigned input_dim, unsigned output_dim, Activation activation, float dropout_rate) :
input_dim(input_dim),
output_dim(output_dim),
activation(activation),
dropout_rate(dropout_rate) {};
Layer() {};
private:
friend class boost::serialization::access;
template<class Archive>
void serialize(Archive & ar, const unsigned int) {
ar & input_dim & output_dim & activation & dropout_rate;
}
};
DYNET_SERIALIZE_IMPL(Layer);

/**
* \ingroup ffbuilders
* \struct MLP
* \brief Simple multilayer perceptron
*
*/
struct MLP {
protected:
// Hyper-parameters
unsigned LAYERS = 0;

// Layers
vector<Layer> layers;
// Parameters
vector<vector<Parameter>> params;

bool dropout_active = true;

public:
/**
* \brief Default constructor
* \details Don't forget to add layers!
*/
MLP(Model & model) {
LAYERS = 0;
}
/**
* \brief Construct a multilayer perceptron
* \details Creates a feedforward multilayer perceptron based on a list of layer descriptions
*
* \param model Model to contain parameters
* \param layers Layers description
*/
MLP(Model& model,
vector<Layer> layers) {
// Verify layers compatibility
for (unsigned l = 0; l + 1 < layers.size(); ++l) {
if (layers[l].output_dim != layers[l + 1].input_dim)
throw invalid_argument("Layer dimensions don't match");
}

// Register parameters in model
for (Layer layer : layers) {
append(model, layer);
}
}

/**
* \brief Append a layer at the end of the network
* \details Checks that the new layer's input dimension matches the previous layer's output dimension, then registers the layer's weight matrix and bias in the model
*
* \param model Model in which to register the layer's parameters
* \param layer Layer to append
*/
void append(Model& model, Layer layer) {
// Check compatibility
if (LAYERS > 0)
if (layers[LAYERS - 1].output_dim != layer.input_dim)
throw invalid_argument("Layer dimensions don't match");

// Add to layers
layers.push_back(layer);
LAYERS++;
// Register parameters
Parameter W = model.add_parameters({layer.output_dim, layer.input_dim});
Parameter b = model.add_parameters({layer.output_dim});
params.push_back({W, b});
}

/**
* \brief Run the MLP on an input vector/batch
*
* \param x Input expression (vector or batch)
* \param cg Computation graph
*
* \return Expression for the output of the last layer
*/
Expression run(Expression x,
ComputationGraph& cg) {
// Expression for the current hidden state
Expression h_cur = x;
for (unsigned l = 0; l < LAYERS; ++l) {
// Initialize parameters in computation graph
Expression W = parameter(cg, params[l][0]);
Expression b = parameter(cg, params[l][1]);
// Apply affine transform
Expression a = affine_transform({b, W, h_cur});
// Apply activation function
Expression h = activate(a, layers[l].activation);
// Take care of dropout
Expression h_dropped;
if (layers[l].dropout_rate > 0) {
if (dropout_active) {
// During training, drop random units
Expression mask = random_bernoulli(cg, {layers[l].output_dim}, 1 - layers[l].dropout_rate);
h_dropped = cmult(h, mask);
} else {
// At test time, multiply by the retention rate to scale
h_dropped = h * (1 - layers[l].dropout_rate);
}
} else {
// If there's no dropout, don't do anything
h_dropped = h;
}
// Set current hidden state
h_cur = h_dropped;
}

return h_cur;
}

/**
* \brief Return the negative log likelihood for the (batched) pair (x,y)
* \details For a batched input \f$\{x_i\}_{i=1,\dots,N}\f$ with labels \f$\{y_i\}_{i=1,\dots,N}\f$, this computes \f$-\sum_{i=1}^N \log(P(y_i\vert x_i))\f$ where \f$P(y\vert x_i)\f$ is modelled with \f$\mathrm{softmax}(\mathrm{MLP}(x_i))\f$
*
* \param x Input batch
* \param labels Output labels
* \param cg Computation graph
* \return Expression for the negative log likelihood on the batch
*/
Expression get_nll(Expression x,
vector<unsigned> labels,
ComputationGraph& cg) {
// Compute the output of the MLP
Expression y = run(x, cg);
// Compute the negative log softmax loss for each gold label
Expression losses = pickneglogsoftmax(y, labels);
// Sum the losses over the batch
return sum_batches(losses);
}

/**
* \brief Predict the most probable label
* \details Returns the argmax of the softmax of the network's output
*
* \param x Input
* \param cg Computation graph
*
* \return Label index
*/
int predict(Expression x,
ComputationGraph& cg) {
// Run the MLP to get the class scores (the argmax is unchanged by the softmax)
Expression y = run(x, cg);
// Get values
vector<float> probs = as_vector(cg.forward(y));
// Get argmax
unsigned argmax = 0;
for (unsigned i = 1; i < probs.size(); ++i) {
if (probs[i] > probs[argmax])
argmax = i;
}

return argmax;
}

/**
* \brief Enable dropout
* \details Use this during training, or at test time if you want to sample outputs with Monte Carlo dropout
*/
void enable_dropout() {
dropout_active = true;
}

/**
* \brief Disable dropout
* \details Do this during testing if you want a deterministic network
*/
void disable_dropout() {
dropout_active = false;
}

/**
* \brief Check whether dropout is enabled or not
*
* \return Dropout state
*/
bool is_dropout_enabled() {
return dropout_active;
}

private:
inline Expression activate(Expression h, Activation f) {
switch (f) {
case LINEAR:
return h;
case RELU:
return rectify(h);
case SIGMOID:
return logistic(h);
case TANH:
return tanh(h);
case SOFTMAX:
return softmax(h);
default:
throw invalid_argument("Unknown activation function");
}
}

friend class boost::serialization::access;
template<class Archive>
void serialize(Archive & ar, const unsigned int) {
ar & LAYERS;
ar & layers & params;
ar & dropout_active;
}


};

#endif
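
As a closing illustration of the dropout switches above, here is a sketch of test-time usage: a deterministic prediction via `disable_dropout()`, and a Monte Carlo variant that keeps dropout active and averages several stochastic forward passes, as hinted at in the `enable_dropout()` documentation. The helper functions, the trained network `nn` and the image vector are hypothetical, not code from this PR.

    #include "mlp.h"
    #include <vector>

    // Deterministic prediction at test time (assumes a trained MLP `nn` and a
    // flattened 784-float MNIST image).
    int predict_deterministic(MLP& nn, const std::vector<float>& image) {
      nn.disable_dropout();                   // deterministic network
      ComputationGraph cg;
      Expression x = input(cg, Dim({784}), image);
      return nn.predict(x, cg);
    }

    // Monte Carlo variant: keep dropout enabled and average the class
    // probabilities over several stochastic passes.
    std::vector<float> mc_class_probs(MLP& nn, const std::vector<float>& image,
                                      unsigned n_samples = 30) {
      nn.enable_dropout();
      std::vector<float> mean(10, 0.f);
      for (unsigned s = 0; s < n_samples; ++s) {
        ComputationGraph cg;
        Expression x = input(cg, Dim({784}), image);
        Expression y = softmax(nn.run(x, cg));
        std::vector<float> probs = as_vector(cg.forward(y));
        for (unsigned i = 0; i < 10; ++i) mean[i] += probs[i] / n_samples;
      }
      return mean;
    }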