Added set_s function for rnns #127

Merged: 7 commits, Oct 28, 2016

Changes from all commits
2 changes: 1 addition & 1 deletion doc/doxygen/Doxyfile
@@ -758,7 +758,7 @@ WARN_LOGFILE =
# spaces.
# Note: If this tag is empty the current directory is searched.

INPUT =../../dynet/expr.h ../../dynet/training.h ../../dynet/rnn.h

# This tag can be used to specify the character encoding of the source files
# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
4 changes: 3 additions & 1 deletion doc/source/builders.rst
@@ -4,4 +4,6 @@ DyNet Builders
Builders combine together various operations to implement more
complicated things such as recurrent and LSTM networks

.. doxygengroup:: rnnbuilders
:members:
:content-only:
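
Editor's note: for the doxygengroup directive above to render anything, rnn.h (newly added to the Doxyfile INPUT line) must define and populate a Doxygen group named rnnbuilders. The sketch below shows the assumed markup; the actual comments in rnn.h are not part of this diff excerpt.

// Sketch of the Doxygen markup assumed to exist in dynet/rnn.h.
// The group name must match the identifier used in builders.rst.

/**
 * \defgroup rnnbuilders
 * \brief Builders that combine operations into recurrent networks
 */

/**
 * \ingroup rnnbuilders
 * \brief Base class for RNN/GRU/LSTM builders
 */
struct RNNBuilder {
  // ... interface: new_graph, start_new_sequence, add_input, set_h, set_s ...
};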
2 changes: 1 addition & 1 deletion dynet/expr.h
@@ -3,7 +3,7 @@
* \defgroup operations
* \brief The various operations that you can use in building a DyNet graph
*
* \details TODO: Create documentation and explain expressions, etc...
*/

#ifndef DYNET_EXPR_H
27 changes: 25 additions & 2 deletions dynet/fast-lstm.cc
@@ -94,17 +94,40 @@ void FastLSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
}

// TODO - Make this correct.
// c is copied from the previous step (otherwise c.size() < h.size()).
// Also, is creating a new step something we want?
// Wouldn't overwriting the current one be better?
Expression FastLSTMBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
if (h_new.size()) { assert(h_new.size() == layers); }
const unsigned t = h.size();
h.push_back(vector<Expression>(layers));
c.push_back(vector<Expression>(layers));
for (unsigned i = 0; i < layers; ++i) {
Expression h_i = h_new[i];
Expression c_i = c[t - 1][i];
h[t][i] = h_i;
c[t][i] = c_i;
}
return h[t].back();
}
// Current implementation: s_new is either {new_c[0],...,new_c[n]}
// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
Expression FastLSTMBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
if (s_new.size()) { assert(s_new.size() == layers || s_new.size() == 2 * layers ); }
bool only_c = s_new.size() == layers;
const unsigned t = c.size();
h.push_back(vector<Expression>(layers));
c.push_back(vector<Expression>(layers));
for (unsigned i = 0; i < layers; ++i) {
Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers];
Expression c_i = s_new[i];
h[t][i] = h_i;
c[t][i] = c_i;
}
return h[t].back();
}


Expression FastLSTMBuilder::add_input_impl(int prev, const Expression& x) {
h.push_back(vector<Expression>(layers));
c.push_back(vector<Expression>(layers));
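
Editor's illustration (not part of the diff): with the convention above, a caller can seed a FastLSTMBuilder either with cell states only, or with cells followed by hidden states. A hypothetical sketch, assuming set_s(prev, s_new) and state() are the public RNNBuilder wrappers around these _impl methods:

#include "dynet/fast-lstm.h"
using namespace dynet;

// Seed all layers of an LSTM-style builder with explicit cell and
// hidden states. Layout follows the comment above:
// {c_0, ..., c_{L-1}} optionally followed by {h_0, ..., h_{L-1}}.
Expression seed_state(FastLSTMBuilder& builder,
                      const std::vector<Expression>& cells,
                      const std::vector<Expression>& hiddens) {
  std::vector<Expression> s_new(cells);                       // cells first
  s_new.insert(s_new.end(), hiddens.begin(), hiddens.end());  // then hiddens
  // With 2 * layers entries both c and h are set; with layers entries
  // only c is set and h is carried over from the previous step.
  return builder.set_s(builder.state(), s_new);
}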
1 change: 1 addition & 0 deletions dynet/fast-lstm.h
@@ -43,6 +43,7 @@ struct FastLSTMBuilder : public RNNBuilder {
void start_new_sequence_impl(const std::vector<Expression>& h0) override;
Expression add_input_impl(int prev, const Expression& x) override;
Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;

public:
// first index is layer, then ...
41 changes: 25 additions & 16 deletions dynet/gru.cc
@@ -48,19 +48,19 @@ void GRUBuilder::new_graph_impl(ComputationGraph& cg) {
auto& p = params[i];

// z
Expression x2z = parameter(cg, p[X2Z]);
Expression h2z = parameter(cg, p[H2Z]);
Expression bz = parameter(cg, p[BZ]);

// r
Expression x2r = parameter(cg, p[X2R]);
Expression h2r = parameter(cg, p[H2R]);
Expression br = parameter(cg, p[BR]);

// h
Expression x2h = parameter(cg, p[X2H]);
Expression h2h = parameter(cg, p[H2H]);
Expression bh = parameter(cg, p[BH]);

vector<Expression> vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh};
param_vars.push_back(vars);
@@ -75,20 +75,29 @@ void GRUBuilder::start_new_sequence_impl(const std::vector<Expression>& h_0) {
}
}

// TODO - Make this correct.
// c is copied from the previous step (otherwise c.size() < h.size()).
// Also, is creating a new step something we want?
// Wouldn't overwriting the current one be better?
Expression GRUBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
if (h_new.size()) { assert(h_new.size() == layers); }
const unsigned t = h.size();
h.push_back(vector<Expression>(layers));
for (unsigned i = 0; i < layers; ++i) {
Expression h_i = h_new[i];
h[t][i] = h_i;
}
return h[t].back();
}
// Current implementation: s_new is either {new_c[0],...,new_c[n]}
// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
Expression GRUBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
return set_h_impl(prev, s_new);
}

Expression GRUBuilder::add_input_impl(int prev, const Expression& x) {
//if(dropout_rate != 0.f)
//throw std::runtime_error("GRUBuilder doesn't support dropout yet");
const bool has_initial_state = (h0.size() > 0);
h.push_back(vector<Expression>(layers));
vector<Expression>& ht = h.back();
@@ -142,9 +151,9 @@ Expression GRUBuilder::add_input_impl(int prev, const Expression& x) {
void GRUBuilder::copy(const RNNBuilder & rnn) {
const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn;
assert(params.size() == rnn_gru.params.size());
for (size_t i = 0; i < params.size(); ++i)
  for (size_t j = 0; j < params[i].size(); ++j)
    params[i][j] = rnn_gru.params[i][j];
}

} // namespace dynet
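
Side note (editor's illustration, not part of the diff): a GRU carries no cell state, so its full recurrent state is just the per-layer hidden vector, which is why set_s_impl simply forwards to set_h_impl above. A hypothetical sketch, assuming the same public wrappers as before:

#include "dynet/gru.h"

// For a GRU, state == hidden state: set_s accepts the same
// {h_0, ..., h_{L-1}} vector as set_h and has the same effect.
dynet::Expression reset_gru_state(dynet::GRUBuilder& gru,
                                  const std::vector<dynet::Expression>& h_new) {
  return gru.set_s(gru.state(), h_new);  // equivalent to gru.set_h(gru.state(), h_new)
}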
1 change: 1 addition & 0 deletions dynet/gru.h
@@ -28,6 +28,7 @@ struct GRUBuilder : public RNNBuilder {
void start_new_sequence_impl(const std::vector<Expression>& h0) override;
Expression add_input_impl(int prev, const Expression& x) override;
Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;

// first index is layer, then ...
std::vector<std::vector<Parameter>> params;
72 changes: 47 additions & 25 deletions dynet/lstm.cc
@@ -48,29 +48,29 @@ LSTMBuilder::LSTMBuilder(unsigned layers,
vector<Parameter> ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc};
params.push_back(ps);
} // layers
dropout_rate = 0.f;
}

void LSTMBuilder::new_graph_impl(ComputationGraph& cg) {
param_vars.clear();

for (unsigned i = 0; i < layers; ++i) {
auto& p = params[i];

//i
Expression i_x2i = parameter(cg, p[X2I]);
Expression i_h2i = parameter(cg, p[H2I]);
Expression i_c2i = parameter(cg, p[C2I]);
Expression i_bi = parameter(cg, p[BI]);
//o
Expression i_x2o = parameter(cg, p[X2O]);
Expression i_h2o = parameter(cg, p[H2O]);
Expression i_c2o = parameter(cg, p[C2O]);
Expression i_bo = parameter(cg, p[BO]);
//c
Expression i_x2c = parameter(cg, p[X2C]);
Expression i_h2c = parameter(cg, p[H2C]);
Expression i_bc = parameter(cg, p[BC]);

vector<Expression> vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc};
param_vars.push_back(vars);
@@ -83,7 +83,7 @@ void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
h.clear();
c.clear();
if (hinit.size() > 0) {
assert(layers * 2 == hinit.size());
h0.resize(layers);
c0.resize(layers);
for (unsigned i = 0; i < layers; ++i) {
@@ -97,13 +97,35 @@ void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
}

// TODO - Make this correct.
// c is copied from the previous step (otherwise c.size() < h.size()).
// Also, is creating a new step something we want?
// Wouldn't overwriting the current one be better?
Expression LSTMBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
if (h_new.size()) { assert(h_new.size() == layers); }
const unsigned t = h.size();
h.push_back(vector<Expression>(layers));
c.push_back(vector<Expression>(layers));
for (unsigned i = 0; i < layers; ++i) {
Expression h_i = h_new[i];
Expression c_i = c[t - 1][i];
h[t][i] = h_i;
c[t][i] = c_i;
}
return h[t].back();
}
// Current implementation: s_new is either {new_c[0],...,new_c[n]}
// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
Expression LSTMBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
if (s_new.size()) { assert(s_new.size() == layers || s_new.size() == 2 * layers ); }
bool only_c = s_new.size() == layers;
const unsigned t = c.size();
h.push_back(vector<Expression>(layers));
c.push_back(vector<Expression>(layers));
for (unsigned i = 0; i < layers; ++i) {
Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers];
Expression c_i = s_new[i];
h[t][i] = h_i;
c[t][i] = c_i;
}
return h[t].back();
}
@@ -149,11 +171,11 @@ Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) {
Expression i_wt = tanh(i_awt);
// output
if (has_prev_state) {
Expression i_nwt = cwise_multiply(i_it, i_wt);
Expression i_crt = cwise_multiply(i_ft, i_c_tm1);
ct[i] = i_crt + i_nwt;
} else {
ct[i] = cwise_multiply(i_it, i_wt);
}

Expression i_aot;
Expand All @@ -163,18 +185,18 @@ Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) {
i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], ct[i]});
Expression i_ot = logistic(i_aot);
Expression ph_t = tanh(ct[i]);
in = ht[i] = cwise_multiply(i_ot, ph_t);
}
if (dropout_rate) return dropout(ht.back(), dropout_rate);
else return ht.back();
}

void LSTMBuilder::copy(const RNNBuilder & rnn) {
const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn;
assert(params.size() == rnn_lstm.params.size());
for (size_t i = 0; i < params.size(); ++i)
  for (size_t j = 0; j < params[i].size(); ++j)
    params[i][j] = rnn_lstm.params[i][j];
}

void LSTMBuilder::save_parameters_pretraining(const string& fname) const {
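
Editor's illustration of the motivating use case: transplanting one LSTM's full state into another, e.g. seeding a decoder from an encoder. This sketch assumes final_s() returns the state in the same {c..., h...} layout the comment above describes (matching how LSTMBuilder::final_s concatenates final_c() and final_h()), and that set_s is the public wrapper over the set_s_impl added in this PR:

#include "dynet/lstm.h"
using namespace dynet;

// Copy the encoder's final cells and hidden states into the decoder.
// s holds 2 * layers Expressions: {c_0,...,c_{L-1}, h_0,...,h_{L-1}}.
Expression transplant_state(LSTMBuilder& encoder, LSTMBuilder& decoder) {
  std::vector<Expression> s = encoder.final_s();
  // Note: per the TODO above, this pushes a new time step onto the
  // decoder's h and c rather than overwriting the current one.
  return decoder.set_s(decoder.state(), s);
}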
2 changes: 1 addition & 1 deletion dynet/lstm.h
@@ -43,7 +43,7 @@ struct LSTMBuilder : public RNNBuilder {
void start_new_sequence_impl(const std::vector<Expression>& h0) override;
Expression add_input_impl(int prev, const Expression& x) override;
Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;
public:
// first index is layer, then ...
std::vector<std::vector<Parameter>> params;