diff --git a/doc/doxygen/Doxyfile b/doc/doxygen/Doxyfile
index 526276130..1c1cf3b1a 100644
--- a/doc/doxygen/Doxyfile
+++ b/doc/doxygen/Doxyfile
@@ -758,7 +758,7 @@ WARN_LOGFILE =
 # spaces.
 # Note: If this tag is empty the current directory is searched.
 
-INPUT =../../dynet/expr.h ../../dynet/training.h
+INPUT =../../dynet/expr.h ../../dynet/training.h ../../dynet/rnn.h
 
 # This tag can be used to specify the character encoding of the source files
 # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses
diff --git a/doc/source/builders.rst b/doc/source/builders.rst
index d6139ae1a..3ea25c8e2 100644
--- a/doc/source/builders.rst
+++ b/doc/source/builders.rst
@@ -4,4 +4,6 @@ DyNet Builders
 Builders combine together various operations to implement more
 complicated things such as recurrent and LSTM networks
 
-TODO: Create documentation
+.. doxygengroup:: rnnbuilders
+    :members:
+    :content-only:
\ No newline at end of file
diff --git a/dynet/expr.h b/dynet/expr.h
index 8a155d5c8..b4672c315 100644
--- a/dynet/expr.h
+++ b/dynet/expr.h
@@ -3,7 +3,7 @@
  * \defgroup operations
  * \brief The various operations that you can use in building a DyNet graph
  *
- * TODO: Create documentation and explain expressions, etc...
+ * \details TODO: Create documentation and explain expressions, etc...
  */
 
 #ifndef DYNET_EXPR_H
diff --git a/dynet/fast-lstm.cc b/dynet/fast-lstm.cc
index abc55e4a4..e9f693761 100644
--- a/dynet/fast-lstm.cc
+++ b/dynet/fast-lstm.cc
@@ -94,17 +94,40 @@ void FastLSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
 }
 
 // TO DO - Make this correct
+// Copied c from the previous step (otherwise c.size()< h.size())
+// Also is creating a new step something we want?
+// wouldn't overwriting the current one be better?
 Expression FastLSTMBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
   if (h_new.size()) { assert(h_new.size() == layers); }
   const unsigned t = h.size();
   h.push_back(vector<Expression>(layers));
+  c.push_back(vector<Expression>(layers));
+  for (unsigned i = 0; i < layers; ++i) {
+    Expression h_i = h_new[i];
+    Expression c_i = c[t - 1][i];
+    h[t][i] = h_i;
+    c[t][i] = c_i;
+  }
+  return h[t].back();
+}
+// Current implementation : s_new is either {new_c[0],...,new_c[n]}
+// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
+Expression FastLSTMBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
+  if (s_new.size()) { assert(s_new.size() == layers || s_new.size() == 2 * layers ); }
+  bool only_c = s_new.size() == layers;
+  const unsigned t = c.size();
+  h.push_back(vector<Expression>(layers));
+  c.push_back(vector<Expression>(layers));
   for (unsigned i = 0; i < layers; ++i) {
-    Expression y = h_new[i];
-    h[t][i] = y;
+    Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers];
+    Expression c_i = s_new[i];
+    h[t][i] = h_i;
+    c[t][i] = c_i;
   }
   return h[t].back();
 }
+
 Expression FastLSTMBuilder::add_input_impl(int prev, const Expression& x) {
   h.push_back(vector<Expression>(layers));
   c.push_back(vector<Expression>(layers));
diff --git a/dynet/fast-lstm.h b/dynet/fast-lstm.h
index 891b46021..16733a9e9 100644
--- a/dynet/fast-lstm.h
+++ b/dynet/fast-lstm.h
@@ -43,6 +43,7 @@ struct FastLSTMBuilder : public RNNBuilder {
   void start_new_sequence_impl(const std::vector<Expression>& h0) override;
   Expression add_input_impl(int prev, const Expression& x) override;
   Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
+  Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;
 
  public:
   // first index is layer, then ...
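
A minimal usage sketch of the set_s entry point added above, assuming the public RNNBuilder::set_s wrapper introduced in rnn.h later in this patch and the dynet / dynet::expr namespaces of the C++ examples of this era; the helper name and per-layer vectors are illustrative:

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/fast-lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // Overwrite both the cell and the hidden state of every layer at the current
    // position (s.size() == 2 * layers), then keep unrolling from there.
    Expression inject_full_state(FastLSTMBuilder& rnn,
                                 const std::vector<Expression>& new_c,  // one per layer
                                 const std::vector<Expression>& new_h,  // one per layer
                                 const Expression& next_input) {
      std::vector<Expression> s = new_c;               // {c_0, ..., c_{L-1},
      s.insert(s.end(), new_h.begin(), new_h.end());   //  h_0, ..., h_{L-1}}
      rnn.set_s(rnn.state(), s);
      return rnn.add_input(next_input);
    }
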
diff --git a/dynet/gru.cc b/dynet/gru.cc
index 85d944bdf..eff952c86 100644
--- a/dynet/gru.cc
+++ b/dynet/gru.cc
@@ -48,19 +48,19 @@ void GRUBuilder::new_graph_impl(ComputationGraph& cg) {
     auto& p = params[i];
 
     // z
-    Expression x2z = parameter(cg,p[X2Z]);
-    Expression h2z = parameter(cg,p[H2Z]);
-    Expression bz = parameter(cg,p[BZ]);
+    Expression x2z = parameter(cg, p[X2Z]);
+    Expression h2z = parameter(cg, p[H2Z]);
+    Expression bz = parameter(cg, p[BZ]);
 
     // r
-    Expression x2r = parameter(cg,p[X2R]);
-    Expression h2r = parameter(cg,p[H2R]);
-    Expression br = parameter(cg,p[BR]);
+    Expression x2r = parameter(cg, p[X2R]);
+    Expression h2r = parameter(cg, p[H2R]);
+    Expression br = parameter(cg, p[BR]);
 
     // h
-    Expression x2h = parameter(cg,p[X2H]);
-    Expression h2h = parameter(cg,p[H2H]);
-    Expression bh = parameter(cg,p[BH]);
+    Expression x2h = parameter(cg, p[X2H]);
+    Expression h2h = parameter(cg, p[H2H]);
+    Expression bh = parameter(cg, p[BH]);
 
     vector<Expression> vars = {x2z, h2z, bz, x2r, h2r, br, x2h, h2h, bh};
     param_vars.push_back(vars);
@@ -75,20 +75,29 @@ void GRUBuilder::start_new_sequence_impl(const std::vector<Expression>& h_0) {
   }
 }
 
+// TO DO - Make this correct
+// Copied c from the previous step (otherwise c.size()< h.size())
+// Also is creating a new step something we want?
+// wouldn't overwriting the current one be better?
 Expression GRUBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
   if (h_new.size()) { assert(h_new.size() == layers); }
   const unsigned t = h.size();
   h.push_back(vector<Expression>(layers));
   for (unsigned i = 0; i < layers; ++i) {
-    Expression y = h_new[i];
-    h[t][i] = y;
+    Expression h_i = h_new[i];
+    h[t][i] = h_i;
   }
   return h[t].back();
 }
+// Current implementation : s_new is either {new_c[0],...,new_c[n]}
+// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
+Expression GRUBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
+  return set_h_impl(prev, s_new);
+}
 
 Expression GRUBuilder::add_input_impl(int prev, const Expression& x) {
-  //if(dropout_rate != 0.f)
-  //throw std::runtime_error("GRUBuilder doesn't support dropout yet");
+  //if(dropout_rate != 0.f)
+  //throw std::runtime_error("GRUBuilder doesn't support dropout yet");
   const bool has_initial_state = (h0.size() > 0);
   h.push_back(vector<Expression>(layers));
   vector<Expression>& ht = h.back();
@@ -142,9 +151,9 @@ Expression GRUBuilder::add_input_impl(int prev, const Expression& x) {
 void GRUBuilder::copy(const RNNBuilder & rnn) {
   const GRUBuilder & rnn_gru = (const GRUBuilder&)rnn;
   assert(params.size() == rnn_gru.params.size());
-  for(size_t i = 0; i < params.size(); ++i)
-    for(size_t j = 0; j < params[i].size(); ++j)
-      params[i][j] = rnn_gru.params[i][j];
+  for (size_t i = 0; i < params.size(); ++i)
+    for (size_t j = 0; j < params[i].size(); ++j)
+      params[i][j] = rnn_gru.params[i][j];
 }
 
 } // namespace dynet
diff --git a/dynet/gru.h b/dynet/gru.h
index 79cf22a9d..8d907454a 100644
--- a/dynet/gru.h
+++ b/dynet/gru.h
@@ -28,6 +28,7 @@ struct GRUBuilder : public RNNBuilder {
   void start_new_sequence_impl(const std::vector<Expression>& h0) override;
   Expression add_input_impl(int prev, const Expression& x) override;
   Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
+  Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;
 
   // first index is layer, then ...
   std::vector<std::vector<Parameter>> params;
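
Because the GRU keeps no separate memory cell, the new GRUBuilder::set_s_impl simply forwards to set_h_impl, so at the public API level the two calls are interchangeable. A short sketch under that assumption (the function name is illustrative):

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/gru.h"
    using namespace dynet;
    using namespace dynet::expr;

    // For a GRU the full state is just the hidden vector of each layer,
    // so set_s and set_h do the same thing.
    Expression overwrite_gru_state(GRUBuilder& gru, const std::vector<Expression>& new_h) {
      RNNPointer here = gru.state();
      // gru.set_h(here, new_h);   // equivalent call
      return gru.set_s(here, new_h);
    }
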
diff --git a/dynet/lstm.cc b/dynet/lstm.cc
index a64ce8231..fe014ff2e 100644
--- a/dynet/lstm.cc
+++ b/dynet/lstm.cc
@@ -48,29 +48,29 @@ LSTMBuilder::LSTMBuilder(unsigned layers,
     vector<Parameter> ps = {p_x2i, p_h2i, p_c2i, p_bi, p_x2o, p_h2o, p_c2o, p_bo, p_x2c, p_h2c, p_bc};
     params.push_back(ps);
   } // layers
-  dropout_rate = 0.f;
+  dropout_rate = 0.f;
 }
 
-void LSTMBuilder::new_graph_impl(ComputationGraph& cg){
+void LSTMBuilder::new_graph_impl(ComputationGraph& cg) {
   param_vars.clear();
 
-  for (unsigned i = 0; i < layers; ++i){
+  for (unsigned i = 0; i < layers; ++i) {
     auto& p = params[i];
 
     //i
-    Expression i_x2i = parameter(cg,p[X2I]);
-    Expression i_h2i = parameter(cg,p[H2I]);
-    Expression i_c2i = parameter(cg,p[C2I]);
-    Expression i_bi = parameter(cg,p[BI]);
+    Expression i_x2i = parameter(cg, p[X2I]);
+    Expression i_h2i = parameter(cg, p[H2I]);
+    Expression i_c2i = parameter(cg, p[C2I]);
+    Expression i_bi = parameter(cg, p[BI]);
     //o
-    Expression i_x2o = parameter(cg,p[X2O]);
-    Expression i_h2o = parameter(cg,p[H2O]);
-    Expression i_c2o = parameter(cg,p[C2O]);
-    Expression i_bo = parameter(cg,p[BO]);
+    Expression i_x2o = parameter(cg, p[X2O]);
+    Expression i_h2o = parameter(cg, p[H2O]);
+    Expression i_c2o = parameter(cg, p[C2O]);
+    Expression i_bo = parameter(cg, p[BO]);
     //c
-    Expression i_x2c = parameter(cg,p[X2C]);
-    Expression i_h2c = parameter(cg,p[H2C]);
-    Expression i_bc = parameter(cg,p[BC]);
+    Expression i_x2c = parameter(cg, p[X2C]);
+    Expression i_h2c = parameter(cg, p[H2C]);
+    Expression i_bc = parameter(cg, p[BC]);
 
     vector<Expression> vars = {i_x2i, i_h2i, i_c2i, i_bi, i_x2o, i_h2o, i_c2o, i_bo, i_x2c, i_h2c, i_bc};
     param_vars.push_back(vars);
@@ -83,7 +83,7 @@ void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
   h.clear();
   c.clear();
   if (hinit.size() > 0) {
-    assert(layers*2 == hinit.size());
+    assert(layers * 2 == hinit.size());
     h0.resize(layers);
     c0.resize(layers);
     for (unsigned i = 0; i < layers; ++i) {
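
The tightened assert above documents the layout start_new_sequence expects for an LSTM: the per-layer cells first, then the per-layer outputs. A hedged sketch of initialising a sequence that way (dimensions and the zero initialisation are illustrative):

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // Start a sequence from an explicit state {c_0,...,c_{L-1}, h_0,...,h_{L-1}}.
    void start_from_given_state(ComputationGraph& cg, LSTMBuilder& rnn,
                                unsigned layers, unsigned hidden_dim) {
      rnn.new_graph(cg);
      std::vector<float> zeros(hidden_dim, 0.f);
      std::vector<Expression> init;
      for (unsigned i = 0; i < 2 * layers; ++i)
        init.push_back(input(cg, {hidden_dim}, zeros));  // cells first, then outputs
      rnn.start_new_sequence(init);                      // hinit.size() == layers * 2
    }
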
@@ -97,13 +97,35 @@ void LSTMBuilder::start_new_sequence_impl(const vector<Expression>& hinit) {
 }
 
 // TO DO - Make this correct
+// Copied c from the previous step (otherwise c.size()< h.size())
+// Also is creating a new step something we want?
+// wouldn't overwriting the current one be better?
 Expression LSTMBuilder::set_h_impl(int prev, const vector<Expression>& h_new) {
   if (h_new.size()) { assert(h_new.size() == layers); }
   const unsigned t = h.size();
   h.push_back(vector<Expression>(layers));
+  c.push_back(vector<Expression>(layers));
+  for (unsigned i = 0; i < layers; ++i) {
+    Expression h_i = h_new[i];
+    Expression c_i = c[t - 1][i];
+    h[t][i] = h_i;
+    c[t][i] = c_i;
+  }
+  return h[t].back();
+}
+// Current implementation : s_new is either {new_c[0],...,new_c[n]}
+// or {new_c[0],...,new_c[n],new_h[0],...,new_h[n]}
+Expression LSTMBuilder::set_s_impl(int prev, const std::vector<Expression>& s_new) {
+  if (s_new.size()) { assert(s_new.size() == layers || s_new.size() == 2 * layers ); }
+  bool only_c = s_new.size() == layers;
+  const unsigned t = c.size();
+  h.push_back(vector<Expression>(layers));
+  c.push_back(vector<Expression>(layers));
   for (unsigned i = 0; i < layers; ++i) {
-    Expression y = h_new[i];
-    h[t][i] = y;
+    Expression h_i = only_c ? h[t - 1][i] : s_new[i + layers];
+    Expression c_i = s_new[i];
+    h[t][i] = h_i;
+    c[t][i] = c_i;
   }
   return h[t].back();
 }
@@ -149,11 +171,11 @@ Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) {
     Expression i_wt = tanh(i_awt);
     // output
     if (has_prev_state) {
-      Expression i_nwt = cwise_multiply(i_it,i_wt);
-      Expression i_crt = cwise_multiply(i_ft,i_c_tm1);
+      Expression i_nwt = cwise_multiply(i_it, i_wt);
+      Expression i_crt = cwise_multiply(i_ft, i_c_tm1);
       ct[i] = i_crt + i_nwt;
     } else {
-      ct[i] = cwise_multiply(i_it,i_wt);
+      ct[i] = cwise_multiply(i_it, i_wt);
     }
 
     Expression i_aot;
@@ -163,18 +185,18 @@ Expression LSTMBuilder::add_input_impl(int prev, const Expression& x) {
       i_aot = affine_transform({vars[BO], vars[X2O], in, vars[C2O], ct[i]});
     Expression i_ot = logistic(i_aot);
     Expression ph_t = tanh(ct[i]);
-    in = ht[i] = cwise_multiply(i_ot,ph_t);
+    in = ht[i] = cwise_multiply(i_ot, ph_t);
   }
   if (dropout_rate) return dropout(ht.back(), dropout_rate);
-    else return ht.back();
+  else return ht.back();
 }
 
 void LSTMBuilder::copy(const RNNBuilder & rnn) {
   const LSTMBuilder & rnn_lstm = (const LSTMBuilder&)rnn;
   assert(params.size() == rnn_lstm.params.size());
-  for(size_t i = 0; i < params.size(); ++i)
-    for(size_t j = 0; j < params[i].size(); ++j)
-      params[i][j] = rnn_lstm.params[i][j];
+  for (size_t i = 0; i < params.size(); ++i)
+    for (size_t j = 0; j < params[i].size(); ++j)
+      params[i][j] = rnn_lstm.params[i][j];
 }
 
 void LSTMBuilder::save_parameters_pretraining(const string& fname) const {
diff --git a/dynet/lstm.h b/dynet/lstm.h
index 64a9c647c..ce92e153b 100644
--- a/dynet/lstm.h
+++ b/dynet/lstm.h
@@ -43,7 +43,7 @@ struct LSTMBuilder : public RNNBuilder {
   void start_new_sequence_impl(const std::vector<Expression>& h0) override;
   Expression add_input_impl(int prev, const Expression& x) override;
   Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
-
+  Expression set_s_impl(int prev, const std::vector<Expression>& s_new) override;
 public:
   // first index is layer, then ...
   std::vector<std::vector<Parameter>> params;
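
When s_new carries only layers expressions, the only_c branch above replaces the memory cells and copies the previous step's outputs; the 2 * layers form replaces both. A sketch of the cells-only call (the helper name is illustrative):

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // Replace only the memory cells at the current position; the hidden outputs
    // are carried over from the previous step by the only_c branch of set_s_impl.
    Expression overwrite_cells_only(LSTMBuilder& rnn, const std::vector<Expression>& new_c) {
      return rnn.set_s(rnn.state(), new_c);  // new_c.size() == layers
    }
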
diff --git a/dynet/rnn.h b/dynet/rnn.h
index e49322bdb..d2d9497da 100644
--- a/dynet/rnn.h
+++ b/dynet/rnn.h
@@ -1,3 +1,11 @@
+/**
+ * \file rnn.h
+ * \defgroup rnnbuilders
+ * \brief Helper structures to build recurrent units
+ *
+ * \details TODO: Create documentation and explain rnns, etc...
+ */
+
 #ifndef DYNET_RNN_H_
 #define DYNET_RNN_H_
 
@@ -18,41 +26,100 @@ inline void swap(RNNPointer& i1, RNNPointer& i2) {
   RNNPointer t = i1; i1 = i2; i2 = t;
 }
 
-// interface for constructing an RNN, LSTM, GRU, etc.
+/**
+ * \ingroup rnnbuilders
+ * \brief interface for constructing an RNN, LSTM, GRU, etc.
+ * \details [long description]
+ */
 struct RNNBuilder {
+  /**
+   *
+   * \brief Default constructor
+   */
  RNNBuilder() : cur(-1) {}
  virtual ~RNNBuilder();
 
+  /**
+   *
+   * \brief Get pointer to the current state
+   *
+   * \return Pointer to the current state
+   */
  RNNPointer state() const { return cur; }
 
-  // call this to reset the builder when you are working with a newly
-  // created ComputationGraph object
+  /**
+   *
+   * \brief Initialize with new computation graph
+   * \details call this to reset the builder when you are working with a newly
+   * created ComputationGraph object
+   *
+   * \param cg Computation graph
+   */
  void new_graph(ComputationGraph& cg) {
    sm.transition(RNNOp::new_graph);
    new_graph_impl(cg);
  }
 
-  // Reset for new sequence
-  // call this before add_input and after new_graph,
-  // when starting a new sequence on the same hypergraph.
-  // h_0 is used to initialize hidden layers at timestep 0 to given values
-  void start_new_sequence(const std::vector<Expression>& h_0={}) {
+  /**
+   *
+   * \brief Reset for new sequence
+   * \details call this before add_input and after new_graph,
+   * when starting a new sequence on the same hypergraph.
+   *
+   * \param h_0 `h_0` is used to initialize hidden layers at timestep 0 to given values
+   */
+  void start_new_sequence(const std::vector<Expression>& h_0 = {}) {
    sm.transition(RNNOp::start_new_sequence);
    cur = RNNPointer(-1);
    head.clear();
    start_new_sequence_impl(h_0);
  }
 
-  // explicitly set the output state of a node
-  Expression set_h(const RNNPointer& prev, const std::vector<Expression>& h_new={}) {
+  //
+  /**
+   *
+   * \brief Explicitly set the output state of a node
+   *
+   * \param prev Pointer to the previous state
+   * \param h_new The new hidden state
+   *
+   * \return The hidden representation of the deepest layer
+   */
+  Expression set_h(const RNNPointer& prev, const std::vector<Expression>& h_new = {}) {
    sm.transition(RNNOp::add_input);
    head.push_back(prev);
    cur = head.size() - 1;
    return set_h_impl(prev, h_new);
  }
 
-  // add another timestep by reading in the variable x
-  // return the hidden representation of the deepest layer
+  //
+  /**
+   *
+   * \brief Set the internal state of a node (for lstms/grus)
+   * \details For RNNs without internal states (SimpleRNN, GRU...),
+   * this has the same behaviour as `set_h`
+   *
+   * \param prev Pointer to the previous state
+   * \param s_new The new state. Can be `{new_c[0],...,new_c[n]}`
+   * or `{new_c[0],...,new_c[n], new_h[0],...,new_h[n]}`
+   *
+   * \return The hidden representation of the deepest layer
+   */
+  Expression set_s(const RNNPointer& prev, const std::vector<Expression>& s_new = {}) {
+    sm.transition(RNNOp::add_input);
+    head.push_back(prev);
+    cur = head.size() - 1;
+    return set_s_impl(prev, s_new);
+  }
+
+  /**
+   *
+   * \brief Add another timestep by reading in the variable x
+   *
+   * \param x Input variable
+   *
+   * \return The hidden representation of the deepest layer
+   */
  Expression add_input(const Expression& x) {
    sm.transition(RNNOp::add_input);
    head.push_back(cur);
@@ -61,9 +128,18 @@ struct RNNBuilder {
    return add_input_impl(rcp, x);
  }
 
-  // add another timestep, but define recurrent connection to prev
-  // rather than to head[cur]
-  // this can be used to construct trees, implement beam search, etc.
+  /**
+   *
+   * \brief Add another timestep, with arbitrary recurrent connection.
+   * \details This allows defining a recurrent connection to `prev`
+   * rather than to head[cur].
+   * This can be used to construct trees, implement beam search, etc.
+   *
+   * \param prev Pointer to the previous state
+   * \param x Input variable
+   *
+   * \return The hidden representation of the deepest layer
+   */
  Expression add_input(const RNNPointer& prev, const Expression& x) {
    sm.transition(RNNOp::add_input);
    head.push_back(prev);
@@ -71,48 +147,128 @@ struct RNNBuilder {
    return add_input_impl(prev, x);
  }
 
-  // rewind the last timestep - this DOES NOT remove the variables
-  // from the computation graph, it just means the next time step will
-  // see a different previous state. You can remind as many times as
-  // you want.
+  /**
+   *
+   * \brief Rewind the last timestep
+   * \details - this DOES NOT remove the variables from the computation graph,
+   * it just means the next time step will see a different previous state.
+   * You can rewind as many times as you want.
+   */
  void rewind_one_step() {
    cur = head[cur];
  }
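
A sketch of the branching pattern the add_input(prev, x) documentation above refers to: unroll a shared prefix once, then expand several continuations from the same RNNPointer, as one would in beam search or when building trees (identifiers and the two-way branch are illustrative):

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // Feed a shared prefix, then branch two continuations off the same state.
    void branch_from_prefix(ComputationGraph& cg, LSTMBuilder& rnn,
                            const std::vector<Expression>& prefix,
                            const Expression& cont_a, const Expression& cont_b) {
      rnn.new_graph(cg);
      rnn.start_new_sequence();
      for (const Expression& x : prefix) rnn.add_input(x);
      RNNPointer fork = rnn.state();                 // remember where to branch from
      Expression h_a = rnn.add_input(fork, cont_a);  // continuation A
      Expression h_b = rnn.add_input(fork, cont_b);  // continuation B, same predecessor
      (void)h_a; (void)h_b;
    }
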
-  // Set dropout. In general, you should disable dropout at test time
+  /**
+   *
+   * \brief Set Dropout
+   *
+   * \param d Dropout rate
+   */
  void set_dropout(float d) { dropout_rate = d; }
+  /**
+   *
+   * \brief Disable Dropout
+   * \details In general, you should disable dropout at test time
+   */
  void disable_dropout() { dropout_rate = 0; }
 
-  // returns node (index) of most recent output
+  /**
+   *
+   * \brief Returns node (index) of most recent output
+   *
+   * \return Node (index) of most recent output
+   */
  virtual Expression back() const = 0;
 
-  // access the final output of each hidden layer
+  /**
+   *
+   * \brief Access the final output of each hidden layer
+   *
+   * \return Final output of each hidden layer
+   */
  virtual std::vector<Expression> final_h() const = 0;
+  /**
+   *
+   * \brief Access the output of any hidden layer
+   *
+   * \param i Pointer to the step whose output you want to access
+   *
+   * \return Output of each hidden layer at the given step
+   */
  virtual std::vector<Expression> get_h(RNNPointer i) const = 0;
 
-  // access the state of each hidden layer, in a format that can be used in
-  // start_new_sequence
+
+  /**
+   *
+   * \brief Access the final state of each hidden layer
+   * \details This returns the state of each hidden layer,
+   * in a format that can be used in start_new_sequence
+   * (i.e. including any internal cell for LSTMs and the like)
+   *
+   * \return Vector containing, if present, the list of final
+   * internal states, followed by the list of final outputs for
+   * each layer
+   */
  virtual std::vector<Expression> final_s() const = 0;
-  virtual unsigned num_h0_components() const = 0;
+  /**
+   *
+   * \brief Access the state of any hidden layer
+   * \details See `final_s` for details
+   *
+   * \param i Pointer to the step whose state you want to access
+   *
+   * \return Internal state of each hidden layer at the given step
+   */
  virtual std::vector<Expression> get_s(RNNPointer i) const = 0;
+
+  /**
+   *
+   * \brief Number of components in `h_0`
+   *
+   * \return Number of components in `h_0`
+   */
+  virtual unsigned num_h0_components() const = 0;
 
-  // copy the parameters of another builder
+  /**
+   *
+   * \brief Copy the parameters of another builder.
+   *
+   * \param params RNNBuilder you want to copy parameters from.
+   */
  virtual void copy(const RNNBuilder & params) = 0;
 
-  // the following functions save all the parameters associated with a particular
-  // RNNBuilder's derived class to a file. These should not be used to seralize
-  // models, they should only be used to load and save parameters for pretraining.
-  // If you are interested in serializing models, use the boost serialization
-  // API against your model class
+  /**
+   *
+   * \brief This function saves all the parameters associated with
+   * a particular RNNBuilder's derived class to a file.
+   * \details This should not be used to serialize models, it should
+   * only be used to save parameters for pretraining.
+   * If you are interested in serializing models, use the boost
+   * serialization API against your model class.
+   *
+   * \param fname File you want to save your model to.
+   */
  virtual void save_parameters_pretraining(const std::string& fname) const;
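
The final_s format documented above (any internal cells first, then the outputs of each layer) is exactly what start_new_sequence and set_s accept, which allows handing the final state of one unrolling to another. A sketch, assuming both builders share the same number of layers and hidden size:

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // Encode a source sequence, then start a second unrolling from its final state.
    void hand_over_state(ComputationGraph& cg, LSTMBuilder& enc, LSTMBuilder& dec,
                         const std::vector<Expression>& source) {
      enc.new_graph(cg);
      enc.start_new_sequence();
      for (const Expression& x : source) enc.add_input(x);
      dec.new_graph(cg);
      dec.start_new_sequence(enc.final_s());  // {c_0..c_{L-1}, h_0..h_{L-1}}
    }
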
+  /**
+   *
+   * \brief Loads all the parameters associated with a particular RNNBuilder's
+   * derived class from a file.
+   * \details This should not be used to serialize models, it should
+   * only be used to load parameters from pretraining.
+   * If you are interested in serializing models, use the boost
+   * serialization API against your model class.
+   *
+   * \param fname File you want to read your model from.
+   */
  virtual void load_parameters_pretraining(const std::string& fname);
 
- protected:
+protected:
  virtual void new_graph_impl(ComputationGraph& cg) = 0;
  virtual void start_new_sequence_impl(const std::vector<Expression>& h_0) = 0;
  virtual Expression add_input_impl(int prev, const Expression& x) = 0;
  virtual Expression set_h_impl(int prev, const std::vector<Expression>& h_new) = 0;
+  virtual Expression set_s_impl(int prev, const std::vector<Expression>& c_new) = 0;
  RNNPointer cur;
-  float dropout_rate;
- private:
+  float dropout_rate;
+private:
  // the state machine ensures that the caller is behaving
  RNNStateMachine sm;
  std::vector<RNNPointer> head; // head[i] returns the head position
@@ -122,21 +278,51 @@ struct RNNBuilder {
  void serialize(Archive& ar, const unsigned int);
 };
 
+/**
+ * \ingroup rnnbuilders
+ * \brief This provides a builder for the simplest RNN with tanh nonlinearity
+ * \details The equation for this RNN is:
+ * \f$h_t=\tanh(W_x x_t + W_h h_{t-1} + b)\f$
+ *
+ */
 struct SimpleRNNBuilder : public RNNBuilder {
  SimpleRNNBuilder() = default;
+  /**
+   *
+   * \brief Builds a simple RNN
+   *
+   * \param layers Number of layers
+   * \param input_dim Dimension of the input
+   * \param hidden_dim Hidden layer (and output) size
+   * \param model Model holding the parameters
+   * \param support_lags Allow for auxiliary output?
+   */
  explicit SimpleRNNBuilder(unsigned layers,
                            unsigned input_dim,
                            unsigned hidden_dim,
                            Model* model,
-                           bool support_lags=false);
+                           bool support_lags = false);
 
- protected:
+protected:
  void new_graph_impl(ComputationGraph& cg) override;
  void start_new_sequence_impl(const std::vector<Expression>& h_0) override;
  Expression add_input_impl(int prev, const Expression& x) override;
  Expression set_h_impl(int prev, const std::vector<Expression>& h_new) override;
-
- public:
+  Expression set_s_impl(int prev, const std::vector<Expression>& s_new) {return set_h_impl(prev, s_new);}
+
+public:
+  /**
+   *
+   * \brief Add auxiliary output
+   * \details Returns \f$h_t=\tanh(W_x x_t + W_h h_{t-1} + W_y y + b)\f$
+   * where \f$y\f$ is an auxiliary output
+   * TODO : clarify
+   *
+   * \param x Input expression
+   * \param aux Auxiliary output expression
+   *
+   * \return The hidden representation of the deepest layer
+   */
  Expression add_auxiliary_input(const Expression& x, const Expression &aux);
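
A minimal end-to-end sketch of the builder documented above, stepping h_t = tanh(W_x x_t + W_h h_{t-1} + b) over a short input sequence; the initialisation call, dimensions, and input values are illustrative and follow the DyNet C++ examples of this era:

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/model.h"
    #include "dynet/rnn.h"
    using namespace dynet;
    using namespace dynet::expr;

    int main(int argc, char** argv) {
      dynet::initialize(argc, argv);
      Model model;
      // 1 layer, 4-dimensional inputs, 8-dimensional hidden state
      SimpleRNNBuilder rnn(1, 4, 8, &model);

      ComputationGraph cg;
      rnn.new_graph(cg);
      rnn.start_new_sequence();
      std::vector<float> x_vals(4, 0.5f);
      Expression h;
      for (int t = 0; t < 3; ++t)   // h_t = tanh(W_x x_t + W_h h_{t-1} + b)
        h = rnn.add_input(input(cg, {4}, x_vals));
      // h now holds the last hidden state and could feed a downstream loss.
      (void)h;
      return 0;
    }
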
  Expression back() const override { return (cur == -1 ? h0.back() : h[cur].back()); }
@@ -152,7 +338,7 @@ struct SimpleRNNBuilder : public RNNBuilder {
  void save_parameters_pretraining(const std::string& fname) const override;
  void load_parameters_pretraining(const std::string& fname) override;
 
- private:
+private:
  // first index is layer, then x2h h2h hb
  std::vector<std::vector<Parameter>> params;
 
@@ -179,13 +365,13 @@ struct SimpleRNNBuilder : public RNNBuilder {
 
 namespace boost {
-  namespace serialization {
-    template<class Archive>
-    void serialize(Archive& ar, dynet::RNNPointer& p, const unsigned int version)
-    {
-      ar & p.t;
-    }
-  } // namespace serialization
+namespace serialization {
+template<class Archive>
+void serialize(Archive& ar, dynet::RNNPointer& p, const unsigned int version)
+{
+  ar & p.t;
+}
+} // namespace serialization
 } // namespace boost
 
 BOOST_CLASS_EXPORT_KEY(dynet::RNNBuilder)
diff --git a/python/dynet.pxd b/python/dynet.pxd
index a67b5cd88..07783f6aa 100644
--- a/python/dynet.pxd
+++ b/python/dynet.pxd
@@ -275,6 +275,7 @@ cdef extern from "dynet/rnn.h" namespace "dynet":
        CExpression add_input(CExpression &x)
        CExpression add_input(CRNNPointer prev, CExpression &x)
        CExpression set_h(CRNNPointer prev, vector[CExpression] ces)
+        CExpression set_s(CRNNPointer prev, vector[CExpression] ces)
        void rewind_one_step()
        CExpression back()
        vector[CExpression] final_h()
diff --git a/python/dynet.pyx b/python/dynet.pyx
index 8469e8540..502fa85d6 100644
--- a/python/dynet.pyx
+++ b/python/dynet.pyx
@@ -988,6 +988,16 @@ cdef class _RNNBuilder: # {{{
                ces.push_back(e.c())
        return Expression.from_cexpr(self.cg_version, self.thisptr.set_h(prev, ces))
 
+    cdef set_s(self, CRNNPointer prev, es=None):
+        if self.cg_version != _cg.version(): raise ValueError("Using stale builder. Create .new_graph() after computation graph is renewed.")
+        cdef vector[CExpression] ces = vector[CExpression]()
+        cdef Expression e
+        if es:
+            for e in es:
+                ensure_freshness(e)
+                ces.push_back(e.c())
+        return Expression.from_cexpr(self.cg_version, self.thisptr.set_s(prev, ces))
+
    cdef rewind_one_step(self):
        if self.cg_version != _cg.version(): raise ValueError("Using stale builder. Create .new_graph() after computation graph is renewed.")
        self.thisptr.rewind_one_step()
@@ -1215,6 +1225,11 @@ cdef class RNNState: # {{{
        cdef int state_idx = self.builder.thisptr.state()
        return RNNState(self.builder, state_idx, self, res)
 
+    cpdef RNNState set_s(self, es=None):
+        cdef Expression res = self.builder.set_s(CRNNPointer(self.state_idx), es)
+        cdef int state_idx = self.builder.thisptr.state()
+        return RNNState(self.builder, state_idx, self, res)
+
    cpdef RNNState add_input(self, Expression x):
        cdef Expression res = self.builder.add_input_to_prev(CRNNPointer(self.state_idx), x)
        cdef int state_idx = self.builder.thisptr.state()
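
The new Python RNNState.set_s above forwards to the C++ RNNBuilder::set_s at the state it wraps and then records the builder's new position; what the binding does corresponds roughly to the following on the C++ side (the helper name is illustrative):

    #include <vector>
    #include "dynet/dynet.h"
    #include "dynet/expr.h"
    #include "dynet/lstm.h"
    using namespace dynet;
    using namespace dynet::expr;

    // What RNNState.set_s does through the bindings: overwrite the state at `at`,
    // then return the freshly created position for subsequent add_input calls.
    RNNPointer python_style_set_s(LSTMBuilder& rnn, RNNPointer at,
                                  const std::vector<Expression>& s_new) {
      rnn.set_s(at, s_new);  // pushes a new step whose predecessor is `at`
      return rnn.state();    // the index the returned RNNState wraps
    }
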