This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Sphinx errors in Gluon #13275

Merged: 1 commit, Nov 16, 2018

6 changes: 4 additions & 2 deletions python/mxnet/autograd.py
@@ -273,8 +273,10 @@ def grad(heads, variables, head_grads=None, retain_graph=None, create_graph=Fals
returned as new NDArrays instead of stored into `variable.grad`.
Supports recording gradient graph for computing higher order gradients.

.. Note: Currently only a very limited set of operators support higher order
gradients.
.. note::

Currently only a very limited set of operators support higher order \
gradients.

Parameters
----------
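
Not part of the diff: a minimal sketch of the `grad` call this docstring documents, assuming the MXNet 1.x autograd API:

    import mxnet as mx

    x = mx.nd.array([1.0, 2.0, 3.0])
    x.attach_grad()
    with mx.autograd.record():
        y = x * x
    # Gradients are returned as new NDArrays rather than written to x.grad.
    # Passing create_graph=True would also record the gradient graph for
    # higher order gradients (only a limited set of operators support this).
    dy_dx, = mx.autograd.grad(y, [x])
    print(dy_dx)  # [2. 4. 6.]
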
6 changes: 2 additions & 4 deletions python/mxnet/gluon/block.py
@@ -326,8 +326,7 @@ def save_parameters(self, filename):

References
----------
`Saving and Loading Gluon Models

`Saving and Loading Gluon Models \
<https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html>`_
"""
params = self._collect_params_with_prefix()
@@ -372,8 +371,7 @@ def load_parameters(self, filename, ctx=None, allow_missing=False,

References
----------
`Saving and Loading Gluon Models

`Saving and Loading Gluon Models \
<https://mxnet.incubator.apache.org/tutorials/gluon/save_load_params.html>`_
"""
loaded = ndarray.load(filename)
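
For context (editorial, not part of the diff), a hedged sketch of the save/load round trip these docstrings describe; the file name is illustrative:

    import mxnet as mx
    from mxnet.gluon import nn

    net = nn.Dense(4)
    net.initialize()
    net(mx.nd.ones((2, 8)))              # run a forward pass so shapes are inferred
    net.save_parameters('dense.params')  # saves parameter values only, not structure

    net2 = nn.Dense(4)                   # structure must match what was saved
    net2.load_parameters('dense.params')
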
4 changes: 2 additions & 2 deletions python/mxnet/gluon/contrib/nn/basic_layers.py
@@ -27,7 +27,7 @@
from ...nn import Sequential, HybridSequential, BatchNorm

class Concurrent(Sequential):
"""Lays `Block`s concurrently.
"""Lays `Block` s concurrently.

This block feeds its input to all children blocks, and
produce the output by concatenating all the children blocks' outputs
@@ -60,7 +60,7 @@ def forward(self, x):


class HybridConcurrent(HybridSequential):
"""Lays `HybridBlock`s concurrently.
"""Lays `HybridBlock` s concurrently.

This block feeds its input to all children blocks, and
produce the output by concatenating all the children blocks' outputs
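
A small usage sketch (editorial, sizes are illustrative) of how these blocks feed one input to every child and concatenate the outputs:

    import mxnet as mx
    from mxnet.gluon import nn
    from mxnet.gluon.contrib.nn import HybridConcurrent

    branches = HybridConcurrent(axis=1)
    branches.add(nn.Dense(3))
    branches.add(nn.Dense(5))
    branches.initialize()
    out = branches(mx.nd.ones((2, 4)))
    print(out.shape)  # (2, 8): the two branch outputs concatenated along axis 1
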
13 changes: 9 additions & 4 deletions python/mxnet/gluon/contrib/rnn/rnn_cell.py
@@ -26,7 +26,7 @@
class VariationalDropoutCell(ModifierCell):
"""
Applies Variational Dropout on base cell.
(https://arxiv.org/pdf/1512.05287.pdf,
(https://arxiv.org/pdf/1512.05287.pdf, \
https://www.stat.berkeley.edu/~tsmoon/files/Conference/asru2015.pdf).

Variational dropout uses the same dropout mask across time-steps. It can be applied to RNN
@@ -197,24 +197,29 @@ def unroll(self, length, inputs, begin_state=None, layout='NTC', merge_outputs=N
class LSTMPCell(HybridRecurrentCell):
r"""Long-Short Term Memory Projected (LSTMP) network cell.
(https://arxiv.org/abs/1402.1128)

Each call computes the following function:

.. math::
\begin{array}{ll}
i_t = sigmoid(W_{ii} x_t + b_{ii} + W_{ri} r_{(t-1)} + b_{ri}) \\
f_t = sigmoid(W_{if} x_t + b_{if} + W_{rf} r_{(t-1)} + b_{rf}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{rc} r_{(t-1)} + b_{rg}}) \\
g_t = \tanh(W_{ig} x_t + b_{ig} + W_{rc} r_{(t-1)} + b_{rg}) \\
o_t = sigmoid(W_{io} x_t + b_{io} + W_{ro} r_{(t-1)} + b_{ro}) \\
c_t = f_t * c_{(t-1)} + i_t * g_t \\
h_t = o_t * \tanh(c_t) \\
r_t = W_{hr} h_t
\end{array}

where :math:`r_t` is the projected recurrent activation at time `t`,
math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
:math:`h_t` is the hidden state at time `t`, :math:`c_t` is the
cell state at time `t`, :math:`x_t` is the input at time `t`, and :math:`i_t`,
:math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, and
out gates, respectively.

Parameters
----------

hidden_size : int
Number of units in cell state symbol.
projection_size : int
@@ -234,7 +239,7 @@ class LSTMPCell(HybridRecurrentCell):
to zero.
h2h_bias_initializer : str or Initializer
Initializer for the bias vector.
prefix : str, default 'lstmp_'
prefix : str, default ``'lstmp_``'
Prefix for name of `Block`s
(and name of weight if params is `None`).
params : Parameter or None
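
To make the projection in the formulas above concrete, a hedged single-step sketch (sizes are illustrative, not taken from the diff):

    import mxnet as mx
    from mxnet.gluon.contrib.rnn import LSTMPCell

    cell = LSTMPCell(hidden_size=20, projection_size=5)
    cell.initialize()
    x = mx.nd.random.uniform(shape=(2, 10))    # (batch_size, input_size)
    states = cell.begin_state(batch_size=2)
    r_t, states = cell(x, states)
    print(r_t.shape)  # (2, 5): the projected recurrent activation r_t
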
18 changes: 6 additions & 12 deletions python/mxnet/gluon/data/vision/datasets.py
@@ -45,8 +45,7 @@ class MNIST(dataset._DownloadedDataset):
train : bool, default True
Whether to load the training or testing set.
transform : function, default None
A user defined callback that transforms each sample. For example:
::
A user defined callback that transforms each sample. For example::

transform=lambda data, label: (data.astype(np.float32)/255, label)

@@ -106,8 +105,7 @@ class FashionMNIST(MNIST):
train : bool, default True
Whether to load the training or testing set.
transform : function, default None
A user defined callback that transforms each sample. For example:
::
A user defined callback that transforms each sample. For example::

transform=lambda data, label: (data.astype(np.float32)/255, label)

@@ -139,8 +137,7 @@ class CIFAR10(dataset._DownloadedDataset):
train : bool, default True
Whether to load the training or testing set.
transform : function, default None
A user defined callback that transforms each sample. For example:
::
A user defined callback that transforms each sample. For example::

transform=lambda data, label: (data.astype(np.float32)/255, label)

@@ -204,8 +201,7 @@ class CIFAR100(CIFAR10):
train : bool, default True
Whether to load the training or testing set.
transform : function, default None
A user defined callback that transforms each sample. For example:
::
A user defined callback that transforms each sample. For example::

transform=lambda data, label: (data.astype(np.float32)/255, label)

@@ -242,8 +238,7 @@ class ImageRecordDataset(dataset.RecordFileDataset):

If 1, always convert images to colored (RGB).
transform : function, default None
A user defined callback that transforms each sample. For example:
::
A user defined callback that transforms each sample. For example::

transform=lambda data, label: (data.astype(np.float32)/255, label)

@@ -279,8 +274,7 @@ class ImageFolderDataset(dataset.Dataset):
If 0, always convert loaded images to greyscale (1 channel).
If 1, always convert loaded images to colored (3 channels).
transform : callable, default None
A function that takes data and label and transforms them:
::
A function that takes data and label and transforms them::

transform = lambda data, label: (data.astype(np.float32)/255, label)

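
The transform callback repeated in these docstrings can be used as follows (editorial sketch; the dataset is downloaded to its default location):

    import numpy as np
    from mxnet.gluon.data.vision import MNIST

    dataset = MNIST(train=True,
                    transform=lambda data, label: (data.astype(np.float32)/255, label))
    data, label = dataset[0]
    print(data.dtype, data.shape)  # float32, (28, 28, 1)
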
2 changes: 1 addition & 1 deletion python/mxnet/gluon/loss.py
@@ -621,7 +621,7 @@ class LogisticLoss(Loss):

where `pred` is the classifier prediction and `label` is the target tensor
containing values -1 or 1 (0 or 1 if `label_format` is binary).
`pred` and `label` can have arbitrary shape as long as they have the same number of elements.
`pred` and `label` can have arbitrary shape as long as they have the same number of elements.

Parameters
----------
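
For reference (not part of the diff), a hedged sketch of LogisticLoss with signed labels, i.e. the default label_format:

    import mxnet as mx
    from mxnet.gluon.loss import LogisticLoss

    loss_fn = LogisticLoss()              # label_format='signed': labels in {-1, 1}
    pred = mx.nd.array([0.5, -1.2, 3.0])  # pred and label only need matching sizes
    label = mx.nd.array([1, -1, 1])
    print(loss_fn(pred, label))
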
2 changes: 1 addition & 1 deletion python/mxnet/gluon/nn/basic_layers.py
@@ -75,7 +75,7 @@ def __len__(self):
return len(self._children)

def hybridize(self, active=True, **kwargs):
"""Activates or deactivates `HybridBlock`s recursively. Has no effect on
"""Activates or deactivates `HybridBlock` s recursively. Has no effect on
non-hybrid children.

Parameters
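
A brief editorial sketch of the recursive hybridization this docstring refers to (layer sizes are illustrative):

    import mxnet as mx
    from mxnet.gluon import nn

    net = nn.HybridSequential()
    net.add(nn.Dense(16, activation='relu'))
    net.add(nn.Dense(2))
    net.initialize()
    net.hybridize()                  # every HybridBlock child switches to the cached graph
    out = net(mx.nd.ones((4, 8)))    # non-hybrid children, if any, run unchanged
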
2 changes: 1 addition & 1 deletion python/mxnet/gluon/parameter.py
@@ -580,7 +580,7 @@ class Constant(Parameter):
will not change during training. But you can still update their values
manually with the `set_data` method.

`Constant`s can be created with either::
`Constant` s can be created with either::

const = mx.gluon.Constant('const', [[1,2],[3,4]])

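
A hedged sketch of the first creation path shown in the docstring (the value is illustrative; initialize() and data() are assumed to behave as for any other Parameter):

    import mxnet as mx

    const = mx.gluon.Constant('const', [[1, 2], [3, 4]])
    const.initialize()
    print(const.data())  # fixed value; gradients are never applied to it,
                         # though set_data() can still change it manually
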
8 changes: 5 additions & 3 deletions python/mxnet/test_utils.py
@@ -805,10 +805,12 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto
location : list or tuple or dict
Argument values used as location to compute gradient

- if type is list of numpy.ndarray
- if type is list of numpy.ndarray, \
inner elements should have the same order as mxnet.sym.list_arguments().
- if type is dict of str -> numpy.ndarray

- if type is dict of str -> numpy.ndarray, \
maps the name of arguments to the corresponding numpy.ndarray.

*In either case, value of all the arguments must be provided.*
aux_states : list or tuple or dict, optional
The auxiliary states required when generating the executor for the symbol.
@@ -829,7 +831,7 @@ def check_numeric_gradient(sym, location, aux_states=None, numeric_eps=1e-3, rto

References
---------
..[1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
[1] https://github.com/Theano/Theano/blob/master/theano/gradient.py
"""
assert dtype in (np.float16, np.float32, np.float64)
# cannot use finite differences with small eps without high precision
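
A minimal sketch (not part of the diff) of the dict form of `location` described above; the symbol is illustrative:

    import numpy as np
    import mxnet as mx
    from mxnet.test_utils import check_numeric_gradient

    data = mx.sym.Variable('data')
    sym = mx.sym.square(data)
    # location maps argument names to numpy arrays; every argument must be provided
    check_numeric_gradient(sym, location={'data': np.random.normal(size=(2, 3))})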