diff --git a/src/operator/pad-inl.h b/src/operator/pad-inl.h
index dc15ed786ae5..83a98fe9536d 100644
--- a/src/operator/pad-inl.h
+++ b/src/operator/pad-inl.h
@@ -37,21 +37,18 @@ struct PadParam : public dmlc::Parameter<PadParam> {
         .add_enum("constant", pad_enum::kConstant)
         .add_enum("edge", pad_enum::kEdge)
         .describe(
-            "Padding type to use. \"constant\" pads all values with a constant "
-            "value, the value of which can be specified with the "
-            "constant_value option. \"edge\" uses the boundary values of the "
-            "array as padding.");
+            "Padding type to use."
+            " \"constant\" pads with `constant_value` and"
+            " \"edge\" pads using the edge values of the input array.");
 
     DMLC_DECLARE_FIELD(pad_width).describe(
-        "A tuple of padding widths of length 2*r, where r is the rank of the "
-        "input tensor, specifying number of values padded to the edges of each "
-        "axis. (before_1, after_1, ... , before_N, after_N) unique pad widths "
-        "for each axis. Equivalent to pad_width in numpy.pad, but flattened.");
+        "Widths of the padding regions applied to the edges of each axis. "
+        "It is a tuple of integer padding widths for each axis of the format "
+        "``(before_1, after_1, ... , before_N, after_N)``. "
+        "It should be of length ``2*N`` where ``N`` is the number of dimensions of the array. "
+        "This is equivalent to pad_width in numpy.pad, but flattened.");
     DMLC_DECLARE_FIELD(constant_value)
-        .describe(
-            "This option is only used when mode is \"constant\". This "
-            "value will be used as the padding value. Defaults to 0 if not "
-            "specified.")
+        .describe("The value used for padding when `mode` is \"constant\".")
         .set_default(0.0);
   }
 };
@@ -74,7 +71,8 @@ class PadOp : public Operator {
     int rank = in_data[pad_enum::kData].ndim();
     auto pad = param_.pad_width;
     DType constant_value = param_.constant_value;
-
+    // TODO(nswamy@): update the documentation and log below when support is added for more than
+    // 4D/5D arrays and not requiring higher dimensions to be zero.
     if ((rank == 4) && !pad[0] && !pad[1] && !pad[2] && !pad[3]) {
       Tensor<xpu, 4, DType> data =
           in_data[pad_enum::kData].get<xpu, 4, DType>(s);
@@ -88,8 +86,10 @@ class PadOp : public Operator {
           out_data[pad_enum::kOut].get<xpu, 5, DType>(s);
       pad_image(out, data, param_.pad_width, param_.mode, constant_value);
     } else {
-      LOG(FATAL) << "Only 4d or 5d input tensors with padding applied to "
-                    "dimensions > 1 is currently implemented.";
+      LOG(FATAL) << "Current implementation only supports 4D and 5D "
+                    "input arrays with padding applied "
+                    "only on axes 1, 2 and 3. "
+                    "Expects axes 4 and 5 in pad_width to be zero.";
     }
 
     // Assign(out, req[pad_enum::kOut], F<mshadow_op::identity>(data));
diff --git a/src/operator/pad.cc b/src/operator/pad.cc
index 89c9da8ee498..3d129c3110dc 100644
--- a/src/operator/pad.cc
+++ b/src/operator/pad.cc
@@ -400,12 +400,89 @@ Operator *PadProp::CreateOperatorEx(Context ctx, std::vector<TShape> *in_shape,
 DMLC_REGISTER_PARAMETER(PadParam);
 
 MXNET_REGISTER_OP_PROPERTY(Pad, PadProp)
-.describe(R"code(Pads an array.
+.describe(R"code(Pads an input array with a constant or edge values of the array.
+
+.. note:: `Pad` is deprecated. Use `pad` instead.
+
+.. note:: Only 4D and 5D input arrays are currently supported, and the first two axes cannot
+   be padded: the first four entries of `pad_width` must be zero.
+
+This operation pads an input array with either a `constant_value` or edge values
+along each axis of the input array. The amount of padding is specified by `pad_width`.
+
+`pad_width` is a tuple of integer padding widths for each axis of the format
+``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of length ``2*N``
+where ``N`` is the number of dimensions of the array.
+
+For dimension ``i`` of the input array, ``before_i`` and ``after_i`` indicate how many values
+to add before and after the elements of the array along dimension ``i``.
+The widths of the first two dimensions, ``before_1``, ``after_1``, ``before_2`` and
+``after_2``, must be 0.
+
+Example::
+
+   x = [[[[  1.   2.   3.]
+          [  4.   5.   6.]]
+
+         [[  7.   8.   9.]
+          [ 10.  11.  12.]]]
+
+
+        [[[ 11.  12.  13.]
+          [ 14.  15.  16.]]
+
+         [[ 17.  18.  19.]
+          [ 20.  21.  22.]]]]
+
+   pad(x, mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =
+
+         [[[[  1.   1.   2.   3.   3.]
+            [  1.   1.   2.   3.   3.]
+            [  4.   4.   5.   6.   6.]
+            [  4.   4.   5.   6.   6.]]
+
+           [[  7.   7.   8.   9.   9.]
+            [  7.   7.   8.   9.   9.]
+            [ 10.  10.  11.  12.  12.]
+            [ 10.  10.  11.  12.  12.]]]
+
+
+          [[[ 11.  11.  12.  13.  13.]
+            [ 11.  11.  12.  13.  13.]
+            [ 14.  14.  15.  16.  16.]
+            [ 14.  14.  15.  16.  16.]]
+
+           [[ 17.  17.  18.  19.  19.]
+            [ 17.  17.  18.  19.  19.]
+            [ 20.  20.  21.  22.  22.]
+            [ 20.  20.  21.  22.  22.]]]]
+
+   pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =
+
+         [[[[  0.   0.   0.   0.   0.]
+            [  0.   1.   2.   3.   0.]
+            [  0.   4.   5.   6.   0.]
+            [  0.   0.   0.   0.   0.]]
+
+           [[  0.   0.   0.   0.   0.]
+            [  0.   7.   8.   9.   0.]
+            [  0.  10.  11.  12.   0.]
+            [  0.   0.   0.   0.   0.]]]
+
+
+          [[[  0.   0.   0.   0.   0.]
+            [  0.  11.  12.  13.   0.]
+            [  0.  14.  15.  16.   0.]
+            [  0.   0.   0.   0.   0.]]
+
+           [[  0.   0.   0.   0.   0.]
+            [  0.  17.  18.  19.   0.]
+            [  0.  20.  21.  22.   0.]
+            [  0.   0.   0.   0.   0.]]]]
 
-Only supports 4-D and 5-D input arrays.
 
 )code" ADD_FILELINE)
-.add_argument("data", "NDArray-or-Symbol", "An n-dimensional input tensor.")
+.add_argument("data", "NDArray-or-Symbol", "An n-dimensional input array.")
 .add_arguments(PadParam::__FIELDS__());
 
 NNVM_REGISTER_OP(Pad).add_alias("pad");