Skip to content

Commit

Permalink
[Relax][Frontend][Onnx] add sum and globalavgpool 1d/3d op (#16669)
Browse files Browse the repository at this point in the history
add sum and globalavgpool op

Co-authored-by: cheng wen <chengven027-intellif>
  • Loading branch information
chengven027 authored Mar 12, 2024
1 parent cae1af6 commit ca12cb6
Show file tree
Hide file tree
Showing 9 changed files with 425 additions and 10 deletions.
44 changes: 44 additions & 0 deletions include/tvm/relax/attrs/nn.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,28 @@ struct Pool3DAttrs : public tvm::AttrsNode<Pool3DAttrs> {
}
}; // struct Pool3dAttrs

/*! \brief Attributes for 1d adaptive pool operator */
struct AdaptivePool1DAttrs : public tvm::AttrsNode<AdaptivePool1DAttrs> {
  // Target output width; when absent, the legalizer falls back to the input width.
  Optional<Array<IntImm>> output_size;
  // Layout string of the input tensor, e.g. "NCW".
  String layout;
  // Layout string of the output tensor; legalization requires it to equal `layout`.
  String out_layout;

  TVM_DECLARE_ATTRS(AdaptivePool1DAttrs, "relax.attrs.AdaptivePool1DAttrs") {
    TVM_ATTR_FIELD(output_size).describe("Output width.");
    TVM_ATTR_FIELD(layout).describe(
        "Dimension ordering of input data. Can be 'NCW', 'NWC', etc."
        "'N', 'C', 'W' stands for batch, channel and width"
        "dimensions respectively. Pooling is applied on the"
        "'W' dimensions.");
    TVM_ATTR_FIELD(out_layout)
        .describe(
            "Dimension ordering of output data. Can be 'NCW', 'NWC', etc."
            "'N', 'C', 'W' stands for batch, channel and width"
            "dimensions respectively. Pooling is applied on the"
            "'W' dimensions.");
  }
};  // struct AdaptivePool1DAttrs

/*! \brief Attributes for 2d adaptive pool operator */
struct AdaptivePool2DAttrs : public tvm::AttrsNode<AdaptivePool2DAttrs> {
Optional<Array<IntImm>> output_size;
Expand All @@ -393,6 +415,28 @@ struct AdaptivePool2DAttrs : public tvm::AttrsNode<AdaptivePool2DAttrs> {
}
}; // struct AdaptivePool2DAttrs

/*! \brief Attributes for 3d adaptive pool operator */
struct AdaptivePool3DAttrs : public tvm::AttrsNode<AdaptivePool3DAttrs> {
  // Target output (depth, height, width); when absent, the legalizer falls back
  // to the input spatial extents.
  Optional<Array<IntImm>> output_size;
  // Layout string of the input tensor, e.g. "NCDHW".
  String layout;
  // Layout string of the output tensor; legalization requires it to equal `layout`.
  String out_layout;

  TVM_DECLARE_ATTRS(AdaptivePool3DAttrs, "relax.attrs.AdaptivePool3DAttrs") {
    TVM_ATTR_FIELD(output_size).describe("Output depth, height and width.");
    TVM_ATTR_FIELD(layout).describe(
        "Dimension ordering of input data. Can be 'NCDHW', 'NDHWC', etc."
        "'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width"
        "dimensions respectively. Pooling is applied on 'D', 'H' and"
        "'W' dimensions.");
    TVM_ATTR_FIELD(out_layout)
        .describe(
            "Dimension ordering of output data. Can be 'NCDHW', 'NDHWC', etc."
            "'N', 'C', 'D', 'H', 'W' stands for batch, channel, depth, height, and width"
            "dimensions respectively. Pooling is applied on 'D', 'H' and"
            "'W' dimensions.");
  }
};  // struct AdaptivePool3DAttrs

/*! \brief Attributes used in softmax operators */
struct SoftmaxAttrs : public tvm::AttrsNode<SoftmaxAttrs> {
int axis;
Expand Down
24 changes: 23 additions & 1 deletion python/tvm/relax/frontend/onnx/onnx_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,17 @@ def _impl_v13(cls, bb, inputs, attr, params):
return relax.op.add(inputs[0], inputs[1])


class Sum(OnnxOpConverter):
    """Convert an onnx Sum node into an equivalent Relax expression.

    ONNX ``Sum`` is the element-wise sum of all of its inputs (1..N tensors).
    """

    @classmethod
    def _impl_v1(cls, bb, inputs, attr, params):
        # Fold the operands with relax.op.add. Accumulate into a local so the
        # caller's `inputs` list is not mutated (the previous implementation
        # overwrote its entries in place, leaking intermediate adds back to
        # the caller). A single input is returned unchanged.
        result = inputs[0]
        for operand in inputs[1:]:
            result = relax.op.add(result, operand)
        return result


class Mul(OnnxOpConverter):
"""Convert an onnx Mul node into an equivalent Relax expression."""

Expand Down Expand Up @@ -1538,7 +1549,17 @@ class GlobalAveragePool(OnnxOpConverter):

@classmethod
def _impl_v1(cls, bb, inputs, attr, params):
    """Lower onnx GlobalAveragePool by dispatching on the input rank."""
    data = inputs[0]
    rank = len(data.struct_info.shape)
    # Rank includes batch and channel dims: rank 3 -> 1D, 4 -> 2D, 5 -> 3D.
    pool_by_rank = {
        3: relax.op.nn.adaptive_avg_pool1d,
        4: relax.op.nn.adaptive_avg_pool2d,
        5: relax.op.nn.adaptive_avg_pool3d,
    }
    pool = pool_by_rank.get(rank)
    if pool is None:
        raise NotImplementedError(
            "Global average pooling is only implemented for 1D, 2D, and 3D kernels, got %dD."
            % (rank - 2)
        )
    return pool(data, 1)


class Flatten(OnnxOpConverter):
Expand Down Expand Up @@ -1899,6 +1920,7 @@ def _get_convert_map():
"Add": Add,
"Mul": Mul,
"Cast": Cast,
"Sum": Sum,
"Gather": Gather,
"Gemm": Gemm,
"Reshape": Reshape,
Expand Down
2 changes: 2 additions & 0 deletions python/tvm/relax/op/nn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
# under the License.
"""Neural network related operators."""
from .nn import (
adaptive_avg_pool1d,
adaptive_avg_pool2d,
adaptive_avg_pool3d,
attention,
attention_var_len,
avg_pool1d,
Expand Down
109 changes: 109 additions & 0 deletions python/tvm/relax/op/nn/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,59 @@ def avg_pool3d(
)


def adaptive_avg_pool1d(
    data: Expr,
    output_size: Optional[Union[int, Tuple[int]]] = None,
    layout: str = "NCW",
    out_layout: Optional[str] = None,
) -> Expr:
    r"""1D adaptive average pooling operator. This operator is experimental.

    This operator takes data as input and does 1D average value calculation
    across each window represented by W.

    In the default case, where the data_layout is `NCW`
    a data Tensor with shape `(batch_size, in_channels, width)`,
    to produce an output Tensor with shape
    (batch_size, in_channels, output_width).

    The pooling kernel and stride sizes are automatically chosen for
    desired output sizes.

    For output_size:
        If this argument is not provided, the input width will be used
        as the output width.

        If a single integer is provided for output_size, the output size is
        (N x C x output_size) for any input (NCW).

    Parameters
    ----------
    data : relax.Expr
        The input data to the operator.

    output_size : Optional[Union[int, Tuple[int]]]
        Output width.
        If not specified, it will be the same as the input width.
        If specified, it is required to have length one.

    layout : str
        Layout of the input.

    out_layout : Optional[str]
        Layout of the output. If not specified, it is the same as data_layout.

    Returns
    -------
    result : relax.Expr
        The computed result.
    """
    # Normalize a bare int to the 1-tuple form expected by the FFI op.
    if isinstance(output_size, int):
        output_size = (output_size,)
    return _ffi_api.adaptive_avg_pool1d(data, output_size, layout, out_layout)  # type: ignore


def adaptive_avg_pool2d(
data: Expr,
output_size: Optional[Union[int, Tuple[int, int]]] = None,
Expand Down Expand Up @@ -1099,6 +1152,62 @@ def adaptive_avg_pool2d(
return _ffi_api.adaptive_avg_pool2d(data, output_size, layout, out_layout) # type: ignore


def adaptive_avg_pool3d(
    data: Expr,
    output_size: Optional[Union[int, Tuple[int, int, int]]] = None,
    layout: str = "NCDHW",
    out_layout: Optional[str] = None,
) -> Expr:
    r"""3D adaptive average pooling operator. This operator is experimental.

    This operator takes data as input and does 3D average value calculation
    across each window represented by DxHxW.

    In the default case, where the data_layout is `NCDHW`
    a data Tensor with shape `(batch_size, in_channels, depth, height, width)`,
    to produce an output Tensor with shape
    (batch_size, in_channels, output_depth, output_height, output_width).

    The pooling kernel and stride sizes are automatically chosen for
    desired output sizes.

    For output_size:
        If this argument is not provided, input depth, height and width will be used
        as output depth, height and width.

        If a single integer is provided for output_size, the output size is
        (N x C x output_size x output_size x output_size) for any input (NCDHW).

        If a tuple of integers (depth, height, width) are provided for output_size,
        the output size is (N x C x depth x height x width) for any input (NCDHW).

    Parameters
    ----------
    data : relax.Expr
        The input data to the operator.

    output_size : Optional[Union[int, Tuple[int, int, int]]]
        Output depth, height and width.
        If not specified, it will be the same as the input depth, height and width.
        If specified, it is required to have length either 1 or 3.

    layout : str
        Layout of the input.

    out_layout : Optional[str]
        Layout of the output. If not specified, it is the same as data_layout.

    Returns
    -------
    result : relax.Expr
        The computed result.
    """
    # A bare int means a cubic output; broadcast it to all three spatial dims.
    if isinstance(output_size, int):
        output_size = (output_size, output_size, output_size)
    return _ffi_api.adaptive_avg_pool3d(data, output_size, layout, out_layout)  # type: ignore


def relu(data: Expr) -> Expr:
r"""Rectified linear unit.
Expand Down
56 changes: 56 additions & 0 deletions python/tvm/relax/transform/legalize_ops/nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,33 @@ def _nn_avg_pool3d(bb: BlockBuilder, call: Call) -> Expr:
)


@register_legalize("relax.nn.adaptive_avg_pool1d")
def _nn_adaptive_avg_pool1d(bb: BlockBuilder, call: Call) -> Expr:
    """Legalize relax.nn.adaptive_avg_pool1d into a TOPI adaptive_pool1d TE call."""
    attrs = call.attrs
    if attrs.layout != attrs.out_layout:
        # TOPI cannot change layout inside the pooling kernel; leave the call
        # un-legalized for a later pass to handle.
        logging.info(
            "TOPI adaptive_avg_pool1d does not support different input-output "
            "layouts, and thus cannot be legalized by TOPI"
        )
        return call

    def te_adaptive_avg_pool1d(data, output_size, layout_str):
        size = output_size
        if size is None:
            # Default output size: the input width, i.e. identity pooling extent.
            w_axis = tir.layout(layout_str).index_of("W")
            assert w_axis != -1
            size = data.shape[w_axis]
        return topi.nn.adaptive_pool1d(data, size, "avg", layout_str)

    return bb.call_te(
        te_adaptive_avg_pool1d,
        call.args[0],
        attrs.output_size,
        attrs.layout,
        primfunc_name_hint="adaptive_avg_pool1d",
    )


@register_legalize("relax.nn.adaptive_avg_pool2d")
def _nn_adaptive_avg_pool2d(bb: BlockBuilder, call: Call) -> Expr:
if call.attrs.out_layout != call.attrs.layout:
Expand Down Expand Up @@ -410,6 +437,35 @@ def te_adaptive_avg_pool2d(data, output_size, layout_str):
)


@register_legalize("relax.nn.adaptive_avg_pool3d")
def _nn_adaptive_avg_pool3d(bb: BlockBuilder, call: Call) -> Expr:
    """Legalize relax.nn.adaptive_avg_pool3d into a TOPI adaptive_pool3d TE call."""
    attrs = call.attrs
    if attrs.layout != attrs.out_layout:
        # TOPI cannot change layout inside the pooling kernel; leave the call
        # un-legalized for a later pass to handle.
        logging.info(
            "TOPI adaptive_avg_pool3d does not support different input-output "
            "layouts, and thus cannot be legalized by TOPI"
        )
        return call

    def te_adaptive_avg_pool3d(data, output_size, layout_str):
        size = output_size
        if size is None:
            # Default output size: keep the input D/H/W extents unchanged.
            layout = tir.layout(layout_str)
            axes = [layout.index_of(dim) for dim in ("D", "H", "W")]
            assert all(axis != -1 for axis in axes)
            size = tuple(data.shape[axis] for axis in axes)
        return topi.nn.adaptive_pool3d(data, size, "avg", layout_str)

    return bb.call_te(
        te_adaptive_avg_pool3d,
        call.args[0],
        attrs.output_size,
        attrs.layout,
        primfunc_name_hint="adaptive_avg_pool3d",
    )


register_legalize("relax.nn.relu", _call_topi_without_attr(topi.nn.relu))


Expand Down
7 changes: 7 additions & 0 deletions python/tvm/topi/nn/pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,13 @@ def adaptive_pool(data, output_size, pool_type, layout="NCHW"):
return cpp.nn.adaptive_pool(data, output_size, POOL_TYPE_CODE[pool_type], layout)


def adaptive_pool1d(data, output_size, pool_type, layout="NCW"):
    """Perform pooling on one dimensional data.
    See the two dimensional version above for details.
    """
    return cpp.nn.adaptive_pool1d(data, output_size, POOL_TYPE_CODE[pool_type], layout)


def adaptive_pool3d(data, output_size, pool_type, layout="NCDHW"):
"""Perform pooling on three dimensional data.
See the two dimensional version above for details.
Expand Down
Loading

0 comments on commit ca12cb6

Please sign in to comment.