From a8451b7455449bc0e3fa680d65b06df57315e6f3 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Fri, 3 Dec 2021 11:44:57 -0800 Subject: [PATCH 1/9] Add topi batch norm and tests --- python/tvm/relay/op/nn/_nn.py | 5 ++ python/tvm/relay/op/strategy/generic.py | 23 ++++++ python/tvm/topi/generic/nn.py | 17 ++++ python/tvm/topi/nn/__init__.py | 1 + python/tvm/topi/nn/batch_norm.py | 81 +++++++++++++++++++ python/tvm/topi/testing/__init__.py | 1 + python/tvm/topi/testing/batch_norm.py | 75 +++++++++++++++++ src/topi/schedule.cc | 3 + .../topi/python/test_topi_batch_norm.py | 77 ++++++++++++++++++ 9 files changed, 283 insertions(+) create mode 100644 python/tvm/topi/nn/batch_norm.py create mode 100644 python/tvm/topi/testing/batch_norm.py create mode 100644 tests/python/topi/python/test_topi_batch_norm.py diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 17f75a07af64..33bfa9e9e77e 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -151,6 +151,11 @@ def legalize_batch_matmul(attrs, inputs, types): reg.register_pattern("nn.batch_matmul", reg.OpPattern.OUT_ELEMWISE_FUSABLE) +# batch_norm +reg.register_strategy("nn.batch_norm", strategy.batch_norm_strategy) +reg.register_pattern("nn.batch_norm", reg.OpPattern.OUT_ELEMWISE_FUSABLE) + + # sparse_dense @reg.register_compute("nn.sparse_dense") def compute_sparse_dense(attrs, inputs, out_type): diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index ab12be16e17e..115f6cb7dcd2 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -848,6 +848,29 @@ def batch_matmul_strategy(attrs, inputs, out_type, target): return strategy +# batch_norm +def wrap_compute_batch_norm(topi_compute): + """wrap batch_norm topi compute""" + + def _compute_batch_norm(attrs, inputs, out_type): + return [topi_compute(*inputs, attrs.axis, attrs.epsilon, attrs.center, attrs.scale)] + + return _compute_batch_norm + + +@override_native_generic_func("batch_norm_strategy") +def batch_norm_strategy(attrs, inputs, out_type, target): + """batch_norm generic strategy""" + logger.warning("batch_norm is not optimized for this platform.") + strategy = _op.OpStrategy() + strategy.add_implementation( + wrap_compute_batch_norm(topi.nn.batch_norm), + wrap_topi_schedule(topi.generic.schedule_batch_norm), + name="batch_norm.generic", + ) + return strategy + + # sparse dense def wrap_compute_sparse_dense(topi_compute): """wrap sparse dense topi compute""" diff --git a/python/tvm/topi/generic/nn.py b/python/tvm/topi/generic/nn.py index 22a90aa2cd07..ba63c539133e 100644 --- a/python/tvm/topi/generic/nn.py +++ b/python/tvm/topi/generic/nn.py @@ -815,6 +815,23 @@ def schedule_batch_matmul(outs): return _default_schedule(outs, False) +def schedule_batch_norm(outs): + """Schedule for batch_norm + + Parameters + ---------- + outs: Array of Tensor + The computation graph description of sparse_transpose + in the format of an array of tensors. + + Returns + ------- + sch: Schedule + The computation schedule for the op. 
+ """ + return _default_schedule(outs, False) + + def schedule_correlation_nchw(outs): """Schedule for correlation_nchw diff --git a/python/tvm/topi/nn/__init__.py b/python/tvm/topi/nn/__init__.py index b5e766adbc12..d3d00305a17b 100644 --- a/python/tvm/topi/nn/__init__.py +++ b/python/tvm/topi/nn/__init__.py @@ -42,6 +42,7 @@ from .bitserial_conv2d import * from .bitserial_dense import * from .batch_matmul import * +from .batch_norm import * from .sparse import * from .pad import * from .fifo_buffer import * diff --git a/python/tvm/topi/nn/batch_norm.py b/python/tvm/topi/nn/batch_norm.py new file mode 100644 index 000000000000..b3410a62c460 --- /dev/null +++ b/python/tvm/topi/nn/batch_norm.py @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Batch normalization.""" +from tvm import te +from tvm import topi + + +def batch_norm( + data: te.Tensor, + gamma: te.Tensor, + beta: te.Tensor, + axis: int = 1, + epsilon: float = 1e-5, + center: bool = True, + scale: bool = True, +): + """Batch normalization layer (Ioffe and Szegedy, 2014). + + Normalizes the input at each batch, i.e. applies a transformation + that maintains the mean activation close to 0 and the activation + standard deviation close to 1. + + Parameters + ---------- + data : tvm.te.Tensor + Input to be batch-normalized. + + gamma : tvm.te.Tensor + Scale factor to be applied to the normalized tensor. + + beta : tvm.te.Tensor + Offset to be applied to the normalized tensor. + + axis : Optional[int] = 1 + Specify along which shape axis the normalization should occur. + + epsilon : Optional[float] = 1e-5 + Small float added to variance to avoid dividing by zero. + + center : Optional[bool] = True + If True, add offset of beta to normalized tensor, If False, + beta is ignored. + + scale : Optional[bool] = True + If True, scale normalized tensor by gamma. If False, gamma + is ignored. 
+ + Returns + ------- + output : tvm.te.Tensor + Normalized data with same shape as input + """ + mean = topi.reduction.sum(data, axis=axis, keepdims=True) / data.shape[axis] + var_summands = topi.broadcast.power(topi.broadcast.subtract(data, mean), 2.0) + var = topi.reduction.sum(var_summands, axis=axis, keepdims=True) / data.shape[axis] + std = topi.math.sqrt(var + epsilon) + out = (data - mean) / std + + shape = [1] * len(data.shape) + shape[axis] = data.shape[axis] + + if scale: + out = out * topi.reshape(gamma, shape) + if center: + out = out + topi.reshape(beta, shape) + + return out diff --git a/python/tvm/topi/testing/__init__.py b/python/tvm/topi/testing/__init__.py index 2d7d0a4b9e11..8f78805fff3b 100644 --- a/python/tvm/topi/testing/__init__.py +++ b/python/tvm/topi/testing/__init__.py @@ -49,6 +49,7 @@ from .gather_nd_python import gather_nd_python from .strided_slice_python import strided_slice_python, strided_set_python from .batch_matmul import batch_matmul +from .batch_norm import batch_norm from .slice_axis_python import slice_axis_python from .sequence_mask_python import sequence_mask from .poolnd_python import poolnd_python diff --git a/python/tvm/topi/testing/batch_norm.py b/python/tvm/topi/testing/batch_norm.py new file mode 100644 index 000000000000..4fca2cdac73e --- /dev/null +++ b/python/tvm/topi/testing/batch_norm.py @@ -0,0 +1,75 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Batch Normalization implemented in Numpy.""" +import numpy as np + + +def batch_norm( + x: np.ndarray, + gamma: np.ndarray, + beta: np.ndarray, + axis: int, + epsilon: float, + center: bool, + scale: bool, +): + """Batch Normalization operator implemented in Numpy. + + Parameters + ---------- + data : np.ndarray + Input to be batch-normalized. + + gamma : np.ndarray + Scale factor to be applied to the normalized tensor. + + beta : np.ndarray + Offset to be applied to the normalized tensor. + + axis : int + Specify along which shape axis the normalization should occur. + + epsilon : float + Small float added to variance to avoid dividing by zero. + + center : bool + If True, add offset of beta to normalized tensor, If False, + beta is ignored. + + scale : bool + If True, scale normalized tensor by gamma. If False, gamma + is ignored. 
+ + Returns + ------- + output : np.ndarray + Normalized data with same shape as input + """ + mean = x.mean(axis=axis, keepdims=True) + var = x.var(axis=axis, keepdims=True) + std = np.sqrt(var + epsilon) + out = (x - mean) / std + + shape = [1] * len(x.shape) + shape[axis] = x.shape[axis] + + if scale: + out = out * gamma.reshape(shape) + if center: + out = out + beta.reshape(shape) + + return out diff --git a/src/topi/schedule.cc b/src/topi/schedule.cc index 21f863bb2e70..0999f00ffd11 100644 --- a/src/topi/schedule.cc +++ b/src/topi/schedule.cc @@ -230,6 +230,9 @@ TVM_REGISTER_GENERIC_FUNC(schedule_dense) TVM_REGISTER_GENERIC_FUNC(schedule_batch_matmul) .set_default(WrapSchedule(topi::generic::default_schedule)); +TVM_REGISTER_GENERIC_FUNC(schedule_batch_norm) + .set_default(WrapSchedule(topi::generic::default_schedule)); + TVM_REGISTER_GENERIC_FUNC(schedule_pool) .set_default(WrapSchedule(topi::generic::default_schedule)) .register_func({"cpu"}, WrapSchedule(topi::x86::default_schedule)) diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py new file mode 100644 index 000000000000..c8030cb92684 --- /dev/null +++ b/tests/python/topi/python/test_topi_batch_norm.py @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+"""Test code for batch_norm operator""" +import numpy as np +import pytest + +import tvm +from tvm import te +from tvm import topi +import tvm.testing +import tvm.topi.testing + + +DEVICE = "llvm" +_BATCH_NORM_IMPLEMENT = { + "generic": (topi.nn.batch_norm, topi.generic.schedule_batch_norm), +} + + +@pytest.mark.parametrize( + "shape, axis, epsilon, center, scale", + [ + ((1,), 0, 0.1, True, True), + ((2, 3), 0, 0.1, True, True), + ((1, 2, 4), 0, 0.1, True, True), + ((1, 2, 3, 4), 0, 0.001, False, False), + ((2, 3, 4, 1), 1, 0.01, False, True), + ((3, 4, 1, 2), 2, 0.1, True, False), + ((4, 1, 2, 3), 3, 1.0, True, True), + ((1, 2, 4, 4, 5), 0, 0.1, True, True), + ], +) +def test_batch_norm(shape, axis, epsilon, center, scale): + x_np = np.random.random(shape).astype("float32") + gamma_np = np.random.random((shape[axis],)).astype("float32") + beta_np = np.random.random((shape[axis],)).astype("float32") + + out_np = tvm.topi.testing.batch_norm(x_np, gamma_np, beta_np, axis, epsilon, center, scale) + + x_te = te.placeholder(shape, name="x", dtype="float32") + gamma_te = te.placeholder((shape[axis],), name="gamma", dtype="float32") + beta_te = te.placeholder((shape[axis],), name="beta", dtype="float32") + + with tvm.target.Target(DEVICE): + fcompute, fschedule = tvm.topi.testing.dispatch(DEVICE, _BATCH_NORM_IMPLEMENT) + out = fcompute(x_te, gamma_te, beta_te, axis, epsilon, center, scale) + s = fschedule([out]) + + dev = tvm.device(DEVICE, 0) + + x_tvm = tvm.nd.array(x_np, dev) + gamma_tvm = tvm.nd.array(gamma_np, dev) + beta_tvm = tvm.nd.array(beta_np, dev) + out_tvm = tvm.nd.array(np.zeros(shape, dtype=out.dtype), dev) + + f = tvm.build(s, [x_te, gamma_te, beta_te, out], DEVICE) + f(x_tvm, gamma_tvm, beta_tvm, out_tvm) + + tvm.testing.assert_allclose(out_tvm.numpy(), out_np, rtol=1e-3) + + +if __name__ == "__main__": + test_batch_norm() From 96b35f6050c6d66146d9cae32de2e94770204d65 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Fri, 3 Dec 2021 12:21:00 -0800 Subject: [PATCH 2/9] Handle none values correctly --- python/tvm/topi/nn/batch_norm.py | 30 ++++++++++++++----- .../topi/python/test_topi_batch_norm.py | 16 +++++----- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/python/tvm/topi/nn/batch_norm.py b/python/tvm/topi/nn/batch_norm.py index b3410a62c460..12db524dc0b2 100644 --- a/python/tvm/topi/nn/batch_norm.py +++ b/python/tvm/topi/nn/batch_norm.py @@ -15,6 +15,8 @@ # specific language governing permissions and limitations # under the License. """Batch normalization.""" +import typing + from tvm import te from tvm import topi @@ -23,10 +25,10 @@ def batch_norm( data: te.Tensor, gamma: te.Tensor, beta: te.Tensor, - axis: int = 1, - epsilon: float = 1e-5, - center: bool = True, - scale: bool = True, + axis: typing.Optional[int] = None, + epsilon: typing.Optional[float] = None, + center: typing.Optional[bool] = None, + scale: typing.Optional[bool] = None, ): """Batch normalization layer (Ioffe and Szegedy, 2014). @@ -45,17 +47,17 @@ def batch_norm( beta : tvm.te.Tensor Offset to be applied to the normalized tensor. - axis : Optional[int] = 1 + axis : int, optional, default=1 Specify along which shape axis the normalization should occur. - epsilon : Optional[float] = 1e-5 + epsilon : float, optional, default=1e-5 Small float added to variance to avoid dividing by zero. - center : Optional[bool] = True + center : bool, optional, default=True If True, add offset of beta to normalized tensor, If False, beta is ignored. 
- scale : Optional[bool] = True + scale : bool, optional, defualt=True If True, scale normalized tensor by gamma. If False, gamma is ignored. @@ -64,6 +66,18 @@ def batch_norm( output : tvm.te.Tensor Normalized data with same shape as input """ + if axis is None: + axis = 1 + + if epsilon is None: + epsilon = 1e-5 + + if center is None: + center = True + + if scale is None: + scale = True + mean = topi.reduction.sum(data, axis=axis, keepdims=True) / data.shape[axis] var_summands = topi.broadcast.power(topi.broadcast.subtract(data, mean), 2.0) var = topi.reduction.sum(var_summands, axis=axis, keepdims=True) / data.shape[axis] diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py index c8030cb92684..08ec7efb5d77 100644 --- a/tests/python/topi/python/test_topi_batch_norm.py +++ b/tests/python/topi/python/test_topi_batch_norm.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. -"""Test code for batch_norm operator""" +"""Tests for the batch_norm operator.""" import numpy as np import pytest @@ -25,7 +25,7 @@ import tvm.topi.testing -DEVICE = "llvm" +_DEVICE = "llvm" _BATCH_NORM_IMPLEMENT = { "generic": (topi.nn.batch_norm, topi.generic.schedule_batch_norm), } @@ -46,8 +46,8 @@ ) def test_batch_norm(shape, axis, epsilon, center, scale): x_np = np.random.random(shape).astype("float32") - gamma_np = np.random.random((shape[axis],)).astype("float32") - beta_np = np.random.random((shape[axis],)).astype("float32") + gamma_np = np.random.random(shape[axis]).astype("float32") + beta_np = np.random.random(shape[axis]).astype("float32") out_np = tvm.topi.testing.batch_norm(x_np, gamma_np, beta_np, axis, epsilon, center, scale) @@ -55,19 +55,19 @@ def test_batch_norm(shape, axis, epsilon, center, scale): gamma_te = te.placeholder((shape[axis],), name="gamma", dtype="float32") beta_te = te.placeholder((shape[axis],), name="beta", dtype="float32") - with tvm.target.Target(DEVICE): - fcompute, fschedule = tvm.topi.testing.dispatch(DEVICE, _BATCH_NORM_IMPLEMENT) + with tvm.target.Target(_DEVICE): + fcompute, fschedule = tvm.topi.testing.dispatch(_DEVICE, _BATCH_NORM_IMPLEMENT) out = fcompute(x_te, gamma_te, beta_te, axis, epsilon, center, scale) s = fschedule([out]) - dev = tvm.device(DEVICE, 0) + dev = tvm.device(_DEVICE, 0) x_tvm = tvm.nd.array(x_np, dev) gamma_tvm = tvm.nd.array(gamma_np, dev) beta_tvm = tvm.nd.array(beta_np, dev) out_tvm = tvm.nd.array(np.zeros(shape, dtype=out.dtype), dev) - f = tvm.build(s, [x_te, gamma_te, beta_te, out], DEVICE) + f = tvm.build(s, [x_te, gamma_te, beta_te, out], _DEVICE) f(x_tvm, gamma_tvm, beta_tvm, out_tvm) tvm.testing.assert_allclose(out_tvm.numpy(), out_np, rtol=1e-3) From 6ef7405be64c5b58efc2492843c48df7c0d1ca63 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Mon, 6 Dec 2021 14:42:49 -0800 Subject: [PATCH 3/9] Return correct nun outputs for onnx --- python/tvm/relay/frontend/onnx.py | 2 +- python/tvm/relay/op/nn/_nn.py | 1 - python/tvm/relay/op/nn/nn.py | 2 +- python/tvm/relay/op/strategy/generic.py | 3 +-- python/tvm/topi/nn/batch_norm.py | 26 ++++++++++++++++++++++--- src/relay/op/nn/nn.cc | 5 ++++- 6 files changed, 30 insertions(+), 9 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 189c7f054664..ca0819a58fb6 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -474,7 +474,7 @@ def _impl_v1(cls, inputs, 
attr, params): op_name="batch_norm", ignores=["spatial", "is_test", "consumed_inputs", "momentum", "training_mode"], )(inputs, attr, params) - return out[0] + return out class InstanceNorm(OnnxOpConverter): diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 600009e56ce4..dea3ed5a5aae 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -155,7 +155,6 @@ def legalize_batch_matmul(attrs, inputs, types): reg.register_strategy("nn.batch_norm", strategy.batch_norm_strategy) reg.register_pattern("nn.batch_norm", reg.OpPattern.OUT_ELEMWISE_FUSABLE) - # sparse_dense @reg.register_compute("nn.sparse_dense") def compute_sparse_dense(attrs, inputs, out_type): diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py index c7b376ec3d64..32647d63c2ec 100644 --- a/python/tvm/relay/op/nn/nn.py +++ b/python/tvm/relay/op/nn/nn.py @@ -2045,7 +2045,7 @@ def batch_norm( result = _make.batch_norm( data, gamma, beta, moving_mean, moving_var, axis, epsilon, center, scale ) - return expr.TupleWrapper(result, 3) + return expr.TupleWrapper(result, 5) def instance_norm(data, gamma, beta, axis=1, epsilon=1e-5, center=True, scale=True): diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 5832b887458e..1db4537e5495 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -851,9 +851,8 @@ def batch_matmul_strategy(attrs, inputs, out_type, target): # batch_norm def wrap_compute_batch_norm(topi_compute): """wrap batch_norm topi compute""" - def _compute_batch_norm(attrs, inputs, out_type): - return [topi_compute(*inputs, attrs.axis, attrs.epsilon, attrs.center, attrs.scale)] + return topi_compute(*inputs, attrs.axis, attrs.epsilon, attrs.center, attrs.scale) return _compute_batch_norm diff --git a/python/tvm/topi/nn/batch_norm.py b/python/tvm/topi/nn/batch_norm.py index 12db524dc0b2..27d68e2b4893 100644 --- a/python/tvm/topi/nn/batch_norm.py +++ b/python/tvm/topi/nn/batch_norm.py @@ -25,11 +25,13 @@ def batch_norm( data: te.Tensor, gamma: te.Tensor, beta: te.Tensor, + moving_mean: te.Tensor, + moving_var: te.Tensor, axis: typing.Optional[int] = None, epsilon: typing.Optional[float] = None, center: typing.Optional[bool] = None, scale: typing.Optional[bool] = None, -): +) -> typing.List[te.Tensor]: """Batch normalization layer (Ioffe and Szegedy, 2014). Normalizes the input at each batch, i.e. applies a transformation @@ -47,6 +49,12 @@ def batch_norm( beta : tvm.te.Tensor Offset to be applied to the normalized tensor. + moving_mean : tvm.te.Tensor + Running mean of input. + + moving_var : tvm.te.Tensor + Running variance of input. + axis : int, optional, default=1 Specify along which shape axis the normalization should occur. @@ -63,8 +71,14 @@ def batch_norm( Returns ------- - output : tvm.te.Tensor + output : list of tvm.te.Tensor Normalized data with same shape as input + + moving_mean : tvm.te.Tensor + Running mean of input. + + moving_var : tvm.te.Tensor + Running variance of input. 
""" if axis is None: axis = 1 @@ -92,4 +106,10 @@ def batch_norm( if center: out = out + topi.reshape(beta, shape) - return out + moving_mean = moving_mean * 1 #+ mean * (1 - 1) + moving_var = moving_var * 1 #+ var * (1 - 1) + + saved_mean = topi.reshape(mean, moving_mean.shape) + saved_var = topi.reshape(var, moving_var.shape) + + return [out, moving_mean, moving_var, saved_mean, saved_var] diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc index 574ecc0828dd..739338e56f4e 100644 --- a/src/relay/op/nn/nn.cc +++ b/src/relay/op/nn/nn.cc @@ -745,12 +745,15 @@ bool BatchNormRel(const Array& types, int num_inputs, const Attrs& attrs, reporter->Assign(types[4], TensorType({axis_size}, data->dtype)); // output is a tuple of the normed data (same shape as input), new running mean, - // and new running average (the latter two are both vectors of length dim) + // new running variance, saved mean and saved variance (the latter are all + // vectors of length dim) std::vector fields; auto vec_ty = TensorType(Array({data->shape[axis]}), data->dtype); fields.push_back(TensorType(data->shape, data->dtype)); fields.push_back(vec_ty); fields.push_back(vec_ty); + fields.push_back(vec_ty); + fields.push_back(vec_ty); reporter->Assign(types[5], TupleType(Array(fields))); return true; } From 13fabe8c5aab725f6292f251b701d80e0bb42021 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Tue, 7 Dec 2021 12:42:17 -0800 Subject: [PATCH 4/9] Use moving var/mean and update tests --- python/tvm/relay/frontend/onnx.py | 5 +++- python/tvm/relay/op/nn/nn.py | 2 +- python/tvm/topi/nn/batch_norm.py | 21 ++++++--------- python/tvm/topi/testing/batch_norm.py | 26 ++++++++++++++----- src/relay/op/nn/nn.cc | 2 -- .../topi/python/test_topi_batch_norm.py | 24 ++++++++++++----- 6 files changed, 50 insertions(+), 30 deletions(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index ca0819a58fb6..261fe1df6ad2 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -474,7 +474,10 @@ def _impl_v1(cls, inputs, attr, params): op_name="batch_norm", ignores=["spatial", "is_test", "consumed_inputs", "momentum", "training_mode"], )(inputs, attr, params) - return out + # We only support test mode, so we return data, moving_mean, moving_var, + # and then moving_mean, and moving_var again as placeholders for + # the expected "saved_mean", "saved_var". 
+ return _expr.TupleWrapper(_expr.Tuple((*out, out[1], out[2])), 5) class InstanceNorm(OnnxOpConverter): diff --git a/python/tvm/relay/op/nn/nn.py b/python/tvm/relay/op/nn/nn.py index 32647d63c2ec..c7b376ec3d64 100644 --- a/python/tvm/relay/op/nn/nn.py +++ b/python/tvm/relay/op/nn/nn.py @@ -2045,7 +2045,7 @@ def batch_norm( result = _make.batch_norm( data, gamma, beta, moving_mean, moving_var, axis, epsilon, center, scale ) - return expr.TupleWrapper(result, 5) + return expr.TupleWrapper(result, 3) def instance_norm(data, gamma, beta, axis=1, epsilon=1e-5, center=True, scale=True): diff --git a/python/tvm/topi/nn/batch_norm.py b/python/tvm/topi/nn/batch_norm.py index 27d68e2b4893..1b4fad762568 100644 --- a/python/tvm/topi/nn/batch_norm.py +++ b/python/tvm/topi/nn/batch_norm.py @@ -92,24 +92,19 @@ def batch_norm( if scale is None: scale = True - mean = topi.reduction.sum(data, axis=axis, keepdims=True) / data.shape[axis] - var_summands = topi.broadcast.power(topi.broadcast.subtract(data, mean), 2.0) - var = topi.reduction.sum(var_summands, axis=axis, keepdims=True) / data.shape[axis] - std = topi.math.sqrt(var + epsilon) - out = (data - mean) / std - shape = [1] * len(data.shape) shape[axis] = data.shape[axis] + moving_mean_rs = topi.reshape(moving_mean, shape) + moving_var_rs = topi.reshape(moving_var, shape) + + out = (data - moving_mean_rs) / topi.math.sqrt(moving_var_rs + epsilon) + if scale: out = out * topi.reshape(gamma, shape) if center: out = out + topi.reshape(beta, shape) - moving_mean = moving_mean * 1 #+ mean * (1 - 1) - moving_var = moving_var * 1 #+ var * (1 - 1) - - saved_mean = topi.reshape(mean, moving_mean.shape) - saved_var = topi.reshape(var, moving_var.shape) - - return [out, moving_mean, moving_var, saved_mean, saved_var] + # Moving mean and var aren't updated during test. To avoid + # placeholder reuse, we multiply by 1 and return them. + return [out, moving_mean * 1, moving_var * 1] diff --git a/python/tvm/topi/testing/batch_norm.py b/python/tvm/topi/testing/batch_norm.py index 4fca2cdac73e..0a79b6849d4e 100644 --- a/python/tvm/topi/testing/batch_norm.py +++ b/python/tvm/topi/testing/batch_norm.py @@ -22,6 +22,8 @@ def batch_norm( x: np.ndarray, gamma: np.ndarray, beta: np.ndarray, + moving_mean: np.ndarray, + moving_var: np.ndarray, axis: int, epsilon: float, center: bool, @@ -40,6 +42,12 @@ def batch_norm( beta : np.ndarray Offset to be applied to the normalized tensor. + moving_mean : np.ndarray + Running mean of input. + + moving_var : np.ndarray + Running variance of input. + axis : int Specify along which shape axis the normalization should occur. @@ -58,18 +66,24 @@ def batch_norm( ------- output : np.ndarray Normalized data with same shape as input - """ - mean = x.mean(axis=axis, keepdims=True) - var = x.var(axis=axis, keepdims=True) - std = np.sqrt(var + epsilon) - out = (x - mean) / std + moving_mean : np.ndarray + Running mean of input. + + moving_var : np.ndarray + Running variance of input. 
+ """ shape = [1] * len(x.shape) shape[axis] = x.shape[axis] + moving_mean_rs = moving_mean.reshape(shape) + moving_var_rs = moving_var.reshape(shape) + + out = (x - moving_mean_rs) / np.sqrt(moving_var_rs + epsilon) + if scale: out = out * gamma.reshape(shape) if center: out = out + beta.reshape(shape) - return out + return [out, moving_mean, moving_var] diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc index 739338e56f4e..89ef2708ff27 100644 --- a/src/relay/op/nn/nn.cc +++ b/src/relay/op/nn/nn.cc @@ -752,8 +752,6 @@ bool BatchNormRel(const Array& types, int num_inputs, const Attrs& attrs, fields.push_back(TensorType(data->shape, data->dtype)); fields.push_back(vec_ty); fields.push_back(vec_ty); - fields.push_back(vec_ty); - fields.push_back(vec_ty); reporter->Assign(types[5], TupleType(Array(fields))); return true; } diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py index 08ec7efb5d77..fbdd8261310b 100644 --- a/tests/python/topi/python/test_topi_batch_norm.py +++ b/tests/python/topi/python/test_topi_batch_norm.py @@ -46,31 +46,41 @@ ) def test_batch_norm(shape, axis, epsilon, center, scale): x_np = np.random.random(shape).astype("float32") + moving_mean_np = np.random.random(shape[axis]).astype("float32") + moving_var_np = np.random.random(shape[axis]).astype("float32") gamma_np = np.random.random(shape[axis]).astype("float32") beta_np = np.random.random(shape[axis]).astype("float32") - out_np = tvm.topi.testing.batch_norm(x_np, gamma_np, beta_np, axis, epsilon, center, scale) + out_x_np, out_moving_mean_np, out_moving_var_np = tvm.topi.testing.batch_norm(x_np, moving_mean_np, moving_var_np, gamma_np, beta_np, axis, epsilon, center, scale) x_te = te.placeholder(shape, name="x", dtype="float32") + moving_mean_te = te.placeholder((shape[axis],), name="moving_mean", dtype="float32") + moving_var_te = te.placeholder((shape[axis],), name="moving_var", dtype="float32") gamma_te = te.placeholder((shape[axis],), name="gamma", dtype="float32") beta_te = te.placeholder((shape[axis],), name="beta", dtype="float32") with tvm.target.Target(_DEVICE): fcompute, fschedule = tvm.topi.testing.dispatch(_DEVICE, _BATCH_NORM_IMPLEMENT) - out = fcompute(x_te, gamma_te, beta_te, axis, epsilon, center, scale) - s = fschedule([out]) + out_x, out_moving_mean, out_moving_var = fcompute(x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, axis, epsilon, center, scale) + s = fschedule([out_x, out_moving_mean, out_moving_var]) dev = tvm.device(_DEVICE, 0) x_tvm = tvm.nd.array(x_np, dev) + moving_mean_tvm = tvm.nd.array(moving_mean_np, dev) + moving_var_tvm = tvm.nd.array(moving_var_np, dev) gamma_tvm = tvm.nd.array(gamma_np, dev) beta_tvm = tvm.nd.array(beta_np, dev) - out_tvm = tvm.nd.array(np.zeros(shape, dtype=out.dtype), dev) + out_x_tvm = tvm.nd.array(np.zeros(shape, dtype=out_x.dtype), dev) + out_moving_mean_tvm = tvm.nd.array(np.zeros((shape[axis],), dtype=out_moving_mean.dtype), dev) + out_moving_var_tvm = tvm.nd.array(np.zeros((shape[axis],), dtype=out_moving_var.dtype), dev) - f = tvm.build(s, [x_te, gamma_te, beta_te, out], _DEVICE) - f(x_tvm, gamma_tvm, beta_tvm, out_tvm) + f = tvm.build(s, [x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, out_x, out_moving_mean, out_moving_var], _DEVICE) + f(x_tvm, moving_mean_tvm, moving_var_tvm, gamma_tvm, beta_tvm, out_x_tvm, out_moving_mean_tvm, out_moving_var_tvm) - tvm.testing.assert_allclose(out_tvm.numpy(), out_np, rtol=1e-3) + tvm.testing.assert_allclose(out_x_tvm.numpy(), 
out_x_np, rtol=1e-3) + tvm.testing.assert_allclose(out_moving_mean_tvm.numpy(), out_moving_mean_np, rtol=1e-3) + tvm.testing.assert_allclose(out_moving_var_tvm.numpy(), out_moving_var_np, rtol=1e-3) if __name__ == "__main__": From 35678db98700973f68f7addd86340cb42f892de0 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Thu, 9 Dec 2021 12:20:44 -0800 Subject: [PATCH 5/9] Add a test for batch norm folding --- tests/python/relay/test_op_level1.py | 47 ++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 97406e7e0d48..3ecf6fd6e198 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -387,6 +387,7 @@ def test_batch_norm(): ) ) + # axis=1 beta = relay.var("beta", relay.TensorType((3,), dtype)) gamma = relay.var("gamma", relay.TensorType((3,), dtype)) moving_mean = relay.var("moving_mean", relay.TensorType((3,), dtype)) @@ -427,6 +428,52 @@ def test_batch_norm(): ) +def test_batch_norm_fold_const(): + axis = 1 + dtype = "float32" + shape = [4, 5, 6] + + data_np = np.random.random(shape).astype(dtype) + beta_np = np.random.random(shape[axis]).astype(dtype) + gamma_np = np.random.random(shape[axis]).astype(dtype) + moving_mean_np = np.random.random(shape[axis]).astype(dtype) + moving_var_np = np.random.random(shape[axis]).astype(dtype) + + data = relay.var("data", relay.TensorType(shape, dtype)) + beta = relay.var("beta", relay.TensorType((shape[1],), dtype)) + gamma = relay.var("gamma", relay.TensorType((shape[1],), dtype)) + moving_mean = relay.var("moving_mean", relay.TensorType((shape[1],), dtype)) + moving_var = relay.var("moving_var", relay.TensorType((shape[1],), dtype)) + out = relay.nn.batch_norm(data, gamma, beta, moving_mean, moving_var, axis=axis).astuple() + func = relay.Function([data, gamma, beta, moving_mean, moving_var], out) + + out_const = relay.nn.batch_norm( + relay.const(data_np), + relay.const(gamma_np), + relay.const(beta_np), + relay.const(moving_mean_np), + relay.const(moving_var_np), + axis=axis, + ).astuple() + func_const = relay.Function([], out_const) + + # Build the module with constants to have FoldConstant transform batch_norm. + mod_const = tvm.IRModule.from_expr(func_const) + lib_const = relay.build(mod_const, tvm.target.create("llvm")) + const_data_out = lib_const.params["p0"] + const_moving_mean_out = lib_const.params["p1"] + const_moving_var_out = lib_const.params["p2"] + + # Run the Relay func without constants. This will use SimplyInference instead. 
+ vm_data_out, vm_moving_mean_out, vm_moving_var_out = relay.create_executor( + "vm", device=tvm.device("llvm"), target="llvm" + ).evaluate(func)(data_np, gamma_np, beta_np, moving_mean_np, moving_var_np) + + tvm.testing.assert_allclose(const_data_out.numpy(), vm_data_out.numpy()) + tvm.testing.assert_allclose(const_moving_mean_out.numpy(), vm_moving_mean_out.numpy()) + tvm.testing.assert_allclose(const_moving_var_out.numpy(), vm_moving_var_out.numpy()) + + @pytest.mark.xfail def test_matmul_type_check(): dtype = "float16" From 9ca8c10d57cef979d43264ce5592f2003f8f2a77 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Thu, 9 Dec 2021 12:28:12 -0800 Subject: [PATCH 6/9] Fix comment --- python/tvm/relay/frontend/onnx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/tvm/relay/frontend/onnx.py b/python/tvm/relay/frontend/onnx.py index 7cb5083856b5..dcdd4c5ce9a4 100644 --- a/python/tvm/relay/frontend/onnx.py +++ b/python/tvm/relay/frontend/onnx.py @@ -475,7 +475,7 @@ def _impl_v1(cls, inputs, attr, params): ignores=["spatial", "is_test", "consumed_inputs", "momentum", "training_mode"], )(inputs, attr, params) # We only support test mode, so we return data, moving_mean, moving_var, - # and then moving_mean, and moving_var again as placeholders for + # and then moving_mean and moving_var again as placeholders for # the expected "saved_mean", "saved_var". return _expr.TupleWrapper(_expr.Tuple((*out, out[1], out[2])), 5) From edee344452121a55e6bc3e834d2fc36030188cdd Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Thu, 9 Dec 2021 12:30:37 -0800 Subject: [PATCH 7/9] Format with black --- python/tvm/relay/op/nn/_nn.py | 1 + python/tvm/relay/op/strategy/generic.py | 1 + .../topi/python/test_topi_batch_norm.py | 38 ++++++++++++++++--- 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py index 37dda3b00566..fee160423115 100644 --- a/python/tvm/relay/op/nn/_nn.py +++ b/python/tvm/relay/op/nn/_nn.py @@ -156,6 +156,7 @@ def legalize_batch_matmul(attrs, inputs, types): reg.register_strategy("nn.batch_norm", strategy.batch_norm_strategy) reg.register_pattern("nn.batch_norm", reg.OpPattern.OUT_ELEMWISE_FUSABLE) + # sparse_dense @reg.register_compute("nn.sparse_dense") def compute_sparse_dense(attrs, inputs, out_type): diff --git a/python/tvm/relay/op/strategy/generic.py b/python/tvm/relay/op/strategy/generic.py index 1db4537e5495..461e755f5212 100644 --- a/python/tvm/relay/op/strategy/generic.py +++ b/python/tvm/relay/op/strategy/generic.py @@ -851,6 +851,7 @@ def batch_matmul_strategy(attrs, inputs, out_type, target): # batch_norm def wrap_compute_batch_norm(topi_compute): """wrap batch_norm topi compute""" + def _compute_batch_norm(attrs, inputs, out_type): return topi_compute(*inputs, attrs.axis, attrs.epsilon, attrs.center, attrs.scale) diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py index fbdd8261310b..68b6aebaa762 100644 --- a/tests/python/topi/python/test_topi_batch_norm.py +++ b/tests/python/topi/python/test_topi_batch_norm.py @@ -51,7 +51,9 @@ def test_batch_norm(shape, axis, epsilon, center, scale): gamma_np = np.random.random(shape[axis]).astype("float32") beta_np = np.random.random(shape[axis]).astype("float32") - out_x_np, out_moving_mean_np, out_moving_var_np = tvm.topi.testing.batch_norm(x_np, moving_mean_np, moving_var_np, gamma_np, beta_np, axis, epsilon, center, scale) + out_x_np, out_moving_mean_np, out_moving_var_np = 
tvm.topi.testing.batch_norm( + x_np, moving_mean_np, moving_var_np, gamma_np, beta_np, axis, epsilon, center, scale + ) x_te = te.placeholder(shape, name="x", dtype="float32") moving_mean_te = te.placeholder((shape[axis],), name="moving_mean", dtype="float32") @@ -61,7 +63,9 @@ def test_batch_norm(shape, axis, epsilon, center, scale): with tvm.target.Target(_DEVICE): fcompute, fschedule = tvm.topi.testing.dispatch(_DEVICE, _BATCH_NORM_IMPLEMENT) - out_x, out_moving_mean, out_moving_var = fcompute(x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, axis, epsilon, center, scale) + out_x, out_moving_mean, out_moving_var = fcompute( + x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, axis, epsilon, center, scale + ) s = fschedule([out_x, out_moving_mean, out_moving_var]) dev = tvm.device(_DEVICE, 0) @@ -72,11 +76,35 @@ def test_batch_norm(shape, axis, epsilon, center, scale): gamma_tvm = tvm.nd.array(gamma_np, dev) beta_tvm = tvm.nd.array(beta_np, dev) out_x_tvm = tvm.nd.array(np.zeros(shape, dtype=out_x.dtype), dev) - out_moving_mean_tvm = tvm.nd.array(np.zeros((shape[axis],), dtype=out_moving_mean.dtype), dev) + out_moving_mean_tvm = tvm.nd.array( + np.zeros((shape[axis],), dtype=out_moving_mean.dtype), dev + ) out_moving_var_tvm = tvm.nd.array(np.zeros((shape[axis],), dtype=out_moving_var.dtype), dev) - f = tvm.build(s, [x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, out_x, out_moving_mean, out_moving_var], _DEVICE) - f(x_tvm, moving_mean_tvm, moving_var_tvm, gamma_tvm, beta_tvm, out_x_tvm, out_moving_mean_tvm, out_moving_var_tvm) + f = tvm.build( + s, + [ + x_te, + moving_mean_te, + moving_var_te, + gamma_te, + beta_te, + out_x, + out_moving_mean, + out_moving_var, + ], + _DEVICE, + ) + f( + x_tvm, + moving_mean_tvm, + moving_var_tvm, + gamma_tvm, + beta_tvm, + out_x_tvm, + out_moving_mean_tvm, + out_moving_var_tvm, + ) tvm.testing.assert_allclose(out_x_tvm.numpy(), out_x_np, rtol=1e-3) tvm.testing.assert_allclose(out_moving_mean_tvm.numpy(), out_moving_mean_np, rtol=1e-3) From eeede9c47f2734825e7783697cda6bdf2cd14d32 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Thu, 9 Dec 2021 19:59:42 -0800 Subject: [PATCH 8/9] Re-order test args to match interface --- .../topi/python/test_topi_batch_norm.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/python/topi/python/test_topi_batch_norm.py b/tests/python/topi/python/test_topi_batch_norm.py index 68b6aebaa762..202b6214bc7a 100644 --- a/tests/python/topi/python/test_topi_batch_norm.py +++ b/tests/python/topi/python/test_topi_batch_norm.py @@ -46,35 +46,35 @@ ) def test_batch_norm(shape, axis, epsilon, center, scale): x_np = np.random.random(shape).astype("float32") - moving_mean_np = np.random.random(shape[axis]).astype("float32") - moving_var_np = np.random.random(shape[axis]).astype("float32") gamma_np = np.random.random(shape[axis]).astype("float32") beta_np = np.random.random(shape[axis]).astype("float32") + moving_mean_np = np.random.random(shape[axis]).astype("float32") + moving_var_np = np.random.random(shape[axis]).astype("float32") out_x_np, out_moving_mean_np, out_moving_var_np = tvm.topi.testing.batch_norm( - x_np, moving_mean_np, moving_var_np, gamma_np, beta_np, axis, epsilon, center, scale + x_np, gamma_np, beta_np, moving_mean_np, moving_var_np, axis, epsilon, center, scale ) x_te = te.placeholder(shape, name="x", dtype="float32") - moving_mean_te = te.placeholder((shape[axis],), name="moving_mean", dtype="float32") - moving_var_te = 
te.placeholder((shape[axis],), name="moving_var", dtype="float32") gamma_te = te.placeholder((shape[axis],), name="gamma", dtype="float32") beta_te = te.placeholder((shape[axis],), name="beta", dtype="float32") + moving_mean_te = te.placeholder((shape[axis],), name="moving_mean", dtype="float32") + moving_var_te = te.placeholder((shape[axis],), name="moving_var", dtype="float32") with tvm.target.Target(_DEVICE): fcompute, fschedule = tvm.topi.testing.dispatch(_DEVICE, _BATCH_NORM_IMPLEMENT) out_x, out_moving_mean, out_moving_var = fcompute( - x_te, moving_mean_te, moving_var_te, gamma_te, beta_te, axis, epsilon, center, scale + x_te, gamma_te, beta_te, moving_mean_te, moving_var_te, axis, epsilon, center, scale ) s = fschedule([out_x, out_moving_mean, out_moving_var]) dev = tvm.device(_DEVICE, 0) x_tvm = tvm.nd.array(x_np, dev) - moving_mean_tvm = tvm.nd.array(moving_mean_np, dev) - moving_var_tvm = tvm.nd.array(moving_var_np, dev) gamma_tvm = tvm.nd.array(gamma_np, dev) beta_tvm = tvm.nd.array(beta_np, dev) + moving_mean_tvm = tvm.nd.array(moving_mean_np, dev) + moving_var_tvm = tvm.nd.array(moving_var_np, dev) out_x_tvm = tvm.nd.array(np.zeros(shape, dtype=out_x.dtype), dev) out_moving_mean_tvm = tvm.nd.array( np.zeros((shape[axis],), dtype=out_moving_mean.dtype), dev @@ -85,10 +85,10 @@ def test_batch_norm(shape, axis, epsilon, center, scale): s, [ x_te, - moving_mean_te, - moving_var_te, gamma_te, beta_te, + moving_mean_te, + moving_var_te, out_x, out_moving_mean, out_moving_var, @@ -97,10 +97,10 @@ def test_batch_norm(shape, axis, epsilon, center, scale): ) f( x_tvm, - moving_mean_tvm, - moving_var_tvm, gamma_tvm, beta_tvm, + moving_mean_tvm, + moving_var_tvm, out_x_tvm, out_moving_mean_tvm, out_moving_var_tvm, From 40fbaea68b0228765889451da429a5bbdeaf18d4 Mon Sep 17 00:00:00 2001 From: Michal Piszczek Date: Fri, 10 Dec 2021 14:26:14 -0800 Subject: [PATCH 9/9] Call fold constant manually --- tests/python/relay/test_op_level1.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/python/relay/test_op_level1.py b/tests/python/relay/test_op_level1.py index 3ecf6fd6e198..2f3f86349a86 100644 --- a/tests/python/relay/test_op_level1.py +++ b/tests/python/relay/test_op_level1.py @@ -459,10 +459,11 @@ def test_batch_norm_fold_const(): # Build the module with constants to have FoldConstant transform batch_norm. mod_const = tvm.IRModule.from_expr(func_const) - lib_const = relay.build(mod_const, tvm.target.create("llvm")) - const_data_out = lib_const.params["p0"] - const_moving_mean_out = lib_const.params["p1"] - const_moving_var_out = lib_const.params["p2"] + mod_const = relay.transform.FoldConstant()(mod_const) + + const_data_out = mod_const["main"].body[0].data + const_moving_mean_out = mod_const["main"].body[1].data + const_moving_var_out = mod_const["main"].body[2].data # Run the Relay func without constants. This will use SimplyInference instead. vm_data_out, vm_moving_mean_out, vm_moving_var_out = relay.create_executor(
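
Note (not part of the patches themselves): a minimal usage sketch of the topi.nn.batch_norm compute and generic schedule added by this series, mirroring the final test in test_topi_batch_norm.py. The shapes, dtype, and the "llvm" target below are illustrative assumptions, not values mandated by the patches.

    import numpy as np
    import tvm
    from tvm import te, topi

    shape, axis = (2, 3, 4, 4), 1
    x = te.placeholder(shape, name="x", dtype="float32")
    gamma = te.placeholder((shape[axis],), name="gamma", dtype="float32")
    beta = te.placeholder((shape[axis],), name="beta", dtype="float32")
    moving_mean = te.placeholder((shape[axis],), name="moving_mean", dtype="float32")
    moving_var = te.placeholder((shape[axis],), name="moving_var", dtype="float32")

    with tvm.target.Target("llvm"):
        # Inference-mode normalization: (x - moving_mean) / sqrt(moving_var + epsilon),
        # optionally scaled by gamma and shifted by beta. The running statistics are
        # returned unchanged (multiplied by 1 to avoid placeholder reuse).
        out, out_mean, out_var = topi.nn.batch_norm(
            x, gamma, beta, moving_mean, moving_var, axis=axis
        )
        s = topi.generic.schedule_batch_norm([out, out_mean, out_var])

    f = tvm.build(
        s, [x, gamma, beta, moving_mean, moving_var, out, out_mean, out_var], "llvm"
    )

The built function can then be invoked on tvm.nd arrays exactly as done in test_topi_batch_norm.py, and its first output checked against tvm.topi.testing.batch_norm.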