diff --git a/python/tvm/contrib/ethosu/cascader/device_config.py b/python/tvm/contrib/ethosu/cascader/device_config.py
index 4670a238cf960..5abdb302234bc 100644
--- a/python/tvm/contrib/ethosu/cascader/device_config.py
+++ b/python/tvm/contrib/ethosu/cascader/device_config.py
@@ -439,6 +439,23 @@ def is_partkernel(
 
         return part_kernel_first_utilization > depth_first_utilization or ifm_channels <= 8
 
+    def _get_input_banks(self, input_block_shape, input_bytewidth):
+        """Return the number of banks required to hold an input block."""
+        block_area = input_block_shape.area()
+        depth_bytes = self._align(input_block_shape.depth * input_bytewidth, 8)
+        # NOTE(review): the factor of 2 presumably accounts for double buffering - confirm.
+        banks = _round_up_div(block_area * depth_bytes, self._bank_size_bytes) * 2
+        # The final count is rounded with the input bank granularity.
+        return _round_up(banks, self._input_granularity)
+
+    def _get_accumulator_banks(self, output_block_shape, acc_bytewidth, depth):
+        """Return the number of banks required to accumulate an output block."""
+        # The accumulated depth is the block depth capped at `depth`.
+        capped_depth = _round_up(min(output_block_shape.depth, depth), 8)
+        block_bytes = output_block_shape.area() * self._align(capped_depth, 8) * acc_bytewidth
+        banks = _round_up_div(block_bytes, self._bank_size_bytes) * 2
+        return _round_up(banks, self._accumulator_granularity[acc_bytewidth])
+
     def get_elementwise_block_config(
         self,
         ifm_propagator: Propagator,
@@ -533,16 +550,9 @@ def get_elementwise_block_config(
             input2_block.round_up(self._input_micro_block)
 
             # Banks required for input block
-            input_bytes = input_block.area() * self._align(input_block.depth * input_bytewidth, 8)
-            input_banks = _round_up_div(input_bytes, self._bank_size_bytes) * 2
-            input_banks = _round_up(input_banks, self._input_granularity)
-
+            input_banks = self._get_input_banks(input_block, input_bytewidth)
             # Banks required for input2 block
-            input2_bytes = input2_block.area() * self._align(
-                input2_block.depth * input_bytewidth, 8
-            )
-            input2_banks = _round_up_div(input2_bytes, self._bank_size_bytes) * 2
-            input2_banks = _round_up(input2_banks, self._input_granularity)
+            input2_banks = self._get_input_banks(input2_block, input_bytewidth)
 
             # Check whether or not both IFMs fit into SHRAM
             if (input_banks + input2_banks) <= banks_available:
@@ -561,6 +571,29 @@ def get_elementwise_block_config(
 
         return block_config
 
+    def _get_subkernel_propagator(
+        self, op_attrs, ifm_propagator, input_layout, output_layout, depth
+    ):
+        """Return a Propagator with its transform capped to the subkernel limits."""
+        op_name = op_attrs.get("op")
+        max_h = self._subkernel_limits[0] - int(op_attrs.get("stride_h", 1))
+        max_w = self._subkernel_limits[1] - int(op_attrs.get("stride_w", 1))
+        # NOTE(review): rows are edited in place; confirm `.transform` returns a fresh copy.
+        matrix = ifm_propagator.transform
+
+        # Height is row 1 in both layouts; width is row 3 for NHCWB16 inputs and row 2 otherwise.
+        width_row = 3 if input_layout == "NHCWB16" else 2
+        matrix[1][-1] = min(matrix[1][-1], max_h)
+        matrix[width_row][-1] = min(matrix[width_row][-1], max_w)
+
+        if op_name in ("ethosu_pooling", "ethosu_depthwise_conv2d") and output_layout == "NHCWB16":
+            if input_layout == "NHWC":
+                matrix[3][-1] = depth
+            elif input_layout == "NHCWB16":
+                matrix[2][-1] = depth // 16
+
+        return Propagator(matrix, ifm_propagator.offset)
+
     def get_valid_block_configs(
         self,
         ifm_propagator: Propagator,
@@ -612,33 +645,13 @@ def get_valid_block_configs(
         op_type = op_attrs.get("op")
         op_str = op_attrs.get("op_str")
         activation = op_attrs.get("activation", "NONE")
-        stride_h = int(op_attrs.get("stride_h", 1))
-        stride_w = int(op_attrs.get("stride_w", 1))
         upscaling_factor = 1 if op_attrs.get("upscale", "NONE") == "NONE" else 2
 
-        subkernel_transform = ifm_propagator.transform
         if output_layout == "NHCWB16":
             output_shape = _Shape([1, ofm_shape[1], ofm_shape[3], ofm_channels])
         else:
             output_shape = _Shape(ofm_shape)
 
-        if input_layout == "NHCWB16":
-            subkernel_transform[1][-1] = min(
-                subkernel_transform[1][-1], self._subkernel_limits[0] - stride_h
-            )
-            subkernel_transform[3][-1] = min(
-                subkernel_transform[3][-1], self._subkernel_limits[1] - stride_w
-            )
-        else:
-            subkernel_transform[1][-1] = min(
-                subkernel_transform[1][-1], self._subkernel_limits[0] - stride_h
-            )
-            subkernel_transform[2][-1] = min(
-                subkernel_transform[2][-1], self._subkernel_limits[1] - stride_w
-            )
-
-        subkernel_propagator = Propagator(subkernel_transform, ifm_propagator.offset)
-
         # Define search space
         max_height = min(output_shape.height, self._max_block_shape.height)
         min_height = max(self._micro_block.height, upscaling_factor)
@@ -655,7 +668,7 @@ def get_valid_block_configs(
         if activation == "LUT" and not self._lut_reserved:
             banks_available -= 2
 
-        # Input block depth has additional limitations for Operators that require full input depth
+        # Input block depth has additional limitations for operators that require full input depth
         input_block_depth = 0
         is_partkernel = self.is_partkernel(op_type, ifm_channels, ifm_dtype, kernel_h * kernel_w)
         if op_type == "ethosu_conv2d":
@@ -669,6 +682,10 @@ def get_valid_block_configs(
                 # Block depth has to be less than full depth or a multiple of the split depth
                 continue
 
+            subkernel_propagator = self._get_subkernel_propagator(
+                op_attrs, ifm_propagator, input_layout, output_layout, depth
+            )
+
             for width in range(min_width, max_width + min_width, min_width):
                 for height in range(min_height, max_height + min_height, min_height):
                     if output_layout == "NHCWB16":
@@ -709,19 +726,11 @@ def get_valid_block_configs(
                         input_block_shape.depth = input_block_depth
 
                     # Banks required for input block
-                    input_bytes = input_block_shape.area() * self._align(
-                        input_block_shape.depth * input_bytewidth, 8
-                    )
-                    input_banks = _round_up_div(input_bytes, self._bank_size_bytes) * 2
-                    input_banks = _round_up(input_banks, self._input_granularity)
-
+                    input_banks = self._get_input_banks(input_block_shape, input_bytewidth)
                     # Banks required for accumulation
-                    acc_depth = _round_up(min(output_block_shape.depth, ofm_channels), 8)
-                    acc_bytes = (
-                        output_block_shape.area() * self._align(acc_depth, 8) * acc_bytewidth
+                    acc_banks = self._get_accumulator_banks(
+                        output_block_shape, acc_bytewidth, depth
                     )
-                    acc_banks = _round_up_div(acc_bytes, self._bank_size_bytes) * 2
-                    acc_banks = _round_up(acc_banks, self._accumulator_granularity[acc_bytewidth])
 
                     if (input_banks + acc_banks) <= banks_available:
                         output_cycles = self._get_output_cycles(
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
index 9581256303242..9e665009864d6 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/binary_elementwise.py
@@ -22,6 +22,7 @@
 from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher
 
 from .dma import dma_ofm_compute, dma_ifm_compute
+from .common import get_layout_transform_matrices
 
 
 def binary_elementwise_compute(
@@ -196,21 +197,8 @@ def binary_elementwise_compute(
             attrs=binary_elementwise_attrs,
         )
 
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ifm_channels))
+
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0],
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/common.py b/python/tvm/relay/backend/contrib/ethosu/te/common.py
new file mode 100644
index 0000000000000..aac060308efcd
--- /dev/null
+++ b/python/tvm/relay/backend/contrib/ethosu/te/common.py
@@ -0,0 +1,60 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+"""Common methods for the NPU tensor expressions"""
+
+from typing import Tuple, List
+
+
+def get_layout_transform_matrices(ofm_channels: int) -> Tuple[List[List[float]], List[List[float]]]:
+    """Build the affine matrices that map shapes between NHWC and NHCWB16.
+
+    The supported feature map layouts are documented at:
+    https://developer.arm.com/documentation/102420/0200/Functional-description/Control-and-data-flow/Supported-memory-formats-for-feature-maps
+
+    Parameters
+    ----------
+    ofm_channels : int
+        The number of channels of the tensor in NHWC layout.
+
+    Returns
+    -------
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc : Tuple[List[List[float]], List[List[float]]]
+        The layout transformation matrices, in that order.
+    """
+
+    # Going from NHWC to NHCWB16 turns C into (ofm_channels - 1) // 16 + 1, which is lossy.
+    # The exact channel count is therefore written as a constant into the C row of the
+    # NHCWB16->NHWC matrix so that converting back recovers the original C.
+    from_brick = [
+        [1, 0, 0, 0, 0, 0],  # N
+        [0, 1, 0, 0, 0, 0],  # H
+        [0, 0, 0, 1, 0, 0],  # W
+        [0, 0, 0, 0, 0, ofm_channels],  # C: restored from the known channel count
+        [0, 0, 0, 0, 0, 1],
+    ]
+
+    # The last NHCWB16 dimension (B16) is the constant 16 whatever the input shape is.
+    to_brick = [
+        [1, 0, 0, 0, 0],  # N
+        [0, 1, 0, 0, 0],  # H
+        [0, 0, 0, 1 / 16, 0],  # C: NHWC channels divided by 16
+        [0, 0, 1, 0, 0],  # W
+        [0, 0, 0, 0, 16],  # B16
+        [0, 0, 0, 0, 1],
+    ]
+
+    return to_brick, from_brick
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
index 77bc5a300cbe4..e309ab5a2af4d 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/convolution.py
@@ -23,6 +23,7 @@
 from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher
 
 from .dma import dma_ofm_compute, dma_ifm_compute
+from .common import get_layout_transform_matrices
 
 
 def conv2d_compute(
@@ -175,21 +176,8 @@ def conv2d_compute(
         attrs=conv2d_attrs,
     )
 
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels)
+
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py b/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
index 79d4f05f9cf26..03ce0e5349640 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/depthwise.py
@@ -23,6 +23,7 @@
 from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher
 
 from .dma import dma_ofm_compute, dma_ifm_compute
+from .common import get_layout_transform_matrices
 
 
 def depthwise_conv2d_compute(
@@ -169,21 +170,8 @@ def depthwise_conv2d_compute(
         attrs=depthwise_conv2d_attrs,
     )
 
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(channels)
+
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/pooling.py b/python/tvm/relay/backend/contrib/ethosu/te/pooling.py
index f1b065cbcf17f..8c20ea7165265 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/pooling.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/pooling.py
@@ -23,6 +23,7 @@
 from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher
 
 from .dma import dma_ofm_compute, dma_ifm_compute
+from .common import get_layout_transform_matrices
 
 
 def pooling_compute(
@@ -157,21 +158,8 @@ def pooling_compute(
         attrs=pooling_attrs,
     )
 
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ofm_channels))
+
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, stride_h, 0, 0, (pool_shape_h - stride_h)],
diff --git a/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py b/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py
index 69f06be955cba..50bbd36d98002 100644
--- a/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py
+++ b/python/tvm/relay/backend/contrib/ethosu/te/unary_elementwise.py
@@ -21,6 +21,7 @@
 from tvm import te
 from tvm.contrib.ethosu.cascader import TESubgraph, EthosuPart, Propagator, register_matcher
 from .dma import dma_ofm_compute, dma_ifm_compute
+from .common import get_layout_transform_matrices
 
 
 def unary_elementwise_compute(
@@ -129,21 +130,8 @@ def clz_imp(inp):
         attrs=unary_elementwise_attrs,
     )
 
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(int(ofm_channels))
+
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0],
diff --git a/src/contrib/ethosu/cascader/block_config.cc b/src/contrib/ethosu/cascader/block_config.cc
index afa65de013569..667d2e1ebefb3 100644
--- a/src/contrib/ethosu/cascader/block_config.cc
+++ b/src/contrib/ethosu/cascader/block_config.cc
@@ -37,6 +37,8 @@ void BlockConfigNode::VisitAttrs(AttrVisitor* v) {
   v->Visit("_input_shape", &tmp_arr);
   tmp_arr = make_array(output_shape_);
   v->Visit("_output_shape", &tmp_arr);
+  v->Visit("_compute_cycles", &compute_cycles_);
+  v->Visit("_output_cycles", &output_cycles_);
 }
 
 BlockConfig::BlockConfig(const std::vector<int>& input_shape, const std::vector<int>& output_shape,
diff --git a/tests/python/contrib/test_ethosu/cascader/infra.py b/tests/python/contrib/test_ethosu/cascader/infra.py
index aa681c41f2108..614fed97a0a54 100644
--- a/tests/python/contrib/test_ethosu/cascader/infra.py
+++ b/tests/python/contrib/test_ethosu/cascader/infra.py
@@ -55,6 +55,7 @@ def make_simple_home_map(graph, var_region, const_region):
 
 if ethosu_enabled:
     from tvm.relay.backend.contrib.ethosu.tir.compiler import extract_constants, lower_to_te
+    from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices
 
     def create_te_graph(func):
         func, consts = extract_constants(func)
@@ -64,28 +65,24 @@ def create_te_graph(func):
         return te_graph, consts
 
     def make_matrices(
-        op_type, kernel, stride, padding, ifm_layout, ofm_layout, dilation=(1, 1), ifm_channels=1
+        op_type,
+        kernel,
+        stride,
+        padding,
+        ifm_layout,
+        ofm_layout,
+        dilation=(1, 1),
+        ifm_channels=1,
+        ofm_channels=1,
     ):
         kernel_h, kernel_w = kernel
         stride_h, stride_w = stride
         dilation_h, dilation_w = dilation
         dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
         dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
-        nhwc_to_nhcwb16 = [
-            [1, 0, 0, 0, 0],
-            [0, 1, 0, 0, 0],
-            [0, 0, 0, 1 / 16, 0],
-            [0, 0, 1, 0, 0],
-            [0, 0, 0, 0, 16],
-            [0, 0, 0, 0, 1],
-        ]
-        nhcwb16_to_nhwc = [
-            [1, 0, 0, 0, 0, 0],
-            [0, 1, 0, 0, 0, 0],
-            [0, 0, 0, 1, 0, 0],
-            [0, 0, 16, 0, 1, -16],
-            [0, 0, 0, 0, 0, 1],
-        ]
+
+        nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels)
+
         if op_type == "ethosu_conv2d":
             ifm_matrix = [
                 [1, 0, 0, 0, 0],
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py
index bb1be7b8e251d..062e5ba0fafd5 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_binary_elementwise_matcher.py
@@ -27,25 +27,12 @@
     match_ethosu_binary_elementwise,
     binary_elementwise_compute,
 )
+from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices
 
 
-def _make_matrices(broadcast, ifm_layout, ifm2_layout, ofm_layout):
+def _make_matrices(broadcast, ifm_layout, ifm2_layout, ofm_layout, ofm_channels):
     broadcast_h, broadcast_w, broadcast_c = broadcast
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels)
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0],
@@ -93,14 +80,8 @@ def test_ethosu_binary_elementwise_matcher(
     ifm2_shape = [1] + [1 if (b == 1) else a for a, b in zip(ofm_shape[1:], ifm2_broadcast)]
     ifm_channels = ifm_shape[3]
     ifm2_channels = ifm2_shape[3]
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
+    ofm_channels = ofm_shape[3]
+    nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels)
     broadcast = [1 if a == 1 else 0 for a in ifm2_shape[1:]]
     if ifm_layout == "NHCWB16":
         ifm_shape = [
@@ -173,10 +154,7 @@ def test_ethosu_binary_elementwise_matcher(
     output_stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset)
 
     (ifm_transform, ifm2_transform) = _make_matrices(
-        broadcast,
-        ifm_layout,
-        ifm2_layout,
-        ofm_layout,
+        broadcast, ifm_layout, ifm2_layout, ofm_layout, ofm_channels
     )
 
     device_config = cs.EthosuDeviceConfig("ethos-u55-256")
@@ -190,19 +168,10 @@ def test_ethosu_binary_elementwise_matcher(
     propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape
     propagated_ifm2 = ifm2_propagator.propagate(output_stripe_config).shape
 
-    # Layout conversions will align the propagated IFMs to the brick, i.e. 16
-    # so the expected ifm(2)_shape needs to be rounded up to 16
-    if ifm_layout != ofm_layout:
-        assert ifm_shape[:-1] == propagated_ifm[:-1]
-        assert ((ifm_shape[-1] + 16 - 1) // 16) * 16 == propagated_ifm[-1]
-    else:
-        assert ifm_shape == propagated_ifm
-
-    if ifm2_layout != ofm_layout:
-        assert ifm2_shape[:-1] == propagated_ifm2[:-1]
-        assert ((ifm2_shape[-1] + 16 - 1) // 16) * 16 == propagated_ifm2[-1]
-    else:
-        assert ifm2_shape == propagated_ifm2
+    # The layout transforms that have the exact number of output channels in them
+    # will lose no information about the number of channels
+    assert ifm_shape == propagated_ifm
+    assert ifm2_shape == propagated_ifm2
 
 
 if __name__ == "__main__":
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py
index 18f15f9257dbf..09fd056ce794c 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_block_config.py
@@ -22,6 +22,7 @@
 import math
 
 import tvm.contrib.ethosu.cascader as cs
+from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices
 
 from .infra import make_matrices
 
@@ -164,7 +165,7 @@
                 ((1, 6, 5, 16), (1, 6, 1, 5, 16)),
                 ((1, 4, 4, 16), (1, 4, 1, 4, 16)),
                 ((1, 8, 4, 16), (1, 8, 1, 4, 16)),
-                ((1, 10, 6, 4), (1, 5, 1, 12, 4), (1, 16, 1, 4, 4)),
+                ((1, 10, 6, 4), (1, 5, 1, 12, 4), (1, 10, 1, 6, 4)),
                 ((1, 6, 5, 16), (1, 6, 1, 5, 16)),
                 # Depthwise Conv2D
                 ((1, 6, 10, 16), (1, 6, 1, 10, 16)),
@@ -182,7 +183,7 @@
                 ((1, 6, 5, 16), (1, 6, 1, 5, 16)),
                 ((1, 4, 4, 16), (1, 4, 1, 4, 16)),
                 ((1, 8, 4, 16), (1, 8, 1, 4, 16)),
-                ((1, 10, 6, 8), (1, 16, 1, 4, 8)),
+                ((1, 10, 6, 8), (1, 10, 1, 6, 8)),
                 ((1, 6, 5, 16), (1, 6, 1, 5, 16)),
                 # Depthwise Conv2D
                 ((1, 6, 10, 16), (1, 6, 1, 10, 16)),
@@ -244,28 +245,23 @@ def test_best_block_config(
     acc_config,
     expected_block_configs,
 ):
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
-    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
-        op_type, kernel, stride, padding, layouts[0], layouts[1], dilation, in_shape[3]
-    )
-
     ofm_channels = out_shape[3]
     ifm_channels = in_shape[3]
 
+    nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels)
+
+    ifm_matrix, ifm_offset, weight_matrix, weight_offset, _, _ = make_matrices(
+        op_type,
+        kernel,
+        stride,
+        padding,
+        layouts[0],
+        layouts[1],
+        dilation,
+        ifm_channels,
+        ofm_channels,
+    )
+
     if layouts[0] == "NHCWB16":
         in_shape = [
             int(math.ceil(n)) for n in np.matmul(nhwc_to_nhcwb16, in_shape + (1,)).tolist()[:-1]
@@ -321,9 +317,12 @@ def test_best_block_config(
     # Add tensors
     input_tensor = cs.Tensor(in_shape, "int8")
     part.set_input(0, input_tensor)
-    if op_type in ("ethosu_conv2d", "ethosu_depthwise_conv2d"):
+    if op_type == "ethosu_conv2d":
         weight_tensor = cs.Tensor([ofm_channels, kernel[0], kernel[1], ifm_channels], "int8")
         part.set_input(1, weight_tensor)
+    elif op_type == "ethosu_depthwise_conv2d":
+        weight_tensor = cs.Tensor([ofm_channels, kernel[0], kernel[1], 1], "int8")
+        part.set_input(1, weight_tensor)
 
     output_tensor = cs.Tensor(out_shape, "int8")
     part.set_output(output_tensor)
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py
index 5bd2be49f6204..17b41cbaf511e 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_conv2d_matcher.py
@@ -82,6 +82,7 @@ def test_ethosu_conv2d_matcher(
         ofm_layout,
         dilation,
         ifm_channels,
+        ofm_channels,
     )
 
     device_config = cs.EthosuDeviceConfig("ethos-u55-256")
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py
index c2c45b6524f1b..1e6b6d58b24af 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_depthwise2d_matcher.py
@@ -83,6 +83,7 @@ def test_ethosu_depthwise2d_matcher(kernel, stride, dilation, padding, ifm_layou
         ifm_layout,
         ofm_layout,
         dilation,
+        ofm_channels=ofm_channels,
     )
 
     device_config = cs.EthosuDeviceConfig("ethos-u55-256")
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py
index 6ce8ee9a2986d..b998ddaf70457 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_pooling_matcher.py
@@ -66,6 +66,7 @@ def test_ethosu_pooling_matcher(pool_shape, stride, padding, ifm_layout, ofm_lay
         padding,
         ifm_layout,
         ofm_layout,
+        ofm_channels=ofm_channels,
     )
 
     device_config = cs.EthosuDeviceConfig("ethos-u55-256")
diff --git a/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py b/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py
index 0570524e09073..8139f1518f56e 100644
--- a/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py
+++ b/tests/python/contrib/test_ethosu/cascader/test_ethosu_unary_elementwise_matcher.py
@@ -27,24 +27,11 @@
     match_ethosu_unary_elementwise,
     unary_elementwise_compute,
 )
+from tvm.relay.backend.contrib.ethosu.te.common import get_layout_transform_matrices
 
 
-def _make_matrices(ifm_layout, ofm_layout):
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
-    nhcwb16_to_nhwc = [
-        [1, 0, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0, 0],
-        [0, 0, 0, 1, 0, 0],
-        [0, 0, 16, 0, 1, -16],
-        [0, 0, 0, 0, 0, 1],
-    ]
+def _make_matrices(ifm_layout, ofm_layout, ofm_channels):
+    nhwc_to_nhcwb16, nhcwb16_to_nhwc = get_layout_transform_matrices(ofm_channels)
     ifm_matrix = [
         [1, 0, 0, 0, 0],
         [0, 1, 0, 0, 0],
@@ -76,14 +63,7 @@ def _make_matrices(ifm_layout, ofm_layout):
 def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_type):
     ifm_shape = ofm_shape.copy()
     ofm_channels = ofm_shape[3]
-    nhwc_to_nhcwb16 = [
-        [1, 0, 0, 0, 0],
-        [0, 1, 0, 0, 0],
-        [0, 0, 0, 1 / 16, 0],
-        [0, 0, 1, 0, 0],
-        [0, 0, 0, 0, 16],
-        [0, 0, 0, 0, 1],
-    ]
+    nhwc_to_nhcwb16, _ = get_layout_transform_matrices(ofm_channels)
     if ifm_layout == "NHCWB16":
         ifm_shape = [
             int(math.ceil(n))
@@ -134,7 +114,7 @@ def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_
     stripes = [0] * len(ofm_shape)
     output_stripe_config = cs.StripeConfig(ofm_shape, ofm_shape, ofm_shape, order, stripes, offset)
 
-    ifm_transform = _make_matrices(ifm_layout, ofm_layout)
+    ifm_transform = _make_matrices(ifm_layout, ofm_layout, ofm_channels)
 
     device_config = cs.EthosuDeviceConfig("ethos-u55-256")
     part = match_ethosu_unary_elementwise(out, device_config)
@@ -145,13 +125,9 @@ def test_ethosu_unary_elementwise_matcher(ofm_shape, ifm_layout, ofm_layout, op_
 
     propagated_ifm = ifm_propagator.propagate(output_stripe_config).shape
 
-    # Layout conversions will align the propagated IFMs to the brick, i.e. 16
-    # so the expected ifm_shape needs to be rounded up to 16
-    if ifm_layout != ofm_layout:
-        assert ifm_shape[:-1] == propagated_ifm[:-1]
-        assert ((ifm_shape[-1] + 16 - 1) // 16) * 16 == propagated_ifm[-1]
-    else:
-        assert ifm_shape == propagated_ifm
+    # The layout transforms that have the exact number of output channels in them
+    # will lose no information about the number of channels
+    assert ifm_shape == propagated_ifm
 
 
 if __name__ == "__main__":