From 643d9912189bf2e8cbc9279a01d82d52d6691df9 Mon Sep 17 00:00:00 2001
From: Matthew Barrett <55580676+mbaret@users.noreply.github.com>
Date: Sat, 4 Dec 2021 07:35:19 +0000
Subject: [PATCH] [microNPU] Refactor codegen tests (#9623)

* [microNPU] Refactor codegen tests

Change-Id: I9c08520c9e03eb3fc32bd911b56c95981e851b4b

* Fix params

Change-Id: I8cea69ed3824c3a0417bb67abbabce460c17c4c6

* Remove prints

Change-Id: Iadf048e9590e724d73c2adac51bbe303de6f59a8

* Address review comments

Change-Id: I56d647d86e3d495abe38b13cca349a71ec81cf4d
---
 .../contrib/test_ethosu/test_codegen.py       | 787 ++++++------------
 1 file changed, 268 insertions(+), 519 deletions(-)

diff --git a/tests/python/contrib/test_ethosu/test_codegen.py b/tests/python/contrib/test_ethosu/test_codegen.py
index afd635d96cc6..42695db08342 100644
--- a/tests/python/contrib/test_ethosu/test_codegen.py
+++ b/tests/python/contrib/test_ethosu/test_codegen.py
@@ -24,7 +24,10 @@
 import tvm
 import tensorflow as tf
 from tvm import relay
+from tvm.relay.expr_functor import ExprMutator
+from tvm.relay.op.annotation import compiler_begin, compiler_end
 from tvm.relay.backend.contrib.ethosu import util
+from tvm.relay.backend.contrib.ethosu import preprocess
 from tvm.relay.op.contrib.ethosu import partition_for_ethosu
 from tests.python.relay.aot.aot_test_utils import generate_ref_data
 
@@ -166,89 +169,146 @@ def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_
         infra.verify_source(compiled_models, accel_type)
 
 
-@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
-@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)])
-@pytest.mark.parametrize(
-    "kernel_shape, activation",
-    [((3, 3), "relu"), ((1, 2), None)],
-)
-@pytest.mark.parametrize("padding", ["SAME", "VALID"])
-@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 2)), ((3, 2), (1, 1))])
-def test_tflite_depthwise_conv2d(
-    accel_type,
-    ifm_shape,
-    kernel_shape,
-    padding,
-    strides,
-    dilation,
-    activation,
+def _compare_ethosu_with_reference(
+    mod, input_data, output_data, accel_type, output_tolerance=0, print_cmm=False
 ):
-    dtype = "int8"
+    compiled_models = infra.build_source(
+        mod,
+        input_data,
+        output_data,
+        accel_type,
+        output_tolerance=output_tolerance,
+    )
 
-    def create_tflite_graph():
-        class Model(tf.Module):
-            @tf.function
-            def depthwise_conv2d(self, x):
-                weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1]
-                weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32)
-                # The input strides to the TensorFlow API needs to be of shape 1x4
-                tf_strides = [1, strides[0], strides[1], 1]
-                op = tf.nn.depthwise_conv2d(
-                    x, weight, strides=tf_strides, padding=padding, dilations=dilation
-                )
-                if activation:
-                    op = tf.nn.relu(op)
-                return op
+    # Assumes only two runtime.Modules are created -- i.e. single offload module
+    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
 
-        model = Model()
-        concrete_func = model.depthwise_conv2d.get_concrete_function(
-            tf.TensorSpec(ifm_shape, dtype=tf.float32)
-        )
+    # Verify generated C source
+    if print_cmm:
+        get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
+        compilation_artifacts = get_artifacts(ethosu_module)
+        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
+        infra.print_payload(cmms)
 
-        # Convert the model
-        def representative_dataset():
-            for _ in range(100):
-                data = np.random.rand(*tuple(ifm_shape))
-                yield [data.astype(np.float32)]
+    infra.verify_source(compiled_models, accel_type)
 
-        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_dataset
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        converter.inference_input_type = tf.int8
-        converter.inference_output_type = tf.int8
-        tflite_model = converter.convert()
-        return tflite_model
 
-    tflite_graph = create_tflite_graph()
+def _compare_tvm_with_tflite(
+    tf_func, shapes, accel_type, ranges=None, output_tolerance=0, print_cmm=False
+):
+    tensor_specs = [tf.TensorSpec(shape, dtype=tf.float32) for shape in shapes]
+    if not ranges:
+        ranges = [(0, 1) for _ in shapes]
+    concrete_func = tf_func.get_concrete_function(*tensor_specs)
+
+    # Convert the model
+    def representative_dataset():
+        for _ in range(100):
+            inputs = []
+            for i, shape in enumerate(shapes):
+                data = np.random.uniform(
+                    low=ranges[i][0], high=ranges[i][1], size=tuple(shape)
+                ).astype("float32")
+                inputs.append(data)
+
+            yield inputs
+
+    converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
+    converter.optimizations = [tf.lite.Optimize.DEFAULT]
+    converter.representative_dataset = representative_dataset
+    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
+    converter.inference_input_type = tf.int8
+    converter.inference_output_type = tf.int8
+    tflite_graph = converter.convert()
+
     tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
 
-    relay_module, params = relay.frontend.from_tflite(
-        tflite_model,
-        shape_dict={"input": ifm_shape},
-        dtype_dict={"input": dtype},
-    )
+    relay_module, params = relay.frontend.from_tflite(tflite_model)
     mod = partition_for_ethosu(relay_module, params)
 
     # Generate reference data
     input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
 
-    compiled_models = infra.build_source(
+    _compare_ethosu_with_reference(
         mod,
         input_data,
         output_data,
         accel_type,
+        output_tolerance=output_tolerance,
+        print_cmm=print_cmm,
     )
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
 
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
+class EthosUAnnotator(ExprMutator):
+    """Annotate entire graph for Ethos-U offload"""
+
+    def __init__(self):
+        super(EthosUAnnotator, self).__init__()
+        self.compiler = "ethos-u"
+        self.last_call = True
+
+    def visit_call(self, call):
+        curr_last = self.last_call
+        self.last_call = False
+
+        params = []
+        for arg in call.args:
+            param = super().visit(arg)
+            if isinstance(param, relay.expr.Var):
+                param = compiler_begin(param, self.compiler)
+            params.append(param)
+
+        new_call = relay.Call(call.op, params, call.attrs)
+        if curr_last:
+            new_call = compiler_end(new_call, self.compiler)
+        return new_call
+
+    def visit_constant(self, constant):
+        new_constant = compiler_begin(constant, self.compiler)
+        return new_constant
+
+
+def _create_ethosu_partition(mod):
+    mod["main"] = EthosUAnnotator().visit(mod["main"])
+    mod = relay.transform.MergeCompilerRegions()(mod)
+    mod = relay.transform.InferType()(mod)
+    mod = relay.transform.PartitionGraph()(mod)
+    mod = relay.transform.InferType()(mod)
+    mod = preprocess.preprocess_ext_io()(mod)
+    return mod
+
+
+@pytest.mark.parametrize("accel_type", ACCEL_TYPES)
+@pytest.mark.parametrize("ifm_shape", [(1, 55, 55, 3), (1, 23, 32, 7)])
+@pytest.mark.parametrize(
+    "kernel_shape, activation_function",
+    [((3, 3), "RELU"), ((1, 2), "NONE")],
+)
+@pytest.mark.parametrize("padding", ["SAME", "VALID"])
+@pytest.mark.parametrize("strides, dilation", [((1, 1), (2, 2)), ((3, 2), (1, 1))])
+def test_tflite_depthwise_conv2d(
+    accel_type,
+    ifm_shape,
+    kernel_shape,
+    padding,
+    strides,
+    dilation,
+    activation_function,
+):
+    @tf.function
+    def depthwise_conv2d(x):
+        weight_shape = [kernel_shape[0], kernel_shape[1], ifm_shape[3], 1]
+        weight = tf.constant(np.random.uniform(size=weight_shape), dtype=tf.float32)
+        # The input strides to the TensorFlow API needs to be of shape 1x4
+        tf_strides = [1, strides[0], strides[1], 1]
+        op = tf.nn.depthwise_conv2d(
+            x, weight, strides=tf_strides, padding=padding, dilations=dilation
+        )
+        if activation_function:
+            op = tf.nn.relu(op)
+        return op
+
+    _compare_tvm_with_tflite(depthwise_conv2d, [ifm_shape], accel_type)
 
 
 @pytest.mark.parametrize(
@@ -270,69 +330,17 @@ def test_ethosu_pooling(
     activation_function,
     padding,
 ):
-    dtype = "int8"
+    @tf.function
+    def pooling(x):
+        if pooling_type == "MAX":
+            op = tf.nn.max_pool(x, pool_shape, strides, padding)
+        elif pooling_type == "AVG":
+            op = tf.nn.avg_pool(x, pool_shape, strides, padding)
+        if activation_function == "RELU":
+            op = tf.nn.relu(op)
+        return op
 
-    def create_tflite_graph():
-        class Model(tf.Module):
-            @tf.function
-            def tf_function(self, x):
-                if pooling_type == "MAX":
-                    op = tf.nn.max_pool(x, pool_shape, strides, padding)
-                elif pooling_type == "AVG":
-                    op = tf.nn.avg_pool(x, pool_shape, strides, padding)
-                if activation_function == "RELU":
-                    op = tf.nn.relu(op)
-                return op
-
-        model = Model()
-        concrete_func = model.tf_function.get_concrete_function(
-            tf.TensorSpec(ifm_shape, dtype=tf.float32)
-        )
-
-        # Convert the model
-        def representative_dataset():
-            for _ in range(100):
-                data = np.random.rand(*tuple(ifm_shape))
-                yield [data.astype(np.float32)]
-
-        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_dataset
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        converter.inference_input_type = tf.int8
-        converter.inference_output_type = tf.int8
-        tflite_model = converter.convert()
-        return tflite_model
-
-    tflite_graph = create_tflite_graph()
-    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
-
-    relay_module, params = relay.frontend.from_tflite(
-        tflite_model,
-        shape_dict={"x": ifm_shape},
-        dtype_dict={"x": dtype},
-    )
-    mod = partition_for_ethosu(relay_module, params)
-
-    # Generate reference data
-    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
-
-    compiled_models = infra.build_source(
-        mod,
-        input_data,
-        output_data,
-        accel_type,
-    )
-
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
+    _compare_tvm_with_tflite(pooling, [ifm_shape], accel_type)
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@@ -354,78 +362,30 @@ def test_ethosu_binary_elementwise(
     ifm2_shape,
     activation_function,
 ):
-    dtype = "int8"
-
-    def create_tflite_graph():
-        class Model(tf.Module):
-            @tf.function
-            def tf_function(self, lhs, rhs):
-                if operator_type == "ADD":
-                    op = tf.math.add(lhs, rhs)
-                elif operator_type == "SUB":
-                    op = tf.math.subtract(lhs, rhs)
-                elif operator_type == "MUL":
-                    op = tf.math.multiply(lhs, rhs)
-                elif operator_type == "MIN":
-                    op = tf.math.minimum(lhs, rhs)
-                elif operator_type == "MAX":
-                    op = tf.math.maximum(lhs, rhs)
-                if activation_function == "RELU":
-                    op = tf.nn.relu(op)
-                return op
-
-        model = Model()
-        concrete_func = model.tf_function.get_concrete_function(
-            tf.TensorSpec(ifm_shape, dtype=tf.float32), tf.TensorSpec(ifm2_shape, dtype=tf.float32)
-        )
-
-        # Convert the model
-        def representative_dataset():
-            for _ in range(100):
-                data = np.random.rand(*tuple(ifm_shape))
-                data2 = np.random.rand(*tuple(ifm2_shape)) * 2
-                yield [data.astype(np.float32), data2.astype(np.float32)]
-
-        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_dataset
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        converter.inference_input_type = tf.int8
-        converter.inference_output_type = tf.int8
-        tflite_model = converter.convert()
-        return tflite_model
-
-    tflite_graph = create_tflite_graph()
-    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
-
-    mod, params = relay.frontend.from_tflite(
-        tflite_model,
-        shape_dict={"ifm": ifm_shape, "ifm2": ifm2_shape},
-        dtype_dict={"ifm": dtype, "ifm2": dtype},
-    )
-    mod = partition_for_ethosu(mod, params)
-
-    # Generate reference data
-    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
-
-    compiled_models = infra.build_source(
-        mod,
-        input_data,
-        output_data,
-        accel_type,
+    @tf.function
+    def binary_elementwise(lhs, rhs):
+        if operator_type == "ADD":
+            op = tf.math.add(lhs, rhs)
+        elif operator_type == "SUB":
+            op = tf.math.subtract(lhs, rhs)
+        elif operator_type == "MUL":
+            op = tf.math.multiply(lhs, rhs)
+        elif operator_type == "MIN":
+            op = tf.math.minimum(lhs, rhs)
+        elif operator_type == "MAX":
+            op = tf.math.maximum(lhs, rhs)
+        if activation_function == "RELU":
+            op = tf.nn.relu(op)
+        return op
+
+    _compare_tvm_with_tflite(
+        binary_elementwise,
+        shapes=[ifm_shape, ifm2_shape],
+        ranges=[(0, 1), (0, 2)],
+        accel_type=accel_type,
         output_tolerance=1 if operator_type == "MAX" else 0,
     )
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
-
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
 @pytest.mark.parametrize(
@@ -441,66 +401,17 @@ def test_binary_add_with_non_4d_shapes(
     ifm_shape,
     ifm2_shape,
 ):
-    dtype = "int8"
-
-    def create_tflite_graph():
-        class Model(tf.Module):
-            @tf.function
-            def tf_function(self, lhs, rhs):
-                return tf.math.add(lhs, rhs)
-
-        model = Model()
-        concrete_func = model.tf_function.get_concrete_function(
-            tf.TensorSpec(ifm_shape, dtype=tf.float32), tf.TensorSpec(ifm2_shape, dtype=tf.float32)
-        )
-
-        # Convert the model
-        def representative_dataset():
-            for _ in range(100):
-                data = np.random.rand(*tuple(ifm_shape))
-                data2 = np.random.rand(*tuple(ifm2_shape)) * 2
-                yield [data.astype(np.float32), data2.astype(np.float32)]
-
-        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_dataset
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        converter.inference_input_type = tf.int8
-        converter.inference_output_type = tf.int8
-        tflite_model = converter.convert()
-        return tflite_model
-
-    tflite_graph = create_tflite_graph()
-    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
-
-    mod, params = relay.frontend.from_tflite(
-        tflite_model,
-        shape_dict={"ifm": ifm_shape, "ifm2": ifm2_shape},
-        dtype_dict={"ifm": dtype, "ifm2": dtype},
-    )
-    mod = partition_for_ethosu(mod, params)
-
-    # Generate reference data
-    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
-
-    compiled_models = infra.build_source(
-        mod,
-        input_data,
-        output_data,
-        accel_type,
-        output_tolerance=0,
+    @tf.function
+    def binary_elementwise(lhs, rhs):
+        return tf.math.add(lhs, rhs)
+
+    _compare_tvm_with_tflite(
+        binary_elementwise,
+        shapes=[ifm_shape, ifm2_shape],
+        ranges=[(0, 1), (0, 2)],
+        accel_type=accel_type,
     )
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
-
 
 @pytest.mark.parametrize(
     "accel_type",
@@ -621,34 +532,19 @@ def create_relay_graph():
             relay.const(1.0, dtype="float32"),
             relay.const(0, dtype="int32"),
         )
-        func = relay.Function(relay.analysis.free_vars(add), add)
-        return tvm.IRModule.from_expr(func)
+        return tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(add), add))
 
-    mod = create_relay_graph()
-    partitioned_mod = partition_for_ethosu(mod)
+    cpu_mod = create_relay_graph()
+    ethosu_mod = partition_for_ethosu(cpu_mod)
 
     # Generate reference data
     input_data = {"input": np.random.randint(low=0, high=255, size=ifm_shape, dtype=dtype)}
-    output_data = generate_ref_data(mod, input_data)
+    output_data = generate_ref_data(cpu_mod, input_data)
 
-    compiled_models = infra.build_source(
-        partitioned_mod,
-        input_data,
-        output_data,
-        accel_type,
-        output_tolerance=0,
+    _compare_ethosu_with_reference(
+        ethosu_mod, input_data, output_data, accel_type, output_tolerance=0
     )
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
-
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
 @pytest.mark.parametrize(
@@ -670,13 +566,9 @@ def create_model():
         ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
         ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
         c1 = relay.left_shift(ifm, ifm2)
-        f = relay.Function([ifm, ifm2], c1)
-        mod = tvm.IRModule()
-        mod["main"] = f
-        return mod
+        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], c1))
 
-    relay_mod = create_model()
-    mod = partition_for_ethosu(relay_mod)
+    cpu_mod = create_model()
 
     # Generate reference data
     in_min, in_max = util.get_range_for_dtype_str(dtype)
@@ -684,25 +576,13 @@ def create_model():
         "ifm": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype),
         "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
     }
-    output_data = generate_ref_data(relay_mod, input_data)
+    output_data = generate_ref_data(cpu_mod, input_data)
+    ethosu_mod = partition_for_ethosu(cpu_mod)
 
-    compiled_models = infra.build_source(
-        mod,
-        input_data,
-        output_data,
-        accel_type,
+    _compare_ethosu_with_reference(
+        ethosu_mod, input_data, output_data, accel_type, output_tolerance=0
     )
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
-
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
 @pytest.mark.parametrize(
@@ -719,50 +599,33 @@ def test_ethosu_right_shift_binary_elemwise(
     dtype = "int32"
 
     def create_model():
-        ifm_count = int(np.prod(ifm_shape))
-        ifm2_count = int(np.prod(ifm2_shape))
-
-        # Create a "partitioned" Relay function
-        ifms = relay.var("ifms", shape=[ifm_count + ifm2_count], dtype=dtype)
-        split = relay.split(ifms, [ifm_count])
-        ifm = relay.reshape(split[0], newshape=ifm_shape)
-        ifm2 = relay.reshape(split[1], newshape=ifm2_shape)
+        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
+        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
         shr_op = infra.make_ethosu_binary_elementwise(
             ifm, ifm2, ifm_shape[3], ifm2_shape[3], "SHR", ofm_dtype, reversed_operands
         )
+        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], shr_op))
+
+    def generate_output_data(input_data):
+        lhs = input_data["ifm"]
+        rhs = input_data["ifm2"]
+        if reversed_operands:
+            lhs = np.broadcast_to(lhs, ifm2_shape)
+            lhs, rhs = rhs, lhs
+        else:
+            rhs = np.broadcast_to(rhs, ifm_shape)
 
-        glb_ethosu = relay.GlobalVar("tvmgen_default_ethos_u_main_0")
-        func = (
-            relay.Function([ifms], shr_op)
-            .with_attr("Inline", 1)
-            .with_attr("Compiler", "ethos-u")
-            .with_attr("global_symbol", "tvmgen_default_ethos_u_main_0")
-            .with_attr("Primitive", 1)
-        )
-        mod = tvm.IRModule()
-        mod[glb_ethosu] = func
-        mod = relay.transform.InferType()(mod)
+        def rounding_right_shift(lhs, rhs):
+            r = 1 << (rhs - 1)
+            return (lhs + r) >> rhs
 
-        # Main
-        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
-        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
-        call = relay.Call(
-            glb_ethosu,
-            [
-                relay.concatenate(
-                    data=(
-                        relay.reshape(ifm, newshape=ifm_count),
-                        relay.reshape(ifm2, newshape=ifm2_count),
-                    ),
-                    axis=0,
-                )
-            ],
-        )
-        mod["main"] = relay.Function([ifm, ifm2], call)
-        mod = relay.transform.InferType()(mod)
-        return mod
+        return [
+            np.array([rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)]).astype(
+                ofm_dtype
+            )
+        ]
 
-    mod = create_model()
+    cpu_mod = create_model()
 
     # Generate reference data
     in_min, in_max = util.get_range_for_dtype_str(dtype)
@@ -773,61 +636,39 @@ def create_model():
         "ifm": lhs,
         "ifm2": rhs,
     }
+    output_data = generate_output_data(input_data)
+    ethosu_mod = _create_ethosu_partition(cpu_mod)
 
-    if reversed_operands:
-        lhs = np.broadcast_to(lhs, ifm2_shape)
-        lhs, rhs = rhs, lhs
-    else:
-        rhs = np.broadcast_to(rhs, ifm_shape)
-
-    def rounding_right_shift(lhs, rhs):
-        r = 1 << (rhs - 1)
-        return (lhs + r) >> rhs
-
-    output_data = np.array(
-        [rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)]
-    ).astype(ofm_dtype)
-
-    compiled_models = infra.build_source(mod, input_data, [output_data], accel_type)
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
+    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
 @pytest.mark.parametrize("ifm_shape", [(3, 2), (1, 15, 11, 7), (3, 1, 12), (400,)])
 @pytest.mark.parametrize("ifm_scale, ifm_zp, ofm_scale, ofm_zp", [(1, 0, 1, 0), (0.015, 3, 0.2, 5)])
 def test_ethosu_identity_codegen(ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp, accel_type):
-    # Create a "partitioned" Relay function
-    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int8")
-    identity = infra.make_ethosu_identity(
-        ifm0, ifm_scale=ifm_scale, ifm_zero_point=ifm_zp, ofm_scale=ofm_scale, ofm_zero_point=ofm_zp
-    )
-    mod = infra.make_partitioned_function(identity)
-
-    in_data = np.random.randint(-120, high=120, size=ifm_shape, dtype="int8")
-    requant_data = (ifm_scale * (in_data - ifm_zp)) / ofm_scale + ofm_zp
-    out_data = np.round(np.clip(requant_data, -128, 127)).astype("int8")
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
+        identity = infra.make_ethosu_identity(
+            ifm,
+            ifm_scale=ifm_scale,
+            ifm_zero_point=ifm_zp,
+            ofm_scale=ofm_scale,
+            ofm_zero_point=ofm_zp,
+        )
+        return tvm.IRModule.from_expr(relay.Function([ifm], identity))
 
-    compiled_model = infra.build_source(
-        mod, {"ifm": in_data}, [out_data], accel_type, output_tolerance=1
-    )
+    def generate_output_data(input_data):
+        requant_data = (ifm_scale * (input_data["ifm"] - ifm_zp)) / ofm_scale + ofm_zp
+        return [np.round(np.clip(requant_data, -128, 127)).astype("int8")]
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_model[0].executor_factory.lib.imported_modules[0].imported_modules[0]
+    cpu_mod = create_model()
+    input_data = {"ifm": np.random.randint(-120, high=120, size=ifm_shape, dtype="int8")}
+    output_data = generate_output_data(input_data)
+    ethosu_mod = _create_ethosu_partition(cpu_mod)
 
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_model, accel_type)
+    _compare_ethosu_with_reference(
+        ethosu_mod, input_data, output_data, accel_type, output_tolerance=1
+    )
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@@ -844,36 +685,17 @@ def test_ethosu_identity_codegen(ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp
     ],
 )
 def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
-    # Create a "partitioned" Relay graph
-    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int8")
-    reshape = relay.op.reshape(ifm0, newshape=new_shape)
-    mod = infra.make_partitioned_function(reshape)
-
-    data = np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")
-
-    # Generate a reference output using Relay reshape that doesn't get offloaded
-    ref_mod = tvm.IRModule()
-    ref_mod["main"] = relay.Function([ifm0], reshape)
-    ref_mod = relay.transform.InferType()(ref_mod)
-
-    out_data = generate_ref_data(ref_mod, {"ifm0": data})
-
-    compiled_model = infra.build_source(
-        mod,
-        {"ifm": data},
-        out_data,
-        accel_type,
-    )
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
+        reshape = relay.op.reshape(ifm, newshape=new_shape)
+        return tvm.IRModule.from_expr(relay.Function([ifm], reshape))
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_model[0].executor_factory.lib.imported_modules[0].imported_modules[0]
+    cpu_mod = create_model()
+    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")}
+    output_data = generate_ref_data(cpu_mod, input_data)
+    ethosu_mod = _create_ethosu_partition(cpu_mod)
 
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_model, accel_type)
+    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@@ -887,36 +709,17 @@ def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
     ],
 )
 def test_relay_strided_slice_codegen(ifm_shape, begin, end, accel_type):
-    # Create a "partitioned" Relay graph
-    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int8")
-    strided_slice = relay.op.strided_slice(ifm0, begin, end)
-    mod = infra.make_partitioned_function(strided_slice)
-
-    input_data = np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")
-
-    # Generate a reference output using Relay strided slice that doesn't get offloaded
-    ref_mod = tvm.IRModule()
-    ref_mod["main"] = relay.Function([ifm0], strided_slice)
-    ref_mod = relay.transform.InferType()(ref_mod)
-
-    out_data = generate_ref_data(ref_mod, {"ifm0": input_data})
-
-    compiled_model = infra.build_source(
-        mod,
-        {"ifm": input_data},
-        out_data,
-        accel_type,
-    )
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
+        strided_slice = relay.op.strided_slice(ifm, begin, end)
+        return tvm.IRModule.from_expr(relay.Function([ifm], strided_slice))
 
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_model[0].executor_factory.lib.imported_modules[0].imported_modules[0]
+    cpu_mod = create_model()
+    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")}
+    output_data = generate_ref_data(cpu_mod, input_data)
+    ethosu_mod = _create_ethosu_partition(cpu_mod)
 
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_model, accel_type)
+    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
@@ -930,66 +733,13 @@ def test_ethosu_unary_elementwise(
     operator_type,
     ifm_shape,
 ):
-    dtype = "int8"
-
-    def get_tflite_graph():
-        class Model(tf.Module):
-            @tf.function
-            def abs_func(self, x):
-                if operator_type == "ABS":
-                    op = tf.math.abs(x)
-                return op
-
-        model = Model()
-
-        concrete_func = model.abs_func.get_concrete_function(
-            tf.TensorSpec(ifm_shape, dtype=tf.float32)
-        )
-
-        # Convert the model
-        def representative_dataset():
-            for _ in range(100):
-                data = np.random.rand(*tuple(ifm_shape))
-                yield [data.astype(np.float32) * 2 - 1]
+    @tf.function
+    def abs_func(x):
+        if operator_type == "ABS":
+            op = tf.math.abs(x)
+        return op
 
-        converter = tf.lite.TFLiteConverter.from_concrete_functions([concrete_func])
-        converter.optimizations = [tf.lite.Optimize.DEFAULT]
-        converter.representative_dataset = representative_dataset
-        converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
-        converter.inference_input_type = tf.int8
-        converter.inference_output_type = tf.int8
-        tflite_model = converter.convert()
-        return tflite_model
-
-    tflite_graph = get_tflite_graph()
-    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
-
-    relay_module, params = relay.frontend.from_tflite(
-        tflite_model,
-        shape_dict={"input": ifm_shape},
-        dtype_dict={"input": dtype},
-    )
-    mod = partition_for_ethosu(relay_module, params)
-
-    # Generate reference data
-    input_data, output_data = infra.generate_ref_data_tflite(tflite_graph)
-
-    compiled_models = infra.build_source(
-        mod,
-        input_data,
-        output_data,
-        accel_type,
-    )
-
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_models, accel_type)
+    _compare_tvm_with_tflite(abs_func, [ifm_shape], accel_type)
 
 
 def test_ethosu_section_name():
@@ -1046,33 +796,32 @@ def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtyp
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)
 def test_ethosu_clz(accel_type):
     ifm_shape = (1, 42, 5, 4)
-    # Create a "partitioned" Relay function
-    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int32")
-    clz = infra.make_ethosu_unary_elementwise(ifm0, 4, "CLZ")
-    mod = infra.make_partitioned_function(clz)
-
-    in_data = np.random.randint(-500000, high=500000, size=ifm_shape, dtype="int32")
 
-    def clz_comp(n):
-        n_bin = np.binary_repr(n)
-        if n_bin[0] == "-":
-            return 0
-        else:
-            return 32 - len(n_bin)
-
-    out_data = np.array([clz_comp(i) for i in in_data.ravel()]).reshape(ifm_shape).astype("int32")
-
-    compiled_model = infra.build_source(mod, {"ifm": in_data}, [out_data], accel_type)
-
-    # Assumes only two runtime.Modules are created -- i.e. single offload module
-    ethosu_module = compiled_model[0].executor_factory.lib.imported_modules[0].imported_modules[0]
-
-    # Verify generated C source
-    get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
-    compilation_artifacts = get_artifacts(ethosu_module)
-    cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
-    infra.print_payload(cmms)
-    infra.verify_source(compiled_model, accel_type)
+    def create_model():
+        ifm = relay.var("ifm", shape=ifm_shape, dtype="int32")
+        clz = infra.make_ethosu_unary_elementwise(ifm, 4, "CLZ")
+        return tvm.IRModule.from_expr(relay.Function([ifm], clz))
+
+    def generate_output_data(input_data):
+        def clz_comp(n):
+            n_bin = np.binary_repr(n)
+            if n_bin[0] == "-":
+                return 0
+            else:
+                return 32 - len(n_bin)
+
+        return [
+            np.array([clz_comp(i) for i in input_data["ifm"].ravel()])
+            .reshape(ifm_shape)
+            .astype("int32")
+        ]
+
+    cpu_mod = create_model()
+    input_data = {"ifm": np.random.randint(-500000, high=500000, size=ifm_shape, dtype="int32")}
+    output_data = generate_output_data(input_data)
+    ethosu_mod = _create_ethosu_partition(cpu_mod)
+
+    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
 
 
 @pytest.mark.parametrize("accel_type", ACCEL_TYPES)