From b8d6641f83abd6ef730abe39eb6dd5cefa9ccd82 Mon Sep 17 00:00:00 2001 From: Aleksei-grovety <113356454+Aleksei-grovety@users.noreply.github.com> Date: Thu, 15 Jun 2023 15:34:45 +0400 Subject: [PATCH 1/2] [microNPU][ETHOSU] Fix minimum buffer size Fix minimum buffer size for DMA operations according to alignment. --- .../relay/backend/contrib/ethosu/tir_to_cs_translator.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py b/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py index ee69f6c17e72..e88f9047ddc5 100644 --- a/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py +++ b/python/tvm/relay/backend/contrib/ethosu/tir_to_cs_translator.py @@ -926,17 +926,20 @@ def _create_npu_dma_op(serial_copy): """This is a helper function to capture the list of arguments to create a NpuDmaOperation object""" data_type_bytes = np.iinfo(np.dtype(serial_copy.read_address.dtype)).bits // 8 + length = int(serial_copy.length.value) * data_type_bytes + # The buffer size in bytes must be at least 16 bytes + length = max(length, 16) src = vapi.NpuAddressRange( # region will be updated later region=0, address=serial_copy.read_address, - length=int(serial_copy.length.value) * data_type_bytes, + length=length, ) dest = vapi.NpuAddressRange( # region will be updated later region=0, address=serial_copy.write_address, - length=int(serial_copy.length.value) * data_type_bytes, + length=length, ) return vapi.NpuDmaOperation(src, dest) @@ -1076,7 +1079,6 @@ def _create_npu_op_binary_elementwise(serial_binary_elementwise: spec.SerialBina def translate_ethosu_unary_elementwise( tir_extern_call: tvm.tir.Call, ) -> vapi.NpuElementWiseOperation: - """This function will translate a tir extern_call as produced by Relay to TIR compilation. Parameters From 8a3b50d6bedf1da338089743c264ea3cbdfab29d Mon Sep 17 00:00:00 2001 From: Aleksei-grovety <113356454+Aleksei-grovety@users.noreply.github.com> Date: Fri, 16 Jun 2023 18:49:19 +0400 Subject: [PATCH 2/2] add test --- .../test_ethosu/test_tir_to_cs_translator.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py index a293e2691923..05d6f71037fa 100644 --- a/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py +++ b/tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py @@ -84,6 +84,26 @@ def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((20 # fmt: on +# fmt: off +"""A tir test case with copy operation having a buffer size less than the minimum for a DMA operation""" +@tvm.script.ir_module +class CopyLessMinimal: + @T.prim_func + def main(ethos_u_0_i0: T.Buffer((1, 4), "int8"), ethosu_write: T.Buffer((1, 4), "int8")): + T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) + p1_global = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + ethosu_write_1 = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p1 = T.Buffer((4,), "int8") + p1_global_1 = T.Buffer((4,), "int8", data=p1_global) + T.call_extern("handle", "ethosu_copy", p1[0], 4, p1_global_1[0]) + ethos_u_0_i0_1 = T.Buffer((4,), "int8", data=ethos_u_0_i0.data) + ethosu_write_2 = T.Buffer((4,), "int8", data=ethosu_write_1, align=4) + T.call_extern("handle", "ethosu_binary_elementwise", "int8", 1, 1, 4, 1, 0, 1, ethos_u_0_i0_1[0], 0, 0, 0, T.float32(0.0039170472882688046), -128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, p1_global_1[0], 0, 0, 0, T.float32(0.0028046639636158943), -128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, ethosu_write_2[0], 0, 0, 0, T.float32(0.0067217112518846989), -128, "NHWC", 1, 1, 1, "ADD", 0, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0) + ethosu_write_3 = T.Buffer((4,), "int8", data=ethosu_write.data) + T.call_extern("handle", "ethosu_identity", "int8", 1, 4, 1, 1, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 1, 4, 1, 1, 0, 4, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) +# fmt: on + + # fmt: off """A TIR test module of weight streaming""" @tvm.script.ir_module @@ -658,6 +678,21 @@ def populate_ethosu_copy_calls(stmt): }, ], }, + { + # Mod contains a copy operation with a buffer size of 4 bytes and it should be replaced by 16 + "tir_module": CopyLessMinimal, + "param_dict": { + 1: np.random.randint(np.iinfo("int8").min, np.iinfo("int8").max, [1, 4], "int8"), + }, + # Reference outputs + "ref": [ + { + "src": "p1", + "dest": "p1_global_1", + "length": 16, + }, + ], + }, ] for test_case in test_cases: