Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[microNPU][ETHOSU] Fix minimum buffer size #15104

Merged
merged 2 commits into from
Jun 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -926,17 +926,20 @@ def _create_npu_dma_op(serial_copy):
"""This is a helper function to capture the list of arguments
to create a NpuDmaOperation object"""
data_type_bytes = np.iinfo(np.dtype(serial_copy.read_address.dtype)).bits // 8
length = int(serial_copy.length.value) * data_type_bytes
# The buffer size in bytes must be at least 16 bytes
length = max(length, 16)
src = vapi.NpuAddressRange(
# region will be updated later
region=0,
address=serial_copy.read_address,
length=int(serial_copy.length.value) * data_type_bytes,
length=length,
)
dest = vapi.NpuAddressRange(
# region will be updated later
region=0,
address=serial_copy.write_address,
length=int(serial_copy.length.value) * data_type_bytes,
length=length,
)
return vapi.NpuDmaOperation(src, dest)

Expand Down Expand Up @@ -1076,7 +1079,6 @@ def _create_npu_op_binary_elementwise(serial_binary_elementwise: spec.SerialBina
def translate_ethosu_unary_elementwise(
tir_extern_call: tvm.tir.Call,
) -> vapi.NpuElementWiseOperation:

"""This function will translate a tir extern_call
as produced by Relay to TIR compilation.
Parameters
Expand Down
35 changes: 35 additions & 0 deletions tests/python/contrib/test_ethosu/test_tir_to_cs_translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ def main(placeholder_3: T.Buffer((8192,), "int8"), ethosu_conv2d_1: T.Buffer((20
# fmt: on


# fmt: off
"""A tir test case with copy operation having a buffer size less than the minimum for a DMA operation"""
@tvm.script.ir_module
class CopyLessMinimal:
@T.prim_func
def main(ethos_u_0_i0: T.Buffer((1, 4), "int8"), ethosu_write: T.Buffer((1, 4), "int8")):
T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)})
p1_global = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)})
ethosu_write_1 = T.allocate([4], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)})
p1 = T.Buffer((4,), "int8")
p1_global_1 = T.Buffer((4,), "int8", data=p1_global)
T.call_extern("handle", "ethosu_copy", p1[0], 4, p1_global_1[0])
ethos_u_0_i0_1 = T.Buffer((4,), "int8", data=ethos_u_0_i0.data)
ethosu_write_2 = T.Buffer((4,), "int8", data=ethosu_write_1, align=4)
T.call_extern("handle", "ethosu_binary_elementwise", "int8", 1, 1, 4, 1, 0, 1, ethos_u_0_i0_1[0], 0, 0, 0, T.float32(0.0039170472882688046), -128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, p1_global_1[0], 0, 0, 0, T.float32(0.0028046639636158943), -128, "NHWC", 1, 1, 1, "int8", 1, 1, 4, 1, 0, 1, ethosu_write_2[0], 0, 0, 0, T.float32(0.0067217112518846989), -128, "NHWC", 1, 1, 1, "ADD", 0, "NONE", 0, 0, "TFL", 0, 0, 0, 0, 0, 0)
ethosu_write_3 = T.Buffer((4,), "int8", data=ethosu_write.data)
T.call_extern("handle", "ethosu_identity", "int8", 1, 4, 1, 1, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 1, 4, 1, 1, 0, 4, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0)
# fmt: on


# fmt: off
"""A TIR test module of weight streaming"""
@tvm.script.ir_module
Expand Down Expand Up @@ -658,6 +678,21 @@ def populate_ethosu_copy_calls(stmt):
},
],
},
{
# Mod contains a copy operation with a buffer size of 4 bytes and it should be replaced by 16
"tir_module": CopyLessMinimal,
"param_dict": {
1: np.random.randint(np.iinfo("int8").min, np.iinfo("int8").max, [1, 4], "int8"),
},
# Reference outputs
"ref": [
{
"src": "p1",
"dest": "p1_global_1",
"length": 16,
},
],
},
]

for test_case in test_cases:
Expand Down