From 9875bd39195f053dddbdf4443aabc29c2c794bb8 Mon Sep 17 00:00:00 2001 From: Eric Lunderberg Date: Tue, 11 Jul 2023 10:37:15 -0500 Subject: [PATCH] Update EthosU unit tests to expect DeclBuffer nodes --- .../test_ethosu/cascader/test_integration.py | 15 +- .../test_ethosu/test_encode_constants.py | 170 +++++++++++------- .../test_ethosu/test_identity_optimizer.py | 5 +- .../test_ethosu/test_layout_optimizer.py | 2 +- .../test_ethosu/test_remove_concatenates.py | 8 +- .../test_ethosu/test_replace_conv2d.py | 70 ++++---- .../contrib/test_ethosu/test_replace_copy.py | 32 ++-- .../contrib/test_ethosu/test_scheduler.py | 20 ++- 8 files changed, 189 insertions(+), 133 deletions(-) diff --git a/tests/python/contrib/test_ethosu/cascader/test_integration.py b/tests/python/contrib/test_ethosu/cascader/test_integration.py index 14cc8fbc61cfc..1eb3c3b87aab4 100644 --- a/tests/python/contrib/test_ethosu/cascader/test_integration.py +++ b/tests/python/contrib/test_ethosu/cascader/test_integration.py @@ -109,7 +109,10 @@ def test_single_conv_compute_cycles_hint(): for single convolution. """ primfunc = _compile_model(_create_single_conv2d()) - ops = primfunc.body.body.seq + body = primfunc + while not isinstance(body, tvm.tir.SeqStmt): + body = body.body + ops = body.seq compute_cycles_hints = [2944, 320] for op, compute_cycle_hint in zip(ops, compute_cycles_hints): assert op.attr_key == "pragma_compute_cycles_hint" @@ -122,7 +125,10 @@ def test_double_conv_compute_cycles_hint(): for double convolution. """ primfunc = _compile_model(_create_double_conv2d()) - ops = primfunc.body.body.body.body.seq + body = primfunc + while not isinstance(body, tvm.tir.SeqStmt): + body = body.body + ops = body.seq compute_cycles_hints = [2944, 1408, 320, 240] for op, compute_cycle_hint in zip(ops, compute_cycles_hints): assert op.attr_key == "pragma_compute_cycles_hint" @@ -135,7 +141,10 @@ def test_scalar_add_compute_cycles_hint(): for add with scalar values. """ primfunc = _compile_model(_create_scalar_add()) - ops = primfunc.body.body.seq + body = primfunc + while not isinstance(body, tvm.tir.SeqStmt): + body = body.body + ops = body.seq compute_cycles_hints = [16, 24] for op, compute_cycle_hint in zip(ops, compute_cycles_hints): diff --git a/tests/python/contrib/test_ethosu/test_encode_constants.py b/tests/python/contrib/test_ethosu/test_encode_constants.py index 4341f367f0e1a..86c8763b9de65 100644 --- a/tests/python/contrib/test_ethosu/test_encode_constants.py +++ b/tests/python/contrib/test_ethosu/test_encode_constants.py @@ -39,8 +39,8 @@ class WeightStreamOnlyU55: def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) + placeholder = T.decl_buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.decl_buffer([2048], "int8", data=input_ethosu_write.data) buffer1 = T.Buffer([160], "uint8") buffer3 = T.Buffer([144], "uint8") buffer5 = T.Buffer([144], "uint8") @@ -48,10 +48,10 @@ def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_writ buffer8 = T.Buffer([32], "uint8") # body p1_data = T.allocate([160], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([160], "uint8", data=p1_data) + p1 = T.decl_buffer([160], "uint8", data=p1_data) + buffer9 = T.decl_buffer([144], "uint8", data=p1_data) p2_data = T.allocate([144], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([144], "uint8", data=p2_data) - buffer9 = T.Buffer([144], "uint8", data=p1.data) + p2 = T.decl_buffer([144], "uint8", data=p2_data) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 160, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 144, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 128, T.int8(-1), T.int8(-1), 12, p1[128], 32, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -68,27 +68,38 @@ class WeightStreamOnlyU65: @T.prim_func def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) + + ifm_1 = T.decl_buffer((8192,), "int8", data=ifm.data) + ethosu_write_1 = T.decl_buffer((2048,), "int8", data=ethosu_write.data) + p2_global_6 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p2_global_3 = T.decl_buffer((192,), "uint8", data=p2_global_6) + p2_global_4 = T.allocate([192], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p2_global_4_1 = T.decl_buffer((192,), "uint8", data=p2_global_4) + p2_global_5 = T.allocate([208], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p2_global_5_1 = T.decl_buffer((208,), "uint8", data=p2_global_5) + buffer_encoded = T.Buffer((192,), "uint8") - p2_global_3 = T.Buffer((192,), "uint8", data=p2_global_6) - T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 192, p2_global_3[0]) buffer_encoded_1 = T.Buffer((192,), "uint8") - p2_global_4_1 = T.Buffer((192,), "uint8", data=p2_global_4) - T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 192, p2_global_4_1[0]) buffer_encoded_2 = T.Buffer((208,), "uint8") - p2_global_5_1 = T.Buffer((208,), "uint8", data=p2_global_5) + buffer_encoded_3 = T.Buffer((192,), "uint8") + + + T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 192, p2_global_3[0]) + + T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 192, p2_global_4_1[0]) T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 208, p2_global_5_1[0]) - ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) - ethosu_write_1 = T.Buffer((2048,), "int8", data=ethosu_write.data) + + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_3[0], 80, p2_global_3[80], 80, 12, p2_global_3[160], 16, p2_global_3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_3 = T.Buffer((192,), "uint8") - p2_global_6_1 = T.Buffer((192,), "uint8", data=p2_global_6) - T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 192, p2_global_6_1[0]) + + + T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 192, p2_global_3[0]) T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_4_1[0], 80, p2_global_4_1[80], 80, 12, p2_global_4_1[160], 16, p2_global_4_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_5_1[0], 96, p2_global_5_1[96], 80, 12, p2_global_5_1[176], 16, p2_global_5_1[192], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_6_1[0], 80, p2_global_6_1[80], 80, 12, p2_global_6_1[160], 16, p2_global_6_1[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_1[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p2_global_3[0], 80, p2_global_3[80], 80, 12, p2_global_3[160], 16, p2_global_3[176], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) __tvm_meta__ = None # fmt: on @@ -154,16 +165,18 @@ def _get_func(): class RereadWeightsU55: @T.prim_func def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer1 = T.Buffer([384], "uint8") - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body + placeholder = T.decl_buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.decl_buffer([2048], "int8", data=input_ethosu_write.data) + p1_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([384], "uint8", data=p1_data) + p1 = T.decl_buffer([384], "uint8", data=p1_data) + p2_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([384], "uint8", data=p2_data) + p2 = T.decl_buffer([384], "uint8", data=p2_data) + + buffer1 = T.Buffer([384], "uint8") + T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 384, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 304, T.int8(-1), T.int8(-1), 12, p1[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -175,17 +188,19 @@ def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_writ class RereadWeightsU65: @T.prim_func def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - placeholder_encoded_1 = T.Buffer([464], "uint8") - # body + + placeholder = T.decl_buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.decl_buffer([2048], dtype="int8", data=input_ethosu_write.data) + p1_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([464], "uint8", data=p1_data) + p1 = T.decl_buffer([464], "uint8", data=p1_data) + p2_data = T.allocate([464], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([464], "uint8", data=p2_data) + p2 = T.decl_buffer([464], "uint8", data=p2_data) + + placeholder_encoded_1 = T.Buffer([464], "uint8") + T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 464, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 8, 32, 16, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 8, 8, 16, 0, 8, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p1[0], 192, p1[192], 176, 12, p1[368], 48, p1[416], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -256,17 +271,19 @@ def _get_func(): class DirectReadOnlyU55: @T.prim_func def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) + + placeholder = T.decl_buffer([8192], "int8", data=input_placeholder.data) + ethosu_write = T.decl_buffer([2048], "int8", data=input_ethosu_write.data) + + ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) + ethosu_write_1 = T.decl_buffer([4096], "int8", data=ethosu_write_1_data) + buffer = T.Buffer([592], "uint8") buffer_1 = T.Buffer([160], "uint8") buffer_2 = T.Buffer([160], "uint8") buffer_3 = T.Buffer([80], "uint8") - placeholder = T.Buffer([8192], "int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body - ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer[0], 592, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 160, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -276,18 +293,19 @@ def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_writ class DirectReadOnlyU65: @T.prim_func def main(input_placeholder: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - # buffer definition + + placeholder = T.decl_buffer([8192], dtype="int8", data=input_placeholder.data) + ethosu_write = T.decl_buffer([2048], dtype="int8", data=input_ethosu_write.data) + + ethosu_write_2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) + ethosu_write_2 = T.decl_buffer([4096], "int8", data=ethosu_write_2_data) + placeholder_encoded = T.Buffer([608], dtype="uint8") placeholder_encoded_1 = T.Buffer([160], dtype="uint8") placeholder_encoded_2 = T.Buffer([208], dtype="uint8") placeholder_encoded_3 = T.Buffer([96], dtype="uint8") - placeholder = T.Buffer([8192], dtype="int8", data=input_placeholder.data) - ethosu_write = T.Buffer([2048], dtype="int8", data=input_ethosu_write.data) - # body - ethosu_write_2_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_2 = T.Buffer([4096], "int8", data=ethosu_write_2_data) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded[0], 304, placeholder_encoded[304], 304, 12, placeholder_encoded_1[0], 80, placeholder_encoded_1[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_encoded_2[0], 112, placeholder_encoded_2[112], 96, 12, placeholder_encoded_3[0], 48, placeholder_encoded_3[48], 48, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -354,23 +372,28 @@ def _get_func(): class MixedReadU55: @T.prim_func def main(input_ifm: T.Buffer((1,16,16,32), "int8"), input_ethosu_write: T.Buffer((1,16,16,8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) + + ifm = T.decl_buffer([8192], "int8", data=input_ifm.data) + ethosu_write = T.decl_buffer([2048], "int8", data=input_ethosu_write.data) + + p1_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) + p1 = T.decl_buffer([112], "uint8", data=p1_data) + + p3_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) + p3 = T.decl_buffer([4096], "int8", data=p3_data) + + p2_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) + p2 = T.decl_buffer([112], "uint8", data=p2_data) + buffer1 = T.Buffer([112], "uint8") buffer3 = T.Buffer([112], "uint8") buffer5 = T.Buffer([112], "uint8") buffer7 = T.Buffer([112], "uint8") buffer9 = T.Buffer([592], "uint8") buffer10 = T.Buffer([160], "uint8") - ifm = T.Buffer([8192], "int8", data=input_ifm.data) - ethosu_write = T.Buffer([2048], "int8", data=input_ethosu_write.data) - # body - p1_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([112], "uint8", data=p1_data) - p3_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - p3 = T.Buffer([4096], "int8", data=p3_data) - p2_data = T.allocate([112], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([112], "uint8", data=p2_data) + + T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 112, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, p3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, buffer9[0], 592, T.int8(-1), T.int8(-1), 12, buffer10[0], 160, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 112, p2[0], dtype="handle")) @@ -388,32 +411,38 @@ class MixedReadU65: @T.prim_func def main(ifm: T.Buffer((1, 16, 16, 32), "int8"), ethosu_write: T.Buffer((1, 16, 16, 8), "int8")): T.func_attr({"from_legacy_te_schedule": T.bool(True), "global_symbol": "main", "tir.noalias": T.bool(True)}) + + ifm_1 = T.decl_buffer((8192,), "int8", data=ifm.data) + ethosu_write_3 = T.decl_buffer((2048,), "int8", data=ethosu_write.data) + p5_global = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p5_global_3 = T.decl_buffer((128,), "uint8", data=p5_global) + p5_global_1 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p5_global_4 = T.decl_buffer((128,), "uint8", data=p5_global_1) + ethosu_write_1 = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + ethosu_write_2 = T.decl_buffer((4096,), "int8", data=ethosu_write_1) + p5_global_2 = T.allocate([128], "uint8", "global", annotations={"disable_lower_builtin": T.bool(True)}) + p5_global_5 = T.decl_buffer((128,), "uint8", data=p5_global_2) + + p1_encoded = T.Buffer((608,), "uint8") + p2_encoded = T.Buffer((160,), "uint8") buffer_encoded = T.Buffer((128,), "uint8") - p5_global_3 = T.Buffer((128,), "uint8", data=p5_global) - T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 128, p5_global_3[0]) buffer_encoded_1 = T.Buffer((128,), "uint8") - p5_global_4 = T.Buffer((128,), "uint8", data=p5_global_1) + buffer_encoded_2 = T.Buffer((128,), "uint8") + buffer_encoded_3 = T.Buffer((128,), "uint8") + + T.call_extern("handle", "ethosu_copy", buffer_encoded[0], 128, p5_global_3[0]) T.call_extern("handle", "ethosu_copy", buffer_encoded_1[0], 128, p5_global_4[0]) - ifm_1 = T.Buffer((8192,), "int8", data=ifm.data) - ethosu_write_2 = T.Buffer((4096,), "int8", data=ethosu_write_1) - p1_encoded = T.Buffer((608,), "uint8") - p2_encoded = T.Buffer((160,), "uint8") T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, ifm_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, p1_encoded[0], 304, p1_encoded[304], 304, 12, p2_encoded[0], 80, p2_encoded[80], 80, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_2 = T.Buffer((128,), "uint8") - p5_global_5 = T.Buffer((128,), "uint8", data=p5_global_2) T.call_extern("handle", "ethosu_copy", buffer_encoded_2[0], 128, p5_global_5[0]) - ethosu_write_3 = T.Buffer((2048,), "int8", data=ethosu_write.data) T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_3[0], 48, p5_global_3[48], 48, 12, p5_global_3[96], 16, p5_global_3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - buffer_encoded_3 = T.Buffer((128,), "uint8") - p5_global_6 = T.Buffer((128,), "uint8", data=p5_global) - T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 128, p5_global_6[0]) + T.call_extern("handle", "ethosu_copy", buffer_encoded_3[0], 128, p5_global_3[0]) T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_4[0], 48, p5_global_4[48], 48, 12, p5_global_4[96], 16, p5_global_4[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_5[0], 48, p5_global_5[48], 48, 12, p5_global_5[96], 16, p5_global_5[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) - T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_6[0], 48, p5_global_6[48], 48, 12, p5_global_6[96], 16, p5_global_6[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) + T.call_extern("handle", "ethosu_conv2d", "int8", 16, 16, 16, 16, 0, 16, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 16, 1, "int8", 16, 16, 2, 16, 0, 16, ethosu_write_3[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, p5_global_3[0], 48, p5_global_3[48], 48, 12, p5_global_3[96], 16, p5_global_3[112], 16, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0) __tvm_meta__ = None # fmt: on @@ -512,7 +541,10 @@ def get_graph(): # Check tile address for the scalar constant input hasn't been # overwritten. - extern_calls = tir_mod["main"].body.body.body.body + extern_calls = tir_mod["main"] + while not isinstance(extern_calls, tvm.tir.SeqStmt): + extern_calls = extern_calls.body + binary_elementwise = extern_calls[-1].value args = binary_elementwise.args diff --git a/tests/python/contrib/test_ethosu/test_identity_optimizer.py b/tests/python/contrib/test_ethosu/test_identity_optimizer.py index 3ae58dfc81bad..e5f2884827039 100644 --- a/tests/python/contrib/test_ethosu/test_identity_optimizer.py +++ b/tests/python/contrib/test_ethosu/test_identity_optimizer.py @@ -311,7 +311,10 @@ def get_graph(): # Check for hints in the TIR prim func that the identity optimization pass # has ran. There should not be an identity in the prim func. - assert prim_func.body.value.args[0] == "ethosu_pooling" + body = prim_func + while not isinstance(body, tvm.tir.Evaluate): + body = body.body + assert body.value.args[0] == "ethosu_pooling" def test_same_output(): diff --git a/tests/python/contrib/test_ethosu/test_layout_optimizer.py b/tests/python/contrib/test_ethosu/test_layout_optimizer.py index 69d549acbb3b6..355e302be9569 100644 --- a/tests/python/contrib/test_ethosu/test_layout_optimizer.py +++ b/tests/python/contrib/test_ethosu/test_layout_optimizer.py @@ -794,7 +794,7 @@ def get_graph(): prim_func = mod[external_gv_name] # Check for hints in the TIR prim func that the layout optimization pass has ran - ops = prim_func.body.body.seq + ops = prim_func.body.body.body.body.body.seq max_pool1, max_pool2 = ops assert str(max_pool1.value.args[31]) == '"NHCWB16"' diff --git a/tests/python/contrib/test_ethosu/test_remove_concatenates.py b/tests/python/contrib/test_ethosu/test_remove_concatenates.py index ef034930d7bc0..d719b67b90098 100644 --- a/tests/python/contrib/test_ethosu/test_remove_concatenates.py +++ b/tests/python/contrib/test_ethosu/test_remove_concatenates.py @@ -35,9 +35,9 @@ def main(input_placeholder: T.Buffer((1,8,12,16), "int8"), input_placeholder_1: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer(1536, dtype="int8", data=input_placeholder.data) - placeholder_1 = T.Buffer(1280, dtype="int8", data=input_placeholder_1.data) - T_concat = T.Buffer(4096, dtype="int8", data=input_T_concat.data) + placeholder = T.decl_buffer(1536, dtype="int8", data=input_placeholder.data) + placeholder_1 = T.decl_buffer(1280, dtype="int8", data=input_placeholder_1.data) + T_concat = T.decl_buffer(4096, dtype="int8", data=input_T_concat.data) buffer = T.Buffer([2992], "uint8") buffer_1 = T.Buffer([160], "uint8") @@ -49,7 +49,7 @@ def main(input_placeholder: T.Buffer((1,8,12,16), "int8"), input_placeholder_1: buffer_7 = T.Buffer([160], "uint8") # body T_concat_1_data = T.allocate([2816], "int8", "global", annotations={"disable_lower_builtin":True}) - T_concat_1 = T.Buffer([2816], "int8", data=T_concat_1_data) + T_concat_1 = T.decl_buffer([2816], "int8", data=T_concat_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, placeholder_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 160, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 10, 16, 8, 0, 10, T_concat_1[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 352, 16, 1, "int8", 8, 10, 16, 8, 0, 10, T_concat[352], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 16, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_3[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 12, 16, 8, 0, 12, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 192, 16, 1, "int8", 8, 12, 16, 8, 0, 12, T_concat_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 352, 16, 1, 3, 3, 1, 1, 1, 1, buffer_4[0], 2992, T.int8(-1), T.int8(-1), 12, buffer_5[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_replace_conv2d.py b/tests/python/contrib/test_ethosu/test_replace_conv2d.py index 32d1303e124e5..80cf012fb92f9 100644 --- a/tests/python/contrib/test_ethosu/test_replace_conv2d.py +++ b/tests/python/contrib/test_ethosu/test_replace_conv2d.py @@ -368,17 +368,17 @@ def _visit(stmt): class Conv2dDoubleCascade1: @T.prim_func def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([304], "uint8") + buffer = T.Buffer([304], "uint8") buffer_1 = T.Buffer([80], "uint8") buffer_2 = T.Buffer([320], "uint8") buffer_3 = T.Buffer([160], "uint8") - placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) - # body + + placeholder_5 = T.decl_buffer([192], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.decl_buffer([512], 'int8', data=input_ethosu_write_1.data) + ethosu_write_2_data = T.allocate([1024], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([1024], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.decl_buffer([1024], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 128, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 1, 1, 1, 1, 1, 1, buffer[0], 304, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 4, 3, 8, 0, 4, placeholder_5[12], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 4, 32, 8, 0, 4, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 32, 1, 1, 1, 1, 1, 1, 1, buffer_3[0], 160, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -392,15 +392,18 @@ class Conv2dDoubleCascade2: def main(input_placeholder_5: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write_1: T.Buffer((1, 8, 8, 8), "int8")) -> None: # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) + placeholder_5 = T.decl_buffer([192], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.decl_buffer([512], 'int8', data=input_ethosu_write_1.data) + + ethosu_write_2_data = T.allocate([1536], "int8", "global", annotations={"disable_lower_builtin": True}) + ethosu_write_2 = T.decl_buffer([1536], "int8", data=ethosu_write_2_data) + buffer = T.Buffer([80], "uint8") buffer_1 = T.Buffer([320], "uint8") buffer_2 = T.Buffer([1312], "uint8") buffer_3 = T.Buffer([2608], "uint8") - placeholder_5 = T.Buffer([192], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([512], 'int8', data=input_ethosu_write_1.data) # body - ethosu_write_2_data = T.allocate([1536], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([1536], "int8", data=ethosu_write_2_data) + T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[256], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 4, 8, 8, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 64, 8, 1, 3, 3, 1, 1, 1, 1, buffer_3[0], 2608, T.int8(-1), T.int8(-1), 12, buffer[0], 80, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[48], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 5, 8, 32, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 3, 3, 1, 1, 1, 1, buffer_2[0], 1312, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -418,12 +421,12 @@ def main(input_placeholder_5: T.Buffer((1, 16, 16, 3), "int8"), input_ethosu_wri buffer_1 = T.Buffer([80], "uint8") buffer_2 = T.Buffer([320], "uint8") buffer_3 = T.Buffer([880], "uint8") - placeholder_5 = T.Buffer([768], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([640], 'int8', data=input_ethosu_write_1.data) + placeholder_5 = T.decl_buffer([768], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.decl_buffer([640], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([2560], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer([2560], "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.decl_buffer([2560], "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 3, 8, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 32, 8, 0, 8, ethosu_write_2[512], 0, 0, 0, T.float32(0.5), 10, "NHWC", 256, 32, 1, "int8", 8, 4, 8, 8, 0, 4, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 32, 8, 1, 2, 3, 2, 1, 2, 1, buffer[0], 1744, T.int8(-1), T.int8(-1), 12, buffer_1[0], 80, T.int8(-1), T.int8(-1), 2, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 12, 16, 3, 12, 0, 16, placeholder_5[192], 0, 0, 0, T.float32(0.5), 10, "NHWC", 48, 3, 1, "int8", 10, 8, 32, 10, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 32, 1, 2, 3, 2, 1, 2, 1, buffer_3[0], 880, T.int8(-1), T.int8(-1), 12, buffer_2[0], 320, T.int8(-1), T.int8(-1), 0, 1, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -443,11 +446,11 @@ def main(input_placeholder_5: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_w buffer_1 = T.Buffer([352], "uint8") buffer_2 = T.Buffer([272], "uint8") buffer_3 = T.Buffer([11040], "uint8") - placeholder_5 = T.Buffer([1024], 'int8', data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([2048], 'int8', data=input_ethosu_write_1.data) + placeholder_5 = T.decl_buffer([1024], 'int8', data=input_placeholder_5.data) + ethosu_write_1 = T.decl_buffer([2048], 'int8', data=input_ethosu_write_1.data) # body ethosu_write_2_data = T.allocate([2304], "int8", "global", annotations={"disable_lower_builtin": True}) - ethosu_write_2 = T.Buffer((2304,), "int8", data=ethosu_write_2_data) + ethosu_write_2 = T.decl_buffer((2304,), "int8", data=ethosu_write_2_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[384], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 384, 16, 128, "int8", 4, 8, 26, 4, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 256, 16, 128, 3, 3, 1, 1, 1, 1, buffer_3[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_2[0], 272, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 6, 8, 3, 6, 0, 8, placeholder_5[256], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 5, 8, 35, 5, 0, 8, ethosu_write_2[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 384, 16, 128, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -465,11 +468,11 @@ def main(input_placeholder: T.Buffer((1, 8, 8, 3), "int8"), input_ethosu_write: buffer_1 = T.Buffer([320], "uint8") buffer_2 = T.Buffer([304], "uint8") buffer_3 = T.Buffer([80], "uint8") - placeholder = T.Buffer([192], 'int8', data=input_placeholder.data) - ethosu_write = T.Buffer([8192], 'int8', data=input_ethosu_write.data) + placeholder = T.decl_buffer([192], 'int8', data=input_placeholder.data) + ethosu_write = T.decl_buffer([8192], 'int8', data=input_ethosu_write.data) # body ethosu_write_1_data = T.allocate([4096], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([4096], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.decl_buffer([4096], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 32, 8, 16, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 8, 1, 1, 1, 1, 1, 1, 1, buffer_2[0], 304, T.int8(-1), T.int8(-1), 12, buffer_3[0], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 4, 8, 3, 4, 0, 8, placeholder[96], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 3, 1, "int8", 8, 16, 32, 8, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 512, 32, 1, 1, 1, 1, 1, 1, 1, buffer[0], 160, T.int8(-1), T.int8(-1), 12, buffer_1[0], 320, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "ZEROS", 0, 0, 0, dtype="handle")) @@ -487,11 +490,11 @@ def main(input_placeholder: T.Buffer((1, 8, 1, 8, 16), "int8"), input_ethosu_wri buffer_1 = T.Buffer([352], "uint8") buffer_2 = T.Buffer([11040], "uint8") buffer_3 = T.Buffer([272], "uint8") - placeholder = T.Buffer([1024], 'int8', data=input_placeholder.data) - ethosu_write = T.Buffer([32768], 'int8', data=input_ethosu_write.data) + placeholder = T.decl_buffer([1024], 'int8', data=input_placeholder.data) + ethosu_write = T.decl_buffer([32768], 'int8', data=input_ethosu_write.data) # body ethosu_write_1_data = T.allocate([12288], "int8", "global", annotations={"disable_lower_builtin":True}) - ethosu_write_1 = T.Buffer([12288], "int8", data=ethosu_write_1_data) + ethosu_write_1 = T.decl_buffer([12288], "int8", data=ethosu_write_1_data) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 3, 8, 0, 8, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 128, 16, 1, "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 768, 16, 256, 3, 3, 1, 1, 1, 1, buffer[0], 1456, T.int8(-1), T.int8(-1), 12, buffer_1[0], 352, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 35, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.5), 10, "NHCWB16", 768, 16, 256, "int8", 32, 32, 26, 32, 0, 32, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHCWB16", 1024, 16, 512, 3, 3, 1, 1, 1, 1, buffer_2[0], 11040, T.int8(-1), T.int8(-1), 12, buffer_3[0], 272, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NEAREST", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -636,6 +639,7 @@ def _get_func( config = { "enable_cascader": True, } + with tvm.transform.PassContext(opt_level=3, config={"relay.ext.ethos-u.options": config}): func = _get_func(*params[:-1]) mod, _ = _lower_to_tir(func, cascader=total_cascader(params[-1])) @@ -653,8 +657,8 @@ def main(input_placeholder_3: T.Buffer((1, 10, 12, 8), "int8"), input_ethosu_wri T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([848], "uint8") buffer_1 = T.Buffer([160], "uint8") - placeholder_3 = T.Buffer([960], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([1024], 'int8', data=input_ethosu_write_1.data) + placeholder_3 = T.decl_buffer([960], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([1024], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 8, 8, 4, 8, 0, 8, placeholder_3[120], 0, 0, 0, T.float32(0.5), 10, "NHWC", 96, 8, 1, "int8", 8, 8, 16, 8, 0, 8, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 16, 1, 3, 3, 1, 1, 1, 1, buffer[0], 848, T.int8(-1), T.int8(-1), 12, buffer_1[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -668,8 +672,8 @@ def main(input_placeholder_3: T.Buffer((1, 7, 9, 5), "int8"), input_ethosu_write T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") buffer_1 = T.Buffer([656], "uint8") - placeholder_3 = T.Buffer([315], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([240], 'int8', data=input_ethosu_write_1.data) + placeholder_3 = T.decl_buffer([315], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([240], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 3, 5, 3, 3, 0, 5, placeholder_3[146], 0, 0, 0, T.float32(0.5), 10, "NHWC", 45, 5, 1, "int8", 3, 5, 16, 3, 0, 5, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 80, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 656, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -712,8 +716,8 @@ def main(input_placeholder_3: T.Buffer((4, 6, 8, 1), "int8"), input_ethosu_write T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) + placeholder_3 = T.decl_buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -728,8 +732,8 @@ def main(input_placeholder_3: T.Buffer((1, 24, 8), "int8"), input_ethosu_write_1 T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) + placeholder_3 = T.decl_buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -744,8 +748,8 @@ def main(input_placeholder_3: T.Buffer((192, 1), "int8"), input_ethosu_write_1: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") buffer_1 = T.Buffer([848], "uint8") - placeholder_3 = T.Buffer([192], 'int8', data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) + placeholder_3 = T.decl_buffer([192], 'int8', data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) @@ -760,7 +764,7 @@ def main(placeholder_3: T.Buffer((192,), "int8"), input_ethosu_write_1: T.Buffer T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) buffer = T.Buffer([160], "uint8") buffer_1 = T.Buffer([848], "uint8") - ethosu_write_1 = T.Buffer([768], 'int8', data=input_ethosu_write_1.data) + ethosu_write_1 = T.decl_buffer([768], 'int8', data=input_ethosu_write_1.data) # body T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 1, 1, 0, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 5, 6, 4, 5, 0, 6, placeholder_3[72], 0, 0, 0, T.float32(0.5), 10, "NHWC", 24, 4, 1, "int8", 4, 6, 16, 4, 0, 6, ethosu_write_1[384], 0, 0, 0, T.float32(0.25), 14, "NHWC", 96, 16, 1, 3, 3, 1, 1, 1, 1, buffer_1[0], 848, T.int8(-1), T.int8(-1), 12, buffer[0], 160, T.int8(-1), T.int8(-1), 0, 1, 1, 1, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_replace_copy.py b/tests/python/contrib/test_ethosu/test_replace_copy.py index 94763c5d3fbf9..d4f35c23f76e2 100644 --- a/tests/python/contrib/test_ethosu/test_replace_copy.py +++ b/tests/python/contrib/test_ethosu/test_replace_copy.py @@ -35,14 +35,16 @@ class ReferenceModule: @T.prim_func def main(input_placeholder_3: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 8), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer_1 = T.Buffer([384], "uint8") - placeholder_3 = T.Buffer([8192], dtype="int8", data=input_placeholder_3.data) - ethosu_write_1 = T.Buffer([2048], dtype="int8", data=input_ethosu_write_1.data) - # body + + placeholder_3 = T.decl_buffer([8192], dtype="int8", data=input_placeholder_3.data) + ethosu_write_1 = T.decl_buffer([2048], dtype="int8", data=input_ethosu_write_1.data) + placeholder_global_data = T.allocate([384], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_global = T.Buffer([384], "uint8", data=placeholder_global_data) + placeholder_global = T.decl_buffer([384], "uint8", data=placeholder_global_data) + + buffer_1 = T.Buffer([384], "uint8") + T.evaluate(T.call_extern("ethosu_copy", buffer_1[0], 384, placeholder_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_3[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 8, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 304, T.int8(-1), T.int8(-1), 12, placeholder_global[304], 80, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) __tvm_meta__ = None @@ -79,17 +81,19 @@ def _get_func(): class WeightStream: @T.prim_func def main(input_placeholder_5: T.Buffer((1, 16, 16, 32), "int8"), input_ethosu_write_1: T.Buffer((1, 16, 16, 16), "int8")) -> None: - # function attr dict T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - buffer = T.Buffer([528], "uint8") - buffer_2 = T.Buffer([336], "uint8") - placeholder_5 = T.Buffer([8192], dtype="int8", data=input_placeholder_5.data) - ethosu_write_1 = T.Buffer([4096], dtype="int8", data=input_ethosu_write_1.data) - # body + + placeholder_5 = T.decl_buffer([8192], dtype="int8", data=input_placeholder_5.data) + ethosu_write_1 = T.decl_buffer([4096], dtype="int8", data=input_ethosu_write_1.data) + placeholder_d_global_data = T.allocate([528], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global = T.Buffer([528], "uint8", data=placeholder_d_global_data) + placeholder_d_global = T.decl_buffer([528], "uint8", data=placeholder_d_global_data) placeholder_d_global_1_data = T.allocate([336], "uint8", "global", annotations={"disable_lower_builtin": True}) - placeholder_d_global_1 = T.Buffer([336], "uint8", data=placeholder_d_global_1_data) + placeholder_d_global_1 = T.decl_buffer([336], "uint8", data=placeholder_d_global_1_data) + + buffer = T.Buffer([528], "uint8") + buffer_2 = T.Buffer([336], "uint8") + T.evaluate(T.call_extern("ethosu_copy", buffer[0], 528, placeholder_d_global[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer_2[0], 336, placeholder_d_global_1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 0, 16, placeholder_5[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 16, 16, 10, 16, 0, 16, ethosu_write_1[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 256, 16, 1, 1, 1, 1, 1, 1, 1, placeholder_d_global[0], 416, T.int8(-1), T.int8(-1), 12, placeholder_d_global[416], 112, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle")) diff --git a/tests/python/contrib/test_ethosu/test_scheduler.py b/tests/python/contrib/test_ethosu/test_scheduler.py index 1edd840b0b0e9..e5e200b4cc7cb 100644 --- a/tests/python/contrib/test_ethosu/test_scheduler.py +++ b/tests/python/contrib/test_ethosu/test_scheduler.py @@ -198,18 +198,22 @@ class DiamondGraphTir: @T.prim_func def main(input_placeholder: T.Buffer((1, 56, 56, 96), "int8"), input_ethosu_write: T.Buffer((1, 56, 56, 24), "int8")) -> None: T.func_attr({"from_legacy_te_schedule": True, "global_symbol": "main", "tir.noalias": True}) - placeholder = T.Buffer([301056], dtype='int8', data=input_placeholder.data) - ethosu_write = T.Buffer([75264], dtype='int8', data=input_ethosu_write.data) - buffer1 = T.Buffer([2848], "uint8") - buffer3 = T.Buffer([976], "uint8") + + placeholder = T.decl_buffer([301056], dtype='int8', data=input_placeholder.data) + ethosu_write = T.decl_buffer([75264], dtype='int8', data=input_ethosu_write.data) + p1_data = T.allocate([2848], "uint8", "global", annotations={"disable_lower_builtin":True}) - p1 = T.Buffer([2848], "uint8", data=p1_data) + p1 = T.decl_buffer([2848], "uint8", data=p1_data) p2_data = T.allocate([976], "uint8", "global", annotations={"disable_lower_builtin":True}) - p2 = T.Buffer([976], "uint8", data=p2_data) + p2 = T.decl_buffer([976], "uint8", data=p2_data) p5_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p5 = T.Buffer([75264], "int8", data=p5_data) + p5 = T.decl_buffer([75264], "int8", data=p5_data) p6_data = T.allocate([75264], "int8", "global", annotations={"disable_lower_builtin":True}) - p6 = T.Buffer([75264], "int8", data=p6_data) + p6 = T.decl_buffer([75264], "int8", data=p6_data) + + buffer1 = T.Buffer([2848], "uint8") + buffer3 = T.Buffer([976], "uint8") + T.evaluate(T.call_extern("ethosu_copy", buffer1[0], 2848, p1[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_copy", buffer3[0], 976, p2[0], dtype="handle")) T.evaluate(T.call_extern("ethosu_conv2d", "int8", 56, 56, 96, 56, 0, 56, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 5376, 96, 1, "int8", 56, 56, 24, 56, 0, 56, p5[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 1344, 24, 1, 1, 1, 1, 1, 1, 1, p1[0], 2608, T.int8(-1), T.int8(-1), 12, p1[2608], 240, T.int8(-1), T.int8(-1), 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, dtype="handle"))