Skip to content

Commit

Permalink
Updated arm kernel type signature to match caller
Browse files — browse the repository at this point in the history
  • Loading branch information
Lunderberg committed Jul 4, 2023
1 parent 3eb00a8 commit 33bc277
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
10 changes: 5 additions & 5 deletions python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/max_pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _body():
ib = tvm.tir.ir_builder.create()
ib.emit(
tvm.tir.call_extern(
cc.dtype,
"int32",
f"{func_prefix}_{uniq_id}",
aa.access_ptr("r"),
cc.access_ptr("w"),
Expand All @@ -59,7 +59,7 @@ def _reduce_reset():
ib = tvm.tir.ir_builder.create()
ib.emit(
tvm.tir.call_extern(
cc.dtype, f"{func_prefix}_reset_{uniq_id}", cc.access_ptr("w"), cc.strides[0]
"int32", f"{func_prefix}_reset_{uniq_id}", cc.access_ptr("w"), cc.strides[0]
)
)
return ib.get()
Expand Down Expand Up @@ -96,7 +96,7 @@ def max_impl(uniq_id):
#endif
__attribute__((always_inline)) static inline int32_t max8_reset_{uniq_id}(
int8_t *res,
int N) {{
int32_t N) {{
memset(res, (int8_t)-128, N * sizeof(*res));
return 0;
}}
Expand All @@ -107,7 +107,7 @@ def max_impl(uniq_id):
__attribute__((always_inline)) static inline int32_t max8_loop_{uniq_id}(
int8_t *arg,
int8_t *res,
int N) {{
int32_t N) {{
for ( int i = 0; i < N; ++ i )
if ( arg[i] > res[i] )
res[i] = arg[i];
Expand All @@ -120,7 +120,7 @@ def max_impl(uniq_id):
__attribute__((always_inline)) static inline int32_t max8_{uniq_id}(
int8_t *arg,
int8_t *res,
int N) {{
int32_t N) {{
int32_t *parg32, *pres32;
int una_arg = (int32_t)arg & 0x3, una_res = (int32_t)res & 0x3;
int32_t retcode = 0;
Expand Down
12 changes: 6 additions & 6 deletions python/tvm/topi/arm_cpu/mprofile/dsp/micro_kernel/tensordot.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,14 @@ def _load_tensor_vars(halfwords, tensor_w) -> Iterator[str]:
var_name = f"{_get_int16_alias(halfwords[i])}__{_get_int16_alias(halfwords[i+1])}"
y, x = halfwords[i + 1] or halfwords[i]
tensor_index = (y * tensor_w + x + offset) // 2
yield f"int32_t tensor__{var_name} = tensor[{tensor_index}];"
yield f"int32_t tensor__{var_name} = ((int32_t*)tensor)[{tensor_index}];"


def _load_kernel_vars(halfwords) -> Iterator[str]:
assert len(halfwords) % 2 == 0
for i in range(0, len(halfwords), 2):
var_name = f"{_get_int16_alias(halfwords[i])}__{_get_int16_alias(halfwords[i+1])}"
yield f"int32_t kernel__{var_name} = kernel[{i // 2}];"
yield f"int32_t kernel__{var_name} = ((int32_t*)kernel)[{i // 2}];"


def _get_draft_macs(
Expand Down Expand Up @@ -280,7 +280,7 @@ def _write_sums_to_memory(num_outputs, offset, stride) -> Iterator[str]:

if stride > 1:
for i in range(num_outputs):
yield f"((int16_t*) output)[{i * stride + offset}] = (int16_t) requant_{i};"
yield f"output[{i * stride + offset}] = (int16_t) requant_{i};"

else:
num_packed = (num_outputs - offset) // 2
Expand All @@ -296,13 +296,13 @@ def _write_sums_to_memory(num_outputs, offset, stride) -> Iterator[str]:
)

if offset == 1:
yield "((int16_t*) output)[1] = (int16_t) requant_0;"
yield "output[1] = (int16_t) requant_0;"

for i in range(num_packed):
yield f"output[{offset + i}] = packed_res_{i};"

if (offset + num_outputs) % 2 == 1:
yield f"((int16_t*) output)[{num_packed * 2}] = (int16_t) requant_{num_packed * 2};"
yield f"output[{num_packed * 2}] = (int16_t) requant_{num_packed * 2};"


def tensordot_int16_impl(
Expand Down Expand Up @@ -390,7 +390,7 @@ def insert_lines(lines):
#define {function_name.upper()}_EXISTS
#include <arm_acle.h>
__attribute__((always_inline)) static inline int32_t {function_name}(
int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
int16_t *output, int16_t *tensor, int16_t *kernel, int32_t *bias, int32_t *scale
) {{
{_init_biased_accumulators(num_outputs)}
Expand Down

0 comments on commit 33bc277

Please sign in to comment.