diff --git a/gallery/how_to/work_with_relay/using_pipeline_executor.py b/gallery/how_to/work_with_relay/using_pipeline_executor.py old mode 100755 new mode 100644 diff --git a/python/gen_requirements.py b/python/gen_requirements.py old mode 100755 new mode 100644 diff --git a/python/tvm/auto_scheduler/compute_dag.py b/python/tvm/auto_scheduler/compute_dag.py old mode 100755 new mode 100644 diff --git a/python/tvm/contrib/hexagon/hexagon_profiler.py b/python/tvm/contrib/hexagon/hexagon_profiler.py old mode 100755 new mode 100644 diff --git a/python/tvm/micro/contrib/stm32/__init__.py b/python/tvm/micro/contrib/stm32/__init__.py old mode 100755 new mode 100644 index 80e57f2d3912..8558f5335955 --- a/python/tvm/micro/contrib/stm32/__init__.py +++ b/python/tvm/micro/contrib/stm32/__init__.py @@ -1,20 +1,20 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Module container of STM32 code generator.""" - -from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Module container of STM32 code generator.""" + +from .emitter import CodeEmitter, get_input_tensor_name, get_output_tensor_name diff --git a/python/tvm/relay/frontend/common.py b/python/tvm/relay/frontend/common.py old mode 100755 new mode 100644 diff --git a/python/tvm/relay/transform/infer_layout_utils.py b/python/tvm/relay/transform/infer_layout_utils.py old mode 100755 new mode 100644 diff --git a/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py b/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py old mode 100755 new mode 100644 index 80f1cd1ecf78..14bdd45b56f7 --- a/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py +++ b/python/tvm/topi/hexagon/qnn/adaptive_avg_pool1d.py @@ -1,120 +1,120 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" Compute and schedule for adaptive_avg_pool1d slice op - -Following are few notes and assumptions made by the implementation: - -Assumptions: -1) The input is in NCW layout. Distilbert is the only model that calls - nn.adaptive_avg_pool1d and the only layout it uses is 'NCW'. -2) The op takes output_size as an argument and - only handles the specialized case where output_size is 1. - The argument output_size is used as the value of output_width. -3) Both input and output dtype is uint8/int8 and - quantization parameter is provided to the op. -4) Input is assumed to always be multiple of fixed chunk 32c64w. - -Notes: -1) If input width is used as output width, there can be two cases: - a. If the quantization parameters of input and output are same, - it can return the input as output so the op will be a no-op. - b. If the quantization parameters of input and output are different, - it will essentially be a requantize op. -2) If output_size is a value besides 1 or input_width, - adaptive_avg_pool1d may use dynamic stride and kernel for each output element. - When this case occurs, kernel won't be known at compile time. We want to use - the generic implementation nn.adaptive_avg_pool1d() for this case. 
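As a concrete illustration of the fixed-point requantizing average described in these notes (and implemented by adaptive_avg_pool1d below), here is a minimal, self-contained sketch in plain Python. The to_fixed_point and saturate helpers are simplified stand-ins for topi.hexagon.utils.get_fixed_point_value and saturate, and all quantization parameters are made-up values.

def to_fixed_point(val):
    # Simplified stand-in for get_fixed_point_value(val, "int16"),
    # assuming 0 < val < 1: returns (mantissa, rsh) with val ~= mantissa / 2**rsh.
    rsh = 0
    while round(val * 2 ** (rsh + 1)) < 2 ** 15 - 1:
        rsh += 1
    return round(val * 2 ** rsh), rsh

def saturate(x, lo=0, hi=255):
    # Clamp to the uint8 output range.
    return max(lo, min(hi, x))

# Made-up quantization parameters for a width-16 row reduced to width 1.
input_scale, input_zp = 0.05, 3
output_scale, output_zp = 0.1, 7
in_width = 16

scale_with_area = input_scale / (output_scale * in_width)
scale_fp, rsh = to_fixed_point(scale_with_area)
corr = (output_zp << rsh) - input_zp * in_width * scale_fp

data = list(range(10, 10 + in_width))                   # fake uint8 input row
acc = sum(data)                                         # widened integer accumulation
q_out = saturate((acc * scale_fp + corr) >> rsh)

# Floating-point reference for the same row.
fp_avg = sum((d - input_zp) * input_scale for d in data) / in_width
print(q_out, round(fp_avg / output_scale + output_zp))  # both print 14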
-""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate - - -def adaptive_avg_pool1d( - data: te.Tensor, - output_size: list, - odtype: str, - input_zero_point: int, - input_scale: float, - output_zero_point: int, - output_scale: float, -): - """adaptive_avg_pool1d compute""" - _, _, inw = data.shape - - out_width = output_size[0] - - n, c = data.shape[:2] - oshape = (n, c) + (out_width,) - - # Kernel is same as input_width since output_width is assumed to be 1 - if out_width == 1: - kw_r = inw - else: - raise RuntimeError(f"Unsupported output_size, {out_width}'") - - if odtype == "uint8": - temp_dtype = "uint32" - elif odtype == "int8": - temp_dtype = "int32" - else: - raise RuntimeError(f"Unsupported output dtype, {odtype}'") - - scale_with_area = input_scale / (output_scale * int(kw_r)) - scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") - corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point - - rw_r = te.reduce_axis((0, kw_r), name="rw_r") - - sum_compute = te.compute( - oshape, - lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]), - name="sum", - ) - - avg_compute = te.compute( - oshape, - lambda n, c, w: saturate( - ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype - ).astype(odtype), - name="adaptive_avg_1d", - ) - return avg_compute - - -def stir_schedule_ncw_32c64w(outs, ins, input_layout: str): - """Schedule for input layout ncw-32c64w and output layout ncw""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("sum") - - # Input is multiple of fixed chunk but output is NxCx1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s - - -def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str): - """STIR based schedule""" - if output_layout == "ncw": - return stir_schedule_ncw_32c64w(outs, ins, input_layout) - raise RuntimeError(f"Unexpected layout '{output_layout}'") +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" Compute and schedule for adaptive_avg_pool1d slice op + +Following are few notes and assumptions made by the implementation: + +Assumptions: +1) The input is in NCW layout. Distilbert is the only model that calls + nn.adaptive_avg_pool1d and the only layout it uses is 'NCW'. +2) The op takes output_size as an argument and + only handles the specialized case where output_size is 1. + The argument output_size is used as the value of output_width. 
+3) Both input and output dtype is uint8/int8 and + quantization parameter is provided to the op. +4) Input is assumed to always be multiple of fixed chunk 32c64w. + +Notes: +1) If input width is used as output width, there can be two cases: + a. If the quantization parameters of input and output are same, + it can return the input as output so the op will be a no-op. + b. If the quantization parameters of input and output are different, + it will essentially be a requantize op. +2) If output_size is a value besides 1 or input_width, + adaptive_avg_pool1d may use dynamic stride and kernel for each output element. + When this case occurs, kernel won't be known at compile time. We want to use + the generic implementation nn.adaptive_avg_pool1d() for this case. +""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate + + +def adaptive_avg_pool1d( + data: te.Tensor, + output_size: list, + odtype: str, + input_zero_point: int, + input_scale: float, + output_zero_point: int, + output_scale: float, +): + """adaptive_avg_pool1d compute""" + _, _, inw = data.shape + + out_width = output_size[0] + + n, c = data.shape[:2] + oshape = (n, c) + (out_width,) + + # Kernel is same as input_width since output_width is assumed to be 1 + if out_width == 1: + kw_r = inw + else: + raise RuntimeError(f"Unsupported output_size, {out_width}'") + + if odtype == "uint8": + temp_dtype = "uint32" + elif odtype == "int8": + temp_dtype = "int32" + else: + raise RuntimeError(f"Unsupported output dtype, {odtype}'") + + scale_with_area = input_scale / (output_scale * int(kw_r)) + scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") + corr = (output_zero_point << rsh) - input_zero_point * kw_r * scale_fixed_point + + rw_r = te.reduce_axis((0, kw_r), name="rw_r") + + sum_compute = te.compute( + oshape, + lambda n, c, w: te.sum(data[n, c, w + rw_r].astype(temp_dtype), axis=[rw_r]), + name="sum", + ) + + avg_compute = te.compute( + oshape, + lambda n, c, w: saturate( + ((sum_compute[n, c, w] * scale_fixed_point) + corr) >> rsh, odtype + ).astype(odtype), + name="adaptive_avg_1d", + ) + return avg_compute + + +def stir_schedule_ncw_32c64w(outs, ins, input_layout: str): + """Schedule for input layout ncw-32c64w and output layout ncw""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("sum") + + # Input is multiple of fixed chunk but output is NxCx1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s + + +def tir_adaptive_avg_pool1d_schedule(outs, ins, output_layout: str, input_layout: str): + """STIR based schedule""" + if output_layout == "ncw": + return stir_schedule_ncw_32c64w(outs, ins, input_layout) + raise RuntimeError(f"Unexpected layout '{output_layout}'") diff --git a/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py b/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py old mode 100755 new mode 100644 index 1c171be8976e..24d5224f71cf --- a/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py +++ b/python/tvm/topi/hexagon/qnn/global_avg_pool2d.py @@ -1,95 +1,95 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. 
The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Assumptions: -1) The input is in NCHW layout. Squeezenet is the only model that calls - nn.global_avg_pool2d and the only layout it uses is 'NCHW'. -2) Both input and output dtype is uint8 and - quantization parameter is provided to the op. -3) Input is assumed to always be multiple of fixed chunk 32c8h8w. -""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate - - -def global_avg_pool2d_u8( - data: te.Tensor, - odtype: str, - input_zero_point: int, - input_scale: float, - output_zero_point: int, - output_scale: float, -): - """global_avg_pool2d""" - input_b, input_c, input_h, input_w = data.shape - oshape = (input_b, input_c) + (1, 1) - - if input_h * input_w < 256: - bits = "16" - else: - bits = "32" - - if odtype == "uint8": - temp_dtype = "uint" + bits - elif odtype == "int8": - temp_dtype = "int" + bits - else: - raise RuntimeError(f"Unsupported output dtype, {odtype}'") - - pool_area = input_h * input_w - rh_r = te.reduce_axis((0, input_h), name="rh_r") - rw_r = te.reduce_axis((0, input_w), name="rw_r") - - scale_with_area = input_scale / (output_scale * int(pool_area)) - scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") - corr = (output_zero_point << rsh) - input_zero_point * pool_area * scale_fixed_point - - sum_compute = te.compute( - oshape, - lambda n, c, h, w: te.sum( - data[n, c, h + rh_r, w + rw_r].astype(temp_dtype), axis=[rh_r, rw_r] - ), - name="sum", - ) - - avg_compute = te.compute( - oshape, - lambda n, c, h, w: saturate( - ((sum_compute[n, c, h, w] * scale_fixed_point) + corr) >> rsh, odtype - ).astype(odtype), - name="global_avg_pool2d", - ) - - return avg_compute - - -def stir_global_avg_pool2d_u8_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): - """Schedule""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("sum") - - # Input is multiple of fixed chunk but output is NxCx1x1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Assumptions: +1) The input is in NCHW layout. Squeezenet is the only model that calls + nn.global_avg_pool2d and the only layout it uses is 'NCHW'. +2) Both input and output dtype is uint8 and + quantization parameter is provided to the op. +3) Input is assumed to always be multiple of fixed chunk 32c8h8w. +""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value, saturate + + +def global_avg_pool2d_u8( + data: te.Tensor, + odtype: str, + input_zero_point: int, + input_scale: float, + output_zero_point: int, + output_scale: float, +): + """global_avg_pool2d""" + input_b, input_c, input_h, input_w = data.shape + oshape = (input_b, input_c) + (1, 1) + + if input_h * input_w < 256: + bits = "16" + else: + bits = "32" + + if odtype == "uint8": + temp_dtype = "uint" + bits + elif odtype == "int8": + temp_dtype = "int" + bits + else: + raise RuntimeError(f"Unsupported output dtype, {odtype}'") + + pool_area = input_h * input_w + rh_r = te.reduce_axis((0, input_h), name="rh_r") + rw_r = te.reduce_axis((0, input_w), name="rw_r") + + scale_with_area = input_scale / (output_scale * int(pool_area)) + scale_fixed_point, rsh = get_fixed_point_value(scale_with_area, "int16") + corr = (output_zero_point << rsh) - input_zero_point * pool_area * scale_fixed_point + + sum_compute = te.compute( + oshape, + lambda n, c, h, w: te.sum( + data[n, c, h + rh_r, w + rw_r].astype(temp_dtype), axis=[rh_r, rw_r] + ), + name="sum", + ) + + avg_compute = te.compute( + oshape, + lambda n, c, h, w: saturate( + ((sum_compute[n, c, h, w] * scale_fixed_point) + corr) >> rsh, odtype + ).astype(odtype), + name="global_avg_pool2d", + ) + + return avg_compute + + +def stir_global_avg_pool2d_u8_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): + """Schedule""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("sum") + + # Input is multiple of fixed chunk but output is NxCx1x1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s diff --git a/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py b/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py old mode 100755 new mode 100644 index 043ad313bdef..a974ad643107 --- a/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py +++ b/python/tvm/topi/hexagon/qnn/qadd_qsub_qmul.py @@ -1,270 +1,270 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
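The 16-bit versus 32-bit accumulator choice in global_avg_pool2d_u8 above relies on the worst-case sum fitting the narrower type when the pool area stays below 256; a quick sanity check of that bound, assuming a pool area just under the threshold:

# Worst-case accumulations for a pool area just under the 256-element threshold.
pool_area = 255
assert 255 * pool_area <= 2**16 - 1    # uint8 inputs summed into uint16
assert 127 * pool_area <= 2**15 - 1    # int8 positive extreme fits int16
assert -128 * pool_area >= -(2**15)    # int8 negative extreme also fits int16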
-# pylint: disable=invalid-name - -"""Compute and schedule for quantized add, multiply, subtract op - -Please note the following assumptions made by the implementation: - -1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, get_fixed_point_value - - -def broadcast_axis(tensor_A, tensor_B): - """Find out the indices that will have broadcasting""" - A_broadcast = [] - B_broadcast = [] - - for i in range(len(tensor_A.shape)): - if tensor_A.shape[i] == tensor_B.shape[i]: - A_broadcast.append(1) - B_broadcast.append(1) - elif tensor_A.shape[i] == 1: - A_broadcast.append(0) - B_broadcast.append(1) - elif tensor_B.shape[i] == 1: - A_broadcast.append(1) - B_broadcast.append(0) - return A_broadcast, B_broadcast - - -def saturate(x: te.Tensor, dtype: str): - """Saturate value for the specified data type""" - return te.max(te.min_value(dtype), te.min(x, te.max_value(dtype))) - - -def get_int_scale( - scale_A: float, - scale_B: float, - scale_M: float, - zero_point_A: int, - zero_point_B: int, - zero_point_M: int, - op: str, -): - """ - Get fixed-point number and exp_scale_factor from topi.hexagon.utils.get_fixed_point_value. - Also, depending on the op, this function uses exp_scale_factor(log2 of the scale factor) - to adjust the output's zero_point. - """ - - C_recip = 1 / scale_M - - if op == "qmul": - scale = scale_A * scale_B * C_recip - scale_fixed_point, rsh = get_fixed_point_value(scale, "int16") - - # We need to adjust output's zero point value since the compute for the op is multiplied - # by a scaling factor. - # The scaling factor is 2^x where x is the exp_scale_factor which is assigned to rsh here. - # Since zero_point_M is multipled by 2^rsh while converting floating-point scale value - # into fixed-point number, we left shift it by rsh in our compute to reflect that. - - corr = zero_point_M << rsh - - return scale_fixed_point, rsh, corr - - a_scale_f = scale_A * C_recip - b_scale_f = scale_B * C_recip - scale_fixed_point_a, rsh_a = get_fixed_point_value(a_scale_f, "int16") - scale_fixed_point_b, rsh_b = get_fixed_point_value(b_scale_f, "int16") - - # Here we have two exp_scale_factors rsh_a and rsh_b. - # To avoid complexity, we want to use a common exp_scale_factor and - # we want to use the lowest of the two. - - # Since, either of scale_fixed_point_a or scale_fixed_point_b has already been multiplied - # by 2^max(rsh_a, rsh_b) in topi.hexagon.utils.get_fixed_point_value, - # we want to undo that by right shifting that scale_fixed_point value - # by the difference of rsh_a and rsh_b. - - # This results into having a common exp_scale_factor for both scale_fixed_point_a - # and scale_fixed_point_b. - - # We also set rsh here which is used to adjust the zero_point_M and compute the corr value, - # computation of which comes from the original equation of the op's compute. 
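To make the common-exponent trick described in the comments above concrete, here is a small standalone sketch of the qadd path, using a simplified stand-in for get_fixed_point_value and made-up quantization parameters; it aligns the two per-input fixed-point scales to the smaller exponent and checks one requantized add against the floating-point result.

def to_fixed_point(val):
    # Simplified stand-in for get_fixed_point_value(val, "int16"), assuming 0 < val < 1.
    rsh = 0
    while round(val * 2 ** (rsh + 1)) < 2 ** 15 - 1:
        rsh += 1
    return round(val * 2 ** rsh), rsh

# Made-up quantization parameters for C = A + B.
scale_A, zp_A = 0.02, 10
scale_B, zp_B = 0.03, 4
scale_M, zp_M = 0.05, 2

fa, rsh_a = to_fixed_point(scale_A / scale_M)   # per-input rescale factors
fb, rsh_b = to_fixed_point(scale_B / scale_M)

# Align both mantissas to the smaller exponent, as the comments describe.
if rsh_a > rsh_b:
    fa, rsh = fa >> (rsh_a - rsh_b), rsh_b
else:
    fb, rsh = fb >> (rsh_b - rsh_a), rsh_a

corr = (zp_M << rsh) - (zp_A * fa + zp_B * fb)

a, b = 57, 23                                   # fake uint8 input values
q_out = (a * fa + b * fb + corr) >> rsh         # saturate() to the output dtype omitted

# Floating-point reference for the same element.
ref = ((a - zp_A) * scale_A + (b - zp_B) * scale_B) / scale_M + zp_M
print(q_out, round(ref))                        # both print 32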
- - if rsh_a > rsh_b: - scale_fixed_point_a = scale_fixed_point_a >> (rsh_a - rsh_b) - rsh = rsh_b - else: - scale_fixed_point_b = scale_fixed_point_b >> (rsh_b - rsh_a) - rsh = rsh_a - - if op == "qadd": - corr = (zero_point_M << rsh) - ( - zero_point_A * scale_fixed_point_a + zero_point_B * scale_fixed_point_b - ) - else: - corr = (zero_point_M << rsh) - ( - zero_point_A * scale_fixed_point_a - zero_point_B * scale_fixed_point_b - ) - - return scale_fixed_point_a, scale_fixed_point_b, rsh, corr - - -def qadd_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized add with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_a, scale_b, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qadd" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) - + (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def qsubtract_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized subtract with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_a, scale_b, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qsub" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) - - (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def qmultiply_broadcast_compute( - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_shape: list, - zero_point_A: int, - scale_A: float, - zero_point_B: int, - scale_B: float, - zero_point_M: int, - scale_M: float, - dtype: str, -): - """Compute quantized multiply with broadcasting""" - A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) - n_a, h_a, w_a, c_a = A_broadcast - n_b, h_b, w_b, c_b = B_broadcast - - scale_int, rsh, corr = get_int_scale( - scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qmul" - ) - - return te.compute( - output_shape, - lambda n, h, w, c: saturate( - ( - ( - scale_int - * (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] - zero_point_A) - * (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] - zero_point_B) - + corr - ) - >> rsh - ), - dtype, - ).astype(dtype), - ) - - -def tir_schedule_quant( - out_M: te.Tensor, - tensor_A: te.Tensor, - tensor_B: te.Tensor, - output_layout: str, - tensor_A_layout: str, - tensor_B_layout: str, -): - """Schedule for output layout nhwc-8h8w32c-2d""" - func = te.create_prim_func([tensor_A, tensor_B, out_M]) - - s = tir.Schedule(func) - - block = s.get_block("compute") - - if tensor_A_layout == "nhwc-8h8w32c-2d": - tensor_A_transformed_layout = get_layout_transform_fn(tensor_A_layout) - s.transform_layout(block, buffer=tensor_A.name, index_map=tensor_A_transformed_layout) - - if tensor_B_layout == "nhwc-8h8w32c-2d": - 
tensor_B_transformed_layout = get_layout_transform_fn(tensor_B_layout) - s.transform_layout(block, buffer=tensor_B.name, index_map=tensor_B_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) - - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 8]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 4]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name + +"""Compute and schedule for quantized add, multiply, subtract op + +Please note the following assumptions made by the implementation: + +1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, get_fixed_point_value + + +def broadcast_axis(tensor_A, tensor_B): + """Find out the indices that will have broadcasting""" + A_broadcast = [] + B_broadcast = [] + + for i in range(len(tensor_A.shape)): + if tensor_A.shape[i] == tensor_B.shape[i]: + A_broadcast.append(1) + B_broadcast.append(1) + elif tensor_A.shape[i] == 1: + A_broadcast.append(0) + B_broadcast.append(1) + elif tensor_B.shape[i] == 1: + A_broadcast.append(1) + B_broadcast.append(0) + return A_broadcast, B_broadcast + + +def saturate(x: te.Tensor, dtype: str): + """Saturate value for the specified data type""" + return te.max(te.min_value(dtype), te.min(x, te.max_value(dtype))) + + +def get_int_scale( + scale_A: float, + scale_B: float, + scale_M: float, + zero_point_A: int, + zero_point_B: int, + zero_point_M: int, + op: str, +): + """ + Get fixed-point number and exp_scale_factor from topi.hexagon.utils.get_fixed_point_value. + Also, depending on the op, this function uses exp_scale_factor(log2 of the scale factor) + to adjust the output's zero_point. + """ + + C_recip = 1 / scale_M + + if op == "qmul": + scale = scale_A * scale_B * C_recip + scale_fixed_point, rsh = get_fixed_point_value(scale, "int16") + + # We need to adjust output's zero point value since the compute for the op is multiplied + # by a scaling factor. + # The scaling factor is 2^x where x is the exp_scale_factor which is assigned to rsh here. + # Since zero_point_M is multipled by 2^rsh while converting floating-point scale value + # into fixed-point number, we left shift it by rsh in our compute to reflect that. 
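Following on from the comment above, a short numeric check of the qmul correction, with made-up scales and zero points; the int16 fixed-point pair for scale_A * scale_B / scale_M is worked out by hand here rather than by calling get_fixed_point_value, and the final saturate to the output dtype is omitted.

# Made-up quantization parameters for a single qmul output element.
scale_A, zp_A = 0.02, 10
scale_B, zp_B = 0.03, 4
scale_M, zp_M = 0.005, 7

# scale_A * scale_B / scale_M = 0.12 ~= 31457 / 2**18 (hand-derived int16 fixed point).
scale_int, rsh = 31457, 18
corr = zp_M << rsh               # output zero point pre-shifted by the same exponent

a, b = 55, 27                    # fake uint8 inputs
q_out = (scale_int * (a - zp_A) * (b - zp_B) + corr) >> rsh

# Floating-point reference for the same element.
ref = (a - zp_A) * scale_A * (b - zp_B) * scale_B / scale_M + zp_M
assert abs(q_out - ref) < 1      # agrees up to the truncation of the right shift
print(q_out, ref)                # 131 vs ~131.2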
+ + corr = zero_point_M << rsh + + return scale_fixed_point, rsh, corr + + a_scale_f = scale_A * C_recip + b_scale_f = scale_B * C_recip + scale_fixed_point_a, rsh_a = get_fixed_point_value(a_scale_f, "int16") + scale_fixed_point_b, rsh_b = get_fixed_point_value(b_scale_f, "int16") + + # Here we have two exp_scale_factors rsh_a and rsh_b. + # To avoid complexity, we want to use a common exp_scale_factor and + # we want to use the lowest of the two. + + # Since, either of scale_fixed_point_a or scale_fixed_point_b has already been multiplied + # by 2^max(rsh_a, rsh_b) in topi.hexagon.utils.get_fixed_point_value, + # we want to undo that by right shifting that scale_fixed_point value + # by the difference of rsh_a and rsh_b. + + # This results into having a common exp_scale_factor for both scale_fixed_point_a + # and scale_fixed_point_b. + + # We also set rsh here which is used to adjust the zero_point_M and compute the corr value, + # computation of which comes from the original equation of the op's compute. + + if rsh_a > rsh_b: + scale_fixed_point_a = scale_fixed_point_a >> (rsh_a - rsh_b) + rsh = rsh_b + else: + scale_fixed_point_b = scale_fixed_point_b >> (rsh_b - rsh_a) + rsh = rsh_a + + if op == "qadd": + corr = (zero_point_M << rsh) - ( + zero_point_A * scale_fixed_point_a + zero_point_B * scale_fixed_point_b + ) + else: + corr = (zero_point_M << rsh) - ( + zero_point_A * scale_fixed_point_a - zero_point_B * scale_fixed_point_b + ) + + return scale_fixed_point_a, scale_fixed_point_b, rsh, corr + + +def qadd_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized add with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, w_b, c_b = B_broadcast + + scale_a, scale_b, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qadd" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) + + (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def qsubtract_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized subtract with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, w_b, c_b = B_broadcast + + scale_a, scale_b, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qsub" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] * scale_a) + - (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] * scale_b) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def qmultiply_broadcast_compute( + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_shape: list, + zero_point_A: int, + scale_A: float, + zero_point_B: int, + scale_B: float, + zero_point_M: int, + scale_M: float, + dtype: str, +): + """Compute quantized multiply with broadcasting""" + A_broadcast, B_broadcast = broadcast_axis(tensor_A, tensor_B) + n_a, h_a, w_a, c_a = A_broadcast + n_b, h_b, 
w_b, c_b = B_broadcast + + scale_int, rsh, corr = get_int_scale( + scale_A, scale_B, scale_M, zero_point_A, zero_point_B, zero_point_M, "qmul" + ) + + return te.compute( + output_shape, + lambda n, h, w, c: saturate( + ( + ( + scale_int + * (tensor_A[n * n_a, h * h_a, w * w_a, c * c_a] - zero_point_A) + * (tensor_B[n * n_b, h * h_b, w * w_b, c * c_b] - zero_point_B) + + corr + ) + >> rsh + ), + dtype, + ).astype(dtype), + ) + + +def tir_schedule_quant( + out_M: te.Tensor, + tensor_A: te.Tensor, + tensor_B: te.Tensor, + output_layout: str, + tensor_A_layout: str, + tensor_B_layout: str, +): + """Schedule for output layout nhwc-8h8w32c-2d""" + func = te.create_prim_func([tensor_A, tensor_B, out_M]) + + s = tir.Schedule(func) + + block = s.get_block("compute") + + if tensor_A_layout == "nhwc-8h8w32c-2d": + tensor_A_transformed_layout = get_layout_transform_fn(tensor_A_layout) + s.transform_layout(block, buffer=tensor_A.name, index_map=tensor_A_transformed_layout) + + if tensor_B_layout == "nhwc-8h8w32c-2d": + tensor_B_transformed_layout = get_layout_transform_fn(tensor_B_layout) + s.transform_layout(block, buffer=tensor_B.name, index_map=tensor_B_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) + + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 8]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 4]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) + + return s diff --git a/python/tvm/topi/hexagon/qnn/quantize.py b/python/tvm/topi/hexagon/qnn/quantize.py old mode 100755 new mode 100644 index ff03aac0a862..3fd91ddce6ca --- a/python/tvm/topi/hexagon/qnn/quantize.py +++ b/python/tvm/topi/hexagon/qnn/quantize.py @@ -1,80 +1,80 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
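The index-multiplier trick used by broadcast_axis above can be exercised on plain shapes; this standalone sketch (with a lightweight stand-in object that only exposes .shape) shows how a size-1 axis collapses to index 0 inside the te.compute lambdas.

class FakeTensor:
    # Minimal stand-in exposing only .shape, enough to drive the broadcast logic.
    def __init__(self, shape):
        self.shape = shape

def broadcast_axis(tensor_A, tensor_B):
    # Same logic as in the diff above: 1 keeps the loop index, 0 pins the axis to 0.
    A_broadcast, B_broadcast = [], []
    for dim_a, dim_b in zip(tensor_A.shape, tensor_B.shape):
        if dim_a == dim_b:
            A_broadcast.append(1)
            B_broadcast.append(1)
        elif dim_a == 1:
            A_broadcast.append(0)
            B_broadcast.append(1)
        else:
            A_broadcast.append(1)
            B_broadcast.append(0)
    return A_broadcast, B_broadcast

a = FakeTensor((1, 8, 8, 32))
b = FakeTensor((1, 1, 8, 32))          # broadcast along H
a_mul, b_mul = broadcast_axis(a, b)
print(a_mul, b_mul)                    # [1, 1, 1, 1] [1, 0, 1, 1]
# Inside te.compute, B is then indexed as B[n*1, h*0, w*1, c*1], i.e. B[n, 0, w, c].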
-# pylint: disable=invalid-name -"""Compute and schedule for hexagon quantize -Please note the following assumptions made by the implementation: -1) The input and output data will be multiple of crouton layout -2) And the supported layout is NHWC -3) The input layout will be nhwc-4h2w32c2w-2d and - output layout will be nhwc-8h8w32c-2d""" - - -from tvm import te -from tvm import tir -from ..utils import get_layout_transform_fn, saturate - - -def quantize_compute(tensor_A: te.Tensor, scale: float, zero_point: int, dtype: str): - """Compute for quantize""" - scale_recip = 1 / scale - - return te.compute( - tensor_A.shape, - lambda n, h, w, c: saturate( - ((tensor_A[n, h, w, c] * scale_recip).astype("int32") + zero_point), - dtype, - ).astype(dtype), - name="quantize", - ) - - -def tir_quantize_schedule( - out_M: te.Tensor, - tensor_A: te.Tensor, - input_layout: str, - output_layout: str, -): - """Schedule for output layout nhwc-8h8w32c-2d""" - func = te.create_prim_func([tensor_A, out_M]) - - s = tir.Schedule(func) - - block = s.get_block("quantize") - - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(block, buffer=tensor_A.name, index_map=input_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) - - # Fixed chunk size is 2048 byte - # For uint8 the layout for fixed chunk is 8x8x32 - # where each element is 1 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 8 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 8]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 4]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
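The quantize_compute above maps float values into the quantized domain with a reciprocal multiply, a truncating cast, a zero-point shift and a saturate; a tiny standalone recreation with a made-up scale and zero point:

def saturate(x, lo=0, hi=255):
    # Clamp to the uint8 range, mirroring the saturate() helper in topi.hexagon.utils.
    return max(lo, min(hi, x))

scale, zero_point = 0.05, 10          # made-up quantization parameters
scale_recip = 1 / scale

for x in (-1.0, 0.0, 2.5, 20.0):
    q = saturate(int(x * scale_recip) + zero_point)   # int() mimics astype("int32")
    print(x, "->", q)
# -1.0 -> 0 (clamped), 0.0 -> 10, 2.5 -> 60, 20.0 -> 255 (clamped)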
+# pylint: disable=invalid-name +"""Compute and schedule for hexagon quantize +Please note the following assumptions made by the implementation: +1) The input and output data will be multiple of crouton layout +2) And the supported layout is NHWC +3) The input layout will be nhwc-4h2w32c2w-2d and + output layout will be nhwc-8h8w32c-2d""" + + +from tvm import te +from tvm import tir +from ..utils import get_layout_transform_fn, saturate + + +def quantize_compute(tensor_A: te.Tensor, scale: float, zero_point: int, dtype: str): + """Compute for quantize""" + scale_recip = 1 / scale + + return te.compute( + tensor_A.shape, + lambda n, h, w, c: saturate( + ((tensor_A[n, h, w, c] * scale_recip).astype("int32") + zero_point), + dtype, + ).astype(dtype), + name="quantize", + ) + + +def tir_quantize_schedule( + out_M: te.Tensor, + tensor_A: te.Tensor, + input_layout: str, + output_layout: str, +): + """Schedule for output layout nhwc-8h8w32c-2d""" + func = te.create_prim_func([tensor_A, out_M]) + + s = tir.Schedule(func) + + block = s.get_block("quantize") + + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(block, buffer=tensor_A.name, index_map=input_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=out_M.name, index_map=output_transformed_layout) + + # Fixed chunk size is 2048 byte + # For uint8 the layout for fixed chunk is 8x8x32 + # where each element is 1 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 8 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 8]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 4]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, wii, c_i) + + return s diff --git a/python/tvm/topi/hexagon/resize2d.py b/python/tvm/topi/hexagon/resize2d.py old mode 100755 new mode 100644 index 0e817e2e9330..6e6c0e471db0 --- a/python/tvm/topi/hexagon/resize2d.py +++ b/python/tvm/topi/hexagon/resize2d.py @@ -1,116 +1,116 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
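The split factors in the schedule above are chosen so one inner iteration covers exactly one 2048-byte crouton; a quick arithmetic check of the two chunk shapes mentioned in the comments (the uint8 one used here, and the fp16 one used by fp16 ops such as resize2d below):

# One crouton (fixed chunk) on Hexagon is 2048 bytes.
uint8_chunk_bytes = 8 * 8 * 32 * 1    # 8h x 8w x 32c, 1 byte per element
fp16_chunk_bytes = 8 * 4 * 32 * 2     # 8h x 4w x 32c, 2 bytes per element
assert uint8_chunk_bytes == fp16_chunk_bytes == 2048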
-# pylint: disable=invalid-name - -"""Compute and schedule for resize2d -Please note the following assumptions made by the implementation: -1) The input and output data will be multiple of crouton layout -2) And the supported layout is NHWC""" - -from tvm import te -from tvm import tir -from tvm import topi -from .utils import get_layout_transform_fn - - -def resize2d_compute( - data, - roi, - size, - layout, - method="linear", - coordinate_transformation_mode="half_pixel", - rounding_method="", - bicubic_alpha=-0.5, - bicubic_exclude=0, - extrapolation_value=0.0, - out_dtype=None, - output_shape=None, -): - """Call resize2d op from topi.image""" - return topi.image.resize2d( - data, - roi, - size, - layout, - method, - coordinate_transformation_mode, - rounding_method, - bicubic_alpha, - bicubic_exclude, - extrapolation_value, - out_dtype, - output_shape, - ) - - -def tir_resize2d_schedule( - out_m, - input_a, - input_layout: str, - output_layout: str, -): - """Schedule for input and output layout nhwc-8h2w32c2w-2d and nhwc-8h8w32c-2d""" - func = te.create_prim_func([input_a, out_m]) - - s = tir.Schedule(func) - - block = s.get_block("resize") - - if input_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(block, buffer=("read", 0), index_map=input_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) - - if output_layout == "nhwc-8h2w32c2w-2d": - # Fixed chunk size is 2048 byte - # For fp16 the layout for fixed chunk is 8x4x32 - # where each element is 2 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 4 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - ho, hi = s.split(h, [None, 8]) - wo, wi = s.split(w, [None, 4]) - co, ci = s.split(c, [None, 32]) - - s.reorder(n, ho, wo, co, hi, wi, ci) - - elif output_layout == "nhwc-8h8w32c-2d": - # Fixed chunk size is 2048 byte - # For uint8 the layout for fixed chunk is 8x8x32 - # where each element is 1 bytes - # Split and reorder is done to iterate over the fixed chunk - # Channel is split by a factor of 32 - # Width is split by a factor of 8 - # Height is split by a factor of 8 - n, h, w, c = s.get_loops(block) - - ho, hi = s.split(h, [None, 8]) - wo, wi = s.split(w, [None, 8]) - co, ci = s.split(c, [None, 32]) - - s.reorder(n, ho, wo, co, hi, wi, ci) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
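A hedged usage sketch for the resize2d wrapper and schedule above; it assumes a TVM build where these Hexagon helpers are importable, uses made-up shapes that are multiples of the 8h2w32c2w crouton, and is untested.

from tvm import te
from tvm.topi.hexagon.resize2d import resize2d_compute, tir_resize2d_schedule

a = te.placeholder((1, 16, 16, 32), dtype="float16", name="a")
out = resize2d_compute(
    a,
    [0.0, 0.0, 0.0, 0.0],              # roi (unused by the chosen transformation mode)
    [32, 32],                          # output height and width
    "NHWC",
    method="nearest_neighbor",
    coordinate_transformation_mode="asymmetric",
    rounding_method="floor",
)
sch = tir_resize2d_schedule(out, a, "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d")
print(sch.mod.script())                # inspect the split and reordered loop nest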
+# pylint: disable=invalid-name + +"""Compute and schedule for resize2d +Please note the following assumptions made by the implementation: +1) The input and output data will be multiple of crouton layout +2) And the supported layout is NHWC""" + +from tvm import te +from tvm import tir +from tvm import topi +from .utils import get_layout_transform_fn + + +def resize2d_compute( + data, + roi, + size, + layout, + method="linear", + coordinate_transformation_mode="half_pixel", + rounding_method="", + bicubic_alpha=-0.5, + bicubic_exclude=0, + extrapolation_value=0.0, + out_dtype=None, + output_shape=None, +): + """Call resize2d op from topi.image""" + return topi.image.resize2d( + data, + roi, + size, + layout, + method, + coordinate_transformation_mode, + rounding_method, + bicubic_alpha, + bicubic_exclude, + extrapolation_value, + out_dtype, + output_shape, + ) + + +def tir_resize2d_schedule( + out_m, + input_a, + input_layout: str, + output_layout: str, +): + """Schedule for input and output layout nhwc-8h2w32c2w-2d and nhwc-8h8w32c-2d""" + func = te.create_prim_func([input_a, out_m]) + + s = tir.Schedule(func) + + block = s.get_block("resize") + + if input_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(block, buffer=("read", 0), index_map=input_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) + + if output_layout == "nhwc-8h2w32c2w-2d": + # Fixed chunk size is 2048 byte + # For fp16 the layout for fixed chunk is 8x4x32 + # where each element is 2 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 4 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + ho, hi = s.split(h, [None, 8]) + wo, wi = s.split(w, [None, 4]) + co, ci = s.split(c, [None, 32]) + + s.reorder(n, ho, wo, co, hi, wi, ci) + + elif output_layout == "nhwc-8h8w32c-2d": + # Fixed chunk size is 2048 byte + # For uint8 the layout for fixed chunk is 8x8x32 + # where each element is 1 bytes + # Split and reorder is done to iterate over the fixed chunk + # Channel is split by a factor of 32 + # Width is split by a factor of 8 + # Height is split by a factor of 8 + n, h, w, c = s.get_loops(block) + + ho, hi = s.split(h, [None, 8]) + wo, wi = s.split(w, [None, 8]) + co, ci = s.split(c, [None, 32]) + + s.reorder(n, ho, wo, co, hi, wi, ci) + + return s diff --git a/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py b/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py old mode 100755 new mode 100644 index 86b6adb997cb..0596f79b66a8 --- a/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py +++ b/python/tvm/topi/hexagon/slice_ops/add_subtract_multiply.py @@ -1,87 +1,87 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# pylint: disable=invalid-name - -"""Compute and schedule for add, multiply, subtract slice op - -Please note the following assumptions made by the implementation: - -1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" - -from tvm import te -from tvm import tir -from tvm import topi -from ..utils import get_layout_transform_fn - - -def add_broadcast_compute(input_a, input_b): - """Call the add op from topi""" - return topi.add(input_a, input_b) - - -def subtract_broadcast_compute(input_a, input_b): - """Call the subtract op from topi""" - return topi.subtract(input_a, input_b) - - -def multiply_broadcast_compute(input_a, input_b): - """Call the multiply op from topi""" - return topi.multiply(input_a, input_b) - - -def tir_broadcast_schedule( - out_m, - input_a, - input_b, - output_layout: str, - input_a_layout: str, - input_b_layout: str, - op_name: str, -): - """Schedule for input and output layout nhwc-8h2w32c2w-2d considering broadcast""" - func = te.create_prim_func([input_a, input_b, out_m]) - - s = tir.Schedule(func) - - block_dict = {"add": "T_add", "subtract": "T_subtract", "multiply": "T_multiply"} - - block = s.get_block(block_dict[op_name]) - - if input_a_layout == "nhwc-8h2w32c2w-2d": - input_a_transformed_layout = get_layout_transform_fn(input_a_layout) - s.transform_layout(block, buffer=("read", 0), index_map=input_a_transformed_layout) - - if input_b_layout == "nhwc-8h2w32c2w-2d": - input_b_transformed_layout = get_layout_transform_fn(input_b_layout) - s.transform_layout(block, buffer=("read", 1), index_map=input_b_transformed_layout) - - output_transformed_layout = get_layout_transform_fn(output_layout) - s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) - - n, h, w, c = s.get_loops(block) - - h_o, h_i = s.split(h, [None, 8]) - w_o, w_i = s.split(w, [None, 4]) - c_o, c_i = s.split(c, [None, 32]) - wio, wii = s.split(w_i, [None, 2]) - - s.reorder(n, h_o, w_o, c_o, h_i, wio, c_i, wii) - - fused = s.fuse(c_i, wii) - s.vectorize(fused) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
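A hedged usage sketch for the broadcast schedule above; it assumes a TVM build where these Hexagon slice-op helpers are importable, uses made-up fp16 shapes that are multiples of the 8h2w32c2w crouton, and is untested.

from tvm import te
from tvm.topi.hexagon.slice_ops.add_subtract_multiply import (
    add_broadcast_compute,
    tir_broadcast_schedule,
)

a = te.placeholder((1, 16, 16, 64), dtype="float16", name="a")
b = te.placeholder((1, 16, 16, 64), dtype="float16", name="b")
out = add_broadcast_compute(a, b)
sch = tir_broadcast_schedule(
    out, a, b, "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "add"
)
print(sch.mod.script())   # shows the split, reordered and vectorized loop nest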
+# pylint: disable=invalid-name + +"""Compute and schedule for add, multiply, subtract slice op + +Please note the following assumptions made by the implementation: + +1) The inputs will be multiple of crouton layout except for the axis that needs broadcasting.""" + +from tvm import te +from tvm import tir +from tvm import topi +from ..utils import get_layout_transform_fn + + +def add_broadcast_compute(input_a, input_b): + """Call the add op from topi""" + return topi.add(input_a, input_b) + + +def subtract_broadcast_compute(input_a, input_b): + """Call the subtract op from topi""" + return topi.subtract(input_a, input_b) + + +def multiply_broadcast_compute(input_a, input_b): + """Call the multiply op from topi""" + return topi.multiply(input_a, input_b) + + +def tir_broadcast_schedule( + out_m, + input_a, + input_b, + output_layout: str, + input_a_layout: str, + input_b_layout: str, + op_name: str, +): + """Schedule for input and output layout nhwc-8h2w32c2w-2d considering broadcast""" + func = te.create_prim_func([input_a, input_b, out_m]) + + s = tir.Schedule(func) + + block_dict = {"add": "T_add", "subtract": "T_subtract", "multiply": "T_multiply"} + + block = s.get_block(block_dict[op_name]) + + if input_a_layout == "nhwc-8h2w32c2w-2d": + input_a_transformed_layout = get_layout_transform_fn(input_a_layout) + s.transform_layout(block, buffer=("read", 0), index_map=input_a_transformed_layout) + + if input_b_layout == "nhwc-8h2w32c2w-2d": + input_b_transformed_layout = get_layout_transform_fn(input_b_layout) + s.transform_layout(block, buffer=("read", 1), index_map=input_b_transformed_layout) + + output_transformed_layout = get_layout_transform_fn(output_layout) + s.transform_layout(block, buffer=("write", 0), index_map=output_transformed_layout) + + n, h, w, c = s.get_loops(block) + + h_o, h_i = s.split(h, [None, 8]) + w_o, w_i = s.split(w, [None, 4]) + c_o, c_i = s.split(c, [None, 32]) + wio, wii = s.split(w_i, [None, 2]) + + s.reorder(n, h_o, w_o, c_o, h_i, wio, c_i, wii) + + fused = s.fuse(c_i, wii) + s.vectorize(fused) + + return s diff --git a/python/tvm/topi/hexagon/slice_ops/clip.py b/python/tvm/topi/hexagon/slice_ops/clip.py old mode 100755 new mode 100644 diff --git a/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py b/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py old mode 100755 new mode 100644 index 30222c11bb54..9e6ae077851e --- a/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py +++ b/python/tvm/topi/hexagon/slice_ops/global_avg_pool2d.py @@ -1,52 +1,52 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -""" -Assumptions: -1) The input is in NCHW layout. Squeezenet is the only model that calls - nn.global_avg_pool2d and the only layout it uses is 'NCHW'. -2) The op takes input data as an argument. 
-3) Both input and output dtype is float32 and -4) Input is assumed to always be multiple of fixed chunk 32c8h4w. -""" - -from tvm import te -from tvm import tir -from tvm import topi -from ..utils import get_layout_transform_fn - - -def global_avg_pool2d( - data: te.Tensor, -): - """global_avg_pool2d""" - return topi.nn.global_pool(data, "avg", "NCHW") - - -def stir_global_avg_pool2d_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): - """Schedule""" - func = te.create_prim_func([ins, outs]) - s = tir.Schedule(func) - - sum_block = s.get_block("adaptive_pool_sum") - - # Input is multiple of fixed chunk but output is NxCx1x1 - # Hence transform_layout is only applied on input - input_transformed_layout = get_layout_transform_fn(input_layout) - s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) - - return s +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Assumptions: +1) The input is in NCHW layout. Squeezenet is the only model that calls + nn.global_avg_pool2d and the only layout it uses is 'NCHW'. +2) The op takes input data as an argument. +3) Both input and output dtype is float32 and +4) Input is assumed to always be multiple of fixed chunk 32c8h4w. +""" + +from tvm import te +from tvm import tir +from tvm import topi +from ..utils import get_layout_transform_fn + + +def global_avg_pool2d( + data: te.Tensor, +): + """global_avg_pool2d""" + return topi.nn.global_pool(data, "avg", "NCHW") + + +def stir_global_avg_pool2d_schedule(outs: te.Tensor, ins: te.Tensor, input_layout: str): + """Schedule""" + func = te.create_prim_func([ins, outs]) + s = tir.Schedule(func) + + sum_block = s.get_block("adaptive_pool_sum") + + # Input is multiple of fixed chunk but output is NxCx1x1 + # Hence transform_layout is only applied on input + input_transformed_layout = get_layout_transform_fn(input_layout) + s.transform_layout(sum_block, buffer=("read", 0), index_map=input_transformed_layout) + + return s diff --git a/src/relay/backend/contrib/dnnl/query_layout.cc b/src/relay/backend/contrib/dnnl/query_layout.cc index 3762c1906f40..63e0d73ce229 100755 --- a/src/relay/backend/contrib/dnnl/query_layout.cc +++ b/src/relay/backend/contrib/dnnl/query_layout.cc @@ -1,379 +1,379 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
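A hedged usage sketch for the fp32 global average pool slice op above; it assumes a TVM build where these helpers are importable, the NCHW shape is a made-up multiple of the 32c8h4w chunk, and the layout string passed to the schedule is illustrative only (use whatever name get_layout_transform_fn accepts for that chunk).

from tvm import te
from tvm.topi.hexagon.slice_ops.global_avg_pool2d import (
    global_avg_pool2d,
    stir_global_avg_pool2d_schedule,
)

data = te.placeholder((1, 32, 8, 8), dtype="float32", name="data")
out = global_avg_pool2d(data)                 # topi.nn.global_pool(..., "avg", "NCHW")
# "nchw-32c8h4w-2d" is an illustrative layout name, not confirmed against utils.py.
sch = stir_global_avg_pool2d_schedule(out, data, "nchw-32c8h4w-2d")
print(sch.mod.script())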
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file src/relay/backend/contrib/dnnl/query_layout.cc - * \brief layout auto-query func. - */ - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "../../../../runtime/contrib/dnnl/dnnl_utils.h" -#include "../../utils.h" -#include "dnnl.hpp" -namespace tvm { -namespace relay { -namespace contrib { - -using dim_t = dnnl_dim_t; -using dims_t = dnnl_dims_t; -using tvm::runtime::contrib::dtype_dl2dnnl; - -template -inline void array_set(T* arr, const U& val, size_t size) { - for (size_t i = 0; i < size; ++i) arr[i] = static_cast(val); -} - -template -inline void array_copy(T* dst, const T* src, size_t size) { - for (size_t i = 0; i < size; ++i) dst[i] = src[i]; -} - -template -inline void swap(T& t1, T& t2) { - T tmp(t1); - t1 = t2; - t2 = tmp; -} - -template -inline void simultaneous_sort(T* vals, T* vals_2nd_level, U* keys, size_t size, F comparator) { - if (size == 0) return; - - for (size_t i = 0; i < size - 1; ++i) { - bool swapped = false; - - for (size_t j = 0; j < size - i - 1; j++) { - auto res = comparator(vals[j], vals[j + 1]); - if (res == 0) res = comparator(vals_2nd_level[j], vals_2nd_level[j + 1]); - - if (res > 0) { - swap(vals[j], vals[j + 1]); - swap(vals_2nd_level[j], vals_2nd_level[j + 1]); - swap(keys[j], keys[j + 1]); - swapped = true; - } - } - - if (swapped == false) break; - } -} - -void compute_blocks(dims_t blocks, const dnnl::memory::desc* md) { - using format_kind_t = dnnl_format_kind_t; - const format_kind_t blocked = dnnl_blocked; - if (!(md->data.format_kind == blocked)) { - array_set(blocks, 0, md->data.ndims); - return; - } - array_set(blocks, 1, md->data.ndims); - const auto& bd = md->data.format_desc.blocking; - for (int iblk = 0; iblk < bd.inner_nblks; ++iblk) - blocks[bd.inner_idxs[iblk]] *= bd.inner_blks[iblk]; -} - -inline bool has_runtime_strides(const dnnl::memory::desc* md) { - using format_kind_t = dnnl_format_kind_t; - const format_kind_t blocked = dnnl_blocked; - if (!(md->data.format_kind == blocked)) return false; - for (int d = 0; d < md->data.ndims; ++d) - if (md->data.format_desc.blocking.strides[d] == DNNL_RUNTIME_DIM_VAL) return true; - return false; -} - -std::string md2fmt_tag_str(const dnnl::memory::desc* md) { - const auto& blk = md->data.format_desc.blocking; - - dims_t blocks = {0}; - compute_blocks(blocks, md); - - char dim_chars[DNNL_MAX_NDIMS + 1]; - - dims_t ou_blocks = {0}; - array_copy(ou_blocks, md->data.padded_dims, md->data.ndims); - - bool plain = true; - for (int d = 0; d < md->data.ndims; ++d) { - dim_chars[d] = (blocks[d] == 1 ? 'a' : 'A') + static_cast(d); - if (blocks[d] != 1) plain = false; - ou_blocks[d] /= blocks[d]; - } - - // Can't report meaningful tag for runtime dimensions. 
- if (has_runtime_strides(md)) return "*"; - - dims_t strides; - array_copy(strides, blk.strides, md->data.ndims); - - simultaneous_sort(strides, ou_blocks, dim_chars, md->data.ndims, - [](dim_t a, dim_t b) { return b - a; }); - - dim_chars[md->data.ndims] = '\0'; - - std::string s(dim_chars); - - if (!plain) { - for (int iblk = 0; iblk < blk.inner_nblks; ++iblk) { - char c = ('a' + static_cast(blk.inner_idxs[iblk])); - s += (std::to_string(blk.inner_blks[iblk]) + c); - } - } - return s; -} - -dnnl::memory::dims str2dims(const std::string& str_shape, bool dilates = false, - std::string interval = ",") { - // Split strings - std::vector str_dims; - size_t pos = 0, start = 0; - while ((pos = str_shape.find(interval, start)) != std::string::npos) { - std::string str_dim = str_shape.substr(start, pos - start); - if (pos > start) str_dims.push_back(str_dim); - start = pos + interval.size(); - } - if (str_shape.size() > start) { - str_dims.push_back(str_shape.substr(start)); - } - // transfer string to dims - dnnl::memory::dims out_dims; - if (dilates) { - std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), - [](const std::string& str) { return std::stoi(str) - 1; }); - } else { - std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), - [](const std::string& str) { return std::stoi(str); }); - } - return out_dims; -} - -void check_shapes(const std::vector shapes) { - std::regex valid_pat("(\\d*)(,(\\d*))*"); - bool checked = std::regex_match(shapes[0], valid_pat); - for (size_t i = 1; i < shapes.size() - 1; i++) { - checked &= std::regex_match(shapes[i], valid_pat); - } - checked &= std::regex_match(shapes[shapes.size() - 1], std::regex("\\d*")); - if (!checked) { - LOG(FATAL) << "Invalid input args for query dnnl optimal layout."; - } -} - -void check_layout(bool var, bool ref) { - if (var != ref) { - LOG(FATAL) << "Invalid input layout for query dnnl optimal layout."; - } -} - -std::string get_optimal_layout_for_conv(std::string data_layout, std::string kernel_layout, - std::string weight_shape, std::string out_shape, - std::string paddings, std::string strides, - std::string dilates, std::string G, std::string dtype) { - check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); - check_layout(std::regex_match(kernel_layout, std::regex("(G?)OI(D?)(H?)W")), true); - check_shapes({weight_shape, out_shape, paddings, strides, dilates, G}); - - dnnl::engine eng(dnnl::engine::kind::cpu, 0); - dnnl::stream s(eng); - using tag = dnnl::memory::format_tag; - - dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2dims(weight_shape); - dnnl::memory::dims weight_dims = weight_dims_; - - if (groups > 1) { - if (weight_dims_.size() == 5) { - weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], - weight_dims_[4]}; - } else { - weight_dims[1] = weight_dims[1] * groups; - } - } - - dnnl::memory::dims out_dims = str2dims(out_shape); - dnnl::memory::dims padding_dims = str2dims(paddings); - dnnl::memory::dims padding_dims_l(padding_dims.begin(), - padding_dims.begin() + padding_dims.size() / 2); - dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, - padding_dims.end()); - dnnl::memory::dims strides_dims = str2dims(strides); - dnnl::memory::dims dilates_dims = str2dims(dilates, true); - - dnnl::memory::dims input_dims = out_dims; - input_dims[1] = weight_dims[1]; - for (size_t i = 2; i < out_dims.size(); i++) { - dnnl::memory::dim K = weight_dims[i]; - 
dnnl::memory::dim S = strides_dims[i - 2]; - dnnl::memory::dim D = dilates_dims[i - 2]; - dnnl::memory::dim PL = padding_dims_l[i - 2]; - dnnl::memory::dim PR = padding_dims_r[i - 2]; - dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); - input_dims[i] = out_dims[i] * S - PL - PR + DK - 1; - } - - dnnl::memory::dims conv_src_dims = input_dims; - dnnl::memory::dims conv_weights_dims = weight_dims; - if (groups > 1) { - conv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; - conv_weights_dims.insert(conv_weights_dims.end(), weight_dims.begin() + 2, weight_dims.end()); - } - - dnnl::memory::dims conv_dst_dims = out_dims; - dnnl::memory::dims conv_strides = strides_dims; - dnnl::memory::dims conv_dilates = dilates_dims; - dnnl::memory::dims conv_padding_l = padding_dims_l; - dnnl::memory::dims conv_padding_r = padding_dims_r; - - auto dnnl_dtype = dtype_dl2dnnl(tvm::runtime::String2DLDataType(dtype)); - auto conv_src_md = dnnl::memory::desc({conv_src_dims}, dnnl_dtype, tag::any); - auto conv_weights_md = dnnl::memory::desc({conv_weights_dims}, dnnl_dtype, tag::any); - auto conv_dst_md = dnnl::memory::desc({conv_dst_dims}, dnnl_dtype, tag::any); - - auto conv_desc = dnnl::convolution_forward::desc( - dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, - conv_weights_md, conv_dst_md, conv_strides, conv_dilates, conv_padding_l, conv_padding_r); - - auto conv_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, eng); - - auto src_format = conv_prim_desc.src_desc(); - auto weights_format = conv_prim_desc.weights_desc(); - auto dst_format = conv_prim_desc.dst_desc(); - std::string src_df, weight_df, dst_df; - - src_df = md2fmt_tag_str(&src_format); - weight_df = md2fmt_tag_str(&weights_format); - dst_df = md2fmt_tag_str(&dst_format); - std::string res = src_df + "," + weight_df + "," + dst_df; - return res; -} - -std::string get_optimal_layout_for_conv_transpose(std::string data_layout, - std::string kernel_layout, - std::string weight_shape, std::string out_shape, - std::string paddings, std::string output_paddings, - std::string strides, std::string dilates, - std::string G, std::string dtype) { - check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); - check_layout(std::regex_match(kernel_layout, std::regex("(G?)((IO)|(OI))(D?)(H?)W")), true); - check_shapes({weight_shape, out_shape, paddings, output_paddings, strides, dilates, G}); - - dnnl::engine eng(dnnl::engine::kind::cpu, 0); - dnnl::stream s(eng); - using tag = dnnl::memory::format_tag; - - dnnl::memory::dim groups = std::stoi(G); - dnnl::memory::dims weight_dims_ = str2dims(weight_shape); - dnnl::memory::dims weight_dims = weight_dims_; - if (groups > 1) { - if (weight_dims_.size() == 5) { - weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], - weight_dims_[4]}; - } else { - weight_dims[1] = weight_dims[1] * groups; - } - } - dnnl::memory::dims out_dims = str2dims(out_shape); - dnnl::memory::dims padding_dims = str2dims(paddings); - dnnl::memory::dims padding_dims_l(padding_dims.begin(), - padding_dims.begin() + padding_dims.size() / 2); - dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, - padding_dims.end()); - dnnl::memory::dims output_padding_dims = str2dims(output_paddings); - dnnl::memory::dims strides_dims = str2dims(strides); - dnnl::memory::dims dilates_dims = str2dims(dilates, true); - - dnnl::memory::dims input_dims = out_dims; - if (out_dims[1] == weight_dims[0]) { - input_dims[1] 
= weight_dims[1]; - } else { - input_dims[1] = weight_dims[0]; - std::swap(weight_dims[0], weight_dims[1]); - } - for (size_t i = 2; i < out_dims.size(); i++) { - dnnl::memory::dim K = weight_dims[i]; - dnnl::memory::dim S = strides_dims[i - 2]; - dnnl::memory::dim D = dilates_dims[i - 2]; - dnnl::memory::dim PL = padding_dims_l[i - 2]; - dnnl::memory::dim PR = padding_dims_r[i - 2]; - dnnl::memory::dim OP = output_padding_dims[i - 2]; - dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); - input_dims[i] = (out_dims[i] - DK + PL + PR - OP) / S + 1; - } - - dnnl::memory::dims deconv_src_dims = input_dims; - dnnl::memory::dims deconv_weights_dims = weight_dims; - if (groups > 1) { - deconv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; - deconv_weights_dims.insert(deconv_weights_dims.end(), weight_dims.begin() + 2, - weight_dims.end()); - } - dnnl::memory::dims deconv_dst_dims = out_dims; - dnnl::memory::dims deconv_strides = strides_dims; - dnnl::memory::dims deconv_dilates = dilates_dims; - dnnl::memory::dims deconv_padding_l = padding_dims_l; - dnnl::memory::dims deconv_padding_r = padding_dims_r; - - auto dnnl_dtype = dtype_dl2dnnl(tvm::runtime::String2DLDataType(dtype)); - auto deconv_src_md = dnnl::memory::desc({deconv_src_dims}, dnnl_dtype, tag::any); - auto deconv_weights_md = dnnl::memory::desc({deconv_weights_dims}, dnnl_dtype, tag::any); - auto deconv_dst_md = dnnl::memory::desc({deconv_dst_dims}, dnnl_dtype, tag::any); - - auto deconv_desc = dnnl::deconvolution_forward::desc( - dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct, deconv_src_md, - deconv_weights_md, deconv_dst_md, deconv_strides, deconv_dilates, deconv_padding_l, - deconv_padding_r); - - auto deconv_prim_desc = dnnl::deconvolution_forward::primitive_desc(deconv_desc, eng); - - auto src_format = deconv_prim_desc.src_desc(); - auto weights_format = deconv_prim_desc.weights_desc(); - auto dst_format = deconv_prim_desc.dst_desc(); - std::string src_df, weight_df, dst_df; - - src_df = md2fmt_tag_str(&src_format); - weight_df = md2fmt_tag_str(&weights_format); - dst_df = md2fmt_tag_str(&dst_format); - std::string res = src_df + "," + weight_df + "," + dst_df; - return res; -} - -TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") - .set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5], - args[6], args[7], args[8]); - }); - -TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv_transpose") - .set_body([](TVMArgs args, TVMRetValue* rv) { - *rv = get_optimal_layout_for_conv_transpose(args[0], args[1], args[2], args[3], args[4], - args[5], args[6], args[7], args[8], args[9]); - }); - -} // namespace contrib -} // namespace relay -} // namespace tvm +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/dnnl/query_layout.cc + * \brief layout auto-query func. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "../../../../runtime/contrib/dnnl/dnnl_utils.h" +#include "../../utils.h" +#include "dnnl.hpp" +namespace tvm { +namespace relay { +namespace contrib { + +using dim_t = dnnl_dim_t; +using dims_t = dnnl_dims_t; +using tvm::runtime::contrib::dtype_dl2dnnl; + +template +inline void array_set(T* arr, const U& val, size_t size) { + for (size_t i = 0; i < size; ++i) arr[i] = static_cast(val); +} + +template +inline void array_copy(T* dst, const T* src, size_t size) { + for (size_t i = 0; i < size; ++i) dst[i] = src[i]; +} + +template +inline void swap(T& t1, T& t2) { + T tmp(t1); + t1 = t2; + t2 = tmp; +} + +template +inline void simultaneous_sort(T* vals, T* vals_2nd_level, U* keys, size_t size, F comparator) { + if (size == 0) return; + + for (size_t i = 0; i < size - 1; ++i) { + bool swapped = false; + + for (size_t j = 0; j < size - i - 1; j++) { + auto res = comparator(vals[j], vals[j + 1]); + if (res == 0) res = comparator(vals_2nd_level[j], vals_2nd_level[j + 1]); + + if (res > 0) { + swap(vals[j], vals[j + 1]); + swap(vals_2nd_level[j], vals_2nd_level[j + 1]); + swap(keys[j], keys[j + 1]); + swapped = true; + } + } + + if (swapped == false) break; + } +} + +void compute_blocks(dims_t blocks, const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) { + array_set(blocks, 0, md->data.ndims); + return; + } + array_set(blocks, 1, md->data.ndims); + const auto& bd = md->data.format_desc.blocking; + for (int iblk = 0; iblk < bd.inner_nblks; ++iblk) + blocks[bd.inner_idxs[iblk]] *= bd.inner_blks[iblk]; +} + +inline bool has_runtime_strides(const dnnl::memory::desc* md) { + using format_kind_t = dnnl_format_kind_t; + const format_kind_t blocked = dnnl_blocked; + if (!(md->data.format_kind == blocked)) return false; + for (int d = 0; d < md->data.ndims; ++d) + if (md->data.format_desc.blocking.strides[d] == DNNL_RUNTIME_DIM_VAL) return true; + return false; +} + +std::string md2fmt_tag_str(const dnnl::memory::desc* md) { + const auto& blk = md->data.format_desc.blocking; + + dims_t blocks = {0}; + compute_blocks(blocks, md); + + char dim_chars[DNNL_MAX_NDIMS + 1]; + + dims_t ou_blocks = {0}; + array_copy(ou_blocks, md->data.padded_dims, md->data.ndims); + + bool plain = true; + for (int d = 0; d < md->data.ndims; ++d) { + dim_chars[d] = (blocks[d] == 1 ? 'a' : 'A') + static_cast(d); + if (blocks[d] != 1) plain = false; + ou_blocks[d] /= blocks[d]; + } + + // Can't report meaningful tag for runtime dimensions. 
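  // Illustration (assuming oneDNN's documented tag naming; not derived from this patch):
  // a plain dense 4-D tensor yields the tag "abcd", while a blocked weight layout such as
  // OIhw16i16o sorts the outer dimensions by stride ("ABcd") and then appends the inner
  // blocks, giving "ABcd16b16a".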
+ if (has_runtime_strides(md)) return "*"; + + dims_t strides; + array_copy(strides, blk.strides, md->data.ndims); + + simultaneous_sort(strides, ou_blocks, dim_chars, md->data.ndims, + [](dim_t a, dim_t b) { return b - a; }); + + dim_chars[md->data.ndims] = '\0'; + + std::string s(dim_chars); + + if (!plain) { + for (int iblk = 0; iblk < blk.inner_nblks; ++iblk) { + char c = ('a' + static_cast(blk.inner_idxs[iblk])); + s += (std::to_string(blk.inner_blks[iblk]) + c); + } + } + return s; +} + +dnnl::memory::dims str2dims(const std::string& str_shape, bool dilates = false, + std::string interval = ",") { + // Split strings + std::vector str_dims; + size_t pos = 0, start = 0; + while ((pos = str_shape.find(interval, start)) != std::string::npos) { + std::string str_dim = str_shape.substr(start, pos - start); + if (pos > start) str_dims.push_back(str_dim); + start = pos + interval.size(); + } + if (str_shape.size() > start) { + str_dims.push_back(str_shape.substr(start)); + } + // transfer string to dims + dnnl::memory::dims out_dims; + if (dilates) { + std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), + [](const std::string& str) { return std::stoi(str) - 1; }); + } else { + std::transform(str_dims.begin(), str_dims.end(), std::back_inserter(out_dims), + [](const std::string& str) { return std::stoi(str); }); + } + return out_dims; +} + +void check_shapes(const std::vector shapes) { + std::regex valid_pat("(\\d*)(,(\\d*))*"); + bool checked = std::regex_match(shapes[0], valid_pat); + for (size_t i = 1; i < shapes.size() - 1; i++) { + checked &= std::regex_match(shapes[i], valid_pat); + } + checked &= std::regex_match(shapes[shapes.size() - 1], std::regex("\\d*")); + if (!checked) { + LOG(FATAL) << "Invalid input args for query dnnl optimal layout."; + } +} + +void check_layout(bool var, bool ref) { + if (var != ref) { + LOG(FATAL) << "Invalid input layout for query dnnl optimal layout."; + } +} + +std::string get_optimal_layout_for_conv(std::string data_layout, std::string kernel_layout, + std::string weight_shape, std::string out_shape, + std::string paddings, std::string strides, + std::string dilates, std::string G, std::string dtype) { + check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); + check_layout(std::regex_match(kernel_layout, std::regex("(G?)OI(D?)(H?)W")), true); + check_shapes({weight_shape, out_shape, paddings, strides, dilates, G}); + + dnnl::engine eng(dnnl::engine::kind::cpu, 0); + dnnl::stream s(eng); + using tag = dnnl::memory::format_tag; + + dnnl::memory::dim groups = std::stoi(G); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape); + dnnl::memory::dims weight_dims = weight_dims_; + + if (groups > 1) { + if (weight_dims_.size() == 5) { + weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], + weight_dims_[4]}; + } else { + weight_dims[1] = weight_dims[1] * groups; + } + } + + dnnl::memory::dims out_dims = str2dims(out_shape); + dnnl::memory::dims padding_dims = str2dims(paddings); + dnnl::memory::dims padding_dims_l(padding_dims.begin(), + padding_dims.begin() + padding_dims.size() / 2); + dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, + padding_dims.end()); + dnnl::memory::dims strides_dims = str2dims(strides); + dnnl::memory::dims dilates_dims = str2dims(dilates, true); + + dnnl::memory::dims input_dims = out_dims; + input_dims[1] = weight_dims[1]; + for (size_t i = 2; i < out_dims.size(); i++) { + dnnl::memory::dim K = weight_dims[i]; + 
dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2]; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + input_dims[i] = out_dims[i] * S - PL - PR + DK - 1; + } + + dnnl::memory::dims conv_src_dims = input_dims; + dnnl::memory::dims conv_weights_dims = weight_dims; + if (groups > 1) { + conv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; + conv_weights_dims.insert(conv_weights_dims.end(), weight_dims.begin() + 2, weight_dims.end()); + } + + dnnl::memory::dims conv_dst_dims = out_dims; + dnnl::memory::dims conv_strides = strides_dims; + dnnl::memory::dims conv_dilates = dilates_dims; + dnnl::memory::dims conv_padding_l = padding_dims_l; + dnnl::memory::dims conv_padding_r = padding_dims_r; + + auto dnnl_dtype = dtype_dl2dnnl(tvm::runtime::String2DLDataType(dtype)); + auto conv_src_md = dnnl::memory::desc({conv_src_dims}, dnnl_dtype, tag::any); + auto conv_weights_md = dnnl::memory::desc({conv_weights_dims}, dnnl_dtype, tag::any); + auto conv_dst_md = dnnl::memory::desc({conv_dst_dims}, dnnl_dtype, tag::any); + + auto conv_desc = dnnl::convolution_forward::desc( + dnnl::prop_kind::forward_inference, dnnl::algorithm::convolution_direct, conv_src_md, + conv_weights_md, conv_dst_md, conv_strides, conv_dilates, conv_padding_l, conv_padding_r); + + auto conv_prim_desc = dnnl::convolution_forward::primitive_desc(conv_desc, eng); + + auto src_format = conv_prim_desc.src_desc(); + auto weights_format = conv_prim_desc.weights_desc(); + auto dst_format = conv_prim_desc.dst_desc(); + std::string src_df, weight_df, dst_df; + + src_df = md2fmt_tag_str(&src_format); + weight_df = md2fmt_tag_str(&weights_format); + dst_df = md2fmt_tag_str(&dst_format); + std::string res = src_df + "," + weight_df + "," + dst_df; + return res; +} + +std::string get_optimal_layout_for_conv_transpose(std::string data_layout, + std::string kernel_layout, + std::string weight_shape, std::string out_shape, + std::string paddings, std::string output_paddings, + std::string strides, std::string dilates, + std::string G, std::string dtype) { + check_layout(std::regex_match(data_layout, std::regex("NC(D?)(H?)W")), true); + check_layout(std::regex_match(kernel_layout, std::regex("(G?)((IO)|(OI))(D?)(H?)W")), true); + check_shapes({weight_shape, out_shape, paddings, output_paddings, strides, dilates, G}); + + dnnl::engine eng(dnnl::engine::kind::cpu, 0); + dnnl::stream s(eng); + using tag = dnnl::memory::format_tag; + + dnnl::memory::dim groups = std::stoi(G); + dnnl::memory::dims weight_dims_ = str2dims(weight_shape); + dnnl::memory::dims weight_dims = weight_dims_; + if (groups > 1) { + if (weight_dims_.size() == 5) { + weight_dims = {groups * weight_dims_[1], groups * weight_dims_[2], weight_dims_[3], + weight_dims_[4]}; + } else { + weight_dims[1] = weight_dims[1] * groups; + } + } + dnnl::memory::dims out_dims = str2dims(out_shape); + dnnl::memory::dims padding_dims = str2dims(paddings); + dnnl::memory::dims padding_dims_l(padding_dims.begin(), + padding_dims.begin() + padding_dims.size() / 2); + dnnl::memory::dims padding_dims_r(padding_dims.end() - padding_dims.size() / 2, + padding_dims.end()); + dnnl::memory::dims output_padding_dims = str2dims(output_paddings); + dnnl::memory::dims strides_dims = str2dims(strides); + dnnl::memory::dims dilates_dims = str2dims(dilates, true); + + dnnl::memory::dims input_dims = out_dims; + if (out_dims[1] == weight_dims[0]) { + input_dims[1] 
= weight_dims[1]; + } else { + input_dims[1] = weight_dims[0]; + std::swap(weight_dims[0], weight_dims[1]); + } + for (size_t i = 2; i < out_dims.size(); i++) { + dnnl::memory::dim K = weight_dims[i]; + dnnl::memory::dim S = strides_dims[i - 2]; + dnnl::memory::dim D = dilates_dims[i - 2]; + dnnl::memory::dim PL = padding_dims_l[i - 2]; + dnnl::memory::dim PR = padding_dims_r[i - 2]; + dnnl::memory::dim OP = output_padding_dims[i - 2]; + dnnl::memory::dim DK = 1 + (K - 1) * (D + 1); + input_dims[i] = (out_dims[i] - DK + PL + PR - OP) / S + 1; + } + + dnnl::memory::dims deconv_src_dims = input_dims; + dnnl::memory::dims deconv_weights_dims = weight_dims; + if (groups > 1) { + deconv_weights_dims = {groups, out_dims[1] / groups, input_dims[1] / groups}; + deconv_weights_dims.insert(deconv_weights_dims.end(), weight_dims.begin() + 2, + weight_dims.end()); + } + dnnl::memory::dims deconv_dst_dims = out_dims; + dnnl::memory::dims deconv_strides = strides_dims; + dnnl::memory::dims deconv_dilates = dilates_dims; + dnnl::memory::dims deconv_padding_l = padding_dims_l; + dnnl::memory::dims deconv_padding_r = padding_dims_r; + + auto dnnl_dtype = dtype_dl2dnnl(tvm::runtime::String2DLDataType(dtype)); + auto deconv_src_md = dnnl::memory::desc({deconv_src_dims}, dnnl_dtype, tag::any); + auto deconv_weights_md = dnnl::memory::desc({deconv_weights_dims}, dnnl_dtype, tag::any); + auto deconv_dst_md = dnnl::memory::desc({deconv_dst_dims}, dnnl_dtype, tag::any); + + auto deconv_desc = dnnl::deconvolution_forward::desc( + dnnl::prop_kind::forward_inference, dnnl::algorithm::deconvolution_direct, deconv_src_md, + deconv_weights_md, deconv_dst_md, deconv_strides, deconv_dilates, deconv_padding_l, + deconv_padding_r); + + auto deconv_prim_desc = dnnl::deconvolution_forward::primitive_desc(deconv_desc, eng); + + auto src_format = deconv_prim_desc.src_desc(); + auto weights_format = deconv_prim_desc.weights_desc(); + auto dst_format = deconv_prim_desc.dst_desc(); + std::string src_df, weight_df, dst_df; + + src_df = md2fmt_tag_str(&src_format); + weight_df = md2fmt_tag_str(&weights_format); + dst_df = md2fmt_tag_str(&dst_format); + std::string res = src_df + "," + weight_df + "," + dst_df; + return res; +} + +TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = get_optimal_layout_for_conv(args[0], args[1], args[2], args[3], args[4], args[5], + args[6], args[7], args[8]); + }); + +TVM_REGISTER_GLOBAL("relay.ir.get_optimal_layout_for_conv_transpose") + .set_body([](TVMArgs args, TVMRetValue* rv) { + *rv = get_optimal_layout_for_conv_transpose(args[0], args[1], args[2], args[3], args[4], + args[5], args[6], args[7], args[8], args[9]); + }); + +} // namespace contrib +} // namespace relay +} // namespace tvm diff --git a/tests/python/contrib/test_dnnl.py b/tests/python/contrib/test_dnnl.py old mode 100755 new mode 100644 diff --git a/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py b/tests/python/contrib/test_hexagon/test_2d_physical_buffers.py old mode 100755 new mode 100644 diff --git a/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py b/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py old mode 100755 new mode 100644 index 3f7e999c7bca..7cde83e0cb77 --- a/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py +++ b/tests/python/contrib/test_hexagon/topi/slice_op/test_global_avg_pool2d.py @@ -1,167 +1,167 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# 
or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test code for float16 and uint8 global_avg_pool2d.""" - -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import adaptive_pool -import tvm.topi.hexagon.qnn as qn -import tvm.topi.hexagon.slice_ops as sl -from tvm.contrib.hexagon import allocate_hexagon_array -from ...infrastructure import transform_numpy, quantize_np, get_hexagon_target - - -SCALE_M_VAL = None -ZERO_POINT_M_VAL = None -SCALE_VAL = None -ZERO_POINT_VAL = None - - -class TestGlobalPool2D: - (input_shape,) = tvm.testing.parameters( - ([1, 32, 8, 8],), - ([1, 1056, 16, 16],), - ) - - # Fixed chunk layout is set as nchw-32c8h8w-2d for uint8 and nchw-32c8h4w-2d for float16. - # For optimization, it might get changed later. - # Since output shape will be NxCx1x1 which is not a - # multiple of fixed-chunk, output_layout is NCHW. - input_layout, output_layout, pool_type, layout, dtype = tvm.testing.parameters( - ("nchw-32c8h8w-2d", "nchw", "avg", "NCHW", "uint8"), - ("nchw-32c8h4w-2d", "nchw", "avg", "NCHW", "float16"), - ) - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - pool_type, - layout, - ): - """Generate expected output.""" - ref_np = tvm.topi.testing.adaptive_pool( - input_np, - (1, 1), - pool_type, - layout, - ) - return ref_np - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype in ("uint8", "int8"): - dtype = "float32" - return np.random.random(input_shape).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np(self, input_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_VAL, SCALE_VAL - input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) - return input_np_quantized - - @tvm.testing.fixture - def transformed_input_np(self, input_np, quantize_input_np, input_layout, layout, dtype): - if dtype == "float16": - return transform_numpy(input_np, layout.lower(), input_layout) - if dtype in ("uint8", "int8"): - return transform_numpy(quantize_input_np, layout.lower(), input_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def quantize_expected_output_np(self, expected_output_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - - # Since output_layout is nchw, no transformation is needed. 
- return out_ref_quantized - - @tvm.testing.requires_hexagon - def test_global_pool2d( - self, - dtype, - input_shape, - input_layout, - transformed_input_np, - expected_output_np, - quantize_expected_output_np, - hexagon_session, - ): - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - if dtype == "float16": - m_tensor = sl.global_avg_pool2d(a_tensor) - tir_schedule = sl.stir_global_avg_pool2d_schedule(m_tensor, a_tensor, input_layout) - elif dtype in ["uint8", "int8"]: - m_tensor = qn.global_avg_pool2d_u8( - a_tensor, - dtype, - ZERO_POINT_VAL, - SCALE_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - ) - tir_schedule = qn.stir_global_avg_pool2d_u8_schedule(m_tensor, a_tensor, input_layout) - - sch = tir_schedule.mod - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="global_pool2d", - ) - - input_axis_separator = [4] - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - expected_output_np.shape, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - # Convert nd to np - m_data_np = m_data_nd.numpy() - - if dtype == "float16": - np.testing.assert_allclose(expected_output_np, m_data_np, rtol=1e-3, atol=1e-3) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test code for float16 and uint8 global_avg_pool2d.""" + +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import adaptive_pool +import tvm.topi.hexagon.qnn as qn +import tvm.topi.hexagon.slice_ops as sl +from tvm.contrib.hexagon import allocate_hexagon_array +from ...infrastructure import transform_numpy, quantize_np, get_hexagon_target + + +SCALE_M_VAL = None +ZERO_POINT_M_VAL = None +SCALE_VAL = None +ZERO_POINT_VAL = None + + +class TestGlobalPool2D: + (input_shape,) = tvm.testing.parameters( + ([1, 32, 8, 8],), + ([1, 1056, 16, 16],), + ) + + # Fixed chunk layout is set as nchw-32c8h8w-2d for uint8 and nchw-32c8h4w-2d for float16. + # For optimization, it might get changed later. + # Since output shape will be NxCx1x1 which is not a + # multiple of fixed-chunk, output_layout is NCHW. 
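    # A rough sketch of the packing assumed for the fixed-chunk strings above (the exact
    # transform is owned by transform_numpy / get_layout_transform_fn): "nchw-32c8h8w-2d"
    # splits C, H, W into blocks of 32, 8, 8, roughly
    #   x.reshape(n, c // 32, 32, h // 8, 8, w // 8, 8).transpose(0, 1, 3, 5, 2, 4, 6)
    # so each chunk holds 32 * 8 * 8 uint8 values (2 KB); the float16 variant uses 8h4w
    # blocks, which is again 32 * 8 * 4 * 2 bytes = 2 KB per chunk.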
+ input_layout, output_layout, pool_type, layout, dtype = tvm.testing.parameters( + ("nchw-32c8h8w-2d", "nchw", "avg", "NCHW", "uint8"), + ("nchw-32c8h4w-2d", "nchw", "avg", "NCHW", "float16"), + ) + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + pool_type, + layout, + ): + """Generate expected output.""" + ref_np = tvm.topi.testing.adaptive_pool( + input_np, + (1, 1), + pool_type, + layout, + ) + return ref_np + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype in ("uint8", "int8"): + dtype = "float32" + return np.random.random(input_shape).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np(self, input_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_VAL, SCALE_VAL + input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) + return input_np_quantized + + @tvm.testing.fixture + def transformed_input_np(self, input_np, quantize_input_np, input_layout, layout, dtype): + if dtype == "float16": + return transform_numpy(input_np, layout.lower(), input_layout) + if dtype in ("uint8", "int8"): + return transform_numpy(quantize_input_np, layout.lower(), input_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def quantize_expected_output_np(self, expected_output_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + + # Since output_layout is nchw, no transformation is needed. + return out_ref_quantized + + @tvm.testing.requires_hexagon + def test_global_pool2d( + self, + dtype, + input_shape, + input_layout, + transformed_input_np, + expected_output_np, + quantize_expected_output_np, + hexagon_session, + ): + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + if dtype == "float16": + m_tensor = sl.global_avg_pool2d(a_tensor) + tir_schedule = sl.stir_global_avg_pool2d_schedule(m_tensor, a_tensor, input_layout) + elif dtype in ["uint8", "int8"]: + m_tensor = qn.global_avg_pool2d_u8( + a_tensor, + dtype, + ZERO_POINT_VAL, + SCALE_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + ) + tir_schedule = qn.stir_global_avg_pool2d_u8_schedule(m_tensor, a_tensor, input_layout) + + sch = tir_schedule.mod + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="global_pool2d", + ) + + input_axis_separator = [4] + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + expected_output_np.shape, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + # Convert nd to np + m_data_np = m_data_nd.numpy() + + if dtype == "float16": + np.testing.assert_allclose(expected_output_np, m_data_np, rtol=1e-3, atol=1e-3) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py b/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py old mode 100755 new mode 100644 index 4d4aef25e33f..e5b6c4d79065 --- a/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py +++ b/tests/python/contrib/test_hexagon/topi/test_adaptive_avg_pool1d.py @@ 
-1,185 +1,185 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -"""Test code for specialized case of adaptive_avg_pool1d.""" - -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import adaptive_pool -import tvm.topi.hexagon.qnn as s1 -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import transform_numpy, quantize_np - - -SCALE_M_VAL = None -ZERO_POINT_M_VAL = None -SCALE_VAL = None -ZERO_POINT_VAL = None - - -class TestAdaptivePool1D: - """Test specialized case of adaptive_avg_pool1d.""" - - (input_shape,) = tvm.testing.parameters( - ([1, 128, 128],), - ([1, 64, 64],), - ([1, 64, 128],), - ([1, 32, 64],), - ([1, 128, 768],), - ) - - # Fixed chunk layout is set as ncw-32c64w-2d for now. - # The adaptive_avg_pool1d implementation only handles specialized case - # where output_size is 1 as it appears on quantized distilbert model. - # Since output size won't be a multiple of fixed-chunk, - # output_layout is ncw. - # For optimization, it might get changed later. - input_layout, output_layout, pool_type, layout, output_size, dtype, = tvm.testing.parameters( - ( - "ncw-32c64w-2d", - "ncw", - "avg", - "NCW", - [1], - "uint8", - ) - ) - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - output_size, - pool_type, - layout, - ): - """Generate expected output.""" - out_width = output_size[0] - - ref_np = adaptive_pool( - input_np, - out_width, - pool_type, - layout, - ) - return ref_np - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype in ("uint8", "int8"): - dtype = "float32" - return np.random.random(input_shape).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np(self, input_np, dtype): - if dtype in ("uint8", "int8"): - global ZERO_POINT_VAL, SCALE_VAL - input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) - return input_np_quantized - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_input_np(self, quantize_input_np, input_layout, layout, dtype): - if dtype in ("uint8", "int8"): - return transform_numpy(quantize_input_np, layout.lower(), input_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def quantize_expected_output_np(self, expected_output_np, dtype): - """Generate expected output.""" - if dtype in ("uint8", "int8"): - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - - # Since output_layout is ncw, no transformation is needed. 
- return out_ref_quantized - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.requires_hexagon - def test_pool1d( - self, - dtype, - output_size, - input_layout, - output_layout, - input_shape, - transformed_input_np, - quantize_expected_output_np, - hexagon_session, - ): - """Test adaptive_avg_pool1d.""" - target_hexagon = tvm.target.hexagon("v69") - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - m_tensor = s1.adaptive_avg_pool1d( - a_tensor, - output_size, - dtype, - ZERO_POINT_VAL, - SCALE_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - ) - - tir_schedule = s1.tir_adaptive_avg_pool1d_schedule( - m_tensor, a_tensor, output_layout, input_layout - ) - - sch = tir_schedule.mod - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - tvm.target.Target(target_hexagon, host=target_hexagon), - name="adaptive_pool1d", - ) - - input_axis_separator = [3] - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - quantize_expected_output_np.shape, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - # Convert nd to np - m_data_np = m_data_nd.numpy() - - np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=2) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Test code for specialized case of adaptive_avg_pool1d.""" + +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import adaptive_pool +import tvm.topi.hexagon.qnn as s1 +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import transform_numpy, quantize_np + + +SCALE_M_VAL = None +ZERO_POINT_M_VAL = None +SCALE_VAL = None +ZERO_POINT_VAL = None + + +class TestAdaptivePool1D: + """Test specialized case of adaptive_avg_pool1d.""" + + (input_shape,) = tvm.testing.parameters( + ([1, 128, 128],), + ([1, 64, 64],), + ([1, 64, 128],), + ([1, 32, 64],), + ([1, 128, 768],), + ) + + # Fixed chunk layout is set as ncw-32c64w-2d for now. + # The adaptive_avg_pool1d implementation only handles specialized case + # where output_size is 1 as it appears on quantized distilbert model. + # Since output size won't be a multiple of fixed-chunk, + # output_layout is ncw. + # For optimization, it might get changed later. 
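    # Intuition for the specialized case exercised here (a sketch only; the test keeps
    # adaptive_pool as its reference): with output_size == 1, adaptive_avg_pool1d in NCW
    # layout is simply a per-channel mean over W, i.e.
    #   input_np.mean(axis=-1, keepdims=True)
    # and the uint8 chunk 32c64w again packs 32 * 64 = 2048 bytes per crouton in VTCM.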
+ input_layout, output_layout, pool_type, layout, output_size, dtype, = tvm.testing.parameters( + ( + "ncw-32c64w-2d", + "ncw", + "avg", + "NCW", + [1], + "uint8", + ) + ) + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + output_size, + pool_type, + layout, + ): + """Generate expected output.""" + out_width = output_size[0] + + ref_np = adaptive_pool( + input_np, + out_width, + pool_type, + layout, + ) + return ref_np + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype in ("uint8", "int8"): + dtype = "float32" + return np.random.random(input_shape).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np(self, input_np, dtype): + if dtype in ("uint8", "int8"): + global ZERO_POINT_VAL, SCALE_VAL + input_np_quantized, SCALE_VAL, ZERO_POINT_VAL = quantize_np(input_np, dtype) + return input_np_quantized + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_input_np(self, quantize_input_np, input_layout, layout, dtype): + if dtype in ("uint8", "int8"): + return transform_numpy(quantize_input_np, layout.lower(), input_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def quantize_expected_output_np(self, expected_output_np, dtype): + """Generate expected output.""" + if dtype in ("uint8", "int8"): + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + + # Since output_layout is ncw, no transformation is needed. + return out_ref_quantized + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.requires_hexagon + def test_pool1d( + self, + dtype, + output_size, + input_layout, + output_layout, + input_shape, + transformed_input_np, + quantize_expected_output_np, + hexagon_session, + ): + """Test adaptive_avg_pool1d.""" + target_hexagon = tvm.target.hexagon("v69") + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + m_tensor = s1.adaptive_avg_pool1d( + a_tensor, + output_size, + dtype, + ZERO_POINT_VAL, + SCALE_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + ) + + tir_schedule = s1.tir_adaptive_avg_pool1d_schedule( + m_tensor, a_tensor, output_layout, input_layout + ) + + sch = tir_schedule.mod + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + tvm.target.Target(target_hexagon, host=target_hexagon), + name="adaptive_pool1d", + ) + + input_axis_separator = [3] + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + quantize_expected_output_np.shape, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + # Convert nd to np + m_data_np = m_data_nd.numpy() + + np.testing.assert_allclose(quantize_expected_output_np, m_data_np, atol=2) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py b/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py index e0bb6b5864d3..94cb5ffca543 100644 --- a/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py +++ b/tests/python/contrib/test_hexagon/topi/test_add_subtract_multiply.py @@ -1,411 +1,411 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Test code for Add, Subtract and Multiply.""" -import numpy as np - -import tvm -from tvm import te -import tvm.topi.hexagon.slice_ops as sl -import tvm.topi.hexagon.qnn as qn -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import ( - transform_numpy, - quantize_np, - get_hexagon_target, -) - -ZERO_POINT_A_VAL = None -SCALE_A_VAL = None - -ZERO_POINT_B_VAL = None -SCALE_B_VAL = None - -ZERO_POINT_M_VAL = None -SCALE_M_VAL = None - - -def hexagon_wrapper_allocation( - device, - layout, - axis_separators, - tensor_shape=None, - data_original=None, - transformed_data=None, - dtype=None, -): - """Input layout can either be nhwc-8h2w32c2w-2d or nhwc""" - if layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h8w32c-2d"]: - data_nd = allocate_hexagon_array( - device, - tensor_shape=tensor_shape, - data=transformed_data, - dtype=dtype, - axis_separators=axis_separators, - mem_scope="global.vtcm", - ) - elif layout == "nhwc": - data_nd = allocate_hexagon_array( - device, - data=data_original, - ) - return data_nd - - -class TestAddSubtractMultiplyBroadcast2d: - """Test Add, Subtract and Multiply class.""" - - ( - input_shape_a, - input_shape_b, - input_a_layout, - input_b_layout, - output_layout, - dtype, - ) = tvm.testing.parameters( - # no broadcast needed - short input - ( - [1, 8, 4, 32], - [1, 8, 4, 32], - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # no broadcast needed - large input - ( - [1, 56, 64, 128], - [1, 56, 64, 128], - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # one input needs broadcast - ( - [1, 56, 64, 128], - [1, 1, 64, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # Both input needs broadcast - ( - [1, 56, 1, 128], - [1, 1, 64, 1], - "nhwc", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # One axis in one input needs broadcast - ( - [1, 56, 20, 128], - [1, 56, 20, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - # broadcast all axes in one input - ( - [1, 48, 56, 32], - [1, 1, 1, 1], - "nhwc-8h2w32c2w-2d", - "nhwc", - "nhwc-8h2w32c2w-2d", - "float16", - ), - ( - [1, 48, 32, 64], - [1, 48, 32, 64], - "nhwc-8h8w32c-2d", - "nhwc-8h8w32c-2d", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 2 in one input - ( - [1, 48, 32, 64], - [1, 48, 1, 64], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 1 in one input - ( - [1, 48, 32, 64], - [1, 1, 32, 64], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast axis 3 in one input - ( - [1, 8, 8, 32], - [1, 8, 8, 1], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast both inputs - ( - [1, 56, 1, 128], - [1, 1, 64, 1], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # 
broadcast both inputs - ( - [1, 48, 1, 1], - [1, 1, 32, 32], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast both inputs - ( - [1, 48, 1, 32], - [1, 1, 32, 1], - "nhwc", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - # broadcast all axes in one input - ( - [1, 48, 56, 32], - [1, 1, 1, 1], - "nhwc-8h8w32c-2d", - "nhwc", - "nhwc-8h8w32c-2d", - "uint8", - ), - ) - - op_name = tvm.testing.parameter("add", "subtract", "multiply") - - @tvm.testing.fixture - def expected_output_np(self, input_np_a, input_np_b, op_name): - """Generate expected output.""" - if op_name == "add": - out_ref = np.add(input_np_a, input_np_b) - elif op_name == "subtract": - out_ref = np.subtract(input_np_a, input_np_b) - elif op_name == "multiply": - out_ref = np.multiply(input_np_a, input_np_b) - return out_ref - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, output_layout, dtype): - """Generate expected output.""" - if dtype == "float16": - return transform_numpy(expected_output_np, "nhwc", output_layout) - if dtype in ["uint8", "int8"]: - global ZERO_POINT_M_VAL, SCALE_M_VAL - out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( - expected_output_np, dtype - ) - return transform_numpy(out_ref_quantized, "nhwc", output_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def input_np_a(self, input_shape_a, dtype): - """Generate numpy input for variable a.""" - if dtype in ["uint8", "int8"]: - dtype = "float32" - return np.random.random(input_shape_a).astype(dtype) - - @tvm.testing.fixture - def input_np_b(self, input_shape_b, dtype): - """Generate numpy input for variable b.""" - if dtype in ["uint8", "int8"]: - dtype = "float32" - return np.random.random(input_shape_b).astype(dtype) - - @tvm.testing.fixture - def quantize_input_np_a(self, input_np_a, dtype): - if dtype in ["uint8", "int8"]: - global ZERO_POINT_A_VAL, SCALE_A_VAL - input_np_a_quantized, SCALE_A_VAL, ZERO_POINT_A_VAL = quantize_np(input_np_a, dtype) - return input_np_a_quantized - return None - - @tvm.testing.fixture - def quantize_input_np_b(self, input_np_b, dtype): - if dtype in ["uint8", "int8"]: - global ZERO_POINT_B_VAL, SCALE_B_VAL - input_np_b_quantized, SCALE_B_VAL, ZERO_POINT_B_VAL = quantize_np(input_np_b, dtype) - return input_np_b_quantized - return None - - @tvm.testing.fixture - def transformed_input_np_a(self, input_np_a, quantize_input_np_a, input_a_layout, dtype): - if dtype == "float16": - return transform_numpy(input_np_a, "nhwc", input_a_layout) - if dtype in ["uint8", "int8"]: - return transform_numpy(quantize_input_np_a, "nhwc", input_a_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_input_np_b(self, input_np_b, quantize_input_np_b, input_b_layout, dtype): - if dtype == "float16": - return transform_numpy(input_np_b, "nhwc", input_b_layout) - if dtype in ["uint8", "int8"]: - return transform_numpy(quantize_input_np_b, "nhwc", input_b_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.requires_hexagon - def test_transform( - self, - dtype, - input_shape_a, - input_shape_b, - input_np_a, - input_np_b, - quantize_input_np_a, - quantize_input_np_b, - transformed_input_np_a, - transformed_input_np_b, - expected_output_np, - transformed_expected_output_np, - hexagon_session, - output_layout, - input_a_layout, - input_b_layout, - op_name, - ): - """Test transform.""" - output_shape = expected_output_np.shape - a_tensor = 
te.placeholder(input_shape_a, name="a_tensor", dtype=dtype) - b_tensor = te.placeholder(input_shape_b, name="b_tensor", dtype=dtype) - if dtype == "float16": - if op_name == "add": - m_tensor = sl.add_broadcast_compute(a_tensor, b_tensor) - elif op_name == "subtract": - m_tensor = sl.subtract_broadcast_compute(a_tensor, b_tensor) - elif op_name == "multiply": - m_tensor = sl.multiply_broadcast_compute(a_tensor, b_tensor) - tir_schedule = sl.tir_broadcast_schedule( - m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout, op_name - ) - elif dtype in ["uint8", "int8"]: - args = [ - a_tensor, - b_tensor, - output_shape, - ZERO_POINT_A_VAL, - SCALE_A_VAL, - ZERO_POINT_B_VAL, - SCALE_B_VAL, - ZERO_POINT_M_VAL, - SCALE_M_VAL, - dtype, - ] - if op_name == "add": - m_tensor = qn.qadd_broadcast_compute(*args) - elif op_name == "subtract": - m_tensor = qn.qsubtract_broadcast_compute(*args) - elif op_name == "multiply": - m_tensor = qn.qmultiply_broadcast_compute(*args) - tir_schedule = qn.tir_schedule_quant( - m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - if output_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - output_axis_separator = [4] - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, b_tensor, m_tensor], - get_hexagon_target("v69"), - name="slice_op_with_transform", - ) - - if dtype == "float16": - in_data_np_a = input_np_a - in_data_np_b = input_np_b - elif dtype in ["int8", "uint8"]: - in_data_np_a = quantize_input_np_a - in_data_np_b = quantize_input_np_b - else: - raise RuntimeError(f"Unsupport dtype '{dtype}'") - - a_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=input_a_layout, - data_original=in_data_np_a, - transformed_data=transformed_input_np_a, - axis_separators=input_axis_separator, - ) - b_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=input_b_layout, - data_original=in_data_np_b, - transformed_data=transformed_input_np_b, - axis_separators=input_axis_separator, - ) - m_data_nd = hexagon_wrapper_allocation( - hexagon_session.device, - layout=output_layout, - tensor_shape=transformed_expected_output_np.shape, - axis_separators=output_axis_separator, - dtype=dtype, - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, b_data_nd, m_data_nd) - - batch, height, width, channel = output_shape - # convert nd to np and reshape to fixed chunk size layout - if output_layout == "nhwc-8h2w32c2w-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch, height // 8, width // 4, channel // 32, 8, 2, 32, 2] - ) - elif output_layout == "nhwc-8h8w32c-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch, height // 8, width // 8, channel // 32, 8, 8, 32] - ) - - if dtype == "float16": - np.testing.assert_allclose( - transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 - ) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Test code for Add, Subtract and Multiply.""" +import numpy as np + +import tvm +from tvm import te +import tvm.topi.hexagon.slice_ops as sl +import tvm.topi.hexagon.qnn as qn +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import ( + transform_numpy, + quantize_np, + get_hexagon_target, +) + +ZERO_POINT_A_VAL = None +SCALE_A_VAL = None + +ZERO_POINT_B_VAL = None +SCALE_B_VAL = None + +ZERO_POINT_M_VAL = None +SCALE_M_VAL = None + + +def hexagon_wrapper_allocation( + device, + layout, + axis_separators, + tensor_shape=None, + data_original=None, + transformed_data=None, + dtype=None, +): + """Input layout can either be nhwc-8h2w32c2w-2d or nhwc""" + if layout in ["nhwc-8h2w32c2w-2d", "nhwc-8h8w32c-2d"]: + data_nd = allocate_hexagon_array( + device, + tensor_shape=tensor_shape, + data=transformed_data, + dtype=dtype, + axis_separators=axis_separators, + mem_scope="global.vtcm", + ) + elif layout == "nhwc": + data_nd = allocate_hexagon_array( + device, + data=data_original, + ) + return data_nd + + +class TestAddSubtractMultiplyBroadcast2d: + """Test Add, Subtract and Multiply class.""" + + ( + input_shape_a, + input_shape_b, + input_a_layout, + input_b_layout, + output_layout, + dtype, + ) = tvm.testing.parameters( + # no broadcast needed - short input + ( + [1, 8, 4, 32], + [1, 8, 4, 32], + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # no broadcast needed - large input + ( + [1, 56, 64, 128], + [1, 56, 64, 128], + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # one input needs broadcast + ( + [1, 56, 64, 128], + [1, 1, 64, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # Both input needs broadcast + ( + [1, 56, 1, 128], + [1, 1, 64, 1], + "nhwc", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # One axis in one input needs broadcast + ( + [1, 56, 20, 128], + [1, 56, 20, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + # broadcast all axes in one input + ( + [1, 48, 56, 32], + [1, 1, 1, 1], + "nhwc-8h2w32c2w-2d", + "nhwc", + "nhwc-8h2w32c2w-2d", + "float16", + ), + ( + [1, 48, 32, 64], + [1, 48, 32, 64], + "nhwc-8h8w32c-2d", + "nhwc-8h8w32c-2d", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 2 in one input + ( + [1, 48, 32, 64], + [1, 48, 1, 64], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 1 in one input + ( + [1, 48, 32, 64], + [1, 1, 32, 64], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast axis 3 in one input + ( + [1, 8, 8, 32], + [1, 8, 8, 1], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast both inputs + ( + [1, 56, 1, 128], + [1, 1, 64, 1], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast both inputs + ( + [1, 48, 1, 1], + [1, 1, 32, 32], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), 
+ # broadcast both inputs + ( + [1, 48, 1, 32], + [1, 1, 32, 1], + "nhwc", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + # broadcast all axes in one input + ( + [1, 48, 56, 32], + [1, 1, 1, 1], + "nhwc-8h8w32c-2d", + "nhwc", + "nhwc-8h8w32c-2d", + "uint8", + ), + ) + + op_name = tvm.testing.parameter("add", "subtract", "multiply") + + @tvm.testing.fixture + def expected_output_np(self, input_np_a, input_np_b, op_name): + """Generate expected output.""" + if op_name == "add": + out_ref = np.add(input_np_a, input_np_b) + elif op_name == "subtract": + out_ref = np.subtract(input_np_a, input_np_b) + elif op_name == "multiply": + out_ref = np.multiply(input_np_a, input_np_b) + return out_ref + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, output_layout, dtype): + """Generate expected output.""" + if dtype == "float16": + return transform_numpy(expected_output_np, "nhwc", output_layout) + if dtype in ["uint8", "int8"]: + global ZERO_POINT_M_VAL, SCALE_M_VAL + out_ref_quantized, SCALE_M_VAL, ZERO_POINT_M_VAL = quantize_np( + expected_output_np, dtype + ) + return transform_numpy(out_ref_quantized, "nhwc", output_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def input_np_a(self, input_shape_a, dtype): + """Generate numpy input for variable a.""" + if dtype in ["uint8", "int8"]: + dtype = "float32" + return np.random.random(input_shape_a).astype(dtype) + + @tvm.testing.fixture + def input_np_b(self, input_shape_b, dtype): + """Generate numpy input for variable b.""" + if dtype in ["uint8", "int8"]: + dtype = "float32" + return np.random.random(input_shape_b).astype(dtype) + + @tvm.testing.fixture + def quantize_input_np_a(self, input_np_a, dtype): + if dtype in ["uint8", "int8"]: + global ZERO_POINT_A_VAL, SCALE_A_VAL + input_np_a_quantized, SCALE_A_VAL, ZERO_POINT_A_VAL = quantize_np(input_np_a, dtype) + return input_np_a_quantized + return None + + @tvm.testing.fixture + def quantize_input_np_b(self, input_np_b, dtype): + if dtype in ["uint8", "int8"]: + global ZERO_POINT_B_VAL, SCALE_B_VAL + input_np_b_quantized, SCALE_B_VAL, ZERO_POINT_B_VAL = quantize_np(input_np_b, dtype) + return input_np_b_quantized + return None + + @tvm.testing.fixture + def transformed_input_np_a(self, input_np_a, quantize_input_np_a, input_a_layout, dtype): + if dtype == "float16": + return transform_numpy(input_np_a, "nhwc", input_a_layout) + if dtype in ["uint8", "int8"]: + return transform_numpy(quantize_input_np_a, "nhwc", input_a_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_input_np_b(self, input_np_b, quantize_input_np_b, input_b_layout, dtype): + if dtype == "float16": + return transform_numpy(input_np_b, "nhwc", input_b_layout) + if dtype in ["uint8", "int8"]: + return transform_numpy(quantize_input_np_b, "nhwc", input_b_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.requires_hexagon + def test_transform( + self, + dtype, + input_shape_a, + input_shape_b, + input_np_a, + input_np_b, + quantize_input_np_a, + quantize_input_np_b, + transformed_input_np_a, + transformed_input_np_b, + expected_output_np, + transformed_expected_output_np, + hexagon_session, + output_layout, + input_a_layout, + input_b_layout, + op_name, + ): + """Test transform.""" + output_shape = expected_output_np.shape + a_tensor = te.placeholder(input_shape_a, name="a_tensor", dtype=dtype) + b_tensor = te.placeholder(input_shape_b, name="b_tensor", 
dtype=dtype) + if dtype == "float16": + if op_name == "add": + m_tensor = sl.add_broadcast_compute(a_tensor, b_tensor) + elif op_name == "subtract": + m_tensor = sl.subtract_broadcast_compute(a_tensor, b_tensor) + elif op_name == "multiply": + m_tensor = sl.multiply_broadcast_compute(a_tensor, b_tensor) + tir_schedule = sl.tir_broadcast_schedule( + m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout, op_name + ) + elif dtype in ["uint8", "int8"]: + args = [ + a_tensor, + b_tensor, + output_shape, + ZERO_POINT_A_VAL, + SCALE_A_VAL, + ZERO_POINT_B_VAL, + SCALE_B_VAL, + ZERO_POINT_M_VAL, + SCALE_M_VAL, + dtype, + ] + if op_name == "add": + m_tensor = qn.qadd_broadcast_compute(*args) + elif op_name == "subtract": + m_tensor = qn.qsubtract_broadcast_compute(*args) + elif op_name == "multiply": + m_tensor = qn.qmultiply_broadcast_compute(*args) + tir_schedule = qn.tir_schedule_quant( + m_tensor, a_tensor, b_tensor, output_layout, input_a_layout, input_b_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + if output_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + output_axis_separator = [4] + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, b_tensor, m_tensor], + get_hexagon_target("v69"), + name="slice_op_with_transform", + ) + + if dtype == "float16": + in_data_np_a = input_np_a + in_data_np_b = input_np_b + elif dtype in ["int8", "uint8"]: + in_data_np_a = quantize_input_np_a + in_data_np_b = quantize_input_np_b + else: + raise RuntimeError(f"Unsupport dtype '{dtype}'") + + a_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=input_a_layout, + data_original=in_data_np_a, + transformed_data=transformed_input_np_a, + axis_separators=input_axis_separator, + ) + b_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=input_b_layout, + data_original=in_data_np_b, + transformed_data=transformed_input_np_b, + axis_separators=input_axis_separator, + ) + m_data_nd = hexagon_wrapper_allocation( + hexagon_session.device, + layout=output_layout, + tensor_shape=transformed_expected_output_np.shape, + axis_separators=output_axis_separator, + dtype=dtype, + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, b_data_nd, m_data_nd) + + batch, height, width, channel = output_shape + # convert nd to np and reshape to fixed chunk size layout + if output_layout == "nhwc-8h2w32c2w-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch, height // 8, width // 4, channel // 32, 8, 2, 32, 2] + ) + elif output_layout == "nhwc-8h8w32c-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch, height // 8, width // 8, channel // 32, 8, 8, 32] + ) + + if dtype == "float16": + np.testing.assert_allclose( + transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 + ) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_quantize.py b/tests/python/contrib/test_hexagon/topi/test_quantize.py index ac4f4d4e3047..2c54b12ab98e 100644 --- a/tests/python/contrib/test_hexagon/topi/test_quantize.py +++ b/tests/python/contrib/test_hexagon/topi/test_quantize.py @@ -1,128 +1,128 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""TIR quantize schedule tests.""" -import numpy as np - -import tvm -from tvm import te -import tvm.topi.hexagon.qnn as s1 -from tvm.contrib.hexagon import allocate_hexagon_array -from ..infrastructure import ( - transform_numpy, - quantize_np, - get_hexagon_target, -) - -QUANTIZE_SCALE = None -QUANTIZE_ZERO_POINT = None - - -class TestQuantize: - """Test quantize class.""" - - @tvm.testing.fixture - def expected_output_np(self, input_np, output_dtype): - global QUANTIZE_SCALE, QUANTIZE_ZERO_POINT - quant_np, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT = quantize_np(input_np, output_dtype) - return quant_np - - @tvm.testing.fixture - def input_np(self, input_shape, input_dtype): - return np.random.random(input_shape).astype(input_dtype) - - @tvm.testing.fixture - def transformed_input_np(self, input_np, input_crouton_layout): - return transform_numpy(input_np, "nhwc", input_crouton_layout) - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, output_layout): - return transform_numpy(expected_output_np, "nhwc", output_layout) - - input_crouton_layout, output_layout, input_dtype = tvm.testing.parameters( - ("nhwc-4h2w32c2w-2d", "nhwc-8h8w32c-2d", "float32"), - ) - - output_dtype = tvm.testing.parameter("uint8", "int8") - - input_shape = tvm.testing.parameter( - (1, 8, 8, 32), (1, 16, 16, 32), (1, 16, 16, 128), (1, 64, 64, 64) - ) - - @tvm.testing.requires_hexagon - def test_quantize( - self, - input_dtype, - output_dtype, - transformed_input_np, - input_shape, - expected_output_np, - transformed_expected_output_np, - input_crouton_layout, - output_layout, - hexagon_session, - ): - """Test quantize.""" - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=input_dtype) - - m_tensor = s1.quantize_compute(a_tensor, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT, output_dtype) - - tir_schedule = s1.tir_quantize_schedule( - m_tensor, a_tensor, input_crouton_layout, output_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - output_axis_separator = [4] - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="quantize", - ) - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=input_dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - tensor_shape=transformed_expected_output_np.shape, - dtype=output_dtype, - axis_separators=output_axis_separator, - mem_scope="global.vtcm", - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - b, h, weight, c = expected_output_np.shape - - # convert nd to np and reshape to fixed chunk size layout - m_data_np = m_data_nd.numpy().reshape([b, h // 8, weight // 8, c // 32, 8, 8, 32]) - - 
np.testing.assert_allclose(transformed_expected_output_np, m_data_np, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""TIR quantize schedule tests.""" +import numpy as np + +import tvm +from tvm import te +import tvm.topi.hexagon.qnn as s1 +from tvm.contrib.hexagon import allocate_hexagon_array +from ..infrastructure import ( + transform_numpy, + quantize_np, + get_hexagon_target, +) + +QUANTIZE_SCALE = None +QUANTIZE_ZERO_POINT = None + + +class TestQuantize: + """Test quantize class.""" + + @tvm.testing.fixture + def expected_output_np(self, input_np, output_dtype): + global QUANTIZE_SCALE, QUANTIZE_ZERO_POINT + quant_np, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT = quantize_np(input_np, output_dtype) + return quant_np + + @tvm.testing.fixture + def input_np(self, input_shape, input_dtype): + return np.random.random(input_shape).astype(input_dtype) + + @tvm.testing.fixture + def transformed_input_np(self, input_np, input_crouton_layout): + return transform_numpy(input_np, "nhwc", input_crouton_layout) + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, output_layout): + return transform_numpy(expected_output_np, "nhwc", output_layout) + + input_crouton_layout, output_layout, input_dtype = tvm.testing.parameters( + ("nhwc-4h2w32c2w-2d", "nhwc-8h8w32c-2d", "float32"), + ) + + output_dtype = tvm.testing.parameter("uint8", "int8") + + input_shape = tvm.testing.parameter( + (1, 8, 8, 32), (1, 16, 16, 32), (1, 16, 16, 128), (1, 64, 64, 64) + ) + + @tvm.testing.requires_hexagon + def test_quantize( + self, + input_dtype, + output_dtype, + transformed_input_np, + input_shape, + expected_output_np, + transformed_expected_output_np, + input_crouton_layout, + output_layout, + hexagon_session, + ): + """Test quantize.""" + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=input_dtype) + + m_tensor = s1.quantize_compute(a_tensor, QUANTIZE_SCALE, QUANTIZE_ZERO_POINT, output_dtype) + + tir_schedule = s1.tir_quantize_schedule( + m_tensor, a_tensor, input_crouton_layout, output_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + output_axis_separator = [4] + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="quantize", + ) + + a_data_nd = allocate_hexagon_array( + hexagon_session.device, + data=transformed_input_np, + dtype=input_dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + tensor_shape=transformed_expected_output_np.shape, + dtype=output_dtype, + axis_separators=output_axis_separator, + mem_scope="global.vtcm", + ) + + mod = hexagon_session.load_module(func) + 
mod(a_data_nd, m_data_nd) + + b, h, weight, c = expected_output_np.shape + + # convert nd to np and reshape to fixed chunk size layout + m_data_np = m_data_nd.numpy().reshape([b, h // 8, weight // 8, c // 32, 8, 8, 32]) + + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/contrib/test_hexagon/topi/test_resize2d.py b/tests/python/contrib/test_hexagon/topi/test_resize2d.py index c0c6e7ca0fb4..7d7d80fc7f7c 100644 --- a/tests/python/contrib/test_hexagon/topi/test_resize2d.py +++ b/tests/python/contrib/test_hexagon/topi/test_resize2d.py @@ -1,202 +1,202 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Resize 2D tesst. -""" -import numpy as np - -import tvm -from tvm import te -from tvm.topi.testing import resize2d_python -import tvm.topi.hexagon as s1 -from tvm.contrib.hexagon import allocate_hexagon_array - -from ..infrastructure import transform_numpy, get_hexagon_target - - -class TestResize2d: - """Test resize 2D class.""" - - (batch, channel, in_height, in_width, out_height, out_width,) = tvm.testing.parameters( - ( - 1, - 32, - 8, - 8, - 16, - 16, - ), - ( - 1, - 32, - 48, - 48, - 8, - 8, - ), - ) - - (layout, input_crouton_layout, output_layout, dtype,) = tvm.testing.parameters( - ("NHWC", "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "float16"), - ("NHWC", "nhwc-8h8w32c-2d", "nhwc-8h8w32c-2d", "uint8"), - ) - - coord_trans = tvm.testing.parameter("asymmetric", "align_corners", "half_pixel") - method = tvm.testing.parameter("nearest_neighbor", "linear") - - @tvm.testing.fixture - def expected_output_np( - self, - input_np, - in_height, - in_width, - out_height, - out_width, - layout, - method, - coord_trans, - ): - """Generate expected output.""" - scale_h = out_height / in_height - scale_w = out_width / in_width - - return resize2d_python(input_np, (scale_h, scale_w), layout, method, coord_trans) - - @tvm.testing.fixture - def input_np(self, input_shape, dtype): - if dtype == "float16": - return np.random.random(input_shape).astype(dtype) - if dtype == "uint8": - return np.random.randint(0, 255, input_shape).astype(dtype) - if dtype == "int8": - return np.random.randint(-128, 127, input_shape).astype(dtype) - raise RuntimeError(f"dtype {dtype} is not valid.") - - @tvm.testing.fixture - def transformed_input_np(self, input_np, layout, input_crouton_layout, dtype): - if dtype in ["float16", "uint8", "int8"]: - return transform_numpy(input_np, layout.lower(), input_crouton_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def transformed_expected_output_np(self, expected_output_np, layout, output_layout, dtype): - if dtype in ["float16", "uint8", "int8"]: - return transform_numpy(expected_output_np, 
layout.lower(), output_layout) - - raise RuntimeError(f"Unsupported data type '{dtype}'") - - @tvm.testing.fixture - def input_shape(self, batch, channel, in_height, in_width): - return (batch, in_height, in_width, channel) - - @tvm.testing.fixture - def output_shape(self, batch, channel, out_height, out_width): - return (batch, out_height, out_width, channel) - - @tvm.testing.requires_hexagon - def test_resize2d( - self, - dtype, - input_np, - transformed_input_np, - input_shape, - output_shape, - expected_output_np, - transformed_expected_output_np, - layout, - input_crouton_layout, - output_layout, - coord_trans, - method, - hexagon_session, - ): - """Test resize 2D.""" - a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) - - m_tensor = s1.resize2d_compute( - a_tensor, - [0.0] * 4, - (output_shape[1], output_shape[2]), - layout=layout, - coordinate_transformation_mode=coord_trans, - method=method, - out_dtype=dtype, - ) - - tir_schedule = s1.tir_resize2d_schedule( - m_tensor, a_tensor, input_crouton_layout, output_layout - ) - - sch = tir_schedule.mod - - input_axis_separator = [4] - if output_layout in ( - "nhwc-8h2w32c2w-2d", - "nhwc-8h8w32c-2d", - ): - output_axis_separator = [4] - else: - raise RuntimeError(f"Unexpected layout '{output_layout}'") - - with tvm.transform.PassContext(opt_level=3): - func = tvm.build( - sch, - [a_tensor, m_tensor], - get_hexagon_target("v69"), - name="resize2d", - ) - - a_data_nd = allocate_hexagon_array( - hexagon_session.device, - data=transformed_input_np, - dtype=dtype, - axis_separators=input_axis_separator, - mem_scope="global.vtcm", - ) - - m_data_nd = allocate_hexagon_array( - hexagon_session.device, - transformed_expected_output_np.shape, - dtype=dtype, - axis_separators=output_axis_separator, - mem_scope="global.vtcm", - ) - - mod = hexagon_session.load_module(func) - mod(a_data_nd, m_data_nd) - - batch_size, height, width, channel = output_shape - # convert nd to np and reshape to fixed chunk size layout - if output_layout == "nhwc-8h2w32c2w-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch_size, height // 8, width // 4, channel // 32, 8, 2, 32, 2] - ) - elif output_layout == "nhwc-8h8w32c-2d": - m_data_np = m_data_nd.numpy().reshape( - [batch_size, height // 8, width // 8, channel // 32, 8, 8, 32] - ) - - if dtype == "float16": - np.testing.assert_allclose( - transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 - ) - elif dtype in ["int8", "uint8"]: - np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) - - -if __name__ == "__main__": - tvm.testing.main() +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Resize 2D tesst. 
+""" +import numpy as np + +import tvm +from tvm import te +from tvm.topi.testing import resize2d_python +import tvm.topi.hexagon as s1 +from tvm.contrib.hexagon import allocate_hexagon_array + +from ..infrastructure import transform_numpy, get_hexagon_target + + +class TestResize2d: + """Test resize 2D class.""" + + (batch, channel, in_height, in_width, out_height, out_width,) = tvm.testing.parameters( + ( + 1, + 32, + 8, + 8, + 16, + 16, + ), + ( + 1, + 32, + 48, + 48, + 8, + 8, + ), + ) + + (layout, input_crouton_layout, output_layout, dtype,) = tvm.testing.parameters( + ("NHWC", "nhwc-8h2w32c2w-2d", "nhwc-8h2w32c2w-2d", "float16"), + ("NHWC", "nhwc-8h8w32c-2d", "nhwc-8h8w32c-2d", "uint8"), + ) + + coord_trans = tvm.testing.parameter("asymmetric", "align_corners", "half_pixel") + method = tvm.testing.parameter("nearest_neighbor", "linear") + + @tvm.testing.fixture + def expected_output_np( + self, + input_np, + in_height, + in_width, + out_height, + out_width, + layout, + method, + coord_trans, + ): + """Generate expected output.""" + scale_h = out_height / in_height + scale_w = out_width / in_width + + return resize2d_python(input_np, (scale_h, scale_w), layout, method, coord_trans) + + @tvm.testing.fixture + def input_np(self, input_shape, dtype): + if dtype == "float16": + return np.random.random(input_shape).astype(dtype) + if dtype == "uint8": + return np.random.randint(0, 255, input_shape).astype(dtype) + if dtype == "int8": + return np.random.randint(-128, 127, input_shape).astype(dtype) + raise RuntimeError(f"dtype {dtype} is not valid.") + + @tvm.testing.fixture + def transformed_input_np(self, input_np, layout, input_crouton_layout, dtype): + if dtype in ["float16", "uint8", "int8"]: + return transform_numpy(input_np, layout.lower(), input_crouton_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def transformed_expected_output_np(self, expected_output_np, layout, output_layout, dtype): + if dtype in ["float16", "uint8", "int8"]: + return transform_numpy(expected_output_np, layout.lower(), output_layout) + + raise RuntimeError(f"Unsupported data type '{dtype}'") + + @tvm.testing.fixture + def input_shape(self, batch, channel, in_height, in_width): + return (batch, in_height, in_width, channel) + + @tvm.testing.fixture + def output_shape(self, batch, channel, out_height, out_width): + return (batch, out_height, out_width, channel) + + @tvm.testing.requires_hexagon + def test_resize2d( + self, + dtype, + input_np, + transformed_input_np, + input_shape, + output_shape, + expected_output_np, + transformed_expected_output_np, + layout, + input_crouton_layout, + output_layout, + coord_trans, + method, + hexagon_session, + ): + """Test resize 2D.""" + a_tensor = te.placeholder(input_shape, name="a_tensor", dtype=dtype) + + m_tensor = s1.resize2d_compute( + a_tensor, + [0.0] * 4, + (output_shape[1], output_shape[2]), + layout=layout, + coordinate_transformation_mode=coord_trans, + method=method, + out_dtype=dtype, + ) + + tir_schedule = s1.tir_resize2d_schedule( + m_tensor, a_tensor, input_crouton_layout, output_layout + ) + + sch = tir_schedule.mod + + input_axis_separator = [4] + if output_layout in ( + "nhwc-8h2w32c2w-2d", + "nhwc-8h8w32c-2d", + ): + output_axis_separator = [4] + else: + raise RuntimeError(f"Unexpected layout '{output_layout}'") + + with tvm.transform.PassContext(opt_level=3): + func = tvm.build( + sch, + [a_tensor, m_tensor], + get_hexagon_target("v69"), + name="resize2d", + ) + + a_data_nd = allocate_hexagon_array( + 
hexagon_session.device, + data=transformed_input_np, + dtype=dtype, + axis_separators=input_axis_separator, + mem_scope="global.vtcm", + ) + + m_data_nd = allocate_hexagon_array( + hexagon_session.device, + transformed_expected_output_np.shape, + dtype=dtype, + axis_separators=output_axis_separator, + mem_scope="global.vtcm", + ) + + mod = hexagon_session.load_module(func) + mod(a_data_nd, m_data_nd) + + batch_size, height, width, channel = output_shape + # convert nd to np and reshape to fixed chunk size layout + if output_layout == "nhwc-8h2w32c2w-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch_size, height // 8, width // 4, channel // 32, 8, 2, 32, 2] + ) + elif output_layout == "nhwc-8h8w32c-2d": + m_data_np = m_data_nd.numpy().reshape( + [batch_size, height // 8, width // 8, channel // 32, 8, 8, 32] + ) + + if dtype == "float16": + np.testing.assert_allclose( + transformed_expected_output_np, m_data_np, rtol=1e-3, atol=1e-3 + ) + elif dtype in ["int8", "uint8"]: + np.testing.assert_allclose(transformed_expected_output_np, m_data_np, rtol=1, atol=1) + + +if __name__ == "__main__": + tvm.testing.main() diff --git a/tests/python/frontend/pytorch/test_forward.py b/tests/python/frontend/pytorch/test_forward.py old mode 100755 new mode 100644 diff --git a/tests/python/frontend/tensorflow/test_forward.py b/tests/python/frontend/tensorflow/test_forward.py old mode 100755 new mode 100644 diff --git a/tests/python/unittest/test_transform_layout.py b/tests/python/unittest/test_transform_layout.py old mode 100755 new mode 100644