Skip to content

Commit

Permalink
[QNN][Relay][Topi] Add qnn.dense with weight layout
Browse files Browse the repository at this point in the history
This commit adds new Relay operation "qnn.contrib_dense_pack" that supports
different weights layout (nn.dense and qnn.dense do not support this
attribute). This new operation is full analog of "nn.contrib_dense_pack"
operation but in QNN space.
  • Loading branch information
ibsidorenko committed Feb 1, 2023
1 parent fc98e9c commit 15e85e5
Show file tree
Hide file tree
Showing 15 changed files with 740 additions and 88 deletions.
11 changes: 10 additions & 1 deletion python/tvm/relay/qnn/op/_qnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,16 @@ def alter_op_layout_qnn_conv2d(attrs, inputs, tinfos, out_type):

# qnn.dense
register_strategy("qnn.dense", strategy.qnn_dense_strategy)
register_pattern("qnn.dense", OpPattern.OUT_ELEMWISE_FUSABLE)


@register_alter_op_layout("qnn.dense")
def alter_op_layout_qnn_dense(attrs, inputs, tinfos, out_type):
"""Alternate the layout of qnn.dense"""
return topi.nn.qnn_dense_alter_layout(attrs, inputs, tinfos, out_type)


# qnn.contrib_dense_pack
register_strategy("qnn.contrib_dense_pack", strategy.qnn_dense_pack_strategy)

# qnn.batch_matmul
register_strategy("qnn.batch_matmul", strategy.qnn_batch_matmul_strategy)
Expand Down
134 changes: 131 additions & 3 deletions python/tvm/relay/qnn/op/legalizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,62 @@ def helper_change_dtypes_to_int8(attrs, inputs, types, relay_op):
)


def helper_change_dtypes_to_uint8(attrs, inputs, types, relay_op):
"""Helper function to change dtypes to uint8 x uint8.
Legalizes QNN dense op for Hexagon DSP. It supports fast u8 x u8 vrmpy instruction.
Converting from int8 to uint8 can be done in following manner:
Original equation
scale * (QA - zp_a)
scale * (QA + 128 - 128 - zp_a)
scale * ( (QA + 128) - (zp_a + 128))
Replacing QA + 128 with QA' and (zp_a + 128) with zp_a'
We get our new quantized uint8 tensor - scale * (QA' - zp_a')
Parameters
----------
attrs : tvm.ir.Attrs
Attributes of current convolution
inputs : list of tvm.relay.Expr
The args of the Relay expr to be legalized
types : list of types
List of input and output types
Returns
-------
result : tvm.relay.Expr
The legalized expr
"""
# Collect the dtypes.
data_dtype = types[0].dtype
kernel_dtype = types[1].dtype

# Do nothing since it is already uint8.
if data_dtype == "uint8" and kernel_dtype == "uint8":
return None

# Collect the input exprs.
data, kernel, input_zero_point, kernel_zero_point, input_scale, kernel_scale = inputs

# Shift input if necessary.
if data_dtype == "int8":
# Compute (QA + 128) and (zp_a + 128)
data, input_zero_point = _shift(data, input_zero_point, "uint8")

# Shift kernel if necessary.
if kernel_dtype == "int8":
# Compute (QA + 128) and (zp_a + 128)
kernel, kernel_zero_point = _shift(kernel, kernel_zero_point, "uint8")

# Call qnn.conv2d/qnn.dense with modified inputs and zero points.
new_attrs = dict(attrs)
return relay_op(
data, kernel, input_zero_point, kernel_zero_point, input_scale, kernel_scale, **new_attrs
)


# Helper function to change dtypes to be same. ARM dotprod instructions prefer this setting.
def helper_change_dtypes_to_be_same(attrs, inputs, types, relay_op):
"""Sometimes MxNet + MLDNN can lead to uint8 x int8 datatypes for the conv inputs. However,
Expand Down Expand Up @@ -520,7 +576,7 @@ def _qnn_conv2d_legalize_hexagon(attrs, inputs, types):
out_channel = kernel_tensor.shape[0].value
ic_modified = False
oc_modified = False
data, kernel, input_zp, output_zp, input_scale, output_scale = inputs
data, kernel, data_zp, kernel_zp, data_scale, kernel_scale = inputs

if in_channel % IN_CHANNEL_VECTOR_LENGTH != 0:
new_in_channel = helper_align_up(in_channel, IN_CHANNEL_VECTOR_LENGTH)
Expand All @@ -537,21 +593,93 @@ def _qnn_conv2d_legalize_hexagon(attrs, inputs, types):
kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0), (0, 0), (0, 0)))
oc_modified = True

# Pad kernel zero point by 'diff' elements of 0 if it is not scalar
kernel_zp_tensor = types[3]
if len(kernel_zp_tensor.shape) != 0:
assert isinstance(kernel_zp, relay.Constant)
padded_kernel_zp_np = np.append(kernel_zp.data.numpy(), [0] * diff)
kernel_zp = relay.const(padded_kernel_zp_np)

# Pad kernel scale by 'diff' elements of 1.0 if it is not scalar
kernel_scale_tensor = types[5]
if len(kernel_scale_tensor.shape) != 0:
assert isinstance(kernel_scale, relay.Constant)
padded_kernel_scale_np = np.append(kernel_scale.data.numpy(), [1.0] * diff)
kernel_scale = relay.const(padded_kernel_scale_np)

if ic_modified is True or oc_modified is True:
new_attrs = dict(attrs)
if oc_modified:
new_attrs["channels"] = new_out_channel
out = relay.qnn.op.conv2d(
data, kernel, input_zp, output_zp, input_scale, output_scale, **new_attrs
data, kernel, data_zp, kernel_zp, data_scale, kernel_scale, **new_attrs
)
output_tensor = types[6]
original_out_shape = list(output_tensor.shape)
out = relay.strided_slice(out, begin=[0, 0, 0, 0], end=original_out_shape)
else:
out = relay.qnn.op.conv2d(
data, kernel, input_zp, output_zp, input_scale, output_scale, **new_attrs
data, kernel, data_zp, kernel_zp, data_scale, kernel_scale, **new_attrs
)

return out

return None


@qnn_dense_legalize.register("hexagon")
def _qnn_dense_legalize_hexagon(attrs, inputs, types):
"""Legalize qnn.dense op for vrmpy tensorization.
N dimension of weights should be aligned on vector length. If not, then N dimension is padded to
be a multiple of 32.
"""
assert len(types) == 7
assert len(inputs) == 6

data_tensor, kernel_tensor = types[0], types[1]
if "int8" not in data_tensor.dtype or "int8" not in kernel_tensor.dtype:
return None

N, _ = kernel_tensor.shape

if N % OUT_CHANNEL_VECTOR_LENGTH != 0:
N_padded = helper_align_up(N, OUT_CHANNEL_VECTOR_LENGTH)
diff = N_padded - N

data, kernel, data_zp, kernel_zp, data_scale, kernel_scale = inputs

# Pad weights by 'diff'
padded_kernel = relay.nn.pad(kernel, pad_width=((0, diff), (0, 0)))

kernel_zp_tensor, kernel_scale_tensor = types[3], types[5]

# Pad kernel zero point by 'diff' elements of 0 if it is not scalar
if len(kernel_zp_tensor.shape) != 0:
assert isinstance(kernel_zp, relay.Constant)
assert isinstance(diff, tvm.tir.IntImm)
padded_kernel_zp_np = np.append(kernel_zp.data.numpy(), [0] * diff.value)
kernel_zp = relay.const(padded_kernel_zp_np)

# Pad kernel scale by 'diff' elements of 1.0 if it is not scalar
if len(kernel_scale_tensor.shape) != 0:
assert isinstance(kernel_scale, relay.Constant)
assert isinstance(diff, tvm.tir.IntImm)
padded_kernel_scale_np = np.append(kernel_scale.data.numpy(), [1.0] * diff.value)
kernel_scale = relay.const(padded_kernel_scale_np)

# If units is explicitly specified, it is used to compute the output shape.
# We need to update units after padding to prevent a type error.
new_attrs = dict(attrs)
if attrs["units"] is not None:
new_attrs["units"] = N + diff

new_inputs = (data, padded_kernel, data_zp, kernel_zp, data_scale, kernel_scale)

out = relay.qnn.op.dense(*new_inputs, **new_attrs)

output_tensor = types[6]
out = relay.strided_slice(out, begin=[0, 0], end=list(output_tensor.shape))
return out

return None
64 changes: 64 additions & 0 deletions python/tvm/relay/qnn/op/qnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,70 @@ def dense(
)


def contrib_dense_pack(
data,
weight,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
kernel_layout="NC",
units=None,
out_dtype="int32",
):
"""Qnn contrib_dense_pack operator.
Applies a quantized linear transformation
.. math::
`Y = X * W`
If doing Per-channel quantization, qnn expects the kernel_zero_scale
and optionally the kernel_zero_point will be 1-D vectors instead of scalars.
Parameters
----------
data : tvm.relay.Expr
The quantized input data to the operator.
weight : tvm.relay.Expr
The quantized weight expressions.
input_zero_point: tvm.relay.Expr
The input zero point.
kernel_zero_point: tvm.relay.Expr
The kernel zero point.
input_scale: tvm.relay.Expr
The scale for the input tensor.
kernel_scale: tvm.relay.Expr
The scale for the weight tensor. The scale for the weight tensor is
stored for access to this during relay. This information is not
needed in the pass pipeline after qnn.conv2d is lowered to the
sequence of steps as in nn.conv2d. See also input_scale in Requantize.
kernel_layout: str
The layout of weight, such as "NC" or "NC32n4c".
units : int, optional
Number of hidden units of the dense transformation.
out_dtype : str, optional
Specifies the output data type for mixed precision dense can be int32 or int16.
Returns
-------
result : tvm.relay.Expr
The computed result.
"""

return _make.contrib_dense_pack(
data,
weight,
input_zero_point,
kernel_zero_point,
input_scale,
kernel_scale,
kernel_layout,
units,
out_dtype,
)


def mul(
lhs,
rhs,
Expand Down
6 changes: 6 additions & 0 deletions python/tvm/relay/qnn/strategy/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,12 @@ def qnn_dense_strategy(attrs, inputs, out_type, target):
)


@override_native_generic_func("qnn_dense_pack_strategy")
def qnn_dense_pack_strategy(attrs, inputs, out_type, target):
"""qnn.contrib_dense_pack generic strategy"""
raise RuntimeError("qnn.contrib_dense_pack is currently only supported with Hexagon. ")


@override_native_generic_func("qnn_batch_matmul_strategy")
def qnn_batch_matmul_strategy(attrs, inputs, out_type, target):
"""qnn.batch_matmul generic strategy"""
Expand Down
18 changes: 18 additions & 0 deletions python/tvm/relay/qnn/strategy/hexagon.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,24 @@ def qnn_dense_strategy_hexagon(attrs, inputs, out_type, target):
return strategy


@qnn_dense_pack_strategy.register("hexagon")
def qnn_dense_pack_strategy_hexagon(attrs, inputs, out_type, target):
"""qnn.contrib_dense_pack strategy for Hexagon"""
strategy = _op.OpStrategy()
if (
"uint8" in inputs[0].dtype
and "int8" in inputs[1].dtype
and attrs["weight_layout"] == "NC32n4c"
):
# uint8 + uint8|int8 case
strategy.add_implementation(
wrap_topi_qnn_dense(topi.hexagon.qnn_dense_pack_vrmpy),
wrap_topi_schedule(topi.hexagon.schedule_qnn_dense_pack_vrmpy),
name="qnn_dense_pack_vrmpy.hexagon",
)
return strategy


@qnn_batch_matmul_strategy.register("hexagon")
def qnn_batch_matmul_strategy_hexagon(attrs, inputs, out_type, target):
"""qnn.batch_matmul strategy for Hexagon"""
Expand Down
1 change: 1 addition & 0 deletions python/tvm/topi/hexagon/qnn/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .adaptive_avg_pool1d import *
from .avg_pool2d import qnn_avg_pool2d_compute, qnn_avg_pool2d_schedule
from .conv2d_alter_op import *
from .dense_alter_op import *
from .dequantize import dequantize_compute, dequantize_schedule
from .global_avg_pool2d import *
from .nn import *
Expand Down
33 changes: 33 additions & 0 deletions python/tvm/topi/hexagon/qnn/dense_alter_op.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""QNN Dense alter op functions for Hexagon"""

from tvm import relay
from ..dense_alter_op import check_vrmpy_applicable
from ...nn import qnn_dense_alter_layout


@qnn_dense_alter_layout.register("hexagon")
def _alter_qnn_dense_layout(_attrs, inputs, tinfos, out_type):
data_tensor = tinfos[0]
weight_tensor = tinfos[1]

if check_vrmpy_applicable(data_tensor, weight_tensor):
weight_layout = "NC32n4c"
return relay.qnn.op.contrib_dense_pack(*inputs, weight_layout, None, out_type.dtype)
else:
return None
Loading

0 comments on commit 15e85e5

Please sign in to comment.