fix format
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 committed Jun 25, 2024
1 parent 7b03794 commit f569dbb
Showing 19 changed files with 58 additions and 45 deletions.
1 change: 1 addition & 0 deletions onnx_neural_compressor/algorithms/layer_wise/core.py
@@ -276,6 +276,7 @@ def _prepare_data_reader_for_next_split_model(
             data_reader_for_next_split_model.append(inputs)
         return DataReader(data_reader_for_next_split_model)
 
+
 def _check_model_with_infer_shapes(model):
     """Check if the model has been shape inferred."""
     if isinstance(model, (pathlib.Path, str)):
@@ -278,9 +278,7 @@ def _collect_data(inputs):
                 node_name = name_to_node[node_output_names[output_idx]]
                 if node_output_names[output_idx] not in name_to_calibrator:
                     calib_method = (
-                        q_config[node_name]["calibrate_method"]
-                        if q_config and node_name in q_config
-                        else 0
+                        q_config[node_name]["calibrate_method"] if q_config and node_name in q_config else 0
                     )
                     assert calib_method in calibrator.CALIBRATOR, "Calibration method {} is not registered.".format(
                         calib_method
@@ -323,11 +321,7 @@ def _collect_data(inputs):
                 continue
             if any([data.dtype in [bool] for data in datas]):  # output type of some ops is bool, skip
                 continue
-            calib_method = (
-                q_config[node_name]["calibrate_method"]
-                if q_config and node_name in q_config
-                else 0
-            )
+            calib_method = q_config[node_name]["calibrate_method"] if q_config and node_name in q_config else 0
             _calibrator = calibrator.CALIBRATOR[calib_method]()
             _calibrator.collect(datas)
             activation_tensors_calib_range.setdefault(output_name, []).append(list(_calibrator.calib_range))
@@ -395,7 +389,7 @@ def get_weight_tensors_calib_range(self):
                 os.path.dirname(self.model_wrapper.model_path) if self.model_wrapper.model_path is not None else ""
             ),
         )
-        _calibrator = calibrator.CALIBRATOR[0]() # use minmax method to calibrate initializer tensors
+        _calibrator = calibrator.CALIBRATOR[0]()  # use minmax method to calibrate initializer tensors
         if initializer_tensor.flatten().size > 0:
            _calibrator.collect(initializer_tensor)
            weight_tensors_calib_range[initializer_tensor_name] = [list(_calibrator.calib_range)]
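For reference, the hunks in this file all exercise the same registry pattern: an integer calibration-method id (0 for minmax) selects a calibrator class from calibrator.CALIBRATOR, whose instance accumulates tensor statistics via collect() and exposes them as calib_range. A minimal self-contained sketch of that pattern; the _MinMaxCalibrator class here is a hypothetical stand-in, not the library's implementation:

import numpy as np

class _MinMaxCalibrator:
    """Hypothetical stand-in for the minmax calibrator behind calibrator.CALIBRATOR[0]."""

    def __init__(self):
        self._rmin, self._rmax = np.inf, -np.inf

    def collect(self, datas):
        # Accumulate a running min/max over every tensor seen so far.
        for data in datas:
            self._rmin = min(self._rmin, float(np.min(data)))
            self._rmax = max(self._rmax, float(np.max(data)))

    @property
    def calib_range(self):
        return (self._rmin, self._rmax)

CALIBRATOR = {0: _MinMaxCalibrator}  # 0 = minmax, matching the fallback in the hunks above

calib_method = 0  # the fallback used when no per-node q_config entry exists
_calibrator = CALIBRATOR[calib_method]()
_calibrator.collect([np.array([0.1, -0.4]), np.array([2.5])])
print(list(_calibrator.calib_range))  # [-0.4, 2.5]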
@@ -54,7 +54,7 @@ def __init__(self, onnx_quantizer, onnx_node):
             True if onnx_node.op_type in onnx_quantizer.op_types_to_exclude_output_quantization else False
         )
         self.per_channel = False
-        self.calibrate_method = 0 # minmax
+        self.calibrate_method = 0  # minmax
         self.weight_sym = True
         self.weight_dtype = None
         self.activation_dtype = None
@@ -71,12 +71,12 @@ def convert(self):
 
         out_scale = 1.0
         out_zp = 0
-        gather_new_output = node.output[0] + "_quantized" # dynamic quant output name
+        gather_new_output = node.output[0] + "_quantized"  # dynamic quant output name
         for child in children:
             if child.op_type == "QuantizeLinear":
                 out_scale = onnx.numpy_helper.to_array(self.quantizer.model.get_initializer(children[0].input[1]))
                 out_zp = onnx.numpy_helper.to_array(self.quantizer.model.get_initializer(children[0].input[2]))
-                gather_new_output = children[0].output[0] # static quant output name
+                gather_new_output = children[0].output[0]  # static quant output name
                 self.quantizer.remove_nodes.append(child)
 
         kwargs = {}
@@ -572,7 +572,8 @@ def tensor_proto_to_array(initializer, base_dir=""):
         raise ValueError(
             "Only float type quantization is supported. \
             Weights {} is {}.".format(
-                initializer.name, str(onnx.helper.tensor_dtype_to_np_dtype(initializer.data_type)),
+                initializer.name,
+                str(onnx.helper.tensor_dtype_to_np_dtype(initializer.data_type)),
             )
         )
     return weights
@@ -744,9 +745,7 @@ def quantize_outputs(self, node, initializer_use_weight_qType=True, direct_int8=
             for child in self.model.get_children(node):
                 self.replace_input.append([child, tensor_name, dequant_node.output[0]])
             if tensor_name not in self.quantized_value_map:
-                quantized_value = quant_utils.QuantizedValue(
-                    tensor_name, dq_output, scale_name, zp_name
-                )
+                quantized_value = quant_utils.QuantizedValue(tensor_name, dq_output, scale_name, zp_name)
                 self.quantized_value_map[tensor_name] = quantized_value
 
     def quantize_inputs(self, node, indices=None, initializer_use_weight_qType=True, direct_int8=False):
@@ -991,7 +990,10 @@ def _quantize_activation(self, node, tensor_name, direct_int8=False):
 
         if tensor_name not in self.quantized_value_map:
             quantized_value = quant_utils.QuantizedValue(
-                tensor_name, dq_output, scale_name, zp_name,
+                tensor_name,
+                dq_output,
+                scale_name,
+                zp_name,
             )
             self.quantized_value_map[tensor_name] = quantized_value
 
22 changes: 17 additions & 5 deletions onnx_neural_compressor/algorithms/utility.py
@@ -15,12 +15,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy as np
-from packaging import version
 import re
 import struct
 import sys
 from importlib import util
+
+import numpy as np
+from packaging import version
+
 from onnx_neural_compressor import constants, utility
 
 if sys.version_info < (3, 11) and util.find_spec("onnxruntime_extensions"):  # pragma: no cover
@@ -77,21 +79,22 @@ def attribute_to_kwarg(attribute):
     "int1": (-1, 0),
     "int2": (-2, 1),
     "int3": (-4, 3),
-    "int4": (-8, 7), # onnx >= 1.16.0 defines TensorProto.INT4
+    "int4": (-8, 7),  # onnx >= 1.16.0 defines TensorProto.INT4
     "int5": (-16, 15),
     "int6": (-32, 31),
     "int7": (-64, 63),
     "int8": (-128, 127),
     "uint1": (0, 1),
     "uint2": (0, 3),
     "uint3": (0, 7),
-    "uint4": (0, 15), # onnx >= 1.16.0 defines TensorProto.UINT4
+    "uint4": (0, 15),  # onnx >= 1.16.0 defines TensorProto.UINT4
     "uint5": (0, 31),
     "uint6": (0, 63),
     "uint7": (0, 127),
     "uint8": (0, 255),
 }
 
+
 def _qType_to_np_type(qType):
     if isinstance(qType, int):
         return onnx.helper.tensor_dtype_to_np_dtype(qType)
@@ -113,6 +116,7 @@ def find_by_name(name, item_list):
     else:
         return None
 
+
 def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False):  # noqa: N802
     """Get qmin, qmax for qType.
@@ -140,6 +144,7 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False):  # noqa: N802
 
     return qrange
 
+
 def quantize_nparray(dtype, arr, scale, zero_point, low=None, high=None):
     """Quantize numpy array."""
     q_weight = np.empty_like(np.asarray(arr), dtype=scale.dtype)
@@ -150,6 +155,7 @@ def quantize_nparray(dtype, arr, scale, zero_point, low=None, high=None):
         np.clip(q_weight, low, high, out=q_weight)
     return q_weight.astype(dtype)
 
+
 def quantize_data_per_channel(data, axis, qType, sym, reduce_range=False):
     """Quantize tensor per-channel."""
     quantize_range = get_qmin_qmax_for_qType(qType, reduce_range, sym)
Expand All @@ -167,10 +173,12 @@ def quantize_data_per_channel(data, axis, qType, sym, reduce_range=False):
quantized_data = quantize_nparray(dtype, data, scale, zero_point, low=quantize_range[0], high=quantize_range[1])
return rmin.reshape(-1, 1), rmax.reshape(-1, 1), zero_point.reshape(-1, 1), scale.reshape(-1, 1), quantized_data


def dequantize_data_with_scale_zero(tensor_value, scale_value, zo_value): # pragma: no cover
"""Dequantize tensor with scale and zero point."""
return (tensor_value.astype(scale_value.dtype) - zo_value.astype(scale_value.dtype)) * scale_value


def dequantize_data(tensor_value, scale_value, zo_value, axis=0): # pragma: no cover
"""Dequantize tensor."""
if not isinstance(scale_value, np.ndarray):
@@ -196,6 +204,7 @@ def dequantize_data(tensor_value, scale_value, zo_value, axis=0):  # pragma: no cover
             new_tensor_value = np.concatenate((new_tensor_value, new_per_channel_tensor_value), axis)
     return new_tensor_value
 
+
 def calculate_scale_zp(rmin, rmax, qType, sym, reduce_range=False):
     """Calculate scale and zero point."""
     qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, sym)
@@ -221,6 +230,7 @@ def calculate_scale_zp(rmin, rmax, qType, sym, reduce_range=False):
     zero_point = np.round((qmax + qmin) / 2.0).astype(dtype) if sym else np.round(qmin - rmin / scale).astype(dtype)
     return np.float32(scale), zero_point
 
+
 def quantize_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     """Quantize data.
@@ -254,17 +264,20 @@ def quantize_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     quantized_data = quantize_nparray(dtype, data, scale, zero_point, low=quantize_range[0], high=quantize_range[1])
     return rmin, rmax, zero_point, scale, quantized_data
 
+
 def qdq_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     _, _, zero_point, scale, quantized_data = quantize_data(data, qType, sym, reduce_range, ratio, axis)
     return scale * (quantized_data - zero_point)
 
+
 def is_B_transposed(node):
     """Whether input B is transposed."""
     transB = [attr for attr in node.attribute if attr.name == "transB"]
     if len(transB):
         return 0 < onnx.helper.get_attribute_value(transB[0])
     return False
 
+
 def is_quantizable_type(data_type):
     return data_type in [onnx.TensorProto.FLOAT, onnx.TensorProto.FLOAT16, onnx.TensorProto.BFLOAT16]
 
@@ -529,7 +542,6 @@ def dump_woq_stats(model, quantize_config):
     utility.Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat()
 
 
-
 def get_node_original_name(node) -> str:
     """Get the original name of the given node."""
     node_name: str = node.name
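The helpers reformatted above implement standard affine quantization: calculate_scale_zp derives a scale and zero point from an observed range, quantize_nparray rounds and clips onto the integer grid, and qdq_data applies the round trip scale * (quantized_data - zero_point). A self-contained sketch of that math using the int8 range from the table above; this is not a call into the library, and the symmetric scale formula is an assumption (only the zero-point expression is visible in this diff):

import numpy as np

def quantize_roundtrip(data, qmin=-128, qmax=127, sym=True):
    """Affine quantize-dequantize round trip, mirroring the helpers above."""
    rmin, rmax = float(np.min(data)), float(np.max(data))
    if sym:
        # Assumed symmetric variant: range centered on zero, zero point at mid-grid,
        # matching `np.round((qmax + qmin) / 2.0)` in calculate_scale_zp.
        max_range = max(abs(rmin), abs(rmax))
        scale = (2 * max_range) / (qmax - qmin) if max_range > 0 else 1.0
        zero_point = round((qmax + qmin) / 2.0)
    else:
        scale = (rmax - rmin) / (qmax - qmin) if rmax > rmin else 1.0
        zero_point = round(qmin - rmin / scale)
    # Round, then clip to the grid, as quantize_nparray does.
    q = np.clip(np.round(data / scale + zero_point), qmin, qmax)
    return scale * (q - zero_point)  # the qdq_data round trip

x = np.array([-1.0, -0.25, 0.0, 0.5, 1.5], dtype=np.float32)
print(quantize_roundtrip(x))  # close to x, up to one quantization step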
3 changes: 2 additions & 1 deletion onnx_neural_compressor/algorithms/weight_only/rtn.py
@@ -139,7 +139,8 @@ def rtn_quantize(
                 "int" + str(num_bits),
                 sym,
                 ratio=ratios.get(node.input[1], 1),
-                axis=1)
+                axis=1,
+            )
             q_weight = np.reshape(q_weight, (org_w_shape[1], -1))
             q_weight = np.transpose(q_weight)
             q_weight = q_weight[: org_w_shape[0], :].astype(dtype)
2 changes: 1 addition & 1 deletion onnx_neural_compressor/onnx_model.py
@@ -267,7 +267,7 @@ def remove_initializers(self, init_to_remove):
             self.remove_initializer(initializer)
 
     def get_initializer(self, name):
-        """"Find the initializer with specified name."""
+        """ "Find the initializer with specified name."""
         for initializer in self.model.graph.initializer:
             if initializer.name == name:
                 return initializer
1 change: 1 addition & 0 deletions onnx_neural_compressor/quantization/algorithm_entry.py
@@ -18,6 +18,7 @@
 
 import onnx
 import onnxruntime as ort
+
 from onnx_neural_compressor import constants, data_reader, logger, utility
 from onnx_neural_compressor.algorithms.post_training_quant import calibrate, quantizer
 from onnx_neural_compressor.algorithms.smoother import core
10 changes: 6 additions & 4 deletions onnx_neural_compressor/quantization/config.py
@@ -28,9 +28,9 @@
 import numpy as np
 import onnx
 import pydantic
+from onnxruntime import quantization as ort_quant
 from typing_extensions import Self
 
-from onnxruntime import quantization as ort_quant
 from onnx_neural_compressor import constants, data_reader, logger, quantization, utility
 
 from collections import OrderedDict  # isort: skip
@@ -114,7 +114,9 @@ def is_tunable(self, value: Any) -> bool:
         return False
 
     def __str__(self) -> str:
-        return "TuningParam(name={}, tunable_type={}, options={}).".format(self.name, str(self.tunable_type), str(self.options))
+        return "TuningParam(name={}, tunable_type={}, options={}).".format(
+            self.name, str(self.tunable_type), str(self.options)
+        )
 
 
 # Config registry to store all registered configs.
@@ -653,7 +655,7 @@ class OperatorConfig:
     per_channel: bool
     weight_sym: bool
     activation_sym: bool
-    calibrate_method: quantization.CalibrationMethod=quantization.CalibrationMethod.MinMax
+    calibrate_method: quantization.CalibrationMethod = quantization.CalibrationMethod.MinMax
 
     def __post_init__(self):
         self.weight_type = getattr(self.weight_type, "tensor_type", self.weight_type)
@@ -780,7 +782,6 @@ def __init__(
         self.quant_last_matmul = quant_last_matmul
         self._post_init()
 
-
     def _post_init(self):
         if self.white_list == constants.RTN_OP_LIST:
             global_config = self.get_init_args()
@@ -1497,6 +1498,7 @@ def dynamic_trt_check(config, optype, execution_provider, quant_format=None):
     dynamic_trt_check,
 ]
 
+
 @register_config(algo_name=constants.STATIC_QUANT, priority=constants.PRIORITY_STATIC_QUANT)
 class StaticQuantConfig(BaseConfig, ort_quant.StaticQuantConfig):
 
@@ -71,6 +71,7 @@ def __init__(
         self.perchannel = perchannel
         self.layer_wise_quant = layer_wise_quant
 
+
 class AWQWeightOnlyQuantConfig(WeightOnlyQuantConfig):
 
     def __init__(
4 changes: 4 additions & 0 deletions onnx_neural_compressor/quantization/quant_utils.py
@@ -16,8 +16,10 @@
 # limitations under the License.
 
 import enum
+
 import onnx
 
+
 class QuantType(enum.Enum):  # pragma: no cover
     """Represent QuantType value."""
 
@@ -32,10 +34,12 @@ def tensor_type(self):
             return onnx.TensorProto.UINT8
         raise ValueError(f"Unexpected value qtype={self!r}.")
 
+
 class QuantFormat(enum.Enum):
     QOperator = 0
     QDQ = 1
 
+
 class CalibrationMethod(enum.Enum):
     MinMax = 0
     Entropy = 1
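These enums pin the integer ids that the algorithms store in per-node configs (for example, self.calibrate_method = 0 for minmax in the operator base class above). A small sketch of how the member values line up with those ids; the classes are re-declared here purely for illustration, with only the members visible in this diff:

from enum import Enum

# Re-declared for illustration; values match the diff above.
class CalibrationMethod(Enum):
    MinMax = 0
    Entropy = 1

# The integer .value is what ends up in per-node q_config entries,
# e.g. `self.calibrate_method = 0  # minmax` earlier in this commit.
assert CalibrationMethod.MinMax.value == 0
print(CalibrationMethod(0))  # CalibrationMethod.MinMax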
2 changes: 1 addition & 1 deletion onnx_neural_compressor/quantization/quantize.py
@@ -20,8 +20,8 @@
 import onnxruntime as ort
 from onnxruntime.quantization.quantize import QuantConfig
 
-from onnx_neural_compressor.quantization import config
 from onnx_neural_compressor.quantization import algorithm_entry as algos
+from onnx_neural_compressor.quantization import config
 
 
 # ORT-like user-facing API
1 change: 0 additions & 1 deletion onnx_neural_compressor/utility.py
@@ -324,4 +324,3 @@ def trt_env_setup(model):
         os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1"
     else:
         os.environ["ORT_TENSORRT_INT8_ENABLE"] = "0"
-
@@ -128,11 +128,9 @@ def test_static_quant(self):
         q_model = onnx.load("quant.onnx")
         node_num_extended = len(q_model.graph.node)
 
-
         # check graph optimization work
         self.assertGreater(node_num_basic, node_num_extended)
 
-
         # check op_types_to_quantize work
         cfg = config.StaticQuantConfig(
             calibration_data_reader=self.data_reader,
@@ -178,7 +176,6 @@ def test_static_quant(self):
         q_model = onnx.load("quant.onnx")
         self.assertEqual(_count_op_num(q_model, "QLinearMatMul"), qmatmul_num_disable_last - 1)
 
-
     def test_dynamic_quant(self):
         cfg = config.DynamicQuantConfig(
             weight_type=quantization.QuantType.QInt8,
@@ -199,6 +196,5 @@ def test_dynamic_quant(self):
         quantization.quantize(self.model, "quant.onnx", cfg, ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED)
 
 
-
 if __name__ == "__main__":
     unittest.main()
8 changes: 2 additions & 6 deletions test/quantization/test_config.py
@@ -183,9 +183,7 @@ def test_static_quant_config(self):
                 else:
                     self.assertFalse("add" in configs_mapping)
                 if idx in [0, 1]:
-                    self.assertEqual(
-                        configs_mapping["Matmul"]["calibrate_method"], 0
-                    )
+                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], 0)
             self.assertLess(idx, 16)
 
         for execution_provider in ["TensorrtExecutionProvider"]:
@@ -217,9 +215,7 @@ def test_static_quant_config(self):
                 configs_mapping = quant_config.to_config_mapping(model_info=model_info)
                 if "Matmul" in configs_mapping:
                     self.assertFalse(configs_mapping["Matmul"]["per_channel"])
-                    self.assertEqual(
-                        configs_mapping["Matmul"]["calibrate_method"], 0
-                    )
+                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], 0)
                 if "add" in configs_mapping:
                     self.assertEqual(configs_mapping["add"]["calibrate_method"], 0)
             self.assertLess(idx, 16)