fix format
Signed-off-by: Mengni Wang <[email protected]>
mengniwang95 committed Jun 25, 2024
1 parent 7b03794 commit f569dbb
Showing 19 changed files with 58 additions and 45 deletions.
1 change: 1 addition & 0 deletions onnx_neural_compressor/algorithms/layer_wise/core.py
@@ -276,6 +276,7 @@ def _prepare_data_reader_for_next_split_model(
             data_reader_for_next_split_model.append(inputs)
         return DataReader(data_reader_for_next_split_model)
 
+
 def _check_model_with_infer_shapes(model):
     """Check if the model has been shape inferred."""
     if isinstance(model, (pathlib.Path, str)):
@@ -278,9 +278,7 @@ def _collect_data(inputs):
                 node_name = name_to_node[node_output_names[output_idx]]
                 if node_output_names[output_idx] not in name_to_calibrator:
                     calib_method = (
-                        q_config[node_name]["calibrate_method"]
-                        if q_config and node_name in q_config
-                        else 0
+                        q_config[node_name]["calibrate_method"] if q_config and node_name in q_config else 0
                     )
                     assert calib_method in calibrator.CALIBRATOR, "Calibration method {} is not registered.".format(
                         calib_method
@@ -323,11 +321,7 @@ def _collect_data(inputs):
                 continue
             if any([data.dtype in [bool] for data in datas]):  # output type of some ops is bool, skip
                 continue
-            calib_method = (
-                q_config[node_name]["calibrate_method"]
-                if q_config and node_name in q_config
-                else 0
-            )
+            calib_method = q_config[node_name]["calibrate_method"] if q_config and node_name in q_config else 0
             _calibrator = calibrator.CALIBRATOR[calib_method]()
             _calibrator.collect(datas)
             activation_tensors_calib_range.setdefault(output_name, []).append(list(_calibrator.calib_range))
@@ -395,7 +389,7 @@ def get_weight_tensors_calib_range(self):
                 os.path.dirname(self.model_wrapper.model_path) if self.model_wrapper.model_path is not None else ""
             ),
         )
-        _calibrator = calibrator.CALIBRATOR[0]() # use minmax method to calibrate initializer tensors
+        _calibrator = calibrator.CALIBRATOR[0]()  # use minmax method to calibrate initializer tensors
         if initializer_tensor.flatten().size > 0:
            _calibrator.collect(initializer_tensor)
            weight_tensors_calib_range[initializer_tensor_name] = [list(_calibrator.calib_range)]
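For reference, the hunks in this file all exercise the same registry pattern: an integer calibration-method id (0 for minmax) selects a calibrator class from calibrator.CALIBRATOR, whose instance accumulates tensor statistics via collect() and exposes them as calib_range. A minimal self-contained sketch of that pattern; the _MinMaxCalibrator class here is a hypothetical stand-in, not the library's implementation:

import numpy as np

class _MinMaxCalibrator:
    """Hypothetical stand-in for the minmax calibrator behind calibrator.CALIBRATOR[0]."""

    def __init__(self):
        self._rmin, self._rmax = np.inf, -np.inf

    def collect(self, datas):
        # Accumulate a running min/max over every tensor seen so far.
        for data in datas:
            self._rmin = min(self._rmin, float(np.min(data)))
            self._rmax = max(self._rmax, float(np.max(data)))

    @property
    def calib_range(self):
        return (self._rmin, self._rmax)

CALIBRATOR = {0: _MinMaxCalibrator}  # 0 = minmax, matching the fallback in the hunks above

calib_method = 0  # the fallback used when no per-node q_config entry exists
_calibrator = CALIBRATOR[calib_method]()
_calibrator.collect([np.array([0.1, -0.4]), np.array([2.5])])
print(list(_calibrator.calib_range))  # [-0.4, 2.5]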
@@ -54,7 +54,7 @@ def __init__(self, onnx_quantizer, onnx_node):
             True if onnx_node.op_type in onnx_quantizer.op_types_to_exclude_output_quantization else False
         )
         self.per_channel = False
-        self.calibrate_method = 0 # minmax
+        self.calibrate_method = 0  # minmax
         self.weight_sym = True
         self.weight_dtype = None
         self.activation_dtype = None
@@ -71,12 +71,12 @@ def convert(self):
 
         out_scale = 1.0
         out_zp = 0
-        gather_new_output = node.output[0] + "_quantized" # dynamic quant output name
+        gather_new_output = node.output[0] + "_quantized"  # dynamic quant output name
         for child in children:
             if child.op_type == "QuantizeLinear":
                 out_scale = onnx.numpy_helper.to_array(self.quantizer.model.get_initializer(children[0].input[1]))
                 out_zp = onnx.numpy_helper.to_array(self.quantizer.model.get_initializer(children[0].input[2]))
-                gather_new_output = children[0].output[0] # static quant output name
+                gather_new_output = children[0].output[0]  # static quant output name
                 self.quantizer.remove_nodes.append(child)
 
         kwargs = {}
@@ -572,7 +572,8 @@ def tensor_proto_to_array(initializer, base_dir=""):
         raise ValueError(
             "Only float type quantization is supported. \
             Weights {} is {}.".format(
-                initializer.name, str(onnx.helper.tensor_dtype_to_np_dtype(initializer.data_type)),
+                initializer.name,
+                str(onnx.helper.tensor_dtype_to_np_dtype(initializer.data_type)),
             )
         )
     return weights
@@ -744,9 +745,7 @@ def quantize_outputs(self, node, initializer_use_weight_qType=True, direct_int8=
             for child in self.model.get_children(node):
                 self.replace_input.append([child, tensor_name, dequant_node.output[0]])
             if tensor_name not in self.quantized_value_map:
-                quantized_value = quant_utils.QuantizedValue(
-                    tensor_name, dq_output, scale_name, zp_name
-                )
+                quantized_value = quant_utils.QuantizedValue(tensor_name, dq_output, scale_name, zp_name)
                 self.quantized_value_map[tensor_name] = quantized_value
 
     def quantize_inputs(self, node, indices=None, initializer_use_weight_qType=True, direct_int8=False):
@@ -991,7 +990,10 @@ def _quantize_activation(self, node, tensor_name, direct_int8=False):
 
         if tensor_name not in self.quantized_value_map:
             quantized_value = quant_utils.QuantizedValue(
-                tensor_name, dq_output, scale_name, zp_name,
+                tensor_name,
+                dq_output,
+                scale_name,
+                zp_name,
             )
             self.quantized_value_map[tensor_name] = quantized_value
 
22 changes: 17 additions & 5 deletions onnx_neural_compressor/algorithms/utility.py
@@ -15,12 +15,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import numpy as np
-from packaging import version
 import re
 import struct
 import sys
 from importlib import util
+
+import numpy as np
+from packaging import version
+
 from onnx_neural_compressor import constants, utility
 
 if sys.version_info < (3, 11) and util.find_spec("onnxruntime_extensions"):  # pragma: no cover
@@ -77,21 +79,22 @@ def attribute_to_kwarg(attribute):
     "int1": (-1, 0),
     "int2": (-2, 1),
     "int3": (-4, 3),
-    "int4": (-8, 7), # onnx >= 1.16.0 defines TensorProto.INT4
+    "int4": (-8, 7),  # onnx >= 1.16.0 defines TensorProto.INT4
     "int5": (-16, 15),
     "int6": (-32, 31),
     "int7": (-64, 63),
     "int8": (-128, 127),
     "uint1": (0, 1),
     "uint2": (0, 3),
     "uint3": (0, 7),
-    "uint4": (0, 15), # onnx >= 1.16.0 defines TensorProto.UINT4
+    "uint4": (0, 15),  # onnx >= 1.16.0 defines TensorProto.UINT4
     "uint5": (0, 31),
     "uint6": (0, 63),
     "uint7": (0, 127),
     "uint8": (0, 255),
 }
 
+
 def _qType_to_np_type(qType):
     if isinstance(qType, int):
         return onnx.helper.tensor_dtype_to_np_dtype(qType)
@@ -113,6 +116,7 @@ def find_by_name(name, item_list):
     else:
         return None
 
+
 def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False):  # noqa: N802
     """Get qmin, qmax for qType.
@@ -140,6 +144,7 @@ def get_qmin_qmax_for_qType(qType, reduce_range=False, sym=False):  # noqa: N802
 
     return qrange
 
+
 def quantize_nparray(dtype, arr, scale, zero_point, low=None, high=None):
     """Quantize numpy array."""
     q_weight = np.empty_like(np.asarray(arr), dtype=scale.dtype)
@@ -150,6 +155,7 @@ def quantize_nparray(dtype, arr, scale, zero_point, low=None, high=None):
         np.clip(q_weight, low, high, out=q_weight)
     return q_weight.astype(dtype)
 
+
 def quantize_data_per_channel(data, axis, qType, sym, reduce_range=False):
     """Quantize tensor per-channel."""
     quantize_range = get_qmin_qmax_for_qType(qType, reduce_range, sym)
Expand All @@ -167,10 +173,12 @@ def quantize_data_per_channel(data, axis, qType, sym, reduce_range=False):
quantized_data = quantize_nparray(dtype, data, scale, zero_point, low=quantize_range[0], high=quantize_range[1])
return rmin.reshape(-1, 1), rmax.reshape(-1, 1), zero_point.reshape(-1, 1), scale.reshape(-1, 1), quantized_data


def dequantize_data_with_scale_zero(tensor_value, scale_value, zo_value): # pragma: no cover
"""Dequantize tensor with scale and zero point."""
return (tensor_value.astype(scale_value.dtype) - zo_value.astype(scale_value.dtype)) * scale_value


def dequantize_data(tensor_value, scale_value, zo_value, axis=0): # pragma: no cover
"""Dequantize tensor."""
if not isinstance(scale_value, np.ndarray):
@@ -196,6 +204,7 @@ def dequantize_data(tensor_value, scale_value, zo_value, axis=0):  # pragma: no cover
             new_tensor_value = np.concatenate((new_tensor_value, new_per_channel_tensor_value), axis)
     return new_tensor_value
 
+
 def calculate_scale_zp(rmin, rmax, qType, sym, reduce_range=False):
     """Calculate scale and zero point."""
     qmin, qmax = get_qmin_qmax_for_qType(qType, reduce_range, sym)
@@ -221,6 +230,7 @@ def calculate_scale_zp(rmin, rmax, qType, sym, reduce_range=False):
     zero_point = np.round((qmax + qmin) / 2.0).astype(dtype) if sym else np.round(qmin - rmin / scale).astype(dtype)
     return np.float32(scale), zero_point
 
+
 def quantize_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     """Quantize data.
@@ -254,17 +264,20 @@ def quantize_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     quantized_data = quantize_nparray(dtype, data, scale, zero_point, low=quantize_range[0], high=quantize_range[1])
     return rmin, rmax, zero_point, scale, quantized_data
 
+
 def qdq_data(data, qType, sym, reduce_range=False, ratio=1.0, axis=None):
     _, _, zero_point, scale, quantized_data = quantize_data(data, qType, sym, reduce_range, ratio, axis)
     return scale * (quantized_data - zero_point)
 
+
 def is_B_transposed(node):
     """Whether input B is transposed."""
     transB = [attr for attr in node.attribute if attr.name == "transB"]
     if len(transB):
         return 0 < onnx.helper.get_attribute_value(transB[0])
     return False
 
+
 def is_quantizable_type(data_type):
     return data_type in [onnx.TensorProto.FLOAT, onnx.TensorProto.FLOAT16, onnx.TensorProto.BFLOAT16]
 
@@ -529,7 +542,6 @@ def dump_woq_stats(model, quantize_config):
     utility.Statistics(output_data, header="Mixed Precision Statistics", field_names=field_names).print_stat()
 
 
-
 def get_node_original_name(node) -> str:
     """Get the original name of the given node."""
     node_name: str = node.name
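The helpers reformatted above implement standard affine quantization: calculate_scale_zp derives a scale and zero point from an observed range, quantize_nparray rounds and clips onto the integer grid, and qdq_data applies the round trip scale * (quantized_data - zero_point). A self-contained sketch of that math using the int8 range from the table above; this is not a call into the library, and the symmetric scale formula is an assumption (only the zero-point expression is visible in this diff):

import numpy as np

def quantize_roundtrip(data, qmin=-128, qmax=127, sym=True):
    """Affine quantize-dequantize round trip, mirroring the helpers above."""
    rmin, rmax = float(np.min(data)), float(np.max(data))
    if sym:
        # Assumed symmetric variant: range centered on zero, zero point at mid-grid,
        # matching `np.round((qmax + qmin) / 2.0)` in calculate_scale_zp.
        max_range = max(abs(rmin), abs(rmax))
        scale = (2 * max_range) / (qmax - qmin) if max_range > 0 else 1.0
        zero_point = round((qmax + qmin) / 2.0)
    else:
        scale = (rmax - rmin) / (qmax - qmin) if rmax > rmin else 1.0
        zero_point = round(qmin - rmin / scale)
    # Round, then clip to the grid, as quantize_nparray does.
    q = np.clip(np.round(data / scale + zero_point), qmin, qmax)
    return scale * (q - zero_point)  # the qdq_data round trip

x = np.array([-1.0, -0.25, 0.0, 0.5, 1.5], dtype=np.float32)
print(quantize_roundtrip(x))  # close to x, up to one quantization step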
3 changes: 2 additions & 1 deletion onnx_neural_compressor/algorithms/weight_only/rtn.py
@@ -139,7 +139,8 @@ def rtn_quantize(
                 "int" + str(num_bits),
                 sym,
                 ratio=ratios.get(node.input[1], 1),
-                axis=1)
+                axis=1,
+            )
             q_weight = np.reshape(q_weight, (org_w_shape[1], -1))
             q_weight = np.transpose(q_weight)
             q_weight = q_weight[: org_w_shape[0], :].astype(dtype)
2 changes: 1 addition & 1 deletion onnx_neural_compressor/onnx_model.py
@@ -267,7 +267,7 @@ def remove_initializers(self, init_to_remove):
             self.remove_initializer(initializer)
 
     def get_initializer(self, name):
-        """"Find the initializer with specified name."""
+        """ "Find the initializer with specified name."""
         for initializer in self.model.graph.initializer:
             if initializer.name == name:
                 return initializer
1 change: 1 addition & 0 deletions onnx_neural_compressor/quantization/algorithm_entry.py
@@ -18,6 +18,7 @@
 
 import onnx
 import onnxruntime as ort
+
 from onnx_neural_compressor import constants, data_reader, logger, utility
 from onnx_neural_compressor.algorithms.post_training_quant import calibrate, quantizer
 from onnx_neural_compressor.algorithms.smoother import core
10 changes: 6 additions & 4 deletions onnx_neural_compressor/quantization/config.py
@@ -28,9 +28,9 @@
 import numpy as np
 import onnx
 import pydantic
+from onnxruntime import quantization as ort_quant
 from typing_extensions import Self
 
-from onnxruntime import quantization as ort_quant
 from onnx_neural_compressor import constants, data_reader, logger, quantization, utility
 
 from collections import OrderedDict  # isort: skip
@@ -114,7 +114,9 @@ def is_tunable(self, value: Any) -> bool:
         return False
 
     def __str__(self) -> str:
-        return "TuningParam(name={}, tunable_type={}, options={}).".format(self.name, str(self.tunable_type), str(self.options))
+        return "TuningParam(name={}, tunable_type={}, options={}).".format(
+            self.name, str(self.tunable_type), str(self.options)
+        )
 
 
 # Config registry to store all registered configs.
@@ -653,7 +655,7 @@ class OperatorConfig:
     per_channel: bool
     weight_sym: bool
     activation_sym: bool
-    calibrate_method: quantization.CalibrationMethod=quantization.CalibrationMethod.MinMax
+    calibrate_method: quantization.CalibrationMethod = quantization.CalibrationMethod.MinMax
 
     def __post_init__(self):
         self.weight_type = getattr(self.weight_type, "tensor_type", self.weight_type)
@@ -780,7 +782,6 @@ def __init__(
         self.quant_last_matmul = quant_last_matmul
         self._post_init()
 
-
     def _post_init(self):
         if self.white_list == constants.RTN_OP_LIST:
             global_config = self.get_init_args()
@@ -1497,6 +1498,7 @@ def dynamic_trt_check(config, optype, execution_provider, quant_format=None):
     dynamic_trt_check,
 ]
 
+
 @register_config(algo_name=constants.STATIC_QUANT, priority=constants.PRIORITY_STATIC_QUANT)
 class StaticQuantConfig(BaseConfig, ort_quant.StaticQuantConfig):
 
@@ -71,6 +71,7 @@ def __init__(
         self.perchannel = perchannel
         self.layer_wise_quant = layer_wise_quant
 
+
 class AWQWeightOnlyQuantConfig(WeightOnlyQuantConfig):
 
     def __init__(
4 changes: 4 additions & 0 deletions onnx_neural_compressor/quantization/quant_utils.py
@@ -16,8 +16,10 @@
 # limitations under the License.
 
 import enum
+
 import onnx
 
+
 class QuantType(enum.Enum):  # pragma: no cover
     """Represent QuantType value."""
 
@@ -32,10 +34,12 @@ def tensor_type(self):
             return onnx.TensorProto.UINT8
         raise ValueError(f"Unexpected value qtype={self!r}.")
 
+
 class QuantFormat(enum.Enum):
     QOperator = 0
     QDQ = 1
 
+
 class CalibrationMethod(enum.Enum):
     MinMax = 0
     Entropy = 1
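These enums pin the integer ids that the algorithms store in per-node configs (for example, self.calibrate_method = 0 for minmax in the operator base class above). A small sketch of how the member values line up with those ids; the classes are re-declared here purely for illustration, with only the members visible in this diff:

from enum import Enum

# Re-declared for illustration; values match the diff above.
class CalibrationMethod(Enum):
    MinMax = 0
    Entropy = 1

# The integer .value is what ends up in per-node q_config entries,
# e.g. `self.calibrate_method = 0  # minmax` earlier in this commit.
assert CalibrationMethod.MinMax.value == 0
print(CalibrationMethod(0))  # CalibrationMethod.MinMax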
2 changes: 1 addition & 1 deletion onnx_neural_compressor/quantization/quantize.py
@@ -20,8 +20,8 @@
 import onnxruntime as ort
 from onnxruntime.quantization.quantize import QuantConfig
 
-from onnx_neural_compressor.quantization import config
 from onnx_neural_compressor.quantization import algorithm_entry as algos
+from onnx_neural_compressor.quantization import config
 
 
 # ORT-like user-facing API
1 change: 0 additions & 1 deletion onnx_neural_compressor/utility.py
@@ -324,4 +324,3 @@ def trt_env_setup(model):
         os.environ["ORT_TENSORRT_INT8_ENABLE"] = "1"
     else:
         os.environ["ORT_TENSORRT_INT8_ENABLE"] = "0"
-
@@ -128,11 +128,9 @@ def test_static_quant(self):
         q_model = onnx.load("quant.onnx")
         node_num_extended = len(q_model.graph.node)
 
-
         # check graph optimization work
         self.assertGreater(node_num_basic, node_num_extended)
 
-
         # check op_types_to_quantize work
         cfg = config.StaticQuantConfig(
             calibration_data_reader=self.data_reader,
@@ -178,7 +176,6 @@ def test_static_quant(self):
         q_model = onnx.load("quant.onnx")
         self.assertEqual(_count_op_num(q_model, "QLinearMatMul"), qmatmul_num_disable_last - 1)
 
-
     def test_dynamic_quant(self):
         cfg = config.DynamicQuantConfig(
             weight_type=quantization.QuantType.QInt8,
@@ -199,6 +196,5 @@ def test_dynamic_quant(self):
         quantization.quantize(self.model, "quant.onnx", cfg, ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED)
 
 
-
 if __name__ == "__main__":
     unittest.main()
8 changes: 2 additions & 6 deletions test/quantization/test_config.py
@@ -183,9 +183,7 @@ def test_static_quant_config(self):
                 else:
                     self.assertFalse("add" in configs_mapping)
                 if idx in [0, 1]:
-                    self.assertEqual(
-                        configs_mapping["Matmul"]["calibrate_method"], 0
-                    )
+                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], 0)
             self.assertLess(idx, 16)
 
         for execution_provider in ["TensorrtExecutionProvider"]:
@@ -217,9 +215,7 @@ def test_static_quant_config(self):
                 configs_mapping = quant_config.to_config_mapping(model_info=model_info)
                 if "Matmul" in configs_mapping:
                     self.assertFalse(configs_mapping["Matmul"]["per_channel"])
-                    self.assertEqual(
-                        configs_mapping["Matmul"]["calibrate_method"], 0
-                    )
+                    self.assertEqual(configs_mapping["Matmul"]["calibrate_method"], 0)
                 if "add" in configs_mapping:
                     self.assertEqual(configs_mapping["add"]["calibrate_method"], 0)
             self.assertLess(idx, 16)