migrate export to 2x and 3x from deprecated (#1845)
Signed-off-by: xin3he <[email protected]>
xin3he authored Jun 13, 2024
1 parent 0eced14 commit 794b276
Showing 15 changed files with 657 additions and 8 deletions.
2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/qlinear2qdq.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pragma: no cover
"""Helper functions to export onnx model from QLinearops to QDQ."""
from deprecated import deprecated

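The files under neural_compressor/experimental/export now carry a # pragma: no cover marker (keeping the deprecated path out of coverage reporting) and rely on the deprecated package to warn callers that this path is superseded by the migrated modules. As a rough, hypothetical sketch (the decorator arguments and the forwarding target are illustrative, not taken from this commit), a deprecated shim built with that package looks like:

# Hypothetical shim; version/reason strings and the forwarding style are assumptions.
from deprecated import deprecated

from neural_compressor.utils.export.qlinear2qdq import onnx_qlinear_to_qdq as _migrated_impl


@deprecated(version="2.6", reason="moved to neural_compressor.utils.export.qlinear2qdq")
def onnx_qlinear_to_qdq(model, input_name_to_nodes):
    """Deprecated entry point kept for backward compatibility."""
    return _migrated_impl(model, input_name_to_nodes)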
2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/tf2onnx.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pragma: no cover
"""Helper functions to export model from TensorFlow to ONNX."""

import re
2 changes: 2 additions & 0 deletions neural_compressor/experimental/export/torch2onnx.py
@@ -14,6 +14,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pragma: no cover
"""Helper functions to export model from PyTorch/TensorFlow to ONNX."""

import os
2 changes: 1 addition & 1 deletion neural_compressor/model/onnx_model.py
@@ -827,7 +827,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
def export(self, save_path, conf):
"""Export Qlinear to QDQ model."""
from neural_compressor.config import ONNXQlinear2QDQConfig
-from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+from neural_compressor.utils.export import onnx_qlinear_to_qdq

if isinstance(conf, ONNXQlinear2QDQConfig):
add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(self._model, self._input_name_to_nodes)
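Only the internal import changes here; for callers the QLinear-to-QDQ export is still driven through the ONNX model wrapper's export method with an ONNXQlinear2QDQConfig. A minimal sketch, assuming the 2.x Model wrapper is importable as neural_compressor.model.Model and accepts a path to an already-quantized QLinear model (file names are illustrative):

# Sketch only; "int8_qlinear.onnx" is a placeholder for a quantized QLinear-format model.
from neural_compressor.config import ONNXQlinear2QDQConfig
from neural_compressor.model import Model

inc_model = Model("int8_qlinear.onnx")   # wraps the ONNX model
conf = ONNXQlinear2QDQConfig()           # request QLinear -> QDQ conversion
inc_model.export("int8_qdq.onnx", conf)  # internally calls onnx_qlinear_to_qdq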
2 changes: 1 addition & 1 deletion neural_compressor/model/tensorflow_model.py
@@ -1009,7 +1009,7 @@ def export(self, save_path, conf):
+ "we reset opset_version={} here".format(conf.opset_version)
)

-from neural_compressor.experimental.export import tf_to_fp32_onnx, tf_to_int8_onnx
+from neural_compressor.utils.export import tf_to_fp32_onnx, tf_to_int8_onnx

inputs_as_nchw = conf.kwargs.get("inputs_as_nchw", None)
if conf.dtype == "int8":
2 changes: 1 addition & 1 deletion neural_compressor/model/torch_model.py
@@ -418,7 +418,7 @@ def export(
"but the torch version found is {}".format(Version("1.12.0"), version)
)

-from neural_compressor.experimental.export import torch_to_fp32_onnx, torch_to_int8_onnx
+from neural_compressor.utils.export import torch_to_fp32_onnx, torch_to_int8_onnx

if conf.dtype == "int8":
torch_to_int8_onnx(
2 changes: 1 addition & 1 deletion neural_compressor/onnxrt/utils/onnx_model.py
@@ -648,7 +648,7 @@ def find_ffn_matmul(self, attention_index, attention_matmul_list, block_len):
def export(self, save_path, conf):
"""Export Qlinear to QDQ model."""
from neural_compressor.config import ONNXQlinear2QDQConfig
-from neural_compressor.experimental.export import onnx_qlinear_to_qdq
+from neural_compressor.utils.export import onnx_qlinear_to_qdq

if isinstance(conf, ONNXQlinear2QDQConfig):
if len(self._input_name_to_nodes) == 0:
2 changes: 1 addition & 1 deletion neural_compressor/torch/export/__init__.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-from neural_compressor.torch.export._export import export_model_for_pt2e_quant, export
+from neural_compressor.torch.export.pt2e_export import export_model_for_pt2e_quant, export
neural_compressor/torch/export/{_export.py → pt2e_export.py}
File renamed without changes.
3 changes: 1 addition & 2 deletions neural_compressor/torch/utils/utility.py
@@ -18,8 +18,7 @@
import torch
from typing_extensions import TypeAlias

-from neural_compressor.common import logger
-from neural_compressor.common.utils import Mode
+from neural_compressor.common.utils import LazyImport, Mode, logger

OP_NAME_AND_TYPE_TUPLE_TYPE: TypeAlias = Tuple[str, Union[torch.nn.Module, Callable]]

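The consolidated LazyImport defers importing a module until one of its attributes is first accessed, so optional heavy dependencies such as onnx or tf2onnx are not pulled in at package import time. A small sketch of the pattern as it appears in the migrated export helpers (the wrapper function below is illustrative):

# Illustrative; the migrated helpers lazily load onnx.numpy_helper and tf2onnx this way.
from neural_compressor.utils.utility import LazyImport

numpy_helper = LazyImport("onnx.numpy_helper")  # nothing is imported yet


def initializer_to_array(initializer):
    # First attribute access triggers the real "import onnx.numpy_helper".
    return numpy_helper.to_array(initializer)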
21 changes: 21 additions & 0 deletions neural_compressor/utils/export/__init__.py
@@ -0,0 +1,21 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Intel Neural Compressor Export."""

from .torch2onnx import torch_to_fp32_onnx, torch_to_int8_onnx
from .qlinear2qdq import onnx_qlinear_to_qdq
from .tf2onnx import tf_to_fp32_onnx, tf_to_int8_onnx
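With this new package in place, downstream code imports the export helpers from neural_compressor.utils.export rather than the deprecated neural_compressor.experimental.export path; the names below come straight from this __init__.py:

# Old, deprecated location (still present but marked with @deprecated):
# from neural_compressor.experimental.export import torch_to_fp32_onnx

# New location introduced by this commit:
from neural_compressor.utils.export import (
    onnx_qlinear_to_qdq,
    tf_to_fp32_onnx,
    tf_to_int8_onnx,
    torch_to_fp32_onnx,
    torch_to_int8_onnx,
)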
82 changes: 82 additions & 0 deletions neural_compressor/utils/export/qlinear2qdq.py
@@ -0,0 +1,82 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2021 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions to export onnx model from QLinear ops to QDQ."""
from neural_compressor.adaptor.ox_utils.util import find_by_name
from neural_compressor.utils import logger
from neural_compressor.utils.utility import LazyImport

numpy_helper = LazyImport("onnx.numpy_helper")


def check_model(model):
    """Check optype for input model.

    Args:
        model (ModelProto): onnx model.
    """
    has_integerop = False
    has_qlinearop = False
    for node in model.graph.node:
        if node.op_type.endswith("Integer"):
            has_integerop = True
        elif node.op_type.startswith("QLinear"):
            has_qlinearop = True
        elif node.op_type in ["QAttention", "QGemm", "QEmbedLayerNormalization"]:
            has_qlinearop = True
        elif node.op_type in ["Gather"]:
            input_data = find_by_name(node.input[0], model.graph.initializer)
            if input_data is not None and numpy_helper.to_array(input_data).dtype in ["int8", "uint8"]:
                has_qlinearop = True
    if has_integerop:
        logger.info("This model has Integer ops, these ops will be skipped.")
    if has_qlinearop:
        return True
    else:
        logger.info("This model has no QLinear ops, save the original model.")
        return False


def onnx_qlinear_to_qdq(
    model,
    input_name_to_nodes,
):
    """Export ONNX QLinear ops model into QDQ model.

    Args:
        model (ModelProto): int8 onnx model.
        input_name_to_nodes (dict): the mapping of tensor name and its destination nodes.
    """
    from neural_compressor.adaptor.ox_utils.operators import QOPERATORS

    add_nodes = []
    remove_nodes = []
    inits = []
    if check_model(model):
        for node in model.graph.node:
            if node.op_type in QOPERATORS:
                if node.output[0] not in input_name_to_nodes:
                    continue
                children = []
                for out in node.output:
                    children.extend(input_name_to_nodes.get(out, []))
                converter = QOPERATORS[node.op_type](node, children, model.graph.initializer)
                done, add_node, init = converter.convert()
                if done:
                    add_nodes.extend(add_node)
                    inits.extend(init)
                    remove_nodes.append(node)
    return add_nodes, remove_nodes, inits
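onnx_qlinear_to_qdq does not rewrite the graph itself; it returns the QDQ nodes to add, the QLinear nodes to remove, and the new initializers, and the caller (for example ONNXModel.export) applies them. A hedged sketch of standalone use with plain onnx APIs, assuming a quantized QLinear model on disk (this is not the exact application logic used inside the model wrapper):

# Sketch: apply the returned edits with plain onnx APIs; file names are placeholders.
import onnx

from neural_compressor.utils.export import onnx_qlinear_to_qdq

model = onnx.load("int8_qlinear_model.onnx")

# Map each tensor name to the nodes that consume it, as expected by the helper.
input_name_to_nodes = {}
for node in model.graph.node:
    for tensor_name in node.input:
        input_name_to_nodes.setdefault(tensor_name, []).append(node)

add_nodes, remove_nodes, inits = onnx_qlinear_to_qdq(model, input_name_to_nodes)
for node in remove_nodes:
    model.graph.node.remove(node)
model.graph.node.extend(add_nodes)
model.graph.initializer.extend(inits)
# NOTE: a production flow (e.g. ONNXModel.export) also re-sorts nodes topologically before saving.
onnx.save(model, "int8_qdq_model.onnx")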
118 changes: 118 additions & 0 deletions neural_compressor/utils/export/tf2onnx.py
@@ -0,0 +1,118 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions to export model from TensorFlow to ONNX."""

import re

from neural_compressor.utils import logger
from neural_compressor.utils.utility import LazyImport

t2o = LazyImport("tf2onnx")


def _split_nodename_and_shape(name):
    """Split input name with shape into name and shape."""
    # pattern for a node name
    inputs = []
    shapes = {}
    # input takes in most cases the format name:0, where 0 is the output number
    # in some cases placeholders don't have a rank which onnx can't handle so we let users override the shape
    # by appending the shape, i.e. [1,28,28,3]
    name_pattern = r"(?:([\w\d/\-\._:]+)(\[[\-\d,]+\])?),?"
    splits = re.split(name_pattern, name)
    for i in range(1, len(splits), 3):
        inputs.append(splits[i] + ":0")
        if splits[i + 1] is not None:
            shape = [int(n) for n in splits[i + 1][1:-1].split(",")]
            shape = [n if n >= 0 else None for n in shape]
            shapes[splits[i] + ":0"] = shape
    if not shapes:
        shapes = None
    return inputs, shapes


def tf_to_fp32_onnx(graph_def, save_path, opset_version=14, input_names=None, output_names=None, inputs_as_nchw=None):
    """Export FP32 TensorFlow model into FP32 ONNX model using the tf2onnx tool.

    Args:
        graph_def (GraphDef): fp32 graph_def to convert.
        save_path (str): save path of ONNX model.
        opset_version (int, optional): opset version. Defaults to 14.
        input_names (list, optional): input names. Defaults to None.
        output_names (list, optional): output names. Defaults to None.
        inputs_as_nchw (list, optional): input names to transpose to NCHW. Defaults to None.
    """
    shape_override = None
    if isinstance(input_names, str):
        input_names, shape_override = _split_nodename_and_shape(input_names)
    else:
        input_names[:] = [o + ":0" for o in input_names]
    output_names[:] = [o + ":0" for o in output_names]
    t2o.convert.from_graph_def(
        graph_def=graph_def,
        input_names=input_names,
        output_names=output_names,
        inputs_as_nchw=inputs_as_nchw,
        shape_override=shape_override,
        opset=opset_version,
        output_path=save_path,
    )
    info = "The FP32 ONNX Model is exported to path: {0}".format(save_path)
    logger.info("*" * len(info))
    logger.info(info)
    logger.info("*" * len(info))


def tf_to_int8_onnx(
    int8_model, save_path, opset_version: int = 14, input_names=None, output_names=None, inputs_as_nchw=None
):
    """Export INT8 TensorFlow model into INT8 ONNX model.

    Args:
        int8_model (TensorFlow ITEX QDQ model): int8 model.
        save_path (str): save path of ONNX model.
        opset_version (int, optional): opset version. Defaults to 14.
        input_names (list, optional): input names. Defaults to None.
        output_names (list, optional): output names. Defaults to None.
        inputs_as_nchw (list, optional): input names to transpose to NCHW. Defaults to None.
    """
    shape_override = None
    if isinstance(input_names, str):
        input_names, shape_override = _split_nodename_and_shape(input_names)
    else:
        input_names[:] = [o + ":0" for o in input_names]
    output_names[:] = [o + ":0" for o in output_names]
    onnx_convert_graph = "./converted_graph.onnx"
    from neural_compressor.adaptor.tf_utils.tf2onnx_converter import TensorflowQDQToOnnxQDQConverter

    TensorflowQDQToOnnxQDQConverter(
        int8_model, input_names, output_names, shape_override, inputs_as_nchw, opset_version
    ).convert(onnx_convert_graph)

    import onnxruntime as ort

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL
    sess_options.optimized_model_filepath = save_path
    import onnx

    model = onnx.load(onnx_convert_graph)
    ort.InferenceSession(model.SerializeToString(), sess_options)
    info = "The INT8 ONNX Model is exported to path: {0}".format(save_path)
    logger.info("*" * len(info))
    logger.info(info)
    logger.info("*" * len(info))