From d84799eabef33263c29297e3150b70c0cb830826 Mon Sep 17 00:00:00 2001
From: Ashwini Khade
Date: Mon, 26 Aug 2019 18:07:59 -0700
Subject: [PATCH] add implementation for dynamic quantize linear

---
 onnxruntime/core/providers/common.h           |  9 +++
 .../providers/cpu/cpu_execution_provider.cc   |  2 +
 .../cpu/tensor/dynamicquantizelinear.cc       | 75 +++++++++++++++++++
 .../cpu/tensor/dynamicquantizelinear.h        | 20 +++++
 .../providers/cpu/tensor/quantize_linear.cc   |  8 --
 onnxruntime/test/onnx/main.cc                 |  9 +--
 .../tensor/dynamic_quantize_linear_test.cc    | 51 +++++++++++++
 .../test/python/onnx_backend_test_series.py   |  3 -
 8 files changed, 160 insertions(+), 17 deletions(-)
 create mode 100644 onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
 create mode 100644 onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
 create mode 100644 onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc

diff --git a/onnxruntime/core/providers/common.h b/onnxruntime/core/providers/common.h
index 5bd8f69c5b1cb..c23dded2f9c40 100644
--- a/onnxruntime/core/providers/common.h
+++ b/onnxruntime/core/providers/common.h
@@ -33,4 +33,13 @@ inline bool IsScalarOr1ElementVector(const Tensor* input) {
   }
 }
 
+/**
+Clamps input between provided min and max values
+**/
+inline float clamp(float v, float lo, float hi) {
+  if (v < lo) return lo;
+  if (v > hi) return hi;
+  return v;
+}
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index a700a151f82d4..1aa697fadc9cd 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -302,6 +302,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, Re
 
 // opset 11
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Clip);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear);
 
 void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
   static const BuildKernelCreateInfoFn function_table[] = {
@@ -589,6 +590,7 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
 
       //opset 11
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Clip)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear)>,
   };
 
   for (auto& function_table_entry : function_table) {
diff --git a/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
new file mode 100644
index 0000000000000..dafa3a322f5e8
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
@@ -0,0 +1,75 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "dynamicquantizelinear.h"
+#include "core/providers/common.h"
+#include "core/util/math_cpuonly.h"
+#include <cfenv>
+#include <cmath>
+
+namespace onnxruntime {
+
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    DynamicQuantizeLinear,
+    11,
+    uint8_t,
+    KernelDefBuilder()
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    DynamicQuantizeLinear<uint8_t>);
+
+
+static float RoundHalfToEven(float input) {
+  std::fesetround(FE_TONEAREST);
+  auto result = std::nearbyintf(input);
+  return result;
+}
+
+// formula is Y = X / Scale + ZeroPoint
+template <typename T>
+Status DynamicQuantizeLinear<T>::Compute(OpKernelContext* ctx) const {
+  auto x_ptr = ctx->Input<Tensor>(0);
+  ORT_ENFORCE(x_ptr != nullptr);
+  auto& x = *x_ptr;
+  const auto* x_data = x.template Data<float>();
+
+  auto& y = *ctx->Output(0, x.Shape());
+  std::vector<int64_t> shape({});
+  auto& y_scale = *ctx->Output(1, shape);
+  auto& y_zeropoint = *ctx->Output(2, shape);
+
+  // find quantization range min and max
+  float qmax = std::numeric_limits<T>::max();
+  float qmin = std::numeric_limits<T>::min();
+  // Adjust the int8 range to -127 to 127 so that zero point can be 0
+  if (qmin == -128) {
+    qmin = -127;
+  }
+
+  // find input range min and max, adjusted to include 0
+  auto min = ConstEigenVectorMap<float>(x_data, x.Shape().Size()).minCoeff();
+  min = std::min(min, 0.0f);
+  auto max = ConstEigenVectorMap<float>(x_data, x.Shape().Size()).maxCoeff();
+  max = std::max(max, 0.0f);
+
+  // find scale and zero point; scale falls back to 1 for an all-zero input
+  auto scale = (max == min) ? 1.0f : (max - min) / (qmax - qmin);
+  auto* output_scale = y_scale.template MutableData<float>();
+  *output_scale = scale;
+
+  const auto initial_zero_point = qmin - min / scale;
+  auto zero_point = static_cast<T>(RoundHalfToEven(std::max(qmin, std::min(qmax, initial_zero_point))));
+  auto* output_zp = y_zeropoint.template MutableData<T>();
+  *output_zp = zero_point;
+
+  // quantize the data
+  auto* output = y.template MutableData<T>();
+  const auto num_of_elements = x.Shape().Size();
+
+  for (int64_t i = 0; i < num_of_elements; ++i) {
+    output[i] = static_cast<T>(clamp(RoundHalfToEven(static_cast<float>(x_data[i] / scale)) + zero_point, qmin, qmax));
+  }
+
+  return Status::OK();
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
new file mode 100644
index 0000000000000..fa15cc9126cb6
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
@@ -0,0 +1,20 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace onnxruntime {
+
+template <typename T>
+class DynamicQuantizeLinear final : public OpKernel {
+ public:
+  explicit DynamicQuantizeLinear(const OpKernelInfo& info) : OpKernel(info) {
+  }
+
+  Status Compute(OpKernelContext* context) const override;
+
+};
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
index 49006cc3725cd..e345ad4da3cd8 100644
--- a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
+++ b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
@@ -80,14 +80,6 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL(
         .TypeConstraint("y", DataTypeImpl::GetTensorType<int8_t>()),
     QuantizeLinear<int8_t>);
 
-// clamp doesn't exist in the version of <algorithm> that we're using, so
-// make a local one.
-static float clamp(float v, float lo, float hi) {
-  if (v < lo) return lo;
-  if (v > hi) return hi;
-  return v;
-}
-
 static float RoundHalfToEven(float input) {
   std::fesetround(FE_TONEAREST);
   auto result = std::nearbyintf(input);
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index db35cef4e912c..37242cf2bb62b 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -380,12 +380,9 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
       {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
       {"tf_inception_v2", "result mismatch"},
       {"mxnet_arcface", "result mismatch"},
-      {"dynamicquantizelinear", "not implemented yet"},
-      {"dynamicquantizelinear_expanded", "not implemented yet"},
-      {"dynamicquantizelinear_max_adjusted", "not implemented yet"},
-      {"dynamicquantizelinear_max_adjusted_expanded", "not implemented yet"},
-      {"dynamicquantizelinear_min_adjusted", "not implemented yet"},
-      {"dynamicquantizelinear_min_adjusted_expanded", "not implemented yet"},
+      {"dynamicquantizelinear_expanded", "Round(11) not implemented yet"},
+      {"dynamicquantizelinear_max_adjusted_expanded", "Round(11) not implemented yet"},
+      {"dynamicquantizelinear_min_adjusted_expanded", "Round(11) not implemented yet"},
       {"top_k", "not implemented yet for opset 11", {"onnxtip"}},
       {"top_k_smallest", "not implemented yet for opset 11", {"onnxtip"}},
       {"unique_not_sorted_without_axis", "not implemented yet"},
diff --git a/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc b/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc
new file mode 100644
index 0000000000000..0d6d9bb5dae1d
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+// range = [-ve, +ve]
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear) {
+  OpTester test("DynamicQuantizeLinear", 11);
+  std::vector<int64_t> dims{6};
+  test.AddInput<float>("x", dims, {0, 2, -3, -2.5f, 1.34f, 0.5f});
+  test.AddOutput<uint8_t>("y", dims, {153, 255, 0, 26, 221, 179});
+  test.AddOutput<float>("y_scale", {}, {0.0196078438f});
+  test.AddOutput<uint8_t>("y_zero_point", {}, {153});
+  test.Run();
+}
+
+// quantize with 2D data with min adjustment to include 0 in the input range.
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear_Min_Adjusted) {
+  OpTester test("DynamicQuantizeLinear", 11);
+  std::vector<int64_t> dims{3, 4};
+  test.AddInput<float>("x", dims,
+                       {1, 2.1f, 1.3f, 2.5f,
+                        3.34f, 4.0f, 1.5f, 2.6f,
+                        3.9f, 4.0f, 3.0f, 2.345f});
+
+  test.AddOutput<uint8_t>("y", dims,
+                          {64, 134, 83, 159,
+                           213, 255, 96, 166,
+                           249, 255, 191, 149});
+  test.AddOutput<float>("y_scale", {}, {0.01568628f});
+  test.AddOutput<uint8_t>("y_zero_point", {}, {0});
+  test.Run();
+}
+
+// quantize max adjustment to include 0 in the input range.
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear_Max_Adjusted) {
+  OpTester test("DynamicQuantizeLinear", 11);
+  std::vector<int64_t> dims{6};
+  test.AddInput<float>("x", dims, {-1.0f, -2.1f, -1.3f, -2.5f, -3.34f, -4.0f});
+  test.AddOutput<uint8_t>("y", dims, {191, 121, 172, 96, 42, 0});
+  test.AddOutput<float>("y_scale", {}, {0.01568628f});
+  test.AddOutput<uint8_t>("y_zero_point", {}, {255});
+  test.Run();
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index a2f6011cced1b..a57270174cb8e 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -110,11 +110,8 @@ def create_backend_test(testname=None):
                              '^test_cumsum_1d_reverse_exclusive_cpu.*',
                              '^test_cumsum_2d_axis_0_cpu.*',
                              '^test_cumsum_2d_axis_1_cpu.*',
-                             '^test_dynamicquantizelinear*',
                              '^test_dynamicquantizelinear_expanded*',
-                             '^test_dynamicquantizelinear_max_adjusted*',
                              '^test_dynamicquantizelinear_max_adjusted_expanded*',
-                             '^test_dynamicquantizelinear_min_adjusted*',
                              '^test_dynamicquantizelinear_min_adjusted_expanded*',
                              '^test_depthtospace*',
                              '^test_gather_elements*',