From d84799eabef33263c29297e3150b70c0cb830826 Mon Sep 17 00:00:00 2001
From: Ashwini Khade <askhade@microsoft.com>
Date: Mon, 26 Aug 2019 18:07:59 -0700
Subject: [PATCH] add implementation for dynamic quantize linear

---
 onnxruntime/core/providers/common.h           |  9 +++
 .../providers/cpu/cpu_execution_provider.cc   |  2 +
 .../cpu/tensor/dynamicquantizelinear.cc       | 75 +++++++++++++++++++
 .../cpu/tensor/dynamicquantizelinear.h        | 20 +++++
 .../providers/cpu/tensor/quantize_linear.cc   |  8 --
 onnxruntime/test/onnx/main.cc                 |  9 +--
 .../tensor/dynamic_quantize_linear_test.cc    | 51 +++++++++++++
 .../test/python/onnx_backend_test_series.py   |  3 -
 8 files changed, 160 insertions(+), 17 deletions(-)
 create mode 100644 onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
 create mode 100644 onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
 create mode 100644 onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc
diff --git a/onnxruntime/core/providers/common.h b/onnxruntime/core/providers/common.h
index 5bd8f69c5b1cb..c23dded2f9c40 100644
--- a/onnxruntime/core/providers/common.h
+++ b/onnxruntime/core/providers/common.h
@@ -33,4 +33,13 @@ inline bool IsScalarOr1ElementVector(const Tensor* input) {
   }
 }
 
+/**
+Restricts v to the closed interval [lo, hi].
+Yields lo when v < lo, hi when v > hi, and v itself otherwise
+(a NaN v fails both comparisons and is therefore returned unchanged).
+**/
+inline float clamp(float v, float lo, float hi) {
+  return (v < lo) ? lo : ((v > hi) ? hi : v);
+}
+
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
index a700a151f82d4..1aa697fadc9cd 100644
--- a/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
+++ b/onnxruntime/core/providers/cpu/cpu_execution_provider.cc
@@ -302,6 +302,7 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 10, Re
 
 // opset 11
 class ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Clip);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear);
 
 void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
   static const BuildKernelCreateInfoFn function_table[] = {
@@ -589,6 +590,7 @@ void RegisterOnnxOperatorKernels(KernelRegistry& kernel_registry) {
 
       //opset 11
       BuildKernelCreateInfo<ONNX_OPERATOR_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, Clip)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCpuExecutionProvider, kOnnxDomain, 11, uint8_t, DynamicQuantizeLinear)>,
   };
 
   for (auto& function_table_entry : function_table) {
diff --git a/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
new file mode 100644
index 0000000000000..dafa3a322f5e8
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.cc
@@ -0,0 +1,75 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "dynamicquantizelinear.h"
+#include "core/providers/common.h"
+#include "core/util/math_cpuonly.h"
+#include <cmath>
+#include <cfenv>
+
+namespace onnxruntime {
+// CPU kernel registration: DynamicQuantizeLinear, opset 11, with "T2" constrained to uint8 tensors.
+ONNX_CPU_OPERATOR_TYPED_KERNEL(
+    DynamicQuantizeLinear,
+    11,
+    uint8_t,
+    KernelDefBuilder()
+        .TypeConstraint("T2", DataTypeImpl::GetTensorType<uint8_t>()),
+    DynamicQuantizeLinear<uint8_t>);
+
+
+// Rounds to the nearest integer, ties to even. Side effect: sets the rounding mode to FE_TONEAREST.
+static float RoundHalfToEven(float input) {
+  std::fesetround(FE_TONEAREST);
+  return std::nearbyintf(input);
+}
+
+// Y = saturate(round(X / Scale) + ZeroPoint), where Scale and ZeroPoint are derived from
+// the input's value range widened to contain 0. Outputs: 0 = Y, 1 = Scale, 2 = ZeroPoint.
+template <typename T>
+Status DynamicQuantizeLinear<T>::Compute(OpKernelContext* ctx) const {
+  auto x_ptr = ctx->Input<Tensor>(0);
+  ORT_ENFORCE(x_ptr != nullptr);
+  auto& x = *x_ptr;
+  const auto* x_data = x.template Data<float>();
+  const int64_t num_of_elements = x.Shape().Size();
+
+  auto& y = *ctx->Output(0, x.Shape());
+  std::vector<int64_t> shape({});
+  auto& y_scale = *ctx->Output(1, shape);
+  auto& y_zeropoint = *ctx->Output(2, shape);
+
+  // find quantization range min and max
+  float qmax = std::numeric_limits<T>::max();
+  float qmin = std::numeric_limits<T>::min();
+  // Adjust the int8 range to -127 to 127 so that zero point can be 0
+  if (qmin == -128) {
+    qmin = -127;
+  }
+
+  // Input range widened to contain 0.0f (not qmin, which is 0 only for unsigned T); empty input gives [0, 0].
+  float min = 0.0f;
+  float max = 0.0f;
+  if (num_of_elements > 0) {
+    const auto x_vec = ConstEigenVectorMap<float>(x_data, num_of_elements);
+    min = std::min(x_vec.minCoeff(), 0.0f);
+    max = std::max(x_vec.maxCoeff(), 0.0f);
+  }
+
+  // A zero-width range (all zeros or empty) would give scale 0 and a NaN zero point, so use scale 1.
+  const float scale = (max == min) ? 1.0f : (max - min) / (qmax - qmin);
+  *y_scale.template MutableData<float>() = scale;
+
+  const float initial_zero_point = qmin - min / scale;
+  const T zero_point = static_cast<T>(RoundHalfToEven(std::max(qmin, std::min(qmax, initial_zero_point))));
+  *y_zeropoint.template MutableData<T>() = zero_point;
+
+  // quantize the data; 64-bit index because the element count may not fit in an int
+  auto* output = y.template MutableData<T>();
+  for (int64_t i = 0; i < num_of_elements; ++i) {
+    output[i] = static_cast<T>(clamp(RoundHalfToEven(x_data[i] / scale) + zero_point, qmin, qmax));
+  }
+  return Status::OK();
+}
+
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
new file mode 100644
index 0000000000000..fa15cc9126cb6
--- /dev/null
+++ b/onnxruntime/core/providers/cpu/tensor/dynamicquantizelinear.h
@@ -0,0 +1,20 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/common/common.h"
+#include "core/framework/op_kernel.h"
+
+namespace onnxruntime {
+
+// CPU kernel for the DynamicQuantizeLinear operator; T is the quantized output element type.
+template <typename T>
+class DynamicQuantizeLinear final : public OpKernel {
+ public:
+  // explicit: an OpKernelInfo must never convert implicitly into a kernel instance.
+  explicit DynamicQuantizeLinear(const OpKernelInfo& info) : OpKernel(info) {}
+
+  Status Compute(OpKernelContext* context) const override;
+};
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
index 49006cc3725cd..e345ad4da3cd8 100644
--- a/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
+++ b/onnxruntime/core/providers/cpu/tensor/quantize_linear.cc
@@ -80,14 +80,6 @@ ONNX_CPU_OPERATOR_TYPED_KERNEL(
         .TypeConstraint("y", DataTypeImpl::GetTensorType<int8_t>()),
     QuantizeLinear<int8_t>);
 
-// clamp doesn't exist in the version of <algorithm> that we're using, so
-// make a local one.
-static float clamp(float v, float lo, float hi) {
-  if (v < lo) return lo;
-  if (v > hi) return hi;
-  return v;
-}
-
 static float RoundHalfToEven(float input) {
   std::fesetround(FE_TONEAREST);
   auto result = std::nearbyintf(input);
diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc
index db35cef4e912c..37242cf2bb62b 100644
--- a/onnxruntime/test/onnx/main.cc
+++ b/onnxruntime/test/onnx/main.cc
@@ -380,12 +380,9 @@ int real_main(int argc, char* argv[], Ort::Env& env) {
       {"maxpool_with_argmax_2d_precomputed_strides", "ShapeInferenceError"},
       {"tf_inception_v2", "result mismatch"},
       {"mxnet_arcface", "result mismatch"},
-      {"dynamicquantizelinear", "not implemented yet"},
-      {"dynamicquantizelinear_expanded", "not implemented yet"},
-      {"dynamicquantizelinear_max_adjusted", "not implemented yet"},
-      {"dynamicquantizelinear_max_adjusted_expanded", "not implemented yet"},
-      {"dynamicquantizelinear_min_adjusted", "not implemented yet"},
-      {"dynamicquantizelinear_min_adjusted_expanded", "not implemented yet"},
+      {"dynamicquantizelinear_expanded", "Round(11) not implemented yet"},
+      {"dynamicquantizelinear_max_adjusted_expanded", "Round(11) not implemented yet"},
+      {"dynamicquantizelinear_min_adjusted_expanded", "Round(11) not implemented yet"},
       {"top_k", "not implemented yet for opset 11", {"onnxtip"}},
       {"top_k_smallest", "not implemented yet for opset 11", {"onnxtip"}},
       {"unique_not_sorted_without_axis", "not implemented yet"},
diff --git a/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc b/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc
new file mode 100644
index 0000000000000..0d6d9bb5dae1d
--- /dev/null
+++ b/onnxruntime/test/providers/cpu/tensor/dynamic_quantize_linear_test.cc
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "gtest/gtest.h"
+#include "test/providers/provider_test_utils.h"
+
+namespace onnxruntime {
+namespace test {
+
+// Input spans negative and positive values (range [-3, 2]), so 0 already lies inside it.
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear) {
+  std::vector<int64_t> shape{6};
+  OpTester tester("DynamicQuantizeLinear", 11);
+  tester.AddInput<float>("x", shape, {0, 2, -3, -2.5f, 1.34f, 0.5f});
+  tester.AddOutput<uint8_t>("y", shape, {153, 255, 0, 26, 221, 179});
+  tester.AddOutput<float>("y_scale", {}, {0.0196078438f});
+  tester.AddOutput<uint8_t>("y_zero_point", {}, {153});
+  tester.Run();
+}
+
+// 2D, all-positive input: the range minimum is lowered to 0, so the expected zero point is 0.
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear_Min_Adjusted) {
+  std::vector<int64_t> shape{3, 4};
+  OpTester tester("DynamicQuantizeLinear", 11);
+  tester.AddInput<float>("x", shape,
+                         {1, 2.1f, 1.3f, 2.5f,
+                          3.34f, 4.0f, 1.5f, 2.6f,
+                          3.9f, 4.0f, 3.0f, 2.345f});
+
+  tester.AddOutput<uint8_t>("y", shape,
+                            {64, 134, 83, 159,
+                             213, 255, 96, 166,
+                             249, 255, 191, 149});
+  tester.AddOutput<float>("y_scale", {}, {0.01568628f});
+  tester.AddOutput<uint8_t>("y_zero_point", {}, {0});
+  tester.Run();
+}
+
+// All-negative input: the range maximum is raised to 0, so the expected zero point is 255.
+TEST(QuantizeLinearOpTest, DynamicQuantizeLinear_Max_Adjusted) {
+  std::vector<int64_t> shape{6};
+  OpTester tester("DynamicQuantizeLinear", 11);
+  tester.AddInput<float>("x", shape, {-1.0f, -2.1f, -1.3f, -2.5f, -3.34f, -4.0f});
+  tester.AddOutput<uint8_t>("y", shape, {191, 121, 172, 96, 42, 0});
+  tester.AddOutput<float>("y_scale", {}, {0.01568628f});
+  tester.AddOutput<uint8_t>("y_zero_point", {}, {255});
+  tester.Run();
+}
+
+}  // namespace test
+}  // namespace onnxruntime
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index a2f6011cced1b..a57270174cb8e 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -110,11 +110,8 @@ def create_backend_test(testname=None):
                                  '^test_cumsum_1d_reverse_exclusive_cpu.*',
                                  '^test_cumsum_2d_axis_0_cpu.*',
                                  '^test_cumsum_2d_axis_1_cpu.*',
-                                 '^test_dynamicquantizelinear*',
                                  '^test_dynamicquantizelinear_expanded*',
-                                 '^test_dynamicquantizelinear_max_adjusted*',
                                  '^test_dynamicquantizelinear_max_adjusted_expanded*',
-                                 '^test_dynamicquantizelinear_min_adjusted*',
                                  '^test_dynamicquantizelinear_min_adjusted_expanded*',
                                  '^test_depthtospace*',
                                  '^test_gather_elements*',