Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
* Refactor use of weight transpose to common file between contrib
codegens.
* Make function arguments more explicit.
* Update network hashes.

Change-Id: Ib53bc7d2837b62908b92fd09062cbe9a8bb4ab30
  • Loading branch information
lhutton1 committed Oct 5, 2022
1 parent 1b3d315 commit 53b3383
Show file tree
Hide file tree
Showing 9 changed files with 123 additions and 69 deletions.
4 changes: 3 additions & 1 deletion cmake/modules/contrib/CMSISNN.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
if(USE_CMSISNN)
add_definitions(-DTVM_USE_CMSISNN)
message(STATUS "Build with CMSIS-NN support")
tvm_file_glob(GLOB RELAY_CONTRIB_CMSISNN_SRCS src/relay/backend/contrib/cmsisnn/*.cc)
tvm_file_glob(GLOB RELAY_CONTRIB_CMSISNN_SRCS
src/relay/backend/contrib/cmsisnn/*.cc
src/relay/backend/contrib/constant_transforms.cc)
list(APPEND COMPILER_SRCS ${RELAY_CONTRIB_CMSISNN_SRCS})
endif(USE_CMSISNN)
3 changes: 2 additions & 1 deletion cmake/modules/contrib/EthosN.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ if(NOT USE_ETHOSN STREQUAL "OFF")
list(APPEND RUNTIME_SRCS ${ETHOSN_RUNTIME_CONTRIB_SRC})

tvm_file_glob(GLOB COMPILER_ETHOSN_SRCS
src/relay/backend/contrib/ethosn/*)
src/relay/backend/contrib/ethosn/*
src/relay/backend/contrib/constant_transforms.cc)
list(APPEND COMPILER_SRCS ${COMPILER_ETHOSN_SRCS})

list(APPEND TVM_LINKER_LIBS ${ETHOSN_COMPILER_LIBRARY}
Expand Down
38 changes: 3 additions & 35 deletions src/relay/backend/contrib/cmsisnn/generate_constants.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "../../../op/make_op.h"
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"
#include "../constant_transforms.h"
#include "convolutions.h"

namespace tvm {
Expand All @@ -49,39 +50,6 @@ class GenerateConstantsMutator : public MixedModeMutator {
explicit GenerateConstantsMutator(const IRModule& mod) : mod_(mod) {}

private:
/*! * \brief Converts Kernel layout from HWIO to OHWI to align to CMSIS-NN requirements */
Expr ConvertKernelLayout(Expr kernel_expr, const Conv2DAttrs* conv2d_attrs, Attrs* new_attrs) {
// Clone the incoming conv2d attributes field-by-field, changing only the
// kernel layout to the CMSIS-NN required "OHWI".
// NOTE(review): conv2d_attrs points to const, so these std::move calls
// degrade to copies — they are harmless but misleading.
auto attrs = make_object<Conv2DAttrs>();
attrs->strides = std::move(conv2d_attrs->strides);
attrs->padding = std::move(conv2d_attrs->padding);
attrs->dilation = std::move(conv2d_attrs->dilation);
attrs->groups = conv2d_attrs->groups;
attrs->channels = std::move(conv2d_attrs->channels);
attrs->kernel_size = std::move(conv2d_attrs->kernel_size);
attrs->data_layout = std::move(conv2d_attrs->data_layout);
attrs->kernel_layout = runtime::String("OHWI");
attrs->out_layout = std::move(conv2d_attrs->out_layout);
attrs->out_dtype = std::move(conv2d_attrs->out_dtype);
// Hand the rewritten attributes back to the caller.
*new_attrs = tvm::Attrs{attrs};

// Locate each axis letter in the current kernel layout string so a
// transpose permutation into O,H,W,I order can be built.
// NOTE(review): std::string::find returns npos when an axis letter is
// absent; the layout is assumed to contain all of O, H, W, I.
std::string kernel_layout = conv2d_attrs->kernel_layout.c_str();
int pos_o = kernel_layout.find("O");
int pos_h = kernel_layout.find("H");
int pos_w = kernel_layout.find("W");
int pos_i = kernel_layout.find("I");

// Wrap the transpose in a temporary single-function module and run the
// FoldConstant pass so the transposed kernel is materialised as a constant.
IRModule kernel_module;
auto func_body = MakeTranspose(
kernel_expr, {Integer(pos_o), Integer(pos_h), Integer(pos_w), Integer(pos_i)});
auto kernel_func =
Function(FreeVars(func_body), func_body, Type(), FreeTypeVars(func_body, kernel_module));
GlobalVar kernel_var("main");
kernel_module->Add(kernel_var, kernel_func);
kernel_module = relay::transform::FoldConstant()(kernel_module);
kernel_func = Downcast<Function>(kernel_module->Lookup("main"));
// Return only the folded body (the transposed kernel), not the wrapper function.
return kernel_func->body;
}

/*! * \brief Performs weight transpose and substitutes existing constants in the composite
* function for Conv2D with CMSIS-NN Requantize constants */
Expr GenerateConv2dRequantConstants(const Expr& expr) {
Expand All @@ -108,13 +76,13 @@ class GenerateConstantsMutator : public MixedModeMutator {

auto* conv2d_attrs = conv2d_call->attrs.as<Conv2DAttrs>();
tvm::Attrs new_conv2d_attrs = conv2d_call->attrs;
Expr conv2d_kernel = conv2d_call->args[1];
Constant conv2d_kernel = Downcast<Constant>(conv2d_call->args[1]);

Array<PrimExpr> input_shape = conv2d_call->args[0]->type_as<TensorTypeNode>()->shape;
Array<PrimExpr> kernel_shape = conv2d_call->args[1]->type_as<TensorTypeNode>()->shape;
if (!IsCMSISNNDepthwise(conv2d_attrs, input_shape, kernel_shape)) {
// Transpose weights: HWIO -> OHWI for Conv2D
conv2d_kernel = ConvertKernelLayout(conv2d_call->args[1], conv2d_attrs, &new_conv2d_attrs);
conv2d_kernel = TransposeWeights(conv2d_kernel, conv2d_attrs->kernel_layout, "OHWI");
}

// Obtain input and output scales from Relay's Requantization
Expand Down
56 changes: 56 additions & 0 deletions src/relay/backend/contrib/constant_transforms.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include "constant_transforms.h"

#include "../../transforms/pattern_utils.h"
#include "../../transforms/simplify_expr.h"

/*!
* \file src/relay/backend/contrib/constant_transforms.cc
* \brief Transforms applied to constant operations during codegen for BYOC backends.
*/

namespace tvm {
namespace relay {
namespace contrib {

/*! \brief Constant-fold `expr` by running the FoldConstant pass over a temporary module. */
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn) {
  IRModule folded_mod = transform::FoldConstant(fold_qnn)(IRModule::FromExpr(expr));
  Function main_func = Downcast<Function>(folded_mod->Lookup("main"));
  // FromExpr wraps a non-function expression in a function; unwrap the body
  // in that case so callers get back the same kind of node they passed in.
  if (expr.as<FunctionNode>() != nullptr) {
    return main_func;
  }
  return main_func->body;
}

/*!
 * \brief Transpose a constant from `source_layout` to `target_layout`.
 *
 * Builds the axis permutation by locating each target-layout axis in the
 * source layout, applies a transpose and constant-folds it so the result is
 * again a single Constant.
 */
Constant TransposeWeights(const Constant& data, const std::string& source_layout,
                          const std::string& target_layout) {
  // A valid permutation requires both layouts to describe the same rank.
  ICHECK_EQ(source_layout.size(), target_layout.size())
      << "Layout ranks differ: '" << source_layout << "' vs '" << target_layout << "'";
  Array<Integer> transpose_matrix;
  for (const char& axis : target_layout) {
    const size_t pos = source_layout.find(axis);
    // Previously an absent axis silently produced npos -> -1 in the
    // permutation; fail loudly instead.
    ICHECK_NE(pos, std::string::npos)
        << "Axis '" << axis << "' of target layout '" << target_layout
        << "' not present in source layout '" << source_layout << "'";
    transpose_matrix.push_back(static_cast<int>(pos));
  }
  Expr transpose = MakeTranspose(data, transpose_matrix);
  // Fold the transpose at compile time and re-infer types so the result can
  // be downcast back to a Constant.
  transpose = InferType(FoldConstantExpr(transpose));
  return Downcast<Constant>(transpose);
}

} // namespace contrib
} // namespace relay
} // namespace tvm
52 changes: 52 additions & 0 deletions src/relay/backend/contrib/constant_transforms.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file src/relay/backend/contrib/constant_transforms.h
 * \brief Transforms applied to constant operations during codegen for BYOC backends.
 */

#ifndef TVM_RELAY_BACKEND_CONTRIB_CONSTANT_TRANSFORMS_H_
#define TVM_RELAY_BACKEND_CONTRIB_CONSTANT_TRANSFORMS_H_

#include <tvm/relay/expr.h>

#include <string>

namespace tvm {
namespace relay {
namespace contrib {

/*!
 * \brief Apply constant folding on an expression.
 *
 * \param expr The expression to fold.
 * \param fold_qnn Whether to fold constants for QNN operations.
 * \returns The new folded expression.
 */
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true);

/*!
 * \brief Transpose weights from `source_layout` to `target_layout`.
 *
 * \param data The constant expression to transpose.
 * \param source_layout The current layout of the constant e.g. "OHWI".
 * \param target_layout The target layout of the constant e.g. "HWIO".
 * \returns The transposed constant.
 */
Constant TransposeWeights(const Constant& data, const std::string& source_layout,
                          const std::string& target_layout);

}  // namespace contrib
}  // namespace relay
}  // namespace tvm

#endif  // TVM_RELAY_BACKEND_CONTRIB_CONSTANT_TRANSFORMS_H_
1 change: 1 addition & 0 deletions src/relay/backend/contrib/ethosn/convert_equivalent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include "../../../qnn/utils.h"
#include "../../../transforms/pattern_utils.h"
#include "../../../transforms/simplify_expr.h"
#include "../constant_transforms.h"
#include "ethosn_api.h"

namespace tvm {
Expand Down
25 changes: 4 additions & 21 deletions src/relay/backend/contrib/ethosn/ethosn_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#include "../../../op/make_op.h"
#include "../../../transforms/pattern_utils.h"
#include "../../../transforms/simplify_expr.h"
#include "../constant_transforms.h"
#include "ethosn_support_library/Support.hpp"
#include "ethosn_support_library/SupportQueries.hpp"
#include "tvm/relay/qnn/attrs.h"
Expand Down Expand Up @@ -171,19 +172,6 @@ EthosnError EthosnAPI::QnnConv2d(const Expr& expr, ConvolutionParams* params) {
return err;
}

/*! \brief Transposes constant weight data from `input_layout` to `target_layout`
 * by building a transpose expression and constant-folding it back to a Constant. */
Constant TransposeWeights(const Constant& data, const std::string& input_layout,
const std::string& target_layout) {
// For each axis in the target layout, find its position in the input layout;
// the resulting index list is the permutation passed to transpose.
// NOTE(review): std::string::find returns npos for an absent axis — the
// layouts are assumed to contain the same axis letters.
Array<Integer> transpose_matrix;
for (const char& c : target_layout) {
int pos = input_layout.find(c);
transpose_matrix.push_back(pos);
}
Expr transpose = MakeTranspose(data, transpose_matrix);
// Fold the transpose at compile time and re-infer types so the result can be
// downcast back to a single Constant node.
transpose = InferType(FoldConstantExpr(transpose));
Constant transposed_data = Downcast<Constant>(transpose);
return transposed_data;
}

EthosnError EthosnAPI::QnnFullyConnected(const Expr& expr, FullyConnectedParams* params) {
Call requantize = Downcast<Call>(expr);
Call bias_add = Downcast<Call>(requantize->args[0]);
Expand Down Expand Up @@ -211,7 +199,9 @@ EthosnError EthosnAPI::QnnFullyConnected(const Expr& expr, FullyConnectedParams*
err += Tvm2Npu(input_zero_point, input_scale, &data_q_info);
err += Tvm2Npu(kernel_zero_point, kernel_scale, &weights_q_info);
std::valarray<float> bias = data_q_info.GetScale() * weights_q_info.GetScales();
err += Tvm2Npu(0, bias, 3, &bias_q_info);
const int bias_zero_point = 0;
const unsigned int bias_axis = 3;
err += Tvm2Npu(bias_zero_point, bias, bias_axis, &bias_q_info);
err += Tvm2Npu(output_zero_point, output_scale, &output_q_info);

// Create fc info
Expand Down Expand Up @@ -1081,13 +1071,6 @@ EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) {
return EthosnError();
}

/*! \brief Applies the FoldConstant pass to a single expression by wrapping it in a
 * temporary module. Returns the folded "main" body, unless the input was itself a
 * function, in which case the folded function is returned whole. */
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn) {
auto mod = IRModule::FromExpr(expr);
mod = transform::FoldConstant(fold_qnn)(mod);
auto entry_func = Downcast<Function>(mod->Lookup("main"));
return expr.as<FunctionNode>() == nullptr ? entry_func->body : entry_func;
}

} // namespace ethosn
} // namespace contrib
} // namespace relay
Expand Down
9 changes: 0 additions & 9 deletions src/relay/backend/contrib/ethosn/ethosn_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,15 +324,6 @@ class EthosnAPI {
static EthosnError AsConstant(const Expr& expr, std::valarray<float>* out);
};

/*!
* \brief Apply constant folding on an expression.
*
* \param expr The expression to fold.
* \param fold_qnn Whether to fold constants for QNN operations.
* \returns The new folded expression.
*/
Expr FoldConstantExpr(const Expr& expr, bool fold_qnn = true);

} // namespace ethosn
} // namespace contrib
} // namespace relay
Expand Down
4 changes: 2 additions & 2 deletions tests/python/contrib/test_ethosn/test_networks.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def test_resnet_50_int8():
# version or a change in the Ethos-N codegen. To update this requires running
# on hardware that isn't available in CI.
_compile_hash = {
"6b130a99397715156d5fb833809a92d2",
"f16dc9caa8e696bc5da8a5c6a644eb72",
"6e5fcbab831607b9da1039aff4e56871",
"41acecca37b2735bd580f6ec38d8c2e0",
}
Expand Down Expand Up @@ -190,7 +190,7 @@ def test_inception_v4():
# codegen, which could come about from either a change in Support Library
# version or a change in the Ethos-N codegen. To update this requires running
# on hardware that isn't available in CI.
_compile_hash = {"2eeae331898f8e94c74868e190077837"}
_compile_hash = {"c00c119506b34c8e87f81aa009b42431"}
_test_image_network(
model_url="https://storage.googleapis.com/download.tensorflow.org/"
"models/inception_v4_299_quant_20181026.tgz",
Expand Down

0 comments on commit 53b3383

Please sign in to comment.