Commit

Merge branch 'op2func_refactor' of https://github.com/chenwhql/Paddle into op2func
zyfncg committed Oct 19, 2021
2 parents 05aac0d + e0710fd commit 1f89db1
Showing 28 changed files with 574 additions and 636 deletions.
3 changes: 2 additions & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -390,7 +390,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
cc_library(generator SRCS generator.cc DEPS enforce place)

cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt)
cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt var_type_traits)

# Get the current working branch
execute_process(
@@ -454,3 +454,4 @@ if(WITH_TESTING AND TEST selected_rows_test)
endif()

cc_test(scope_guard_test SRCS scope_guard_test.cc)
cc_test(tcmpt_utils_test SRCS tcmpt_utils_test.cc DEPS tcmpt_utils)
318 changes: 92 additions & 226 deletions paddle/fluid/framework/operator.cc

Large diffs are not rendered by default.

36 changes: 20 additions & 16 deletions paddle/fluid/framework/operator.h
@@ -116,8 +116,6 @@ inline std::string GradOriginalVarName(const std::string& grad_var_name) {
const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);

class ExecutionContext;
class OperatorBase;

@@ -534,13 +532,15 @@ class OperatorWithKernel : public OperatorBase {
}

/* member functions for adapting to tcmpt lib */
// TODO(chenweihang): Temporarily as a class method
virtual pt::KernelKey ConstructPtKernelKey(
const VariableValueMap& inputs, const AttributeMap& attrs,
const platform::Place& ctx_place) const;

virtual pt::KernelContext ConstructPtKernelContext(
const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;
/** In the Tensor calculation library, the new Kernel adopts a clearer and
 * more streamlined design. In some cases the Kernel's arguments do not
 * match the input and output arguments registered in the original OpMaker,
 * so we use a map to record the arguments the kernel requires. When a
 * Kernel is selected during Op execution, the original Op's arguments are
 * picked according to the signature returned by GetExpectedPtKernelArgs.
 */
virtual KernelSignature GetExpectedPtKernelArgs(
const ExecutionContext& ctx) const;
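
For illustration, a minimal sketch of such an override, assuming a hypothetical "scale" operator whose kernel takes input X, attributes scale and bias, and output Out (the op and argument names are assumptions, not taken from this commit):

// Hypothetical example only: map the OpMaker arguments of a "scale" op
// onto the new kernel's (inputs, attrs, outputs) argument order.
KernelSignature ScaleOp::GetExpectedPtKernelArgs(
    const ExecutionContext& ctx) const {
  return std::make_pair(
      "scale",
      std::make_tuple(paddle::SmallVector<std::string>({"X"}),
                      paddle::SmallVector<std::string>({"scale", "bias"}),
                      paddle::SmallVector<std::string>({"Out"})));
}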

private:
void RunImpl(const Scope& scope, const platform::Place& place) const final;
@@ -563,8 +563,9 @@ class OperatorWithKernel : public OperatorBase {
const std::vector<std::string>& inplace_vars,
const Scope& exec_scope) const;

void ChooseKernel(const RuntimeContext& ctx, const Scope& scope,
const platform::Place& place) const;
OpKernelType InnerGetExpectedKernelType(const ExecutionContext& ctx) const;

void ChooseKernel(const ExecutionContext& ctx) const;

void HandleComplexGradToRealGrad(const Scope& scope,
RuntimeContext* ctx) const;
@@ -582,8 +583,10 @@ class OperatorWithKernel : public OperatorBase {
const std::string& name) const;

/* member functions for adapting to tcmpt lib */
void ChoosePtKernel(const RuntimeContext& ctx,
const platform::DeviceContext& dev_ctx) const;
void ChoosePtKernel(const ExecutionContext& ctx) const;

pt::KernelContext BuildPtKernelContext(
const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;

protected:
mutable std::unique_ptr<OpKernelType> kernel_type_;
@@ -595,10 +598,11 @@ class OperatorWithKernel : public OperatorBase {
mutable bool all_kernels_must_compute_runtime_shape_ = false;
mutable std::mutex cache_update_mutex_;
mutable bool enable_cache_transfer_scope_ = false;
// TODO(chenweihang): Similar duplicate members are used for new tcmpt lib,
// maybe we have better impl methods
// NOTE(chenweihang): Similar op members are used to adapt to the new
// tcmpt kernel; if there is a better design in the future, we may polish
// the implementation here
mutable bool run_pt_kernel_ = false;
mutable std::unique_ptr<pt::KernelKey> pt_kernel_key_;
mutable std::unique_ptr<KernelSignature> pt_kernel_signature_;
mutable std::unique_ptr<pt::Kernel> pt_kernel_;
};

131 changes: 115 additions & 16 deletions paddle/fluid/framework/tcmpt_utils.cc
@@ -12,11 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <sstream>

#include "paddle/fluid/framework/tcmpt_utils.h"

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace framework {
@@ -62,7 +65,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
proto::VarType::Type type) {
return MakeTensorImpl<pt::DenseTensor, LoDTensor>(
tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
pt::TransToPtLayout(tensor.layout()));
pt::TransToPtDataLayout(tensor.layout()));
}

template <>
@@ -71,21 +74,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
proto::VarType::Type type) {
return MakeTensorImpl<pt::DenseTensor, Tensor>(
tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
pt::TransToPtLayout(tensor.layout()));
}

template <>
void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
LoDTensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pt::TransToProtoVarType(tensor_impl->type()));
}

template <>
void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
Tensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pt::TransToProtoVarType(tensor_impl->type()));
pt::TransToPtDataLayout(tensor.layout()));
}

std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
@@ -164,5 +153,115 @@ std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
return nullptr;
}

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key) {
proto::VarType::Type data_type = pt::TransToProtoVarType(kernel_key.dtype());
platform::Place place = pt::TransToFluidPlace(kernel_key.backend());
DataLayout data_layout = pt::TransToFluidDataLayout(kernel_key.layout());
LibraryType library_type = LibraryType::kPlain;
if (kernel_key.backend() == pt::Backend::kMKLDNN) {
library_type = LibraryType::kMKLDNN;
} else if (kernel_key.backend() == pt::Backend::kCUDNN) {
library_type = LibraryType::kCUDNN;
} else {
// do nothing
}
// TODO(chenweihang): the customized_type_value is lost
return OpKernelType(data_type, place, data_layout, library_type);
}

pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type) {
pt::Backend backend = pt::TransToPtBackend(kernel_type.place_);
if (kernel_type.library_type_ == LibraryType::kMKLDNN) {
backend = pt::Backend::kMKLDNN;
} else if (kernel_type.library_type_ == LibraryType::kCUDNN) {
backend = pt::Backend::kCUDNN;
} else {
// do nothing
}
pt::DataLayout layout = pt::TransToPtDataLayout(kernel_type.data_layout_);
pt::DataType dtype = pt::TransToPtDataType(kernel_type.data_type_);
return pt::KernelKey(backend, layout, dtype);
}
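
A hedged usage sketch of the two translators above (the concrete values are chosen for illustration); note that the pt-to-fluid direction loses customized_type_value, per the TODO above:

// Illustrative only: translate a fluid kernel type to the new library's
// key and back again.
OpKernelType fluid_type(proto::VarType::FP32, platform::CPUPlace(),
                        DataLayout::kNCHW, LibraryType::kPlain);
pt::KernelKey pt_key = TransOpKernelTypeToPtKernelKey(fluid_type);
OpKernelType round_trip = TransPtKernelKeyToOpKernelType(pt_key);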

KernelSignatureMap& KernelSignatureMap::Instance() {
static KernelSignatureMap g_kernel_signature_map;
return g_kernel_signature_map;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetInputArgsNames() {
for (int i = 0; i < op_proto_->inputs_size(); ++i) {
auto& in = op_proto_->inputs()[i];
auto& in_name = in.name();
if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) {
VLOG(1) << "Parse PtKernel input: skip extra & quant input - " << in_name;
continue;
}
// If the op contains a dispensable input, we should override the
// GetExpectedPtKernelArgs method ourselves
if (in.has_dispensable() && in.dispensable()) {
VLOG(1) << "Parse PtKernel input: skip dispensable input - " << in_name;
continue;
}
VLOG(1) << "Parse PtKernel input: " << in_name;
input_names_.emplace_back(in_name);
}
return input_names_;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetOutputArgsNames() {
for (int i = 0; i < op_proto_->outputs_size(); ++i) {
auto& out = op_proto_->outputs()[i];
auto& out_name = out.name();
// TODO(chenweihang): outputs also need to skip some cases
VLOG(1) << "Parse PtKernel output: " << out_name;
output_names_.emplace_back(out_name);
}
return output_names_;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
for (int i = 0; i < op_proto_->attrs_size(); ++i) {
auto& attr = op_proto_->attrs()[i];
auto& attr_name = attr.name();
if (attr_name == "use_mkldnn" || attr_name == "op_role" ||
attr_name == "op_role_var" || attr_name == "op_namescope" ||
attr_name == "op_callstack" || attr_name == "op_device") {
VLOG(1) << "Parse PtKernel attribute: skip needless attr - " << attr_name;
continue;
}
if ((attr.has_extra() && attr.extra()) ||
(attr.has_quant() && attr.quant())) {
VLOG(1) << "Parse PtKernel attribute: skip extra & quant attr - "
<< attr_name;
continue;
}
VLOG(1) << "Parse PtKernel attribute: " << attr_name;
attr_names_.emplace_back(attr_name);
}

return attr_names_;
}

KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
return std::make_pair(
op_proto_->type(),
std::make_tuple(GetInputArgsNames(), GetAttrsArgsNames(),
GetOutputArgsNames()));
}

std::string KernelSignatureToString(const KernelSignature& signature) {
std::stringstream os;
os << "Kernel Signature - name: " << signature.first << "; inputs: "
<< string::join_strings(std::get<0>(signature.second), ", ")
<< "; attributes: "
<< string::join_strings(std::get<1>(signature.second), ", ")
<< "; outputs: "
<< string::join_strings(std::get<2>(signature.second), ", ");
return os.str();
}
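
As a worked example (hypothetical values): a signature named "scale" with inputs {X}, attributes {scale, bias}, and outputs {Out} would render as:

Kernel Signature - name: scale; inputs: X; attributes: scale, bias; outputs: Out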

} // namespace framework
} // namespace paddle
90 changes: 83 additions & 7 deletions paddle/fluid/framework/tcmpt_utils.h
@@ -14,14 +14,25 @@ limitations under the License. */

#pragma once

#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"

#include "paddle/tcmpt/api/include/core.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/utils/small_vector.h"

namespace paddle {
namespace framework {

/* tensor translate */

template <typename PtTensorImplT, typename VariableT>
std::shared_ptr<PtTensorImplT> MakeTensorImpl(const VariableT& tensor,
pt::Backend backend,
@@ -38,16 +49,81 @@ std::shared_ptr<PtTensorImplT> MakeTensorImpl(const Tensor& tensor,
const platform::Place& place,
proto::VarType::Type type);

template <typename PtTensorImplT>
void ShareTensorImpl(PtTensorImplT* tensor_impl, LoDTensor* out);

template <typename PtTensorImplT>
void ShareTensorImpl(PtTensorImplT* tensor_impl, Tensor* out);

std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
const framework::Variable& variable, const pt::TensorArgDef& arg_def);
std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
framework::Variable* variable, const pt::TensorArgDef& arg_def);

/* Kernel Key translate */

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);
pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type);

/* Kernel Args parse */

// TODO(chenweihang): we can generate this map from proto info at compile time
class KernelSignatureMap {
public:
static KernelSignatureMap& Instance();

bool Has(const std::string& op_type) const {
return map_.find(op_type) != map_.end();
}

void Insert(const std::string& op_type, const KernelSignature& signature) {
PADDLE_ENFORCE_NE(
Has(op_type), true,
platform::errors::AlreadyExists(
"Operator (%s)'s Kernel Signature has been registered.", op_type));
map_.insert({op_type, signature});
}

const KernelSignature* GetNullable(const std::string& op_type) const {
auto it = map_.find(op_type);
if (it == map_.end()) {
return nullptr;
} else {
return &it->second;
}
}

private:
KernelSignatureMap() = default;
paddle::flat_hash_map<std::string, KernelSignature> map_;

DISABLE_COPY_AND_ASSIGN(KernelSignatureMap);
};
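
A hedged sketch of the intended lookup pattern (op_type is an assumed local variable): GetNullable lets callers distinguish a missing signature from a cached one without triggering an enforce error.

// Hypothetical, for illustration only: probe the cache before parsing.
const KernelSignature* sig =
    KernelSignatureMap::Instance().GetNullable(op_type);
if (sig != nullptr) {
  VLOG(1) << KernelSignatureToString(*sig);  // cache hit
}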

class KernelArgsNameMaker {
public:
virtual ~KernelArgsNameMaker() {}
virtual const paddle::SmallVector<std::string>& GetInputArgsNames() = 0;
virtual const paddle::SmallVector<std::string>& GetOutputArgsNames() = 0;
virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0;
};

class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
public:
explicit KernelArgsNameMakerByOpProto(framework::proto::OpProto* op_proto)
: op_proto_(op_proto) {}

~KernelArgsNameMakerByOpProto() {}

const paddle::SmallVector<std::string>& GetInputArgsNames() override;
const paddle::SmallVector<std::string>& GetOutputArgsNames() override;
const paddle::SmallVector<std::string>& GetAttrsArgsNames() override;

KernelSignature GetKernelSignature();

private:
framework::proto::OpProto* op_proto_;

paddle::SmallVector<std::string> input_names_;
paddle::SmallVector<std::string> output_names_;
paddle::SmallVector<std::string> attr_names_;
};

std::string KernelSignatureToString(const KernelSignature& signature);
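
Putting the pieces together, a hedged end-to-end sketch (the OpProto pointer is assumed to come from the op registry; the helper name is hypothetical):

// Hypothetical glue code: derive an op's signature from its proto once,
// log it, and cache it for later executions.
void RegisterSignatureOnce(proto::OpProto* op_proto) {
  auto& sig_map = KernelSignatureMap::Instance();
  if (!sig_map.Has(op_proto->type())) {
    KernelArgsNameMakerByOpProto maker(op_proto);
    KernelSignature signature = maker.GetKernelSignature();
    VLOG(1) << KernelSignatureToString(signature);
    sig_map.Insert(op_proto->type(), signature);
  }
}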

} // namespace framework
} // namespace paddle

1 comment on commit 1f89db1

@paddle-bot-old paddle-bot-old bot commented on 1f89db1 Oct 19, 2021


🕵️ CI failures summary

🔍 PR: #21 Commit ID: 1f89db1 contains failed CI.

🔹 Failed: PR-CI-APPROVAL

approve_failed
2021-10-19 10:21:31 Saving to: 'bk.txt'
2021-10-19 10:21:31 0K 100% 5.41M=0s
2021-10-19 10:21:31 2021-10-19 10:21:31 (5.41 MB/s) - 'bk.txt' saved [5/5])
2021-10-19 10:21:38 ****************
2021-10-19 10:21:38 0. You must have one RD (lanxianghit (Recommend), phlrain or luotao1) approval for changing the FLAGS, which manages the environment variables.
2021-10-19 10:21:38 1. You must have Dianhai approval for changing 20+ files or adding 1000+ lines of content.
2021-10-19 10:21:38 2. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for paddle/fluid/framework/operator.h, which manages the underlying code for fluid.
2021-10-19 10:21:38 3. You must have one RD (zhiqiu (Recommend) , phlrain) approval for the changes of paddle/fluid/pybind/op_function_generator.cc, which manages the logic of automatic generating op functions for dygraph.
2021-10-19 10:21:38 4. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for the usage of const_cast.
2021-10-19 10:21:38 5. You must have one RD (Avin0323(Recommend) or zhouwei25 or wanghuancoder or luotao1) approval for modifying unity_build_rule.cmake, which manages the rules of Unity Build.
2021-10-19 10:21:38 There are 6 approval errors.
2021-10-19 10:21:38 ****************
2021-10-19 10:21:38 + EXCODE=6
2021-10-19 10:21:38 + echo 'EXCODE: 6'
2021-10-19 10:21:38 EXCODE: 6
2021-10-19 10:21:38 + echo 'ipipe_log_param_EXCODE: 6'
2021-10-19 10:21:38 ipipe_log_param_EXCODE: 6
2021-10-19 10:21:38 + exit 6

🔹 Failed: PR-CI-OP-benchmark

Unknown Failed
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "mean"(paddle/fluid/operators/mean_op.cu) in benchmark.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "mean"(paddle/fluid/operators/mean_op.cu) in benchmark.
2021-10-19 11:15:00 + for op_name in '${!CHANGE_OP_MAP[@]}'
2021-10-19 11:15:00 + '[' -z '' ']'
2021-10-19 11:15:00 + exit_code=8
2021-10-19 11:15:00 + LOG '[ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.
2021-10-19 11:15:00 + for op_name in '${!CHANGE_OP_MAP[@]}'
2021-10-19 11:15:00 + '[' -z matmul,matmul,matmul.json,True ']'
2021-10-19 11:15:00 + '[' 8 -ne 0 ']'
2021-10-19 11:15:00 + LOG '[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.
2021-10-19 11:15:00 + LOG '[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.
2021-10-19 11:15:00 + exit 8
2021-10-19 11:15:00 {build code state=8}

🔹 Failed: PR-CI-Windows

test_failed
2021-10-19 12:24:57 The following tests FAILED:
2021-10-19 12:24:57 677 - test_jit_save_load (Failed)
2021-10-19 12:24:57 Errors while running CTest
2021-10-19 12:24:58 =========================================
2021-10-19 12:24:58 There are more than 10 failed unit tests, so no unit test retry!!!
2021-10-19 12:24:58 =========================================
2021-10-19 12:24:58 34 - lodtensor_printer_test (Timeout)
2021-10-19 12:24:58 46 - unroll_array_ops_test (Timeout)
2021-10-19 12:24:58 47 - data_type_test (Timeout)
2021-10-19 12:24:58 63 - data_layout_transform_test (Timeout)
2021-10-19 12:24:58 64 - program_desc_test (Timeout)
2021-10-19 12:24:58 75 - prune_test (Timeout)
2021-10-19 12:24:58 77 - selected_rows_test (Timeout)
2021-10-19 12:24:58 82 - dlpack_tensor_test (Timeout)
2021-10-19 12:24:58 91 - graph_helper_test (Timeout)
2021-10-19 12:24:58 99 - test_seqpool_concat_fuse_pass (Timeout)
2021-10-19 12:24:58 115 - test_conv_bias_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 116 - test_conv_activation_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 118 - test_conv_elementwise_add_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 126 - test_cpu_quantize_pass (Timeout)
2021-10-19 12:24:58 128 - test_reshape_transpose_matmul_mkldnn_fuse_pass (Timeout)
