PaddlePaddle · Charles-hit · Aug 8, 2023 · Jul 20, 2023 · Jul 24, 2023 · Jul 24, 2023
diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt
@@ -10,5 +10,6 @@ add_subdirectory(prim)
 add_subdirectory(jit)
 add_subdirectory(ir)
 add_subdirectory(ir_adaptor)
+add_subdirectory(primitive)
 # NOTE: please add subdirectory inference at last.
 add_subdirectory(inference)
diff --git a/paddle/fluid/framework/type_info.cc b/paddle/fluid/framework/type_info.cc
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/raw_tensor.h"
 #include "paddle/fluid/framework/string_array.h"
 #include "paddle/fluid/prim/utils/static/desc_tensor.h"
+#include "paddle/fluid/primitive/type/desc_tensor.h"
 
 namespace phi {
 
@@ -40,6 +41,8 @@ template class TypeInfoTraits<phi::TensorBase, paddle::framework::Strings>;
 template class TypeInfoTraits<phi::TensorBase, paddle::framework::FeedList>;
 template class TypeInfoTraits<phi::TensorBase, egr::VariableCompatTensor>;
 template class TypeInfoTraits<phi::TensorBase, paddle::prim::DescTensor>;
+template class TypeInfoTraits<phi::TensorBase,
+                              paddle::primitive::experimental::DescTensor>;
 template class TypeInfoTraits<phi::TensorBase,
                               paddle::framework::VariableRefArray>;
 

diff --git a/paddle/fluid/ir/dialect/CMakeLists.txt b/paddle/fluid/ir/dialect/CMakeLists.txt
@@ -52,5 +52,12 @@ file(GLOB PD_DIALECT_SRCS "*.cc")
 cc_library(
   pd_dialect
   SRCS ${PD_DIALECT_SRCS} ${op_source_file}
-  DEPS framework_proto phi phi_utils pd_interface pd_trait ir)
+  DEPS framework_proto
+       phi
+       phi_utils
+       pd_interface
+       pd_trait
+       ir
+       primitive_vjp_experimental
+       type_info)
 target_include_directories(pd_dialect PRIVATE ${PD_DIALECT_BINARY_DIR})
diff --git a/paddle/fluid/ir/dialect/op_generator/op_gen.py b/paddle/fluid/ir/dialect/op_generator/op_gen.py
@@ -17,7 +17,11 @@
 
 import yaml
 from op_build_gen import gen_build_func_str
-from op_interface_gen import gen_exclusive_interface_str, gen_op_infer_meta_str
+from op_interface_gen import (
+    gen_exclusive_interface_str,
+    gen_op_infer_meta_str,
+    vjp_interface_gen_op_list,
+)
 from op_member_func_gen import gen_op_get_inputs_outputs_str
 from op_verify_gen import gen_verify_func_str
 
@@ -43,6 +47,7 @@
 #include "paddle/fluid/ir/dialect/op_yaml_info_util.h"
 #include "paddle/fluid/ir/interface/op_yaml_info.h"
 #include "paddle/fluid/ir/interface/infermeta.h"
+#include "paddle/fluid/ir/interface/vjp.h"
 #include "paddle/fluid/ir/trait/inplace.h"
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/phi/core/infermeta_utils.h"
@@ -303,6 +308,9 @@ def __init__(self, op_yaml_item, op_compat_item):
         else:
             self.infer_meta_func = None
 
+        # parse backward name
+        self.backward_name = self.parse_backward_name()
+
         # parse inplace && view
         self.inplace_map = self.parse_op_inplace_info()
         self.view_map = self.parse_op_view_info()
@@ -612,6 +620,12 @@ def parse_kernel_map(self):
         else:
             return None
 
+    def parse_backward_name(self):  # returns the op's 'backward' yaml entry, or None when the key is absent
+        if 'backward' in self.op_yaml_item:
+            return self.op_yaml_item['backward']
+        else:
+            return None
+
     def get_phi_dtype_name(self, name):
         name = name.replace('Scalar', 'phi::Scalar')
         name = name.replace('IntArray', 'phi::IntArray')
@@ -720,6 +734,11 @@ def OpGenerator(
         if op_info.infer_meta_func:
             op_interfaces += ["InferMetaInterface"]
 
+        if (
+            op_info.backward_name
+            and op_info.op_phi_name[0] in vjp_interface_gen_op_list
+        ):
+            op_interfaces += ["VjpInterface"]
         exclusive_interface_str = gen_exclusive_interface_str(op_info)
 
         # If op has inplace info, we will generate inplace op and non-inplace op.

diff --git a/paddle/fluid/ir/dialect/op_generator/op_interface_gen.py b/paddle/fluid/ir/dialect/op_generator/op_interface_gen.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 # generator interfaces
+from vjp_interface_gen_op_list import vjp_interface_gen_op_list
 
 OP_INFER_SHAPE_TEMPLATE = """
 void {op_name}::InferMeta( phi::InferMetaContext *infer_meta ) {{
@@ -38,4 +39,6 @@ def gen_exclusive_interface_str(op_info):
         exclusive_interface_str += (
             "  static void InferMeta( phi::InferMetaContext *infer_meta );"
         )
+    if op_info.op_phi_name[0] in vjp_interface_gen_op_list:
+        exclusive_interface_str += "\n  static std::vector<std::vector<ir::OpResult>> Vjp(ir::Operation* op, const std::vector<std::vector<ir::OpResult>>& out_grads, const std::vector<std::vector<int>>& stop_gradients);"
     return exclusive_interface_str
diff --git a/paddle/fluid/ir/dialect/op_generator/vjp_interface_gen_op_list.py b/paddle/fluid/ir/dialect/op_generator/vjp_interface_gen_op_list.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# =====================================
+# VjpInterface gen op list
+# =====================================
+# Code generation for Vjp functions is not
+# supported yet, so this whitelist controls
+# which ops get a Vjp method generated.
+# TODO(wanghao107)
+# Remove this file once Vjp methods can be
+# code-generated.
+vjp_interface_gen_op_list = ["tanh", "mean"]
diff --git a/paddle/fluid/ir/dialect/pd_api.cc b/paddle/fluid/ir/dialect/pd_api.cc
@@ -53,5 +53,23 @@ ir::OpResult full(std::vector<int64_t> shape,
   return full_op.out();
 }
 
+ir::OpResult tanh_grad(ir::OpResult out, ir::OpResult grad_out) {  // builds a TanhGradOp and returns its result 0
+  paddle::dialect::TanhGradOp tanh_grad_op =
+      APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::TanhGradOp>(
+          out, grad_out);  // the op is created through the builder obtained from APIBuilder::Instance()
+  return tanh_grad_op.result(0);
+}
+
+ir::OpResult mean_grad(ir::OpResult x,  // builds a MeanGradOp from the arguments and returns its result 0
+                       ir::OpResult out_grad,
+                       std::vector<int64_t> axis,  // by value, matching the declaration in pd_api.h
+                       bool keepdim,
+                       bool reduce_all) {
+  paddle::dialect::MeanGradOp mean_grad_op =
+      APIBuilder::Instance().GetBuilder()->Build<paddle::dialect::MeanGradOp>(
+          x, out_grad, axis, keepdim, reduce_all);  // the op is created through the builder obtained from APIBuilder::Instance()
+  return mean_grad_op.result(0);
+}
+
 }  // namespace dialect
 }  // namespace paddle
diff --git a/paddle/fluid/ir/dialect/pd_api.h b/paddle/fluid/ir/dialect/pd_api.h
@@ -39,5 +39,12 @@ ir::OpResult full(std::vector<int64_t> shape,
                   phi::DataType dtype = phi::DataType::FLOAT32,
                   phi::Place place = phi::CPUPlace());
 
+ir::OpResult tanh_grad(ir::OpResult out, ir::OpResult grad_out);
+
+ir::OpResult mean_grad(ir::OpResult x,
+                       ir::OpResult out_grad,
+                       std::vector<int64_t> axis = {},
+                       bool keepdim = false,
+                       bool reduce_all = false);
 }  // namespace dialect
 }  // namespace paddle
diff --git a/paddle/fluid/ir/dialect/pd_dialect.h b/paddle/fluid/ir/dialect/pd_dialect.h
@@ -91,6 +91,9 @@ class APIBuilder {
     ctx_ = ir::IrContext::Instance();
     ctx_->GetOrRegisterDialect<paddle::dialect::PaddleDialect>();
   }
+
+  APIBuilder(const APIBuilder&) = delete;
+
   ir::IrContext* ctx_;
   std::shared_ptr<ir::Builder> builder_;
 };

diff --git a/paddle/fluid/ir/dialect/pd_op_vjp_manual.cc b/paddle/fluid/ir/dialect/pd_op_vjp_manual.cc
@@ -0,0 +1,101 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/ir/dialect/pd_attribute.h"
+#include "paddle/fluid/ir/dialect/pd_op.h"
+#include "paddle/fluid/primitive/rule/vjp/vjp.h"
+#include "paddle/fluid/primitive/type/desc_tensor.h"
+#include "paddle/ir/core/op_base.h"
+
+// TODO(wanghao107)
+// this file will be generated in pd_op.cc
+
+namespace paddle {
+namespace dialect {
+std::vector<std::vector<ir::OpResult>> TanhOp::Vjp(  // Vjp for TanhOp: delegates to primitive::experimental::tanh_vjp
+    ir::Operation* op,  // cast to TanhOp below to reach its out() result
+    const std::vector<std::vector<ir::OpResult>>& out_grads,  // only out_grads[0][0] is read
+    const std::vector<std::vector<int>>& stop_gradients) {  // forwarded to tanh_vjp; [0][0] also gates the returned slot
+  TanhOp op_obj = op->dyn_cast<TanhOp>();  // NOTE(review): cast result is used unchecked - confirm callers only pass a TanhOp
+  Tensor out(
+      std::make_shared<primitive::experimental::DescTensor>(op_obj.out()));  // wrap the op's out() value in a DescTensor-backed Tensor
+  Tensor grad_out(
+      std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));  // NOTE(review): no bounds check on out_grads
+  std::vector<std::vector<Tensor>> tensor_res =
+      primitive::experimental::tanh_vjp(out, grad_out, stop_gradients);
+  std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));  // 1x1 result holding a default-constructed OpResult
+  if (!stop_gradients[0][0]) {  // when the flag is non-zero, res[0][0] stays default-constructed
+    res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
+                    tensor_res[0][0].impl())
+                    ->getValue()
+                    .dyn_cast<ir::OpResult>();  // unwrap the DescTensor's value back into an ir::OpResult
+  }
+  return res;
+}
+
+std::vector<std::vector<ir::OpResult>> Tanh_Op::Vjp(  // Vjp for Tanh_Op: delegates to primitive::experimental::tanh_vjp
+    ir::Operation* op,  // cast to Tanh_Op below to reach its out() result
+    const std::vector<std::vector<ir::OpResult>>& out_grads,  // only out_grads[0][0] is read
+    const std::vector<std::vector<int>>& stop_gradients) {  // forwarded to tanh_vjp; [0][0] also gates the returned slot
+  // TODO(wanghao107)
+  // we don't support inplace now,
+  // so use the non-inplace version instead currently.
+  // Support inplace in the future.
+  Tanh_Op op_obj = op->dyn_cast<Tanh_Op>();  // NOTE(review): cast result is used unchecked - confirm callers only pass a Tanh_Op
+  Tensor out(
+      std::make_shared<primitive::experimental::DescTensor>(op_obj.out()));  // wrap the op's out() value in a DescTensor-backed Tensor
+  Tensor grad_out(
+      std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));  // NOTE(review): no bounds check on out_grads
+  std::vector<std::vector<Tensor>> tensor_res =
+      primitive::experimental::tanh_vjp(out, grad_out, stop_gradients);  // same vjp rule that TanhOp::Vjp calls
+  std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));  // 1x1 result holding a default-constructed OpResult
+  if (!stop_gradients[0][0]) {  // when the flag is non-zero, res[0][0] stays default-constructed
+    res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
+                    tensor_res[0][0].impl())
+                    ->getValue()
+                    .dyn_cast<ir::OpResult>();  // unwrap the DescTensor's value back into an ir::OpResult
+  }
+  return res;
+}
+
+std::vector<std::vector<ir::OpResult>> MeanOp::Vjp(  // Vjp for MeanOp: delegates to primitive::experimental::mean_vjp
+    ir::Operation* op,  // cast to MeanOp below; its "axis" and "keepdim" attributes are also read
+    const std::vector<std::vector<ir::OpResult>>& out_grads,  // only out_grads[0][0] is read
+    const std::vector<std::vector<int>>& stop_gradients) {  // forwarded to mean_vjp; [0][0] also gates the returned slot
+  MeanOp op_obj = op->dyn_cast<MeanOp>();  // NOTE(review): cast result is used unchecked - confirm callers only pass a MeanOp
+  Tensor x(std::make_shared<primitive::experimental::DescTensor>(op_obj.x()));  // wrap the op's x() value in a DescTensor-backed Tensor
+  Tensor out_grad(
+      std::make_shared<primitive::experimental::DescTensor>(out_grads[0][0]));  // NOTE(review): no bounds check on out_grads
+
+  std::vector<int64_t> axis =
+      op->attribute("axis")
+          .dyn_cast<paddle::dialect::IntArrayAttribute>()
+          .data()
+          .GetData();  // "axis" attribute read as an IntArrayAttribute and copied into a vector
+  bool keepdim = op->attribute("keepdim").dyn_cast<ir::BoolAttribute>().data();
+  bool reduce_all = false;  // NOTE(review): hard-coded rather than read from the op - confirm this is intended
+  std::vector<std::vector<Tensor>> tensor_res =
+      primitive::experimental::mean_vjp(
+          x, out_grad, axis, keepdim, reduce_all, stop_gradients);
+  std::vector<std::vector<ir::OpResult>> res(1, std::vector<ir::OpResult>(1));  // 1x1 result holding a default-constructed OpResult
+  if (!stop_gradients[0][0]) {  // when the flag is non-zero, res[0][0] stays default-constructed
+    res[0][0] = std::static_pointer_cast<primitive::experimental::DescTensor>(
+                    tensor_res[0][0].impl())
+                    ->getValue()
+                    .dyn_cast<ir::OpResult>();  // unwrap the DescTensor's value back into an ir::OpResult
+  }
+  return res;
+}
+}  // namespace dialect
+}  // namespace paddle
diff --git a/paddle/fluid/ir/interface/vjp.h b/paddle/fluid/ir/interface/vjp.h
@@ -20,21 +20,24 @@ namespace dialect {
 class VjpInterface : public ir::OpInterfaceBase<VjpInterface> {
  public:
   struct Concept {
-    explicit Concept(std::vector<std::vector<ir::Value>> (*vjp)(
-        std::vector<std::vector<ir::Value>> out_grads,
+    explicit Concept(std::vector<std::vector<ir::OpResult>> (*vjp)(
+        ir::Operation* op,
+        const std::vector<std::vector<ir::OpResult>>& out_grads,
         const std::vector<std::vector<int>>& stop_gradients))
         : vjp_(vjp) {}
-    std::vector<std::vector<ir::Value>> (*vjp_)(
-        std::vector<std::vector<ir::Value>> out_grads,
+    std::vector<std::vector<ir::OpResult>> (*vjp_)(
+        ir::Operation* op,
+        const std::vector<std::vector<ir::OpResult>>& out_grads,
         const std::vector<std::vector<int>>& stop_gradients);
   };
 
   template <class ConcreteOp>
   struct Model : public Concept {
-    static std::vector<std::vector<ir::Value>> Vjp(
-        std::vector<std::vector<ir::Value>> out_grads,
+    static std::vector<std::vector<ir::OpResult>> Vjp(
+        ir::Operation* op,
+        const std::vector<std::vector<ir::OpResult>>& out_grads,
         const std::vector<std::vector<int>>& stop_gradients) {
-      return ConcreteOp::Vjp(out_grads, stop_gradients);
+      return ConcreteOp::Vjp(op, out_grads, stop_gradients);
     }
 
     Model() : Concept(Vjp) {}
@@ -43,10 +46,11 @@ class VjpInterface : public ir::OpInterfaceBase<VjpInterface> {
   VjpInterface(ir::Operation* op, Concept* impl)
       : ir::OpInterfaceBase<VjpInterface>(op), impl_(impl) {}
 
-  std::vector<std::vector<ir::Value>> Vjp(
-      std::vector<std::vector<ir::Value>> out_grads,
+  std::vector<std::vector<ir::OpResult>> Vjp(
+      ir::Operation* op,
+      const std::vector<std::vector<ir::OpResult>>& out_grads,
       const std::vector<std::vector<int>>& stop_gradients) {
-    return impl_->vjp_(out_grads, stop_gradients);
+    return impl_->vjp_(op, out_grads, stop_gradients);
   }
 
  private:

diff --git a/paddle/fluid/primitive/CMakeLists.txt b/paddle/fluid/primitive/CMakeLists.txt
@@ -0,0 +1,2 @@
+add_subdirectory(backend)
+add_subdirectory(rule)
diff --git a/paddle/fluid/primitive/README.md b/paddle/fluid/primitive/README.md
@@ -0,0 +1 @@
+# Paddle Primitive Operator System and Combined Strategy Design
diff --git a/paddle/fluid/primitive/backend/CMakeLists.txt b/paddle/fluid/primitive/backend/CMakeLists.txt
@@ -0,0 +1,10 @@
+if(NOT (NOT WITH_PYTHON AND ON_INFER)) # skipped only when WITH_PYTHON is off and ON_INFER is on
+  cc_library(
+    primitive_backend_eager_experimental
+    SRCS eager_backend.cc
+    DEPS final_dygraph_function eager_utils phi)
+endif()
+cc_library(
+  primitive_backend_static_experimental
+  SRCS static_backend.cc
+  DEPS pd_dialect)
diff --git a/paddle/fluid/primitive/backend/eager_backend.cc b/paddle/fluid/primitive/backend/eager_backend.cc
@@ -0,0 +1,26 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/primitive/backend/eager_backend.h"
+#include "paddle/fluid/eager/api/all.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
+#include "paddle/fluid/primitive/primitive/primitive.h"
+
+namespace paddle {
+namespace primitive {
+namespace backend {
+namespace experimental {}  // namespace experimental
+}  // namespace backend
+}  // namespace primitive
+}  // namespace paddle
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		add_subdirectory(backend)
		add_subdirectory(rule)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# Paddle Primitive Operator System and Combined Strategy Design