From 734d146150e553f97bcb06667bb06c58aa24bea7 Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Wed, 9 Feb 2022 05:17:16 +0000
Subject: [PATCH 01/12] move expand to pten

---
 paddle/fluid/operators/expand_v2_op.cc        |  37 +--
 paddle/fluid/operators/expand_v2_op.h         | 114 ---------
 paddle/fluid/operators/expand_v2_op_npu.cc    |   2 +-
 paddle/fluid/operators/expand_v2_op_xpu.cc    |   2 +-
 paddle/pten/kernels/cpu/expand_grad_kernel.cc |  30 +++
 paddle/pten/kernels/cpu/expand_kernel.cc      |  30 +++
 paddle/pten/kernels/expand_grad_kernel.h      |  27 ++
 paddle/pten/kernels/expand_kernel.h           |  28 +++
 paddle/pten/kernels/gpu/expand_grad_kernel.cu |  29 +++
 paddle/pten/kernels/gpu/expand_kernel.cu      |  30 +++
 .../kernels/impl/expand_grad_kernel_impl.h    | 133 ++++++++++
 paddle/pten/kernels/impl/expand_kernel_impl.h | 232 ++++++++++++++++++
 12 files changed, 548 insertions(+), 146 deletions(-)
 create mode 100644 paddle/pten/kernels/cpu/expand_grad_kernel.cc
 create mode 100644 paddle/pten/kernels/cpu/expand_kernel.cc
 create mode 100644 paddle/pten/kernels/expand_grad_kernel.h
 create mode 100644 paddle/pten/kernels/expand_kernel.h
 create mode 100644 paddle/pten/kernels/gpu/expand_grad_kernel.cu
 create mode 100644 paddle/pten/kernels/gpu/expand_kernel.cu
 create mode 100644 paddle/pten/kernels/impl/expand_grad_kernel_impl.h
 create mode 100644 paddle/pten/kernels/impl/expand_kernel_impl.h

diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc
index 6d803c500d90f..af6b57dfa258c 100755
--- a/paddle/fluid/operators/expand_v2_op.cc
+++ b/paddle/fluid/operators/expand_v2_op.cc
@@ -219,6 +219,13 @@ class ExpandV2GradOp : public framework::OperatorWithKernel {
     }
   }
 
+  framework::KernelSignature GetExpectedPtenKernelArgs(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::KernelSignature(
+        "expand_grad", {framework::GradVarName("Out")}, {},
+        {framework::GradVarName("X")});
+  }
+
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
@@ -296,33 +303,3 @@ REGISTER_OPERATOR(expand_v2_grad, ops::ExpandV2GradOp,
                   ops::ExpandV2DoubleGradOpMaker,
                   ops::ExpandV2DoubleGradOpMaker,
                   ops::ExpandV2GradNoNeedBufVarsInferer);
-REGISTER_OP_CPU_KERNEL(
-    expand_v2, ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel);
-REGISTER_OP_CPU_KERNEL(
-    expand_v2_grad,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel);
-#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
-REGISTER_OP_CUDA_KERNEL(
-    expand_v2, ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel,
-    ops::ExpandV2Kernel);
-REGISTER_OP_CUDA_KERNEL(
-    expand_v2_grad,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel,
-    ops::ExpandV2GradKernel);
-#endif
diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h
index dd1625013444b..35a56cec9ccd2 100644
--- a/paddle/fluid/operators/expand_v2_op.h
+++ b/paddle/fluid/operators/expand_v2_op.h
@@ -231,119 +231,5 @@ class ExpandV2Kernel : public framework::OpKernel {
   }
 };
 
-template
-class ExpandV2GradKernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* in0 = context.Input("X");
-    auto expand_shape = get_expand_shape(context);
-    auto x_dims = in0->dims();
-    auto vec_in_dims = framework::vectorize(x_dims);
-    auto diff = expand_shape.size() - vec_in_dims.size();
-    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
-    // 1. reshape_dims_vec is the broadcast parameter.
-    // 2. reduce_dims_vec is the dimension parameter to compute gradients. For
-    //    each dimension expanded, the gradients should be summed to original
-    //    size.
-    std::vector repeat_times(vec_in_dims.size());
-    for (size_t i = 0; i < vec_in_dims.size(); ++i) {
-      if (expand_shape[i] < 0) {
-        repeat_times[i] = 1;
-      } else {
-        repeat_times[i] = expand_shape[i] / vec_in_dims[i];
-      }
-    }
-    std::vector reshape_dims_vec;
-    std::vector reduce_dims_vec;
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      reduce_dims_vec.push_back(reshape_dims_vec.size());
-      reshape_dims_vec.push_back(repeat_times[i]);
-      reshape_dims_vec.push_back(vec_in_dims[i]);
-    }
-
-    int dims = reduce_dims_vec.size();
-
-    bool just_copy = true;
-    for (size_t i = 0; i < repeat_times.size(); i++) {
-      if (repeat_times[i] != 1) {
-        just_copy = false;
-        break;
-      }
-    }
-    // no need reduce, just copy
-    if (just_copy) {
-      auto* in0 = context.Input(framework::GradVarName("Out"));
-      auto* out0 = context.Output(framework::GradVarName("X"));
-      out0->mutable_data(context.GetPlace());
-      framework::TensorCopy(*in0, context.GetPlace(), context.device_context(),
-                            out0);
-    } else {
-      PADDLE_ENFORCE_GE(dims, 1,
-                        platform::errors::InvalidArgument(
-                            "The rank of the input 'Out@GRAD' for "
-                            "expand_v2_grad op must be greater than or "
-                            "equal to 1, but the value received is %d.",
-                            dims));
-      PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED,
-                        platform::errors::InvalidArgument(
-                            "The rank of the input 'Out@GRAD' for "
-                            "expand_v2_grad op must be less than or equal "
-                            "to %d, but the value received is %d.",
-                            MAX_RANK_SUPPORTED, dims));
-      switch (dims) {
-        case 1:
-          ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 2:
-          ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 3:
-          ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 4:
-          ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 5:
-          ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 6:
-          ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        default:
-          PADDLE_THROW(platform::errors::InvalidArgument(
-              "Only support tensor with rank being between 1 and 6. But "
-              "received tensor's rank = %d.",
-              dims));
-      }
-    }
-  }
-
- protected:
-  template
-  void ExpandBackward(const framework::ExecutionContext& context,
-                      const std::vector& reshape_dims_vec,
-                      const std::vector& reduce_dims_vec) const {
-    size_t reshape_size = reshape_dims_vec.size();
-    size_t reduce_size = reduce_dims_vec.size();
-    auto* in0 = context.Input(framework::GradVarName("Out"));
-    auto* out0 = context.Output(framework::GradVarName("X"));
-    out0->mutable_data(context.GetPlace());
-    auto x_grad = EigenVector::Flatten(*out0);
-    Eigen::DSizes reshape_dims;
-    for (size_t i = 0; i < reshape_size; ++i) {
-      reshape_dims[i] = reshape_dims_vec[i];
-    }
-    Eigen::DSizes reduce_dims;
-    for (size_t i = 0; i < reduce_size; ++i) {
-      reduce_dims[i] = reduce_dims_vec[i];
-    }
-    auto out_grad = EigenVector::Flatten(*in0);
-    auto& place =
-        *context.template device_context().eigen_device();
-    EigenBroadcastGrad, T, Dims>::Eval(
-        place, x_grad, out_grad, reduce_dims, reshape_dims);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc
index 58418d01b81da..a4807d1d26d7c 100644
--- a/paddle/fluid/operators/expand_v2_op_npu.cc
+++ b/paddle/fluid/operators/expand_v2_op_npu.cc
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/expand_v2_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/platform/device/npu/npu_op_runner.h"
 
 namespace paddle {
diff --git a/paddle/fluid/operators/expand_v2_op_xpu.cc b/paddle/fluid/operators/expand_v2_op_xpu.cc
index 791f8e823655c..d13a403175589 100644
--- a/paddle/fluid/operators/expand_v2_op_xpu.cc
+++ b/paddle/fluid/operators/expand_v2_op_xpu.cc
@@ -13,7 +13,7 @@ limitations under the License. */
 
 #ifdef PADDLE_WITH_XPU
 
-#include "paddle/fluid/operators/expand_v2_op.h"
+#include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
 namespace operators {
diff --git a/paddle/pten/kernels/cpu/expand_grad_kernel.cc b/paddle/pten/kernels/cpu/expand_grad_kernel.cc
new file mode 100644
index 0000000000000..6fafcdf94a44e
--- /dev/null
+++ b/paddle/pten/kernels/cpu/expand_grad_kernel.cc
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/expand_grad_kernel.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/impl/expand_grad_kernel_impl.h"
+
+PT_REGISTER_KERNEL(expand_grad,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::ExpandKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   bool) {}
\ No newline at end of file
diff --git a/paddle/pten/kernels/cpu/expand_kernel.cc b/paddle/pten/kernels/cpu/expand_kernel.cc
new file mode 100644
index 0000000000000..57c646d1b691e
--- /dev/null
+++ b/paddle/pten/kernels/cpu/expand_kernel.cc
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/kernels/expand_kernel.h"
+#include "paddle/pten/backends/cpu/cpu_context.h"
+#include "paddle/pten/common/scalar.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/impl/expand_kernel_impl.h"
+
+PT_REGISTER_KERNEL(expand,
+                   CPU,
+                   ALL_LAYOUT,
+                   pten::ExpandKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   bool) {}
\ No newline at end of file
diff --git a/paddle/pten/kernels/expand_grad_kernel.h b/paddle/pten/kernels/expand_grad_kernel.h
new file mode 100644
index 0000000000000..a4ca5da0606af
--- /dev/null
+++ b/paddle/pten/kernels/expand_grad_kernel.h
@@ -0,0 +1,27 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template
+void ExpandGradKernel(const Context& ctx,
+                      const DenseTensor& out_grad,
+                      const DenseTensor& x,
+                      DenseTensor* x_grad);
+
+}  // namespace pten
\ No newline at end of file
diff --git a/paddle/pten/kernels/expand_kernel.h b/paddle/pten/kernels/expand_kernel.h
new file mode 100644
index 0000000000000..c63876720d982
--- /dev/null
+++ b/paddle/pten/kernels/expand_kernel.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/pten/common/scalar_array.h"
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace pten {
+
+template
+void ExpandKernel(const Context& ctx,
+                  const DenseTensor& x,
+                  const ScalarArray& shape,
+                  DenseTensor* out);
+
+}  // namepsace pten
\ No newline at end of file
diff --git a/paddle/pten/kernels/gpu/expand_grad_kernel.cu b/paddle/pten/kernels/gpu/expand_grad_kernel.cu
new file mode 100644
index 0000000000000..f2bbdf224b998
--- /dev/null
+++ b/paddle/pten/kernels/gpu/expand_grad_kernel.cu
@@ -0,0 +1,29 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/pten/backends/gpu/gpu_context.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+#include "paddle/pten/kernels/expand_grad_kernel.h"
+#include "paddle/pten/kernels/impl/expand_grad_kernel_impl.h"
+
+PT_REGISTER_KERNEL(expand_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   pten::ExpandGradKernel,
+                   float,
+                   double,
+                   int,
+                   int64_t,
+                   bool) {}
\ No newline at end of file
diff --git a/paddle/pten/kernels/gpu/expand_kernel.cu b/paddle/pten/kernels/gpu/expand_kernel.cu
new file mode 100644
index 0000000000000..0390508ac9aa5
--- /dev/null
+++ b/paddle/pten/kernels/gpu/expand_kernel.cu
@@ -0,0 +1,30 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#include "paddle/pten/backends/gpu/gpu_context.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/kernels/expand_kernel.h" +#include "paddle/pten/kernels/impl/expand_kernel_impl.h" + +PT_REGISTER_KERNEL(expand, + GPU, + ALL_LAYOUT, + pten::ExpandKernel, + float, + double, + int, + int64_t, + bool) {} \ No newline at end of file diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h new file mode 100644 index 0000000000000..2d570dcfafeb6 --- /dev/null +++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h @@ -0,0 +1,133 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace pten{ + +template +class ExpandGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto expand_shape = get_expand_shape(context); + auto x_dims = in0->dims(); + auto vec_in_dims = framework::vectorize(x_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + // 1. reshape_dims_vec is the broadcast parameter. + // 2. reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. 
+ std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + if (expand_shape[i] < 0) { + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i] / vec_in_dims[i]; + } + } + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); + + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; + } + } + // no need reduce, just copy + if (just_copy) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), + out0); + } else { + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be greater than or " + "equal to 1, but the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, dims)); + switch (dims) { + case 1: + ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 2: + ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 3: + ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 4: + ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 5: + ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 6: + ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec); + break; + default: + PADDLE_THROW(platform::errors::InvalidArgument( + "Only support tensor with rank being between 1 and 6. But " + "received tensor's rank = %d.", + dims)); + } + } + } + + protected: + template + void ExpandBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec) const { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + auto& place = + *context.template device_context().eigen_device(); + EigenBroadcastGrad, T, Dims>::Eval( + place, x_grad, out_grad, reduce_dims, reshape_dims); + } +}; + +} // namespace pten \ No newline at end of file diff --git a/paddle/pten/kernels/impl/expand_kernel_impl.h b/paddle/pten/kernels/impl/expand_kernel_impl.h new file mode 100644 index 0000000000000..58a9aae027d32 --- /dev/null +++ b/paddle/pten/kernels/impl/expand_kernel_impl.h @@ -0,0 +1,232 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/operators/eigen/eigen_function.h" + +#define MAX_RANK_SUPPORTED 6 + +namespace pten { + +inline std::vector get_expand_shape( + const framework::ExecutionContext& ctx) { + if (ctx.HasInput("Shape")) { + auto* shape_tensor = ctx.Input("Shape"); + auto* shape_data = shape_tensor->data(); + framework::Tensor cpu_shape_tensor; + if (platform::is_gpu_place(shape_tensor->place())) { + paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), + &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } +#ifdef PADDLE_WITH_ASCEND_CL + if (platform::is_npu_place(shape_tensor->place())) { + paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), + &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } +#endif +#ifdef PADDLE_WITH_XPU + if (platform::is_xpu_place(shape_tensor->place())) { + paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), + &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } +#endif + auto vec_shape = + std::vector(shape_data, shape_data + shape_tensor->numel()); + return vec_shape; + } + + auto list_expand_shapes_tensor = + ctx.MultiInput("expand_shapes_tensor"); + if (list_expand_shapes_tensor.size() > 0) { + // get tensor from + std::vector vec_epxand_shape; + for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) { + auto tensor = list_expand_shapes_tensor[i]; + if (platform::is_gpu_place(tensor->place())) { + framework::Tensor temp; + paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_epxand_shape.push_back(*temp.data()); + } +#ifdef PADDLE_WITH_ASCEND_CL + else if (platform::is_npu_place(tensor->place())) { // NOLINT + framework::Tensor temp; + paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_epxand_shape.push_back(*temp.data()); + } +#endif +#ifdef PADDLE_WITH_XPU + else if (platform::is_xpu_place(tensor->place())) { // NOLINT + framework::Tensor temp; + paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_epxand_shape.push_back(*temp.data()); + } +#endif + else { // NOLINT + vec_epxand_shape.push_back(*tensor->data()); + } + } + return vec_epxand_shape; + } else { + return ctx.Attr>("shape"); + } +} + +using Tensor = framework::Tensor; +template +using EigenVector = framework::EigenVector; +template +using EigenTensor = framework::EigenTensor; +using framework::To32BitIndex; + +template +class ExpandKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto rank = context.Input("X")->dims().size(); + PADDLE_ENFORCE_GE( + rank, 1, + platform::errors::InvalidArgument( + "The rank of the input 'X' for expand_v2 op must be positive, " + "but the value received is %d.", + rank)); + PADDLE_ENFORCE_LE( + rank, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the 
input 'X' for expand_v2 op must be less than " + "or equal to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, rank)); + auto expand_shape = get_expand_shape(context); + auto shape_size = expand_shape.size(); + PADDLE_ENFORCE_GE( + shape_size, rank, + platform::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "greater than or equal to the rank (%d) of the input 'X'.", + shape_size, rank)); + PADDLE_ENFORCE_LE( + shape_size, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "less than or equal to %d.", + shape_size, MAX_RANK_SUPPORTED)); + rank = std::max(rank, static_cast(shape_size)); + switch (rank) { + case 1: + Expand<1>(context); + break; + case 2: + Expand<2>(context); + break; + case 3: + Expand<3>(context); + break; + case 4: + Expand<4>(context); + break; + case 5: + Expand<5>(context); + break; + case 6: + Expand<6>(context); + break; + } + } + + protected: + template + void Expand(const framework::ExecutionContext& context) const { + auto* in0 = context.Input("X"); + + auto in_dims = in0->dims(); + auto expand_shape = get_expand_shape(context); + auto vec_in_dims = framework::vectorize(in_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + PADDLE_ENFORCE_NE(expand_shape[i], 0, + platform::errors::InvalidArgument( + "The expanded size cannot be zero.")); + if (i < diff) { + PADDLE_ENFORCE_GT( + expand_shape[i], 0, + platform::errors::InvalidArgument( + "The expanded size (%d) for non-existing dimensions must be " + "positive for expand_v2 op.", + expand_shape[i])); + repeat_times[i] = expand_shape[i]; + } else if (expand_shape[i] > 0) { + if (vec_in_dims[i] != 1) { + PADDLE_ENFORCE_EQ( + vec_in_dims[i], expand_shape[i], + platform::errors::InvalidArgument( + "The value (%d) of the non-singleton dimension does not match" + " the corresponding value (%d) in shape for expand_v2 op.", + vec_in_dims[i], expand_shape[i])); + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i]; + } + } else { + PADDLE_ENFORCE_EQ( + expand_shape[i], -1, + platform::errors::InvalidArgument( + "When the value in shape is negative for expand_v2 op, " + "only -1 is supported, but the value received is %d.", + expand_shape[i])); + repeat_times[i] = 1; + } + } + + auto* out0 = context.Output("Out"); + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; + } + + framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); + framework::DDim out_dims(new_in_dims); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_dims[i] *= repeat_times[i]; + } + + out0->Resize(out_dims); + auto x = EigenTensor::From(*in0, new_in_dims); + out0->mutable_data(context.GetPlace()); + auto y = EigenTensor::From(*out0, out_dims); + auto& place = + *context.template device_context().eigen_device(); + // use 32-bit index to speed up + bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); + if (use_32bit_index) { + EigenBroadcast, T, Rank>::Eval( + place, To32BitIndex(y), To32BitIndex(x), bcast_dims); + } else { + EigenBroadcast, T, Rank>::Eval(place, y, x, + bcast_dims); + } + } +}; + +} // namespace pten \ No newline at end of file From dd047ed36fcb67b05df403d7dad036678c38cbb7 Mon Sep 17 00:00:00 2001 From: linjieccc <623543001@qq.com> Date: Fri, 
11 Feb 2022 06:41:17 +0000 Subject: [PATCH 02/12] move expand_v2 to pten --- paddle/fluid/operators/expand_v2_op.cc | 7 - paddle/fluid/operators/expand_v2_op.h | 114 ++++++ paddle/pten/core/compat/op_utils.h | 2 + paddle/pten/kernels/cpu/expand_grad_kernel.cc | 13 +- paddle/pten/kernels/cpu/expand_kernel.cc | 12 +- paddle/pten/kernels/gpu/expand_grad_kernel.cu | 4 +- paddle/pten/kernels/gpu/expand_kernel.cu | 3 +- .../kernels/impl/expand_grad_kernel_impl.h | 228 ++++++------ paddle/pten/kernels/impl/expand_kernel_impl.h | 341 ++++++++---------- paddle/pten/ops/compat/expand_sig.cc | 32 ++ 10 files changed, 429 insertions(+), 327 deletions(-) create mode 100644 paddle/pten/ops/compat/expand_sig.cc diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index af6b57dfa258c..0c5b0b62b89f3 100755 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -219,13 +219,6 @@ class ExpandV2GradOp : public framework::OperatorWithKernel { } } - framework::KernelSignature GetExpectedPtenKernelArgs( - const framework::ExecutionContext& ctx) const override { - return framework::KernelSignature( - "expand_grad", {framework::GradVarName("Out")}, {}, - {framework::GradVarName("X")}); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h index 35a56cec9ccd2..dd1625013444b 100644 --- a/paddle/fluid/operators/expand_v2_op.h +++ b/paddle/fluid/operators/expand_v2_op.h @@ -231,5 +231,119 @@ class ExpandV2Kernel : public framework::OpKernel { } }; +template +class ExpandV2GradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* in0 = context.Input("X"); + auto expand_shape = get_expand_shape(context); + auto x_dims = in0->dims(); + auto vec_in_dims = framework::vectorize(x_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + // 1. reshape_dims_vec is the broadcast parameter. + // 2. reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. 
+ std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + if (expand_shape[i] < 0) { + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i] / vec_in_dims[i]; + } + } + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); + + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; + } + } + // no need reduce, just copy + if (just_copy) { + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), + out0); + } else { + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be greater than or " + "equal to 1, but the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, + platform::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, dims)); + switch (dims) { + case 1: + ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 2: + ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 3: + ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 4: + ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 5: + ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec); + break; + case 6: + ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec); + break; + default: + PADDLE_THROW(platform::errors::InvalidArgument( + "Only support tensor with rank being between 1 and 6. 
But " + "received tensor's rank = %d.", + dims)); + } + } + } + + protected: + template + void ExpandBackward(const framework::ExecutionContext& context, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec) const { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + auto* in0 = context.Input(framework::GradVarName("Out")); + auto* out0 = context.Output(framework::GradVarName("X")); + out0->mutable_data(context.GetPlace()); + auto x_grad = EigenVector::Flatten(*out0); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad = EigenVector::Flatten(*in0); + auto& place = + *context.template device_context().eigen_device(); + EigenBroadcastGrad, T, Dims>::Eval( + place, x_grad, out_grad, reduce_dims, reshape_dims); + } +}; + } // namespace operators } // namespace paddle diff --git a/paddle/pten/core/compat/op_utils.h b/paddle/pten/core/compat/op_utils.h index 93090616366f0..79fc383ddeb13 100644 --- a/paddle/pten/core/compat/op_utils.h +++ b/paddle/pten/core/compat/op_utils.h @@ -40,6 +40,8 @@ const std::unordered_set deprecated_op_names({"flatten", "mean", "reshape", "reshape_grad", + "expand", + "expand_grad", "sum"}); class DefaultKernelSignatureMap { diff --git a/paddle/pten/kernels/cpu/expand_grad_kernel.cc b/paddle/pten/kernels/cpu/expand_grad_kernel.cc index 6fafcdf94a44e..518d81d89e5b2 100644 --- a/paddle/pten/kernels/cpu/expand_grad_kernel.cc +++ b/paddle/pten/kernels/cpu/expand_grad_kernel.cc @@ -19,12 +19,11 @@ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/impl/expand_grad_kernel_impl.h" -PT_REGISTER_KERNEL(expand_grad, - CPU, - ALL_LAYOUT, - pten::ExpandKernel, - float, +PT_REGISTER_KERNEL(expand_grad, + CPU, + ALL_LAYOUT, + pten::ExpandGradKernel, + float, double, int, - int64_t, - bool) {} \ No newline at end of file + int64_t) {} diff --git a/paddle/pten/kernels/cpu/expand_kernel.cc b/paddle/pten/kernels/cpu/expand_kernel.cc index 57c646d1b691e..c5c019bd729e8 100644 --- a/paddle/pten/kernels/cpu/expand_kernel.cc +++ b/paddle/pten/kernels/cpu/expand_kernel.cc @@ -19,12 +19,12 @@ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/kernels/impl/expand_kernel_impl.h" -PT_REGISTER_KERNEL(expand, - CPU, - ALL_LAYOUT, - pten::ExpandKernel, - float, +PT_REGISTER_KERNEL(expand, + CPU, + ALL_LAYOUT, + pten::ExpandKernel, + float, double, int, int64_t, - bool) {} \ No newline at end of file + bool) {} diff --git a/paddle/pten/kernels/gpu/expand_grad_kernel.cu b/paddle/pten/kernels/gpu/expand_grad_kernel.cu index f2bbdf224b998..49f8718c483ce 100644 --- a/paddle/pten/kernels/gpu/expand_grad_kernel.cu +++ b/paddle/pten/kernels/gpu/expand_grad_kernel.cu @@ -24,6 +24,6 @@ PT_REGISTER_KERNEL(expand_grad, pten::ExpandGradKernel, float, double, + paddle::platform::float16, int, - int64_t, - bool) {} \ No newline at end of file + int64_t) {} diff --git a/paddle/pten/kernels/gpu/expand_kernel.cu b/paddle/pten/kernels/gpu/expand_kernel.cu index 0390508ac9aa5..e0d8536d6ab34 100644 --- a/paddle/pten/kernels/gpu/expand_kernel.cu +++ b/paddle/pten/kernels/gpu/expand_kernel.cu @@ -25,6 +25,7 @@ PT_REGISTER_KERNEL(expand, pten::ExpandKernel, float, double, + paddle::platform::float16, int, int64_t, - bool) {} \ No newline at end of file + bool) {} diff --git 
a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h index 2d570dcfafeb6..131fb64dd6718 100644 --- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h +++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h @@ -14,120 +14,136 @@ #pragma once -namespace pten{ +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/eigen/eigen_function.h" +#include "paddle/pten/kernels/impl/expand_kernel_impl.h" -template -class ExpandGradKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto* in0 = context.Input("X"); - auto expand_shape = get_expand_shape(context); - auto x_dims = in0->dims(); - auto vec_in_dims = framework::vectorize(x_dims); - auto diff = expand_shape.size() - vec_in_dims.size(); - vec_in_dims.insert(vec_in_dims.begin(), diff, 1); - // 1. reshape_dims_vec is the broadcast parameter. - // 2. reduce_dims_vec is the dimension parameter to compute gradients. For - // each dimension expanded, the gradients should be summed to original - // size. - std::vector repeat_times(vec_in_dims.size()); - for (size_t i = 0; i < vec_in_dims.size(); ++i) { - if (expand_shape[i] < 0) { - repeat_times[i] = 1; - } else { - repeat_times[i] = expand_shape[i] / vec_in_dims[i]; - } - } - std::vector reshape_dims_vec; - std::vector reduce_dims_vec; - for (size_t i = 0; i < repeat_times.size(); ++i) { - reduce_dims_vec.push_back(reshape_dims_vec.size()); - reshape_dims_vec.push_back(repeat_times[i]); - reshape_dims_vec.push_back(vec_in_dims[i]); - } +namespace pten { +template +void ExpandBackward(const Context& ctx, + const DenseTensor& out_grad, + const std::vector& reshape_dims_vec, + const std::vector& reduce_dims_vec, + DenseTensor* in_grad) { + size_t reshape_size = reshape_dims_vec.size(); + size_t reduce_size = reduce_dims_vec.size(); + // auto* in0 = context.Input(framework::GradVarName("Out")); + // auto* out0 = context.Output(framework::GradVarName("X")); + // out0->mutable_data(context.GetPlace()); + ctx.template Alloc(in_grad); + in_grad->data(); - int dims = reduce_dims_vec.size(); + auto x_grad = EigenVector::Flatten(*in_grad); + Eigen::DSizes reshape_dims; + for (size_t i = 0; i < reshape_size; ++i) { + reshape_dims[i] = reshape_dims_vec[i]; + } + Eigen::DSizes reduce_dims; + for (size_t i = 0; i < reduce_size; ++i) { + reduce_dims[i] = reduce_dims_vec[i]; + } + auto out_grad0 = EigenVector::Flatten(out_grad); + auto& place = *ctx.eigen_device(); + pten::funcs::EigenBroadcastGrad, T, Dims>::Eval( + place, x_grad, out_grad0, reduce_dims, reshape_dims); +} - bool just_copy = true; - for (size_t i = 0; i < repeat_times.size(); i++) { - if (repeat_times[i] != 1) { - just_copy = false; - break; - } - } - // no need reduce, just copy - if (just_copy) { - auto* in0 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); - out0->mutable_data(context.GetPlace()); - framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), - out0); +template +void ExpandGradKernel(const Context& ctx, + const DenseTensor& out_grad, + const DenseTensor& x, + const ScalarArray& shape, + DenseTensor* in_grad) { + auto expand_shape = shape.GetData(); + auto x_dims = out_grad.dims(); + auto vec_in_dims = framework::vectorize(x_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + // 1. reshape_dims_vec is the broadcast parameter. + // 2. 
reduce_dims_vec is the dimension parameter to compute gradients. For + // each dimension expanded, the gradients should be summed to original + // size. + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + if (expand_shape[i] < 0) { + repeat_times[i] = 1; } else { - PADDLE_ENFORCE_GE(dims, 1, - platform::errors::InvalidArgument( - "The rank of the input 'Out@GRAD' for " - "expand_v2_grad op must be greater than or " - "equal to 1, but the value received is %d.", - dims)); - PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The rank of the input 'Out@GRAD' for " - "expand_v2_grad op must be less than or equal " - "to %d, but the value received is %d.", - MAX_RANK_SUPPORTED, dims)); - switch (dims) { - case 1: - ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec); - break; - case 2: - ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec); - break; - case 3: - ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec); - break; - case 4: - ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec); - break; - case 5: - ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec); - break; - case 6: - ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec); - break; - default: - PADDLE_THROW(platform::errors::InvalidArgument( - "Only support tensor with rank being between 1 and 6. But " - "received tensor's rank = %d.", - dims)); - } + repeat_times[i] = expand_shape[i] / vec_in_dims[i]; } } + std::vector reshape_dims_vec; + std::vector reduce_dims_vec; + for (size_t i = 0; i < repeat_times.size(); ++i) { + reduce_dims_vec.push_back(reshape_dims_vec.size()); + reshape_dims_vec.push_back(repeat_times[i]); + reshape_dims_vec.push_back(vec_in_dims[i]); + } + + int dims = reduce_dims_vec.size(); - protected: - template - void ExpandBackward(const framework::ExecutionContext& context, - const std::vector& reshape_dims_vec, - const std::vector& reduce_dims_vec) const { - size_t reshape_size = reshape_dims_vec.size(); - size_t reduce_size = reduce_dims_vec.size(); - auto* in0 = context.Input(framework::GradVarName("Out")); - auto* out0 = context.Output(framework::GradVarName("X")); - out0->mutable_data(context.GetPlace()); - auto x_grad = EigenVector::Flatten(*out0); - Eigen::DSizes reshape_dims; - for (size_t i = 0; i < reshape_size; ++i) { - reshape_dims[i] = reshape_dims_vec[i]; + bool just_copy = true; + for (size_t i = 0; i < repeat_times.size(); i++) { + if (repeat_times[i] != 1) { + just_copy = false; + break; } - Eigen::DSizes reduce_dims; - for (size_t i = 0; i < reduce_size; ++i) { - reduce_dims[i] = reduce_dims_vec[i]; + } + // no need reduce, just copy + if (just_copy) { + // in_grad->mutable_data(ctx.GetPlace()); + ctx.template Alloc(in_grad); + in_grad->data(); + + // framework::TensorCopy(*out_grad, ctx.GetPlace(), ctx.device_context(), + // in_grad); + } else { + PADDLE_ENFORCE_GE(dims, + 1, + pten::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be greater than or " + "equal to 1, but the value received is %d.", + dims)); + PADDLE_ENFORCE_LE(dims, + MAX_RANK_SUPPORTED, + pten::errors::InvalidArgument( + "The rank of the input 'Out@GRAD' for " + "expand_v2_grad op must be less than or equal " + "to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, + dims)); + switch (dims) { + case 1: + ExpandBackward( + ctx, out_grad, reshape_dims_vec, reduce_dims_vec, in_grad); + break; + case 2: + ExpandBackward( + ctx, out_grad, 
reshape_dims_vec, reduce_dims_vec, in_grad); + break; + case 3: + ExpandBackward( + ctx, out_grad, reshape_dims_vec, reduce_dims_vec, in_grad); + break; + case 4: + ExpandBackward( + ctx, out_grad, reshape_dims_vec, reduce_dims_vec, in_grad); + break; + case 5: + ExpandBackward( + ctx, out_grad, reshape_dims_vec, reduce_dims_vec, in_grad); + break; + case 6: + ExpandBackward( + ctx, out_grad, reshape_dims_vec, reduce_dims_vec, in_grad); + break; + default: + PADDLE_THROW(pten::errors::InvalidArgument( + "Only support tensor with rank being between 1 and 6. But " + "received tensor's rank = %d.", + dims)); } - auto out_grad = EigenVector::Flatten(*in0); - auto& place = - *context.template device_context().eigen_device(); - EigenBroadcastGrad, T, Dims>::Eval( - place, x_grad, out_grad, reduce_dims, reshape_dims); } -}; +} -} // namespace pten \ No newline at end of file +} // namespace pten diff --git a/paddle/pten/kernels/impl/expand_kernel_impl.h b/paddle/pten/kernels/impl/expand_kernel_impl.h index 58a9aae027d32..d1ccf2921335b 100644 --- a/paddle/pten/kernels/impl/expand_kernel_impl.h +++ b/paddle/pten/kernels/impl/expand_kernel_impl.h @@ -17,216 +17,161 @@ #include #include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/operators/eigen/eigen_function.h" - +#include "paddle/pten/kernels/funcs/eigen/common.h" +#include "paddle/pten/kernels/funcs/eigen/eigen_function.h" #define MAX_RANK_SUPPORTED 6 namespace pten { +using Tensor = DenseTensor; +// template +// using EigenVector = pten::EigenVector; +// template +// using EigenTensor = pten::EigenTensor; +// using framework::To32BitIndex; -inline std::vector get_expand_shape( - const framework::ExecutionContext& ctx) { - if (ctx.HasInput("Shape")) { - auto* shape_tensor = ctx.Input("Shape"); - auto* shape_data = shape_tensor->data(); - framework::Tensor cpu_shape_tensor; - if (platform::is_gpu_place(shape_tensor->place())) { - paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), - &cpu_shape_tensor); - shape_data = cpu_shape_tensor.data(); - } -#ifdef PADDLE_WITH_ASCEND_CL - if (platform::is_npu_place(shape_tensor->place())) { - paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), - &cpu_shape_tensor); - shape_data = cpu_shape_tensor.data(); - } -#endif -#ifdef PADDLE_WITH_XPU - if (platform::is_xpu_place(shape_tensor->place())) { - paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(), - &cpu_shape_tensor); - shape_data = cpu_shape_tensor.data(); - } -#endif - auto vec_shape = - std::vector(shape_data, shape_data + shape_tensor->numel()); - return vec_shape; - } - - auto list_expand_shapes_tensor = - ctx.MultiInput("expand_shapes_tensor"); - if (list_expand_shapes_tensor.size() > 0) { - // get tensor from - std::vector vec_epxand_shape; - for (size_t i = 0; i < list_expand_shapes_tensor.size(); ++i) { - auto tensor = list_expand_shapes_tensor[i]; - if (platform::is_gpu_place(tensor->place())) { - framework::Tensor temp; - paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); - vec_epxand_shape.push_back(*temp.data()); - } -#ifdef PADDLE_WITH_ASCEND_CL - else if (platform::is_npu_place(tensor->place())) { // NOLINT - framework::Tensor temp; - paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp); - vec_epxand_shape.push_back(*temp.data()); - } -#endif -#ifdef PADDLE_WITH_XPU - else if (platform::is_xpu_place(tensor->place())) { // NOLINT - framework::Tensor temp; - paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), 
&temp); - vec_epxand_shape.push_back(*temp.data()); - } -#endif - else { // NOLINT - vec_epxand_shape.push_back(*tensor->data()); +template +void Expand(const Context& ctx, + const DenseTensor& x, + const ScalarArray& shape, + DenseTensor* out) { + auto in_dims = x.dims(); + auto expand_shape = shape.GetData(); + auto vec_in_dims = framework::vectorize(in_dims); + auto diff = expand_shape.size() - vec_in_dims.size(); + vec_in_dims.insert(vec_in_dims.begin(), diff, 1); + std::vector repeat_times(vec_in_dims.size()); + for (size_t i = 0; i < vec_in_dims.size(); ++i) { + PADDLE_ENFORCE_NE( + expand_shape[i], + 0, + pten::errors::InvalidArgument("The expanded size cannot be zero.")); + if (i < diff) { + PADDLE_ENFORCE_GT( + expand_shape[i], + 0, + pten::errors::InvalidArgument( + "The expanded size (%d) for non-existing dimensions must be " + "positive for expand_v2 op.", + expand_shape[i])); + repeat_times[i] = expand_shape[i]; + } else if (expand_shape[i] > 0) { + if (vec_in_dims[i] != 1) { + PADDLE_ENFORCE_EQ( + vec_in_dims[i], + expand_shape[i], + pten::errors::InvalidArgument( + "The value (%d) of the non-singleton dimension does not match" + " the corresponding value (%d) in shape for expand_v2 op.", + vec_in_dims[i], + expand_shape[i])); + repeat_times[i] = 1; + } else { + repeat_times[i] = expand_shape[i]; } + } else { + PADDLE_ENFORCE_EQ( + expand_shape[i], + -1, + pten::errors::InvalidArgument( + "When the value in shape is negative for expand_v2 op, " + "only -1 is supported, but the value received is %d.", + expand_shape[i])); + repeat_times[i] = 1; } - return vec_epxand_shape; - } else { - return ctx.Attr>("shape"); } -} -using Tensor = framework::Tensor; -template -using EigenVector = framework::EigenVector; -template -using EigenTensor = framework::EigenTensor; -using framework::To32BitIndex; - -template -class ExpandKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - auto rank = context.Input("X")->dims().size(); - PADDLE_ENFORCE_GE( - rank, 1, - platform::errors::InvalidArgument( - "The rank of the input 'X' for expand_v2 op must be positive, " - "but the value received is %d.", - rank)); - PADDLE_ENFORCE_LE( - rank, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The rank of the input 'X' for expand_v2 op must be less than " - "or equal to %d, but the value received is %d.", - MAX_RANK_SUPPORTED, rank)); - auto expand_shape = get_expand_shape(context); - auto shape_size = expand_shape.size(); - PADDLE_ENFORCE_GE( - shape_size, rank, - platform::errors::InvalidArgument( - "The number (%d) of elements of 'shape' for expand_v2 op must be " - "greater than or equal to the rank (%d) of the input 'X'.", - shape_size, rank)); - PADDLE_ENFORCE_LE( - shape_size, MAX_RANK_SUPPORTED, - platform::errors::InvalidArgument( - "The number (%d) of elements of 'shape' for expand_v2 op must be " - "less than or equal to %d.", - shape_size, MAX_RANK_SUPPORTED)); - rank = std::max(rank, static_cast(shape_size)); - switch (rank) { - case 1: - Expand<1>(context); - break; - case 2: - Expand<2>(context); - break; - case 3: - Expand<3>(context); - break; - case 4: - Expand<4>(context); - break; - case 5: - Expand<5>(context); - break; - case 6: - Expand<6>(context); - break; - } + auto* out0 = out; + Eigen::DSizes bcast_dims; + for (size_t i = 0; i < repeat_times.size(); ++i) { + bcast_dims[i] = repeat_times[i]; } - protected: - template - void Expand(const framework::ExecutionContext& context) const { - auto* in0 = 
context.Input("X"); - - auto in_dims = in0->dims(); - auto expand_shape = get_expand_shape(context); - auto vec_in_dims = framework::vectorize(in_dims); - auto diff = expand_shape.size() - vec_in_dims.size(); - vec_in_dims.insert(vec_in_dims.begin(), diff, 1); - std::vector repeat_times(vec_in_dims.size()); - for (size_t i = 0; i < vec_in_dims.size(); ++i) { - PADDLE_ENFORCE_NE(expand_shape[i], 0, - platform::errors::InvalidArgument( - "The expanded size cannot be zero.")); - if (i < diff) { - PADDLE_ENFORCE_GT( - expand_shape[i], 0, - platform::errors::InvalidArgument( - "The expanded size (%d) for non-existing dimensions must be " - "positive for expand_v2 op.", - expand_shape[i])); - repeat_times[i] = expand_shape[i]; - } else if (expand_shape[i] > 0) { - if (vec_in_dims[i] != 1) { - PADDLE_ENFORCE_EQ( - vec_in_dims[i], expand_shape[i], - platform::errors::InvalidArgument( - "The value (%d) of the non-singleton dimension does not match" - " the corresponding value (%d) in shape for expand_v2 op.", - vec_in_dims[i], expand_shape[i])); - repeat_times[i] = 1; - } else { - repeat_times[i] = expand_shape[i]; - } - } else { - PADDLE_ENFORCE_EQ( - expand_shape[i], -1, - platform::errors::InvalidArgument( - "When the value in shape is negative for expand_v2 op, " - "only -1 is supported, but the value received is %d.", - expand_shape[i])); - repeat_times[i] = 1; - } - } + framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); + framework::DDim out_dims(new_in_dims); + for (size_t i = 0; i < repeat_times.size(); ++i) { + out_dims[i] *= repeat_times[i]; + } - auto* out0 = context.Output("Out"); - Eigen::DSizes bcast_dims; - for (size_t i = 0; i < repeat_times.size(); ++i) { - bcast_dims[i] = repeat_times[i]; - } + out0->Resize(out_dims); + auto x0 = EigenTensor::From(x, new_in_dims); + ctx.template Alloc(out0); + out0->data(); - framework::DDim new_in_dims = framework::make_ddim(vec_in_dims); - framework::DDim out_dims(new_in_dims); - for (size_t i = 0; i < repeat_times.size(); ++i) { - out_dims[i] *= repeat_times[i]; - } + auto y = EigenTensor::From(*out0, out_dims); + auto& place = *ctx.eigen_device(); + // use 32-bit index to speed up + bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); + if (use_32bit_index) { + pten::funcs::EigenBroadcast, T, Rank>::Eval( + place, To32BitIndex(y), To32BitIndex(x0), bcast_dims); + } else { + pten::funcs::EigenBroadcast, T, Rank>::Eval( + place, y, x0, bcast_dims); + } +} - out0->Resize(out_dims); - auto x = EigenTensor::From(*in0, new_in_dims); - out0->mutable_data(context.GetPlace()); - auto y = EigenTensor::From(*out0, out_dims); - auto& place = - *context.template device_context().eigen_device(); - // use 32-bit index to speed up - bool use_32bit_index = y.size() < Eigen::NumTraits::highest(); - if (use_32bit_index) { - EigenBroadcast, T, Rank>::Eval( - place, To32BitIndex(y), To32BitIndex(x), bcast_dims); - } else { - EigenBroadcast, T, Rank>::Eval(place, y, x, - bcast_dims); - } +template +void ExpandKernel(const Context& ctx, + const DenseTensor& x, + const ScalarArray& shape, + DenseTensor* out) { + auto rank = x.dims().size(); + PADDLE_ENFORCE_GE( + rank, + 1, + pten::errors::InvalidArgument( + "The rank of the input 'X' for expand_v2 op must be positive, " + "but the value received is %d.", + rank)); + PADDLE_ENFORCE_LE( + rank, + MAX_RANK_SUPPORTED, + pten::errors::InvalidArgument( + "The rank of the input 'X' for expand_v2 op must be less than " + "or equal to %d, but the value received is %d.", + MAX_RANK_SUPPORTED, + rank)); 
+ auto expand_shape = shape.GetData(); + auto shape_size = expand_shape.size(); + PADDLE_ENFORCE_GE( + shape_size, + rank, + pten::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "greater than or equal to the rank (%d) of the input 'X'.", + shape_size, + rank)); + PADDLE_ENFORCE_LE( + shape_size, + MAX_RANK_SUPPORTED, + pten::errors::InvalidArgument( + "The number (%d) of elements of 'shape' for expand_v2 op must be " + "less than or equal to %d.", + shape_size, + MAX_RANK_SUPPORTED)); + rank = std::max(rank, static_cast(shape_size)); + switch (rank) { + case 1: + Expand(ctx, x, shape, out); + break; + case 2: + Expand(ctx, x, shape, out); + break; + case 3: + Expand(ctx, x, shape, out); + break; + case 4: + Expand(ctx, x, shape, out); + break; + case 5: + Expand(ctx, x, shape, out); + break; + case 6: + Expand(ctx, x, shape, out); + break; } -}; +} -} // namespace pten \ No newline at end of file +} // namespace pten diff --git a/paddle/pten/ops/compat/expand_sig.cc b/paddle/pten/ops/compat/expand_sig.cc new file mode 100644 index 0000000000000..3c4d05c63965f --- /dev/null +++ b/paddle/pten/ops/compat/expand_sig.cc @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/pten/core/compat/op_utils.h" + +namespace pten { + +KernelSignature ExpandOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.HasInput("Shape")) { + return KernelSignature("expand", {"X"}, {"Shape"}, {"Out"}); + } else if (ctx.InputSize("expand_shapes_tensor") > 0) { + return KernelSignature("expand", {"X"}, {"expand_shapes_tensor"}, {"Out"}); + } else { + return KernelSignature("expand", {"X"}, {"shape"}, {"Out"}); + } +} +} // namespace pten + +PT_REGISTER_BASE_KERNEL_NAME(expand_v2, expand); + +PT_REGISTER_ARG_MAPPING_FN(expand_v2, pten::ExpandOpArgumentMapping); From ed0c89d55b956ab7af9b57a4544baadcb1a5062d Mon Sep 17 00:00:00 2001 From: linjieccc <623543001@qq.com> Date: Fri, 11 Feb 2022 07:12:00 +0000 Subject: [PATCH 03/12] move expand_v2 to pten --- paddle/fluid/operators/expand_v2_op.cc | 3 +++ paddle/fluid/operators/expand_v2_op.h | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) mode change 100755 => 100644 paddle/fluid/operators/expand_v2_op.cc diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc old mode 100755 new mode 100644 index 0c5b0b62b89f3..901e073ccde23 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -16,6 +16,9 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/framework/op_registry.h" + +#define MAX_RANK_SUPPORTED 6 namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h index dd1625013444b..fabfe736a7bc8 100644 --- a/paddle/fluid/operators/expand_v2_op.h +++ b/paddle/fluid/operators/expand_v2_op.h @@ -91,7 +91,7 @@ inline std::vector get_expand_shape( return ctx.Attr>("shape"); } } - +/* using Tensor = framework::Tensor; template @@ -344,6 +344,6 @@ class ExpandV2GradKernel : public framework::OpKernel { place, x_grad, out_grad, reduce_dims, reshape_dims); } }; - +*/ } // namespace operators } // namespace paddle From c38cc1d6d013b1cb1a771c1616de98cc3e0c068d Mon Sep 17 00:00:00 2001 From: linjieccc <623543001@qq.com> Date: Fri, 11 Feb 2022 09:51:02 +0000 Subject: [PATCH 04/12] fix grad register --- paddle/fluid/operators/expand_v2_op.cc | 7 ++++++ paddle/fluid/operators/expand_v2_op_npu.cc | 1 + paddle/fluid/operators/expand_v2_op_xpu.cc | 1 + paddle/pten/kernels/impl/expand_kernel_impl.h | 9 ++++---- paddle/pten/ops/compat/expand_sig.cc | 22 +++++++++++++++++++ 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index 901e073ccde23..58188cb799f68 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -247,6 +247,13 @@ class ExpandV2GradOp : public framework::OperatorWithKernel { return framework::OpKernelType(expected_kernel_type.data_type_, tensor.place(), tensor.layout()); } + + framework::KernelSignature GetExpectedPtenKernelArgs( + const framework::ExecutionContext& ctx) const override { + return framework::KernelSignature("expand_grad", + {framework::GradVarName("Out"), "X"}, + {"shape"}, {framework::GradVarName("X")}); + } }; template diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index a4807d1d26d7c..c6a64d3494dd0 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/expand_v2_op_xpu.cc b/paddle/fluid/operators/expand_v2_op_xpu.cc index d13a403175589..e2e0cff100173 100644 --- a/paddle/fluid/operators/expand_v2_op_xpu.cc +++ b/paddle/fluid/operators/expand_v2_op_xpu.cc @@ -13,6 +13,7 @@ limitations under the License. 
 
 #ifdef PADDLE_WITH_XPU
 
+#include "paddle/fluid/operators/expand_v2_op.h"
 #include "paddle/fluid/framework/op_registry.h"
 
 namespace paddle {
diff --git a/paddle/pten/kernels/impl/expand_kernel_impl.h b/paddle/pten/kernels/impl/expand_kernel_impl.h
index d1ccf2921335b..4c7d662217cb3 100644
--- a/paddle/pten/kernels/impl/expand_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_kernel_impl.h
@@ -82,7 +82,6 @@ void Expand(const Context& ctx,
     }
   }
 
-  auto* out0 = out;
   Eigen::DSizes bcast_dims;
   for (size_t i = 0; i < repeat_times.size(); ++i) {
     bcast_dims[i] = repeat_times[i];
@@ -94,12 +93,12 @@ void Expand(const Context& ctx,
     out_dims[i] *= repeat_times[i];
   }
 
-  out0->Resize(out_dims);
+  out->Resize(out_dims);
   auto x0 = EigenTensor::From(x, new_in_dims);
-  ctx.template Alloc(out0);
-  out0->data();
+  ctx.template Alloc(out);
+  out->data();
 
-  auto y = EigenTensor::From(*out0, out_dims);
+  auto y = EigenTensor::From(*out, out_dims);
   auto& place = *ctx.eigen_device();
   // use 32-bit index to speed up
   bool use_32bit_index = y.size() < Eigen::NumTraits::highest();
diff --git a/paddle/pten/ops/compat/expand_sig.cc b/paddle/pten/ops/compat/expand_sig.cc
index 3c4d05c63965f..bea528516301d 100644
--- a/paddle/pten/ops/compat/expand_sig.cc
+++ b/paddle/pten/ops/compat/expand_sig.cc
@@ -25,8 +25,30 @@ KernelSignature ExpandOpArgumentMapping(const ArgumentMappingContext& ctx) {
     return KernelSignature("expand", {"X"}, {"shape"}, {"Out"});
   }
 }
+
+/*
+KernelSignature ExpandGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
+  if (ctx.HasInput("Shape")) {
+    return KernelSignature("expand_grad",
+                           {GradVarName("Out"), "X"},
+                           {"Shape"},
+                           {GradVarName("X")});
+  } else if (ctx.InputSize("expand_shapes_tensor") > 0) {
+    return KernelSignature("expand_grad",
+                           {GradVarName("Out"), "X"},
+                           {"expand_shapes_tensor"},
+                           {GradVarName("X")});
+  } else {
+    return KernelSignature("expand_grad",
+                           {GradVarName("Out"), "X"},
+                           {"shape"},
+                           {GradVarName("X")});
+  }
+}
+*/
 } // namespace pten
 
 PT_REGISTER_BASE_KERNEL_NAME(expand_v2, expand);
+PT_REGISTER_BASE_KERNEL_NAME(expand_v2_grad, expand_grad);
 
 PT_REGISTER_ARG_MAPPING_FN(expand_v2, pten::ExpandOpArgumentMapping);
From d82900560e35922a7d1aabd2578980728936f488 Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 03:05:30 +0000
Subject: [PATCH 05/12] fix grad register

---
 paddle/fluid/operators/expand_v2_op.cc     |  15 +-
 paddle/fluid/operators/expand_v2_op.h      | 254 ------------------
 paddle/pten/kernels/expand_grad_kernel.h   |  11 +-
 paddle/pten/kernels/expand_kernel.h        |   5 +-
 .../kernels/impl/expand_grad_kernel_impl.h  |   4 +-
 paddle/pten/ops/compat/expand_sig.cc        |  10 +-
 6 files changed, 26 insertions(+), 273 deletions(-)

diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc
index 58188cb799f68..4ba72589511ad 100644
--- a/paddle/fluid/operators/expand_v2_op.cc
+++ b/paddle/fluid/operators/expand_v2_op.cc
@@ -248,12 +248,15 @@ class ExpandV2GradOp : public framework::OperatorWithKernel {
                                    tensor.place(), tensor.layout());
   }
 
-  framework::KernelSignature GetExpectedPtenKernelArgs(
-      const framework::ExecutionContext& ctx) const override {
-    return framework::KernelSignature("expand_grad",
-                                      {framework::GradVarName("Out"), "X"},
-                                      {"shape"}, {framework::GradVarName("X")});
-  }
+  /*
+  framework::KernelSignature GetExpectedPtenKernelArgs(
+      const framework::ExecutionContext& ctx) const override {
+    return framework::KernelSignature("expand_grad",
+                                      {framework::GradVarName("Out"), "X"},
+                                      {"shape"},
+                                      {framework::GradVarName("X")});
+  }
+  */
 };
 
 template
diff --git a/paddle/fluid/operators/expand_v2_op.h b/paddle/fluid/operators/expand_v2_op.h
index fabfe736a7bc8..158a9d1bc526b 100644
--- a/paddle/fluid/operators/expand_v2_op.h
+++ b/paddle/fluid/operators/expand_v2_op.h
@@ -91,259 +91,5 @@ inline std::vector get_expand_shape(
     return ctx.Attr>("shape");
   }
 }
-/*
-using Tensor = framework::Tensor;
-template
-using EigenVector = framework::EigenVector;
-template
-using EigenTensor = framework::EigenTensor;
-using framework::To32BitIndex;
-
-template
-class ExpandV2Kernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto rank = context.Input("X")->dims().size();
-    PADDLE_ENFORCE_GE(
-        rank, 1,
-        platform::errors::InvalidArgument(
-            "The rank of the input 'X' for expand_v2 op must be positive, "
-            "but the value received is %d.",
-            rank));
-    PADDLE_ENFORCE_LE(
-        rank, MAX_RANK_SUPPORTED,
-        platform::errors::InvalidArgument(
-            "The rank of the input 'X' for expand_v2 op must be less than "
-            "or equal to %d, but the value received is %d.",
-            MAX_RANK_SUPPORTED, rank));
-    auto expand_shape = get_expand_shape(context);
-    auto shape_size = expand_shape.size();
-    PADDLE_ENFORCE_GE(
-        shape_size, rank,
-        platform::errors::InvalidArgument(
-            "The number (%d) of elements of 'shape' for expand_v2 op must be "
-            "greater than or equal to the rank (%d) of the input 'X'.",
-            shape_size, rank));
-    PADDLE_ENFORCE_LE(
-        shape_size, MAX_RANK_SUPPORTED,
-        platform::errors::InvalidArgument(
-            "The number (%d) of elements of 'shape' for expand_v2 op must be "
-            "less than or equal to %d.",
-            shape_size, MAX_RANK_SUPPORTED));
-    rank = std::max(rank, static_cast(shape_size));
-    switch (rank) {
-      case 1:
-        Expand<1>(context);
-        break;
-      case 2:
-        Expand<2>(context);
-        break;
-      case 3:
-        Expand<3>(context);
-        break;
-      case 4:
-        Expand<4>(context);
-        break;
-      case 5:
-        Expand<5>(context);
-        break;
-      case 6:
-        Expand<6>(context);
-        break;
-    }
-  }
-
- protected:
-  template
-  void Expand(const framework::ExecutionContext& context) const {
-    auto* in0 = context.Input("X");
-
-    auto in_dims = in0->dims();
-    auto expand_shape = get_expand_shape(context);
-    auto vec_in_dims = framework::vectorize(in_dims);
-    auto diff = expand_shape.size() - vec_in_dims.size();
-    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
-    std::vector repeat_times(vec_in_dims.size());
-    for (size_t i = 0; i < vec_in_dims.size(); ++i) {
-      PADDLE_ENFORCE_NE(expand_shape[i], 0,
-                        platform::errors::InvalidArgument(
-                            "The expanded size cannot be zero."));
-      if (i < diff) {
-        PADDLE_ENFORCE_GT(
-            expand_shape[i], 0,
-            platform::errors::InvalidArgument(
-                "The expanded size (%d) for non-existing dimensions must be "
-                "positive for expand_v2 op.",
-                expand_shape[i]));
-        repeat_times[i] = expand_shape[i];
-      } else if (expand_shape[i] > 0) {
-        if (vec_in_dims[i] != 1) {
-          PADDLE_ENFORCE_EQ(
-              vec_in_dims[i], expand_shape[i],
-              platform::errors::InvalidArgument(
-                  "The value (%d) of the non-singleton dimension does not match"
-                  " the corresponding value (%d) in shape for expand_v2 op.",
-                  vec_in_dims[i], expand_shape[i]));
-          repeat_times[i] = 1;
-        } else {
-          repeat_times[i] = expand_shape[i];
-        }
-      } else {
-        PADDLE_ENFORCE_EQ(
-            expand_shape[i], -1,
-            platform::errors::InvalidArgument(
-                "When the value in shape is negative for expand_v2 op, "
-                "only -1 is supported, but the value received is %d.",
-                expand_shape[i]));
-        repeat_times[i] = 1;
-      }
-    }
-
-    auto* out0 = context.Output("Out");
-    Eigen::DSizes bcast_dims;
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      bcast_dims[i] = repeat_times[i];
-    }
-
-    framework::DDim new_in_dims = framework::make_ddim(vec_in_dims);
-    framework::DDim out_dims(new_in_dims);
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      out_dims[i] *= repeat_times[i];
-    }
-
-    out0->Resize(out_dims);
-    auto x = EigenTensor::From(*in0, new_in_dims);
-    out0->mutable_data(context.GetPlace());
-    auto y = EigenTensor::From(*out0, out_dims);
-    auto& place =
-        *context.template device_context().eigen_device();
-    // use 32-bit index to speed up
-    bool use_32bit_index = y.size() < Eigen::NumTraits::highest();
-    if (use_32bit_index) {
-      EigenBroadcast, T, Rank>::Eval(
-          place, To32BitIndex(y), To32BitIndex(x), bcast_dims);
-    } else {
-      EigenBroadcast, T, Rank>::Eval(place, y, x,
-                                     bcast_dims);
-    }
-  }
-};
-
-template
-class ExpandV2GradKernel : public framework::OpKernel {
- public:
-  void Compute(const framework::ExecutionContext& context) const override {
-    auto* in0 = context.Input("X");
-    auto expand_shape = get_expand_shape(context);
-    auto x_dims = in0->dims();
-    auto vec_in_dims = framework::vectorize(x_dims);
-    auto diff = expand_shape.size() - vec_in_dims.size();
-    vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
-    // 1. reshape_dims_vec is the broadcast parameter.
-    // 2. reduce_dims_vec is the dimension parameter to compute gradients. For
-    //    each dimension expanded, the gradients should be summed to original
-    //    size.
-    std::vector repeat_times(vec_in_dims.size());
-    for (size_t i = 0; i < vec_in_dims.size(); ++i) {
-      if (expand_shape[i] < 0) {
-        repeat_times[i] = 1;
-      } else {
-        repeat_times[i] = expand_shape[i] / vec_in_dims[i];
-      }
-    }
-    std::vector reshape_dims_vec;
-    std::vector reduce_dims_vec;
-    for (size_t i = 0; i < repeat_times.size(); ++i) {
-      reduce_dims_vec.push_back(reshape_dims_vec.size());
-      reshape_dims_vec.push_back(repeat_times[i]);
-      reshape_dims_vec.push_back(vec_in_dims[i]);
-    }
-
-    int dims = reduce_dims_vec.size();
-
-    bool just_copy = true;
-    for (size_t i = 0; i < repeat_times.size(); i++) {
-      if (repeat_times[i] != 1) {
-        just_copy = false;
-        break;
-      }
-    }
-    // no need reduce, just copy
-    if (just_copy) {
-      auto* in0 = context.Input(framework::GradVarName("Out"));
-      auto* out0 = context.Output(framework::GradVarName("X"));
-      out0->mutable_data(context.GetPlace());
-      framework::TensorCopy(*in0, context.GetPlace(), context.device_context(),
-                            out0);
-    } else {
-      PADDLE_ENFORCE_GE(dims, 1,
-                        platform::errors::InvalidArgument(
-                            "The rank of the input 'Out@GRAD' for "
-                            "expand_v2_grad op must be greater than or "
-                            "equal to 1, but the value received is %d.",
-                            dims));
-      PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED,
-                        platform::errors::InvalidArgument(
-                            "The rank of the input 'Out@GRAD' for "
-                            "expand_v2_grad op must be less than or equal "
-                            "to %d, but the value received is %d.",
-                            MAX_RANK_SUPPORTED, dims));
-      switch (dims) {
-        case 1:
-          ExpandBackward<1>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 2:
-          ExpandBackward<2>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 3:
-          ExpandBackward<3>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 4:
-          ExpandBackward<4>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 5:
-          ExpandBackward<5>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        case 6:
-          ExpandBackward<6>(context, reshape_dims_vec, reduce_dims_vec);
-          break;
-        default:
-          PADDLE_THROW(platform::errors::InvalidArgument(
-              "Only support tensor with rank being between 1 and 6. But "
-              "received tensor's rank = %d.",
-              dims));
-      }
-    }
-  }
-
- protected:
-  template
-  void ExpandBackward(const framework::ExecutionContext& context,
-                      const std::vector& reshape_dims_vec,
-                      const std::vector& reduce_dims_vec) const {
-    size_t reshape_size = reshape_dims_vec.size();
-    size_t reduce_size = reduce_dims_vec.size();
-    auto* in0 = context.Input(framework::GradVarName("Out"));
-    auto* out0 = context.Output(framework::GradVarName("X"));
-    out0->mutable_data(context.GetPlace());
-    auto x_grad = EigenVector::Flatten(*out0);
-    Eigen::DSizes reshape_dims;
-    for (size_t i = 0; i < reshape_size; ++i) {
-      reshape_dims[i] = reshape_dims_vec[i];
-    }
-    Eigen::DSizes reduce_dims;
-    for (size_t i = 0; i < reduce_size; ++i) {
-      reduce_dims[i] = reduce_dims_vec[i];
-    }
-    auto out_grad = EigenVector::Flatten(*in0);
-    auto& place =
-        *context.template device_context().eigen_device();
-    EigenBroadcastGrad, T, Dims>::Eval(
-        place, x_grad, out_grad, reduce_dims, reshape_dims);
-  }
-};
-*/
 } // namespace operators
 } // namespace paddle
diff --git a/paddle/pten/kernels/expand_grad_kernel.h b/paddle/pten/kernels/expand_grad_kernel.h
index a4ca5da0606af..8bcb599cc9bc2 100644
--- a/paddle/pten/kernels/expand_grad_kernel.h
+++ b/paddle/pten/kernels/expand_grad_kernel.h
@@ -14,14 +14,17 @@
 
 #pragma once
 
+#include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/device_context.h"
 
 namespace pten {
 
-template
+template
 void ExpandGradKernel(const Context& ctx,
-                      const DenseTensor& out_grad,
                       const DenseTensor& x,
-                      DenseTensor* x_grad);
+                      const DenseTensor& out_grad,
+                      const ScalarArray& shape,
+                      DenseTensor* in_grad);
 
-} // namespace pten
\ No newline at end of file
+} // namespace pten
diff --git a/paddle/pten/kernels/expand_kernel.h b/paddle/pten/kernels/expand_kernel.h
index c63876720d982..91bea8c07e40b 100644
--- a/paddle/pten/kernels/expand_kernel.h
+++ b/paddle/pten/kernels/expand_kernel.h
@@ -16,13 +16,14 @@
 
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/device_context.h"
 
 namespace pten {
 
 template
-void ExpandKernel(const Context& ctx,
+void ExpandKernel(const Context& ctx,
                   const DenseTensor& x,
                   const ScalarArray& shape,
                   DenseTensor* out);
 
-} // namepsace pten
\ No newline at end of file
+} // namepsace pten
diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 131fb64dd6718..9e18c8e44759a 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -50,12 +50,12 @@
 
 template
 void ExpandGradKernel(const Context& ctx,
-                      const DenseTensor& out_grad,
                       const DenseTensor& x,
+                      const DenseTensor& out_grad,
                       const ScalarArray& shape,
                       DenseTensor* in_grad) {
   auto expand_shape = shape.GetData();
-  auto x_dims = out_grad.dims();
+  auto x_dims = x.dims();
   auto vec_in_dims = framework::vectorize(x_dims);
   auto diff = expand_shape.size() - vec_in_dims.size();
   vec_in_dims.insert(vec_in_dims.begin(), diff, 1);
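For context on the reshape/reduce bookkeeping this grad kernel carries over from the fluid implementation: the gradient of an expand is a sum over every broadcast dimension, because each input element was copied repeat_times times in the forward pass. The following plain C++ illustration (not the Eigen path used by the kernel, and not part of this patch) shows the effect for a tensor of shape [3] expanded to [2, 3].

#include <array>
#include <cstdio>

int main() {
  // Suppose x has shape [3] and was expanded to out with shape [2, 3]
  // by repeating the single row twice (repeat_times = {2, 1}).
  // The gradient w.r.t. x sums out_grad over the repeated axis.
  std::array<std::array<double, 3>, 2> out_grad = {{{0.1, 0.2, 0.3},
                                                    {0.4, 0.5, 0.6}}};
  std::array<double, 3> x_grad = {0.0, 0.0, 0.0};
  for (int i = 0; i < 2; ++i) {
    for (int j = 0; j < 3; ++j) {
      x_grad[j] += out_grad[i][j];
    }
  }
  std::printf("%.1f %.1f %.1f\n", x_grad[0], x_grad[1], x_grad[2]);  // 0.5 0.7 0.9
  return 0;
}

This is also why the kernel can skip the reduction and do a straight copy when every repeat_times entry is 1: there is nothing to sum.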
diff --git a/paddle/pten/ops/compat/expand_sig.cc b/paddle/pten/ops/compat/expand_sig.cc
index bea528516301d..a04052cdac696 100644
--- a/paddle/pten/ops/compat/expand_sig.cc
+++ b/paddle/pten/ops/compat/expand_sig.cc
@@ -26,29 +26,29 @@ KernelSignature ExpandOpArgumentMapping(const ArgumentMappingContext& ctx) {
   }
 }
 
-/*
 KernelSignature ExpandGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
   if (ctx.HasInput("Shape")) {
     return KernelSignature("expand_grad",
-                           {GradVarName("Out"), "X"},
+                           {"X", GradVarName("Out")},
                            {"Shape"},
                            {GradVarName("X")});
   } else if (ctx.InputSize("expand_shapes_tensor") > 0) {
     return KernelSignature("expand_grad",
-                           {GradVarName("Out"), "X"},
+                           {"X", GradVarName("Out")},
                            {"expand_shapes_tensor"},
                            {GradVarName("X")});
   } else {
     return KernelSignature("expand_grad",
-                           {GradVarName("Out"), "X"},
+                           {"X", GradVarName("Out")},
                            {"shape"},
                            {GradVarName("X")});
   }
 }
-*/
+
 } // namespace pten
 
 PT_REGISTER_BASE_KERNEL_NAME(expand_v2, expand);
 PT_REGISTER_BASE_KERNEL_NAME(expand_v2_grad, expand_grad);
 
 PT_REGISTER_ARG_MAPPING_FN(expand_v2, pten::ExpandOpArgumentMapping);
+PT_REGISTER_ARG_MAPPING_FN(expand_v2_grad, pten::ExpandGradOpArgumentMapping);
From 360ce935bc5854568b12f0229c3c957b219fde71 Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 07:00:19 +0000
Subject: [PATCH 06/12] fix tensorcpry

---
 paddle/fluid/operators/expand_v2_op.cc              | 10 ----------
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h  | 10 +---------
 paddle/pten/kernels/impl/expand_kernel_impl.h       |  7 -------
 3 files changed, 1 insertion(+), 26 deletions(-)

diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc
index 4ba72589511ad..901e073ccde23 100644
--- a/paddle/fluid/operators/expand_v2_op.cc
+++ b/paddle/fluid/operators/expand_v2_op.cc
@@ -247,16 +247,6 @@ class ExpandV2GradOp : public framework::OperatorWithKernel {
     return framework::OpKernelType(expected_kernel_type.data_type_,
                                    tensor.place(), tensor.layout());
   }
-
-  /*
-  framework::KernelSignature GetExpectedPtenKernelArgs(
-      const framework::ExecutionContext& ctx) const override {
-    return framework::KernelSignature("expand_grad",
-                                      {framework::GradVarName("Out"), "X"},
-                                      {"shape"},
-                                      {framework::GradVarName("X")});
-  }
-  */
 };
 
 template
diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 9e18c8e44759a..f29dd2cfe223b 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -27,9 +27,6 @@ void ExpandBackward(const Context& ctx,
                     DenseTensor* in_grad) {
   size_t reshape_size = reshape_dims_vec.size();
   size_t reduce_size = reduce_dims_vec.size();
-  // auto* in0 = context.Input(framework::GradVarName("Out"));
-  // auto* out0 = context.Output(framework::GradVarName("X"));
-  // out0->mutable_data(context.GetPlace());
   ctx.template Alloc(in_grad);
   in_grad->data();
 
@@ -90,12 +87,7 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
-    // in_grad->mutable_data(ctx.GetPlace());
-    ctx.template Alloc(in_grad);
-    in_grad->data();
-
-    // framework::TensorCopy(*out_grad, ctx.GetPlace(), ctx.device_context(),
-    // in_grad);
+    pten::Copy(ctx, x, false, in_grad);
   } else {
     PADDLE_ENFORCE_GE(dims,
                       1,
diff --git a/paddle/pten/kernels/impl/expand_kernel_impl.h b/paddle/pten/kernels/impl/expand_kernel_impl.h
index 4c7d662217cb3..14f2b8ad243fc 100644
--- a/paddle/pten/kernels/impl/expand_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_kernel_impl.h
@@ -23,13 +23,6 @@ namespace pten {
 
 using Tensor = DenseTensor;
 
-// template
-// using EigenVector = pten::EigenVector;
-// template
-// using EigenTensor = pten::EigenTensor;
-// using framework::To32BitIndex;
 
 template
 void Expand(const Context& ctx,
From 9ca3c872e593a182e5e0447a2179921b7ac4c781 Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 07:18:52 +0000
Subject: [PATCH 07/12] fix tensorcopy

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index f29dd2cfe223b..37630dff87e1c 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include "paddle/pten/kernels/copy_kernel.h"
 #include "paddle/pten/kernels/funcs/eigen/common.h"
 #include "paddle/pten/kernels/funcs/eigen/eigen_function.h"
 #include "paddle/pten/kernels/impl/expand_kernel_impl.h"
From f7b88cf0ea9696389bcb88bd2a56f91b2bc45aec Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 08:56:39 +0000
Subject: [PATCH 08/12] fix tensorcopy

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 37630dff87e1c..c86616ba22d14 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -88,6 +88,8 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
+    ctx.template Alloc(in_grad);
+    in_grad->data();
     pten::Copy(ctx, x, false, in_grad);
   } else {
     PADDLE_ENFORCE_GE(dims,
From e26f7c1253919cf94975c878b2e7064870d4fa9e Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 11:32:34 +0000
Subject: [PATCH 09/12] fix tensorcopy

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index c86616ba22d14..010c71b956af7 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -88,9 +88,8 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
-    ctx.template Alloc(in_grad);
-    in_grad->data();
-    pten::Copy(ctx, x, false, in_grad);
+    auto* in_grad0 = ctx.template Alloc(in_grad);
+    pten::Copy(ctx, x, false, in_grad0);
   } else {
     PADDLE_ENFORCE_GE(dims,
                       1,
From 42cbee0ca67e58d780d61787b102c1ae8719fb4b Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 11:39:35 +0000
Subject: [PATCH 10/12] fix tensorcopy

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 010c71b956af7..7baa23c410a23 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -88,8 +88,8 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
-    auto* in_grad0 = ctx.template Alloc(in_grad);
-    pten::Copy(ctx, x, false, in_grad0);
+    in_grad->mutable_data(ctx.GetPlace());
+    pten::Copy(ctx, x, false, in_grad);
   } else {
     PADDLE_ENFORCE_GE(dims,
                       1,
From 374d3dad2eeafc6dce71167020aa246ae44a7f28 Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 13:08:37 +0000
Subject: [PATCH 11/12] fix ci

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 7baa23c410a23..0913b11a325e1 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -88,7 +88,7 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
-    in_grad->mutable_data(ctx.GetPlace());
+    ctx.template Alloc(in_grad);
     pten::Copy(ctx, x, false, in_grad);
   } else {
     PADDLE_ENFORCE_GE(dims,
From a5c89ee5c34ffcf3a606a43937b2f0f02a9f1d8b Mon Sep 17 00:00:00 2001
From: linjieccc <623543001@qq.com>
Date: Mon, 14 Feb 2022 13:30:55 +0000
Subject: [PATCH 12/12] fix tensorcopy

---
 paddle/pten/kernels/impl/expand_grad_kernel_impl.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
index 0913b11a325e1..05ccf2e00da23 100644
--- a/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
+++ b/paddle/pten/kernels/impl/expand_grad_kernel_impl.h
@@ -88,8 +88,7 @@ void ExpandGradKernel(const Context& ctx,
   }
   // no need reduce, just copy
   if (just_copy) {
-    ctx.template Alloc(in_grad);
-    pten::Copy(ctx, x, false, in_grad);
+    pten::Copy(ctx, out_grad, false, in_grad);
   } else {
     PADDLE_ENFORCE_GE(dims,
                       1,