From d18dfc07d5051d7b767978659959c1f59e6ac2b1 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Mon, 14 Feb 2022 07:40:55 +0000 Subject: [PATCH 01/11] Support custom implement for C++ API --- paddle/pten/api/include/manual_api.h | 6 -- paddle/pten/api/lib/CMakeLists.txt | 5 +- paddle/pten/api/lib/api_invoke_impl.cc | 102 ++++++++++++++++++ paddle/pten/api/lib/api_invoke_impl.h | 29 +++++ paddle/pten/api/lib/manual_api.cc | 65 ----------- paddle/pten/tests/api/test_split_api.cc | 1 - python/paddle/utils/code_gen/api.yaml | 5 + python/paddle/utils/code_gen/api_gen.py | 1 + .../paddle/utils/code_gen/backward_api_gen.py | 1 + 9 files changed, 141 insertions(+), 74 deletions(-) create mode 100644 paddle/pten/api/lib/api_invoke_impl.cc create mode 100644 paddle/pten/api/lib/api_invoke_impl.h diff --git a/paddle/pten/api/include/manual_api.h b/paddle/pten/api/include/manual_api.h index 942bbe9704572..7d6c2364c5ede 100644 --- a/paddle/pten/api/include/manual_api.h +++ b/paddle/pten/api/include/manual_api.h @@ -30,11 +30,5 @@ namespace experimental { // TODO(chenweihang): Replace backend by place when place is ready PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking); -// TODO(chentianyu03): Split API has extra logic to calculate the outputs size, -// api_gen do not support -PADDLE_API std::vector split(const Tensor& x, - const ScalarArray& num_or_sections, - const Scalar& axis); - } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index 2cf737eb8b17f..b16d4d3ddda9e 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -82,8 +82,9 @@ add_custom_command( VERBATIM) cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform) +cc_library(api_invoke_impl SRCS api_invoke_impl.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch) cc_library(sparse_api SRCS sparse_api.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) -cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform) -cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api) +cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform api_invoke_impl) +cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api api_invoke_impl) cc_library(wrapped_infermeta SRCS ${wrapped_infermeta_source_file} DEPS pten) diff --git a/paddle/pten/api/lib/api_invoke_impl.cc b/paddle/pten/api/lib/api_invoke_impl.cc new file mode 100644 index 0000000000000..afdabdff6e0c5 --- /dev/null +++ b/paddle/pten/api/lib/api_invoke_impl.cc @@ -0,0 +1,102 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/pten/api/lib/api_invoke_impl.h" + +#include "paddle/pten/api/lib/api_registry.h" +#include "paddle/pten/api/lib/api_utils.h" +#include "paddle/pten/api/lib/data_transform.h" +#include "paddle/pten/api/lib/kernel_dispatch.h" +#include "paddle/pten/api/lib/utils/storage.h" +#include "paddle/pten/common/backend.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/core/meta_tensor.h" +#include "paddle/pten/infermeta/binary.h" +#include "paddle/pten/infermeta/multiary.h" +#include "paddle/pten/infermeta/nullary.h" +#include "paddle/pten/infermeta/unary.h" + +#include "glog/logging.h" + +namespace paddle { +namespace experimental { + +PADDLE_API std::vector split_impl(const Tensor& x, + const ScalarArray& num_or_sections, + const Scalar& axis) { + Backend kernel_backend = Backend::UNDEFINED; + DataLayout kernel_layout = DataLayout::UNDEFINED; + DataType kernel_data_type = DataType::UNDEFINED; + + if (kernel_backend == Backend::UNDEFINED || + kernel_layout == DataLayout::UNDEFINED || + kernel_data_type == DataType::UNDEFINED) { + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); + if (kernel_backend == Backend::UNDEFINED) { + kernel_backend = kernel_key.backend(); + } + if (kernel_layout == DataLayout::UNDEFINED) { + kernel_layout = kernel_key.layout(); + } + if (kernel_data_type == DataType::UNDEFINED) { + kernel_data_type = kernel_key.dtype(); + } + } + + auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( + "split", {kernel_backend, kernel_layout, kernel_data_type}); + VLOG(6) << "split API kernel key: [" << kernel_backend << ", " + << kernel_layout << ", " << kernel_data_type << "]"; + VLOG(6) << "split API kernel: " << kernel; + + auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); + + auto dense_x = PrepareData(x, kernel.InputAt(0), {}); + + // Calculate the number of out tensors + size_t out_number; + if (num_or_sections.GetData().size() == 1) { + out_number = num_or_sections.GetData()[0]; + } else { + out_number = num_or_sections.GetData().size(); + } + + std::vector out; + auto dense_outs = SetKernelOutput(out_number, kernel_backend, &out); + std::vector meta_outs; + for (size_t i = 0; i < out_number; ++i) { + meta_outs.push_back(dense_outs[i]); + } + + pten::SplitInferMeta( + MakeMetaTensor(*dense_x), num_or_sections, axis, &meta_outs); + + using kernel_signature = void (*)(const platform::DeviceContext&, + const pten::DenseTensor&, + const pten::ScalarArray&, + const pten::Scalar&, + std::vector&); + auto* kernel_fn = kernel.GetVariadicKernelFn(); + (*kernel_fn)(*dev_ctx, + *dense_x, + pten::ScalarArray(num_or_sections), + pten::Scalar(axis), + dense_outs); + + return out; +} + +} // namespace experimental +} // namespace paddle \ No newline at end of file diff --git a/paddle/pten/api/lib/api_invoke_impl.h b/paddle/pten/api/lib/api_invoke_impl.h new file mode 100644 index 0000000000000..1c66c69ff46db --- /dev/null +++ b/paddle/pten/api/lib/api_invoke_impl.h @@ -0,0 +1,29 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/common/scalar.h" +#include "paddle/pten/common/scalar_array.h" + +namespace paddle { +namespace experimental { + +PADDLE_API std::vector split_impl(const Tensor& x, + const ScalarArray& num_or_sections, + const Scalar& axis); + +} // namespace experimental +} // namespace paddle diff --git a/paddle/pten/api/lib/manual_api.cc b/paddle/pten/api/lib/manual_api.cc index 667bd177ee1f6..3b3cdce8f2ec3 100644 --- a/paddle/pten/api/lib/manual_api.cc +++ b/paddle/pten/api/lib/manual_api.cc @@ -78,71 +78,6 @@ PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { return out; } -PADDLE_API std::vector split(const Tensor& x, - const ScalarArray& num_or_sections, - const Scalar& axis) { - Backend kernel_backend = Backend::UNDEFINED; - DataLayout kernel_layout = DataLayout::UNDEFINED; - DataType kernel_data_type = DataType::UNDEFINED; - - if (kernel_backend == Backend::UNDEFINED || - kernel_layout == DataLayout::UNDEFINED || - kernel_data_type == DataType::UNDEFINED) { - auto kernel_key_set = ParseKernelKeyByInputArgs(x); - auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); - if (kernel_backend == Backend::UNDEFINED) { - kernel_backend = kernel_key.backend(); - } - if (kernel_layout == DataLayout::UNDEFINED) { - kernel_layout = kernel_key.layout(); - } - if (kernel_data_type == DataType::UNDEFINED) { - kernel_data_type = kernel_key.dtype(); - } - } - - auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( - "split", {kernel_backend, kernel_layout, kernel_data_type}); - VLOG(6) << "split API kernel key: [" << kernel_backend << ", " - << kernel_layout << ", " << kernel_data_type << "]"; - VLOG(6) << "split API kernel: " << kernel; - - auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); - - auto dense_x = PrepareData(x, kernel.InputAt(0), {}); - - // Calculate the number of out tensors - size_t out_number; - if (num_or_sections.GetData().size() == 1) { - out_number = num_or_sections.GetData()[0]; - } else { - out_number = num_or_sections.GetData().size(); - } - - std::vector out; - auto dense_outs = SetKernelOutput(out_number, kernel_backend, &out); - std::vector meta_outs; - for (size_t i = 0; i < out_number; ++i) { - meta_outs.push_back(dense_outs[i]); - } - - pten::SplitInferMeta( - MakeMetaTensor(*dense_x), num_or_sections, axis, &meta_outs); - - using kernel_signature = void (*)(const platform::DeviceContext&, - const pten::DenseTensor&, - const pten::ScalarArray&, - const pten::Scalar&, - std::vector&); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - *dense_x, - pten::ScalarArray(num_or_sections), - pten::Scalar(axis), - dense_outs); - - return out; -} } // namespace experimental } // namespace paddle diff --git a/paddle/pten/tests/api/test_split_api.cc b/paddle/pten/tests/api/test_split_api.cc index ac139832aa008..738137f16a5db 100644 --- a/paddle/pten/tests/api/test_split_api.cc +++ b/paddle/pten/tests/api/test_split_api.cc @@ -17,7 +17,6 @@ #include "paddle/pten/api/include/api.h" -#include "paddle/pten/api/include/manual_api.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h" #include "paddle/pten/core/kernel_registry.h" diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 6f64eaadc893a..03e60667c7fff 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -165,6 +165,11 @@ kernel : func : sign +- api : split + args : (const Tensor& x, const ScalarArray& num_or_sections, const Scalar& axis) + output : std::vector + invoke : split_impl(x, num_or_sections, axis) + - api : subtract args : (const Tensor& x, const Tensor& y) output : Tensor diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 629d68230a111..5bd0ed5d7d95c 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -78,6 +78,7 @@ def source_include(header_file_path): #include "glog/logging.h" +#include "paddle/pten/api/lib/api_invoke_impl.h" #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" #include "paddle/pten/api/lib/data_transform.h" diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index 96fabfc3db213..538f6c2170a70 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -124,6 +124,7 @@ def source_include(header_file_path): #include "glog/logging.h" +#include "paddle/pten/api/lib/api_invoke_impl.h" #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" #include "paddle/pten/api/lib/data_transform.h" From 046b283ae1e186902f2787194261ed2630f039d9 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Mon, 14 Feb 2022 07:49:17 +0000 Subject: [PATCH 02/11] rename api_invoke_impl to api_custom_impl --- paddle/pten/api/lib/CMakeLists.txt | 6 +++--- .../pten/api/lib/{api_invoke_impl.cc => api_custom_impl.cc} | 2 +- .../pten/api/lib/{api_invoke_impl.h => api_custom_impl.h} | 0 python/paddle/utils/code_gen/api_gen.py | 2 +- python/paddle/utils/code_gen/backward_api_gen.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) rename paddle/pten/api/lib/{api_invoke_impl.cc => api_custom_impl.cc} (98%) rename paddle/pten/api/lib/{api_invoke_impl.h => api_custom_impl.h} (100%) diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index b16d4d3ddda9e..e53d9e3f26968 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -82,9 +82,9 @@ add_custom_command( VERBATIM) cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform) -cc_library(api_invoke_impl SRCS api_invoke_impl.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) +cc_library(api_custom_impl SRCS api_custom_impl.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch) cc_library(sparse_api SRCS sparse_api.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) -cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform api_invoke_impl) -cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api api_invoke_impl) +cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform api_custom_impl) +cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api api_custom_impl) cc_library(wrapped_infermeta SRCS ${wrapped_infermeta_source_file} DEPS pten) diff --git a/paddle/pten/api/lib/api_invoke_impl.cc b/paddle/pten/api/lib/api_custom_impl.cc similarity index 98% rename from paddle/pten/api/lib/api_invoke_impl.cc rename to paddle/pten/api/lib/api_custom_impl.cc index afdabdff6e0c5..d897c58fc4607 100644 --- a/paddle/pten/api/lib/api_invoke_impl.cc +++ b/paddle/pten/api/lib/api_custom_impl.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/pten/api/lib/api_invoke_impl.h" +#include "paddle/pten/api/lib/api_custom_impl.h" #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" diff --git a/paddle/pten/api/lib/api_invoke_impl.h b/paddle/pten/api/lib/api_custom_impl.h similarity index 100% rename from paddle/pten/api/lib/api_invoke_impl.h rename to paddle/pten/api/lib/api_custom_impl.h diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 5bd0ed5d7d95c..61c389eec2134 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -78,7 +78,7 @@ def source_include(header_file_path): #include "glog/logging.h" -#include "paddle/pten/api/lib/api_invoke_impl.h" +#include "paddle/pten/api/lib/api_custom_impl.h" #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" #include "paddle/pten/api/lib/data_transform.h" diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index 538f6c2170a70..7a206ba9f310d 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -124,7 +124,7 @@ def source_include(header_file_path): #include "glog/logging.h" -#include "paddle/pten/api/lib/api_invoke_impl.h" +#include "paddle/pten/api/lib/api_custom_impl.h" #include "paddle/pten/api/lib/api_registry.h" #include "paddle/pten/api/lib/api_utils.h" #include "paddle/pten/api/lib/data_transform.h" From 999a49aa76ff9855bc32f1056ba03cf3195bce42 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 15 Feb 2022 09:11:58 +0000 Subject: [PATCH 03/11] remove manual_api --- paddle/pten/api/CMakeLists.txt | 2 +- paddle/pten/api/all.h | 1 - paddle/pten/api/include/manual_api.h | 34 -------- paddle/pten/api/lib/CMakeLists.txt | 7 +- paddle/pten/api/lib/api_custom_impl.cc | 72 ++++++++++------ paddle/pten/api/lib/api_custom_impl.h | 12 ++- paddle/pten/api/lib/api_declare.h | 1 - paddle/pten/api/lib/manual_api.cc | 84 ------------------- paddle/pten/api/lib/tensor.cc | 2 +- paddle/pten/core/infermeta_utils.cc | 10 +++ paddle/pten/core/infermeta_utils.h | 16 ++++ paddle/pten/infermeta/unary.cc | 10 +++ paddle/pten/infermeta/unary.h | 5 ++ paddle/pten/tests/api/CMakeLists.txt | 4 +- paddle/pten/tests/api/test_data_transform.cc | 1 - paddle/pten/tests/api/test_to_api.cc | 2 +- .../pten/tests/kernels/test_split_dev_api.cc | 1 - python/paddle/utils/code_gen/api.yaml | 5 ++ 18 files changed, 110 insertions(+), 159 deletions(-) delete mode 100644 paddle/pten/api/include/manual_api.h delete mode 100644 paddle/pten/api/lib/manual_api.cc diff --git a/paddle/pten/api/CMakeLists.txt b/paddle/pten/api/CMakeLists.txt index a993cb3ff8041..57cd596360489 100644 --- a/paddle/pten/api/CMakeLists.txt +++ b/paddle/pten/api/CMakeLists.txt @@ -1,2 +1,2 @@ add_subdirectory(lib) -cc_library(pten_api SRCS all.cc DEPS pten_function_api pten_bw_function_api manual_api sparse_api) +cc_library(pten_api SRCS all.cc DEPS pten_function_api pten_bw_function_api sparse_api) diff --git a/paddle/pten/api/all.h b/paddle/pten/api/all.h index a327bd998cb76..7221e1780506f 100644 --- a/paddle/pten/api/all.h +++ b/paddle/pten/api/all.h @@ -26,7 +26,6 @@ limitations under the License. */ // new pten apis #include "paddle/pten/api/include/api.h" -#include "paddle/pten/api/include/manual_api.h" #include "paddle/pten/api/include/sparse_api.h" #include "paddle/pten/api/include/tensor.h" diff --git a/paddle/pten/api/include/manual_api.h b/paddle/pten/api/include/manual_api.h deleted file mode 100644 index 7d6c2364c5ede..0000000000000 --- a/paddle/pten/api/include/manual_api.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/pten/api/include/tensor.h" -#include "paddle/pten/common/backend.h" -#include "paddle/pten/common/scalar.h" -#include "paddle/pten/common/scalar_array.h" - -/** - * This file stores some special APIs that are implemented manually - * or difficult to automatically generated. - */ - -namespace paddle { -namespace experimental { - -// TODO(chenweihang): Replace backend by place when place is ready -PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking); - -} // namespace experimental -} // namespace paddle diff --git a/paddle/pten/api/lib/CMakeLists.txt b/paddle/pten/api/lib/CMakeLists.txt index e53d9e3f26968..37723038c5993 100644 --- a/paddle/pten/api/lib/CMakeLists.txt +++ b/paddle/pten/api/lib/CMakeLists.txt @@ -3,11 +3,11 @@ add_subdirectory(utils) cc_library(ext_compat_utils SRCS ext_compat_utils.cc DEPS place) if (WITH_GPU) - nv_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce manual_api pten_function_api) + nv_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce pten_function_api) elseif (WITH_ROCM) - hip_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce manual_api pten_function_api) + hip_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce pten_function_api) else() - cc_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce manual_api pten_function_api) + cc_library(pten_tensor SRCS tensor.cc DEPS tensor_base dense_tensor pten_api_utils ext_compat_utils pten_enforce pten_function_api) endif() cc_library(kernel_dispatch SRCS kernel_dispatch.cc DEPS pten_tensor pten_context kernel_factory) @@ -83,7 +83,6 @@ add_custom_command( cc_library(pten_data_transform SRCS data_transform.cc DEPS pten_tensor transfer_layout_kernel cast_kernel data_device_transform) cc_library(api_custom_impl SRCS api_custom_impl.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) -cc_library(manual_api SRCS manual_api.cc DEPS pten_tensor pten kernel_dispatch) cc_library(sparse_api SRCS sparse_api.cc DEPS pten_tensor pten kernel_dispatch pten_data_transform) cc_library(pten_function_api SRCS ${api_source_file} DEPS pten_tensor pten kernel_dispatch pten_data_transform api_custom_impl) cc_library(pten_bw_function_api SRCS ${bw_api_source_file} DEPS pten_tensor pten kernel_dispatch backward_infermeta pten_data_transform pten_function_api api_custom_impl) diff --git a/paddle/pten/api/lib/api_custom_impl.cc b/paddle/pten/api/lib/api_custom_impl.cc index d897c58fc4607..30b23a4008a42 100644 --- a/paddle/pten/api/lib/api_custom_impl.cc +++ b/paddle/pten/api/lib/api_custom_impl.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include "paddle/pten/api/lib/data_transform.h" #include "paddle/pten/api/lib/kernel_dispatch.h" #include "paddle/pten/api/lib/utils/storage.h" -#include "paddle/pten/common/backend.h" #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/core/meta_tensor.h" #include "paddle/pten/infermeta/binary.h" @@ -32,28 +31,53 @@ limitations under the License. */ namespace paddle { namespace experimental { -PADDLE_API std::vector split_impl(const Tensor& x, - const ScalarArray& num_or_sections, - const Scalar& axis) { - Backend kernel_backend = Backend::UNDEFINED; - DataLayout kernel_layout = DataLayout::UNDEFINED; - DataType kernel_data_type = DataType::UNDEFINED; - - if (kernel_backend == Backend::UNDEFINED || - kernel_layout == DataLayout::UNDEFINED || - kernel_data_type == DataType::UNDEFINED) { - auto kernel_key_set = ParseKernelKeyByInputArgs(x); - auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); - if (kernel_backend == Backend::UNDEFINED) { - kernel_backend = kernel_key.backend(); - } - if (kernel_layout == DataLayout::UNDEFINED) { - kernel_layout = kernel_key.layout(); - } - if (kernel_data_type == DataType::UNDEFINED) { - kernel_data_type = kernel_key.dtype(); - } - } +Tensor copy_to_impl(const Tensor& x, Backend backend, bool blocking) { + // 1. Get kernel signature and kernel + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend); + auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); + auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( + "copy", kernel_key); + + VLOG(6) << "to API kernel key: " << kernel_key; + VLOG(6) << "to API kernel: " << kernel; + + // 2. Get Device Context + auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); + auto kernel_context = pten::KernelContext(dev_ctx); + + // 3. Auto data transform + auto dense_x = std::dynamic_pointer_cast(x.impl()); + kernel_context.EmplaceBackInput(dense_x.get()); + kernel_context.EmplaceBackAttr(blocking); + + // 4. Prepare outputs & InferMeta + auto dense_out = std::make_shared( + pten::make_intrusive( + pten::TransToPtenPlace(backend)), + pten::DenseTensorMeta()); + pten::MetaTensor meta_out(dense_out.get()); + pten::UnchangedInferMeta(*dense_x, &meta_out); + dense_out->mutable_data(pten::TransToPtenPlace(backend)); + kernel_context.EmplaceBackOutput(dense_out.get()); + Tensor out; + out.set_impl(dense_out); + + // 5. Call kernel + kernel(&kernel_context); + + return out; +} + +std::vector split_impl(const Tensor& x, + const ScalarArray& num_or_sections, + const Scalar& axis) { + auto kernel_key_set = ParseKernelKeyByInputArgs(x); + auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); + + Backend kernel_backend = kernel_key.backend(); + DataLayout kernel_layout = kernel_key.layout(); + DataType kernel_data_type = kernel_key.dtype(); auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( "split", {kernel_backend, kernel_layout, kernel_data_type}); @@ -99,4 +123,4 @@ PADDLE_API std::vector split_impl(const Tensor& x, } } // namespace experimental -} // namespace paddle \ No newline at end of file +} // namespace paddle diff --git a/paddle/pten/api/lib/api_custom_impl.h b/paddle/pten/api/lib/api_custom_impl.h index 1c66c69ff46db..15717e36f680b 100644 --- a/paddle/pten/api/lib/api_custom_impl.h +++ b/paddle/pten/api/lib/api_custom_impl.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -15,15 +15,19 @@ limitations under the License. */ #pragma once #include "paddle/pten/api/include/tensor.h" +#include "paddle/pten/common/backend.h" #include "paddle/pten/common/scalar.h" #include "paddle/pten/common/scalar_array.h" namespace paddle { namespace experimental { -PADDLE_API std::vector split_impl(const Tensor& x, - const ScalarArray& num_or_sections, - const Scalar& axis); +// TODO(chenweihang): Replace backend by place when place is ready +Tensor copy_to_impl(const Tensor& x, Backend backend, bool blocking); + +std::vector split_impl(const Tensor& x, + const ScalarArray& num_or_sections, + const Scalar& axis); } // namespace experimental } // namespace paddle diff --git a/paddle/pten/api/lib/api_declare.h b/paddle/pten/api/lib/api_declare.h index 998e01e41eae2..57cf6b7ff9ee7 100644 --- a/paddle/pten/api/lib/api_declare.h +++ b/paddle/pten/api/lib/api_declare.h @@ -18,5 +18,4 @@ limitations under the License. */ #include "paddle/pten/api/lib/api_registry.h" PT_DECLARE_API(Math); -PT_DECLARE_API(Utils); PT_DECLARE_API(SparseApi); diff --git a/paddle/pten/api/lib/manual_api.cc b/paddle/pten/api/lib/manual_api.cc deleted file mode 100644 index 3b3cdce8f2ec3..0000000000000 --- a/paddle/pten/api/lib/manual_api.cc +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include "paddle/pten/api/include/manual_api.h" - -#include - -#include "glog/logging.h" - -#include "paddle/pten/api/lib/api_registry.h" -#include "paddle/pten/api/lib/api_utils.h" -#include "paddle/pten/api/lib/data_transform.h" -#include "paddle/pten/api/lib/kernel_dispatch.h" -#include "paddle/pten/api/lib/utils/storage.h" -#include "paddle/pten/core/kernel_registry.h" -#include "paddle/pten/core/meta_tensor.h" -#include "paddle/pten/infermeta/unary.h" - -PT_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -PT_DECLARE_KERNEL(copy, GPU, ALL_LAYOUT); -#endif - -#ifdef PADDLE_WITH_XPU -PT_DECLARE_KERNEL(copy, XPU, ALL_LAYOUT); -#endif - -namespace paddle { -namespace experimental { - -PADDLE_API Tensor copy_to(const Tensor& x, Backend backend, bool blocking) { - // 1. Get kernel signature and kernel - auto kernel_key_set = ParseKernelKeyByInputArgs(x); - kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend); - auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); - auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError( - "copy", kernel_key); - - VLOG(0) << "to API kernel key: " << kernel_key; - VLOG(0) << "to API kernel: " << kernel; - - // 2. Get Device Context - auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); - auto kernel_context = pten::KernelContext(dev_ctx); - - // 3. Auto data transform - auto dense_x = std::dynamic_pointer_cast(x.impl()); - kernel_context.EmplaceBackInput(dense_x.get()); - kernel_context.EmplaceBackAttr(blocking); - - // 4. Prepare outputs & InferMeta - auto dense_out = std::make_shared( - pten::make_intrusive( - pten::TransToPtenPlace(backend)), - pten::DenseTensorMeta()); - pten::MetaTensor meta_out(dense_out.get()); - pten::UnchangedInferMeta(*dense_x, &meta_out); - dense_out->mutable_data(pten::TransToPtenPlace(backend)); - kernel_context.EmplaceBackOutput(dense_out.get()); - Tensor out; - out.set_impl(dense_out); - - // 5. Call kernel - kernel(&kernel_context); - - return out; -} - -} // namespace experimental -} // namespace paddle - -PT_REGISTER_API(Utils); diff --git a/paddle/pten/api/lib/tensor.cc b/paddle/pten/api/lib/tensor.cc index 6fb0d2706ca90..d351bd8b73102 100644 --- a/paddle/pten/api/lib/tensor.cc +++ b/paddle/pten/api/lib/tensor.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include #include "glog/logging.h" -#include "paddle/pten/api/include/manual_api.h" #include "paddle/pten/api/lib/ext_compat_utils.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/api/lib/utils/storage.h" @@ -61,6 +60,7 @@ namespace experimental { // declare cast api Tensor cast(const Tensor &x, DataType out_dtype); +Tensor copy_to(const Tensor &x, Backend backend, bool blocking); /////// Tensor Methods //////// diff --git a/paddle/pten/core/infermeta_utils.cc b/paddle/pten/core/infermeta_utils.cc index da2f0c92aa546..09d53f5043cd5 100644 --- a/paddle/pten/core/infermeta_utils.cc +++ b/paddle/pten/core/infermeta_utils.cc @@ -83,6 +83,16 @@ MetaTensor* InferMetaContext::MutableOutputAt(size_t idx) { return outputs_.at(idx).get(); } +std::vector InferMetaContext::MutableOutputBetween(size_t start, + size_t end) { + std::vector result; + result.reserve(end - start); + for (size_t i = start; i < end; ++i) { + result.emplace_back(*outputs_.at(i)); + } + return result; +} + MetaFnFactory& MetaFnFactory::Instance() { static MetaFnFactory g_meta_fn_map; return g_meta_fn_map; diff --git a/paddle/pten/core/infermeta_utils.h b/paddle/pten/core/infermeta_utils.h index 6de91db9382e2..9a8d0965f437a 100644 --- a/paddle/pten/core/infermeta_utils.h +++ b/paddle/pten/core/infermeta_utils.h @@ -50,6 +50,7 @@ class InferMetaContext { const MetaTensor& InputAt(size_t idx) const; std::vector InputsBetween(size_t start, size_t end) const; MetaTensor* MutableOutputAt(size_t idx); + std::vector MutableOutputBetween(size_t start, size_t end); template AttrType AttrAt(size_t idx) { @@ -176,6 +177,21 @@ struct InferMetaFnImpl { } }; + template + struct InferMetaFnCallHelper*, Tail...> { + template + static void Call(InferMetaContext* ctx, PreviousArgs&... pargs) { + const std::pair range = ctx->OutputRangeAt(out_idx); + std::vector tmp = + ctx->MutableOutputBetween(range.first, range.second); + std::vector* arg = &tmp; + InferMetaFnCallHelper< + Tail...>::template Call(ctx, + pargs..., + arg); + } + }; + // TODO(chenweihang): support vector output later template diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index ca59937399a22..f3dc33ea7361f 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -79,6 +79,13 @@ void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out) { out->set_layout(x.layout()); } +void CopyToInferMeta(const MetaTensor& x, + Backend backend, + bool blocking, + MetaTensor* out) { + UnchangedInferMeta(x, out); +} + void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, DataLayout layout, @@ -449,3 +456,6 @@ void SplitInferMeta(const MetaTensor& x, } } // namespace pten + +PT_REGISTER_INFER_META_FN(copy_to, pten::CopyToInferMeta); +PT_REGISTER_INFER_META_FN(split, pten::SplitInferMeta); diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h index 4c816c4adbc23..d6a2cad60cb85 100644 --- a/paddle/pten/infermeta/unary.h +++ b/paddle/pten/infermeta/unary.h @@ -41,6 +41,11 @@ void FlattenInferMeta(const MetaTensor& x, void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out); +void CopyToInferMeta(const MetaTensor& x, + Backend backend, + bool blocking, + MetaTensor* out); + void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, DataLayout layout, diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt index d875dbd4444ae..ccb28583fbc39 100644 --- a/paddle/pten/tests/api/CMakeLists.txt +++ b/paddle/pten/tests/api/CMakeLists.txt @@ -1,7 +1,7 @@ if(WITH_ROCM) - hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api manual_api glog) + hip_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api glog) else() - cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api manual_api glog) + cc_test(test_pten_tensor SRCS test_pten_tensor.cc DEPS pten_tensor pten_function_api glog) endif() cc_test(test_pten_exception SRCS test_pten_exception.cc DEPS gtest) diff --git a/paddle/pten/tests/api/test_data_transform.cc b/paddle/pten/tests/api/test_data_transform.cc index 434607df7942c..4d89195e5842b 100644 --- a/paddle/pten/tests/api/test_data_transform.cc +++ b/paddle/pten/tests/api/test_data_transform.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include #include "paddle/pten/api/include/api.h" -#include "paddle/pten/api/include/manual_api.h" #include "paddle/pten/common/complex.h" #include "paddle/pten/core/compat/convert_utils.h" #include "paddle/pten/core/dense_tensor.h" diff --git a/paddle/pten/tests/api/test_to_api.cc b/paddle/pten/tests/api/test_to_api.cc index 641c9e186d997..a62f4122c0e25 100644 --- a/paddle/pten/tests/api/test_to_api.cc +++ b/paddle/pten/tests/api/test_to_api.cc @@ -15,7 +15,7 @@ limitations under the License. */ #include #include -#include "paddle/pten/api/include/manual_api.h" +#include "paddle/pten/api/include/api.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/core/dense_tensor.h" diff --git a/paddle/pten/tests/kernels/test_split_dev_api.cc b/paddle/pten/tests/kernels/test_split_dev_api.cc index b4e3619e11a3a..a3f63212dd98c 100644 --- a/paddle/pten/tests/kernels/test_split_dev_api.cc +++ b/paddle/pten/tests/kernels/test_split_dev_api.cc @@ -18,7 +18,6 @@ limitations under the License. */ #include "paddle/pten/kernels/split_kernel.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" -#include "paddle/pten/api/include/manual_api.h" #include "paddle/pten/api/lib/utils/allocator.h" #include "paddle/pten/backends/cpu/cpu_context.h" #include "paddle/pten/core/dense_tensor.h" diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 24d631d2995e7..032ffba7383b2 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -34,6 +34,11 @@ kernel : func : conj +- api : copy_to + args : (const Tensor& x, Backend backend, bool blocking) + output : Tensor + invoke : copy_to_impl(x, backend, blocking) + - api : divide args : (const Tensor& x, const Tensor& y) output : Tensor From edc218492806ac717285841a2e02974653bf5944 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 15 Feb 2022 10:01:16 +0000 Subject: [PATCH 04/11] delete mutable_data in copy_to api --- paddle/pten/api/lib/api_custom_impl.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/paddle/pten/api/lib/api_custom_impl.cc b/paddle/pten/api/lib/api_custom_impl.cc index 30b23a4008a42..65d0fe022b672 100644 --- a/paddle/pten/api/lib/api_custom_impl.cc +++ b/paddle/pten/api/lib/api_custom_impl.cc @@ -58,7 +58,6 @@ Tensor copy_to_impl(const Tensor& x, Backend backend, bool blocking) { pten::DenseTensorMeta()); pten::MetaTensor meta_out(dense_out.get()); pten::UnchangedInferMeta(*dense_x, &meta_out); - dense_out->mutable_data(pten::TransToPtenPlace(backend)); kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); From 2cd1df74b3da75248bbef48c338e9decd46b19bd Mon Sep 17 00:00:00 2001 From: zyfncg Date: Tue, 15 Feb 2022 15:37:29 +0000 Subject: [PATCH 05/11] fix problem of copy_to --- paddle/pten/api/lib/api_custom_impl.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/pten/api/lib/api_custom_impl.cc b/paddle/pten/api/lib/api_custom_impl.cc index 65d0fe022b672..30b23a4008a42 100644 --- a/paddle/pten/api/lib/api_custom_impl.cc +++ b/paddle/pten/api/lib/api_custom_impl.cc @@ -58,6 +58,7 @@ Tensor copy_to_impl(const Tensor& x, Backend backend, bool blocking) { pten::DenseTensorMeta()); pten::MetaTensor meta_out(dense_out.get()); pten::UnchangedInferMeta(*dense_x, &meta_out); + dense_out->mutable_data(pten::TransToPtenPlace(backend)); kernel_context.EmplaceBackOutput(dense_out.get()); Tensor out; out.set_impl(dense_out); From 77853a9533feaae96b42c6aa138555aface5df35 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 16 Feb 2022 04:15:05 +0000 Subject: [PATCH 06/11] add unittest for infer_meta_fn_factory --- paddle/pten/tests/core/test_meta_fn_utils.cc | 55 ++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/paddle/pten/tests/core/test_meta_fn_utils.cc b/paddle/pten/tests/core/test_meta_fn_utils.cc index f4edc3555bc6c..59b45c5a7dd4d 100644 --- a/paddle/pten/tests/core/test_meta_fn_utils.cc +++ b/paddle/pten/tests/core/test_meta_fn_utils.cc @@ -60,5 +60,60 @@ TEST(MetaFnFactory, InferMetaFnExists) { EXPECT_EQ(dense_out1.dims()[1], dense_out2.dims()[1]); } +TEST(MetaFnFactory, CopyInferMetaFn) { + pten::DenseTensor dense_x; + dense_x.Resize(pten::framework::make_ddim({3, 4})); + + pten::MetaTensor meta_x(&dense_x); + pten::DenseTensor dense_out1; + pten::MetaTensor meta_out(&dense_out1); + pten::UnchangedInferMeta(meta_x, &meta_out); + + auto shared_meat_x = std::make_shared(&dense_x); + pten::DenseTensor dense_out2; + auto shared_meta_out = std::make_shared(&dense_out2); + pten::InferMetaContext ctx; + ctx.EmplaceBackInput(shared_meat_x); + ctx.EmplaceBackAttr(Backend::CPU); + ctx.EmplaceBackAttr(false); + ctx.EmplaceBackOutput(shared_meta_out); + ctx.SetMetaConfig(/*is_runtime=*/true); + pten::MetaFnFactory::Instance().Get("copy_to")(&ctx); + + EXPECT_EQ(dense_out1.dims().size(), dense_out2.dims().size()); + EXPECT_EQ(dense_out1.dims()[0], dense_out2.dims()[0]); + EXPECT_EQ(dense_out1.dims()[1], dense_out2.dims()[1]); +} + +TEST(MetaFnFactory, SplitInferMetaFn) { + pten::DenseTensor dense_x; + dense_x.Resize(pten::framework::make_ddim({4, 10})); + pten::MetaTensor meta_x(&dense_x); + auto shared_meat_x = std::make_shared(&dense_x); + + pten::DenseTensor dense_out1; + pten::DenseTensor dense_out2; + paddle::SmallVector> out; + out.push_back(std::make_shared(&dense_out1)); + out.push_back(std::make_shared(&dense_out2)); + pten::InferMetaContext ctx; + ctx.EmplaceBackInput(shared_meat_x); + ScalarArray num_or_sections{2, 2}; + Scalar axis{0}; + ctx.EmplaceBackAttr(num_or_sections); + ctx.EmplaceBackAttr(axis); + ctx.EmplaceBackOutputs(out); + ctx.SetMetaConfig(/*is_runtime=*/true); + pten::MetaFnFactory::Instance().Get("split")(&ctx); + + ASSERT_EQ(dense_out1.dims().size(), 2); + ASSERT_EQ(dense_out1.dims()[0], 2); + ASSERT_EQ(dense_out1.dims()[1], 10); + + ASSERT_EQ(dense_out2.dims().size(), 2); + ASSERT_EQ(dense_out2.dims()[0], 2); + ASSERT_EQ(dense_out2.dims()[1], 10); +} + } // namespace tests } // namespace pten From 73a2a1a1ed2740307a85f5562df5bfd3222f9aba Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 16 Feb 2022 05:37:41 +0000 Subject: [PATCH 07/11] fix split cofig in yaml --- python/paddle/utils/code_gen/api.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index ee38f16540416..792d81ffef257 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -172,7 +172,7 @@ - api : split args : (const Tensor& x, const ScalarArray& num_or_sections, const Scalar& axis) - output : std::vector + output : Tensor[] invoke : split_impl(x, num_or_sections, axis) - api : subtract From e8d5b8e83650cf9ec5c749984546a1f446c3e601 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 16 Feb 2022 06:12:55 +0000 Subject: [PATCH 08/11] fix split cofig in yaml --- python/paddle/utils/code_gen/api.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 792d81ffef257..0b025ab9586bb 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -35,7 +35,7 @@ func : conj - api : copy_to - args : (const Tensor& x, Backend backend, bool blocking) + args : (Tensor x, Backend backend, bool blocking) output : Tensor invoke : copy_to_impl(x, backend, blocking) @@ -171,7 +171,7 @@ func : sign - api : split - args : (const Tensor& x, const ScalarArray& num_or_sections, const Scalar& axis) + args : (Tensor x, ScalarArray num_or_sections, Scalar axis) output : Tensor[] invoke : split_impl(x, num_or_sections, axis) From 23bbd71f551f96ad7698d3e021e1d41d48d7c8a1 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 16 Feb 2022 06:59:38 +0000 Subject: [PATCH 09/11] modify sum api yaml --- python/paddle/utils/code_gen/api.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml index 0b025ab9586bb..3e22cf03554c7 100644 --- a/python/paddle/utils/code_gen/api.yaml +++ b/python/paddle/utils/code_gen/api.yaml @@ -190,7 +190,6 @@ func : SumInferMeta kernel : func : sum - param : [x, axis, dtype, keep_dim] data_type : x - api : zeros_like From 85b19f626e3f5d0a73f39580e401392b9d63ae08 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Thu, 17 Feb 2022 17:20:53 +0000 Subject: [PATCH 10/11] add copy_to wrapped infermeta --- paddle/pten/core/infermeta_utils.cc | 10 ++++ paddle/pten/core/infermeta_utils.h | 16 +++++- paddle/pten/infermeta/unary.cc | 10 ++++ paddle/pten/infermeta/unary.h | 5 ++ paddle/pten/tests/core/test_meta_fn_utils.cc | 55 ++++++++++++++++++++ 5 files changed, 95 insertions(+), 1 deletion(-) diff --git a/paddle/pten/core/infermeta_utils.cc b/paddle/pten/core/infermeta_utils.cc index da2f0c92aa546..09d53f5043cd5 100644 --- a/paddle/pten/core/infermeta_utils.cc +++ b/paddle/pten/core/infermeta_utils.cc @@ -83,6 +83,16 @@ MetaTensor* InferMetaContext::MutableOutputAt(size_t idx) { return outputs_.at(idx).get(); } +std::vector InferMetaContext::MutableOutputBetween(size_t start, + size_t end) { + std::vector result; + result.reserve(end - start); + for (size_t i = start; i < end; ++i) { + result.emplace_back(*outputs_.at(i)); + } + return result; +} + MetaFnFactory& MetaFnFactory::Instance() { static MetaFnFactory g_meta_fn_map; return g_meta_fn_map; diff --git a/paddle/pten/core/infermeta_utils.h b/paddle/pten/core/infermeta_utils.h index 59d2a4ed3c089..fa0e38dd5fa99 100644 --- a/paddle/pten/core/infermeta_utils.h +++ b/paddle/pten/core/infermeta_utils.h @@ -52,6 +52,7 @@ class InferMetaContext { const MetaTensor& InputAt(size_t idx) const; std::vector InputsBetween(size_t start, size_t end) const; MetaTensor* MutableOutputAt(size_t idx); + std::vector MutableOutputBetween(size_t start, size_t end); template AttrType AttrAt(size_t idx) { @@ -186,7 +187,20 @@ struct InferMetaFnImpl { } }; - // TODO(chenweihang): support vector output later + template + struct InferMetaFnCallHelper*, Tail...> { + template + static void Call(InferMetaContext* ctx, PreviousArgs&... pargs) { + const std::pair range = ctx->OutputRangeAt(out_idx); + std::vector tmp = + ctx->MutableOutputBetween(range.first, range.second); + std::vector* arg = &tmp; + InferMetaFnCallHelper< + Tail...>::template Call(ctx, + pargs..., + arg); + } + }; template struct InferMetaFnCallHelper { diff --git a/paddle/pten/infermeta/unary.cc b/paddle/pten/infermeta/unary.cc index ec9ba519b95ba..264bee2466d19 100644 --- a/paddle/pten/infermeta/unary.cc +++ b/paddle/pten/infermeta/unary.cc @@ -79,6 +79,13 @@ void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out) { out->set_layout(x.layout()); } +void CopyToInferMeta(const MetaTensor& x, + Backend backend, + bool blocking, + MetaTensor* out) { + UnchangedInferMeta(x, out); +} + void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, DataLayout layout, @@ -500,3 +507,6 @@ void TraceInferMeta( } } // namespace pten + +PT_REGISTER_INFER_META_FN(copy_to, pten::CopyToInferMeta); +PT_REGISTER_INFER_META_FN(split, pten::SplitInferMeta); diff --git a/paddle/pten/infermeta/unary.h b/paddle/pten/infermeta/unary.h index 5bdf1d491c634..01f560b8b4c6e 100644 --- a/paddle/pten/infermeta/unary.h +++ b/paddle/pten/infermeta/unary.h @@ -41,6 +41,11 @@ void FlattenInferMeta(const MetaTensor& x, void CastInferMeta(const MetaTensor& x, DataType out_dtype, MetaTensor* out); +void CopyToInferMeta(const MetaTensor& x, + Backend backend, + bool blocking, + MetaTensor* out); + void CreateLikeInferMeta(const MetaTensor& x, DataType dtype, DataLayout layout, diff --git a/paddle/pten/tests/core/test_meta_fn_utils.cc b/paddle/pten/tests/core/test_meta_fn_utils.cc index f4edc3555bc6c..59b45c5a7dd4d 100644 --- a/paddle/pten/tests/core/test_meta_fn_utils.cc +++ b/paddle/pten/tests/core/test_meta_fn_utils.cc @@ -60,5 +60,60 @@ TEST(MetaFnFactory, InferMetaFnExists) { EXPECT_EQ(dense_out1.dims()[1], dense_out2.dims()[1]); } +TEST(MetaFnFactory, CopyInferMetaFn) { + pten::DenseTensor dense_x; + dense_x.Resize(pten::framework::make_ddim({3, 4})); + + pten::MetaTensor meta_x(&dense_x); + pten::DenseTensor dense_out1; + pten::MetaTensor meta_out(&dense_out1); + pten::UnchangedInferMeta(meta_x, &meta_out); + + auto shared_meat_x = std::make_shared(&dense_x); + pten::DenseTensor dense_out2; + auto shared_meta_out = std::make_shared(&dense_out2); + pten::InferMetaContext ctx; + ctx.EmplaceBackInput(shared_meat_x); + ctx.EmplaceBackAttr(Backend::CPU); + ctx.EmplaceBackAttr(false); + ctx.EmplaceBackOutput(shared_meta_out); + ctx.SetMetaConfig(/*is_runtime=*/true); + pten::MetaFnFactory::Instance().Get("copy_to")(&ctx); + + EXPECT_EQ(dense_out1.dims().size(), dense_out2.dims().size()); + EXPECT_EQ(dense_out1.dims()[0], dense_out2.dims()[0]); + EXPECT_EQ(dense_out1.dims()[1], dense_out2.dims()[1]); +} + +TEST(MetaFnFactory, SplitInferMetaFn) { + pten::DenseTensor dense_x; + dense_x.Resize(pten::framework::make_ddim({4, 10})); + pten::MetaTensor meta_x(&dense_x); + auto shared_meat_x = std::make_shared(&dense_x); + + pten::DenseTensor dense_out1; + pten::DenseTensor dense_out2; + paddle::SmallVector> out; + out.push_back(std::make_shared(&dense_out1)); + out.push_back(std::make_shared(&dense_out2)); + pten::InferMetaContext ctx; + ctx.EmplaceBackInput(shared_meat_x); + ScalarArray num_or_sections{2, 2}; + Scalar axis{0}; + ctx.EmplaceBackAttr(num_or_sections); + ctx.EmplaceBackAttr(axis); + ctx.EmplaceBackOutputs(out); + ctx.SetMetaConfig(/*is_runtime=*/true); + pten::MetaFnFactory::Instance().Get("split")(&ctx); + + ASSERT_EQ(dense_out1.dims().size(), 2); + ASSERT_EQ(dense_out1.dims()[0], 2); + ASSERT_EQ(dense_out1.dims()[1], 10); + + ASSERT_EQ(dense_out2.dims().size(), 2); + ASSERT_EQ(dense_out2.dims()[0], 2); + ASSERT_EQ(dense_out2.dims()[1], 10); +} + } // namespace tests } // namespace pten From 714a77ef52968463ae302f8c7ee203b0aeaee021 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 23 Feb 2022 17:32:05 +0000 Subject: [PATCH 11/11] rollback copy impl --- paddle/phi/api/lib/api_custom_impl.cc | 43 ++++++++++++++------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index cd6ccbbcf7d99..66dba2cc2e1b0 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -32,36 +32,39 @@ namespace paddle { namespace experimental { Tensor copy_to_impl(const Tensor& x, Backend backend, bool blocking) { + // 1. Get kernel signature and kernel auto kernel_key_set = ParseKernelKeyByInputArgs(x); kernel_key_set.backend_set = kernel_key_set.backend_set | BackendSet(backend); auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey(); auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( "copy", kernel_key); - VLOG(6) << "to API kernel key: " << kernel_key; - VLOG(6) << "to API kernel: " << kernel; + VLOG(0) << "to API kernel key: " << kernel_key; + VLOG(0) << "to API kernel: " << kernel; + // 2. Get Device Context auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); - - auto dense_x = TensorToDenseTensor(x); - - Tensor out; - auto kernel_out = SetKernelOutput(kernel_key.backend(), &out); - phi::MetaTensor meta_out(kernel_out); + auto kernel_context = phi::KernelContext(dev_ctx); + + // 3. Auto data transform + auto dense_x = std::dynamic_pointer_cast(x.impl()); + kernel_context.EmplaceBackInput(dense_x.get()); + kernel_context.EmplaceBackAttr(blocking); + + // 4. Prepare outputs & InferMeta + auto dense_out = std::make_shared( + phi::make_intrusive( + phi::TransToPtenPlace(backend)), + phi::DenseTensorMeta()); + phi::MetaTensor meta_out(dense_out.get()); phi::UnchangedInferMeta(*dense_x, &meta_out); + dense_out->mutable_data(phi::TransToPtenPlace(backend)); + kernel_context.EmplaceBackOutput(dense_out.get()); + Tensor out; + out.set_impl(dense_out); - using kernel_signature = void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - phi::Place, - bool, - phi::DenseTensor*); - - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - *dense_x, - phi::TransToPtenPlace(backend), - blocking, - kernel_out); + // 5. Call kernel + kernel(&kernel_context); return out; }