Commit

Merge branch 'op2func_refactor' of https://github.com/chenwhql/Paddle into op2func
zyfncg committed Oct 19, 2021
2 parents 05aac0d + e0710fd commit 1f89db1
Showing 28 changed files with 574 additions and 636 deletions.
3 changes: 2 additions & 1 deletion paddle/fluid/framework/CMakeLists.txt
@@ -390,7 +390,7 @@ cc_library(save_load_util SRCS save_load_util.cc DEPS tensor scope layer)
cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer)
cc_library(generator SRCS generator.cc DEPS enforce place)

cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt)
cc_library(tcmpt_utils SRCS tcmpt_utils.cc DEPS lod_tensor selected_rows place tcmpt var_type_traits)

# Get the current working branch
execute_process(
@@ -454,3 +454,4 @@ if(WITH_TESTING AND TEST selected_rows_test)
endif()

cc_test(scope_guard_test SRCS scope_guard_test.cc)
cc_test(tcmpt_utils_test SRCS tcmpt_utils_test.cc DEPS tcmpt_utils)
318 changes: 92 additions & 226 deletions paddle/fluid/framework/operator.cc

Large diffs are not rendered by default.

36 changes: 20 additions & 16 deletions paddle/fluid/framework/operator.h
@@ -116,8 +116,6 @@ inline std::string GradOriginalVarName(const std::string& grad_var_name) {
const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var);
Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);

class ExecutionContext;
class OperatorBase;

@@ -534,13 +532,15 @@ class OperatorWithKernel : public OperatorBase {
}

/* member functions for adapting to tcmpt lib */
// TODO(chenweihang): Temporarily as a class method
virtual pt::KernelKey ConstructPtKernelKey(
const VariableValueMap& inputs, const AttributeMap& attrs,
const platform::Place& ctx_place) const;

virtual pt::KernelContext ConstructPtKernelContext(
const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;
/** In the Tensor calculation library, the new Kernel adopts a clearer and
 * more streamlined design. In some cases the Kernel's arguments do not
 * match the input and output arguments registered in the original OpMaker,
 * so we use a map to record the arguments the kernel requires. When a
 * Kernel is selected during Op execution, the original Op's arguments are
 * picked according to the signature returned by GetExpectedPtKernelArgs.
 */
virtual KernelSignature GetExpectedPtKernelArgs(
const ExecutionContext& ctx) const;
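
For illustration, a minimal sketch of such an override, assuming a hypothetical "scale" operator whose kernel takes input X, attributes scale and bias, and output Out (the op and argument names are assumptions, not taken from this commit):

// Hypothetical example only: map the OpMaker arguments of a "scale" op
// onto the new kernel's (inputs, attrs, outputs) argument order.
KernelSignature ScaleOp::GetExpectedPtKernelArgs(
    const ExecutionContext& ctx) const {
  return std::make_pair(
      "scale",
      std::make_tuple(paddle::SmallVector<std::string>({"X"}),
                      paddle::SmallVector<std::string>({"scale", "bias"}),
                      paddle::SmallVector<std::string>({"Out"})));
}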

private:
void RunImpl(const Scope& scope, const platform::Place& place) const final;
@@ -563,8 +563,9 @@ class OperatorWithKernel : public OperatorBase {
const std::vector<std::string>& inplace_vars,
const Scope& exec_scope) const;

void ChooseKernel(const RuntimeContext& ctx, const Scope& scope,
const platform::Place& place) const;
OpKernelType InnerGetExpectedKernelType(const ExecutionContext& ctx) const;

void ChooseKernel(const ExecutionContext& ctx) const;

void HandleComplexGradToRealGrad(const Scope& scope,
RuntimeContext* ctx) const;
@@ -582,8 +583,10 @@ class OperatorWithKernel : public OperatorBase {
const std::string& name) const;

/* member functions for adapting to tcmpt lib */
void ChoosePtKernel(const RuntimeContext& ctx,
const platform::DeviceContext& dev_ctx) const;
void ChoosePtKernel(const ExecutionContext& ctx) const;

pt::KernelContext BuildPtKernelContext(
const RuntimeContext& ctx, const platform::DeviceContext& dev_ctx) const;

protected:
mutable std::unique_ptr<OpKernelType> kernel_type_;
@@ -595,10 +598,11 @@ class OperatorWithKernel : public OperatorBase {
mutable bool all_kernels_must_compute_runtime_shape_ = false;
mutable std::mutex cache_update_mutex_;
mutable bool enable_cache_transfer_scope_ = false;
// TODO(chenweihang): Similar duplicate members are used for new tcmpt lib,
// maybe we have better impl methods
// NOTE(chenweihang): Similar op members are used to adapt to the new
// tcmpt kernel; if there is a better design in the future, we may polish
// the implementation here
mutable bool run_pt_kernel_ = false;
mutable std::unique_ptr<pt::KernelKey> pt_kernel_key_;
mutable std::unique_ptr<KernelSignature> pt_kernel_signature_;
mutable std::unique_ptr<pt::Kernel> pt_kernel_;
};

131 changes: 115 additions & 16 deletions paddle/fluid/framework/tcmpt_utils.cc
@@ -12,11 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <sstream>

#include "paddle/fluid/framework/tcmpt_utils.h"

#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/string/string_helper.h"

namespace paddle {
namespace framework {
@@ -62,7 +65,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
proto::VarType::Type type) {
return MakeTensorImpl<pt::DenseTensor, LoDTensor>(
tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
pt::TransToPtLayout(tensor.layout()));
pt::TransToPtDataLayout(tensor.layout()));
}

template <>
@@ -71,21 +74,7 @@ std::shared_ptr<pt::DenseTensor> MakeTensorImpl<pt::DenseTensor>(
proto::VarType::Type type) {
return MakeTensorImpl<pt::DenseTensor, Tensor>(
tensor, pt::TransToPtBackend(place), pt::TransToPtDataType(type),
pt::TransToPtLayout(tensor.layout()));
}

template <>
void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
LoDTensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pt::TransToProtoVarType(tensor_impl->type()));
}

template <>
void ShareTensorImpl<pt::DenseTensor>(pt::DenseTensor* tensor_impl,
Tensor* out) {
out->ResetHolderWithType(tensor_impl->allocation(),
pt::TransToProtoVarType(tensor_impl->type()));
pt::TransToPtDataLayout(tensor.layout()));
}

std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
@@ -164,5 +153,115 @@ std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
return nullptr;
}

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key) {
proto::VarType::Type data_type = pt::TransToProtoVarType(kernel_key.dtype());
platform::Place place = pt::TransToFluidPlace(kernel_key.backend());
DataLayout data_layout = pt::TransToFluidDataLayout(kernel_key.layout());
LibraryType library_type = LibraryType::kPlain;
if (kernel_key.backend() == pt::Backend::kMKLDNN) {
library_type = LibraryType::kMKLDNN;
} else if (kernel_key.backend() == pt::Backend::kCUDNN) {
library_type = LibraryType::kCUDNN;
} else {
// do nothing
}
// TODO(chenweihang): the customized_type_value is lost
return OpKernelType(data_type, place, data_layout, library_type);
}

pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type) {
pt::Backend backend = pt::TransToPtBackend(kernel_type.place_);
if (kernel_type.library_type_ == LibraryType::kMKLDNN) {
backend = pt::Backend::kMKLDNN;
} else if (kernel_type.library_type_ == LibraryType::kCUDNN) {
backend = pt::Backend::kCUDNN;
} else {
// do nothing
}
pt::DataLayout layout = pt::TransToPtDataLayout(kernel_type.data_layout_);
pt::DataType dtype = pt::TransToPtDataType(kernel_type.data_type_);
return pt::KernelKey(backend, layout, dtype);
}
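
A hedged usage sketch of the two translators above (the concrete values are chosen for illustration); note that the pt-to-fluid direction loses customized_type_value, per the TODO above:

// Illustrative only: translate a fluid kernel type to the new library's
// key and back again.
OpKernelType fluid_type(proto::VarType::FP32, platform::CPUPlace(),
                        DataLayout::kNCHW, LibraryType::kPlain);
pt::KernelKey pt_key = TransOpKernelTypeToPtKernelKey(fluid_type);
OpKernelType round_trip = TransPtKernelKeyToOpKernelType(pt_key);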

KernelSignatureMap& KernelSignatureMap::Instance() {
static KernelSignatureMap g_kernel_signature_map;
return g_kernel_signature_map;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetInputArgsNames() {
for (int i = 0; i < op_proto_->inputs_size(); ++i) {
auto& in = op_proto_->inputs()[i];
auto& in_name = in.name();
if ((in.has_extra() && in.extra()) || (in.has_quant() && in.quant())) {
VLOG(1) << "Parse PtKernel input: skip extra & quant input - " << in_name;
continue;
}
// If the op contains a dispensable input, we should override the
// GetExpectedPtKernelArgs method ourselves
if (in.has_dispensable() && in.dispensable()) {
VLOG(1) << "Parse PtKernel input: skip dispensable input - " << in_name;
continue;
}
VLOG(1) << "Parse PtKernel input: " << in_name;
input_names_.emplace_back(in_name);
}
return input_names_;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetOutputArgsNames() {
for (int i = 0; i < op_proto_->outputs_size(); ++i) {
auto& out = op_proto_->outputs()[i];
auto& out_name = out.name();
// TODO(chenweihang): outputs also need to skip some cases
VLOG(1) << "Parse PtKernel output: " << out_name;
output_names_.emplace_back(out_name);
}
return output_names_;
}

const paddle::SmallVector<std::string>&
KernelArgsNameMakerByOpProto::GetAttrsArgsNames() {
for (int i = 0; i < op_proto_->attrs_size(); ++i) {
auto& attr = op_proto_->attrs()[i];
auto& attr_name = attr.name();
if (attr_name == "use_mkldnn" || attr_name == "op_role" ||
attr_name == "op_role_var" || attr_name == "op_namescope" ||
attr_name == "op_callstack" || attr_name == "op_device") {
VLOG(1) << "Parse PtKernel attribute: skip needless attr - " << attr_name;
continue;
}
if ((attr.has_extra() && attr.extra()) ||
(attr.has_quant() && attr.quant())) {
VLOG(1) << "Parse PtKernel attribute: skip extra & quant attr - "
<< attr_name;
continue;
}
VLOG(1) << "Parse PtKernel attribute: " << attr_name;
attr_names_.emplace_back(attr_name);
}

return attr_names_;
}

KernelSignature KernelArgsNameMakerByOpProto::GetKernelSignature() {
return std::make_pair(
op_proto_->type(),
std::make_tuple(GetInputArgsNames(), GetAttrsArgsNames(),
GetOutputArgsNames()));
}

std::string KernelSignatureToString(const KernelSignature& signature) {
std::stringstream os;
os << "Kernel Signature - name: " << signature.first << "; inputs: "
<< string::join_strings(std::get<0>(signature.second), ", ")
<< "; attributes: "
<< string::join_strings(std::get<1>(signature.second), ", ")
<< "; outputs: "
<< string::join_strings(std::get<2>(signature.second), ", ");
return os.str();
}
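
As a worked example (hypothetical values): a signature named "scale" with inputs {X}, attributes {scale, bias}, and outputs {Out} would render as:

Kernel Signature - name: scale; inputs: X; attributes: scale, bias; outputs: Out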

} // namespace framework
} // namespace paddle
90 changes: 83 additions & 7 deletions paddle/fluid/framework/tcmpt_utils.h
@@ -14,14 +14,25 @@ limitations under the License. */

#pragma once

#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/op_kernel_type.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/imperative/type_defs.h"
#include "paddle/fluid/platform/macros.h"
#include "paddle/fluid/platform/place.h"

#include "paddle/tcmpt/api/include/core.h"
#include "paddle/utils/flat_hash_map.h"
#include "paddle/utils/small_vector.h"

namespace paddle {
namespace framework {

/* tensor translate */

template <typename PtTensorImplT, typename VariableT>
std::shared_ptr<PtTensorImplT> MakeTensorImpl(const VariableT& tensor,
pt::Backend backend,
@@ -38,16 +49,81 @@ std::shared_ptr<PtTensorImplT> MakeTensorImpl(const Tensor& tensor,
const platform::Place& place,
proto::VarType::Type type);

template <typename PtTensorImplT>
void ShareTensorImpl(PtTensorImplT* tensor_impl, LoDTensor* out);

template <typename PtTensorImplT>
void ShareTensorImpl(PtTensorImplT* tensor_impl, Tensor* out);

std::shared_ptr<pt::TensorInterface> InputVariableToPtTensor(
const framework::Variable& variable, const pt::TensorArgDef& arg_def);
std::shared_ptr<pt::TensorInterface> OutputVariableToPtTensor(
framework::Variable* variable, const pt::TensorArgDef& arg_def);

/* Kernel Key translate */

OpKernelType TransPtKernelKeyToOpKernelType(const pt::KernelKey& kernel_key);
pt::KernelKey TransOpKernelTypeToPtKernelKey(const OpKernelType& kernel_type);

/* Kernel Args parse */

// TODO(chenweihang): we can generate this map from proto info at compile time
class KernelSignatureMap {
public:
static KernelSignatureMap& Instance();

bool Has(const std::string& op_type) const {
return map_.find(op_type) != map_.end();
}

void Insert(const std::string& op_type, const KernelSignature& signature) {
PADDLE_ENFORCE_NE(
Has(op_type), true,
platform::errors::AlreadyExists(
"Operator (%s)'s Kernel Signature has been registered.", op_type));
map_.insert({op_type, signature});
}

const KernelSignature* GetNullable(const std::string& op_type) const {
auto it = map_.find(op_type);
if (it == map_.end()) {
return nullptr;
} else {
return &it->second;
}
}

private:
KernelSignatureMap() = default;
paddle::flat_hash_map<std::string, KernelSignature> map_;

DISABLE_COPY_AND_ASSIGN(KernelSignatureMap);
};
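
A hedged sketch of the intended lookup pattern (op_type is an assumed local variable): GetNullable lets callers distinguish a missing signature from a cached one without triggering an enforce error.

// Hypothetical, for illustration only: probe the cache before parsing.
const KernelSignature* sig =
    KernelSignatureMap::Instance().GetNullable(op_type);
if (sig != nullptr) {
  VLOG(1) << KernelSignatureToString(*sig);  // cache hit
}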

class KernelArgsNameMaker {
public:
virtual ~KernelArgsNameMaker() {}
virtual const paddle::SmallVector<std::string>& GetInputArgsNames() = 0;
virtual const paddle::SmallVector<std::string>& GetOutputArgsNames() = 0;
virtual const paddle::SmallVector<std::string>& GetAttrsArgsNames() = 0;
};

class KernelArgsNameMakerByOpProto : public KernelArgsNameMaker {
public:
explicit KernelArgsNameMakerByOpProto(framework::proto::OpProto* op_proto)
: op_proto_(op_proto) {}

~KernelArgsNameMakerByOpProto() {}

const paddle::SmallVector<std::string>& GetInputArgsNames() override;
const paddle::SmallVector<std::string>& GetOutputArgsNames() override;
const paddle::SmallVector<std::string>& GetAttrsArgsNames() override;

KernelSignature GetKernelSignature();

private:
framework::proto::OpProto* op_proto_;

paddle::SmallVector<std::string> input_names_;
paddle::SmallVector<std::string> output_names_;
paddle::SmallVector<std::string> attr_names_;
};

std::string KernelSignatureToString(const KernelSignature& signature);
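
Putting the pieces together, a hedged end-to-end sketch (the OpProto pointer is assumed to come from the op registry; the helper name is hypothetical):

// Hypothetical glue code: derive an op's signature from its proto once,
// log it, and cache it for later executions.
void RegisterSignatureOnce(proto::OpProto* op_proto) {
  auto& sig_map = KernelSignatureMap::Instance();
  if (!sig_map.Has(op_proto->type())) {
    KernelArgsNameMakerByOpProto maker(op_proto);
    KernelSignature signature = maker.GetKernelSignature();
    VLOG(1) << KernelSignatureToString(signature);
    sig_map.Insert(op_proto->type(), signature);
  }
}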

} // namespace framework
} // namespace paddle

1 comment on commit 1f89db1

@paddle-bot-old paddle-bot-old bot commented on 1f89db1 Oct 19, 2021


🕵️ CI failures summary

🔍 PR: #21 Commit ID: 1f89db1 contains failed CI.

🔹 Failed: PR-CI-APPROVAL

approve_failed
2021-10-19 10:21:31 Saving to: 'bk.txt'
2021-10-19 10:21:31 0K 100% 5.41M=0s
2021-10-19 10:21:31 2021-10-19 10:21:31 (5.41 MB/s) - 'bk.txt' saved [5/5])
2021-10-19 10:21:38 ****************
2021-10-19 10:21:38 0. You must have one RD (lanxianghit (Recommend), phlrain or luotao1) approval for changing the FLAGS, which manages the environment variables.
2021-10-19 10:21:38 1. You must have Dianhai approval for changing 20+ files or adding 1000+ lines of content.
2021-10-19 10:21:38 2. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for paddle/fluid/framework/operator.h, which manages the underlying code for fluid.
2021-10-19 10:21:38 3. You must have one RD (zhiqiu (Recommend) , phlrain) approval for the changes of paddle/fluid/pybind/op_function_generator.cc, which manages the logic of automatic generating op functions for dygraph.
2021-10-19 10:21:38 4. You must have one RD (XiaoguangHu01,chenwhql,zhiqiu,Xreki,luotao1) approval for the usage of const_cast.
2021-10-19 10:21:38 5. You must have one RD (Avin0323(Recommend) or zhouwei25 or wanghuancoder or luotao1) approval for modifying unity_build_rule.cmake, which manages the rules of Unity Build.
2021-10-19 10:21:38 There are 6 approval errors.
2021-10-19 10:21:38 ****************
2021-10-19 10:21:38 + EXCODE=6
2021-10-19 10:21:38 + echo 'EXCODE: 6'
2021-10-19 10:21:38 EXCODE: 6
2021-10-19 10:21:38 + echo 'ipipe_log_param_EXCODE: 6'
2021-10-19 10:21:38 ipipe_log_param_EXCODE: 6
2021-10-19 10:21:38 + exit 6

🔹 Failed: PR-CI-OP-benchmark

Unknown Failed
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "mean"(paddle/fluid/operators/mean_op.cu) in benchmark.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "mean"(paddle/fluid/operators/mean_op.cu) in benchmark.
2021-10-19 11:15:00 + for op_name in '${!CHANGE_OP_MAP[@]}'
2021-10-19 11:15:00 + '[' -z '' ']'
2021-10-19 11:15:00 + exit_code=8
2021-10-19 11:15:00 + LOG '[ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:271] [ERROR] Missing test script of "fill_any_like"(paddle/fluid/operators/fill_any_like_op.cu) in benchmark.
2021-10-19 11:15:00 + for op_name in '${!CHANGE_OP_MAP[@]}'
2021-10-19 11:15:00 + '[' -z matmul,matmul,matmul.json,True ']'
2021-10-19 11:15:00 + '[' 8 -ne 0 ']'
2021-10-19 11:15:00 + LOG '[INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:275] [INFO] See https://github.com/PaddlePaddle/Paddle/wiki/PR-CI-OP-benchmark-Manual for details.
2021-10-19 11:15:00 + LOG '[INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-19 11:15:00 + echo '[tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.'
2021-10-19 11:15:00 [tools/test_ci_op_benchmark.sh:276] [INFO] Or you can apply for one RD (Avin0323(Recommend), Xreki, luotao1) approval to pass this PR.
2021-10-19 11:15:00 + exit 8
2021-10-19 11:15:00 {build code state=8}

🔹 Failed: PR-CI-Windows

test_failed
2021-10-19 12:24:57 The following tests FAILED:
2021-10-19 12:24:57 677 - test_jit_save_load (Failed)
2021-10-19 12:24:57 Errors while running CTest
2021-10-19 12:24:58 =========================================
2021-10-19 12:24:58 There are more than 10 failed unit tests, so no unit test retry!!!
2021-10-19 12:24:58 =========================================
2021-10-19 12:24:58 34 - lodtensor_printer_test (Timeout)
2021-10-19 12:24:58 46 - unroll_array_ops_test (Timeout)
2021-10-19 12:24:58 47 - data_type_test (Timeout)
2021-10-19 12:24:58 63 - data_layout_transform_test (Timeout)
2021-10-19 12:24:58 64 - program_desc_test (Timeout)
2021-10-19 12:24:58 75 - prune_test (Timeout)
2021-10-19 12:24:58 77 - selected_rows_test (Timeout)
2021-10-19 12:24:58 82 - dlpack_tensor_test (Timeout)
2021-10-19 12:24:58 91 - graph_helper_test (Timeout)
2021-10-19 12:24:58 99 - test_seqpool_concat_fuse_pass (Timeout)
2021-10-19 12:24:58 115 - test_conv_bias_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 116 - test_conv_activation_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 118 - test_conv_elementwise_add_mkldnn_fuse_pass (Timeout)
2021-10-19 12:24:58 126 - test_cpu_quantize_pass (Timeout)
2021-10-19 12:24:58 128 - test_reshape_transpose_matmul_mkldnn_fuse_pass (Timeout)
