Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix RecordEvent interface #39675

Merged
merged 7 commits into from
Feb 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion paddle/fluid/framework/data_layout_transform.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout,

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("ext_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
} else {
Expand Down
16 changes: 11 additions & 5 deletions paddle/fluid/framework/operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device/device_wrapper.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/common/scalar_array.h"
#include "paddle/pten/core/kernel_factory.h"
Expand Down Expand Up @@ -261,10 +262,12 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
// TODO(wangchaochaohu): refine code to use only one RecordEvent.
// In order to record both the per-op-type cost time
// and the per-op-name cost time, we currently set two events.
platform::RecordEvent op_type_record_event(Type());
platform::RecordEvent op_type_record_event(
Type().c_str(), platform::TracerEventType::Operator, 1);
auto op_name = platform::OpName(outputs_, Type());
platform::RecordEvent op_name_record_event(
op_name, platform::EventRole::kUniqueOp);
op_name, platform::TracerEventType::Operator, 1,
platform::EventRole::kUniqueOp);
RunImpl(scope, place);
}

Expand Down Expand Up @@ -1253,7 +1256,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
Scope* transfer_scope = nullptr;
{
platform::RecordEvent record_event("prepare_data",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
if (need_prepare_data_) {
transfer_scope = PrepareData(scope, *kernel_type_,
&transfered_inplace_vars, runtime_ctx);
Expand All @@ -1265,7 +1269,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,

if (!all_kernels_must_compute_runtime_shape_) {
platform::RecordEvent record_event("infer_shape",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
RuntimeInferShapeContext infer_shape_ctx(*this, *runtime_ctx);
this->Info().infer_shape_(&infer_shape_ctx);
}
Expand All @@ -1278,7 +1283,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// not Scope. Imperative mode only pass inputs and get outputs.
{
platform::RecordEvent record_event("compute",
platform::EventRole::kInnerOp);
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
if (run_pten_kernel_) {
pten::KernelContext pt_kernel_context;
// Do data transform before building KernelContext
Expand Down
22 changes: 13 additions & 9 deletions paddle/fluid/imperative/prepared_operator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
#endif
#include "paddle/fluid/framework/library_type.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"

DECLARE_bool(check_nan_inf);
DECLARE_bool(benchmark);
Expand Down Expand Up @@ -348,16 +348,18 @@ static void PreparedOpRunImpl(
framework::Scope scope;

{
platform::RecordEvent record_event(op.Type() + " infer_shape",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::infer_shape",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
DygraphInferShapeContext<VarType> infer_shape_ctx(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx);
}

{
platform::RecordEvent record_event(op.Type() + " compute",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::compute",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

这种尽量避免，新执行器只需要记录compute，无需op name，老执行器可以保持和原来一致 (Translation: try to avoid this pattern — the new executor only needs to record "compute" without the op name; the old executor can stay consistent with the original behavior.)

platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);

func(DygraphExecutionContext<VarType>(op, scope, *dev_ctx, ctx, ins, outs,
attrs, default_attrs));
Expand Down Expand Up @@ -403,16 +405,18 @@ static void PreparedOpRunPtImpl(
const framework::AttributeMap& attrs,
const framework::AttributeMap& default_attrs) {
{
platform::RecordEvent record_event(op.Type() + " infer_shape",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::infer_shape",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);
DygraphInferShapeContext<VarType> infer_shape_ctx(
&ins, &outs, &attrs, &default_attrs, op.Type(), &kernel_type);
op.Info().infer_shape_(&infer_shape_ctx);
}

{
platform::RecordEvent record_event(op.Type() + " compute",
platform::EventRole::kInnerOp);
platform::RecordEvent record_event(op.Type() + "::compute",
platform::TracerEventType::OperatorInner,
1, platform::EventRole::kInnerOp);

PreparePtenData<VarType>(pt_kernel, pt_kernel_signature, ins);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();

Expand All @@ -73,8 +74,9 @@ class EltwiseAddMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dy, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p,
*reorder_dst_memory_p);
astream.wait();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
handler.AcquireDstMemory(dx, dout->format(), ctx.GetPlace());
auto reorder_p =
handler.AcquireReorder(reorder_dst_memory_p, reorder_src_memory_p);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);

reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
astream.wait();
Expand All @@ -78,8 +79,9 @@ class EltwiseSubMKLDNNGradKernel : public ElemwiseGradKernel<T> {
reorder_attr.set_output_scales(0, scales);
auto reorder_p = std::make_shared<dnnl::reorder>(
*(reorder_src_memory_p), *(reorder_dst_memory_p), reorder_attr);
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *reorder_src_memory_p,
*reorder_dst_memory_p);
astream.wait();
Expand Down
7 changes: 4 additions & 3 deletions paddle/fluid/operators/marker_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -63,8 +63,9 @@ class MarkerOpCPUKernel : public framework::OpKernel<T> {
auto marker_pos = ctx.Attr<std::string>("marker_pos");

platform::RecordEvent record_event(
"MarkerCPU", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
"MarkerCPU", "marker_" + marker_role + "_" + marker_pos,
platform::TracerEventType::OperatorInner, 1,
platform::EventRole::kInnerOp);
}
};
} // namespace operators
Expand Down
7 changes: 4 additions & 3 deletions paddle/fluid/operators/marker_op.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ limitations under the License. */
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"

namespace paddle {
namespace operators {
Expand Down Expand Up @@ -45,8 +45,9 @@ class MarkerOpCUDAKernel : public framework::OpKernel<T> {
auto* in_temp = A.mutable_data<T>({32, 1}, ctx.GetPlace());
auto* out_temp = B.mutable_data<T>({32, 1}, ctx.GetPlace());
platform::RecordEvent record_event(
"MarkerCUDA", platform::EventRole::kInnerOp,
"marker_" + marker_role + "_" + marker_pos);
"MarkerCUDA", "marker_" + marker_role + "_" + marker_pos,
platform::TracerEventType::OperatorInner, 1,
platform::EventRole::kInnerOp);
SimpleMarkerKernel<T><<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp,
32);
}
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -976,8 +976,9 @@ class ConvMKLDNNGradOpKernel : public framework::OpKernel<T> {
handler.AcquireReorder(reorder_dst_memory_p, diff_weights_memory_p);

{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, *diff_weights_memory_p,
*reorder_dst_memory_p);
astream.wait();
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,8 +264,9 @@ class ConvTransposeMKLDNNHandlerT
dev_ctx.SetBlob(key_reorder_p, reorder_p);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand All @@ -286,8 +287,9 @@ class ConvTransposeMKLDNNHandlerT
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -284,8 +284,9 @@ class FCPrimitiveFactory {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, *dst_mem);
astream.wait();
}
Expand All @@ -312,8 +313,9 @@ class FCPrimitiveFactory {

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream,
{{DNNL_ARG_FROM, *src_mem}, {DNNL_ARG_TO, *dst_mem}});
astream.wait();
Expand Down
10 changes: 6 additions & 4 deletions paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,9 @@ class MulPrimitiveFactory {

auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
Expand Down Expand Up @@ -277,8 +278,9 @@ class MulPrimitiveFactory {

auto &astream = platform::MKLDNNDeviceContext::tls().get_stream();
{
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder.execute(astream, src_mem, dst_mem);
astream.wait();
}
Expand Down
5 changes: 3 additions & 2 deletions paddle/fluid/platform/mkldnn_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ limitations under the License. */
#include "dnnl.hpp"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace paddle {
#ifdef PADDLE_WITH_MKLDNN
using MKLDNNMemoryFormat = dnnl::memory::format_tag;
Expand Down Expand Up @@ -190,7 +190,8 @@ inline void Reorder(dnnl::memory src, dnnl::memory dst,
auto reorder_prim = dnnl::reorder(src, dst);
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_prim.execute(astream, src, dst);
astream.wait();
}
Expand Down
21 changes: 13 additions & 8 deletions paddle/fluid/platform/mkldnn_reuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ class MKLDNNHandlerNoCachingT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand All @@ -221,8 +222,9 @@ class MKLDNNHandlerNoCachingT {
std::make_shared<dnnl::reorder>(*user_memory_p, *target_memory_p);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand Down Expand Up @@ -514,7 +516,8 @@ class MKLDNNHandlerT {
auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();

platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::TracerEventType::UserDefined,
2, platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand Down Expand Up @@ -558,8 +561,9 @@ class MKLDNNHandlerT {
dev_ctx_.SetBlob(key_reorder_p, reorder_p);

auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand All @@ -580,8 +584,9 @@ class MKLDNNHandlerT {
auto reorder_p = std::static_pointer_cast<dnnl::reorder>(
dev_ctx_.GetBlob(key_reorder_p));
if (reorder_p != nullptr) {
platform::RecordEvent record_reorder("int_reorder",
platform::EventRole::kUniqueOp);
platform::RecordEvent record_reorder(
"int_reorder", platform::TracerEventType::UserDefined, 2,
platform::EventRole::kUniqueOp);
reorder_p->execute(astream, {{DNNL_ARG_FROM, *user_memory_p},
{DNNL_ARG_TO, *target_memory_p}});
astream.wait();
Expand Down
Loading